Skip to content

Commit

Permalink
Merge pull request #44 from cloudsufi/PLUGIN-1762-and-1764
Browse files Browse the repository at this point in the history
[PLUGIN-1763] Added File Identifier for Source plugins.
  • Loading branch information
vikasrathee-cs authored Mar 26, 2024
2 parents 9660439 + bb8e68e commit 6da4aff
Show file tree
Hide file tree
Showing 15 changed files with 356 additions and 35 deletions.
12 changes: 12 additions & 0 deletions docs/GoogleDrive-batchsource.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ Properties
----------
### Basic

**Identifier Type:** Specifies whether the given Google Drive ID refers to a file or a directory.

**Directory Identifier:** Identifier of the source folder.

This comes after `folders/` in the URL. For example, if the URL is
Expand All @@ -17,6 +19,16 @@ https://drive.google.com/drive/folders/1dyUEebJaFnWa3Z4n0BFMVAXQ7mfUH11g?resourc
```
Then the Directory Identifier would be `1dyUEebJaFnWa3Z4n0BFMVAXQ7mfUH11g`.

**File Identifier:** Identifier of the file.

This comes after `file/d/`, `document/d/`, or `spreadsheets/d/` in the URL. For example, if the URL is
```
https://docs.google.com/file/d/17W3vOhBwe0i24OdVNsbz8rAMClzUitKeAbumTqWFrkows
```

Then the File Identifier would be `17W3vOhBwe0i24OdVNsbz8rAMClzUitKeAbumTqWFrkows`.

**Note:** Either Directory Identifier or File Identifier must have a value, depending on the selected Identifier Type.

**File Metadata Properties:** Properties that represent metadata of files.
They will be a part of the output structured record. Descriptions for properties can be viewed at
[Drive API file reference](https://developers.google.com/drive/api/v3/reference/files).
Expand Down
11 changes: 11 additions & 0 deletions docs/GoogleSheets-batchsource.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ Properties
----------
### Basic

**Identifier Type:** Specifies whether the given Google Drive ID refers to a file or a directory.

**Directory Identifier:** Identifier of the source folder.

This comes after `folders/` in the URL. For example, if the URL is
Expand All @@ -17,6 +19,15 @@ https://drive.google.com/drive/folders/1dyUEebJaFnWa3Z4n0BFMVAXQ7mfUH11g?resourc
```
Then the Directory Identifier would be `1dyUEebJaFnWa3Z4n0BFMVAXQ7mfUH11g`.

**File Identifier:** Identifier of the spreadsheet file.

This comes after `spreadsheets/d/` in the URL. For example, if the URL is
```
https://docs.google.com/spreadsheets/d/17W3vOhBwe0i24OdVNsbz8rAMClzUitKeAbumTqWFrkows
```
Then the File Identifier would be `17W3vOhBwe0i24OdVNsbz8rAMClzUitKeAbumTqWFrkows`.

**Note:** Either Directory Identifier or File Identifier must have a value, depending on the selected Identifier Type.

### Filtering

**Filter:** Filter that can be applied to the files in the selected directory.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ public abstract class GoogleAuthBaseConfig extends PluginConfig {
public static final String ACCESS_TOKEN = "accessToken";
public static final String ACCESS_TOKEN_LABEL = "Access Token";
public static final String OAUTH_METHOD = "oAuthMethod";
public static final String FILE_IDENTIFIER = "fileIdentifier";
public static final String IDENTIFIER_TYPE = "identifierType";

private static final String IS_SET_FAILURE_MESSAGE_PATTERN = "'%s' property is empty or macro is not available.";

Expand Down Expand Up @@ -127,12 +129,27 @@ public abstract class GoogleAuthBaseConfig extends PluginConfig {
@Macro
protected String serviceAccountJson;

@Name(IDENTIFIER_TYPE)
@Nullable
@Description("Identifier specifies whether the given Id for Google Drive entity is a file or directory")
private String identifierType;

@Name(DIRECTORY_IDENTIFIER)
@Nullable
@Macro
@Description("Identifier of the folder. This comes after “folders/” in the URL. For example, if the URL was " +
"“https://drive.google.com/drive/folders/1dyUEebJaFnWa3Z4n0BFMVAXQ7mfUH11g?resourcekey=0-XVijrJSp3E3gkdJp20MpCQ”, "
+ "then the Directory Identifier would be “1dyUEebJaFnWa3Z4n0BFMVAXQ7mfUH11g”.")
private String directoryIdentifier;

@Nullable
@Macro
@Name(FILE_IDENTIFIER)
@Description("Identifier of the file. This comes after “file/d/ or spreadsheets/d/ or document/d/” in the URL. " +
"For example, if the URL was “https://drive.google.com/file/d/16npTpL3ozkAzB5kLQ-oQD3IlTZhnnh2w1/view”, "
+ "then the File Identifier would be “16npTpL3ozkAzB5kLQ-oQD3IlTZhnnh2w1”.")
private String fileIdentifier;

/**
* Returns the ValidationResult.
*
Expand All @@ -141,7 +158,7 @@ public abstract class GoogleAuthBaseConfig extends PluginConfig {
*/
public ValidationResult validate(FailureCollector collector) {
IdUtils.validateReferenceName(referenceName, collector);

checkIfDirectoryOrFileIdentifierExists(collector);
ValidationResult validationResult = new ValidationResult();
if (validateAuthType(collector)) {
AuthType authType = getAuthType();
Expand All @@ -162,13 +179,10 @@ public ValidationResult validate(FailureCollector collector) {
try {
GoogleDriveClient client = new GoogleDriveClient(this);

// validate auth
validateCredentials(collector, client);

// validate directory
validateDirectoryIdentifier(collector, client);

validationResult.setDirectoryAccessible(true);
// check directory or file access
if (isDirectoryOrFileAccessible(collector, client)) {
validationResult.setDirectoryOrFileAccessible(true);
}
} catch (Exception e) {
collector.addFailure(
String.format("Exception during authentication/directory properties check: %s.", e.getMessage()),
Expand Down Expand Up @@ -229,27 +243,51 @@ private boolean validateServiceAccount(FailureCollector collector) {
return collector.getValidationFailures().size() == 0;
}

private void validateCredentials(FailureCollector collector, GoogleDriveClient driveClient) throws IOException {
try {
driveClient.checkRootFolder();
} catch (GoogleJsonResponseException e) {
collector.addFailure(e.getDetails().getMessage(), "Provide valid credentials.")
.withConfigProperty(NAME_SERVICE_ACCOUNT_TYPE)
.withStacktrace(e.getStackTrace());
private boolean isDirectoryOrFileAccessible(FailureCollector collector, GoogleDriveClient driveClient)
throws IOException {
if (containsMacro(FILE_IDENTIFIER) || containsMacro(DIRECTORY_IDENTIFIER)) {
return false;
}
}

private void validateDirectoryIdentifier(FailureCollector collector, GoogleDriveClient driveClient)
throws IOException {
if (!containsMacro(DIRECTORY_IDENTIFIER)) {
if (IdentifierType.DIRECTORY_IDENTIFIER.equals(getIdentifierType()) &&
!Strings.isNullOrEmpty(directoryIdentifier) && !containsMacro(DIRECTORY_IDENTIFIER)) {
try {
driveClient.isFolderAccessible(directoryIdentifier);
return true;
} catch (GoogleJsonResponseException e) {
collector.addFailure(e.getDetails().getMessage(), "Provide an existing folder identifier.")
.withConfigProperty(DIRECTORY_IDENTIFIER)
.withStacktrace(e.getStackTrace());
}
}

if (IdentifierType.FILE_IDENTIFIER.equals(getIdentifierType()) &&
!Strings.isNullOrEmpty(fileIdentifier) && !containsMacro(FILE_IDENTIFIER)) {
try {
driveClient.isFileAccessible(fileIdentifier);
return true;
} catch (GoogleJsonResponseException e) {
collector.addFailure(e.getDetails().getMessage(), "Provide an existing file identifier.")
.withConfigProperty(FILE_IDENTIFIER)
.withStacktrace(e.getStackTrace());
}
}
throw collector.getOrThrowException();
}

/**
 * Adds a validation failure when the identifier that matches the selected identifier type
 * is null or empty and is not provided through a macro.
 *
 * @param collector collector that receives the failure, tagged with the offending property
 */
protected void checkIfDirectoryOrFileIdentifierExists(FailureCollector collector) {
  // Resolve the type once; exactly one of the two branches can apply.
  IdentifierType selectedType = getIdentifierType();

  if (selectedType == IdentifierType.DIRECTORY_IDENTIFIER) {
    if (Strings.isNullOrEmpty(directoryIdentifier) && !containsMacro(DIRECTORY_IDENTIFIER)) {
      collector
        .addFailure("Directory Identifier can not be null.", "Provide Directory Identifier.")
        .withConfigProperty(DIRECTORY_IDENTIFIER);
    }
  } else if (selectedType == IdentifierType.FILE_IDENTIFIER) {
    if (Strings.isNullOrEmpty(fileIdentifier) && !containsMacro(FILE_IDENTIFIER)) {
      collector
        .addFailure("File Identifier can not be null.", "Provide File Identifier.")
        .withConfigProperty(FILE_IDENTIFIER);
    }
  }
}

protected boolean checkPropertyIsSet(FailureCollector collector, String propertyValue, String propertyName,
Expand All @@ -269,10 +307,18 @@ public String getReferenceName() {
return referenceName;
}

/**
 * Resolves the configured identifier type.
 *
 * @return {@link IdentifierType#DIRECTORY_IDENTIFIER} when the property is null or empty,
 *         otherwise the constant whose name equals the configured value ignoring case
 * @throws IllegalArgumentException if the configured value matches no {@link IdentifierType} constant
 */
public IdentifierType getIdentifierType() {
  if (Strings.isNullOrEmpty(identifierType)) {
    return IdentifierType.DIRECTORY_IDENTIFIER;
  }
  // Upper-case with Locale.ROOT so the result does not depend on the JVM default locale
  // (e.g. under a Turkish locale "file_identifier" would become "FİLE_IDENTİFİER" and fail valueOf).
  return IdentifierType.valueOf(identifierType.toUpperCase(java.util.Locale.ROOT));
}
/**
 * Returns the configured directory identifier; may be null because the property is nullable.
 */
public String getDirectoryIdentifier() {
return directoryIdentifier;
}

/**
 * Returns the configured file identifier; may be null because the property is nullable.
 */
public String getFileIdentifier() {
return fileIdentifier;
}

/**
 * Returns the {@link AuthType} obtained by passing the raw authType property to AuthType.fromValue.
 */
public AuthType getAuthType() {
return AuthType.fromValue(authType);
}
Expand All @@ -297,10 +343,17 @@ public void setAccountFilePath(String accountFilePath) {
this.accountFilePath = accountFilePath;
}

/**
 * Sets the raw identifier type value; it is stored as given and only interpreted by getIdentifierType().
 *
 * @param identifierType identifier type as a string, may be null
 */
public void setIdentifierType(String identifierType) {
this.identifierType = identifierType;
}
/**
 * Sets the directory identifier.
 *
 * @param directoryIdentifier identifier of the source folder, may be null
 */
public void setDirectoryIdentifier(String directoryIdentifier) {
this.directoryIdentifier = directoryIdentifier;
}

/**
 * Sets the file identifier.
 *
 * @param fileIdentifier identifier of the source file, may be null
 */
public void setFileIdentifier(String fileIdentifier) {
this.fileIdentifier = fileIdentifier;
}

/**
 * Sets the client id property.
 *
 * @param clientId value to store
 */
public void setClientId(String clientId) {
this.clientId = clientId;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@
*/
public class GoogleDriveClient<C extends GoogleAuthBaseConfig> {
private static final JsonFactory JSON_FACTORY = JacksonFactory.getDefaultInstance();
private static final String ROOT_FOLDER_ID = "root";
protected final Drive service;
protected final C config;
protected NetHttpTransport httpTransport;
Expand Down Expand Up @@ -126,11 +125,11 @@ protected List<String> getRequiredScopes() {
return Collections.singletonList(DriveScopes.DRIVE_READONLY);
}

public void checkRootFolder() throws IOException {
service.files().get(ROOT_FOLDER_ID).setSupportsAllDrives(true).execute();
}

/**
 * Issues a Drive files.get request for the given folder id with the supportsAllDrives flag set.
 * Nothing is returned; the response is discarded and any failure surfaces as an exception.
 *
 * @param folderId identifier of the folder to look up
 * @throws IOException if executing the request fails
 */
public void isFolderAccessible(String folderId) throws IOException {
  Drive.Files.Get folderRequest = service.files().get(folderId);
  folderRequest.setSupportsAllDrives(true);
  folderRequest.execute();
}

/**
 * Issues a Drive files.get request for the given file id with the supportsAllDrives flag set.
 * Nothing is returned; the response is discarded and any failure surfaces as an exception.
 *
 * @param fileId identifier of the file to look up
 * @throws IOException if executing the request fails
 */
public void isFileAccessible(String fileId) throws IOException {
  Drive.Files.Get fileRequest = service.files().get(fileId);
  fileRequest.setSupportsAllDrives(true);
  fileRequest.execute();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,10 @@ public List<File> getFilesSummary(List<ExportedType> exportedTypes, int filesNum
String nextToken = "";
int retrievedFiles = 0;
int actualFilesNumber = filesNumber;
if (IdentifierType.FILE_IDENTIFIER.equals(config.getIdentifierType())) {
files.add(service.files().get(config.getFileIdentifier()).setSupportsAllDrives(true).execute());
return files;
}
Drive.Files.List request = service.files().list()
.setSupportsAllDrives(true)
.setIncludeItemsFromAllDrives(true)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ public class GoogleFilteringSourceConfig extends GoogleAuthBaseConfig {
protected String filter;

@Name(MODIFICATION_DATE_RANGE)
@Nullable
@Description("Filter that narrows set of files by modified date range. \n" +
"User can select either among predefined or custom entered ranges. \n" +
"For _Custom_ selection the dates range can be specified via **Start date** and **End date**.")
Expand Down Expand Up @@ -98,7 +99,7 @@ && getModificationDateRangeType().equals(ModifiedDateRangeType.CUSTOM)) {
}

private boolean validateModificationDateRange(FailureCollector collector) {
if (!containsMacro(MODIFICATION_DATE_RANGE)) {
if (!containsMacro(MODIFICATION_DATE_RANGE) && IdentifierType.DIRECTORY_IDENTIFIER.equals(getIdentifierType())) {
try {
getModificationDateRangeType();
return true;
Expand Down
25 changes: 25 additions & 0 deletions src/main/java/io/cdap/plugin/google/common/IdentifierType.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
/*
* Copyright © 2024 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/

package io.cdap.plugin.google.common;

/**
* Identifier type specifies whether the selected Google Drive entity is a file or directory.
*/
public enum IdentifierType {
// The configured identifier refers to a single file.
FILE_IDENTIFIER,
// The configured identifier refers to a directory.
DIRECTORY_IDENTIFIER
}
10 changes: 5 additions & 5 deletions src/main/java/io/cdap/plugin/google/common/ValidationResult.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,13 @@
* Wrapper to save validation results. Is used to transfer validation results.
*/
public class ValidationResult {
private boolean directoryAccessible = false;
private boolean directoryOrFileAccessible = false;

public boolean isDirectoryAccessible() {
return directoryAccessible;
public boolean isDirectoryOrFileAccessible() {
return directoryOrFileAccessible;
}

public void setDirectoryAccessible(boolean directoryAccessible) {
this.directoryAccessible = directoryAccessible;
public void setDirectoryOrFileAccessible(boolean directoryOrFileAccessible) {
this.directoryOrFileAccessible = directoryOrFileAccessible;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,14 @@ public static GoogleDriveSourceConfig of(JsonObject properties) throws IOExcepti
if (properties.has(GoogleDriveSourceConfig.OAUTH_METHOD)) {
googleDriveSourceConfig.setoAuthMethod(properties.get(GoogleDriveSourceConfig.OAUTH_METHOD).getAsString());
}
if (properties.has(GoogleDriveSourceConfig.FILE_IDENTIFIER)) {
googleDriveSourceConfig.setFileIdentifier(
properties.get(GoogleDriveSourceConfig.FILE_IDENTIFIER).getAsString());
}
if (properties.has(GoogleDriveSourceConfig.IDENTIFIER_TYPE)) {
googleDriveSourceConfig.setIdentifierType(
properties.get(GoogleDriveSourceConfig.IDENTIFIER_TYPE).getAsString());
}
return googleDriveSourceConfig;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,8 @@ private boolean shouldGetSchema() {
!containsMacro(ACCOUNT_FILE_PATH) && !containsMacro(NAME_SERVICE_ACCOUNT_JSON) &&
!containsMacro(CLIENT_ID) && !containsMacro(CLIENT_SECRET) &&
!containsMacro(REFRESH_TOKEN) && !containsMacro(ACCESS_TOKEN) &&
!containsMacro(OAUTH_METHOD);
!containsMacro(OAUTH_METHOD) && !containsMacro(FILE_IDENTIFIER) &&
!containsMacro(DIRECTORY_IDENTIFIER);
}

/**
Expand All @@ -303,7 +304,7 @@ public ValidationResult validate(FailureCollector collector) {
validateLastDataColumnIndexAndLastRowIndex(collector);
validateSpreadsheetAndSheetFieldNames(collector);

if (collector.getValidationFailures().isEmpty() && validationResult.isDirectoryAccessible()) {
if (collector.getValidationFailures().isEmpty() && validationResult.isDirectoryOrFileAccessible()) {
GoogleDriveFilteringClient driveClient;
GoogleSheetsSourceClient sheetsSourceClient;
try {
Expand All @@ -318,7 +319,8 @@ public ValidationResult validate(FailureCollector collector) {
spreadsheetsFiles = driveClient
.getFilesSummary(Collections.singletonList(ExportedType.SPREADSHEETS), 1);
} catch (ExecutionException | RetryException e) {
collector.addFailure("Invalid search query, see https://developers.google.com/drive/api/v3/ref-search-terms",
collector.addFailure(
String.format("Failed to get spreadsheet file summary due to reason : %s", e.getMessage()),
null).withStacktrace(e.getStackTrace());
return validationResult;
}
Expand Down Expand Up @@ -1270,6 +1272,15 @@ public static GoogleSheetsSourceConfig of(JsonObject properties) throws IOExcept
googleSheetsSourceConfig.setAccessToken(
properties.get(GoogleSheetsSourceConfig.ACCESS_TOKEN).getAsString());
}
if (properties.has(GoogleSheetsSourceConfig.FILE_IDENTIFIER)) {
googleSheetsSourceConfig.setFileIdentifier(
properties.get(GoogleSheetsSourceConfig.FILE_IDENTIFIER).getAsString());
}
if (properties.has(GoogleSheetsSourceConfig.IDENTIFIER_TYPE)) {
googleSheetsSourceConfig.setIdentifierType(
properties.get(GoogleSheetsSourceConfig.IDENTIFIER_TYPE).getAsString());
}

return googleSheetsSourceConfig;
}
}
Loading

0 comments on commit 6da4aff

Please sign in to comment.