Skip to content

Commit

Permalink
PR Rework
Browse files Browse the repository at this point in the history
  • Loading branch information
vikasrathee-cs committed Mar 28, 2024
1 parent d090e69 commit fd3f6ad
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 19 deletions.
4 changes: 2 additions & 2 deletions docs/GoogleSheets-batchsource.md
Original file line number Diff line number Diff line change
Expand Up @@ -140,8 +140,8 @@ _Treat first row as column names_ - the plugin uses first row for schema definin
**Column Names Row Number:** Number of the row to be treated as a header.
Only shown when the 'Column Names Selection' field is set to 'Custom row as column names'.

**Auto Detect Rows and Columns:** Field to enable automatic detection of the number of rows and columns to read from the
sheet.
**Auto Detect Number of Rows and Columns:** Field to enable automatic detection of the number of rows and columns to
read from the sheet.

**Number of Columns to Read:** Last column the plugin will read as data. It will be ignored if the Column
Names Row contains fewer columns.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,18 +90,17 @@ public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptCont
}

private void populateBufferedTasks() {
List<Sheet> sheetTitles;
List<Sheet> sheetList;
try {
sheetTitles = getSheetTitles();
sheetList = getSheetList();
} catch (ExecutionException | RetryException e) {
throw new RuntimeException("Exception during sheet titles retrieving.", e);
}
sheetTitles.forEach(t -> {
sheetList.forEach(t -> {
int firstDataRow = config.getActualFirstDataRow();
// Each sheet can have different number of records so last row can be different sheet wise
// and in case auto-detect for rows is enabled, It will fetch all records from the sheet
int lastDataRow = config.getActualLastDataRow(
t.getProperties().getGridProperties().getRowCount());
int lastDataRow = config.getActualLastDataRow(t.getProperties().getGridProperties().getRowCount());
int rowsNumber = lastDataRow - firstDataRow + 1;
overallRowsNumber += rowsNumber;
int counter = 0;
Expand All @@ -119,7 +118,7 @@ private void populateBufferedTasks() {
currentGroupedRowTask = null;
}

private List<Sheet> getSheetTitles() throws ExecutionException, RetryException {
private List<Sheet> getSheetList() throws ExecutionException, RetryException {
List<Sheet> sheetList = new ArrayList<>();
switch (config.getSheetsToPull()) {
case ALL:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -185,18 +185,20 @@ public class GoogleSheetsSourceConfig extends GoogleFilteringSourceConfig {
@Macro
private Integer lastFooterRow;

@Name(AUTO_DETECT_ROWS_AND_COLUMNS)
@Nullable
@Name(AUTO_DETECT_ROWS_AND_COLUMNS)
@Description("Field to enable automatic detection of the number of rows and columns to read from the sheet.")
private boolean autoDetectRowsAndColumns;
private Boolean autoDetectRowsAndColumns;

@Name(LAST_DATA_COLUMN)
@Nullable
@Description("Last column plugin will read as data. It will be ignored if the Column Names " +
"Row contain less number of columns.")
@Macro
private String lastDataColumn;

@Name(LAST_DATA_ROW)
@Nullable
@Description("Last row plugin will read as data.")
@Macro
private String lastDataRow;
Expand Down Expand Up @@ -307,7 +309,9 @@ public ValidationResult validate(FailureCollector collector) {
dataSchemaInfo = new LinkedHashMap<>();

validateColumnNamesRow(collector);
validateLastDataColumnIndexAndLastRowIndex(collector);
if (!getAutoDetectRowsAndColumns()) {
validateLastDataColumnIndexAndLastRowIndex(collector);
}
validateSpreadsheetAndSheetFieldNames(collector);

if (collector.getValidationFailures().isEmpty() && validationResult.isDirectoryOrFileAccessible()) {
Expand Down Expand Up @@ -553,7 +557,7 @@ private void getAndValidateSheetSchema(FailureCollector collector, GoogleSheetsS
if (columnMerges.isEmpty()) {
dataRow = subColumnsRow;
}

lastDataColumn = lastDataColumn == 0 ? columnsRow.size() : lastDataColumn;
resultHeaderTitles = processColumns(columnsRow, subColumnsRow, dataRow, columnMerges,
lastDataColumn, collector);
if (collector.getValidationFailures().isEmpty()) {
Expand All @@ -567,6 +571,7 @@ private void getAndValidateSheetSchema(FailureCollector collector, GoogleSheetsS
MergesForNumeredRows firstRowData = sheetsSourceClient.getSingleRows(firstFileTitles.getKey(),
firstFileTitles.getValue().get(0), Collections.singleton(firstDataRow));
List<CellData> dataCells = firstRowData.getNumeredRows().get(firstDataRow);
lastDataColumn = lastDataColumn == 0 ? dataCells.size() : lastDataColumn;
if (CollectionUtils.isEmpty(dataCells)) {
dataSchemaInfo = defaultGeneratedHeaders(lastDataColumn);
} else {
Expand Down Expand Up @@ -919,16 +924,17 @@ public int getLastFooterRow() {
return lastFooterRow == null ? 0 : lastFooterRow;
}

@Nullable
public boolean getAutoDetectRowsAndColumns() {
return autoDetectRowsAndColumns;
return Boolean.TRUE.equals(autoDetectRowsAndColumns);
}

public Integer getLastDataColumn() {
return Integer.parseInt(lastDataColumn);
return lastDataColumn == null ? 0 : Integer.parseInt(lastDataColumn);
}

public Integer getLastDataRow() {
return Integer.parseInt(lastDataRow);
return lastDataRow == null ? 0 : Integer.parseInt(lastDataRow);
}

public String getMetadataCells() {
Expand Down
6 changes: 2 additions & 4 deletions widgets/GoogleSheets-batchsource.json
Original file line number Diff line number Diff line change
Expand Up @@ -389,7 +389,7 @@
},
{
"widget-type": "toggle",
"label": "Auto Detect Rows and Columns",
"label": "Auto Detect Number of Rows and Columns",
"name": "autoDetectRowsAndColumns",
"widget-attributes": {
"on": {
Expand Down Expand Up @@ -652,9 +652,7 @@
{
"name": "Auto Detect Rows and Columns",
"condition": {
"property": "autoDetectRowsAndColumns",
"operator": "equal to",
"value": "false"
"expression": "autoDetectRowsAndColumns != true"
},
"show": [
{
Expand Down

0 comments on commit fd3f6ad

Please sign in to comment.