From 7c0bc689c18bc213370732f7b087e57677665474 Mon Sep 17 00:00:00 2001 From: Vikas Date: Wed, 22 May 2024 14:56:17 +0530 Subject: [PATCH] Revert "[PLUGIN-1785]Column name cleansing done as per other file plugins. (#48)" This reverts commit 41e40a080e33a91c538996b2af73ca588885ec51. --- .../source/GoogleSheetsSourceConfig.java | 43 +++++-------------- .../source/GoogleSheetsSourceConfigTest.java | 41 +----------------- 2 files changed, 13 insertions(+), 71 deletions(-) diff --git a/src/main/java/io/cdap/plugin/google/sheets/source/GoogleSheetsSourceConfig.java b/src/main/java/io/cdap/plugin/google/sheets/source/GoogleSheetsSourceConfig.java index 53c816b..8fb53f1 100644 --- a/src/main/java/io/cdap/plugin/google/sheets/source/GoogleSheetsSourceConfig.java +++ b/src/main/java/io/cdap/plugin/google/sheets/source/GoogleSheetsSourceConfig.java @@ -98,7 +98,7 @@ public class GoogleSheetsSourceConfig extends GoogleFilteringSourceConfig { public static final String CONFIGURATION_PARSE_PROPERTY_NAME = "properties"; private static final Logger LOG = LoggerFactory.getLogger(GoogleSheetsSourceConfig.class); private static final Pattern CELL_ADDRESS = Pattern.compile("^([A-Z]+)([0-9]+)$"); - private static final Pattern NOT_VALID_PATTERN = Pattern.compile("[^A-Za-z0-9_]+"); + private static final Pattern COLUMN_NAME = Pattern.compile("^[A-Za-z_][A-Za-z0-9_-]*$"); private static LinkedHashMap dataSchemaInfo = new LinkedHashMap<>(); @Name(SHEETS_TO_PULL) @@ -593,7 +593,7 @@ private LinkedHashMap processColumns(List columnHeaders = new LinkedHashMap<>(); - final Map seenFieldNames = new HashMap<>(); + List headerTitles = new ArrayList<>(); for (int i = 0; i < Math.min(columnsRow.size(), lastDataColumn); i++) { CellData columnHeaderCell = columnsRow.get(i); @@ -609,7 +609,7 @@ private LinkedHashMap processColumns(List processColumns(List processSubHeaders(int startIndex, int length, List subColumnsRow, List dataRow, FailureCollector collector) { List subHeaders = new ArrayList<>(); - final Map seenFieldNames = new HashMap<>(); List titles = new ArrayList<>(); for (int i = startIndex; i < startIndex + length; i++) { String subHeaderTitle; @@ -643,7 +642,7 @@ private List processSubHeaders(int startIndex, int leng if (StringUtils.isEmpty(subHeaderTitle)) { subHeaderTitle = ColumnAddressConverter.getColumnName(i + 1); } - subHeaderTitle = checkTitleFormat(subHeaderTitle, seenFieldNames); + subHeaderTitle = checkTitleFormat(subHeaderTitle, i); } else { subHeaderTitle = ColumnAddressConverter.getColumnName(i + 1); } @@ -662,34 +661,14 @@ private List processSubHeaders(int startIndex, int leng return subHeaders; } - private String checkTitleFormat(String title, Map seenFieldNames) { - final String replacementChar = "_"; - - StringBuilder cleanFieldNameBuilder = new StringBuilder(); - - // Remove any spaces at the end of the strings - title = title.trim(); - - // If it's an empty string replace it with BLANK - if (title.isEmpty()) { - cleanFieldNameBuilder.append("BLANK"); - } else if (Character.isDigit(title.charAt(0))) { - // Prepend a col_ if the first character is a number - cleanFieldNameBuilder.append("col_"); - } - - // Replace all invalid characters with the replacement char - cleanFieldNameBuilder.append(NOT_VALID_PATTERN.matcher(title).replaceAll(replacementChar)); - - String cleanFieldName = cleanFieldNameBuilder.toString(); - String lowerCaseCleanFieldName = cleanFieldName.toLowerCase(); - int count = seenFieldNames.getOrDefault(lowerCaseCleanFieldName, 0) + 1; - seenFieldNames.put(lowerCaseCleanFieldName, count); - // In case column already exists in seenFieldNames map, append the count with column name. - if (count > 1) { - cleanFieldNameBuilder.append(replacementChar).append(count); + private String checkTitleFormat(String title, int columnIndex) { + if (!COLUMN_NAME.matcher(title).matches()) { + String defaultColumnName = ColumnAddressConverter.getColumnName(columnIndex + 1); + LOG.warn(String.format("Original column name '%s' doesn't satisfy column name requirements '%s', " + + "the default column name '%s' will be used.", title, COLUMN_NAME.pattern(), defaultColumnName)); + return defaultColumnName; } - return cleanFieldNameBuilder.toString(); + return title; } private Schema getDataCellSchema(List dataRow, int index, String headerName) { diff --git a/src/test/java/io/cdap/plugin/google/sheets/source/GoogleSheetsSourceConfigTest.java b/src/test/java/io/cdap/plugin/google/sheets/source/GoogleSheetsSourceConfigTest.java index d227aa8..55969a9 100644 --- a/src/test/java/io/cdap/plugin/google/sheets/source/GoogleSheetsSourceConfigTest.java +++ b/src/test/java/io/cdap/plugin/google/sheets/source/GoogleSheetsSourceConfigTest.java @@ -359,13 +359,13 @@ public void testProcessColumnsInvalidTitles() Assert.assertTrue(columns.get(0).getSubColumns().isEmpty()); // check complex columns, top header should have column name as name - Assert.assertEquals("title_with_space", columns.get(1).getHeaderTitle()); + Assert.assertEquals("B", columns.get(1).getHeaderTitle()); List subColumns = columns.get(1).getSubColumns(); Assert.assertFalse(subColumns.isEmpty()); // check sub-columns Assert.assertEquals(2, subColumns.size()); - Assert.assertEquals("col_9titleWithFirstNumber", subColumns.get(0).getHeaderTitle()); + Assert.assertEquals("B", subColumns.get(0).getHeaderTitle()); Assert.assertTrue(subColumns.get(0).getSubColumns().isEmpty()); Assert.assertEquals("d", subColumns.get(1).getHeaderTitle()); Assert.assertTrue(subColumns.get(0).getSubColumns().isEmpty()); @@ -376,41 +376,4 @@ private void setFieldValue(String fieldName, Object fieldValue) throws NoSuchFie metadataKeyCellsField.setAccessible(true); metadataKeyCellsField.set(config, fieldValue); } - - @Test - public void testProcessColumnsSameCaseSensitiveTitles() - throws NoSuchMethodException, InvocationTargetException, IllegalAccessException { - Method processColumnsMethod = config.getClass().getDeclaredMethod("processColumns", List.class, - List.class, List.class, List.class, int.class, - FailureCollector.class); - processColumnsMethod.setAccessible(true); - - List columnsRow = new ArrayList<>(); - columnsRow.add(new CellData().setFormattedValue("title with space")); - columnsRow.add(new CellData().setFormattedValue("Title with space")); - columnsRow.add(new CellData().setFormattedValue("Title%with%space")); - - List dataRow = new ArrayList<>(); - dataRow.add(new CellData().setUserEnteredValue(new ExtendedValue().setStringValue("aa"))); - dataRow.add(new CellData().setUserEnteredValue(new ExtendedValue().setNumberValue(13d))); - dataRow.add(new CellData().setUserEnteredValue(new ExtendedValue().setBoolValue(true))); - - List columnMerges = new ArrayList<>(); - - FailureCollector collector = new DefaultFailureCollector("", Collections.EMPTY_MAP); - - int lastDataColumn = 3; - - LinkedHashMap columns = - (LinkedHashMap) processColumnsMethod.invoke(config, columnsRow, - null, dataRow, columnMerges, - lastDataColumn, collector); - - Assert.assertEquals(3, columns.size()); - Assert.assertTrue(columns.keySet().containsAll(Arrays.asList(0, 1, 2))); - - Assert.assertEquals("title_with_space", columns.get(0).getHeaderTitle()); - Assert.assertEquals("Title_with_space_2", columns.get(1).getHeaderTitle()); - Assert.assertEquals("Title_with_space_3", columns.get(2).getHeaderTitle()); - } }