Skip to content

Commit

Permalink
[Improve][Excel] Support read blank string & auto type-cast (#8111)
Browse files: browse the repository at this point in the history
  • Loading branch information
hailin0 authored Nov 25, 2024
1 parent 5211c63 commit 3a54f12
Show file tree
Hide file tree
Showing 3 changed files with 121 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,8 @@ private Object getCellValue(CellType cellType, Cell cell) {
return cell.getLocalDateTimeCellValue();
}
return cell.getNumericCellValue();
case BLANK:
return "";
case ERROR:
break;
default:
Expand All @@ -206,14 +208,25 @@ private Object getCellValue(CellType cellType, Cell cell) {
@SneakyThrows
private Object convert(Object field, SeaTunnelDataType<?> fieldType) {
if (field == null) {
return "";
return null;
}

SqlType sqlType = fieldType.getSqlType();
if (!(SqlType.STRING.equals(sqlType)) && "".equals(field)) {
return null;
}
switch (sqlType) {
case MAP:
case ARRAY:
return objectMapper.readValue((String) field, fieldType.getTypeClass());
case STRING:
if (field instanceof Double) {
String stringValue = field.toString();
if (stringValue.endsWith(".0")) {
return stringValue.substring(0, stringValue.length() - 2);
}
return stringValue;
}
return String.valueOf(field);
case DOUBLE:
return Double.parseDouble(field.toString());
Expand Down Expand Up @@ -250,7 +263,7 @@ private Object convert(Object field, SeaTunnelDataType<?> fieldType) {
return LocalDateTime.parse(
(String) field, DateTimeFormatter.ofPattern(datetimeFormat.getValue()));
case NULL:
return "";
return null;
case BYTES:
return field.toString().getBytes(StandardCharsets.UTF_8);
case ROW:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,113 @@ public class ExcelReadStrategyTest {

/**
 * Reads {@code /excel/test_read_excel.xlsx} through {@link ExcelReadStrategy}, using the plugin
 * config in {@code /excel/test_read_excel.conf}, and checks the first three collected rows:
 *
 * <ul>
 *   <li>row 1: all 14 fields are populated; both the Java class and the value of each field
 *       are asserted;
 *   <li>row 2: field 0 is a {@code Byte}, field 4 equals {@code "1"}, every other field is
 *       {@code null};
 *   <li>row 3: field 0 is a {@code Byte}, field 4 equals the empty string, every other field is
 *       {@code null}.
 * </ul>
 *
 * @throws IOException declared by the read/config calls used below
 * @throws URISyntaxException if a resource URL cannot be converted to a URI
 */
@Test
public void testExcelRead() throws IOException, URISyntaxException {
// NOTE(review): these two calls delegate to a testExcelRead(String) helper. In this diff view
// they look like the pre-change body shown next to its replacement (the hunk header reports
// 12 old vs 113 new lines) — confirm against the actual file before relying on them.
testExcelRead("/excel/test_read_excel.xlsx");
testExcelRead("/excel/test_read_excel_date_string.xlsx");
// Locate the workbook fixture and its config on the test classpath; fail fast if missing.
URL excelFile = ExcelReadStrategyTest.class.getResource("/excel/test_read_excel.xlsx");
URL conf = ExcelReadStrategyTest.class.getResource("/excel/test_read_excel.conf");
Assertions.assertNotNull(excelFile);
Assertions.assertNotNull(conf);
String excelFilePath = Paths.get(excelFile.toURI()).toString();
String confPath = Paths.get(conf.toURI()).toString();
Config pluginConfig = ConfigFactory.parseFile(new File(confPath));
// Configure the strategy with the parsed plugin config and a LocalConf before reading.
ExcelReadStrategy excelReadStrategy = new ExcelReadStrategy();
LocalConf localConf = new LocalConf(FS_DEFAULT_NAME_DEFAULT);
excelReadStrategy.setPluginConfig(pluginConfig);
excelReadStrategy.init(localConf);

List<String> fileNamesByPath = excelReadStrategy.getFileNamesByPath(excelFilePath);
// The catalog table (schema) is built from the same config file and handed to the strategy.
CatalogTable userDefinedCatalogTable = CatalogTableUtil.buildWithConfig(pluginConfig);
excelReadStrategy.setCatalogTable(userDefinedCatalogTable);
TestCollector testCollector = new TestCollector();
// Only the first resolved file is read; rows are gathered by the collector.
excelReadStrategy.read(fileNamesByPath.get(0), "", testCollector);

// Row 1: fully populated row.
SeaTunnelRow seaTunnelRow = testCollector.getRows().get(0);

// NOTE(review): if Assertions is JUnit 5's, assertEquals takes (expected, actual); the
// arguments in this method are passed actual-first, which only affects failure messages.
Assertions.assertEquals(seaTunnelRow.getArity(), 14);
// First verify the Java type produced for each of the 14 columns...
Assertions.assertEquals(seaTunnelRow.getField(0).getClass(), Byte.class);
Assertions.assertEquals(seaTunnelRow.getField(1).getClass(), Short.class);
Assertions.assertEquals(seaTunnelRow.getField(2).getClass(), Integer.class);
Assertions.assertEquals(seaTunnelRow.getField(3).getClass(), Long.class);
Assertions.assertEquals(seaTunnelRow.getField(4).getClass(), String.class);
Assertions.assertEquals(seaTunnelRow.getField(5).getClass(), Double.class);
Assertions.assertEquals(seaTunnelRow.getField(6).getClass(), Float.class);
Assertions.assertEquals(seaTunnelRow.getField(7).getClass(), BigDecimal.class);
Assertions.assertEquals(seaTunnelRow.getField(8).getClass(), Boolean.class);
Assertions.assertEquals(seaTunnelRow.getField(9).getClass(), LinkedHashMap.class);
Assertions.assertEquals(seaTunnelRow.getField(10).getClass(), String[].class);
Assertions.assertEquals(seaTunnelRow.getField(11).getClass(), LocalDate.class);
Assertions.assertEquals(seaTunnelRow.getField(12).getClass(), LocalDateTime.class);
Assertions.assertEquals(seaTunnelRow.getField(13).getClass(), LocalTime.class);

// ...then verify the concrete value of each column.
Assertions.assertEquals(seaTunnelRow.getField(0), (byte) 1);
Assertions.assertEquals(seaTunnelRow.getField(1), (short) 22);
Assertions.assertEquals(seaTunnelRow.getField(2), 333);
Assertions.assertEquals(seaTunnelRow.getField(3), 4444L);
Assertions.assertEquals(seaTunnelRow.getField(4), "Cosmos");
Assertions.assertEquals(seaTunnelRow.getField(5), 5.555);
Assertions.assertEquals(seaTunnelRow.getField(6), (float) 6.666);
Assertions.assertEquals(seaTunnelRow.getField(7), new BigDecimal("7.78"));
Assertions.assertEquals(seaTunnelRow.getField(8), Boolean.FALSE);
// Map column: compared against a LinkedHashMap holding the same two entries.
Assertions.assertEquals(
seaTunnelRow.getField(9),
new LinkedHashMap<String, String>() {
{
put("name", "Ivan");
put("age", "26");
}
});
// Array column: arrays need assertArrayEquals for element-wise comparison.
Assertions.assertArrayEquals(
(String[]) seaTunnelRow.getField(10), new String[] {"Ivan", "Dusayi"});
Assertions.assertEquals(
seaTunnelRow.getField(11),
DateUtils.parse("2024-01-31", DateUtils.Formatter.YYYY_MM_DD));
Assertions.assertEquals(
seaTunnelRow.getField(12),
DateTimeUtils.parse(
"2024-01-31 16:00:48", DateTimeUtils.Formatter.YYYY_MM_DD_HH_MM_SS));
Assertions.assertEquals(
seaTunnelRow.getField(13),
TimeUtils.parse("16:00:48", TimeUtils.Formatter.HH_MM_SS));

// Row 2: only fields 0 and 4 carry values; every other field is expected to be null.
SeaTunnelRow row2 = testCollector.getRows().get(1);
Assertions.assertEquals(row2.getArity(), 14);
// check number blank
Assertions.assertEquals(row2.getField(0).getClass(), Byte.class);
Assertions.assertNull(row2.getField(1));
Assertions.assertNull(row2.getField(2));
Assertions.assertNull(row2.getField(3));
// The string column is expected to hold "1" (not "1.0"); presumably the fixture cell is a
// numeric 1 cast to a string — confirm against the .xlsx fixture.
Assertions.assertEquals(row2.getField(4), "1");
Assertions.assertNull(row2.getField(5));
Assertions.assertNull(row2.getField(6));
Assertions.assertNull(row2.getField(7));
Assertions.assertNull(row2.getField(8));
Assertions.assertNull(row2.getField(9));
Assertions.assertNull(row2.getField(10));
Assertions.assertNull(row2.getField(11));
Assertions.assertNull(row2.getField(12));
Assertions.assertNull(row2.getField(13));

// Row 3: same shape as row 2, except the string column is expected to be the empty string
// rather than null.
SeaTunnelRow row3 = testCollector.getRows().get(2);
Assertions.assertEquals(row3.getArity(), 14);
Assertions.assertEquals(row3.getField(0).getClass(), Byte.class);
Assertions.assertNull(row3.getField(1));
Assertions.assertNull(row3.getField(2));
Assertions.assertNull(row3.getField(3));
// check string blank
Assertions.assertEquals(row3.getField(4), "");
Assertions.assertNull(row3.getField(5));
Assertions.assertNull(row3.getField(6));
Assertions.assertNull(row3.getField(7));
Assertions.assertNull(row3.getField(8));
Assertions.assertNull(row3.getField(9));
Assertions.assertNull(row3.getField(10));
Assertions.assertNull(row3.getField(11));
Assertions.assertNull(row3.getField(12));
Assertions.assertNull(row3.getField(13));
}

private void testExcelRead(String filePath) throws IOException, URISyntaxException {
URL excelFile = ExcelReadStrategyTest.class.getResource(filePath);
@Test
public void testExcelReadDateString() throws IOException, URISyntaxException {
URL excelFile =
ExcelReadStrategyTest.class.getResource("/excel/test_read_excel_date_string.xlsx");
URL conf = ExcelReadStrategyTest.class.getResource("/excel/test_read_excel.conf");
Assertions.assertNotNull(excelFile);
Assertions.assertNotNull(conf);
Expand All @@ -76,6 +177,7 @@ private void testExcelRead(String filePath) throws IOException, URISyntaxExcepti
excelReadStrategy.setCatalogTable(userDefinedCatalogTable);
TestCollector testCollector = new TestCollector();
excelReadStrategy.read(fileNamesByPath.get(0), "", testCollector);

for (SeaTunnelRow seaTunnelRow : testCollector.getRows()) {
Assertions.assertEquals(seaTunnelRow.getArity(), 14);
Assertions.assertEquals(seaTunnelRow.getField(0).getClass(), Byte.class);
Expand Down
Binary file not shown.

0 comments on commit 3a54f12

Please sign in to comment.