Skip to content
This repository has been archived by the owner on Jun 29, 2021. It is now read-only.

dev/add datatypes to excel columns #230

Open
wants to merge 23 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
90303fa
updated poi-ooxml
Sep 25, 2019
999647c
refactored [tabs] to [spaces]
Sep 26, 2019
5a75486
resolved review comments
Sep 26, 2019
b82e728
resolved review comments part 1
SociopathicPixel Sep 30, 2019
8c21387
+ minor fix which sneaked in when reverting few changes.
SociopathicPixel Sep 30, 2019
8919900
+ minor fix which sneaked in when reverting few changes.
SociopathicPixel Sep 30, 2019
aae088f
removed empty spaces in whitelines
SociopathicPixel Sep 30, 2019
7ae1b26
added a test, however datatypes are not found...
SociopathicPixel Sep 30, 2019
77a1b80
added test case for datatypes
Oct 1, 2019
84c6f06
fixed another test that fell over
Oct 1, 2019
b4d9a20
commit part 1; did some indentation fixes
Oct 2, 2019
1a34285
commit part 1.01; did some indentation fixes
Oct 2, 2019
3785c91
resolving review comments
Oct 3, 2019
66749e9
Merge branch 'dev/add-datatypes-to-excel-columns' of https://github.c…
SociopathicPixel Oct 4, 2019
a8ab4b6
resolving indentation filler
SociopathicPixel Oct 4, 2019
57108bc
revert all code style changes
Oct 7, 2019
138b147
wrote some tests, added update check
Oct 8, 2019
5088744
fixed assert that was set wrong
SociopathicPixel Oct 8, 2019
4bfcc41
resolving review comments 1 of many
Oct 14, 2019
0701bab
resolving review comments
Oct 16, 2019
9c3976e
resolving review comments, still there are a few thingies that could …
Oct 23, 2019
cc85725
still need to pull apache/master into this branch
SociopathicPixel Oct 27, 2019
23a7781
resolving review comments, not finished yet
Oct 29, 2019
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion excel/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ under the License.
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>4.0.1</version>
<version>4.1.0</version>
<exclusions>
<exclusion>
<groupId>commons-logging</groupId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@
import org.apache.metamodel.util.FileHelper;
import org.apache.metamodel.util.Resource;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellType;
import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
Expand Down Expand Up @@ -128,6 +130,9 @@ private MutableTable createTable(final Workbook wb, final Sheet sheet) {
row = rowIterator.next();
}

// Get first 1000 rows for the eager-read
final ColumnType[] columnTypes = getColumnTypes(sheet, row);

final int columnNameLineNumber = _configuration.getColumnNameLineNumber();
if (columnNameLineNumber == ExcelConfiguration.NO_COLUMN_NAME_LINE) {

Expand All @@ -137,27 +142,22 @@ private MutableTable createTable(final Workbook wb, final Sheet sheet) {
while (row == null && rowIterator.hasNext()) {
row = rowIterator.next();
}

SociopathicPixel marked this conversation as resolved.
Show resolved Hide resolved
// build columns without any intrinsic column names
final ColumnNamingStrategy columnNamingStrategy = _configuration.getColumnNamingStrategy();
try (final ColumnNamingSession columnNamingSession = columnNamingStrategy.startColumnNamingSession()) {
final int offset = getColumnOffset(row);
for (int i = 0; i < offset; i++) {
columnNamingSession.getNextColumnName(new ColumnNamingContextImpl(i));
}

for (int j = offset; j < row.getLastCellNum(); j++) {
final ColumnNamingContext namingContext = new ColumnNamingContextImpl(table, null, j);
final Column column = new MutableColumn(columnNamingSession.getNextColumnName(namingContext),
ColumnType.STRING, table, j, true);
columnTypes[j], table, j, true);
table.addColumn(column);
}
}

} else {

boolean hasColumns = true;

// iterate to the column name line number (if above 1)
for (int j = 1; j < columnNameLineNumber; j++) {
if (rowIterator.hasNext()) {
Expand All @@ -167,50 +167,109 @@ private MutableTable createTable(final Workbook wb, final Sheet sheet) {
break;
}
}

if (hasColumns) {
createColumns(table, wb, row);
createColumns(table, wb, row, columnTypes);
}
}

return table;
}

private ColumnType[] getColumnTypes(final Sheet sheet, final Row row) {
final Iterator<Row> data = ExcelUtils.getRowIterator(sheet, _configuration, false);
final int rowLength = row.getLastCellNum();
int eagerness = 1000;
final ColumnType[] columnTypes = new ColumnType[rowLength];

while (data.hasNext() && eagerness-- > 0) {
Row currentRow = data.next();
SociopathicPixel marked this conversation as resolved.
Show resolved Hide resolved
for (int index = 0; index < rowLength; index++) {
if (currentRow.getLastCellNum() == 0) {
continue;
}
if (currentRow.getCell(index) == null) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you change the logic a bit around here? I would propose extracting lines 189 through 218 into a separate method that returns the ColumnType of a cell in a row, and then moving the logic from lines 225 through 231 into that method, so that it decides whether or not to assign the value to the columnTypes array.

Copy link
Author

@SociopathicPixel SociopathicPixel Sep 30, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Extracted the method: getColumnTypeFromRow(final ColumnType columnType, final Row currentRow, int index) from lines 189 through 218.

I haven't moved the logic of
checkColumnTypes(final ColumnType expectedColumnType, ColumnType columnType)
into
getColumnTypeFromRow(final ColumnType columnType, final Row currentRow, int index)
because it is used in multiple places.

checkColumnType(ColumnType.STRING, columnTypes, index);
} else {
CellType cellType = currentRow.getCell(index).getCellType();
switch (cellType) {
case NUMERIC:
if (DateUtil.isCellDateFormatted(currentRow.getCell(index))) {
checkColumnType(ColumnType.DATE, columnTypes, index);
} else {
checkColumnType((currentRow.getCell(index).getNumericCellValue() % 1 == 0)
? ColumnType.INTEGER : ColumnType.DOUBLE, columnTypes, index);
}
break;
case BOOLEAN:
checkColumnType(ColumnType.BOOLEAN, columnTypes, index);
break;
case ERROR:
// fall through
break;
case _NONE:
// fall through
case STRING:
// fall through
case FORMULA:
// fall through
case BLANK:
checkColumnType(ColumnType.STRING, columnTypes, index);
break;
}
}
}
}
return columnTypes;
}

/**
 * Merges a newly observed cell type into the type inferred so far for one column.
 *
 * <ul>
 * <li>If nothing has been recorded for the column yet, the observed type is stored.</li>
 * <li>If the column is already {@code STRING}, or the observed type matches the recorded
 * one, nothing changes.</li>
 * <li>If the recorded and observed types are a mix of {@code INTEGER} and {@code DOUBLE},
 * the column is widened to {@code DOUBLE}. Numeric cells are classified one by one, so a
 * column of decimals that happens to contain a whole number would otherwise be degraded
 * to {@code STRING}.</li>
 * <li>Any other conflict falls back to {@code STRING}.</li>
 * </ul>
 *
 * @param columnType  the type observed for the current cell
 * @param columnTypes the per-column types inferred so far; updated in place
 * @param index       the position of the column within {@code columnTypes}
 */
private void checkColumnType(final ColumnType columnType, final ColumnType[] columnTypes, int index) {
    final ColumnType current = columnTypes[index];

    // First observation for this column: take it as-is.
    if (current == null) {
        columnTypes[index] = columnType;
        return;
    }

    // STRING is the most general type and never narrows again; identical types need no merge.
    if (current.equals(ColumnType.STRING) || current.equals(columnType)) {
        return;
    }

    final boolean currentIsNumeric = current.equals(ColumnType.INTEGER) || current.equals(ColumnType.DOUBLE);
    final boolean observedIsNumeric = ColumnType.INTEGER.equals(columnType)
            || ColumnType.DOUBLE.equals(columnType);

    // INTEGER and DOUBLE differ here, so widen to DOUBLE; everything else conflicts -> STRING.
    columnTypes[index] = (currentIsNumeric && observedIsNumeric) ? ColumnType.DOUBLE : ColumnType.STRING;
}

/**
* Builds columns based on row/cell values.
*
* @param table
* @param wb
* @param row
*/
private void createColumns(MutableTable table, Workbook wb, Row row) {
private void createColumns(MutableTable table, Workbook wb, Row row, ColumnType[] columTypes) {
SociopathicPixel marked this conversation as resolved.
Show resolved Hide resolved
if (row == null) {
logger.warn("Cannot create columns based on null row!");
return;
}
final short rowLength = row.getLastCellNum();

final int offset = getColumnOffset(row);

// build columns based on cell values.
try (final ColumnNamingSession columnNamingSession = _configuration.getColumnNamingStrategy()
try (final ColumnNamingSession columnNamingSession = _configuration
SociopathicPixel marked this conversation as resolved.
Show resolved Hide resolved
.getColumnNamingStrategy()
.startColumnNamingSession()) {
for (int j = offset; j < rowLength; j++) {
final Cell cell = row.getCell(j);
final String intrinsicColumnName = ExcelUtils.getCellValue(wb, cell);
final ColumnNamingContext columnNamingContext = new ColumnNamingContextImpl(table, intrinsicColumnName,
j);
final String columnName = columnNamingSession.getNextColumnName(columnNamingContext);
final Column column = new MutableColumn(columnName, ColumnType.VARCHAR, table, j, true);
Column column = null;
SociopathicPixel marked this conversation as resolved.
Show resolved Hide resolved
if (columTypes == null) {
column = new MutableColumn(columnName, ColumnType.VARCHAR, table, j, true);
} else {
column = new MutableColumn(columnName, columTypes[j], table, j, true);
}
table.addColumn(column);
}
}
}

/**
* Gets the column offset (first column to include). This is dependent on
* the row used for column processing and whether the skip empty columns
* property is set.
* Gets the column offset (first column to include). This is dependent on the
SociopathicPixel marked this conversation as resolved.
Show resolved Hide resolved
* row used for column processing and whether the skip empty columns property is
* set.
*
* @param row
* @return
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -402,10 +402,10 @@ public void testMissingValues() throws Exception {
assertEquals(2, schema.getTableCount());

Table table = schema.getTables().get(0);
assertEquals("[Column[name=a,columnNumber=0,type=VARCHAR,nullable=true,nativeType=null,columnSize=null], "
+ "Column[name=b,columnNumber=1,type=VARCHAR,nullable=true,nativeType=null,columnSize=null], "
+ "Column[name=c,columnNumber=2,type=VARCHAR,nullable=true,nativeType=null,columnSize=null], "
+ "Column[name=d,columnNumber=3,type=VARCHAR,nullable=true,nativeType=null,columnSize=null]]",
assertEquals("[Column[name=a,columnNumber=0,type=STRING,nullable=true,nativeType=null,columnSize=null], "
+ "Column[name=b,columnNumber=1,type=STRING,nullable=true,nativeType=null,columnSize=null], "
+ "Column[name=c,columnNumber=2,type=STRING,nullable=true,nativeType=null,columnSize=null], "
+ "Column[name=d,columnNumber=3,type=STRING,nullable=true,nativeType=null,columnSize=null]]",
Arrays.toString(table.getColumns().toArray()));

Query q = new Query().select(table.getColumns()).from(table);
Expand All @@ -426,10 +426,10 @@ public void testMissingColumnHeader() throws Exception {
assertEquals(2, schema.getTableCount());

Table table = schema.getTables().get(0);
assertEquals("[Column[name=a,columnNumber=0,type=VARCHAR,nullable=true,nativeType=null,columnSize=null], "
+ "Column[name=b,columnNumber=1,type=VARCHAR,nullable=true,nativeType=null,columnSize=null], "
+ "Column[name=A,columnNumber=2,type=VARCHAR,nullable=true,nativeType=null,columnSize=null], "
+ "Column[name=d,columnNumber=3,type=VARCHAR,nullable=true,nativeType=null,columnSize=null]]",
assertEquals("[Column[name=a,columnNumber=0,type=STRING,nullable=true,nativeType=null,columnSize=null], "
+ "Column[name=b,columnNumber=1,type=STRING,nullable=true,nativeType=null,columnSize=null], "
+ "Column[name=A,columnNumber=2,type=STRING,nullable=true,nativeType=null,columnSize=null], "
+ "Column[name=d,columnNumber=3,type=STRING,nullable=true,nativeType=null,columnSize=null]]",
Arrays.toString(table.getColumns().toArray()));

Query q = new Query().select(table.getColumns()).from(table);
Expand Down Expand Up @@ -532,11 +532,10 @@ public void testTicket99defect() throws Exception {

Table table = schema.getTableByName("Sheet1");
assertEquals(

"[Column[name=Pkg No.,columnNumber=0,type=VARCHAR,nullable=true,nativeType=null,columnSize=null], "
+ "Column[name=Description,columnNumber=1,type=VARCHAR,nullable=true,nativeType=null,columnSize=null], "
+ "Column[name=Room,columnNumber=2,type=VARCHAR,nullable=true,nativeType=null,columnSize=null], "
+ "Column[name=Level,columnNumber=3,type=VARCHAR,nullable=true,nativeType=null,columnSize=null]]",
"[Column[name=Pkg No.,columnNumber=0,type=STRING,nullable=true,nativeType=null,columnSize=null], "
+ "Column[name=Description,columnNumber=1,type=STRING,nullable=true,nativeType=null,columnSize=null], "
+ "Column[name=Room,columnNumber=2,type=STRING,nullable=true,nativeType=null,columnSize=null], "
+ "Column[name=Level,columnNumber=3,type=STRING,nullable=true,nativeType=null,columnSize=null]]",
Arrays.toString(table.getColumns().toArray()));
}

Expand Down