Skip to content
This repository has been archived by the owner on Jun 29, 2021. It is now read-only.

dev/add datatypes to excel columns #230

Open
wants to merge 23 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
90303fa
updated poi-ooxml
Sep 25, 2019
999647c
refactored [tabs] to [spaces]
Sep 26, 2019
5a75486
resolved review comments
Sep 26, 2019
b82e728
resolved review comments part 1
SociopathicPixel Sep 30, 2019
8c21387
+ minor fix which sneaked in when reverting few changes.
SociopathicPixel Sep 30, 2019
8919900
+ minor fix which sneaked in when reverting few changes.
SociopathicPixel Sep 30, 2019
aae088f
removed empty spaces in whitelines
SociopathicPixel Sep 30, 2019
7ae1b26
added a test, however datatypes are not found...
SociopathicPixel Sep 30, 2019
77a1b80
added test case for datatypes
Oct 1, 2019
84c6f06
fixed another test that fell over
Oct 1, 2019
b4d9a20
commit part 1; did some indentation fixes
Oct 2, 2019
1a34285
commit part 1.01; did some indentation fixes
Oct 2, 2019
3785c91
resolving review comments
Oct 3, 2019
66749e9
Merge branch 'dev/add-datatypes-to-excel-columns' of https://github.c…
SociopathicPixel Oct 4, 2019
a8ab4b6
resolving indentation filler
SociopathicPixel Oct 4, 2019
57108bc
revert all code style changes
Oct 7, 2019
138b147
wrote some tests, added update check
Oct 8, 2019
5088744
fixed assert that was set wrong
SociopathicPixel Oct 8, 2019
4bfcc41
resolving review comments 1 of many
Oct 14, 2019
0701bab
resolving review comments
Oct 16, 2019
9c3976e
resolving review comments, still there are a few thingies that could …
Oct 23, 2019
cc85725
still need to pull apache/master into this branch
SociopathicPixel Oct 27, 2019
23a7781
resolving review comments, not finished yet
Oct 29, 2019
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion excel/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ under the License.
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>4.0.1</version>
<version>4.1.0</version>
<exclusions>
<exclusion>
<groupId>commons-logging</groupId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
import org.apache.metamodel.util.FileHelper;
import org.apache.metamodel.util.Resource;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellType;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
Expand Down Expand Up @@ -103,9 +104,8 @@ public void notifyTablesModified() {
// do nothing
}

private MutableTable createTable(final Workbook wb, final Sheet sheet) {
private MutableTable createTable(Workbook wb, Sheet sheet) {
SociopathicPixel marked this conversation as resolved.
Show resolved Hide resolved
final MutableTable table = new MutableTable(sheet.getSheetName(), TableType.TABLE);

SociopathicPixel marked this conversation as resolved.
Show resolved Hide resolved
if (sheet.getPhysicalNumberOfRows() <= 0) {
// no physical rows in sheet
return table;
Expand All @@ -117,9 +117,7 @@ private MutableTable createTable(final Workbook wb, final Sheet sheet) {
// no physical rows in sheet
return table;
}

Row row = null;

if (_configuration.isSkipEmptyLines()) {
while (row == null && rowIterator.hasNext()) {
row = rowIterator.next();
Expand All @@ -128,6 +126,12 @@ private MutableTable createTable(final Workbook wb, final Sheet sheet) {
row = rowIterator.next();
}

// Get the rows used to eagerly infer column types (NOTE(review): setColumnType does not itself cap this at 1000 rows)
final Iterator<Row> data = ExcelUtils.getRowIterator(sheet, _configuration, false);
int rowLength = row.getLastCellNum();
ColumnType[] columnTypes = new ColumnType[rowLength];

setColumnType(data, rowLength, columnTypes);
SociopathicPixel marked this conversation as resolved.
Show resolved Hide resolved
final int columnNameLineNumber = _configuration.getColumnNameLineNumber();
if (columnNameLineNumber == ExcelConfiguration.NO_COLUMN_NAME_LINE) {

Expand All @@ -137,27 +141,22 @@ private MutableTable createTable(final Workbook wb, final Sheet sheet) {
while (row == null && rowIterator.hasNext()) {
row = rowIterator.next();
}

SociopathicPixel marked this conversation as resolved.
Show resolved Hide resolved
// build columns without any intrinsic column names
final ColumnNamingStrategy columnNamingStrategy = _configuration.getColumnNamingStrategy();
try (final ColumnNamingSession columnNamingSession = columnNamingStrategy.startColumnNamingSession()) {
final int offset = getColumnOffset(row);
for (int i = 0; i < offset; i++) {
columnNamingSession.getNextColumnName(new ColumnNamingContextImpl(i));
}

for (int j = offset; j < row.getLastCellNum(); j++) {
final ColumnNamingContext namingContext = new ColumnNamingContextImpl(table, null, j);
final Column column = new MutableColumn(columnNamingSession.getNextColumnName(namingContext),
ColumnType.STRING, table, j, true);
columnTypes[j], table, j, true);
table.addColumn(column);
}
}

} else {

boolean hasColumns = true;

// iterate to the column name line number (if above 1)
for (int j = 1; j < columnNameLineNumber; j++) {
if (rowIterator.hasNext()) {
Expand All @@ -167,50 +166,114 @@ private MutableTable createTable(final Workbook wb, final Sheet sheet) {
break;
}
}

if (hasColumns) {
createColumns(table, wb, row);
createColumns(table, wb, row, columnTypes);
}
}

return table;
}

private void setColumnType(Iterator<Row> data, int rowLength, ColumnType[] columnTypes) {
SociopathicPixel marked this conversation as resolved.
Show resolved Hide resolved
while (data.hasNext()) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Essentially you're iterating over the complete data set now. I would expect you to only look at the first line (or maybe the first few lines of data) to get the column type.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had a query to look up only the first 1000 records; I see that it got removed when getting DataRow types instead of the "normal" Row type.
I've added a countdown from 1000 to the while-loop:

int eagerness = 1000;
...
...
while (data.hasNext() && eagerness-- > 0) {
...
...

Row row = data.next();
for (int index = 0; index < rowLength; index++) {
if (row.getLastCellNum() == 0) {
continue;
}
if (row.getCell(index) == null) {
columnTypes = checkColumnType(ColumnType.STRING, columnTypes, index);
SociopathicPixel marked this conversation as resolved.
Show resolved Hide resolved
} else {
CellType cellType = row.getCell(index).getCellType();
if (cellType.getCode() != 0 && cellType.getCode() <= 2) {
SociopathicPixel marked this conversation as resolved.
Show resolved Hide resolved
columnTypes = checkColumnType(ColumnType.STRING, columnTypes, index);
} else if (cellType.getCode() == 0) {
columnTypes = checkColumnType((row.getCell(index).getNumericCellValue() % 1 == 0)
SociopathicPixel marked this conversation as resolved.
Show resolved Hide resolved
? ColumnType.INTEGER : ColumnType.DOUBLE, columnTypes, index);
} else if (cellType.getCode() == 4) {
columnTypes = checkColumnType(ColumnType.BOOLEAN, columnTypes, index);
}
}
}
}
}

/**
 * Merges a newly observed type into the type recorded for a single column.
 * <ul>
 * <li>If nothing is recorded yet, the observed type is stored.</li>
 * <li>If the recorded type is already STRING, or equals the observed type, nothing changes.</li>
 * <li>Otherwise the two observations conflict and the column falls back to STRING.</li>
 * </ul>
 *
 * @param columnType the type observed for the current cell
 * @param columnTypes the per-column types collected so far; updated in place
 * @param index the position of the column within {@code columnTypes}
 * @return the same {@code columnTypes} array that was passed in
 */
private ColumnType[] checkColumnType(ColumnType columnType, ColumnType[] columnTypes, int index) {
    final ColumnType recorded = columnTypes[index];

    // first observation for this column: take it as-is
    if (recorded == null) {
        columnTypes[index] = columnType;
        return columnTypes;
    }

    // nothing to do when the column is already STRING or the observation agrees
    if (recorded.equals(ColumnType.STRING) || recorded.equals(columnType)) {
        return columnTypes;
    }

    // conflicting observations: STRING is the common fallback
    columnTypes[index] = ColumnType.STRING;
    return columnTypes;
}

/**
 * Records, per column, the POI cell type found in {@code row}. A column whose recorded type
 * differs from the type found in this row is downgraded to {@link CellType#STRING}.
 *
 * @param datatypes one slot per column, updated in place; a slot that does not yet hold a
 *            {@link CellType} (for example {@code null}) is treated as "nothing recorded"
 * @param row the row whose cells are inspected
 */
private void determineColumnDatatype(Object[] datatypes, Row row) {
    // The row may be wider than the array handed in; never index past either of them.
    final int columnCount = Math.min(row.getLastCellNum(), datatypes.length);

    for (int index = 0; index < columnCount; index++) {
        final Cell cell = row.getCell(index);
        // getCell can yield null for a cell that is not defined; count it as STRING,
        // which is also how setColumnType treats a missing cell.
        final CellType type = (cell == null) ? CellType.STRING : cell.getCellType();

        if (!(datatypes[index] instanceof CellType)) {
            // Nothing recorded yet (null fails instanceof): store the first observation.
            // The previous "instanceof Object" test matched every non-null slot and no
            // null slot, so empty slots were never filled and conflicts never detected.
            datatypes[index] = type;
        } else if (!datatypes[index].equals(type)) {
            // two rows disagree on this column's type: fall back to STRING
            datatypes[index] = CellType.STRING;
        }
    }
}

/**
* Builds columns based on row/cell values.
*
* @param table
* @param wb
* @param row
*/
private void createColumns(MutableTable table, Workbook wb, Row row) {
private void createColumns(MutableTable table, Workbook wb, Row row, ColumnType[] columTypes) {
SociopathicPixel marked this conversation as resolved.
Show resolved Hide resolved
if (row == null) {
logger.warn("Cannot create columns based on null row!");
return;
}
final short rowLength = row.getLastCellNum();

final int offset = getColumnOffset(row);

// build columns based on cell values.
try (final ColumnNamingSession columnNamingSession = _configuration.getColumnNamingStrategy()
try (final ColumnNamingSession columnNamingSession = _configuration
SociopathicPixel marked this conversation as resolved.
Show resolved Hide resolved
.getColumnNamingStrategy()
.startColumnNamingSession()) {
for (int j = offset; j < rowLength; j++) {
final Cell cell = row.getCell(j);
final String intrinsicColumnName = ExcelUtils.getCellValue(wb, cell);
final ColumnNamingContext columnNamingContext = new ColumnNamingContextImpl(table, intrinsicColumnName,
j);
final String columnName = columnNamingSession.getNextColumnName(columnNamingContext);
final Column column = new MutableColumn(columnName, ColumnType.VARCHAR, table, j, true);
Column column = null;
SociopathicPixel marked this conversation as resolved.
Show resolved Hide resolved
if (columTypes == null) {
column = new MutableColumn(columnName, ColumnType.VARCHAR, table, j, true);
} else {
column = new MutableColumn(columnName, columTypes[j], table, j, true);
}
table.addColumn(column);
}
}
}

/**
* Gets the column offset (first column to include). This is dependent on
* the row used for column processing and whether the skip empty columns
* property is set.
* Builds columns based on row/cell values.
*
* @param table
* @param wb
* @param row
*/
private void createColumns(MutableTable table, Workbook wb, Row row) {
SociopathicPixel marked this conversation as resolved.
Show resolved Hide resolved
createColumns(table, wb, row, null);
}

/**
* Gets the column offset (first column to include). This is dependent on the
SociopathicPixel marked this conversation as resolved.
Show resolved Hide resolved
* row used for column processing and whether the skip empty columns property is
* set.
*
* @param row
* @return
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -402,10 +402,10 @@ public void testMissingValues() throws Exception {
assertEquals(2, schema.getTableCount());

Table table = schema.getTables().get(0);
assertEquals("[Column[name=a,columnNumber=0,type=VARCHAR,nullable=true,nativeType=null,columnSize=null], "
+ "Column[name=b,columnNumber=1,type=VARCHAR,nullable=true,nativeType=null,columnSize=null], "
+ "Column[name=c,columnNumber=2,type=VARCHAR,nullable=true,nativeType=null,columnSize=null], "
+ "Column[name=d,columnNumber=3,type=VARCHAR,nullable=true,nativeType=null,columnSize=null]]",
assertEquals("[Column[name=a,columnNumber=0,type=STRING,nullable=true,nativeType=null,columnSize=null], "
+ "Column[name=b,columnNumber=1,type=STRING,nullable=true,nativeType=null,columnSize=null], "
+ "Column[name=c,columnNumber=2,type=STRING,nullable=true,nativeType=null,columnSize=null], "
+ "Column[name=d,columnNumber=3,type=STRING,nullable=true,nativeType=null,columnSize=null]]",
Arrays.toString(table.getColumns().toArray()));

Query q = new Query().select(table.getColumns()).from(table);
Expand All @@ -426,10 +426,10 @@ public void testMissingColumnHeader() throws Exception {
assertEquals(2, schema.getTableCount());

Table table = schema.getTables().get(0);
assertEquals("[Column[name=a,columnNumber=0,type=VARCHAR,nullable=true,nativeType=null,columnSize=null], "
+ "Column[name=b,columnNumber=1,type=VARCHAR,nullable=true,nativeType=null,columnSize=null], "
+ "Column[name=A,columnNumber=2,type=VARCHAR,nullable=true,nativeType=null,columnSize=null], "
+ "Column[name=d,columnNumber=3,type=VARCHAR,nullable=true,nativeType=null,columnSize=null]]",
assertEquals("[Column[name=a,columnNumber=0,type=STRING,nullable=true,nativeType=null,columnSize=null], "
+ "Column[name=b,columnNumber=1,type=STRING,nullable=true,nativeType=null,columnSize=null], "
+ "Column[name=A,columnNumber=2,type=STRING,nullable=true,nativeType=null,columnSize=null], "
+ "Column[name=d,columnNumber=3,type=STRING,nullable=true,nativeType=null,columnSize=null]]",
Arrays.toString(table.getColumns().toArray()));

Query q = new Query().select(table.getColumns()).from(table);
Expand Down Expand Up @@ -532,11 +532,10 @@ public void testTicket99defect() throws Exception {

Table table = schema.getTableByName("Sheet1");
assertEquals(

"[Column[name=Pkg No.,columnNumber=0,type=VARCHAR,nullable=true,nativeType=null,columnSize=null], "
+ "Column[name=Description,columnNumber=1,type=VARCHAR,nullable=true,nativeType=null,columnSize=null], "
+ "Column[name=Room,columnNumber=2,type=VARCHAR,nullable=true,nativeType=null,columnSize=null], "
+ "Column[name=Level,columnNumber=3,type=VARCHAR,nullable=true,nativeType=null,columnSize=null]]",
"[Column[name=Pkg No.,columnNumber=0,type=STRING,nullable=true,nativeType=null,columnSize=null], "
+ "Column[name=Description,columnNumber=1,type=STRING,nullable=true,nativeType=null,columnSize=null], "
+ "Column[name=Room,columnNumber=2,type=STRING,nullable=true,nativeType=null,columnSize=null], "
+ "Column[name=Level,columnNumber=3,type=STRING,nullable=true,nativeType=null,columnSize=null]]",
Arrays.toString(table.getColumns().toArray()));
}

Expand Down