Skip to content
This repository has been archived by the owner on Jun 29, 2021. It is now read-only.

dev/add datatypes to excel columns #230

Open
wants to merge 23 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
90303fa
updated poi-ooxml
Sep 25, 2019
999647c
refactored [tabs] to [spaces]
Sep 26, 2019
5a75486
resolved review comments
Sep 26, 2019
b82e728
resolved review comments part 1
SociopathicPixel Sep 30, 2019
8c21387
+ minor fix which sneaked in when reverting few changes.
SociopathicPixel Sep 30, 2019
8919900
+ minor fix which sneaked in when reverting few changes.
SociopathicPixel Sep 30, 2019
aae088f
removed empty spaces in whitelines
SociopathicPixel Sep 30, 2019
7ae1b26
added a test, however datatypes are not found...
SociopathicPixel Sep 30, 2019
77a1b80
added test case for datatypes
Oct 1, 2019
84c6f06
fixed another test that fell over
Oct 1, 2019
b4d9a20
commit part 1; did some indentation fixes
Oct 2, 2019
1a34285
commit part 1.01; did some indentation fixes
Oct 2, 2019
3785c91
resolving review comments
Oct 3, 2019
66749e9
Merge branch 'dev/add-datatypes-to-excel-columns' of https://github.c…
SociopathicPixel Oct 4, 2019
a8ab4b6
resolving indentation filler
SociopathicPixel Oct 4, 2019
57108bc
revert all code style changes
Oct 7, 2019
138b147
wrote some tests, added update check
Oct 8, 2019
5088744
fixed assert that was set wrong
SociopathicPixel Oct 8, 2019
4bfcc41
resolving review comments 1 of many
Oct 14, 2019
0701bab
resolving review comments
Oct 16, 2019
9c3976e
resolving review comments, still there are a few thingies that could …
Oct 23, 2019
cc85725
still need to pull apache/master into this branch
SociopathicPixel Oct 27, 2019
23a7781
resolving review comments, not finished yet
Oct 29, 2019
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion excel/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ under the License.
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>4.0.1</version>
<version>4.1.0</version>
<exclusions>
<exclusion>
<groupId>commons-logging</groupId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
*/
package org.apache.metamodel.excel;

import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.stream.Collectors;
Expand All @@ -41,6 +42,7 @@
import org.apache.metamodel.util.FileHelper;
import org.apache.metamodel.util.Resource;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
Expand All @@ -51,20 +53,20 @@
* The default {@link SpreadsheetReaderDelegate}, which uses POI's main user
* model to read spreadsheets: the Workbook class.
*/
final class DefaultSpreadsheetReaderDelegate implements SpreadsheetReaderDelegate {
class DefaultSpreadsheetReaderDelegate implements SpreadsheetReaderDelegate {

private static final Logger logger = LoggerFactory.getLogger(DefaultSpreadsheetReaderDelegate.class);

private final Resource _resource;
private final ExcelConfiguration _configuration;
protected final Resource _resource;
protected final ExcelConfiguration _configuration;

public DefaultSpreadsheetReaderDelegate(Resource resource, ExcelConfiguration configuration) {
_resource = resource;
_configuration = configuration;
}

@Override
SociopathicPixel marked this conversation as resolved.
Show resolved Hide resolved
public Schema createSchema(String schemaName) {
public Schema createSchema(String schemaName) throws Exception {
final MutableSchema schema = new MutableSchema(schemaName);
final Workbook wb = ExcelUtils.readWorkbook(_resource, true);
try {
Expand All @@ -82,7 +84,7 @@ public Schema createSchema(String schemaName) {
}

@Override
public DataSet executeQuery(Table table, List<Column> columns, int maxRows) {
public DataSet executeQuery(Table table, List<Column> columns, int maxRows) throws Exception {
final Workbook wb = ExcelUtils.readWorkbook(_resource, true);
final Sheet sheet = wb.getSheet(table.getName());

Expand Down Expand Up @@ -129,6 +131,7 @@ private MutableTable createTable(final Workbook wb, final Sheet sheet) {
}

final int columnNameLineNumber = _configuration.getColumnNameLineNumber();
final ColumnType[] columnTypes = getColumnTypes(sheet, row);
if (columnNameLineNumber == ExcelConfiguration.NO_COLUMN_NAME_LINE) {

// get to the first non-empty line (no matter if lines are skipped
Expand All @@ -149,7 +152,7 @@ private MutableTable createTable(final Workbook wb, final Sheet sheet) {
for (int j = offset; j < row.getLastCellNum(); j++) {
final ColumnNamingContext namingContext = new ColumnNamingContextImpl(table, null, j);
final Column column = new MutableColumn(columnNamingSession.getNextColumnName(namingContext),
ColumnType.STRING, table, j, true);
columnTypes[j], table, j, true);
table.addColumn(column);
}
}
Expand All @@ -169,21 +172,87 @@ private MutableTable createTable(final Workbook wb, final Sheet sheet) {
}

if (hasColumns) {
createColumns(table, wb, row);
createColumns(table, wb, row, columnTypes);
}
}

return table;
}

/**
 * Determines a {@link ColumnType} for every column position of the given row.
 * <p>
 * When column type detection is disabled in the configuration, every position is
 * {@link ColumnType#STRING}. Otherwise up to
 * {@link ExcelConfiguration#getNumberOfLinesToScan()} rows of the sheet are read and the
 * type of each cell is obtained from {@link #getColumnTypeFromRow(Row, int)}. The first
 * type seen for a column is kept; a later, different type turns a non-STRING column into
 * {@link ColumnType#VARCHAR}.
 *
 * @param sheet
 *            the sheet whose rows are scanned
 * @param row
 *            the row that defines how many columns there are (its last cell number)
 * @return an array with one non-null entry per column position; empty when the row is
 *         {@code null} or has no cells
 */
protected ColumnType[] getColumnTypes(final Sheet sheet, final Row row) {
    // getLastCellNum() is -1 for a row without cells; clamp so the array size is never negative.
    final int rowLength = row == null ? 0 : Math.max(0, row.getLastCellNum());
    final ColumnType[] columnTypes = new ColumnType[rowLength];

    if (!_configuration.isDetectColumnTypes()) {
        Arrays.fill(columnTypes, ColumnType.STRING);
        return columnTypes;
    }

    // Only open a row iterator when the sheet actually has to be scanned.
    final Iterator<Row> data = ExcelUtils.getRowIterator(sheet, _configuration, false);
    int numberOfLinesToScan = _configuration.getNumberOfLinesToScan();

    while (data.hasNext() && numberOfLinesToScan-- > 0) {
        final Row currentRow = data.next();
        if (currentRow == null || currentRow.getRowNum() < _configuration.getColumnNameLineNumber()) {
            continue;
        }
        // A row without cells reports -1 (not 0) as last cell number; it carries no type
        // information, so it must not degrade the detected types.
        if (currentRow.getLastCellNum() <= 0) {
            continue;
        }

        for (int index = 0; index < rowLength; index++) {
            final ColumnType columnType = columnTypes[index];
            final ColumnType expectedColumnType = getColumnTypeFromRow(currentRow, index);
            if (columnType == null) {
                columnTypes[index] = expectedColumnType;
            } else if (!columnType.equals(ColumnType.STRING) && !columnType.equals(expectedColumnType)) {
                // Conflicting types within one column: fall back to a textual type.
                columnTypes[index] = ColumnType.VARCHAR;
            }
        }
    }

    // Columns for which no row was scanned get the same default as the non-detecting mode.
    for (int index = 0; index < rowLength; index++) {
        if (columnTypes[index] == null) {
            columnTypes[index] = ColumnType.STRING;
        }
    }
    return columnTypes;
}

/**
 * Derives a {@link ColumnType} from the cell at the given position of a row.
 * <p>
 * A missing cell yields {@link ColumnType#STRING}. Numeric cells yield
 * {@link ColumnType#DATE} when date formatted, otherwise {@link ColumnType#INTEGER} for
 * values without a fractional part and {@link ColumnType#DOUBLE} for all others. Boolean
 * cells yield {@link ColumnType#BOOLEAN}. Every other cell type (error, none, string,
 * formula, blank) yields {@link ColumnType#STRING}.
 *
 * @param currentRow
 *            the row to read the cell from
 * @param index
 *            the position of the cell within the row
 * @return the column type matching the cell's content
 */
protected ColumnType getColumnTypeFromRow(final Row currentRow, int index) {
    final Cell cell = currentRow.getCell(index);
    if (cell == null) {
        return ColumnType.STRING;
    }

    switch (cell.getCellType()) {
    case BOOLEAN:
        return ColumnType.BOOLEAN;
    case NUMERIC:
        if (DateUtil.isCellDateFormatted(cell)) {
            return ColumnType.DATE;
        }
        if (cell.getNumericCellValue() % 1 == 0) {
            return ColumnType.INTEGER;
        }
        return ColumnType.DOUBLE;
    default:
        // ERROR, _NONE, STRING, FORMULA and BLANK are all treated as text.
        return ColumnType.STRING;
    }
}

/**
* Builds columns based on row/cell values.
*
* @param table
* @param wb
* @param row
*/
private void createColumns(MutableTable table, Workbook wb, Row row) {
private void createColumns(final MutableTable table, final Workbook wb, final Row row,
final ColumnType[] columTypes) {
if (row == null) {
logger.warn("Cannot create columns based on null row!");
return;
Expand All @@ -197,11 +266,17 @@ private void createColumns(MutableTable table, Workbook wb, Row row) {
.startColumnNamingSession()) {
for (int j = offset; j < rowLength; j++) {
final Cell cell = row.getCell(j);
final String intrinsicColumnName = ExcelUtils.getCellValue(wb, cell);
Object cellValue = ExcelUtils.getCellValue(wb, cell);
final String intrinsicColumnName = cellValue == null ? "" : cellValue.toString();
final ColumnNamingContext columnNamingContext = new ColumnNamingContextImpl(table, intrinsicColumnName,
j);
final String columnName = columnNamingSession.getNextColumnName(columnNamingContext);
final Column column = new MutableColumn(columnName, ColumnType.VARCHAR, table, j, true);
final Column column;
if (!_configuration.isDetectColumnTypes()) {
column = new MutableColumn(columnName, ColumnType.VARCHAR, table, j, true);
} else {
column = new MutableColumn(columnName, columTypes[j], table, j, true);
}
table.addColumn(column);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,30 +33,45 @@
public final class ExcelConfiguration extends BaseObject implements
Serializable {

private static final long serialVersionUID = 1L;

private static final long serialVersionUID = 1L;

public static final int NO_COLUMN_NAME_LINE = 0;
public static final int DEFAULT_COLUMN_NAME_LINE = 1;
private static final int NUMBERS_OF_LINES_TO_SCAN = 1000;

private final int numberOfLinesToScan;
private final int columnNameLineNumber;
private final ColumnNamingStrategy columnNamingStrategy;
private final boolean skipEmptyLines;
private final boolean skipEmptyColumns;
private final boolean detectColumnTypes;

public ExcelConfiguration() {
this(DEFAULT_COLUMN_NAME_LINE, true, false);
}

public ExcelConfiguration(int columnNameLineNumber, boolean skipEmptyLines, boolean skipEmptyColumns) {
this(columnNameLineNumber, null, skipEmptyLines, skipEmptyColumns);
this(columnNameLineNumber, null, skipEmptyLines, skipEmptyColumns, false, NUMBERS_OF_LINES_TO_SCAN);
}

/**
 * Creates a configuration without column type detection, scanning the default number of
 * lines.
 * <p>
 * The flags are primitive {@code boolean}s: a boxed {@code Boolean} would be unboxed when
 * delegating to the full constructor and fail with a {@link NullPointerException} for
 * {@code null} arguments.
 *
 * @param columnNameLineNumber
 *            the line number that holds the column names
 * @param columnNamingStrategy
 *            the strategy used to name columns, may be {@code null}
 * @param skipEmptyLines
 *            whether empty lines are skipped
 * @param skipEmptyColumns
 *            whether empty columns are skipped
 */
public ExcelConfiguration(int columnNameLineNumber, ColumnNamingStrategy columnNamingStrategy,
        boolean skipEmptyLines, boolean skipEmptyColumns) {
    this(columnNameLineNumber, columnNamingStrategy, skipEmptyLines, skipEmptyColumns, false,
            NUMBERS_OF_LINES_TO_SCAN);
}

public ExcelConfiguration(int columnNameLineNumber, boolean skipEmptyLines, boolean skipEmptyColumns, boolean detectColumnTypes) {
this(columnNameLineNumber, null, skipEmptyLines, skipEmptyColumns, detectColumnTypes, NUMBERS_OF_LINES_TO_SCAN);
}

public ExcelConfiguration(int columnNameLineNumber, ColumnNamingStrategy columnNamingStrategy,
boolean skipEmptyLines, boolean skipEmptyColumns) {
boolean skipEmptyLines, boolean skipEmptyColumns, boolean detectColumnTypes, int numberOfLinesToScan) {
this.columnNameLineNumber = columnNameLineNumber;
this.skipEmptyLines = skipEmptyLines;
this.skipEmptyColumns = skipEmptyColumns;
this.columnNamingStrategy = columnNamingStrategy;
this.detectColumnTypes = detectColumnTypes;
this.numberOfLinesToScan = numberOfLinesToScan;
}

/**
Expand Down Expand Up @@ -102,17 +117,34 @@ public boolean isSkipEmptyColumns() {
return skipEmptyColumns;
}

/**
* Indicates whether the data types of the columns in the excel spreadsheet should be
* detected from the cell contents while reading the spreadsheet.
*
* @return {@code true} if column types should be detected, {@code false} otherwise.
*/
public boolean isDetectColumnTypes() {
return detectColumnTypes;
}

/**
 * Contributes the configuration values that make up this object's identity, in a fixed
 * order: column name line number, skip-empty-lines flag, skip-empty-columns flag,
 * detect-column-types flag and number of lines to scan.
 *
 * @param identifiers
 *            the list the identity values are appended to
 */
@Override
protected void decorateIdentity(List<Object> identifiers) {
    final Object[] identityParts = { columnNameLineNumber, skipEmptyLines, skipEmptyColumns,
            detectColumnTypes, numberOfLinesToScan };
    for (final Object part : identityParts) {
        identifiers.add(part);
    }
}

@Override
public String toString() {
return "ExcelConfiguration[columnNameLineNumber="
+ columnNameLineNumber + ", skipEmptyLines=" + skipEmptyLines
+ ", skipEmptyColumns=" + skipEmptyColumns + "]";
+ ", skipEmptyColumns=" + skipEmptyColumns +", detectColumnTypes="
+ detectColumnTypes + ", numbersOfLinesToScan=" + numberOfLinesToScan + "]";
}

/**
* Gets the configured number of lines to scan.
*
* @return the number of lines to scan, as passed to the constructor.
*/
public int getNumberOfLinesToScan() {
return numberOfLinesToScan;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,21 +20,23 @@

import java.util.Date;

import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellStyle;
import org.apache.poi.ss.usermodel.FillPatternType;
import org.apache.poi.ss.usermodel.Font;
import org.apache.poi.ss.usermodel.HorizontalAlignment;
import org.apache.poi.ss.usermodel.Row;
import org.apache.metamodel.MetaModelException;
import org.apache.metamodel.data.Style;
import org.apache.metamodel.data.Style.Color;
import org.apache.metamodel.data.Style.SizeUnit;
import org.apache.metamodel.data.Style.TextAlignment;
import org.apache.metamodel.insert.AbstractRowInsertionBuilder;
import org.apache.metamodel.insert.RowInsertionBuilder;
import org.apache.metamodel.schema.Column;
import org.apache.metamodel.schema.ColumnType;
import org.apache.metamodel.schema.Table;
import org.apache.metamodel.util.LazyRef;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellStyle;
import org.apache.poi.ss.usermodel.FillPatternType;
import org.apache.poi.ss.usermodel.Font;
import org.apache.poi.ss.usermodel.HorizontalAlignment;
import org.apache.poi.ss.usermodel.Row;

/**
* {@link RowInsertionBuilder} for excel spreadsheets.
Expand Down Expand Up @@ -149,8 +151,33 @@ protected CellStyle fetch() {
cell.setCellStyle(cellStyle.get());
}
}
validateUpdateType(row);
Copy link
Contributor

@arjansh arjansh Oct 11, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure what the value is of adding this here. I think that for now it may be best to skip the "validation" part.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

see: ExcelDataContextTest.testUpdateDifferentDataTypes

It checks whether the Column has a specific ColumnType and, if so, validates the given row.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@SociopathicPixel I'm sorry, but I still don't get why this has been added here.

}
}

/**
 * Checks that the values to insert match the type of their column.
 * <p>
 * Only columns of type {@code INTEGER} are checked: their value's string form must be
 * decodable by {@link Integer#decode(String)}. Columns without a type, {@code null}
 * values and all other column types are accepted as they are.
 *
 * @param original
 *            the row the values belong to; only used to name the offending cell in the
 *            error message
 * @throws MetaModelException
 *             if the value of an {@code INTEGER} column cannot be decoded as an integer
 */
private void validateUpdateType(final Row original) {
    final Column[] columns = getColumns();
    final Object[] values = getValues();

    for (int index = 0; index < columns.length; index++) {
        final ColumnType columnType = columns[index].getType();
        final Object value = values[index];
        if (columnType == null || value == null) {
            continue;
        }

        switch (columnType.getName()) {
        case "INTEGER":
            try {
                Integer.decode(value.toString());
            } catch (NumberFormatException ex) {
                // Look the cell up by the column's position in the sheet; the index into
                // the columns array is not necessarily the same number.
                final Cell cell = original.getCell(columns[index].getColumnNumber());
                throw new MetaModelException(cell + " should be an Integer!", ex);
            }
            break;
        default:
            // STRING, VARCHAR and every other type are not validated.
            break;
        }
    }
}

/**
* Converts a percentage based font size to excel "pt" scale.
Expand Down
12 changes: 6 additions & 6 deletions excel/src/main/java/org/apache/metamodel/excel/ExcelUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -171,22 +171,22 @@ public static void writeAndCloseWorkbook(ExcelDataContext dataContext, final Wor

}

public static String getCellValue(Workbook wb, Cell cell) {
public static Object getCellValue(Workbook wb, Cell cell) {
if (cell == null) {
return null;
}

final String cellCoordinate = "(" + cell.getRowIndex() + "," + cell.getColumnIndex() + ")";

final String result;
final Object result;

switch (cell.getCellType()) {
case BLANK:
case _NONE:
result = null;
break;
case BOOLEAN:
result = Boolean.toString(cell.getBooleanCellValue());
result = cell.getBooleanCellValue();
break;
case ERROR:
String errorResult;
Expand Down Expand Up @@ -237,7 +237,7 @@ public static String getCellValue(Workbook wb, Cell cell) {
return result;
}

private static String getFormulaCellValue(Workbook wb, Cell cell) {
private static Object getFormulaCellValue(Workbook wb, Cell cell) {
// first try with a cached/precalculated value
try {
double numericCellValue = cell.getNumericCellValue();
Expand Down Expand Up @@ -414,13 +414,13 @@ public static Iterator<Row> getRowIterator(Sheet sheet, ExcelConfiguration confi
*/
public static DefaultRow createRow(Workbook workbook, Row row, DataSetHeader header) {
final int size = header.size();
final String[] values = new String[size];
final Object[] values = new Object[size];
final Style[] styles = new Style[size];
if (row != null) {
for (int i = 0; i < size; i++) {
final int columnNumber = header.getSelectItem(i).getColumn().getColumnNumber();
final Cell cell = row.getCell(columnNumber);
final String value = ExcelUtils.getCellValue(workbook, cell);
final Object value = ExcelUtils.getCellValue(workbook, cell);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Returning an Object doesn't do the trick. We know from the column type what type of object should be returned, so use that to return a String, Integer, Boolean or Date object.

final Style style = ExcelUtils.getCellStyle(workbook, cell);
values[i] = value;
styles[i] = style;
Expand Down
Loading