Skip to content

Commit

Permalink
feat: add formatter for the date
Browse files Browse the repository at this point in the history
hale: add a new Excel reader parameter that allows specifying the import format for date cells.
At the moment the formatter uses the fixed format yyyy-MM-dd; making it customizable by the user should be handled in a new issue.

ING-4151
  • Loading branch information
emanuelaepure10 committed Dec 28, 2023
1 parent ec50283 commit 98f0093
Show file tree
Hide file tree
Showing 11 changed files with 154 additions and 33 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@

package eu.esdihumboldt.hale.io.xls.test

import java.text.SimpleDateFormat
import java.time.LocalDate

import eu.esdihumboldt.hale.common.instance.groovy.InstanceBuilder
import eu.esdihumboldt.hale.common.instance.model.InstanceCollection
import eu.esdihumboldt.hale.common.schema.groovy.SchemaBuilder
Expand All @@ -26,6 +29,11 @@ class XLSInstanceWriterTestExamples {

Schema schema = createSchema()

// Declare a date in the "dd/MM/yyyy" format
def dateString1 = "25/12/2023"
def dateFormat1 = new SimpleDateFormat("dd/MM/yyyy")
def date1 = dateFormat1.parse(dateString1)

// create the instance collection
// concrete types are only strings, since the test is not able to choose the correct type in wizard
InstanceCollection instances = new InstanceBuilder(types: schema).createCollection {
Expand Down Expand Up @@ -59,6 +67,7 @@ class XLSInstanceWriterTestExamples {
name('other')
number('1')
description('other type')
date(date1)
}
}
}
Expand Down Expand Up @@ -91,9 +100,9 @@ class XLSInstanceWriterTestExamples {
name(String)
number(String)
description(String)
date(LocalDate)
}
}
return schema;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ private void update(int sheetNum) throws Exception {
// if the sheet is empty an Exception occurs
AnalyseXLSSchemaTable analyser = new AnalyseXLSSchemaTable(
getWizard().getProvider().getSource(),
ReaderSettings.isXlsxContentType(getWizard().getContentType()), sheetNum, 0);
ReaderSettings.isXlsxContentType(getWizard().getContentType()), sheetNum, 0, null);

setHeader(analyser.getHeader().toArray(new String[0]));

Expand Down
13 changes: 13 additions & 0 deletions io/plugins/eu.esdihumboldt.hale.io.xls/plugin.xml
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,19 @@
ref="list">
</parameterComplexValue>
</providerParameter>
<providerParameter
description="Date Time Formatter specifying how values imported from Date cells should be imported"
label="Date Time Formatter"
name="dateTimeFormatter"
optional="true">
<parameterBinding
class="java.lang.String">
</parameterBinding>
<!-- Pattern letters are case-sensitive: "MM" is month-of-year, whereas "mm" would be minute-of-hour -->
<valueDescriptor
default="dd.MM.yyyy"
defaultDescription="Default to dd.MM.yyyy">
</valueDescriptor>
</providerParameter>
</provider>
<provider
class="eu.esdihumboldt.hale.io.xls.writer.XLSInstanceWriter"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.time.format.DateTimeFormatter;

import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
Expand Down Expand Up @@ -46,36 +47,43 @@ public abstract class AbstractAnalyseTable {
* Load table to analyse from an Excel file (first sheet).
*
* @param source the source to load the file from
* @param xlsx if the file should be loaded as XLSX file
* @throws Exception if an error occurs loading the file
*/
protected void analyse(LocatableInputSupplier<InputStream> source, boolean xlsx)
throws Exception {
analyse(source, xlsx, 0, 0);
analyse(source, xlsx, 0, 0, null);
}

/**
* Load table to analyse from an Excel file.
*
* @param source the source to load the file from
* @param isXlsx if the file should be loaded as XLSX file
* @param sheetNum number of the sheet that should be loaded (0-based)
* @param skipNlines number of lines to skip
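* @param dateTime the pattern used to create the formatter for date cells,
* may be <code>null</code> (no formatter is created then)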
* @throws Exception if an error occurs loading the file
*/
protected void analyse(LocatableInputSupplier<? extends InputStream> source, boolean xlsx,
int sheetNum, int skipNlines) throws Exception {
protected void analyse(LocatableInputSupplier<? extends InputStream> source, boolean isXlsx,
int sheetNum, int skipNlines, String dateTime) throws Exception {
try (InputStream inp = new BufferedInputStream(source.getInput());) {
// https://poi.apache.org/components/spreadsheet/quick-guide.html#FileInputStream
URI location = source.getLocation();
Workbook wb = loadWorkbook(inp, location, xlsx);
Workbook wb = loadWorkbook(inp, location, isXlsx);

Sheet sheet = wb.getSheetAt(sheetNum);
evaluator = wb.getCreationHelper().createFormulaEvaluator();

DateTimeFormatter dateFormatter = null;
if (dateTime != null) {
dateFormatter = DateTimeFormatter.ofPattern(dateTime);
}
// the first row might represent the header
analyseHeader(sheet);
analyseHeader(sheet, dateFormatter);

// load configuration entries
analyseContent(sheet, skipNlines);
analyseContent(sheet, skipNlines, dateFormatter);
} finally {
// reset evaluator reference
evaluator = null;
Expand All @@ -88,15 +96,15 @@ protected void analyse(LocatableInputSupplier<? extends InputStream> source, boo
* @param input the input stream to load
* @param location an optional location that can be used to determine the
* file type
* @param xlsx if the file should be loaded as XLSX file
* @param isXlsx if the file should be loaded as XLSX file
* @return the loaded workbook
* @throws IOException if an error occurs reading the file
* @throws InvalidFormatException if file has an invalid format when
* attempting to load as OpenXML file
*/
public static Workbook loadWorkbook(InputStream input, URI location, boolean xlsx)
public static Workbook loadWorkbook(InputStream input, URI location, boolean isXlsx)
throws IOException, InvalidFormatException {
if (location != null && !xlsx && location.getPath().toLowerCase().endsWith(".xls")) {
if (location != null && !isXlsx && location.getPath().toLowerCase().endsWith(".xls")) {
try (POIFSFileSystem fs = new POIFSFileSystem(input)) {
return new HSSFWorkbook(fs.getRoot(), true);
}
Expand All @@ -111,16 +119,17 @@ public static Workbook loadWorkbook(InputStream input, URI location, boolean xls
* Analyzes the table header.
*
* @param sheet the table sheet
* @param dateTimeFormatter the formatter passed on when extracting the
* text of the header cells
*/
protected void analyseHeader(Sheet sheet) {
protected void analyseHeader(Sheet sheet, DateTimeFormatter dateTimeFormatter) {
Row header = sheet.getRow(0);
if (header != null) {

// identify columns
int count = 0;
for (int i = header.getFirstCellNum(); i < header.getLastCellNum(); i++) {
Cell cell = header.getCell(i);
String text = extractText(cell, sheet);
String text = extractText(cell, sheet, dateTimeFormatter);
// cell cannot be empty to extract the text
if (text != null) {
headerCell(count, text);
Expand All @@ -142,11 +151,13 @@ protected void analyseHeader(Sheet sheet) {
* the skip line
*
* @param sheet the table sheet
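* @param skipNlines index of the first row to analyse, rows before it are
* skipped
* @param dateTimeFormatter the formatter passed on to the analysis of each
* row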
*/
private void analyseContent(Sheet sheet, int skipNlines) {
private void analyseContent(Sheet sheet, int skipNlines, DateTimeFormatter dateTimeFormatter) {
for (int i = skipNlines; i <= sheet.getLastRowNum(); i++) {
Row row = sheet.getRow(i);
analyseRow(i, row, sheet);
analyseRow(i, row, sheet, dateTimeFormatter);
}
}

Expand All @@ -157,18 +168,22 @@ private void analyseContent(Sheet sheet, int skipNlines) {
* separately)
* @param row the table row
* @param sheet the sheet
* @param dateTimeFormatter the formatter to use for date cells
*/
protected abstract void analyseRow(int num, Row row, Sheet sheet);
protected abstract void analyseRow(int num, Row row, Sheet sheet,
DateTimeFormatter dateTimeFormatter);

/**
* Extract the text from a given cell. Formulas are evaluated, for blank or
* error cells <code>null</code> is returned
*
* @param cell the cell
* @param sheet to extract text
* @param dateTimeFormatter the formatter used to convert date cell values
* to text
* @return the cell text
*/
protected String extractText(Cell cell, Sheet sheet) {
return XLSUtil.extractText(cell, evaluator, sheet);
protected String extractText(Cell cell, Sheet sheet, DateTimeFormatter dateTimeFormatter) {
return XLSUtil.extractText(cell, evaluator, sheet, dateTimeFormatter);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
package eu.esdihumboldt.hale.io.xls;

import java.io.InputStream;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.LinkedHashMap;
Expand All @@ -41,17 +42,20 @@ public class AnalyseXLSSchemaTable extends AbstractAnalyseTable {
* Default constructor
*
* @param source the source to load the file from
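* @param xlsx if the file should be loaded as XLSX file
* @param sheetNum number of the sheet in Excel file (0-based)
* @param skipNlines number of lines to skip
* @param dateTime the pattern for formatting values of date cells, may be
* <code>null</code>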
*
* @throws Exception thrown if the analysis fails
*/
public AnalyseXLSSchemaTable(LocatableInputSupplier<? extends InputStream> source, boolean xlsx,
int sheetNum, int skipNlines) throws Exception {
int sheetNum, int skipNlines, String dateTime) throws Exception {

header = new ArrayList<String>();
rows = new LinkedHashMap<Integer, List<String>>();

analyse(source, xlsx, sheetNum, skipNlines);
analyse(source, xlsx, sheetNum, skipNlines, dateTime);
}

/**
Expand All @@ -71,11 +75,11 @@ protected void headerCell(int num, String text) {
* org.apache.poi.ss.usermodel.Row)
*/
@Override
protected void analyseRow(int num, Row row, Sheet sheet) {
protected void analyseRow(int num, Row row, Sheet sheet, DateTimeFormatter dateTimeFormatter) {
if (row != null) {
List<String> rowContent = new ArrayList<String>();
for (int i = 0; i < row.getLastCellNum(); i++) {
rowContent.add(extractText(row.getCell(i), sheet));
rowContent.add(extractText(row.getCell(i), sheet, dateTimeFormatter));
}
if (!rowContent.isEmpty()
&& !rowContent.stream().allMatch(s -> s == null || s.isEmpty())) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,16 @@

package eu.esdihumboldt.hale.io.xls;

import java.time.LocalDateTime;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.time.format.FormatStyle;
import java.util.Date;

import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellType;
import org.apache.poi.ss.usermodel.CellValue;
import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.ss.usermodel.FormulaEvaluator;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
Expand All @@ -30,15 +37,23 @@
*/
public class XLSUtil {

/**
* Default date format pattern
*/
public static final String PARAMETER_DATE_FORMAT = "yyyy-MM-dd";

/**
* Extract the text from a given cell. Formulas are evaluated, for blank or
* error cells <code>null</code> is returned
*
* @param cell the cell
* @param evaluator the formula evaluator
* @param sheet to extract the text
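* @param dateTimeFormatter the formatter used to convert the value of date
* cells to text, if <code>null</code> a localized short date
* format is used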
* @return the cell text
*/
public static String extractText(Cell cell, FormulaEvaluator evaluator, Sheet sheet) {
public static String extractText(Cell cell, FormulaEvaluator evaluator, Sheet sheet,
DateTimeFormatter dateTimeFormatter) {
if (cell == null)
return null;

Expand All @@ -65,11 +80,31 @@ public static String extractText(Cell cell, FormulaEvaluator evaluator, Sheet sh
case BOOLEAN:
return String.valueOf(value.getBooleanValue());
case NUMERIC:
double number = value.getNumberValue();
if (number == Math.floor(number)) {
return String.valueOf((int) number);
if (DateUtil.isCellDateFormatted(cell)) {
// Get the date value from the cell
Date dateCellValue = cell.getDateCellValue();

// Convert java.util.Date to java.time.LocalDateTime
LocalDateTime localDateTime = dateCellValue.toInstant()
.atZone(ZoneId.systemDefault()).toLocalDateTime();

// Define a DateTimeFormatter with a specific pattern
if (dateTimeFormatter == null) {
dateTimeFormatter = DateTimeFormatter.ofLocalizedDate(FormatStyle.SHORT);
}
// Format LocalDateTime using DateTimeFormatter
String formattedDate = localDateTime.format(dateTimeFormatter);

return formattedDate;
}
else {
double number = value.getNumberValue();
if (number == Math.floor(number)) {
return String.valueOf((int) number);
}

return String.valueOf(value.getNumberValue());
}
return String.valueOf(value.getNumberValue());
case STRING:
return value.getStringValue();
default:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,10 @@ public Map<Value, Value> read(Workbook workbook, boolean skipFirst, int keyColum
Row currentRow = sheet.getRow(row);
if (currentRow != null) {
String value = XLSUtil.extractText(currentRow.getCell(valueColumn), evaluator,
sheet);
sheet, null);
if (value != null && (!ignoreEmptyStrings || !value.isEmpty())) {
map.put(Value.of(
XLSUtil.extractText(currentRow.getCell(keyColumn), evaluator, sheet)),
Value.of(value));
map.put(Value.of(XLSUtil.extractText(currentRow.getCell(keyColumn), evaluator,
sheet, null)), Value.of(value));
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,12 @@ public class ReaderSettings {
*/
public static final String PARAMETER_SHEET_SETTINGS = "sheetSettings";

/**
* Parameter for the reader specifying how values imported from Date cells
* should be formatted.
*/
public static final String PARAMETER_DATE_FORMAT = "dateTimeFormatter";

/**
* Collect information and settings on a single sheet.
*/
Expand Down Expand Up @@ -97,6 +103,9 @@ public void applySettings(SheetSettings settings) {
if (settings.getSkipLines() != null) {
this.settings.setSkipLines(settings.getSkipLines());
}
if (settings.getDateTime() != null) {
this.settings.setDateTime(settings.getDateTime());
}
}

/**
Expand Down Expand Up @@ -190,9 +199,14 @@ else if (skipType) {
else {
skipNlines = 0;
}

// read dateFormat
String dateFormatString = reader.getParameter(PARAMETER_DATE_FORMAT).as(String.class);

// apply to all sheets as default
for (SheetInfo sheet : sheets) {
sheet.getSettings().setSkipLines(skipNlines);
sheet.getSettings().setDateTime(dateFormatString);
}

// determine if multi sheet mode, defaults to false for backwards
Expand Down
Loading

0 comments on commit 98f0093

Please sign in to comment.