diff --git a/io/plugins/eu.esdihumboldt.hale.io.xls.test/src/eu/esdihumboldt/hale/io/xls/test/XLSInstanceWriterTestExamples.groovy b/io/plugins/eu.esdihumboldt.hale.io.xls.test/src/eu/esdihumboldt/hale/io/xls/test/XLSInstanceWriterTestExamples.groovy index 6cb89f21b8..bd315f722c 100644 --- a/io/plugins/eu.esdihumboldt.hale.io.xls.test/src/eu/esdihumboldt/hale/io/xls/test/XLSInstanceWriterTestExamples.groovy +++ b/io/plugins/eu.esdihumboldt.hale.io.xls.test/src/eu/esdihumboldt/hale/io/xls/test/XLSInstanceWriterTestExamples.groovy @@ -15,6 +15,9 @@ package eu.esdihumboldt.hale.io.xls.test +import java.text.SimpleDateFormat +import java.time.LocalDate + import eu.esdihumboldt.hale.common.instance.groovy.InstanceBuilder import eu.esdihumboldt.hale.common.instance.model.InstanceCollection import eu.esdihumboldt.hale.common.schema.groovy.SchemaBuilder @@ -26,6 +29,11 @@ class XLSInstanceWriterTestExamples { Schema schema = createSchema() + // Declare a date in the "dd/mm/yyyy" format + def dateString1 = "25/12/2023" + def dateFormat1 = new SimpleDateFormat("dd/MM/yyyy") + def date1 = dateFormat1.parse(dateString1) + // create the instance collection // concrete types are only strings, since the test is not able to choose the correct type in wizard InstanceCollection instances = new InstanceBuilder(types: schema).createCollection { @@ -59,6 +67,7 @@ class XLSInstanceWriterTestExamples { name('other') number('1') description('other type') + date(date1) } } } @@ -91,9 +100,9 @@ class XLSInstanceWriterTestExamples { name(String) number(String) description(String) + date(LocalDate) } } return schema; } - } diff --git a/io/plugins/eu.esdihumboldt.hale.io.xls.ui/src/eu/esdihumboldt/hale/io/xls/ui/XLSSchemaTypePage.java b/io/plugins/eu.esdihumboldt.hale.io.xls.ui/src/eu/esdihumboldt/hale/io/xls/ui/XLSSchemaTypePage.java index 9f63544270..136b65b2e0 100644 --- a/io/plugins/eu.esdihumboldt.hale.io.xls.ui/src/eu/esdihumboldt/hale/io/xls/ui/XLSSchemaTypePage.java +++ b/io/plugins/eu.esdihumboldt.hale.io.xls.ui/src/eu/esdihumboldt/hale/io/xls/ui/XLSSchemaTypePage.java @@ -241,7 +241,7 @@ private void update(int sheetNum) throws Exception { // if the sheet is empty an Exception occurs AnalyseXLSSchemaTable analyser = new AnalyseXLSSchemaTable( getWizard().getProvider().getSource(), - ReaderSettings.isXlsxContentType(getWizard().getContentType()), sheetNum, 0); + ReaderSettings.isXlsxContentType(getWizard().getContentType()), sheetNum, 0, null); setHeader(analyser.getHeader().toArray(new String[0])); diff --git a/io/plugins/eu.esdihumboldt.hale.io.xls/plugin.xml b/io/plugins/eu.esdihumboldt.hale.io.xls/plugin.xml index f2c94612dd..446435b76e 100644 --- a/io/plugins/eu.esdihumboldt.hale.io.xls/plugin.xml +++ b/io/plugins/eu.esdihumboldt.hale.io.xls/plugin.xml @@ -150,6 +150,19 @@ ref="list"> + + + + + + source, boolean xlsx) throws Exception { - analyse(source, xlsx, 0, 0); + analyse(source, xlsx, 0, 0, null); } /** * Load table to analyse from an Excel file. * * @param source the source to load the file from + * @param isXlsx if the file should be loaded as XLSX file * @param sheetNum number of the sheet that should be loaded (0-based) * @param skipNlines number of lines to skip + * @param dateTime * @throws Exception if an error occurs loading the file */ - protected void analyse(LocatableInputSupplier source, boolean xlsx, - int sheetNum, int skipNlines) throws Exception { + protected void analyse(LocatableInputSupplier source, boolean isXlsx, + int sheetNum, int skipNlines, String dateTime) throws Exception { try (InputStream inp = new BufferedInputStream(source.getInput());) { // https://poi.apache.org/components/spreadsheet/quick-guide.html#FileInputStream URI location = source.getLocation(); - Workbook wb = loadWorkbook(inp, location, xlsx); + Workbook wb = loadWorkbook(inp, location, isXlsx); Sheet sheet = wb.getSheetAt(sheetNum); evaluator = wb.getCreationHelper().createFormulaEvaluator(); + DateTimeFormatter dateFormatter = null; + if (dateTime != null) { + dateFormatter = DateTimeFormatter.ofPattern(dateTime); + } // the first might row represents the header - analyseHeader(sheet); + analyseHeader(sheet, dateFormatter); // load configuration entries - analyseContent(sheet, skipNlines); + analyseContent(sheet, skipNlines, dateFormatter); } finally { // reset evaluator reference evaluator = null; @@ -88,15 +96,15 @@ protected void analyse(LocatableInputSupplier source, boo * @param input the input stream to load * @param location an optional location that can be used to determine the * file type - * @param xlsx if the file should be loaded as XLSX file + * @param isXlsx if the file should be loaded as XLSX file * @return the loaded workbook * @throws IOException if an error occurs reading the file * @throws InvalidFormatException if file has an invalid format when * attempting to load as OpenXML file */ - public static Workbook loadWorkbook(InputStream input, URI location, boolean xlsx) + public static Workbook loadWorkbook(InputStream input, URI location, boolean isXlsx) throws IOException, InvalidFormatException { - if (location != null && !xlsx && location.getPath().toLowerCase().endsWith(".xls")) { + if (location != null && !isXlsx && location.getPath().toLowerCase().endsWith(".xls")) { try (POIFSFileSystem fs = new POIFSFileSystem(input)) { return new HSSFWorkbook(fs.getRoot(), true); } @@ -111,8 +119,9 @@ public static Workbook loadWorkbook(InputStream input, URI location, boolean xls * Analyzes the table header. * * @param sheet the table sheet + * @param dateTimeFormatter */ - protected void analyseHeader(Sheet sheet) { + protected void analyseHeader(Sheet sheet, DateTimeFormatter dateTimeFormatter) { Row header = sheet.getRow(0); if (header != null) { @@ -120,7 +129,7 @@ protected void analyseHeader(Sheet sheet) { int count = 0; for (int i = header.getFirstCellNum(); i < header.getLastCellNum(); i++) { Cell cell = header.getCell(i); - String text = extractText(cell, sheet); + String text = extractText(cell, sheet, dateTimeFormatter); // cell cannot be empty to extract the text if (text != null) { headerCell(count, text); @@ -142,11 +151,13 @@ protected void analyseHeader(Sheet sheet) { * the skip line * * @param sheet the table sheet + * @param skipNlines + * @param dateTimeFormatter */ - private void analyseContent(Sheet sheet, int skipNlines) { + private void analyseContent(Sheet sheet, int skipNlines, DateTimeFormatter dateTimeFormatter) { for (int i = skipNlines; i <= sheet.getLastRowNum(); i++) { Row row = sheet.getRow(i); - analyseRow(i, row, sheet); + analyseRow(i, row, sheet, dateTimeFormatter); } } @@ -157,18 +168,22 @@ private void analyseContent(Sheet sheet, int skipNlines) { * separately) * @param row the table row * @param sheet the sheet + * @param dateTimeFormatter */ - protected abstract void analyseRow(int num, Row row, Sheet sheet); + protected abstract void analyseRow(int num, Row row, Sheet sheet, + DateTimeFormatter dateTimeFormatter); /** * Extract the text from a given cell. Formulas are evaluated, for blank or * error cells null is returned * * @param cell the cell + * @param sheet to extract text + * @param dateTimeFormatter to convert the date into * @return the cell text */ - protected String extractText(Cell cell, Sheet sheet) { - return XLSUtil.extractText(cell, evaluator, sheet); + protected String extractText(Cell cell, Sheet sheet, DateTimeFormatter dateTimeFormatter) { + return XLSUtil.extractText(cell, evaluator, sheet, dateTimeFormatter); } } diff --git a/io/plugins/eu.esdihumboldt.hale.io.xls/src/eu/esdihumboldt/hale/io/xls/AnalyseXLSSchemaTable.java b/io/plugins/eu.esdihumboldt.hale.io.xls/src/eu/esdihumboldt/hale/io/xls/AnalyseXLSSchemaTable.java index 1bf6e9ef31..b424dfcf60 100644 --- a/io/plugins/eu.esdihumboldt.hale.io.xls/src/eu/esdihumboldt/hale/io/xls/AnalyseXLSSchemaTable.java +++ b/io/plugins/eu.esdihumboldt.hale.io.xls/src/eu/esdihumboldt/hale/io/xls/AnalyseXLSSchemaTable.java @@ -16,6 +16,7 @@ package eu.esdihumboldt.hale.io.xls; import java.io.InputStream; +import java.time.format.DateTimeFormatter; import java.util.ArrayList; import java.util.Collection; import java.util.LinkedHashMap; @@ -41,17 +42,20 @@ public class AnalyseXLSSchemaTable extends AbstractAnalyseTable { * Default constructor * * @param source the source to load the file from + * @param xlsx * @param sheetNum number of the sheet in Excel file (0-based) + * @param skipNlines + * @param dateTime * * @throws Exception thrown if the analysis fails */ public AnalyseXLSSchemaTable(LocatableInputSupplier source, boolean xlsx, - int sheetNum, int skipNlines) throws Exception { + int sheetNum, int skipNlines, String dateTime) throws Exception { header = new ArrayList(); rows = new LinkedHashMap>(); - analyse(source, xlsx, sheetNum, skipNlines); + analyse(source, xlsx, sheetNum, skipNlines, dateTime); } /** @@ -71,11 +75,11 @@ protected void headerCell(int num, String text) { * org.apache.poi.ss.usermodel.Row) */ @Override - protected void analyseRow(int num, Row row, Sheet sheet) { + protected void analyseRow(int num, Row row, Sheet sheet, DateTimeFormatter dateTimeFormatter) { if (row != null) { List rowContent = new ArrayList(); for (int i = 0; i < row.getLastCellNum(); i++) { - rowContent.add(extractText(row.getCell(i), sheet)); + rowContent.add(extractText(row.getCell(i), sheet, dateTimeFormatter)); } if (!rowContent.isEmpty() && !rowContent.stream().allMatch(s -> s == null || s.isEmpty())) { diff --git a/io/plugins/eu.esdihumboldt.hale.io.xls/src/eu/esdihumboldt/hale/io/xls/XLSUtil.java b/io/plugins/eu.esdihumboldt.hale.io.xls/src/eu/esdihumboldt/hale/io/xls/XLSUtil.java index 43af849818..5189fc37d3 100644 --- a/io/plugins/eu.esdihumboldt.hale.io.xls/src/eu/esdihumboldt/hale/io/xls/XLSUtil.java +++ b/io/plugins/eu.esdihumboldt.hale.io.xls/src/eu/esdihumboldt/hale/io/xls/XLSUtil.java @@ -15,9 +15,16 @@ package eu.esdihumboldt.hale.io.xls; +import java.time.LocalDateTime; +import java.time.ZoneId; +import java.time.format.DateTimeFormatter; +import java.time.format.FormatStyle; +import java.util.Date; + import org.apache.poi.ss.usermodel.Cell; import org.apache.poi.ss.usermodel.CellType; import org.apache.poi.ss.usermodel.CellValue; +import org.apache.poi.ss.usermodel.DateUtil; import org.apache.poi.ss.usermodel.FormulaEvaluator; import org.apache.poi.ss.usermodel.Row; import org.apache.poi.ss.usermodel.Sheet; @@ -30,15 +37,23 @@ */ public class XLSUtil { + /** + * Default Time Formatter + */ + public static final String PARAMETER_DATE_FORMAT = "yyyy-MM-dd"; + /** * Extract the text from a given cell. Formulas are evaluated, for blank or * error cells null is returned * * @param cell the cell * @param evaluator the formula evaluator + * @param sheet to extract the text + * @param dateTimeFormatter to convert the date into * @return the cell text */ - public static String extractText(Cell cell, FormulaEvaluator evaluator, Sheet sheet) { + public static String extractText(Cell cell, FormulaEvaluator evaluator, Sheet sheet, + DateTimeFormatter dateTimeFormatter) { if (cell == null) return null; @@ -65,11 +80,31 @@ public static String extractText(Cell cell, FormulaEvaluator evaluator, Sheet sh case BOOLEAN: return String.valueOf(value.getBooleanValue()); case NUMERIC: - double number = value.getNumberValue(); - if (number == Math.floor(number)) { - return String.valueOf((int) number); + if (DateUtil.isCellDateFormatted(cell)) { + // Get the date value from the cell + Date dateCellValue = cell.getDateCellValue(); + + // Convert java.util.Date to java.time.LocalDateTime + LocalDateTime localDateTime = dateCellValue.toInstant() + .atZone(ZoneId.systemDefault()).toLocalDateTime(); + + // Define a DateTimeFormatter with a specific pattern + if (dateTimeFormatter == null) { + dateTimeFormatter = DateTimeFormatter.ofLocalizedDate(FormatStyle.SHORT); + } + // Format LocalDateTime using DateTimeFormatter + String formattedDate = localDateTime.format(dateTimeFormatter); + + return formattedDate; + } + else { + double number = value.getNumberValue(); + if (number == Math.floor(number)) { + return String.valueOf((int) number); + } + + return String.valueOf(value.getNumberValue()); } - return String.valueOf(value.getNumberValue()); case STRING: return value.getStringValue(); default: diff --git a/io/plugins/eu.esdihumboldt.hale.io.xls/src/eu/esdihumboldt/hale/io/xls/reader/DefaultXLSLookupTableReader.java b/io/plugins/eu.esdihumboldt.hale.io.xls/src/eu/esdihumboldt/hale/io/xls/reader/DefaultXLSLookupTableReader.java index 0ad1a76ba3..9107d3ea64 100644 --- a/io/plugins/eu.esdihumboldt.hale.io.xls/src/eu/esdihumboldt/hale/io/xls/reader/DefaultXLSLookupTableReader.java +++ b/io/plugins/eu.esdihumboldt.hale.io.xls/src/eu/esdihumboldt/hale/io/xls/reader/DefaultXLSLookupTableReader.java @@ -58,11 +58,10 @@ public Map read(Workbook workbook, boolean skipFirst, int keyColum Row currentRow = sheet.getRow(row); if (currentRow != null) { String value = XLSUtil.extractText(currentRow.getCell(valueColumn), evaluator, - sheet); + sheet, null); if (value != null && (!ignoreEmptyStrings || !value.isEmpty())) { - map.put(Value.of( - XLSUtil.extractText(currentRow.getCell(keyColumn), evaluator, sheet)), - Value.of(value)); + map.put(Value.of(XLSUtil.extractText(currentRow.getCell(keyColumn), evaluator, + sheet, null)), Value.of(value)); } } } diff --git a/io/plugins/eu.esdihumboldt.hale.io.xls/src/eu/esdihumboldt/hale/io/xls/reader/ReaderSettings.java b/io/plugins/eu.esdihumboldt.hale.io.xls/src/eu/esdihumboldt/hale/io/xls/reader/ReaderSettings.java index 7f2a235bba..058c566361 100644 --- a/io/plugins/eu.esdihumboldt.hale.io.xls/src/eu/esdihumboldt/hale/io/xls/reader/ReaderSettings.java +++ b/io/plugins/eu.esdihumboldt.hale.io.xls/src/eu/esdihumboldt/hale/io/xls/reader/ReaderSettings.java @@ -53,6 +53,12 @@ public class ReaderSettings { */ public static final String PARAMETER_SHEET_SETTINGS = "sheetSettings"; + /** + * Parameter for the reader specifying how values imported from Date cells + * should be formatted. + */ + public static final String PARAMETER_DATE_FORMAT = "dateTimeFormatter"; + /** * Collect information and settings on a single sheet. */ @@ -97,6 +103,9 @@ public void applySettings(SheetSettings settings) { if (settings.getSkipLines() != null) { this.settings.setSkipLines(settings.getSkipLines()); } + if (settings.getDateTime() != null) { + this.settings.setDateTime(settings.getDateTime()); + } } /** @@ -190,9 +199,14 @@ else if (skipType) { else { skipNlines = 0; } + + // read dateFormat + String dateFormatString = reader.getParameter(PARAMETER_DATE_FORMAT).as(String.class); + // apply to all sheets as default for (SheetInfo sheet : sheets) { sheet.getSettings().setSkipLines(skipNlines); + sheet.getSettings().setDateTime(dateFormatString); } // determine if multi sheet mode, defaults to false for backwards diff --git a/io/plugins/eu.esdihumboldt.hale.io.xls/src/eu/esdihumboldt/hale/io/xls/reader/SheetSettings.java b/io/plugins/eu.esdihumboldt.hale.io.xls/src/eu/esdihumboldt/hale/io/xls/reader/SheetSettings.java index c15cee697f..c45a8164d3 100644 --- a/io/plugins/eu.esdihumboldt.hale.io.xls/src/eu/esdihumboldt/hale/io/xls/reader/SheetSettings.java +++ b/io/plugins/eu.esdihumboldt.hale.io.xls/src/eu/esdihumboldt/hale/io/xls/reader/SheetSettings.java @@ -35,6 +35,7 @@ public class SheetSettings { private QName typeName; private Boolean skipSheet; private Integer skipLines; + private String dateTime; /** * Create sheet settings @@ -83,6 +84,13 @@ public Integer getSkipLines() { return skipLines; } + /** + * @return the dateTime + */ + public String getDateTime() { + return dateTime; + } + /** * @param typeName the typeName to set */ @@ -104,6 +112,13 @@ public void setSkipLines(Integer skipLines) { this.skipLines = skipLines; } + /** + * @param dateTime the dateTime to set + */ + public void setDateTime(String dateTime) { + this.dateTime = dateTime; + } + /** * Convert to a {@link Value}. * @@ -126,6 +141,9 @@ public Value toValue() { if (skipLines != null) { props.put("skipLines", Value.of(skipLines)); } + if (dateTime != null) { + props.put("dateTime", Value.of(dateTime)); + } return props.toValue(); } @@ -143,12 +161,14 @@ public static Optional fromValue(Value value) { QName typeName = props.getSafe("typeName").as(QName.class); Boolean skipSheet = props.getSafe("skipSheet").as(Boolean.class); Integer skipLines = props.getSafe("skipLines").as(Integer.class); + String dateTime = props.getSafe("dateTime").as(String.class); SheetSettings settings = new SheetSettings(identifiedByName, identifiedByIndex); settings.setTypeName(typeName); settings.setSkipSheet(skipSheet); settings.setSkipLines(skipLines); + settings.setDateTime(dateTime); return Optional.of(settings); } diff --git a/io/plugins/eu.esdihumboldt.hale.io.xls/src/eu/esdihumboldt/hale/io/xls/reader/XLSInstanceReader.java b/io/plugins/eu.esdihumboldt.hale.io.xls/src/eu/esdihumboldt/hale/io/xls/reader/XLSInstanceReader.java index 05ac94c422..8fda8c4ffe 100644 --- a/io/plugins/eu.esdihumboldt.hale.io.xls/src/eu/esdihumboldt/hale/io/xls/reader/XLSInstanceReader.java +++ b/io/plugins/eu.esdihumboldt.hale.io.xls/src/eu/esdihumboldt/hale/io/xls/reader/XLSInstanceReader.java @@ -56,6 +56,15 @@ public class XLSInstanceReader extends AbstractInstanceReader { private DefaultInstanceCollection instances; + /** + * Default constructor. + */ + public XLSInstanceReader() { + super(); + + addSupportedParameter(ReaderSettings.PARAMETER_DATE_FORMAT); + } + /** * @see eu.esdihumboldt.hale.common.instance.io.InstanceReader#getInstances() */ @@ -142,9 +151,11 @@ private void loadSheet(SheetInfo sheet, IOReporter reporter) throws Exception { int skipNlines = sheet.getSettings().getSkipLines() != null ? sheet.getSettings().getSkipLines() : 0; + String dateTime = sheet.getSettings().getDateTime(); AnalyseXLSSchemaTable analyser = new AnalyseXLSSchemaTable(getSource(), - ReaderSettings.isXlsxContentType(getContentType()), sheet.getIndex(), skipNlines); + ReaderSettings.isXlsxContentType(getContentType()), sheet.getIndex(), skipNlines, + dateTime); // get type definition of the schema QName typeName = sheet.getSettings().getTypeName(); diff --git a/io/plugins/eu.esdihumboldt.hale.io.xls/src/eu/esdihumboldt/hale/io/xls/reader/XLSSchemaReader.java b/io/plugins/eu.esdihumboldt.hale.io.xls/src/eu/esdihumboldt/hale/io/xls/reader/XLSSchemaReader.java index 3ae6c00622..0d0896089f 100644 --- a/io/plugins/eu.esdihumboldt.hale.io.xls/src/eu/esdihumboldt/hale/io/xls/reader/XLSSchemaReader.java +++ b/io/plugins/eu.esdihumboldt.hale.io.xls/src/eu/esdihumboldt/hale/io/xls/reader/XLSSchemaReader.java @@ -72,6 +72,7 @@ protected Schema loadFromSource(ProgressIndicator progress, IOReporter reporter) throws IOProviderConfigurationException, IOException { sheetNum = getParameter(InstanceTableIOConstants.SHEET_INDEX).as(int.class, 0); + String dateTime = getParameter(ReaderSettings.PARAMETER_DATE_FORMAT).as(String.class); progress.begin("Load XLS/XLSX schema", ProgressIndicator.UNKNOWN); @@ -81,7 +82,7 @@ protected Schema loadFromSource(ProgressIndicator progress, IOReporter reporter) try { analyser = new AnalyseXLSSchemaTable(getSource(), - ReaderSettings.isXlsxContentType(getContentType()), sheetNum, 0); + ReaderSettings.isXlsxContentType(getContentType()), sheetNum, 0, dateTime); header = analyser.getHeader(); // create type definition