From 1c849564534ff8fdb31577bbcd1c11701b69a5cf Mon Sep 17 00:00:00 2001 From: Miroslav Blasko Date: Mon, 6 Jan 2025 18:18:38 +0100 Subject: [PATCH 1/5] Reformat --- .../cz/cvut/spipes/modules/TabularModule.java | 101 ++++++++++-------- 1 file changed, 54 insertions(+), 47 deletions(-) diff --git a/s-pipes-modules/module-tabular/src/main/java/cz/cvut/spipes/modules/TabularModule.java b/s-pipes-modules/module-tabular/src/main/java/cz/cvut/spipes/modules/TabularModule.java index 0316249a..ab238de3 100644 --- a/s-pipes-modules/module-tabular/src/main/java/cz/cvut/spipes/modules/TabularModule.java +++ b/s-pipes-modules/module-tabular/src/main/java/cz/cvut/spipes/modules/TabularModule.java @@ -113,12 +113,12 @@ public class TabularModule extends AnnotatedAbstractModule { private final Property P_PROCESS_TABLE_AT_INDEX = getSpecificParameter("process-table-at-index"); @Parameter(iri = SML.replace, comment = "Specifies whether a module should overwrite triples" + - " from its predecessors. When set to true (default is false), it prevents" + - " passing through triples from the predecessors.") + " from its predecessors. When set to true (default is false), it prevents" + + " passing through triples from the predecessors.") private boolean isReplace = false; @Parameter(iri = PARAM_URL_PREFIX + "source-resource-uri", comment = "URI of resource" + - " that represent tabular data (e.g. resource representing CSV file).") + " that represent tabular data (e.g. resource representing CSV file).") private StreamResource sourceResource; @Parameter(iri = PARAM_URL_PREFIX + "delimiter", comment = "Column delimiter. Default value is comma ','.") @@ -281,11 +281,13 @@ ExecutionContext executeSelf() { tableSchema.setAboutUrl(schemaColumn, sourceResource.getUri()); schemaColumn.setProperty( - dataPrefix, - sourceResource.getUri(), - hasInputSchema ? tableSchema.getColumn(columnName) : null); + dataPrefix, + sourceResource.getUri(), + hasInputSchema ? 
tableSchema.getColumn(columnName) : null); schemaColumn.setTitle(columnTitle); - if (isDuplicate) throwNotUniqueException(schemaColumn, columnTitle, columnName); + if (isDuplicate) { + throwNotUniqueException(schemaColumn, columnTitle, columnName); + } } List row; @@ -348,13 +350,14 @@ ExecutionContext executeSelf() { int cellsNum = 1; for (Region region : regions) { int firstCellInRegionNum = cellsNum; - for(int i = region.getFirstRow();i <= region.getLastRow();i++){ - for(int j = region.getFirstColumn();j <= region.getLastColumn();j++) { - Cell cell = new Cell(sourceResource.getUri()+"#cell"+(cellsNum)); + for (int i = region.getFirstRow(); i <= region.getLastRow(); i++) { + for (int j = region.getFirstColumn(); j <= region.getLastColumn(); j++) { + Cell cell = new Cell(sourceResource.getUri() + "#cell" + cellsNum); cell.setRow(tableSchema.createAboutUrl(i)); cell.setColumn(outputColumns.get(j).getUri().toString()); - if(cellsNum != firstCellInRegionNum) - cell.setSameValueAsCell(sourceResource.getUri()+"#cell"+(firstCellInRegionNum)); + if (cellsNum != firstCellInRegionNum) { + cell.setSameValueAsCell(sourceResource.getUri() + "#cell" + firstCellInRegionNum); + } em.merge(cell); cellsNum++; } @@ -381,19 +384,19 @@ private String getValueFromRow(List row, int index, int expectedRowLengt StringBuilder record = new StringBuilder(recordDelimiter); for (int i = 0; i < row.size(); i++) { record - .append(i) - .append(":") - .append(row.get(i)) - .append(recordDelimiter); + .append(i) + .append(":") + .append(row.get(i)) + .append(recordDelimiter); } LOG.error("Reading input file failed when reading record #{} (may not reflect the line #).\n" + - " It was expected that the current record contains {} values" + - ", but {}. element was not retrieved before whole record was processed.\n" + - "The problematic record: {}", - currentRecordNumber, - expectedRowLength, - index+1, - record + " It was expected that the current record contains {} values" + + ", but {}. 
element was not retrieved before whole record was processed.\n" + + "The problematic record: {}", + currentRecordNumber, + expectedRowLength, + index+1, + record ); throw new SPipesException("Reading input file failed.", e); } @@ -413,9 +416,9 @@ private Statement createRowResource(String cellValue, int rowNumber, Column colu Resource rowResource = ResourceFactory.createResource(tableSchema.createAboutUrl(rowNumber)); return ResourceFactory.createStatement( - rowResource, - ResourceFactory.createProperty(column.getPropertyUrl()), - ResourceFactory.createPlainLiteral(cellValue)); + rowResource, + ResourceFactory.createProperty(column.getPropertyUrl()), + ResourceFactory.createPlainLiteral(cellValue)); } private boolean hasInputSchema(TableSchema inputTableSchema) { @@ -429,11 +432,11 @@ private boolean hasInputSchema(TableSchema inputTableSchema) { private TableSchema getTableSchema(EntityManager em) { TypedQuery query = em.createNativeQuery( - "PREFIX csvw: \n" + - "SELECT ?t WHERE { \n" + - "?t a csvw:TableSchema. \n" + - "}", - TableSchema.class + "PREFIX csvw: \n" + + "SELECT ?t WHERE { \n" + + "?t a csvw:TableSchema. \n" + + "}", + TableSchema.class ); int tableSchemaCount = query.getResultList().size(); @@ -452,14 +455,14 @@ private TableSchema getTableSchema(EntityManager em) { private void throwNotUniqueException(Column column, String columnTitle, String columnName) { throw new ResourceNotUniqueException( - String.format("Unable to create value of property %s due to collision. " + - "Both column titles '%s' and '%s' are normalized to '%s' " + - "and thus would refer to the same property url <%s>.", - CSVW.propertyUrl, - columnTitle, - column.getTitle(), - columnName, - column.getPropertyUrl())); + String.format("Unable to create value of property %s due to collision. 
" + + "Both column titles '%s' and '%s' are normalized to '%s' " + + "and thus would refer to the same property url <%s>.", + CSVW.propertyUrl, + columnTitle, + column.getTitle(), + columnName, + column.getPropertyUrl())); } private ExecutionContext getExecutionContext(Model inputModel, Model outputModel) { @@ -473,12 +476,12 @@ private ExecutionContext getExecutionContext(Model inputModel, Model outputModel @Override public void loadManualConfiguration() { sourceResourceFormat = ResourceFormat.fromString( - getPropertyValue(P_SOURCE_RESOURCE_FORMAT, ResourceFormat.PLAIN.getValue()) + getPropertyValue(P_SOURCE_RESOURCE_FORMAT, ResourceFormat.PLAIN.getValue()) ); delimiter = getPropertyValue(P_DELIMITER, getDefaultDelimiterSupplier(sourceResourceFormat)); quoteCharacter = getPropertyValue(P_QUOTE_CHARACTER, getDefaultQuoteCharacterSupplier(sourceResourceFormat)); outputMode = Mode.fromResource( - getPropertyValue(P_OUTPUT_MODE, Mode.STANDARD.getResource()) + getPropertyValue(P_OUTPUT_MODE, Mode.STANDARD.getResource()) ); setInputCharset(delimiter); } @@ -514,7 +517,7 @@ private Supplier getDefaultQuoteCharacterSupplier(ResourceFormat sour if (sourceResourceFormat == ResourceFormat.CSV) { return () -> { LOG.debug("Quote character not specified, using double-quote as default value" + - " to be compliant with RFC 4180 (CSV)"); + " to be compliant with RFC 4180 (CSV)"); return '"'; }; } @@ -524,8 +527,8 @@ private Supplier getDefaultQuoteCharacterSupplier(ResourceFormat sour private char getPropertyValue(Property property, Supplier defaultValueSupplier) { return Optional.ofNullable(getPropertyValue(property)) - .map(n -> n.asLiteral().getChar()) - .orElseGet(defaultValueSupplier); + .map(n -> n.asLiteral().getChar()) + .orElseGet(defaultValueSupplier); } @Override @@ -624,7 +627,7 @@ public int getDelimiter() { public void setDelimiter(int delimiter) { if ((sourceResourceFormat == ResourceFormat.CSV && delimiter != ',') || - (sourceResourceFormat == ResourceFormat.TSV 
&& delimiter != '\t')) { + (sourceResourceFormat == ResourceFormat.TSV && delimiter != '\t')) { throw new SpecificationNonComplianceException(sourceResourceFormat, delimiter); } this.delimiter = delimiter; @@ -680,7 +683,9 @@ private String[] getHeaderFromSchema(Model inputModel, String[] header, boolean tableSchema.setOrderList(orderList); header = createHeaders(header.length, tableSchema.sortColumns(orderList)); - } else LOG.info("Order of columns was not provided in the schema."); + } else { + LOG.info("Order of columns was not provided in the schema."); + } } else { header = createHeaders(header.length, new ArrayList<>()); } @@ -693,7 +698,9 @@ private String[] createHeaders(int size, List columns) { for (int i = 0; i < size; i++) { if (!columns.isEmpty()) { headers[i] = columns.get(i).getName(); - } else headers[i] = "column_" + (i + 1); + } else { + headers[i] = "column_" + (i + 1); + } } return headers; } From a1b4b52d74a8da6694a7166263d93bfed5890586 Mon Sep 17 00:00:00 2001 From: Miroslav Blasko Date: Tue, 7 Jan 2025 17:43:48 +0100 Subject: [PATCH 2/5] [#228] Refactor using CSVStreamReaderAdapter --- .../cz/cvut/spipes/modules/TabularModule.java | 78 ++++--------- .../exception/MissingArgumentException.java | 7 ++ .../modules/util/CSVStreamReaderAdapter.java | 106 ++++++++++++++++++ .../modules/util/StreamReaderAdapter.java | 19 ++++ 4 files changed, 155 insertions(+), 55 deletions(-) create mode 100644 s-pipes-modules/module-tabular/src/main/java/cz/cvut/spipes/modules/exception/MissingArgumentException.java create mode 100644 s-pipes-modules/module-tabular/src/main/java/cz/cvut/spipes/modules/util/CSVStreamReaderAdapter.java create mode 100644 s-pipes-modules/module-tabular/src/main/java/cz/cvut/spipes/modules/util/StreamReaderAdapter.java diff --git a/s-pipes-modules/module-tabular/src/main/java/cz/cvut/spipes/modules/TabularModule.java b/s-pipes-modules/module-tabular/src/main/java/cz/cvut/spipes/modules/TabularModule.java index ab238de3..cb9d4136 
100644 --- a/s-pipes-modules/module-tabular/src/main/java/cz/cvut/spipes/modules/TabularModule.java +++ b/s-pipes-modules/module-tabular/src/main/java/cz/cvut/spipes/modules/TabularModule.java @@ -13,6 +13,7 @@ import cz.cvut.spipes.exception.ResourceNotUniqueException; import cz.cvut.spipes.exception.SPipesException; import cz.cvut.spipes.modules.annotations.SPipesModule; +import cz.cvut.spipes.modules.exception.MissingArgumentException; import cz.cvut.spipes.modules.exception.SheetDoesntExistsException; import cz.cvut.spipes.modules.exception.SheetIsNotSpecifiedException; import cz.cvut.spipes.modules.exception.SpecificationNonComplianceException; @@ -22,18 +23,14 @@ import cz.cvut.spipes.registry.StreamResource; import cz.cvut.spipes.registry.StreamResourceRegistry; import cz.cvut.spipes.util.JenaUtils; -import org.apache.commons.cli.MissingArgumentException; import org.apache.jena.rdf.model.*; import org.jetbrains.annotations.NotNull; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.supercsv.io.CsvListReader; -import org.supercsv.io.ICsvListReader; import org.supercsv.prefs.CsvPreference; +import java.io.ByteArrayInputStream; import java.io.IOException; -import java.io.Reader; -import java.io.StringReader; import java.net.URI; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; @@ -189,6 +186,8 @@ ExecutionContext executeSelf() { StreamResource originalSourceResource = sourceResource; TSVConvertor tsvConvertor = null; + StreamReaderAdapter streamReaderAdapter; + CsvPreference csvPreference = null; switch (sourceResourceFormat) { case HTML: @@ -201,7 +200,7 @@ ExecutionContext executeSelf() { tsvConvertor = new HTML2TSVConvertor(processTableAtIndex); table.setLabel(tsvConvertor.getTableName(sourceResource)); setSourceResource(tsvConvertor.convertToTSV(sourceResource)); - setDelimiter('\t'); + streamReaderAdapter = new CSVStreamReaderAdapter(quoteCharacter, '\t'); break; case XLS: case XLSM: @@ -221,7 +220,10 @@ 
ExecutionContext executeSelf() { throw new SheetDoesntExistsException("Requested sheet doesn't exists."); } setSourceResource(tsvConvertor.convertToTSV(sourceResource)); - setDelimiter('\t'); + streamReaderAdapter = new CSVStreamReaderAdapter(quoteCharacter, '\t'); + break; + default: + streamReaderAdapter = new CSVStreamReaderAdapter(quoteCharacter, delimiter); break; } @@ -236,33 +238,21 @@ ExecutionContext executeSelf() { List outputColumns = new ArrayList<>(); List rowStatements = new ArrayList<>(); - CsvPreference csvPreference = new CsvPreference.Builder( - quoteCharacter, - delimiter, - System.lineSeparator()).build(); - try { - ICsvListReader listReader = getCsvListReader(csvPreference); - - if (listReader == null) { - logMissingQuoteError(); - return getExecutionContext(inputModel, outputModel); - } - - String[] header = listReader.getHeader(true); // skip the header (can't be used with CsvListReader) + streamReaderAdapter.initialise(new ByteArrayInputStream(sourceResource.getContent()), + sourceResourceFormat, processTableAtIndex, acceptInvalidQuoting, inputCharset, sourceResource); + String[] header = streamReaderAdapter.getHeader(skipHeader);; + Set columnNames = new HashSet<>(); - if (header == null) { - LOG.warn("Input stream resource {} to provide tabular data is empty.", this.sourceResource.getUri()); - return getExecutionContext(inputModel, outputModel); + if (streamReaderAdapter.getSheetLabel() != null) { + table.setLabel(streamReaderAdapter.getSheetLabel()); } - Set columnNames = new HashSet<>(); TableSchema inputTableSchema = getTableSchema(em); hasInputSchema = hasInputSchema(inputTableSchema); if (skipHeader) { header = getHeaderFromSchema(inputModel, header, hasInputSchema); - listReader = new CsvListReader(getReader(), csvPreference); } else if (hasInputSchema) { header = getHeaderFromSchema(inputModel, header, true); } @@ -290,10 +280,9 @@ ExecutionContext executeSelf() { } } - List row; int rowNumber = 0; - //for each row - while ((row = 
listReader.read()) != null) { + List row; + while ((row = streamReaderAdapter.getNextRow()) != null) { rowNumber++; // 4.6.1 and 4.6.3 Row r = new Row(); @@ -331,8 +320,12 @@ ExecutionContext executeSelf() { // 4.6.8.7 - else, if cellValue is not null } } - listReader.close(); - } catch (IOException | MissingArgumentException e) { + streamReaderAdapter.close(); + } catch (MissingArgumentException e) { + if (ExecutionConfig.isExitOnError()) { + return getExecutionContext(inputModel, outputModel); + } + } catch (IOException e) { LOG.error("Error while reading file from resource uri {}", sourceResource, e); } @@ -402,16 +395,6 @@ private String getValueFromRow(List row, int index, int expectedRowLengt } } - private ICsvListReader getCsvListReader(CsvPreference csvPreference) { - if (acceptInvalidQuoting) { - if (getQuote() == '\0') { - return null; - } else - return new CsvListReader(new InvalidQuotingTokenizer(getReader(), csvPreference), csvPreference); - } - return new CsvListReader(getReader(), csvPreference); - } - private Statement createRowResource(String cellValue, int rowNumber, Column column) { Resource rowResource = ResourceFactory.createResource(tableSchema.createAboutUrl(rowNumber)); @@ -590,10 +573,6 @@ private String normalize(String label) { return label.trim().replaceAll("[^\\w]", "_"); } - private Reader getReader() { - return new StringReader(new String(sourceResource.getContent(), inputCharset)); - } - @NotNull private StreamResource getResourceByUri(@NotNull String resourceUri) { @@ -633,10 +612,6 @@ public void setDelimiter(int delimiter) { this.delimiter = delimiter; } - public char getQuote() { - return quoteCharacter; - } - public void setQuoteCharacter(char quoteCharacter) { this.quoteCharacter = quoteCharacter; } @@ -704,11 +679,4 @@ private String[] createHeaders(int size, List columns) { } return headers; } - - private void logMissingQuoteError() throws MissingArgumentException { - String message = "Quote character must be specified when 
using custom tokenizer."; - if (ExecutionConfig.isExitOnError()) { - throw new MissingArgumentException(message); - } else LOG.error(message); - } } diff --git a/s-pipes-modules/module-tabular/src/main/java/cz/cvut/spipes/modules/exception/MissingArgumentException.java b/s-pipes-modules/module-tabular/src/main/java/cz/cvut/spipes/modules/exception/MissingArgumentException.java new file mode 100644 index 00000000..4614d352 --- /dev/null +++ b/s-pipes-modules/module-tabular/src/main/java/cz/cvut/spipes/modules/exception/MissingArgumentException.java @@ -0,0 +1,7 @@ +package cz.cvut.spipes.modules.exception; + +public class MissingArgumentException extends RuntimeException { + public MissingArgumentException(String message) { + super(); + } +} diff --git a/s-pipes-modules/module-tabular/src/main/java/cz/cvut/spipes/modules/util/CSVStreamReaderAdapter.java b/s-pipes-modules/module-tabular/src/main/java/cz/cvut/spipes/modules/util/CSVStreamReaderAdapter.java new file mode 100644 index 00000000..765fd3de --- /dev/null +++ b/s-pipes-modules/module-tabular/src/main/java/cz/cvut/spipes/modules/util/CSVStreamReaderAdapter.java @@ -0,0 +1,106 @@ +package cz.cvut.spipes.modules.util; + +import cz.cvut.spipes.InvalidQuotingTokenizer; +import cz.cvut.spipes.modules.ResourceFormat; +import cz.cvut.spipes.modules.exception.MissingArgumentException; +import cz.cvut.spipes.modules.model.Region; +import cz.cvut.spipes.registry.StreamResource; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.supercsv.io.CsvListReader; +import org.supercsv.io.ICsvListReader; +import org.supercsv.prefs.CsvPreference; + +import java.io.IOException; +import java.io.InputStream; +import java.io.Reader; +import java.io.StringReader; +import java.nio.charset.Charset; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +public class CSVStreamReaderAdapter implements StreamReaderAdapter { + private ICsvListReader listReader; + private CsvPreference 
csvPreference; + String [] header = null; + String [] firstRow = null; + boolean acceptInvalidQuoting; + Charset inputCharset; + StreamResource sourceResource; + private final static Logger log = LoggerFactory.getLogger(CSVStreamReaderAdapter.class); + + public CSVStreamReaderAdapter(char quoteCharacter, int delimiter) { + this.csvPreference = new CsvPreference.Builder(quoteCharacter, delimiter, System.lineSeparator()).build(); + } + + @Override + public void initialise(InputStream inputStream, ResourceFormat sourceResourceFormat, int tableIndex, + boolean acceptInvalidQuoting, Charset inputCharset, StreamResource sourceResource) throws IOException { + this.acceptInvalidQuoting = acceptInvalidQuoting; + this.inputCharset = inputCharset; + this.sourceResource = sourceResource; + listReader = getCsvListReader(csvPreference); + if (listReader == null) { + throwMissingQuoteError(); + } + } + + @Override + public String[] getHeader(boolean skipHeader) throws IOException { + header = listReader.getHeader(true); + if (skipHeader) { + firstRow = header; + } + return header; + } + + @Override + public List getNextRow() throws IOException { + if (firstRow != null) { + List row = Arrays.asList(firstRow); + firstRow = null; + return row; + } + return listReader.read(); + } + + @Override + public List getMergedRegions() { + return new ArrayList<>(); + } + + @Override + public String getSheetLabel(){ + return null; + } + + @Override + public void close() throws IOException{ + listReader.close(); + } + + private ICsvListReader getCsvListReader(CsvPreference csvPreference) { + if (acceptInvalidQuoting) { + if (getQuote() == '\0') { + return null; + } else + return new CsvListReader(new InvalidQuotingTokenizer(getReader(), csvPreference), csvPreference); + } + return new CsvListReader(getReader(), csvPreference); + } + + private Reader getReader() { + return new StringReader(new String(sourceResource.getContent(), inputCharset)); + } + + public char getQuote() { + return 
csvPreference.getQuoteChar(); + } + + private void throwMissingQuoteError() throws MissingArgumentException { + String message = "Quote character must be specified when using custom tokenizer."; + log.error(message); + throw new MissingArgumentException(message); + } +} diff --git a/s-pipes-modules/module-tabular/src/main/java/cz/cvut/spipes/modules/util/StreamReaderAdapter.java b/s-pipes-modules/module-tabular/src/main/java/cz/cvut/spipes/modules/util/StreamReaderAdapter.java new file mode 100644 index 00000000..a69b6160 --- /dev/null +++ b/s-pipes-modules/module-tabular/src/main/java/cz/cvut/spipes/modules/util/StreamReaderAdapter.java @@ -0,0 +1,19 @@ +package cz.cvut.spipes.modules.util; + +import cz.cvut.spipes.modules.ResourceFormat; +import cz.cvut.spipes.modules.model.Region; +import cz.cvut.spipes.registry.StreamResource; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.Charset; +import java.util.List; + +public interface StreamReaderAdapter { + void initialise(InputStream inputStream, ResourceFormat sourceResourceFormat, int tableIndex, boolean acceptInvalidQuoting, Charset inputCharset, StreamResource sourceResource) throws IOException; + String[] getHeader(boolean skipHeader) throws IOException; + List getNextRow() throws IOException; + List getMergedRegions(); + String getSheetLabel() throws IOException; + void close() throws IOException; +} From 03a92728d4c5299ce08e7ceb9ab38245b4a41385 Mon Sep 17 00:00:00 2001 From: Miroslav Blasko Date: Tue, 7 Jan 2025 18:10:49 +0100 Subject: [PATCH 3/5] [#228] Tabular Module now uses adapters --- .../cz/cvut/spipes/modules/TabularModule.java | 66 ++++----- .../modules/util/HTMLStreamReaderAdapter.java | 130 ++++++++++++++++++ .../modules/util/XLSStreamReaderAdapter.java | 114 +++++++++++++++ 3 files changed, 268 insertions(+), 42 deletions(-) create mode 100644 s-pipes-modules/module-tabular/src/main/java/cz/cvut/spipes/modules/util/HTMLStreamReaderAdapter.java create mode 100644 
s-pipes-modules/module-tabular/src/main/java/cz/cvut/spipes/modules/util/XLSStreamReaderAdapter.java diff --git a/s-pipes-modules/module-tabular/src/main/java/cz/cvut/spipes/modules/TabularModule.java b/s-pipes-modules/module-tabular/src/main/java/cz/cvut/spipes/modules/TabularModule.java index cb9d4136..91972aac 100644 --- a/s-pipes-modules/module-tabular/src/main/java/cz/cvut/spipes/modules/TabularModule.java +++ b/s-pipes-modules/module-tabular/src/main/java/cz/cvut/spipes/modules/TabularModule.java @@ -14,7 +14,6 @@ import cz.cvut.spipes.exception.SPipesException; import cz.cvut.spipes.modules.annotations.SPipesModule; import cz.cvut.spipes.modules.exception.MissingArgumentException; -import cz.cvut.spipes.modules.exception.SheetDoesntExistsException; import cz.cvut.spipes.modules.exception.SheetIsNotSpecifiedException; import cz.cvut.spipes.modules.exception.SpecificationNonComplianceException; import cz.cvut.spipes.modules.handlers.ModeHandler; @@ -197,10 +196,7 @@ ExecutionContext executeSelf() { if (processTableAtIndex != 1) { throw new UnsupportedOperationException("Support for 'process-table-at-index' different from 1 is not implemented for HTML files yet."); } - tsvConvertor = new HTML2TSVConvertor(processTableAtIndex); - table.setLabel(tsvConvertor.getTableName(sourceResource)); - setSourceResource(tsvConvertor.convertToTSV(sourceResource)); - streamReaderAdapter = new CSVStreamReaderAdapter(quoteCharacter, '\t'); + streamReaderAdapter = new HTMLStreamReaderAdapter(); break; case XLS: case XLSM: @@ -208,19 +204,7 @@ ExecutionContext executeSelf() { if (processTableAtIndex == 0) { throw new SheetIsNotSpecifiedException("Source resource format is set to XLS(X,M) file but no specific table is set for processing."); } - tsvConvertor = new XLS2TSVConvertor(processTableAtIndex, sourceResourceFormat); - int numberOfSheets = tsvConvertor.getTablesCount(sourceResource); - table.setLabel(tsvConvertor.getTableName(sourceResource)); - LOG.debug("Number of sheets: 
{}", numberOfSheets); - if ((processTableAtIndex > numberOfSheets) || (processTableAtIndex < 1)) { - LOG.error("Requested sheet doesn't exist, number of sheets in the doc: {}, requested sheet: {}", - numberOfSheets, - processTableAtIndex - ); - throw new SheetDoesntExistsException("Requested sheet doesn't exists."); - } - setSourceResource(tsvConvertor.convertToTSV(sourceResource)); - streamReaderAdapter = new CSVStreamReaderAdapter(quoteCharacter, '\t'); + streamReaderAdapter = new XLSStreamReaderAdapter(); break; default: streamReaderAdapter = new CSVStreamReaderAdapter(quoteCharacter, delimiter); @@ -320,14 +304,6 @@ ExecutionContext executeSelf() { // 4.6.8.7 - else, if cellValue is not null } } - streamReaderAdapter.close(); - } catch (MissingArgumentException e) { - if (ExecutionConfig.isExitOnError()) { - return getExecutionContext(inputModel, outputModel); - } - } catch (IOException e) { - LOG.error("Error while reading file from resource uri {}", sourceResource, e); - } tableSchema.adjustProperties(hasInputSchema, outputColumns, sourceResource.getUri()); tableSchema.setColumnsSet(new HashSet<>(outputColumns)); @@ -337,26 +313,32 @@ ExecutionContext executeSelf() { em.persist(tableGroup); em.merge(tableSchema); - if (tsvConvertor != null) { - List regions = tsvConvertor.getMergedRegions(originalSourceResource); - - int cellsNum = 1; - for (Region region : regions) { - int firstCellInRegionNum = cellsNum; - for (int i = region.getFirstRow(); i <= region.getLastRow(); i++) { - for (int j = region.getFirstColumn(); j <= region.getLastColumn(); j++) { - Cell cell = new Cell(sourceResource.getUri() + "#cell" + cellsNum); - cell.setRow(tableSchema.createAboutUrl(i)); - cell.setColumn(outputColumns.get(j).getUri().toString()); - if (cellsNum != firstCellInRegionNum) { - cell.setSameValueAsCell(sourceResource.getUri() + "#cell" + firstCellInRegionNum); - } - em.merge(cell); - cellsNum++; + List regions = streamReaderAdapter.getMergedRegions(); + + int cellsNum = 1; 
+ for (Region region : regions) { + int firstCellInRegionNum = cellsNum; + for (int i = region.getFirstRow(); i <= region.getLastRow(); i++) { + for (int j = region.getFirstColumn(); j <= region.getLastColumn(); j++) { + Cell cell = new Cell(sourceResource.getUri() + "#cell" + cellsNum); + cell.setRow(tableSchema.createAboutUrl(i)); + cell.setColumn(outputColumns.get(j).getUri().toString()); + if (cellsNum != firstCellInRegionNum) { + cell.setSameValueAsCell(sourceResource.getUri() + "#cell" + firstCellInRegionNum); } + em.merge(cell); + cellsNum++; } } } + streamReaderAdapter.close(); + } catch (MissingArgumentException e) { + if (ExecutionConfig.isExitOnError()) { + return getExecutionContext(inputModel, outputModel); + } + } catch (IOException e) { + LOG.error("Error while reading file from resource uri {}", sourceResource, e); + } em.getTransaction().commit(); Model persistedModel = JopaPersistenceUtils.getDataset(em).getDefaultModel(); diff --git a/s-pipes-modules/module-tabular/src/main/java/cz/cvut/spipes/modules/util/HTMLStreamReaderAdapter.java b/s-pipes-modules/module-tabular/src/main/java/cz/cvut/spipes/modules/util/HTMLStreamReaderAdapter.java new file mode 100644 index 00000000..44ddb5e5 --- /dev/null +++ b/s-pipes-modules/module-tabular/src/main/java/cz/cvut/spipes/modules/util/HTMLStreamReaderAdapter.java @@ -0,0 +1,130 @@ +package cz.cvut.spipes.modules.util; + +import cz.cvut.spipes.modules.ResourceFormat; +import cz.cvut.spipes.modules.model.Region; +import cz.cvut.spipes.registry.StreamResource; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; + +import java.io.*; +import java.nio.charset.Charset; +import java.util.*; + +public class HTMLStreamReaderAdapter implements StreamReaderAdapter { + private Elements rows; + private int currentIndex; + private Element table; + private String label; + + private List mergedRegions; + private Map> mergedCells; + + @Override + 
public void initialise(InputStream inputStream, ResourceFormat sourceResourceFormat, + int tableIndex, boolean acceptInvalidQuoting, Charset inputCharset, StreamResource sourceResource) throws IOException { + Document doc = Jsoup.parse(inputStream, "UTF-8", ""); + Element table = doc.select("table").first(); + rows = table.select("tr"); + currentIndex = 0; + this.table = table; + mergedRegions = extractMergedRegions(table); + mergedCells = new HashMap<>(); + label = table.attr("data-name"); + } + + + @Override + public String[] getHeader(boolean skipHeader) throws IOException { + Elements headerCells = rows.get(0).select("th, td"); + return headerCells.stream() + .map(Element::text) + .toArray(String[]::new); + } + + private boolean hasNextRow() { + return currentIndex < rows.size() - 1; // Skip header row + } + + @Override + public List getNextRow() { + if (!hasNextRow()) { + return null; + } + + currentIndex++; + Elements cells = rows.get(currentIndex).select("td, th"); + List row = new ArrayList<>(); + int cellIndex = 0; + + for (Element cell : cells) { + int colspan = Integer.parseInt(cell.attr("colspan").isEmpty() ? "1" : cell.attr("colspan")); + int rowspan = Integer.parseInt(cell.attr("rowspan").isEmpty() ? 
"1" : cell.attr("rowspan")); + String cellValue = cell.text(); + + if (cellValue != null && cellValue.matches("[-+]?[0-9]*\\,?[0-9]+")) { + cellValue = cellValue.replace(",", "."); + } + + while (row.size() < cellIndex) { + row.add(null); + } + + row.add(cellValue); + + for (int i = 1; i < colspan; i++) { + row.add(null); + } + + if (rowspan > 1) { + for (int i = 1; i < rowspan; i++) { + mergedCells.computeIfAbsent(currentIndex + i, k -> new HashMap<>()).put(cellIndex, cellValue); + } + } + + cellIndex += colspan; + } + + if (mergedCells.containsKey(currentIndex)) { + Map rowMergedCells = mergedCells.get(currentIndex); + for (Map.Entry entry : rowMergedCells.entrySet()) { + row.add(entry.getKey(), null); + } + mergedCells.remove(currentIndex); + } + + return row; + } + + @Override + public List getMergedRegions() { + return mergedRegions; + } + + private List extractMergedRegions(Element table) { + List regions = new ArrayList<>(); + Elements rows = table.select("tr"); + for (int rowIndex = 0; rowIndex < rows.size(); rowIndex++) { + Elements cells = rows.get(rowIndex).select("td, th"); + for (int colIndex = 0; colIndex < cells.size(); colIndex++) { + Element cell = cells.get(colIndex); + int colspan = Integer.parseInt(cell.attr("colspan").isEmpty() ? "1" : cell.attr("colspan")); + int rowspan = Integer.parseInt(cell.attr("rowspan").isEmpty() ? 
"1" : cell.attr("rowspan")); + if (colspan > 1 || rowspan > 1) { + regions.add(new Region(rowIndex, colIndex, rowIndex + rowspan - 1, colIndex + colspan - 1)); + } + } + } + return regions; + } + + @Override + public String getSheetLabel(){ + return label; + } + + @Override + public void close() { + } +} diff --git a/s-pipes-modules/module-tabular/src/main/java/cz/cvut/spipes/modules/util/XLSStreamReaderAdapter.java b/s-pipes-modules/module-tabular/src/main/java/cz/cvut/spipes/modules/util/XLSStreamReaderAdapter.java new file mode 100644 index 00000000..6e29a321 --- /dev/null +++ b/s-pipes-modules/module-tabular/src/main/java/cz/cvut/spipes/modules/util/XLSStreamReaderAdapter.java @@ -0,0 +1,114 @@ +package cz.cvut.spipes.modules.util; + +import cz.cvut.spipes.modules.ResourceFormat; +import cz.cvut.spipes.modules.exception.SheetDoesntExistsException; +import cz.cvut.spipes.modules.model.Region; +import cz.cvut.spipes.registry.StreamResource; +import org.apache.poi.hssf.usermodel.HSSFWorkbook; +import org.apache.poi.ss.usermodel.DataFormatter; +import org.apache.poi.ss.usermodel.Row; +import org.apache.poi.ss.usermodel.Sheet; +import org.apache.poi.ss.usermodel.Workbook; +import org.apache.poi.ss.util.CellRangeAddress; +import org.apache.poi.xssf.usermodel.XSSFWorkbook; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.Charset; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.StreamSupport; + +public class XLSStreamReaderAdapter implements StreamReaderAdapter { + private Sheet sheet; + private Iterator rowIterator; + Boolean skipHeader; + private static final Logger LOG = LoggerFactory.getLogger(XLSStreamReaderAdapter.class); + + @Override + public void initialise(InputStream inputStream, ResourceFormat sourceResourceFormat, int tableIndex, + boolean acceptInvalidQuoting, Charset 
inputCharset, StreamResource sourceResource) throws IOException { + Workbook workbook; + if (sourceResourceFormat == ResourceFormat.XLS) { + workbook = new HSSFWorkbook(inputStream); + } else { + workbook = new XSSFWorkbook(inputStream); + } + if ((tableIndex > workbook.getNumberOfSheets()) || (tableIndex < 1)) { + LOG.error("Requested sheet doesn't exist, number of sheets in the doc: {}, requested sheet: {}", + workbook.getNumberOfSheets(), + tableIndex + ); + throw new SheetDoesntExistsException("Requested sheet doesn't exists."); + } + sheet = workbook.getSheetAt(tableIndex - 1); + rowIterator = sheet.iterator(); + } + + @Override + public String[] getHeader(boolean skipHeader) throws IOException { + Row headerRow = sheet.getRow(0); + if (skipHeader) { + return null; + } + else { + rowIterator.next(); // move iterator to 2nd row + return StreamSupport.stream(headerRow.spliterator(), false) + .map(cell -> cell.getStringCellValue()) + .toArray(String[]::new); + } + } + + @Override + public List getNextRow() { + if (!rowIterator.hasNext()) + return null; + Row currentRow = rowIterator.next(); + DataFormatter formatter = new DataFormatter(); + List row = StreamSupport.stream(currentRow.spliterator(), false) + .map(cell -> { + String cellValue = formatter.formatCellValue(cell); + cellValue = fixNumberFormat(cellValue); + return cellValue.isEmpty() ? 
null : cellValue; + }) + .collect(Collectors.toList()); + return row; + } + + @Override + public List getMergedRegions() { + List regions = new ArrayList<>(); + for (int i = 0; i < sheet.getNumMergedRegions(); i++) { + CellRangeAddress region = sheet.getMergedRegion(i); + regions.add(new Region( + region.getFirstRow(), + region.getFirstColumn(), + region.getLastRow(), + region.getLastColumn() + )); + } + return regions; + } + + @Override + public String getSheetLabel(){ + return sheet.getSheetName(); + } + + public String fixNumberFormat (String cellValue){ + //xls uses ',' as decimal separator, so we should convert it to '.' + if (cellValue != null && cellValue.matches("[-+]?[0-9]*\\,?[0-9]+")) { + cellValue = cellValue.replace(",", "."); + } + return cellValue; + } + + @Override + public void close() { + } +} + From 18a893b3e8aff559051c3a2fa6b758e192f03ac2 Mon Sep 17 00:00:00 2001 From: Evgenii Grigorev Date: Wed, 8 Jan 2025 11:16:02 +0100 Subject: [PATCH 4/5] [#228] Implemented suggested changes --- .../cz/cvut/spipes/modules/TabularModule.java | 24 +++++++++++++++++-- .../modules/util/CSVStreamReaderAdapter.java | 9 ++++--- .../modules/util/HTMLStreamReaderAdapter.java | 3 +-- .../modules/util/StreamReaderAdapter.java | 5 ++-- .../modules/util/XLSStreamReaderAdapter.java | 5 ++-- 5 files changed, 31 insertions(+), 15 deletions(-) diff --git a/s-pipes-modules/module-tabular/src/main/java/cz/cvut/spipes/modules/TabularModule.java b/s-pipes-modules/module-tabular/src/main/java/cz/cvut/spipes/modules/TabularModule.java index 91972aac..22b94eeb 100644 --- a/s-pipes-modules/module-tabular/src/main/java/cz/cvut/spipes/modules/TabularModule.java +++ b/s-pipes-modules/module-tabular/src/main/java/cz/cvut/spipes/modules/TabularModule.java @@ -91,6 +91,26 @@ * Does not support custom table URIs.
* Does not support processing of multiple files.
* Does not support the suppress output annotation. + * + * The header processing uses this logic: + * + * If we have a schema, and we should skip the header: + * - not calling getHeader() + * - assume that data looks like in schema + * + * If we have a schema,and we should not skip the header: + * - calling getHeader() + * - adapt schema to match header of the file + * - if ordering is not specified use ordering or the header + * - reuse column IRIs from Schema + * + * If we don't have a schema, and we should skip the header: + * - not calling getHeader() + * - create column names column_1, column_2, etc. + * + * If we don't have a schema, and we should not skip the header: + * - calling getHeader() + * - create schema entirely based on the header */ @SPipesModule(label = "Tabular module", comment = "Module for converting tabular data (e.g. CSV or TSV) to RDF") public class TabularModule extends AnnotatedAbstractModule { @@ -207,7 +227,7 @@ ExecutionContext executeSelf() { streamReaderAdapter = new XLSStreamReaderAdapter(); break; default: - streamReaderAdapter = new CSVStreamReaderAdapter(quoteCharacter, delimiter); + streamReaderAdapter = new CSVStreamReaderAdapter(quoteCharacter, delimiter, acceptInvalidQuoting, inputCharset); break; } @@ -224,7 +244,7 @@ ExecutionContext executeSelf() { try { streamReaderAdapter.initialise(new ByteArrayInputStream(sourceResource.getContent()), - sourceResourceFormat, processTableAtIndex, acceptInvalidQuoting, inputCharset, sourceResource); + sourceResourceFormat, processTableAtIndex, sourceResource); String[] header = streamReaderAdapter.getHeader(skipHeader);; Set columnNames = new HashSet<>(); diff --git a/s-pipes-modules/module-tabular/src/main/java/cz/cvut/spipes/modules/util/CSVStreamReaderAdapter.java b/s-pipes-modules/module-tabular/src/main/java/cz/cvut/spipes/modules/util/CSVStreamReaderAdapter.java index 765fd3de..03faafd3 100644 --- 
a/s-pipes-modules/module-tabular/src/main/java/cz/cvut/spipes/modules/util/CSVStreamReaderAdapter.java +++ b/s-pipes-modules/module-tabular/src/main/java/cz/cvut/spipes/modules/util/CSVStreamReaderAdapter.java @@ -30,15 +30,14 @@ public class CSVStreamReaderAdapter implements StreamReaderAdapter { StreamResource sourceResource; private final static Logger log = LoggerFactory.getLogger(CSVStreamReaderAdapter.class); - public CSVStreamReaderAdapter(char quoteCharacter, int delimiter) { + public CSVStreamReaderAdapter(char quoteCharacter, int delimiter, boolean acceptInvalidQuoting, Charset inputCharset) { this.csvPreference = new CsvPreference.Builder(quoteCharacter, delimiter, System.lineSeparator()).build(); + this.acceptInvalidQuoting = acceptInvalidQuoting; + this.inputCharset = inputCharset; } @Override - public void initialise(InputStream inputStream, ResourceFormat sourceResourceFormat, int tableIndex, - boolean acceptInvalidQuoting, Charset inputCharset, StreamResource sourceResource) throws IOException { - this.acceptInvalidQuoting = acceptInvalidQuoting; - this.inputCharset = inputCharset; + public void initialise(InputStream inputStream, ResourceFormat sourceResourceFormat, int tableIndex, StreamResource sourceResource) throws IOException { this.sourceResource = sourceResource; listReader = getCsvListReader(csvPreference); if (listReader == null) { diff --git a/s-pipes-modules/module-tabular/src/main/java/cz/cvut/spipes/modules/util/HTMLStreamReaderAdapter.java b/s-pipes-modules/module-tabular/src/main/java/cz/cvut/spipes/modules/util/HTMLStreamReaderAdapter.java index 44ddb5e5..d239b2c7 100644 --- a/s-pipes-modules/module-tabular/src/main/java/cz/cvut/spipes/modules/util/HTMLStreamReaderAdapter.java +++ b/s-pipes-modules/module-tabular/src/main/java/cz/cvut/spipes/modules/util/HTMLStreamReaderAdapter.java @@ -9,7 +9,6 @@ import org.jsoup.select.Elements; import java.io.*; -import java.nio.charset.Charset; import java.util.*; public class 
HTMLStreamReaderAdapter implements StreamReaderAdapter { @@ -23,7 +22,7 @@ public class HTMLStreamReaderAdapter implements StreamReaderAdapter { @Override public void initialise(InputStream inputStream, ResourceFormat sourceResourceFormat, - int tableIndex, boolean acceptInvalidQuoting, Charset inputCharset, StreamResource sourceResource) throws IOException { + int tableIndex, StreamResource sourceResource) throws IOException { Document doc = Jsoup.parse(inputStream, "UTF-8", ""); Element table = doc.select("table").first(); rows = table.select("tr"); diff --git a/s-pipes-modules/module-tabular/src/main/java/cz/cvut/spipes/modules/util/StreamReaderAdapter.java b/s-pipes-modules/module-tabular/src/main/java/cz/cvut/spipes/modules/util/StreamReaderAdapter.java index a69b6160..41da79b4 100644 --- a/s-pipes-modules/module-tabular/src/main/java/cz/cvut/spipes/modules/util/StreamReaderAdapter.java +++ b/s-pipes-modules/module-tabular/src/main/java/cz/cvut/spipes/modules/util/StreamReaderAdapter.java @@ -6,14 +6,13 @@ import java.io.IOException; import java.io.InputStream; -import java.nio.charset.Charset; import java.util.List; public interface StreamReaderAdapter { - void initialise(InputStream inputStream, ResourceFormat sourceResourceFormat, int tableIndex, boolean acceptInvalidQuoting, Charset inputCharset, StreamResource sourceResource) throws IOException; + void initialise(InputStream inputStream, ResourceFormat sourceResourceFormat, int tableIndex, StreamResource sourceResource) throws IOException; String[] getHeader(boolean skipHeader) throws IOException; List getNextRow() throws IOException; List getMergedRegions(); - String getSheetLabel() throws IOException; + String getSheetLabel(); void close() throws IOException; } diff --git a/s-pipes-modules/module-tabular/src/main/java/cz/cvut/spipes/modules/util/XLSStreamReaderAdapter.java b/s-pipes-modules/module-tabular/src/main/java/cz/cvut/spipes/modules/util/XLSStreamReaderAdapter.java index 6e29a321..88d1b56c 
100644 --- a/s-pipes-modules/module-tabular/src/main/java/cz/cvut/spipes/modules/util/XLSStreamReaderAdapter.java +++ b/s-pipes-modules/module-tabular/src/main/java/cz/cvut/spipes/modules/util/XLSStreamReaderAdapter.java @@ -16,7 +16,6 @@ import java.io.IOException; import java.io.InputStream; -import java.nio.charset.Charset; import java.util.ArrayList; import java.util.Iterator; import java.util.List; @@ -30,14 +29,14 @@ public class XLSStreamReaderAdapter implements StreamReaderAdapter { private static final Logger LOG = LoggerFactory.getLogger(XLSStreamReaderAdapter.class); @Override - public void initialise(InputStream inputStream, ResourceFormat sourceResourceFormat, int tableIndex, - boolean acceptInvalidQuoting, Charset inputCharset, StreamResource sourceResource) throws IOException { + public void initialise(InputStream inputStream, ResourceFormat sourceResourceFormat, int tableIndex, StreamResource sourceResource) throws IOException { Workbook workbook; if (sourceResourceFormat == ResourceFormat.XLS) { workbook = new HSSFWorkbook(inputStream); } else { workbook = new XSSFWorkbook(inputStream); } + LOG.debug("Number of sheets: {}", workbook.getNumberOfSheets()); if ((tableIndex > workbook.getNumberOfSheets()) || (tableIndex < 1)) { LOG.error("Requested sheet doesn't exist, number of sheets in the doc: {}, requested sheet: {}", workbook.getNumberOfSheets(), From 0119833389a9e1d2d7e2eb90337371b7aaa7a4ac Mon Sep 17 00:00:00 2001 From: Miroslav Blasko Date: Wed, 8 Jan 2025 13:42:07 +0100 Subject: [PATCH 5/5] [#228] Improve documentation about data schema --- .../cz/cvut/spipes/modules/TabularModule.java | 35 ++++++++----------- 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/s-pipes-modules/module-tabular/src/main/java/cz/cvut/spipes/modules/TabularModule.java b/s-pipes-modules/module-tabular/src/main/java/cz/cvut/spipes/modules/TabularModule.java index 22b94eeb..43c51df0 100644 --- 
a/s-pipes-modules/module-tabular/src/main/java/cz/cvut/spipes/modules/TabularModule.java +++ b/s-pipes-modules/module-tabular/src/main/java/cz/cvut/spipes/modules/TabularModule.java @@ -57,7 +57,7 @@ *

The implementation loosely follows the W3C Recommendation described here: * Generating RDF from Tabular Data on the Web

*

- * Within the recommendation, it is possible to define schema + * Within the recommendation, it is possible to specify schema * defining the shape of the output RDF data * (i.e. the input metadata values used for the conversion) * using csvw:tableSchema.
@@ -78,6 +78,19 @@ * ] * ] * + * Table schema can be provided in the input RDF data ("input schema") and is also included in the output RDF data + * ("output schema") of this module. If the input schema is provided, the output schema should consistently extend it. + * The following situations can happen: + * 1) there is no input schema in the input RDF data of this module + * a) {@link TabularModule#skipHeader} is false -- the output schema is created based on the header of the input file + * b) {@link TabularModule#skipHeader} is true -- the output schema is created based on the number of columns, + * where columns are named "column_1", "column_2", etc. + * 2) there is an input schema in the input RDF data of this module + * a) {@link TabularModule#skipHeader} is false -- the output schema is consistently extended from data. This is + * typically used when we have an input data schema that does not define the order of columns, while the output schema + * will be extended with this order based on the header of the input file. + * b) {@link TabularModule#skipHeader} is true -- the output schema is reused from the input RDF data + * *

* This module can also be used to process HTML tables, see option {@link TabularModule#sourceResourceFormat}. * First, the HTML table is converted to TSV while replacing "\t" with two spaces @@ -91,26 +104,6 @@ * Does not support custom table URIs.
* Does not support processing of multiple files.
* Does not support the suppress output annotation. - * - * The header processing uses this logic: - * - * If we have a schema, and we should skip the header: - * - not calling getHeader() - * - assume that data looks like in schema - * - * If we have a schema,and we should not skip the header: - * - calling getHeader() - * - adapt schema to match header of the file - * - if ordering is not specified use ordering or the header - * - reuse column IRIs from Schema - * - * If we don't have a schema, and we should skip the header: - * - not calling getHeader() - * - create column names column_1, column_2, etc. - * - * If we don't have a schema, and we should not skip the header: - * - calling getHeader() - * - create schema entirely based on the header */ @SPipesModule(label = "Tabular module", comment = "Module for converting tabular data (e.g. CSV or TSV) to RDF") public class TabularModule extends AnnotatedAbstractModule {