Skip to content

Commit

Permalink
[Upd] Add support for the merged cells in the xls files in TabularMod…
Browse files Browse the repository at this point in the history
…ule.
  • Loading branch information
rodionnv committed Oct 3, 2023
1 parent d864986 commit a975370
Show file tree
Hide file tree
Showing 9 changed files with 312 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ public static Property extendedProperty(String local )
public static final String tableUri = uri + "table";
public static final String rowUri = uri + "row";
public static final String RowUri = uri + "Row";
public static final String CellUri = uri + "Cell";
public static final String URL = uri + "url";
public static final String rowNumUri = uri + "rownum";
public static final String describesUri = uri + "describes";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ protected static Property property(String local )

public static final String propertyUri = uri + "property";

public static final String sameValueAsUri = uri + "same-value-as";

public static final Property property = ResourceFactory.createProperty(propertyUri);

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,8 @@ ExecutionContext executeSelf() {
tableGroup = onTableGroup(null);
table = onTable(null);

StreamResource originalSourceResource = sourceResource;

switch (sourceResourceFormat) {
case HTML:
HTML2TSVConvertor htmlConvertor = new HTML2TSVConvertor();
Expand Down Expand Up @@ -332,6 +334,27 @@ ExecutionContext executeSelf() {
em.getTransaction().begin();
em.persist(tableGroup);
em.merge(tableSchema);

if(sourceResourceFormat == ResourceFormat.EXCEL) {
XLS2TSVConvertor xls2TSVConvertor = new XLS2TSVConvertor();
List<Region> regions = xls2TSVConvertor.getMergedRegions(originalSourceResource, processSpecificSheetInXLSFile);
int cellsNum = 1;
for (Region region : regions) {
int firstCellInRegionNum = cellsNum;
for(int i = region.getFirstRow();i <= region.getLastRow();i++){
for(int j = region.getFirstColumn();j <= region.getLastColumn();j++) {
Cell cell = new Cell("http://example.org/cell"+(cellsNum));
cell.setRowName(tableSchema.createAboutUrl(i));
cell.setColumnName(outputColumns.get(j).getUri().toString());
if(cellsNum != firstCellInRegionNum)
cell.setSameValueAsCell("http://example.org/cell"+(firstCellInRegionNum));
em.merge(cell);
cellsNum++;
}
}
}
}

em.getTransaction().commit();
Model persistedModel = JopaPersistenceUtils.getDataset(em).getDefaultModel();
em.getEntityManagerFactory().close();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
package cz.cvut.spipes.modules.model;

import cz.cvut.kbss.jopa.model.annotations.OWLAnnotationProperty;
import cz.cvut.kbss.jopa.model.annotations.OWLClass;
import cz.cvut.spipes.constants.CSVW;
import cz.cvut.spipes.constants.KBSS_CSVW;
import cz.cvut.spipes.modules.util.TabularModuleUtils;

import java.net.URI;
import java.net.URISyntaxException;

/**
 * Persistent entity for a single table cell, mapped to the class identified by
 * {@link CSVW#CellUri}.
 *
 * <p>A cell carries an optional name, the names of the row and column it belongs to, and an
 * optional reference to another cell whose value it shares. All of these are stored as plain
 * strings in annotation properties.
 */
@OWLClass(iri = CSVW.CellUri)
public class Cell extends AbstractEntity {

    // Helper used by the row/column setters; transient so that it is not treated as entity state.
    private final transient TabularModuleUtils tabularModuleUtils = new TabularModuleUtils();

    @OWLAnnotationProperty(iri = CSVW.nameUri)
    private String name;

    @OWLAnnotationProperty(iri = CSVW.RowUri)
    private String rowName;

    @OWLAnnotationProperty(iri = CSVW.ColumnUri)
    private String columnName;

    @OWLAnnotationProperty(iri = KBSS_CSVW.sameValueAsUri)
    private String sameValueAsCell;

    /** Creates a cell without an identifier. */
    public Cell() {
    }

    /**
     * Creates a cell identified by the given URI.
     *
     * @param cellUri textual form of the cell identifier
     * @throws RuntimeException wrapping the {@link URISyntaxException} raised when
     *     {@code cellUri} is not a syntactically valid URI
     */
    public Cell(String cellUri) {
        try {
            URI identifier = new URI(cellUri);
            this.setUri(identifier);
        } catch (URISyntaxException e) {
            throw new RuntimeException(e);
        }
    }

    /** @return the name of this cell, or {@code null} if none was set */
    public String getName() {
        return name;
    }

    /** @param name the name to assign to this cell */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the row name of this cell, or {@code null} if none was set */
    public String getRowName() {
        return rowName;
    }

    /**
     * Sets the row name through {@code TabularModuleUtils.setVariable}, handing it the current
     * value, the new value, the assignment callback and the field label {@code "rowName"}.
     *
     * @param rowName the row name to assign
     */
    public void setRowName(String rowName) {
        tabularModuleUtils.setVariable(
                this.rowName, rowName, newValue -> this.rowName = newValue, "rowName");
    }

    /** @return the column name of this cell, or {@code null} if none was set */
    public String getColumnName() {
        return columnName;
    }

    /**
     * Sets the column name through {@code TabularModuleUtils.setVariable}, handing it the current
     * value, the new value, the assignment callback and the field label {@code "columnName"}.
     *
     * @param columnName the column name to assign
     */
    public void setColumnName(String columnName) {
        tabularModuleUtils.setVariable(
                this.columnName, columnName, newValue -> this.columnName = newValue, "columnName");
    }

    /** @return the reference to the cell sharing its value with this one, or {@code null} */
    public String getSameValueAsCell() {
        return sameValueAsCell;
    }

    /** @param sameValueAsCell reference to the cell whose value this cell shares */
    public void setSameValueAsCell(String sameValueAsCell) {
        this.sameValueAsCell = sameValueAsCell;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
package cz.cvut.spipes.modules.model;

/**
 * Mutable holder for the bounds of a rectangular block of cells, described by the row and
 * column indices of its first and last cell. The values are stored exactly as supplied; no
 * ordering or range validation is applied.
 */
public class Region {

    // First corner of the rectangle.
    private int firstRow;
    private int firstColumn;

    // Last corner of the rectangle.
    private int lastRow;
    private int lastColumn;

    /**
     * Creates a region from the indices of its two corners.
     *
     * @param firstRow row index of the first cell
     * @param firstColumn column index of the first cell
     * @param lastRow row index of the last cell
     * @param lastColumn column index of the last cell
     */
    public Region(int firstRow, int firstColumn, int lastRow, int lastColumn) {
        this.firstRow = firstRow;
        this.lastRow = lastRow;
        this.firstColumn = firstColumn;
        this.lastColumn = lastColumn;
    }

    /** @return row index of the first cell */
    public int getFirstRow() {
        return this.firstRow;
    }

    /** @param firstRow new row index of the first cell */
    public void setFirstRow(int firstRow) {
        this.firstRow = firstRow;
    }

    /** @return column index of the first cell */
    public int getFirstColumn() {
        return this.firstColumn;
    }

    /** @param firstColumn new column index of the first cell */
    public void setFirstColumn(int firstColumn) {
        this.firstColumn = firstColumn;
    }

    /** @return row index of the last cell */
    public int getLastRow() {
        return this.lastRow;
    }

    /** @param lastRow new row index of the last cell */
    public void setLastRow(int lastRow) {
        this.lastRow = lastRow;
    }

    /** @return column index of the last cell */
    public int getLastColumn() {
        return this.lastColumn;
    }

    /** @param lastColumn new column index of the last cell */
    public void setLastColumn(int lastColumn) {
        this.lastColumn = lastColumn;
    }
}
Original file line number Diff line number Diff line change
@@ -1,22 +1,30 @@
package cz.cvut.spipes.modules.util;

import cz.cvut.spipes.modules.ResourceFormat;
import cz.cvut.spipes.modules.model.Region;
import cz.cvut.spipes.registry.StreamResource;
import cz.cvut.spipes.registry.StringStreamResource;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.util.CellRangeAddress;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
* Module for converting tabular data from XLS to TSV. Converts a specific sheet of the XLS file
* and can also list the merged cell regions of that sheet.
*/
public class XLS2TSVConvertor {

private static final Logger LOG = LoggerFactory.getLogger(XLS2TSVConvertor.class);

public StringStreamResource convertToTSV(StreamResource streamResource,int sheetNumber){
try {
Workbook workbook = new HSSFWorkbook(new ByteArrayInputStream(streamResource.getContent()));
Expand All @@ -42,6 +50,28 @@ public StringStreamResource convertToTSV(StreamResource streamResource,int sheet
}
}

/**
 * Collects the merged cell regions of one sheet of an XLS workbook.
 *
 * @param streamResource resource whose content holds the XLS workbook bytes
 * @param sheetNumber number of the sheet to inspect; {@code sheetNumber - 1} is used as the
 *     sheet index passed to the workbook
 * @return the merged regions in the order the sheet reports them; empty when the sheet has none
 * @throws RuntimeException if the workbook cannot be read from the resource content
 */
public List<Region> getMergedRegions(StreamResource streamResource, int sheetNumber){
    // try-with-resources guarantees the workbook is closed, also when reading the sheet fails.
    try (Workbook workbook = new HSSFWorkbook(new ByteArrayInputStream(streamResource.getContent()))) {
        Sheet sheet = workbook.getSheetAt(sheetNumber - 1);

        int mergedRegionCount = sheet.getNumMergedRegions();
        List<Region> list = new ArrayList<>(mergedRegionCount);
        for (int i = 0; i < mergedRegionCount; i++) {
            CellRangeAddress region = sheet.getMergedRegion(i);
            list.add(new Region(
                    region.getFirstRow(),
                    region.getFirstColumn(),
                    region.getLastRow(),
                    region.getLastColumn()));
        }
        return list;
    } catch (IOException e) {
        throw new RuntimeException("Failed to read merged regions from the XLS resource", e);
    }
}

public int getNumberOfSheets(StreamResource streamResource){
try {
return new HSSFWorkbook(new ByteArrayInputStream(streamResource.getContent())).getNumberOfSheets();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,6 @@ void executeWithSimpleTransformationXls() throws URISyntaxException, IOException
}

@Test
@Disabled
void executeWithSimpleTransformationMergedXls() throws URISyntaxException, IOException {
module.setSourceResource(
StreamResourceUtils.getStreamResource(
Expand All @@ -95,7 +94,9 @@ void executeWithSimpleTransformationMergedXls() throws URISyntaxException, IOExc

ExecutionContext outputContext = module.executeSelf();

assertTrue(outputContext.getDefaultModel().size() > 0);
Model expectedModel = ModelFactory.createDefaultModel().read(getFilePath("merged-xls-model-output.ttl").toString());

assertIsomorphic(outputContext.getDefaultModel(),expectedModel);
}

@Test
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
<http://example.org/cell5>
a <http://www.w3.org/ns/csvw#Cell> ;
<http://www.w3.org/ns/csvw#Column>
_:b0 ;
<http://www.w3.org/ns/csvw#Row>
<http://test-file#row-4> ;
<https://onto.fel.cvut.cz/ontologies/extension/csvw/same-value-as>
<http://example.org/cell4> .

_:b0 a <http://www.w3.org/ns/csvw#Column> ;
<http://www.w3.org/ns/csvw#name>
"aa" ;
<http://www.w3.org/ns/csvw#propertyUrl>
"http://onto.fel.cvut.cz/data/aa" ;
<http://www.w3.org/ns/csvw#title>
"aa" ;
<https://onto.fel.cvut.cz/ontologies/extension/csvw/property>
<http://onto.fel.cvut.cz/data/aa> .

<http://test-file#row-4>
<http://onto.fel.cvut.cz/data/bb>
"ff" ;
<http://onto.fel.cvut.cz/data/cc>
"gg" .

<http://example.org/cell4>
a <http://www.w3.org/ns/csvw#Cell> ;
<http://www.w3.org/ns/csvw#Column>
_:b0 ;
<http://www.w3.org/ns/csvw#Row>
<http://test-file#row-3> .

[ a <http://www.w3.org/ns/csvw#TableGroup> ;
<http://www.w3.org/ns/csvw#table>
[ a <http://www.w3.org/ns/csvw#Table> ;
<http://www.w3.org/2000/01/rdf-schema#label>
"Sheet1" ;
<http://www.w3.org/ns/csvw#row>
[ a <http://www.w3.org/ns/csvw#Row> ;
<http://www.w3.org/ns/csvw#describes>
<http://test-file#row-2> ;
<http://www.w3.org/ns/csvw#rownum>
"1"^^<http://www.w3.org/2001/XMLSchema#int> ;
<http://www.w3.org/ns/csvw#url>
<http://test-file#row=2>
] ;
<http://www.w3.org/ns/csvw#row>
[ a <http://www.w3.org/ns/csvw#Row> ;
<http://www.w3.org/ns/csvw#describes>
<http://test-file#row-4> ;
<http://www.w3.org/ns/csvw#rownum>
"3"^^<http://www.w3.org/2001/XMLSchema#int> ;
<http://www.w3.org/ns/csvw#url>
<http://test-file#row=4>
] ;
<http://www.w3.org/ns/csvw#row>
[ a <http://www.w3.org/ns/csvw#Row> ;
<http://www.w3.org/ns/csvw#describes>
<http://test-file#row-3> ;
<http://www.w3.org/ns/csvw#rownum>
"2"^^<http://www.w3.org/2001/XMLSchema#int> ;
<http://www.w3.org/ns/csvw#url>
<http://test-file#row=3>
] ;
<http://www.w3.org/ns/csvw#tableSchema>
[ a <http://www.w3.org/ns/csvw#TableSchema> ;
<http://www.w3.org/ns/csvw#aboutUrl>
"http://test-file#row-{_row}"^^<http://www.w3.org/ns/csvw#uriTemplate> ;
<http://www.w3.org/ns/csvw#column>
_:b1 , _:b0 , _:b2 ;
<http://www.w3.org/ns/csvw#columns>
( _:b0
_:b2
_:b1
)
] ;
<http://www.w3.org/ns/csvw#url>
<http://test-file>
]
] .

<http://test-file#row-3>
<http://onto.fel.cvut.cz/data/aa>
"merged rows" ;
<http://onto.fel.cvut.cz/data/bb>
"dd" ;
<http://onto.fel.cvut.cz/data/cc>
"ee" .

<http://example.org/cell3>
a <http://www.w3.org/ns/csvw#Cell> ;
<http://www.w3.org/ns/csvw#Column>
_:b1 ;
<http://www.w3.org/ns/csvw#Row>
<http://test-file#row-2> ;
<https://onto.fel.cvut.cz/ontologies/extension/csvw/same-value-as>
<http://example.org/cell1> .

<http://test-file#row-2>
<http://onto.fel.cvut.cz/data/aa>
"merged columns" .

<http://example.org/cell2>
a <http://www.w3.org/ns/csvw#Cell> ;
<http://www.w3.org/ns/csvw#Column>
_:b2 ;
<http://www.w3.org/ns/csvw#Row>
<http://test-file#row-2> ;
<https://onto.fel.cvut.cz/ontologies/extension/csvw/same-value-as>
<http://example.org/cell1> .

<http://example.org/cell1>
a <http://www.w3.org/ns/csvw#Cell> ;
<http://www.w3.org/ns/csvw#Column>
_:b0 ;
<http://www.w3.org/ns/csvw#Row>
<http://test-file#row-2> .

_:b2 a <http://www.w3.org/ns/csvw#Column> ;
<http://www.w3.org/ns/csvw#name>
"bb" ;
<http://www.w3.org/ns/csvw#propertyUrl>
"http://onto.fel.cvut.cz/data/bb" ;
<http://www.w3.org/ns/csvw#title>
"bb" ;
<https://onto.fel.cvut.cz/ontologies/extension/csvw/property>
<http://onto.fel.cvut.cz/data/bb> .

_:b1 a <http://www.w3.org/ns/csvw#Column> ;
<http://www.w3.org/ns/csvw#name>
"cc" ;
<http://www.w3.org/ns/csvw#propertyUrl>
"http://onto.fel.cvut.cz/data/cc" ;
<http://www.w3.org/ns/csvw#title>
"cc" ;
<https://onto.fel.cvut.cz/ontologies/extension/csvw/property>
<http://onto.fel.cvut.cz/data/cc> .
Binary file not shown.

0 comments on commit a975370

Please sign in to comment.