From 706c9e0ecc7044bfae4c1153010ca6be7ee6a95d Mon Sep 17 00:00:00 2001 From: Jyrki Oraskari <31693668+jyrkioraskari@users.noreply.github.com> Date: Mon, 16 Dec 2024 15:03:35 +0100 Subject: [PATCH] \\X\\ decoding for Unicode as well as for \\u codes. --- .../ifc2lbd/core/utils/IfcOWLUtils.java | 150 +++-- .../ifc2lbd/core/utils/StringOperations.java | 258 +++++++- .../ifc2lbd/core/valuesets/AttributeSet.java | 9 +- .../ifc2lbd/core/valuesets/PropertySet.java | 624 +++++++++--------- .../ifc2lbd/core/valuesets/QuantitySet.java | 8 +- IFCtoLBD_Geometry/.classpath | 1 - 6 files changed, 656 insertions(+), 394 deletions(-) diff --git a/IFCtoLBD/src/main/java/org/linkedbuildingdata/ifc2lbd/core/utils/IfcOWLUtils.java b/IFCtoLBD/src/main/java/org/linkedbuildingdata/ifc2lbd/core/utils/IfcOWLUtils.java index 79ad67af..1ec20937 100644 --- a/IFCtoLBD/src/main/java/org/linkedbuildingdata/ifc2lbd/core/utils/IfcOWLUtils.java +++ b/IFCtoLBD/src/main/java/org/linkedbuildingdata/ifc2lbd/core/utils/IfcOWLUtils.java @@ -490,7 +490,50 @@ static public File characterCoding(File whole_content_file) { // UTF-8 fix for French double encoding line = line.replace("\\X\\0D", ""); line = line.replace("\\X\\0A", ""); - + + // For Scandinavian letters + line = line.replace("\\X\\C5", "Å"); + line = line.replace("\\X\\C4", "Ä"); + line = line.replace("\\X\\D6", "Ö"); + line = line.replace("\\X\\E5", "å"); + line = line.replace("\\X\\E4", "ä"); + line = line.replace("\\X\\F6", "ö"); + + // For Norwegian and Danish letters + line = line.replace("\\X\\C6", "Æ"); + line = line.replace("\\X\\D8", "Ø"); + line = line.replace("\\X\\E6", "æ"); + line = line.replace("\\X\\F8", "ø"); + + // For French letters + line = line.replace("\\X\\C0", "À"); + line = line.replace("\\X\\C7", "Ç"); + line = line.replace("\\X\\C8", "È"); + line = line.replace("\\X\\C9", "É"); + line = line.replace("\\X\\CA", "Ê"); + line = line.replace("\\X\\CB", "Ë"); + line = line.replace("\\X\\CC", "Ì"); + line = 
line.replace("\\X\\CE", "Î"); + line = line.replace("\\X\\CF", "Ï"); + line = line.replace("\\X\\D4", "Ô"); + line = line.replace("\\X\\D9", "Ù"); + line = line.replace("\\X\\DB", "Û"); + line = line.replace("\\X\\E0", "à"); + line = line.replace("\\X\\E7", "ç"); + line = line.replace("\\X\\E8", "è"); + line = line.replace("\\X\\E9", "é"); + line = line.replace("\\X\\EA", "ê"); + line = line.replace("\\X\\EB", "ë"); + line = line.replace("\\X\\EC", "ì"); + line = line.replace("\\X\\EE", "î"); + line = line.replace("\\X\\EF", "ï"); + line = line.replace("\\X\\F4", "ô"); + line = line.replace("\\X\\F9", "ù"); + line = line.replace("\\X\\FB", "û"); + line = line.replace("\\X\\FC", "ü"); + + + line = line.replace("\\X2\\00A0\\X0\\", ""); line = line.replace("\\X2\\00B0\\X0\\", "°"); // LATIN letters @@ -562,7 +605,7 @@ static public File characterCoding(File whole_content_file) { line = line.replace("\\X2\\00FE\\X0\\", "þ"); line = line.replace("\\X2\\00FF\\X0\\", "ÿ"); - line = unIFCUnicode(line); // multi-character decode + line = StringOperations.unIFCUnicode(line); // multi-character decode writer.write(line.trim()); writer.newLine(); } @@ -721,6 +764,50 @@ static public File filterContent(File whole_content_file) { // UTF-8 fix for French double encoding line = line.replace("\\X\\0D", " "); line = line.replace("\\X\\0A", ""); + + System.out.println("Character coding..........................................................."); + + // For Scandinavian letters + line = line.replace("\\X\\C5", "Å"); + line = line.replace("\\X\\C4", "Ä"); + line = line.replace("\\X\\D6", "Ö"); + line = line.replace("\\X\\E5", "å"); + line = line.replace("\\X\\E4", "ä"); + line = line.replace("\\X\\F6", "ö"); + + // For Norwegian and Danish letters + line = line.replace("\\X\\C6", "Æ"); + line = line.replace("\\X\\D8", "Ø"); + line = line.replace("\\X\\E6", "æ"); + line = line.replace("\\X\\F8", "ø"); + + // For French letters + line = line.replace("\\X\\C0", "À"); + line = 
line.replace("\\X\\C7", "Ç"); + line = line.replace("\\X\\C8", "È"); + line = line.replace("\\X\\C9", "É"); + line = line.replace("\\X\\CA", "Ê"); + line = line.replace("\\X\\CB", "Ë"); + line = line.replace("\\X\\CC", "Ì"); + line = line.replace("\\X\\CE", "Î"); + line = line.replace("\\X\\CF", "Ï"); + line = line.replace("\\X\\D4", "Ô"); + line = line.replace("\\X\\D9", "Ù"); + line = line.replace("\\X\\DB", "Û"); + line = line.replace("\\X\\E0", "à"); + line = line.replace("\\X\\E7", "ç"); + line = line.replace("\\X\\E8", "è"); + line = line.replace("\\X\\E9", "é"); + line = line.replace("\\X\\EA", "ê"); + line = line.replace("\\X\\EB", "ë"); + line = line.replace("\\X\\EC", "ì"); + line = line.replace("\\X\\EE", "î"); + line = line.replace("\\X\\EF", "ï"); + line = line.replace("\\X\\F4", "ô"); + line = line.replace("\\X\\F9", "ù"); + line = line.replace("\\X\\FB", "û"); + line = line.replace("\\X\\FC", "ü"); + line = line.replace("\\X2\\00A0\\X0\\", " "); line = line.replace("\\X2\\00B0\\X0\\", "°"); @@ -793,7 +880,7 @@ static public File filterContent(File whole_content_file) { line = line.replace("\\X2\\00FE\\X0\\", "þ"); line = line.replace("\\X2\\00FF\\X0\\", "ÿ"); - line = unIFCUnicode(line); // multi-character decode + line = StringOperations.unIFCUnicode(line); // multi-character decode line = line.replace("\\", "\\\\"); line = line.replace("\\\\\"", "\\\""); @@ -836,63 +923,8 @@ static public File filterContent(File whole_content_file) { return tempFile; } - static private String unIFCUnicode(String txt) { - StringBuilder sb = new StringBuilder(); - StringBuilder su4 = new StringBuilder(); - int state = 0; - for (char ch : txt.toCharArray()) { - switch (state) { - default: - case 0: - if (ch == '\\') - state = 1; - else - sb.append(ch); - break; - case 1: - if (ch == 'X' || ch == 'x') - state = 2; - else - state = 0; - break; - case 2: - if (ch == '2' || ch == '4') - state = 3; - else - state = 0; - break; - case 3: - if (ch == '\\') - state = 4; - 
else - state = 0; - break; + - case 4: - if (ch == '\\') - state = 5; - else { - su4.append(ch); - if (su4.length() > 3) { - sb.append("\\u"); - sb.append(su4); - su4.setLength(0); - } - } - break; - case 5: - if (ch == '\'') { - sb.append("'"); - state = 0; - } - if (ch == '/' || ch == '\\') - state = 0; - break; - } - } - return sb.toString();// StringEscapeUtils.unescapeJava(sb.toString()); // For some reasons this - // blocks - } @SuppressWarnings("deprecation") private static List split(String s) { diff --git a/IFCtoLBD/src/main/java/org/linkedbuildingdata/ifc2lbd/core/utils/StringOperations.java b/IFCtoLBD/src/main/java/org/linkedbuildingdata/ifc2lbd/core/utils/StringOperations.java index b2112cae..bc9b4914 100644 --- a/IFCtoLBD/src/main/java/org/linkedbuildingdata/ifc2lbd/core/utils/StringOperations.java +++ b/IFCtoLBD/src/main/java/org/linkedbuildingdata/ifc2lbd/core/utils/StringOperations.java @@ -1,26 +1,29 @@ package org.linkedbuildingdata.ifc2lbd.core.utils; import java.io.UnsupportedEncodingException; +import java.net.URLDecoder; import java.net.URLEncoder; import java.nio.charset.StandardCharsets; +import java.util.regex.Matcher; +import java.util.regex.Pattern; public abstract class StringOperations { /** * Converts a string into the CamelCase notation described in: * https://en.wikipedia.org/wiki/Camel_case - * - * @param txt a string to be formatetd - * @return a Camel Case formatted string + * + * @param txt a string to be formatetd + * @return a Camel Case formatted string */ static public String toCamelCase(String txt) { - if(txt.toUpperCase().equals(txt)) + if (txt.toUpperCase().equals(txt)) try { return URLEncoder.encode(txt.replace(" ", "_"), StandardCharsets.UTF_8.toString()); } catch (UnsupportedEncodingException e) { return txt.replace(" ", "_"); } - - txt=org.apache.commons.lang3.StringUtils.stripAccents(txt); + + txt = org.apache.commons.lang3.StringUtils.stripAccents(txt); if (txt == null) return null; @@ -49,38 +52,33 @@ static public 
String toCamelCase(String txt) { /** * Converts a CamelCase string into space separate words. * https://en.wikipedia.org/wiki/Camel_case - * - * @param txt a string to be formatetd - * @return an un-"CamelCased" formatted string + * + * @param txt a string to be formatetd + * @return an un-"CamelCased" formatted string */ static public String toUnCamelCase(final String txt) { if (txt == null) return null; StringBuilder ret = new StringBuilder(); - for(int i=0;i0 && Character.isUpperCase(c)) - { + for (int i = 0; i < txt.length(); i++) { + char c = txt.charAt(i); + if (i > 0 && Character.isUpperCase(c)) { ret.append(" "); ret.append(Character.toLowerCase(c)); - } - else - if(c=='_') + } else if (c == '_') ret.append(" "); else - ret.append(c); + ret.append(c); } - return ret.toString(); + return ret.toString(); } - - + /** * Removes all characters other than letters from a string * * @param txt A text string - * @return filtered content + * @return filtered content */ static private String filterCharaters(String txt) { StringBuilder ret = new StringBuilder(); @@ -92,5 +90,221 @@ static private String filterCharaters(String txt) { return ret.toString(); } + static public String handleUnicode(String line) { + try { + line = line.replace("\\\\", "\\"); + + // UTF-8 fix for French double encoding + line = line.replace("\\X\\0D", " "); + line = line.replace("\\X\\0A", ""); + line = line.replace("\\X\\22", "\""); + line = line.replace("\\X\\23", "#"); + line = line.replace("\\X\\26", "&"); + line = line.replace("\\X\\3A", ":"); + line = line.replace("\\X\\3D", "="); + line = line.replace("\\X\\3F", "?"); + + // For Scandinavian letters + line = line.replace("\\X\\C5", "Å"); + line = line.replace("\\X\\C4", "Ä"); + line = line.replace("\\X\\D6", "Ö"); + line = line.replace("\\X\\E5", "å"); + line = line.replace("\\X\\E4", "ä"); + line = line.replace("\\X\\F6", "ö"); + + // For Norwegian and Danish letters + line = line.replace("\\X\\C6", "Æ"); + line = 
line.replace("\\X\\D8", "Ø"); + line = line.replace("\\X\\E6", "æ"); + line = line.replace("\\X\\F8", "ø"); + + // For French letters + line = line.replace("\\X\\C0", "À"); + line = line.replace("\\X\\C7", "Ç"); + line = line.replace("\\X\\C8", "È"); + line = line.replace("\\X\\C9", "É"); + line = line.replace("\\X\\CA", "Ê"); + line = line.replace("\\X\\CB", "Ë"); + line = line.replace("\\X\\CC", "Ì"); + line = line.replace("\\X\\CE", "Î"); + line = line.replace("\\X\\CF", "Ï"); + line = line.replace("\\X\\D4", "Ô"); + line = line.replace("\\X\\D9", "Ù"); + line = line.replace("\\X\\DB", "Û"); + line = line.replace("\\X\\E0", "à"); + line = line.replace("\\X\\E7", "ç"); + line = line.replace("\\X\\E8", "è"); + line = line.replace("\\X\\E9", "é"); + line = line.replace("\\X\\EA", "ê"); + line = line.replace("\\X\\EB", "ë"); + line = line.replace("\\X\\EC", "ì"); + line = line.replace("\\X\\EE", "î"); + line = line.replace("\\X\\EF", "ï"); + line = line.replace("\\X\\F4", "ô"); + line = line.replace("\\X\\F9", "ù"); + line = line.replace("\\X\\FB", "û"); + line = line.replace("\\X\\FC", "ü"); + + line = line.replace("\\X2\\00A0\\X0\\", " "); + line = line.replace("\\X2\\00B0\\X0\\", "°"); + // LATIN letters + line = line.replace("\\X2\\00C0\\X0\\", "À"); + line = line.replace("\\X2\\00C1\\X0\\", "Ã�"); + line = line.replace("\\X2\\00C2\\X0\\", "Â"); + line = line.replace("\\X2\\00C3\\X0\\", "Ã"); + line = line.replace("\\X2\\00C4\\X0\\", "Ä"); + line = line.replace("\\X2\\00C5\\X0\\", "Ã…"); + line = line.replace("\\X2\\00C6\\X0\\", "Æ"); + line = line.replace("\\X2\\00C7\\X0\\", "Ç"); + line = line.replace("\\X2\\00C8\\X0\\", "È"); + line = line.replace("\\X2\\00C9\\X0\\", "É"); + line = line.replace("\\X2\\00CA\\X0\\", "Ê"); + line = line.replace("\\X2\\00CB\\X0\\", "Ë"); + line = line.replace("\\X2\\00CC\\X0\\", "ÃŒ"); + line = line.replace("\\X2\\00CD\\X0\\", "Ã�"); + line = line.replace("\\X2\\00CE\\X0\\", "ÃŽ"); + line = line.replace("\\X2\\00CF\\X0\\", "Ã�"); 
+ + line = line.replace("\\X2\\00D0\\X0\\", "Ã�"); + line = line.replace("\\X2\\00D1\\X0\\", "Ñ"); + line = line.replace("\\X2\\00D2\\X0\\", "Ã’"); + line = line.replace("\\X2\\00D3\\X0\\", "Ó"); + line = line.replace("\\X2\\00D4\\X0\\", "Ô"); + line = line.replace("\\X2\\00D5\\X0\\", "Õ"); + line = line.replace("\\X2\\00D6\\X0\\", "Ö"); + line = line.replace("\\X2\\00D7\\X0\\", "×"); + line = line.replace("\\X2\\00D8\\X0\\", "Ø"); + line = line.replace("\\X2\\00D9\\X0\\", "Ù"); + line = line.replace("\\X2\\00DA\\X0\\", "Ú"); + line = line.replace("\\X2\\00DB\\X0\\", "Û"); + line = line.replace("\\X2\\00DC\\X0\\", "Ãœ"); + line = line.replace("\\X2\\00DD\\X0\\", "Ã�"); + line = line.replace("\\X2\\00DE\\X0\\", "Þ"); + line = line.replace("\\X2\\00DF\\X0\\", "ß"); + + line = line.replace("\\X2\\00E0\\X0\\", "à"); + line = line.replace("\\X2\\00E1\\X0\\", "á"); + line = line.replace("\\X2\\00E2\\X0\\", "â"); + line = line.replace("\\X2\\00E3\\X0\\", "ã"); + line = line.replace("\\X2\\00E4\\X0\\", "ä"); + line = line.replace("\\X2\\00E5\\X0\\", "Ã¥"); + line = line.replace("\\X2\\00E6\\X0\\", "æ"); + line = line.replace("\\X2\\00E7\\X0\\", "ç"); + line = line.replace("\\X2\\00E8\\X0\\", "è"); + line = line.replace("\\X2\\00E9\\X0\\", "é"); + line = line.replace("\\X2\\00EA\\X0\\", "ê"); + line = line.replace("\\X2\\00EB\\X0\\", "ê"); + line = line.replace("\\X2\\00EC\\X0\\", "ì"); + line = line.replace("\\X2\\00ED\\X0\\", "í"); + line = line.replace("\\X2\\00EE\\X0\\", "î"); + line = line.replace("\\X2\\00EF\\X0\\", "ï"); + + line = line.replace("\\X2\\00F0\\X0\\", "ð"); + line = line.replace("\\X2\\00F1\\X0\\", "ñ"); + line = line.replace("\\X2\\00F2\\X0\\", "ò"); + line = line.replace("\\X2\\00F3\\X0\\", "ó"); + line = line.replace("\\X2\\00F4\\X0\\", "ô"); + line = line.replace("\\X2\\00F5\\X0\\", "õ"); + line = line.replace("\\X2\\00F6\\X0\\", "ö"); + line = line.replace("\\X2\\00F7\\X0\\", "÷"); + line = line.replace("\\X2\\00F8\\X0\\", "ø"); + line = 
line.replace("\\X2\\00F9\\X0\\", "ù"); + line = line.replace("\\X2\\00FA\\X0\\", "ú"); + line = line.replace("\\X2\\00FB\\X0\\", "û"); + line = line.replace("\\X2\\00FC\\X0\\", "ü"); + line = line.replace("\\X2\\00FD\\X0\\", "ý"); + line = line.replace("\\X2\\00FE\\X0\\", "þ"); + line = line.replace("\\X2\\00FF\\X0\\", "ÿ"); + + line = unIFCUnicode(line); // multi-character decode + // line = line.replace("\\", "\\\\"); + // line = line.replace("\\\\\"", "\\\""); + } catch (Exception e) { + // Just catch it + e.printStackTrace(); + } + return line; + } + + static public String unIFCUnicode(String txt) { + StringBuilder sb = new StringBuilder(); + StringBuilder su4 = new StringBuilder(); + int state = 0; + for (char ch : txt.toCharArray()) { + switch (state) { + default: + case 0: + if (ch == '\\') + state = 1; + else + sb.append(ch); + break; + case 1: + if (ch == 'X' || ch == 'x') + state = 2; + else + state = 0; + break; + case 2: + if (ch == '2' || ch == '4') + state = 3; + else + state = 0; + break; + case 3: + if (ch == '\\') + state = 4; + else + state = 0; + break; + + case 4: + if (ch == '\\') + state = 5; + else { + su4.append(ch); + if (su4.length() > 3) { + sb.append("\\u"); + sb.append(su4); + su4.setLength(0); + } + } + break; + case 5: + if (ch == '\'') { + sb.append("'"); + state = 0; + } + if (ch == '/' || ch == '\\') + state = 0; + break; + } + } + + try { + return decodeUnicode(sb.toString()); + } catch (Exception e) { + e.printStackTrace(); + System.err.println("org was: " + txt); + System.err.println("was: " + sb.toString()); + return sb.toString();// StringEscapeUtils.unescapeJava(sb.toString()); // For some reasons this + // blocks + } + } + + public static String decodeUnicode(String str) { + Pattern pattern = Pattern.compile("\\\\u(\\p{XDigit}{4})"); + Matcher matcher = pattern.matcher(str); + StringBuffer decodedString = new StringBuffer(); + + while (matcher.find()) { + String unicodeChar = matcher.group(1); + char decodedChar = (char) 
Integer.parseInt(unicodeChar, 16); + matcher.appendReplacement(decodedString, Character.toString(decodedChar)); + } + matcher.appendTail(decodedString); + + return decodedString.toString(); + } } diff --git a/IFCtoLBD/src/main/java/org/linkedbuildingdata/ifc2lbd/core/valuesets/AttributeSet.java b/IFCtoLBD/src/main/java/org/linkedbuildingdata/ifc2lbd/core/valuesets/AttributeSet.java index e3838166..68ee682a 100644 --- a/IFCtoLBD/src/main/java/org/linkedbuildingdata/ifc2lbd/core/valuesets/AttributeSet.java +++ b/IFCtoLBD/src/main/java/org/linkedbuildingdata/ifc2lbd/core/valuesets/AttributeSet.java @@ -10,6 +10,7 @@ import java.util.Optional; import java.util.Set; +import org.apache.jena.rdf.model.Literal; import org.apache.jena.rdf.model.Model; import org.apache.jena.rdf.model.Property; import org.apache.jena.rdf.model.RDFNode; @@ -97,7 +98,12 @@ public AttributeSet(String uriBase, Model lbd_model, int props_level, boolean ha } public void putAnameValue(String attribute_name, RDFNode value, Optional atype) { - mapPnameValue.put(StringOperations.toCamelCase(attribute_name), value); + if (value.isLiteral()) { + String value_string = value.asLiteral().getLexicalForm(); + Literal literal_value = this.lbd_model.createLiteral(StringOperations.handleUnicode(value_string)); + mapPnameValue.put(StringOperations.toCamelCase(attribute_name), literal_value); + } else + mapPnameValue.put(StringOperations.toCamelCase(attribute_name), value); if (atype.isPresent()) { mapPnameType.put(StringOperations.toCamelCase(attribute_name), atype.get()); } @@ -177,6 +183,7 @@ private List writeOPM_Set(String long_guid) { state_resourse.addProperty(OPM.value, this.mapPnameValue.get(pname)); addUnit(state_resourse, pname); + } else { property_resource.addProperty(OPM.value, this.mapPnameValue.get(pname)); addUnit(property_resource, pname); diff --git a/IFCtoLBD/src/main/java/org/linkedbuildingdata/ifc2lbd/core/valuesets/PropertySet.java 
b/IFCtoLBD/src/main/java/org/linkedbuildingdata/ifc2lbd/core/valuesets/PropertySet.java index 3c1a6ce3..3aa1fe77 100644 --- a/IFCtoLBD/src/main/java/org/linkedbuildingdata/ifc2lbd/core/valuesets/PropertySet.java +++ b/IFCtoLBD/src/main/java/org/linkedbuildingdata/ifc2lbd/core/valuesets/PropertySet.java @@ -1,5 +1,6 @@ package org.linkedbuildingdata.ifc2lbd.core.valuesets; +import java.nio.charset.StandardCharsets; import java.time.LocalDateTime; import java.time.format.DateTimeFormatter; import java.util.ArrayList; @@ -48,315 +49,323 @@ * */ public class PropertySet { - private boolean isActive=true; - private final Map unitmap; - private Map property_replace_map; // allows users to replace default properties + private boolean isActive = true; + private final Map unitmap; + private Map property_replace_map; // allows users to replace default properties + + private static class PsetProperty { + final Property p; // Jena RDF property + final Resource r; // Jena RDF resource object + + public PsetProperty(Property p, Resource r) { + super(); + this.p = p; + this.r = r; + } + } - private static class PsetProperty { - final Property p; // Jena RDF property - final Resource r; // Jena RDF resource object + private final String uriBase; + private final Model lbd_model; + private String propertyset_name; - public PsetProperty(Property p, Resource r) { - super(); - this.p = p; - this.r = r; - } - } + private final int props_level; + private final boolean hasBlank_nodes; + private boolean hasSimplified_properties; + + private final Map mapPnameValue = new HashMap<>(); + private final Map mapPnameType = new HashMap<>(); + private final Map mapPnameUnit = new HashMap<>(); + private final Map mapBSDD = new HashMap<>(); + + private boolean is_bSDD_pset = false; + private Resource psetDef = null; + private final boolean hasUnits; + private static long pset_counter = 0; + private long pset_inx = 0; + private boolean done = false; + + public PropertySet(String uriBase, Model 
lbd_model, Model ontology_model, String propertyset_name, int props_level, + boolean hasBlank_nodes, Map unitmap, boolean hasUnits) { + this.unitmap = unitmap; + this.uriBase = uriBase; + this.lbd_model = lbd_model; + this.propertyset_name = propertyset_name; + this.props_level = props_level; + this.hasBlank_nodes = hasBlank_nodes; + this.hasUnits = hasUnits; + // System.out.println("pset name: " + this.propertyset_name); + StmtIterator iter = ontology_model.listStatements(null, PROPS.namePset, this.propertyset_name); + if (iter.hasNext()) { + // System.out.println("Pset bsdd match!"); + is_bSDD_pset = true; + psetDef = iter.next().getSubject(); + } + this.hasSimplified_properties = false; + PropertySet.pset_counter++; + this.pset_inx = PropertySet.pset_counter; + } - private final String uriBase; - private final Model lbd_model; - private String propertyset_name; + public void putPnameValue(String property_name, RDFNode value) { - private final int props_level; - private final boolean hasBlank_nodes; - private boolean hasSimplified_properties; - - private final Map mapPnameValue = new HashMap<>(); - private final Map mapPnameType = new HashMap<>(); - private final Map mapPnameUnit = new HashMap<>(); - private final Map mapBSDD = new HashMap<>(); - - private boolean is_bSDD_pset = false; - private Resource psetDef = null; - private final boolean hasUnits; - private static long pset_counter=0; - private long pset_inx=0; - private boolean done=false; - - public PropertySet(String uriBase, Model lbd_model, Model ontology_model, String propertyset_name, int props_level, boolean hasBlank_nodes, Map unitmap, boolean hasUnits) { - this.unitmap = unitmap; - this.uriBase = uriBase; - this.lbd_model = lbd_model; - this.propertyset_name = propertyset_name; - this.props_level = props_level; - this.hasBlank_nodes = hasBlank_nodes; - this.hasUnits = hasUnits; - //System.out.println("pset name: " + this.propertyset_name); - StmtIterator iter = ontology_model.listStatements(null, 
PROPS.namePset, this.propertyset_name); - if (iter.hasNext()) { - //System.out.println("Pset bsdd match!"); - is_bSDD_pset = true; - psetDef = iter.next().getSubject(); - } - this.hasSimplified_properties = false; - PropertySet.pset_counter++; - this.pset_inx=PropertySet.pset_counter; - } - - public void putPnameValue(String property_name, RDFNode value) { - - mapPnameValue.put(StringOperations.toCamelCase(property_name), value); - } - - public void putPnameType(String property_name, RDFNode type) { - mapPnameType.put(StringOperations.toCamelCase(property_name), type); - } - - public void putPnameUnit(String property_name, RDFNode unit) { - mapPnameUnit.put(StringOperations.toCamelCase(property_name), unit); - } - - public void putPsetPropertyRef(RDFNode property) { - String pname = property.asLiteral().getString(); - pname = StringOperations.toCamelCase(pname); - if (is_bSDD_pset) { - StmtIterator iter = psetDef.listProperties(PROPS.propertyDef); - while (iter.hasNext()) { - Resource prop = iter.next().getResource(); - StmtIterator iterProp = prop.listProperties(PROPS.namePset); - while (iterProp.hasNext()) { - Literal psetPropName = iterProp.next().getLiteral(); - if (psetPropName.getString().equals(pname)) - { - mapBSDD.put(StringOperations.toCamelCase(property.toString()), prop); - } - else { - if (psetPropName.getString().toUpperCase().equals(pname.toUpperCase())) - { - mapBSDD.put(pname, prop); - } - } - } - } - } - } - - /** - * Adds property value property for an resource. 
- * - * @param lbd_resource - * The Jena Resource in the model - * @param extracted_guid - * The GUID of the elemet in the long form - */ - Set hashes = new HashSet<>(); - private boolean pksetclasses=false; - public void connect(Resource lbd_resource, String long_guid) { - //System.out.println("connect: "+this.getPropertyset_name()+" - "+lbd_resource.getLocalName()); - Resource to_connect=lbd_resource; - if(pksetclasses) - { - to_connect=this.lbd_model.createResource(this.uriBase + "pset_" + this.propertyset_name + "_" + this.pset_inx); - if(this.propertyset_name.contains("Common")) - { - Resource bsdd_class = this.lbd_model.createResource("https://identifier.buildingsmart.org/uri/buildingsmart/ifc/4.3/class/"+this.propertyset_name); - to_connect.addProperty(RDF.type, bsdd_class); - - } - Property property = this.lbd_model.createProperty(LBD.ns + "has"+this.propertyset_name.replace(" ", "_")); - lbd_resource.addProperty(property,to_connect); - if(this.done) - { - // Already done pset, just connect - return; - } - this.done=true; - } - - if(!this.isActive) - return; - if (this.mapPnameValue.keySet().size() > 0) - switch (this.props_level) { - case 1: - default: - for (String pname : this.mapPnameValue.keySet()) { - Property property; - if(this.hasSimplified_properties) - property = this.lbd_model.createProperty(property_replace(PROPS.ns + StringOperations.toCamelCase(pname.split(" ")[0]))); - else - property = this.lbd_model.createProperty(property_replace(PROPS.ns + StringOperations.toCamelCase(pname) + "_property_simple")); - this.lbd_model.add(property, RDF.type, OWL.DatatypeProperty); - this.lbd_model.add(property, RDFS.comment, "IFC property set "+this.propertyset_name+" property "+pname); - - if(!this.mapPnameValue.get(pname).toString().contains("IfcPropertySingleValue")) - to_connect.addProperty(property, this.mapPnameValue.get(pname)); - else - System.err.println("Odd value: "+this.mapPnameValue.get(pname)); - } - break; - case 2: - case 3: - if 
(hashes.add(long_guid)) { - List properties = writeOPM_Set(long_guid); - for (PsetProperty pp : properties) { - if (!this.lbd_model.listStatements(to_connect, pp.p, pp.r).hasNext()) { - lbd_resource.addProperty(pp.p, pp.r); - } - } - } - break; - } - } - - static private long state_resourse_counter = 0; - private List writeOPM_Set(String long_guid) { - List properties = new ArrayList<>(); - LocalDateTime datetime = LocalDateTime.now(); - for (String pname : this.mapPnameValue.keySet()) { - Resource property_resource; - if (this.hasBlank_nodes) - property_resource = this.lbd_model.createResource(); - else { - property_resource = this.lbd_model.createResource(this.uriBase + pname + "_" + long_guid); - property_resource.addProperty(RDF.type, OPM.property); - } - - if (this.mapBSDD.get(pname) != null) - property_resource.addProperty(RDFS.seeAlso, mapBSDD.get(pname)); - - // Just the complete name - property_resource.addProperty(RDFS.label, this.propertyset_name+":"+pname); - - if (this.props_level == 3) { - Resource state_resourse; - if (this.hasBlank_nodes) - state_resourse = this.lbd_model.createResource(); - else - state_resourse = this.lbd_model.createResource(this.uriBase + "state_" + pname + "_" + long_guid + "_p" + PropertySet.state_resourse_counter++); - // https://w3c-lbd-cg.github.io/opm/assets/states.svg - property_resource.addProperty(OPM.hasPropertyState, state_resourse); - - - String time_string = datetime.format(DateTimeFormatter.ISO_LOCAL_DATE_TIME); - state_resourse.addProperty(RDF.type, OPM.currentPropertyState); - state_resourse.addLiteral(OPM.generatedAtTime, time_string); - state_resourse.addProperty(OPM.value, this.mapPnameValue.get(pname)); - if (this.hasUnits) - addUnit(state_resourse, pname); - - } else { - property_resource.addProperty(OPM.value, this.mapPnameValue.get(pname)); - if (this.hasUnits) - addUnit(property_resource, pname); - } - - Property property; - if(this.hasSimplified_properties) - property = 
this.lbd_model.createProperty(property_replace(PROPS.ns + StringOperations.toCamelCase(pname.split(" ")[0]))); - else - property = this.lbd_model.createProperty(property_replace(PROPS.ns + StringOperations.toCamelCase(pname))); - properties.add(new PsetProperty(property, property_resource)); - - this.lbd_model.add(property, RDF.type, OWL.ObjectProperty); - this.lbd_model.add(property, RDFS.comment, "IFC property set "+this.propertyset_name+" property "+pname); - - } - return properties; - } - - private void addUnit(Resource lbd_resource, String pname) { - RDFNode ifc_unit = this.mapPnameUnit.get(pname); - if (ifc_unit != null) { - String si_unit = ifc_unit.asResource().getLocalName(); - if (si_unit != null) { - if (si_unit.equals("METRE")) { - lbd_resource.addProperty(SMLS.unit, UNIT.METER); - } else if (si_unit.equals("SQUARE_METRE")) { - lbd_resource.addProperty(SMLS.unit, UNIT.SQUARE_METRE); - } else if (si_unit.equals("CUBIC_METRE")) { - lbd_resource.addProperty(SMLS.unit, UNIT.CUBIC_METRE); - } else if (si_unit.equals("MILLI METRE")) { - lbd_resource.addProperty(SMLS.unit, UNIT.MILLI_METER); - } else if (si_unit.equals("MILLI SQUARE_METRE")) { - lbd_resource.addProperty(SMLS.unit, UNIT.SQUARE_MILLI_METRE); - } else if (si_unit.equals("MILLI CUBIC_METRE")) { - lbd_resource.addProperty(SMLS.unit, UNIT.CUBIC_MILLI_METER); - } else if (si_unit.equals("RADIAN")) { - lbd_resource.addProperty(SMLS.unit, UNIT.RADIAN); - } - } - } else { - RDFNode ifc_measurement_type = this.mapPnameType.get(pname); - if (ifc_measurement_type != null) { - String unit = ifc_measurement_type.asResource().getLocalName().toLowerCase(); - if (unit.startsWith("ifc")) - unit = unit.substring(3); - if (unit.startsWith("positive")) - unit = unit.substring("positive".length()); - if (unit.endsWith("measure")) - unit = unit.substring(0, unit.length() - "measure".length()); - String si_unit = this.unitmap.get(unit); - if (si_unit != null) { - if (si_unit.equals("METRE")) { - 
lbd_resource.addProperty(SMLS.unit, UNIT.METER); - } else if (si_unit.equals("SQUARE_METRE")) { - lbd_resource.addProperty(SMLS.unit, UNIT.SQUARE_METRE); - } else if (si_unit.equals("CUBIC_METRE")) { - lbd_resource.addProperty(SMLS.unit, UNIT.CUBIC_METRE); - } else if (si_unit.equals("MILLI METRE")) { - lbd_resource.addProperty(SMLS.unit, UNIT.MILLI_METER); - } else if (si_unit.equals("MILLI SQUARE_METRE")) { - lbd_resource.addProperty(SMLS.unit, UNIT.SQUARE_MILLI_METRE); - } else if (si_unit.equals("MILLI CUBIC_METRE")) { - lbd_resource.addProperty(SMLS.unit, UNIT.CUBIC_MILLI_METER); - } else if (si_unit.equals("RADIAN")) { - lbd_resource.addProperty(SMLS.unit, UNIT.RADIAN); - } - } else { - if (unit.equals("length")) { - lbd_resource.addProperty(SMLS.unit, UNIT.MILLI_METER); // Default - // named - // in: - // https://standards.buildingsmart.org/IFC/RELEASE/IFC2x3/TC1/HTML/ifcmeasureresource/lexical/ifclengthmeasure.htm - } else if (unit.equals("area")) { - lbd_resource.addProperty(SMLS.unit, UNIT.SQUARE_METRE); // default - // named - // in: - // https://standards.buildingsmart.org/IFC/RELEASE/IFC4/ADD2_TC1/HTML/schema/ifcmeasureresource/lexical/ifcareameasure.htm - } else if (unit.equals("volume")) { - lbd_resource.addProperty(SMLS.unit, UNIT.CUBIC_METRE); // default - // named - // in: - // https://standards.buildingsmart.org/IFC/RELEASE/IFC2x3/TC1/HTML/ifcmeasureresource/lexical/ifcvolumemeasure.htm - } - - } - } - } - - } - - public Optional isExternal() { - - RDFNode val = this.mapPnameValue.get("isExternal"); - - if (val == null) - return Optional.empty(); + if (value.isLiteral()) { + String value_string = value.asLiteral().getLexicalForm(); + Literal literal_value = this.lbd_model.createLiteral(StringOperations.handleUnicode(value_string)); + mapPnameValue.put(StringOperations.toCamelCase(property_name), literal_value); + } else + mapPnameValue.put(StringOperations.toCamelCase(property_name), value); + } + + public void putPnameType(String property_name, 
RDFNode type) { + mapPnameType.put(StringOperations.toCamelCase(property_name), type); + } + + public void putPnameUnit(String property_name, RDFNode unit) { + mapPnameUnit.put(StringOperations.toCamelCase(property_name), unit); + } + + public void putPsetPropertyRef(RDFNode property) { + String pname = property.asLiteral().getString(); + pname = StringOperations.toCamelCase(pname); + if (is_bSDD_pset) { + StmtIterator iter = psetDef.listProperties(PROPS.propertyDef); + while (iter.hasNext()) { + Resource prop = iter.next().getResource(); + StmtIterator iterProp = prop.listProperties(PROPS.namePset); + while (iterProp.hasNext()) { + Literal psetPropName = iterProp.next().getLiteral(); + if (psetPropName.getString().equals(pname)) { + mapBSDD.put(StringOperations.toCamelCase(property.toString()), prop); + } else { + if (psetPropName.getString().toUpperCase().equals(pname.toUpperCase())) { + mapBSDD.put(pname, prop); + } + } + } + } + } + } + + /** + * Adds property value property for an resource. 
+ * + * @param lbd_resource The Jena Resource in the model + * @param long_guid The GUID of the element in the long form + */ + Set hashes = new HashSet<>(); + private boolean pksetclasses = false; + + public void connect(Resource lbd_resource, String long_guid) { + // System.out.println("connect: "+this.getPropertyset_name()+" - + // "+lbd_resource.getLocalName()); + Resource to_connect = lbd_resource; + if (pksetclasses) { + to_connect = this.lbd_model + .createResource(this.uriBase + "pset_" + this.propertyset_name + "_" + this.pset_inx); + if (this.propertyset_name.contains("Common")) { + Resource bsdd_class = this.lbd_model + .createResource("https://identifier.buildingsmart.org/uri/buildingsmart/ifc/4.3/class/" + + this.propertyset_name); + to_connect.addProperty(RDF.type, bsdd_class); + + } + Property property = this.lbd_model.createProperty(LBD.ns + "has" + this.propertyset_name.replace(" ", "_")); + lbd_resource.addProperty(property, to_connect); + if (this.done) { + // Already done pset, just connect + return; + } + this.done = true; + } + + if (!this.isActive) + return; + if (this.mapPnameValue.keySet().size() > 0) + switch (this.props_level) { + case 1: + default: + for (String pname : this.mapPnameValue.keySet()) { + Property property; + if (this.hasSimplified_properties) + property = this.lbd_model.createProperty( + property_replace(PROPS.ns + StringOperations.toCamelCase(pname.split(" ")[0]))); + else + property = this.lbd_model.createProperty( + property_replace(PROPS.ns + StringOperations.toCamelCase(pname) + "_property_simple")); + this.lbd_model.add(property, RDF.type, OWL.DatatypeProperty); + this.lbd_model.add(property, RDFS.comment, + "IFC property set " + this.propertyset_name + " property " + pname); + + if (!this.mapPnameValue.get(pname).toString().contains("IfcPropertySingleValue")) + to_connect.addProperty(property, this.mapPnameValue.get(pname)); + else + System.err.println("Odd value: " + this.mapPnameValue.get(pname)); + } + break; 
+ case 2: + case 3: + if (hashes.add(long_guid)) { + List properties = writeOPM_Set(long_guid); + for (PsetProperty pp : properties) { + if (!this.lbd_model.listStatements(to_connect, pp.p, pp.r).hasNext()) { + lbd_resource.addProperty(pp.p, pp.r); + } + } + } + break; + } + } + + static private long state_resourse_counter = 0; + + private List writeOPM_Set(String long_guid) { + List properties = new ArrayList<>(); + LocalDateTime datetime = LocalDateTime.now(); + for (String pname : this.mapPnameValue.keySet()) { + Resource property_resource; + if (this.hasBlank_nodes) + property_resource = this.lbd_model.createResource(); + else { + property_resource = this.lbd_model.createResource(this.uriBase + pname + "_" + long_guid); + property_resource.addProperty(RDF.type, OPM.property); + } + + if (this.mapBSDD.get(pname) != null) + property_resource.addProperty(RDFS.seeAlso, mapBSDD.get(pname)); + + // Just the complete name + property_resource.addProperty(RDFS.label, this.propertyset_name + ":" + pname); + + if (this.props_level == 3) { + Resource state_resourse; + if (this.hasBlank_nodes) + state_resourse = this.lbd_model.createResource(); + else + state_resourse = this.lbd_model.createResource(this.uriBase + "state_" + pname + "_" + long_guid + + "_p" + PropertySet.state_resourse_counter++); + // https://w3c-lbd-cg.github.io/opm/assets/states.svg + property_resource.addProperty(OPM.hasPropertyState, state_resourse); + + String time_string = datetime.format(DateTimeFormatter.ISO_LOCAL_DATE_TIME); + state_resourse.addProperty(RDF.type, OPM.currentPropertyState); + state_resourse.addLiteral(OPM.generatedAtTime, time_string); + state_resourse.addProperty(OPM.value, this.mapPnameValue.get(pname)); + if (this.hasUnits) + addUnit(state_resourse, pname); + + } else { + property_resource.addProperty(OPM.value, this.mapPnameValue.get(pname)); + if (this.hasUnits) + addUnit(property_resource, pname); + } + + Property property; + if (this.hasSimplified_properties) + property = 
this.lbd_model + .createProperty(property_replace(PROPS.ns + StringOperations.toCamelCase(pname.split(" ")[0]))); + else + property = this.lbd_model + .createProperty(property_replace(PROPS.ns + StringOperations.toCamelCase(pname))); + properties.add(new PsetProperty(property, property_resource)); + + this.lbd_model.add(property, RDF.type, OWL.ObjectProperty); + this.lbd_model.add(property, RDFS.comment, + "IFC property set " + this.propertyset_name + " property " + pname); + + } + return properties; + } + + private void addUnit(Resource lbd_resource, String pname) { + RDFNode ifc_unit = this.mapPnameUnit.get(pname); + if (ifc_unit != null) { + String si_unit = ifc_unit.asResource().getLocalName(); + if (si_unit != null) { + if (si_unit.equals("METRE")) { + lbd_resource.addProperty(SMLS.unit, UNIT.METER); + } else if (si_unit.equals("SQUARE_METRE")) { + lbd_resource.addProperty(SMLS.unit, UNIT.SQUARE_METRE); + } else if (si_unit.equals("CUBIC_METRE")) { + lbd_resource.addProperty(SMLS.unit, UNIT.CUBIC_METRE); + } else if (si_unit.equals("MILLI METRE")) { + lbd_resource.addProperty(SMLS.unit, UNIT.MILLI_METER); + } else if (si_unit.equals("MILLI SQUARE_METRE")) { + lbd_resource.addProperty(SMLS.unit, UNIT.SQUARE_MILLI_METRE); + } else if (si_unit.equals("MILLI CUBIC_METRE")) { + lbd_resource.addProperty(SMLS.unit, UNIT.CUBIC_MILLI_METER); + } else if (si_unit.equals("RADIAN")) { + lbd_resource.addProperty(SMLS.unit, UNIT.RADIAN); + } + } + } else { + RDFNode ifc_measurement_type = this.mapPnameType.get(pname); + if (ifc_measurement_type != null) { + String unit = ifc_measurement_type.asResource().getLocalName().toLowerCase(); + if (unit.startsWith("ifc")) + unit = unit.substring(3); + if (unit.startsWith("positive")) + unit = unit.substring("positive".length()); + if (unit.endsWith("measure")) + unit = unit.substring(0, unit.length() - "measure".length()); + String si_unit = this.unitmap.get(unit); + if (si_unit != null) { + if (si_unit.equals("METRE")) { + 
lbd_resource.addProperty(SMLS.unit, UNIT.METER); + } else if (si_unit.equals("SQUARE_METRE")) { + lbd_resource.addProperty(SMLS.unit, UNIT.SQUARE_METRE); + } else if (si_unit.equals("CUBIC_METRE")) { + lbd_resource.addProperty(SMLS.unit, UNIT.CUBIC_METRE); + } else if (si_unit.equals("MILLI METRE")) { + lbd_resource.addProperty(SMLS.unit, UNIT.MILLI_METER); + } else if (si_unit.equals("MILLI SQUARE_METRE")) { + lbd_resource.addProperty(SMLS.unit, UNIT.SQUARE_MILLI_METRE); + } else if (si_unit.equals("MILLI CUBIC_METRE")) { + lbd_resource.addProperty(SMLS.unit, UNIT.CUBIC_MILLI_METER); + } else if (si_unit.equals("RADIAN")) { + lbd_resource.addProperty(SMLS.unit, UNIT.RADIAN); + } + } else { + if (unit.equals("length")) { + lbd_resource.addProperty(SMLS.unit, UNIT.MILLI_METER); // Default + // named + // in: + // https://standards.buildingsmart.org/IFC/RELEASE/IFC2x3/TC1/HTML/ifcmeasureresource/lexical/ifclengthmeasure.htm + } else if (unit.equals("area")) { + lbd_resource.addProperty(SMLS.unit, UNIT.SQUARE_METRE); // default + // named + // in: + // https://standards.buildingsmart.org/IFC/RELEASE/IFC4/ADD2_TC1/HTML/schema/ifcmeasureresource/lexical/ifcareameasure.htm + } else if (unit.equals("volume")) { + lbd_resource.addProperty(SMLS.unit, UNIT.CUBIC_METRE); // default + // named + // in: + // https://standards.buildingsmart.org/IFC/RELEASE/IFC2x3/TC1/HTML/ifcmeasureresource/lexical/ifcvolumemeasure.htm + } + + } + } + } + + } + + public Optional isExternal() { + + RDFNode val = this.mapPnameValue.get("isExternal"); + + if (val == null) + return Optional.empty(); if (!val.isLiteral()) - return Optional.empty(); + return Optional.empty(); if (val.asLiteral().getValue().equals(true)) - return Optional.of(true); + return Optional.of(true); return Optional.of(false); - } + } - - public Set getPropertynames() { + public Set getPropertynames() { - return mapPnameType.keySet(); - } - + return mapPnameType.keySet(); + } - public String getPropertyset_name() { + public 
String getPropertyset_name() { return propertyset_name; } @@ -368,17 +377,14 @@ public void setHasSimplified_properties(boolean hasSimplified_properties) { this.hasSimplified_properties = hasSimplified_properties; } - - - public void setProperty_replace_map(Map property_replace_map) { + public void setProperty_replace_map(Map property_replace_map) { this.property_replace_map = property_replace_map; } - private String property_replace(String property) - { - if(property_replace_map==null) - return property; - - return this.property_replace_map.getOrDefault(property,property); - } + private String property_replace(String property) { + if (property_replace_map == null) + return property; + + return this.property_replace_map.getOrDefault(property, property); + } } diff --git a/IFCtoLBD/src/main/java/org/linkedbuildingdata/ifc2lbd/core/valuesets/QuantitySet.java b/IFCtoLBD/src/main/java/org/linkedbuildingdata/ifc2lbd/core/valuesets/QuantitySet.java index 6b74ca52..6053709a 100644 --- a/IFCtoLBD/src/main/java/org/linkedbuildingdata/ifc2lbd/core/valuesets/QuantitySet.java +++ b/IFCtoLBD/src/main/java/org/linkedbuildingdata/ifc2lbd/core/valuesets/QuantitySet.java @@ -103,8 +103,12 @@ public QuantitySet(String uriBase, Model lbd_model, Model ontology_model, String } public void putPnameValue(String property_name, RDFNode value) { - - mapPnameValue.put(StringOperations.toCamelCase(property_name), value); + if (value.isLiteral()) { + String value_string = value.asLiteral().getLexicalForm(); + Literal literal_value = this.lbd_model.createLiteral(StringOperations.handleUnicode(value_string)); + mapPnameValue.put(StringOperations.toCamelCase(property_name), literal_value); + } else + mapPnameValue.put(StringOperations.toCamelCase(property_name), value); } public void putPnameType(String property_name, RDFNode type) { diff --git a/IFCtoLBD_Geometry/.classpath b/IFCtoLBD_Geometry/.classpath index ad2c6e4d..e5ff0388 100644 --- a/IFCtoLBD_Geometry/.classpath +++ 
b/IFCtoLBD_Geometry/.classpath @@ -16,7 +16,6 @@ -