From 4509759bc4b09b8677562c3908689dc2abd7e2bc Mon Sep 17 00:00:00 2001 From: Alessandro Adamou Date: Mon, 23 Apr 2018 16:19:18 +0100 Subject: [PATCH] New generalizer that unlocks the Patent query from goldstandard (not yet enabled by default) --- .../core2/QueryTempVarSolutionSpace.java | 12 +- .../kmi/squire/core4/BasicGeneralizer.java | 245 +++++++++++++ .../squire/core4/BestFirstSpecializer.java | 2 +- .../core4/ClassSignatureGeneralizer.java | 175 +++++++++ .../ac/open/kmi/squire/core4/Generalizer.java | 281 +++++--------- .../squire/core4/ProgrammableGeneralizer.java | 21 +- .../kmi/squire/core4/QueryRecommendator4.java | 28 +- ...QueryOperator.java => QueryTransform.java} | 4 +- .../ac/open/kmi/squire/core4/Specializer.java | 42 +-- .../kmi/squire/evaluation/QueryGPESim.java | 18 +- .../evaluation/QueryResultSizeDistance.java | 24 +- .../evaluation/QuerySpecificityDistance.java | 132 ++++--- .../operation/InstantiateTemplateVar.java | 2 +- .../open/kmi/squire/operation/Operation.java | 28 -- .../open/kmi/squire/operation/Operator.java | 34 ++ .../kmi/squire/operation/RemoveTriple.java | 8 +- .../operation/SPARQLQuerySatisfiable.java | 21 +- .../kmi/squire/querytemplate/SQTVisitor.java | 6 +- .../squire/rdfdataset/AbstractRdfDataset.java | 62 +++- .../rdfdataset/FileBasedRDFDataset.java | 7 +- .../kmi/squire/rdfdataset/IRDFDataset.java | 6 + .../squire/rdfdataset/InMemoryRdfDataset.java | 132 +++++++ .../rdfdataset/SparqlIndexedDataset.java | 8 + .../squire/rdfdataset/WritableRdfDataset.java | 29 ++ .../AbstractSQAggregator.java | 19 + .../sparqlqueryvisitor/SQClassAggregator.java | 39 ++ .../sparqlqueryvisitor/SQClassVisitor.java | 163 --------- .../SQDatatypePropertyAggregator.java | 35 ++ .../SQDatatypePropertyVisitor.java | 157 -------- ...> SQGraphPatternExpressionAggregator.java} | 13 +- .../SQObjectPropertyAggregator.java | 35 ++ .../SQObjectPropertyVisitor.java | 157 -------- .../SQRemoveTripleVisitor.java | 145 +------- .../SQVariableAggregator.java | 
32 ++ .../sparqlqueryvisitor/SQVariableVisitor.java | 159 -------- .../TemplateVariableScanner.java | 6 +- .../kmi/squire/treequerypatterns/QTTree.java | 10 +- .../QueryRecommendation.java | 10 +- .../open/kmi/squire/core4/DummyDatasets.java | 58 +++ .../core4/TestClassSignatureGeneralizer.java | 345 ++++++++++++++++++ .../kmi/squire/utils/TestJenaAssumptions.java | 4 +- .../uk/ac/open/kmi/squire/utils/TreeNode.java | 54 --- squire/src/test/resources/goldstandard.json | 12 +- squire/src/test/resources/signatures.json | 32 ++ squire/src/test/resources/signatures2.json | 68 ++++ 45 files changed, 1635 insertions(+), 1245 deletions(-) create mode 100644 squire/src/main/java/uk/ac/open/kmi/squire/core4/BasicGeneralizer.java create mode 100644 squire/src/main/java/uk/ac/open/kmi/squire/core4/ClassSignatureGeneralizer.java rename squire/src/main/java/uk/ac/open/kmi/squire/core4/{QueryOperator.java => QueryTransform.java} (93%) delete mode 100644 squire/src/main/java/uk/ac/open/kmi/squire/operation/Operation.java create mode 100644 squire/src/main/java/uk/ac/open/kmi/squire/operation/Operator.java create mode 100644 squire/src/main/java/uk/ac/open/kmi/squire/rdfdataset/InMemoryRdfDataset.java create mode 100644 squire/src/main/java/uk/ac/open/kmi/squire/rdfdataset/WritableRdfDataset.java create mode 100644 squire/src/main/java/uk/ac/open/kmi/squire/sparqlqueryvisitor/AbstractSQAggregator.java create mode 100644 squire/src/main/java/uk/ac/open/kmi/squire/sparqlqueryvisitor/SQClassAggregator.java delete mode 100644 squire/src/main/java/uk/ac/open/kmi/squire/sparqlqueryvisitor/SQClassVisitor.java create mode 100644 squire/src/main/java/uk/ac/open/kmi/squire/sparqlqueryvisitor/SQDatatypePropertyAggregator.java delete mode 100644 squire/src/main/java/uk/ac/open/kmi/squire/sparqlqueryvisitor/SQDatatypePropertyVisitor.java rename squire/src/main/java/uk/ac/open/kmi/squire/sparqlqueryvisitor/{SQGraphPatternExpressionVisitor.java => SQGraphPatternExpressionAggregator.java} (66%) 
create mode 100644 squire/src/main/java/uk/ac/open/kmi/squire/sparqlqueryvisitor/SQObjectPropertyAggregator.java delete mode 100644 squire/src/main/java/uk/ac/open/kmi/squire/sparqlqueryvisitor/SQObjectPropertyVisitor.java create mode 100644 squire/src/main/java/uk/ac/open/kmi/squire/sparqlqueryvisitor/SQVariableAggregator.java delete mode 100644 squire/src/main/java/uk/ac/open/kmi/squire/sparqlqueryvisitor/SQVariableVisitor.java create mode 100644 squire/src/test/java/uk/ac/open/kmi/squire/core4/DummyDatasets.java create mode 100644 squire/src/test/java/uk/ac/open/kmi/squire/core4/TestClassSignatureGeneralizer.java delete mode 100644 squire/src/test/java/uk/ac/open/kmi/squire/utils/TreeNode.java create mode 100644 squire/src/test/resources/signatures.json create mode 100644 squire/src/test/resources/signatures2.json diff --git a/squire/src/main/java/uk/ac/open/kmi/squire/core2/QueryTempVarSolutionSpace.java b/squire/src/main/java/uk/ac/open/kmi/squire/core2/QueryTempVarSolutionSpace.java index f8983e8..0590930 100644 --- a/squire/src/main/java/uk/ac/open/kmi/squire/core2/QueryTempVarSolutionSpace.java +++ b/squire/src/main/java/uk/ac/open/kmi/squire/core2/QueryTempVarSolutionSpace.java @@ -117,8 +117,9 @@ else if (ex instanceof JsonParseException) { } log.debug("Templated solution size now = {}", qTsol.size()); } catch (NotTemplatedException ex) { - log.error("Apparently the query has no template variables."); - log.error("Assuming empty solution space."); + log.error("Apparently the subquery has no template variables."); + log.error(" ... Subquery was:\r\n{}", qT); + log.error(" ... Assuming empty solution space."); qTsol = Collections.emptyList(); } finally { // 2a. Re-expand the solutions space to include the variables that were reduced @@ -166,7 +167,7 @@ private Map> filter(Map> reductions, Var... 
variable return filtered; } - private Set getQueryTemplateVariableSet(Query qR) { + private Set getTemplateVariables(Query qR) { TemplateVariableScanner v = new TemplateVariableScanner(); // ... This will walk through all parts of the query ElementWalker.walk(qR.getQueryPattern(), v); @@ -175,7 +176,7 @@ private Set getQueryTemplateVariableSet(Query qR) { /** * Rewrites a given query using its template variables and also tries to - * eliminate computational hogs that could cause endpoints to fail. + * eliminate potential computational hogs that could cause endpoints to fail. * * For example, the query pattern { ?x a ?t ; ?p1 ?y1 ; ?p2 ?y2 } is reduced to * { ?x a ?t ; ?p1 ?y1 } . @@ -188,7 +189,7 @@ private Set getQueryTemplateVariableSet(Query qR) { */ private Query templatizeAndReduce(Query queryOrig, Var... projectToThese) throws NotTemplatedException { log.debug("Original query: {}", queryOrig); - Set templateVars = getQueryTemplateVariableSet(queryOrig); + Set templateVars = getTemplateVariables(queryOrig); if (projectToThese.length > 0) { log.debug("Projection forced to the following variables: {}", (Object[]) projectToThese); templateVars.retainAll(new HashSet<>(Arrays.asList(projectToThese))); @@ -207,7 +208,6 @@ private Query templatizeAndReduce(Query queryOrig, Var... 
projectToThese) throws public void visit(ElementPathBlock el) { final ElementPathBlock pathBlock = new ElementPathBlock(); // Here we decide what to copy into qpNu and what not to - Set projected = new HashSet<>(Arrays.asList(projectToThese)); // Do a first scan to decide which TPs to keep for (Iterator it = el.patternElts(); it.hasNext();) { diff --git a/squire/src/main/java/uk/ac/open/kmi/squire/core4/BasicGeneralizer.java b/squire/src/main/java/uk/ac/open/kmi/squire/core4/BasicGeneralizer.java new file mode 100644 index 0000000..b665fac --- /dev/null +++ b/squire/src/main/java/uk/ac/open/kmi/squire/core4/BasicGeneralizer.java @@ -0,0 +1,245 @@ +/* + * To change this license header, choose License Headers in Project Properties. + * To change this template file, choose Tools | Templates + * and open the template in the editor. + */ +package uk.ac.open.kmi.squire.core4; + +import java.util.Collections; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Set; + +import org.apache.jena.graph.Node; +import org.apache.jena.query.Query; +import org.apache.jena.query.QueryFactory; +import org.apache.jena.sparql.core.TriplePath; +import org.apache.jena.sparql.core.Var; +import org.apache.jena.sparql.syntax.ElementPathBlock; +import org.apache.jena.sparql.syntax.ElementVisitorBase; +import org.apache.jena.sparql.syntax.ElementWalker; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import uk.ac.open.kmi.squire.entityvariablemapping.VarMapping; +import uk.ac.open.kmi.squire.operation.SparqlQueryGeneralization; +import uk.ac.open.kmi.squire.rdfdataset.IRDFDataset; + +/** + * A {@link Generalizer} that simply tries to get a query rid of all the + * classes, properties etc. that are never present in the target dataset. Its + * {@link #generalize(Query)} method produces a single query that is reasonably + * likely to (1) be satisfied by the target dataset, and (2) be specialized into + * the optimal recommendation. + *

+ * Note however that neither of the above properties is guaranteed. For example, + * it may retain two properties that are present in the target dataset, but + * never co-exist for the same entity. + *

+ * This generalizer has the following policy: + *
    + *
  1. Object properties always become object property templates, similarly for + * datatype properties. + *
  2. If the property category is unknown, a generic property template is + * applied + *
  3. The class signatures are ignored, i.e. it does not check if there are + * enough object/datatype properties for the target dataset to satisfy the + * condition of (1). + *
  4. Triple patterns are never removed + *
+ * + * @author carloallocca + */ +public class BasicGeneralizer extends QueryTransform implements Generalizer { + + private final Logger log = LoggerFactory.getLogger(getClass()); + + protected final IRDFDataset rdfd1, rdfd2; + + public BasicGeneralizer(IRDFDataset d1, IRDFDataset d2) { + super(); + this.rdfd1 = d1; + if (d2 == null) throw new IllegalArgumentException("Target dataset cannot be null."); + this.rdfd2 = d2; + } + + @Override + public Set generalize(Query query) { + if (query == null) throw new IllegalArgumentException("Query cannot be null."); + // The generalized query is created from a clone of the original one. + Query qGeneral = QueryFactory.create(query); + // Instantiated once, applied wherever possible. + SparqlQueryGeneralization qg = new SparqlQueryGeneralization(); + // SUBJECT + for (Node subj : getEntitySet(query, NodeRole.SUBJECT)) + if (subj.isConcrete() && !subj.isBlank()) { + Var tplVar = ifSubjectIsNotD2ThenGenerateVariableNew(subj); + if (tplVar != null) qGeneral = qg.perform(qGeneral, subj, tplVar); + } + // PREDICATE + for (Node pred : getEntitySet(query, NodeRole.PREDICATE)) + if (pred.isConcrete() && !pred.isBlank()) { + if (!this.rdfd2.getRDFVocabulary().contains(pred.getURI())) { + Var tplVar = makeTplVariableFromPredicate(pred, true); + if (tplVar != null) qGeneral = qg.perform(qGeneral, pred, tplVar); + } + } + // OBJECT + for (Node obj : getEntitySet(query, NodeRole.OBJECT)) + if (obj.isConcrete() && !obj.isBlank()) { + Var tplVar = ifObjectIsNotD2ThenGenerateVariableNew(obj); + if (tplVar != null) qGeneral = qg.perform(qGeneral, obj, tplVar); + } + return Collections.singleton(qGeneral); + } + + private Set getEntitySet(Query q, NodeRole nodeType) { + final Set objects = new HashSet<>(); // Remember distinct objects in this + // This will walk through all parts of the query + ElementWalker.walk(q.getQueryPattern(), new ElementVisitorBase() { + @Override + public void visit(ElementPathBlock el) { + Iterator triples = 
el.patternElts(); + while (triples.hasNext()) { + TriplePath tp = triples.next(); + Node n; + switch (nodeType) { + case SUBJECT: + n = tp.getSubject(); + break; + case PREDICATE: + n = tp.getPredicate(); + break; + case OBJECT: + n = tp.getObject(); + break; + default: + n = null; + } + if (n != null) objects.add(n); + } + } + }); + return objects; + } + + protected Var ifObjectIsNotD2ThenGenerateVariableNew(Node obj) { + if (obj == null) throw new IllegalArgumentException("Object node cannot be null."); + final String varName; + if (obj.isURI()) { + String o = obj.getURI(); + if (rdfd1.getClassSet().contains(o) && !rdfd2.getClassSet().contains(o)) + varName = classVarTable.getOrCreateVar(o, TEMPLATE_VAR_CLASS); + else if (rdfd1.isInObjectPropertySet(o) && !rdfd2.isInObjectPropertySet(o)) + varName = objectProperyVarTable.getOrCreateVar(o, TEMPLATE_VAR_PROP_OBJ); + else if (rdfd1.isInDatatypePropertySet(o) && !rdfd2.isInDatatypePropertySet(o)) + varName = datatypePropertyVarTable.getOrCreateVar(o, TEMPLATE_VAR_PROP_DT); + else if (rdfd1.isInRDFVocabulary(o) && !rdfd2.isInRDFVocabulary(o)) + varName = rdfVocVarTable.getOrCreateVar(o, "rdf"); + else return null; + } else if (obj.isLiteral()) { + varName = literalVarTable.getOrCreateVar(obj.getLiteralValue().toString(), + QueryTransform.TEMPLATE_VAR_LITERAL); + } else return null; + if (varName == null) throw new IllegalStateException("Object node generated a null variable name."); + return Var.alloc(varName); + } + + protected Var ifSubjectIsNotD2ThenGenerateVariableNew(Node subj) { + if (subj == null) throw new IllegalArgumentException("Subject node cannot be null."); + final String varName; + if (subj.isURI()) { + String sub = subj.getURI(); + if (rdfd1.getClassSet().contains(sub) && !rdfd2.getClassSet().contains(sub)) + varName = classVarTable.getOrCreateVar(sub, TEMPLATE_VAR_CLASS); + else if (rdfd1.isInObjectPropertySet(sub) && !rdfd2.isInObjectPropertySet(sub)) + varName = 
objectProperyVarTable.getOrCreateVar(sub, TEMPLATE_VAR_PROP_OBJ); + else if (rdfd1.isInDatatypePropertySet(sub) && !rdfd2.isInDatatypePropertySet(sub)) + varName = datatypePropertyVarTable.getOrCreateVar(sub, TEMPLATE_VAR_PROP_DT); + else if (rdfd1.isInRDFVocabulary(sub) && !rdfd2.isInRDFVocabulary(sub)) + varName = rdfVocVarTable.getOrCreateVar(sub, "rdf"); + else + // We assume by exclusion that sub is an individual. + // XXX is that assumption correct? + varName = individualVarTable.getOrCreateVar(sub, TEMPLATE_VAR_INDIVIDUAL); + } else if (subj.isLiteral()) { + varName = literalVarTable.getOrCreateVar(subj.getLiteralValue().toString(), + QueryTransform.TEMPLATE_VAR_LITERAL); + } else return null; + if (varName == null) throw new IllegalStateException("Subject node generated a null variable name."); + return Var.alloc(varName); + } + + /** + * Creates a template variable for the given property URI if that property + * exists in the source dataset and one of the following conditions is met: + *
    + *
  • the property is not present in the target dataset, or + *
  • flag onlyIfNotInTargetDS is set to false. + *
+ * + * @param predicate + * the predicate URI (if not a URI the method will return null). + * @param onlyIfNotInTargetDS + * an override flag that forces the variable to be generated once + * it's found in the source dataset, regardless of its presence in + * the target dataset. + * @return + */ + protected Var makeTplVariableFromPredicate(Node predicate, boolean onlyIfNotInTargetDS) { + if (predicate == null) throw new IllegalArgumentException("Predicate node cannot be null."); + log.trace("Presence of properties in target dataset {} matter", onlyIfNotInTargetDS ? "DOES" : "does NOT"); + final String varName; + if (!predicate.isURI()) return null; + String p = predicate.getURI(); + log.debug("Inspecting predicate '{}' for generalization.", p); + log.trace("rdfd1 object property list : {}", rdfd1.getObjectPropertySet()); + log.trace("rdfd1 datatype property list : {}", rdfd1.getDatatypePropertySet()); + log.trace("rdfd2 object property list : {}", rdfd2.getObjectPropertySet()); + log.trace("rdfd2 datatype property list : {}", rdfd2.getDatatypePropertySet()); + + // XXX what if the property exist in the other dataset but is used as an + // object/data property unlike the first dataset? + if (rdfd1.isInObjectPropertySet(p) && (!onlyIfNotInTargetDS || !rdfd2.isInObjectPropertySet(p))) + varName = objectProperyVarTable.getOrCreateVar(p, TEMPLATE_VAR_PROP_OBJ); + else if (rdfd1.isInDatatypePropertySet(p) && (!onlyIfNotInTargetDS || !rdfd2.isInDatatypePropertySet(p))) + varName = datatypePropertyVarTable.getOrCreateVar(p, TEMPLATE_VAR_PROP_DT); + else if (rdfd1.isInPropertySet(p)) { + // If we don't care if the property exists in the target dataset, generate the + // template variable, but do take a peek at the target dataset anyway, to decide + // what kind of property it shall be. 
+ if (!onlyIfNotInTargetDS) { + String prefix; + VarMapping table; + if (rdfd2.isInObjectPropertySet(p)) { + prefix = TEMPLATE_VAR_PROP_OBJ; + table = objectProperyVarTable; + } else if (rdfd2.isInDatatypePropertySet(p)) { + prefix = TEMPLATE_VAR_PROP_DT; + table = datatypePropertyVarTable; + } else { + prefix = TEMPLATE_VAR_PROP_PLAIN; + table = plainPropertyVarTable; + } + varName = table.getOrCreateVar(p, prefix); + } else if (!(rdfd2.isInPropertySet(p) || rdfd2.isInObjectPropertySet(p) + || rdfd2.isInDatatypePropertySet(p))) { + log.debug(" ... is a plain property in <{}> and not in <{}>", rdfd1, rdfd2); + varName = plainPropertyVarTable.getOrCreateVar(p, TEMPLATE_VAR_PROP_PLAIN); + } else { + log.debug(" ... is present in target dataset <{}> and override is not enable. Will not generalize.", + rdfd2); + return null; + } + } + // Add a case for "plain" properties + else { + log.debug(" ... is either present both in <{}> and <{}>, or in neither. Will not generalize.", rdfd1, + rdfd2); + return null; + } + if (varName == null) throw new IllegalStateException("Predicate node generated a null variable name."); + return Var.alloc(varName); + } + +} diff --git a/squire/src/main/java/uk/ac/open/kmi/squire/core4/BestFirstSpecializer.java b/squire/src/main/java/uk/ac/open/kmi/squire/core4/BestFirstSpecializer.java index f6f862a..c0aa7ec 100644 --- a/squire/src/main/java/uk/ac/open/kmi/squire/core4/BestFirstSpecializer.java +++ b/squire/src/main/java/uk/ac/open/kmi/squire/core4/BestFirstSpecializer.java @@ -38,7 +38,7 @@ * @author alessandro * */ -public class BestFirstSpecializer extends QueryOperator { +public class BestFirstSpecializer extends QueryTransform { private final IRDFDataset dFrom, dTo; diff --git a/squire/src/main/java/uk/ac/open/kmi/squire/core4/ClassSignatureGeneralizer.java b/squire/src/main/java/uk/ac/open/kmi/squire/core4/ClassSignatureGeneralizer.java new file mode 100644 index 0000000..1aef1a6 --- /dev/null +++ 
b/squire/src/main/java/uk/ac/open/kmi/squire/core4/ClassSignatureGeneralizer.java @@ -0,0 +1,175 @@ +package uk.ac.open.kmi.squire.core4; + +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; +import java.util.TreeMap; + +import org.apache.jena.graph.Node; +import org.apache.jena.graph.NodeFactory; +import org.apache.jena.query.Query; +import org.apache.jena.query.QueryFactory; +import org.apache.jena.sparql.core.TriplePath; +import org.apache.jena.sparql.core.Var; +import org.apache.jena.sparql.syntax.ElementPathBlock; +import org.apache.jena.sparql.syntax.ElementVisitorBase; +import org.apache.jena.sparql.syntax.ElementWalker; +import org.apache.jena.vocabulary.RDF; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import uk.ac.open.kmi.squire.operation.SparqlQueryGeneralization; +import uk.ac.open.kmi.squire.rdfdataset.ClassSignature; +import uk.ac.open.kmi.squire.rdfdataset.IRDFDataset; + +/** + * Tries to compute a least general generalization by taking into account + * whether there are triples on the rdf:type predicate with classes as objects. + * If there are none, it does nothing. + * + * @author Alessandro Adamou + * + */ +public class ClassSignatureGeneralizer extends BasicGeneralizer { + + private final Logger log = LoggerFactory.getLogger(getClass()); + + public ClassSignatureGeneralizer(IRDFDataset dsSource, IRDFDataset dsTarget) { + super(dsSource, dsTarget); + if (dsTarget == null) throw new IllegalArgumentException("Target dataset cannot be null."); + } + + @Override + public Set generalize(Query q) { + + Query[] qGeneral; + Set preProc = super.generalize(q); + if (preProc.isEmpty()) qGeneral = new Query[] { QueryFactory.create(q) }; + else qGeneral = preProc.toArray(new Query[0]); + // The operation that will be applied over and over. 
+ SparqlQueryGeneralization qg = new SparqlQueryGeneralization(); + + Map> typesPerSubjectSurviving = new HashMap<>(); + MappedQuery[] mq = new MappedQuery[] { new MappedQuery(q) }; + + /* + * If there are multiple triple patterns on rdf:type with the same subject, take + * all those targeted for generalization and collapse them into one. + */ + Map keptD2Signatures = new HashMap<>(); + + for (Entry> entry : mq[0].getTypesPerSubject().entrySet()) { + log.debug("Subject {}", entry.getKey()); + log.debug(" - #classes = {}", entry.getValue().size()); + + for (Node claz : entry.getValue()) { + log.debug(" ... {}", claz); + Var v = ifObjectIsNotD2ThenGenerateVariableNew(claz); + log.debug(" ... - var = {}", v); + if (v == null) { + if (claz.isURI()) { + if (!keptD2Signatures.containsKey(claz) + && rdfd2.getClassSignatures().containsKey(claz.getURI())) + keptD2Signatures.put(claz, rdfd2.getClassSignatures().get(claz.getURI())); + } + if (!typesPerSubjectSurviving.containsKey(entry.getKey())) + typesPerSubjectSurviving.put(entry.getKey(), new HashSet<>()); + typesPerSubjectSurviving.get(entry.getKey()).add(claz); + } else qGeneral[0] = qg.perform(qGeneral[0], claz, v); + } + } + + mq[0] = new MappedQuery(qGeneral[0]); + + final Set generalized = new HashSet<>(); + // Treat properties based on concrete types + ElementWalker.walk(qGeneral[0].getQueryPattern(), new ElementVisitorBase() { + @Override + public void visit(ElementPathBlock el) { + Iterator triples = el.patternElts(); + while (triples.hasNext()) { + TriplePath tp = triples.next(); + log.debug("{}", tp); + Node s = tp.getSubject(); + if (mq[0].getTypesPerSubject().containsKey(s)) for (Node type : mq[0].getTypes(s)) { + if (keptD2Signatures.containsKey(type)) { + log.debug("Signature for <{}> in D2: {}", type, + keptD2Signatures.get(type).listPathOrigins()); + // So, what is the predicate? Can it stay? 
+ if (tp.getPredicate().isURI() && !RDF.type.asNode().equals(tp.getPredicate())) { + if (keptD2Signatures.get(type).hasProperty(tp.getPredicate().getURI())) + log.debug(" ... Yay! It stays."); + else { + log.debug(" ... sorry, it goes."); + // XXX should arg1 be true if there is no common class? + Var v = makeTplVariableFromPredicate(tp.getPredicate(), false); + if (v != null) qGeneral[0] = qg.perform(qGeneral[0], tp.getPredicate(), v); + } + } + } else log.warn( + "WTF? There is no signature in D2 for type <{}> - this should have already been dealt with.", + type); + } + } + } + }); + + log.debug("Intermediate generalized query:\r\n{}", mq[0].getQuery()); + + // If the types have been generalized (but a type expression still exists in the + // query), treat the properties based on whether they occur in the same type. + for (Node sub : mq[0].getRootSubjects()) { + Set namedTypes = new HashSet<>(); + for (Node type : mq[0].getTypes(sub)) + if (type.isConcrete()) namedTypes.add(type.getURI()); + // The cases of rdf:type triples having no concrete objects + if (namedTypes.isEmpty()) { + + // group properties in query depending on their presence and co-occurrences + + // First compute the unification, i.e. 
the largest subset of co-occurring + // properties + TreeMap>> groupsBySize = new TreeMap<>(); + for (Node n : mq[0].getPathOrigins(sub)) { + if (RDF.type.asNode().equals(n)) continue; + Set group = new HashSet<>(); + if (n.isURI()) group.add(n.getURI()); + for (Entry entry : rdfd2.getCoOccurringProperties(n.getURI()).entrySet()) { + Node nx = NodeFactory.createURI(entry.getKey()); + if (mq[0].getPathOrigins(sub).contains(nx) && entry.getValue() > 0) group.add(entry.getKey()); + } + if (!groupsBySize.containsKey(group.size())) groupsBySize.put(group.size(), new HashSet<>()); + groupsBySize.get(group.size()).add(group); + } + + log.debug("Picking the largest groups:"); + if (!groupsBySize.isEmpty()) for (Set group : groupsBySize.firstEntry().getValue()) { + log.debug(" - Group: {}", group); + // Produce the query from the group + Set genUs = new HashSet<>(); + Query groupQ = QueryFactory.create(mq[0].getQuery()); + for (Node n : mq[0].getPathOrigins(sub)) + if (n.isURI() && !RDF.type.asNode().equals(n) && !group.contains(n.getURI())) { + log.debug(" ... should generalize on property <{}>", n); + genUs.add(n); + Var v = makeTplVariableFromPredicate(n, false); + if (v != null) groupQ = qg.perform(groupQ, n, v); + } + generalized.add(groupQ); + } + } + + } + if (generalized.isEmpty()) generalized.add(mq[0].getQuery()); + log.debug("Generalized queries follow:"); + int i = 0; + for (Iterator it = generalized.iterator(); it.hasNext(); i++) + log.debug(" - q{} : {}", i, it.next()); + return Collections.unmodifiableSet(generalized); + } + +} diff --git a/squire/src/main/java/uk/ac/open/kmi/squire/core4/Generalizer.java b/squire/src/main/java/uk/ac/open/kmi/squire/core4/Generalizer.java index 4f79a1f..bb3bdff 100644 --- a/squire/src/main/java/uk/ac/open/kmi/squire/core4/Generalizer.java +++ b/squire/src/main/java/uk/ac/open/kmi/squire/core4/Generalizer.java @@ -1,228 +1,121 @@ -/* - * To change this license header, choose License Headers in Project Properties. 
- * To change this template file, choose Tools | Templates - * and open the template in the editor. - */ package uk.ac.open.kmi.squire.core4; +import java.util.Collections; +import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; +import java.util.Map; import java.util.Set; import org.apache.jena.graph.Node; import org.apache.jena.query.Query; -import org.apache.jena.query.QueryFactory; import org.apache.jena.sparql.core.TriplePath; -import org.apache.jena.sparql.core.Var; import org.apache.jena.sparql.syntax.ElementPathBlock; import org.apache.jena.sparql.syntax.ElementVisitorBase; import org.apache.jena.sparql.syntax.ElementWalker; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import org.apache.jena.vocabulary.RDF; -import uk.ac.open.kmi.squire.entityvariablemapping.VarMapping; import uk.ac.open.kmi.squire.operation.SparqlQueryGeneralization; -import uk.ac.open.kmi.squire.rdfdataset.IRDFDataset; /** - * - * @author carloallocca + * The {@link BasicGeneralizer} is in charge of deciding which nodes in a query + * pattern are turned into template variables and which are not, and then + * applies a {@link SparqlQueryGeneralization} to the designated nodes. + * + * @author carloallocca, Alessandro Adamou */ -public class Generalizer extends QueryOperator { +public interface Generalizer { - private final Logger log = LoggerFactory.getLogger(getClass()); + /** + * Internal wrapper for {@link Query} which also keeps an index of the + * properties and types in the triple patterns of each root subject (i.e. not an + * intermediate subject in a chain of triple patterns). 
+ */ + class MappedQuery { - protected final Query originalQuery; + private Query embedded; - protected final IRDFDataset rdfd1, rdfd2; + private Map> pathOriginsBySubject = new HashMap<>(); - public Generalizer(Query query, IRDFDataset d1, IRDFDataset d2) { - super(); - if (query == null) throw new IllegalArgumentException("Query cannot be null."); - this.originalQuery = query; - this.rdfd1 = d1; - if (d2 == null) throw new IllegalArgumentException("Target dataset cannot be null."); - this.rdfd2 = d2; - } + private Map> typesBySubject = new HashMap<>(); - /* - * Typically done once per original query - */ - public Query generalize() { - // The generalized query is created from a clone of the original one. - Query qGeneral = QueryFactory.create(this.originalQuery.toString()); - // Instantiated once, applied wherever possible. - SparqlQueryGeneralization qg = new SparqlQueryGeneralization(); - // SUBJECT - for (Node subj : getEntitySet(NodeRole.SUBJECT)) - if (!(subj.isVariable()) && !(subj.isBlank())) { - Var templateVarSub = ifSubjectIsNotD2ThenGenerateVariableNew(subj); - if (templateVarSub != null) qGeneral = qg.perform(qGeneral, subj, templateVarSub); - } - // PREDICATE - for (Node pred : getEntitySet(NodeRole.PREDICATE)) - if (!(pred.isVariable()) && !(pred.isBlank())) { - if (!this.rdfd2.getRDFVocabulary().contains(pred.getURI())) { - Var templateVarPred = makeTplVariableFromPredicate(pred, true); - if (templateVarPred != null) qGeneral = qg.perform(qGeneral, pred, templateVarPred); - } - } - // OBJECT - for (Node obj : getEntitySet(NodeRole.OBJECT)) - if (!(obj.isVariable()) && !(obj.isBlank())) { - Var templateVarObj = ifObjectIsNotD2ThenGenerateVariableNew(obj); - if (templateVarObj != null) qGeneral = qg.perform(qGeneral, obj, templateVarObj); - } - return qGeneral; - } + public MappedQuery(Query query) { + if (query == null) + throw new IllegalArgumentException("Cannot create a generalized query from a null query."); + this.embedded = query; - private 
Set getEntitySet(NodeRole nodeType) { - final Set objects = new HashSet<>(); // Remember distinct objects in this - // This will walk through all parts of the query - ElementWalker.walk(this.originalQuery.getQueryPattern(), new ElementVisitorBase() { - @Override - public void visit(ElementPathBlock el) { - Iterator triples = el.patternElts(); - while (triples.hasNext()) { - TriplePath tp = triples.next(); - Node n; - switch (nodeType) { - case SUBJECT: - n = tp.getSubject(); - break; - case PREDICATE: - n = tp.getPredicate(); - break; - case OBJECT: - n = tp.getObject(); - break; - default: - n = null; + /* + * Inspect the query + */ + ElementWalker.walk(embedded.getQueryPattern(), new ElementVisitorBase() { + @Override + public void visit(ElementPathBlock el) { + Iterator triples = el.patternElts(); + while (triples.hasNext()) { + TriplePath tp = triples.next(); + Node p = tp.getPredicate(), s = tp.getSubject(); + if (p.isURI()) { + if (RDF.type.getURI().equals(p.getURI()) && tp.getObject().isConcrete()) { + if (!typesBySubject.containsKey(s)) typesBySubject.put(s, new HashSet<>()); + typesBySubject.get(s).add(tp.getObject()); + } else { + if (!pathOriginsBySubject.containsKey(s)) pathOriginsBySubject.put(s, new HashSet<>()); + pathOriginsBySubject.get(s).add(p); + } + } } - if (n != null) objects.add(n); } - } - }); - return objects; - } + }); + } - protected Var ifObjectIsNotD2ThenGenerateVariableNew(Node obj) { - if (obj == null) throw new IllegalArgumentException("Object node cannot be null."); - final String varName; - if (obj.isURI()) { - String o = obj.getURI(); - if (rdfd1.getClassSet().contains(o) && !rdfd2.getClassSet().contains(o)) - varName = classVarTable.getOrCreateVar(o, TEMPLATE_VAR_CLASS); - else if (rdfd1.isInObjectPropertySet(o) && !rdfd2.isInObjectPropertySet(o)) - varName = objectProperyVarTable.getOrCreateVar(o, TEMPLATE_VAR_PROP_OBJ); - else if (rdfd1.isInDatatypePropertySet(o) && !rdfd2.isInDatatypePropertySet(o)) - varName = 
datatypePropertyVarTable.getOrCreateVar(o, TEMPLATE_VAR_PROP_DT); - else if (rdfd1.isInRDFVocabulary(o) && !rdfd2.isInRDFVocabulary(o)) - varName = rdfVocVarTable.getOrCreateVar(o, "rdf"); - else return null; - } else if (obj.isLiteral()) { - varName = literalVarTable.getOrCreateVar(obj.getLiteralValue().toString(), - QueryOperator.TEMPLATE_VAR_LITERAL); - } else return null; - if (varName == null) throw new IllegalStateException("Object node generated a null variable name."); - return Var.alloc(varName); - } + public Set getPathOrigins(Node subject) { + if (!pathOriginsBySubject.containsKey(subject)) return Collections.emptySet(); + return Collections.unmodifiableSet(pathOriginsBySubject.get(subject)); + } + + public Map> getPathOriginsPerSubject() { + return Collections.unmodifiableMap(pathOriginsBySubject); + } + + /** + * + * @return the embedded query + */ + public Query getQuery() { + return embedded; + } + + public Set getRootSubjects() { + Set set = new HashSet<>(typesBySubject.keySet()); + set.addAll(pathOriginsBySubject.keySet()); + return Collections.unmodifiableSet(set); + } + + public Set getTypes(Node subject) { + if (!typesBySubject.containsKey(subject)) return Collections.emptySet(); + return Collections.unmodifiableSet(typesBySubject.get(subject)); + } + + public Map> getTypesPerSubject() { + return Collections.unmodifiableMap(typesBySubject); + } - protected Var ifSubjectIsNotD2ThenGenerateVariableNew(Node subj) { - if (subj == null) throw new IllegalArgumentException("Subject node cannot be null."); - final String varName; - if (subj.isURI()) { - String sub = subj.getURI(); - if (rdfd1.getClassSet().contains(sub) && !rdfd2.getClassSet().contains(sub)) - varName = classVarTable.getOrCreateVar(sub, TEMPLATE_VAR_CLASS); - else if (rdfd1.isInObjectPropertySet(sub) && !rdfd2.isInObjectPropertySet(sub)) - varName = objectProperyVarTable.getOrCreateVar(sub, TEMPLATE_VAR_PROP_OBJ); - else if (rdfd1.isInDatatypePropertySet(sub) && 
!rdfd2.isInDatatypePropertySet(sub)) - varName = datatypePropertyVarTable.getOrCreateVar(sub, TEMPLATE_VAR_PROP_DT); - else if (rdfd1.isInRDFVocabulary(sub) && !rdfd2.isInRDFVocabulary(sub)) - varName = rdfVocVarTable.getOrCreateVar(sub, "rdf"); - else - // We assume by exclusion that sub is an individual. - // XXX is that assumption correct? - varName = individualVarTable.getOrCreateVar(sub, TEMPLATE_VAR_INDIVIDUAL); - } else if (subj.isLiteral()) { - varName = literalVarTable.getOrCreateVar(subj.getLiteralValue().toString(), - QueryOperator.TEMPLATE_VAR_LITERAL); - } else return null; - if (varName == null) throw new IllegalStateException("Subject node generated a null variable name."); - return Var.alloc(varName); } /** - * Creates a template variable for the given property URI if that property - * exists in the source dataset and one of the following conditions is met: - *
    - *
  • the property is not present in the target dataset, or - *
  • flag onlyIfNotInTargetDS is set to false. - *
+ * Computes the least general generalization of the given query, i.e. + * the queries that are satisfiable with a target dataset and contain as many + * query patterns from the original query as possible. * - * @param predicate - * the predicate URI (if not a URI the method will return null). - * @param onlyIfNotInTargetDS - * an override flag that forces the variable to be generated once - * it's found in the source dataset, regardless of its presence in - * the target dataset. - * @return + * If the method returns a singleton, i.e. a single general query, this should + * not be the same object as the given query, that is, the method should not + * alter the original query but clone it instead. + * + * @param q + * the query to be generalized + * + * @return the generalized queries. */ - protected Var makeTplVariableFromPredicate(Node predicate, boolean onlyIfNotInTargetDS) { - if (predicate == null) throw new IllegalArgumentException("Predicate node cannot be null."); - log.trace("Presence of properties in target dataset {} matter", onlyIfNotInTargetDS ? "DOES" : "does NOT"); - final String varName; - if (!predicate.isURI()) return null; - String p = predicate.getURI(); - log.debug("Inspecting predicate '{}' for generalization.", p); - log.trace("rdfd1 object property list : {}", rdfd1.getObjectPropertySet()); - log.trace("rdfd1 datatype property list : {}", rdfd1.getDatatypePropertySet()); - log.trace("rdfd2 object property list : {}", rdfd2.getObjectPropertySet()); - log.trace("rdfd2 datatype property list : {}", rdfd2.getDatatypePropertySet()); - - // XXX what if the property exist in the other dataset but is used as an - // object/data property unlike the first dataset? 
- if (rdfd1.isInObjectPropertySet(p) && (!onlyIfNotInTargetDS || !rdfd2.isInObjectPropertySet(p))) - varName = objectProperyVarTable.getOrCreateVar(p, TEMPLATE_VAR_PROP_OBJ); - else if (rdfd1.isInDatatypePropertySet(p) && (!onlyIfNotInTargetDS || !rdfd2.isInDatatypePropertySet(p))) - varName = datatypePropertyVarTable.getOrCreateVar(p, TEMPLATE_VAR_PROP_DT); - else if (rdfd1.isInPropertySet(p)) { - // If we don't care if the property exists in the target dataset, generate the - // template variable, but do take a peek at the target dataset anyway, to decide - // what kind of property it shall be. - if (!onlyIfNotInTargetDS) { - String prefix; - VarMapping table; - if (rdfd2.isInObjectPropertySet(p)) { - prefix = TEMPLATE_VAR_PROP_OBJ; - table = objectProperyVarTable; - } else if (rdfd2.isInDatatypePropertySet(p)) { - prefix = TEMPLATE_VAR_PROP_DT; - table = datatypePropertyVarTable; - } else { - prefix = TEMPLATE_VAR_PROP_PLAIN; - table = plainPropertyVarTable; - } - varName = table.getOrCreateVar(p, prefix); - } else if (!(rdfd2.isInPropertySet(p) || rdfd2.isInObjectPropertySet(p) - || rdfd2.isInDatatypePropertySet(p))) { - log.debug(" ... is a plain property in <{}> and not in <{}>", rdfd1, rdfd2); - varName = plainPropertyVarTable.getOrCreateVar(p, TEMPLATE_VAR_PROP_PLAIN); - } else { - log.debug(" ... is present in target dataset <{}> and override is not enable. Will not generalize.", - rdfd2); - return null; - } - } - // Add a case for "plain" properties - else { - log.debug(" ... is either present both in <{}> and <{}>, or in neither. 
Will not generalize.", rdfd1, - rdfd2); - return null; - } - if (varName == null) throw new IllegalStateException("Predicate node generated a null variable name."); - return Var.alloc(varName); - } + public Set generalize(Query q); } diff --git a/squire/src/main/java/uk/ac/open/kmi/squire/core4/ProgrammableGeneralizer.java b/squire/src/main/java/uk/ac/open/kmi/squire/core4/ProgrammableGeneralizer.java index 857116b..c7cc0d6 100644 --- a/squire/src/main/java/uk/ac/open/kmi/squire/core4/ProgrammableGeneralizer.java +++ b/squire/src/main/java/uk/ac/open/kmi/squire/core4/ProgrammableGeneralizer.java @@ -1,6 +1,5 @@ package uk.ac.open.kmi.squire.core4; -import java.util.HashSet; import java.util.Iterator; import java.util.Set; @@ -19,18 +18,23 @@ import uk.ac.open.kmi.squire.rdfdataset.IRDFDataset; /** - * A {@link Generalizer} that is able to return multiple generalized queries - * depending on the parameters passed to the generalize operation. + * A {@link BasicGeneralizer} that is able to return multiple generalized + * queries depending on the parameters passed to the generalize operation. 
+ * + * TODO decommission this implementation if redundant * * @author alessandro * */ -public class ProgrammableGeneralizer extends Generalizer { +public class ProgrammableGeneralizer extends BasicGeneralizer { private final Logger log = LoggerFactory.getLogger(getClass()); + private Query originalQuery; + public ProgrammableGeneralizer(Query query, IRDFDataset d1, IRDFDataset d2) { - super(query, d1, d2); + super(d1, d2); + this.originalQuery = query; } /** @@ -43,10 +47,9 @@ public ProgrammableGeneralizer(Query query, IRDFDataset d1, IRDFDataset d2) { * @return */ public Set generalizeMultiple() { - Set result = new HashSet<>(); - Query qGeneral = super.generalize(); - result.add(qGeneral); - generalizeStep(qGeneral, result); + Set result = super.generalize(this.originalQuery); + for (Query q : result) + generalizeStep(q, result); log.debug("Total {} generalized queries", result.size()); return result; } diff --git a/squire/src/main/java/uk/ac/open/kmi/squire/core4/QueryRecommendator4.java b/squire/src/main/java/uk/ac/open/kmi/squire/core4/QueryRecommendator4.java index 8c7ed62..6080064 100644 --- a/squire/src/main/java/uk/ac/open/kmi/squire/core4/QueryRecommendator4.java +++ b/squire/src/main/java/uk/ac/open/kmi/squire/core4/QueryRecommendator4.java @@ -2,7 +2,9 @@ import java.util.ArrayList; import java.util.Collections; +import java.util.LinkedList; import java.util.List; +import java.util.Set; import org.apache.jena.query.Query; @@ -17,7 +19,7 @@ */ public class QueryRecommendator4 extends AbstractQueryRecommender { - protected Query qTemplate; + protected Set qTemplate; /* * This is for storing the output of the QueryRecommendator @@ -33,19 +35,25 @@ public QueryRecommendator4(Query query, IRDFDataset d1, IRDFDataset d2, float re public void buildRecommendation() { // GENERALIZE... 
- Generalizer qG = new Generalizer(getQuery(), getSourceDataset(), getTargetDataset()); - this.qTemplate = qG.generalize(); + BasicGeneralizer qG + = new BasicGeneralizer(getSourceDataset(), getTargetDataset()); + //= new ClassSignatureGeneralizer(getSourceDataset(), getTargetDataset()); + this.qTemplate = qG.generalize(getQuery()); + List recoms = new LinkedList<>(); // SPECIALIZE... - Specializer qS = new Specializer(getQuery(), this.qTemplate, getSourceDataset(), getTargetDataset(), qG, - getMetrics().resultTypeSimilarityCoefficient, getMetrics().queryRootDistanceCoefficient, - getMetrics().resultSizeSimilarityCoefficient, getMetrics().querySpecificityDistanceCoefficient, - this.token); - qS.register(this); - qS.specialize(); + for (Query q : this.qTemplate) { + Specializer qS = new Specializer(getQuery(), q, getSourceDataset(), getTargetDataset(), qG, + getMetrics().resultTypeSimilarityCoefficient, getMetrics().queryRootDistanceCoefficient, + getMetrics().resultSizeSimilarityCoefficient, getMetrics().querySpecificityDistanceCoefficient, + this.token); + qS.register(this); + qS.specialize(); + recoms.addAll(qS.getRecommendations()); + } // RANK... 
- rankRecommendations(qS.getRecommendations()); + rankRecommendations(recoms); } protected void rankRecommendations(List qRList) { diff --git a/squire/src/main/java/uk/ac/open/kmi/squire/core4/QueryOperator.java b/squire/src/main/java/uk/ac/open/kmi/squire/core4/QueryTransform.java similarity index 93% rename from squire/src/main/java/uk/ac/open/kmi/squire/core4/QueryOperator.java rename to squire/src/main/java/uk/ac/open/kmi/squire/core4/QueryTransform.java index 7d91a43..c8c6c95 100644 --- a/squire/src/main/java/uk/ac/open/kmi/squire/core4/QueryOperator.java +++ b/squire/src/main/java/uk/ac/open/kmi/squire/core4/QueryTransform.java @@ -3,7 +3,7 @@ import uk.ac.open.kmi.squire.entityvariablemapping.GeneralVarMapping; import uk.ac.open.kmi.squire.entityvariablemapping.VarMapping; -public abstract class QueryOperator extends AbstractQueryRecommendationObservable { +public abstract class QueryTransform extends AbstractQueryRecommendationObservable { protected enum NodeRole { OBJECT, PREDICATE, SUBJECT @@ -19,7 +19,7 @@ protected enum NodeRole { protected VarMapping classVarTable, datatypePropertyVarTable, individualVarTable, literalVarTable, objectProperyVarTable, rdfVocVarTable, plainPropertyVarTable; - protected QueryOperator() { + protected QueryTransform() { classVarTable = new GeneralVarMapping(); individualVarTable = new GeneralVarMapping(); literalVarTable = new GeneralVarMapping(); diff --git a/squire/src/main/java/uk/ac/open/kmi/squire/core4/Specializer.java b/squire/src/main/java/uk/ac/open/kmi/squire/core4/Specializer.java index 9f51713..b27fe86 100644 --- a/squire/src/main/java/uk/ac/open/kmi/squire/core4/Specializer.java +++ b/squire/src/main/java/uk/ac/open/kmi/squire/core4/Specializer.java @@ -5,6 +5,7 @@ */ package uk.ac.open.kmi.squire.core4; +import java.text.DecimalFormat; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; @@ -47,12 +48,12 @@ /** * - * XXX for a {@link QueryOperator} in itself, this class has too 
much control + * XXX for a {@link QueryTransform} in itself, this class has too much control * over other operations. * * @author carloallocca */ -public class Specializer extends QueryOperator { +public class Specializer extends QueryTransform { private static String OPID_INSTANTIATE = "I"; private static String OPID_TP_REMOVE = "R"; @@ -76,7 +77,7 @@ public class Specializer extends QueryOperator { */ private final List specializables = new ArrayList<>(); - public Specializer(Query qo, Query qr, IRDFDataset d1, IRDFDataset d2, Generalizer previousOp, + public Specializer(Query qo, Query qr, IRDFDataset d1, IRDFDataset d2, BasicGeneralizer previousOp, float resultTypeSimilarityDegree, float queryRootDistanceDegree, float resultSizeSimilarityDegree, float querySpecificityDistanceDegree, String token) { super(); @@ -346,12 +347,12 @@ public List specialize() { List qSolList = parentNode.getQueryTempVarSolutionSpace(); log.debug("queryChild Instantiation step: {}", queryChild.toString()); log.debug("qSolList size = {} ", qSolList.size()); - int c = 0; + DecimalFormat format = new DecimalFormat("##.##%"); + for (QuerySolution sol : qSolList) { log.trace("Solution {}: {}", c++, sol); Query childQueryCopy = QueryFactory.create(queryChild.toString()); - // [ REPLACED ] Query childQueryCopyInstanciated= // applyInstanciationOP(childQueryCopy, sol); Set qTempVarSet = getQueryTemplateVariableSet(childQueryCopy); @@ -361,24 +362,21 @@ public List specialize() { // that is going to be instantiated List templVarEntityQoQrInstanciatedList = new ArrayList<>(); for (Var tv : qTempVarSet) { - // log.info("Var tv: " +tv.getVarName()); - // log.info("Var tv: " +tv.getName()); RDFNode node = sol.get(tv.getName()); - // log.info("RDFNode node: " +node.toString()); - - SPARQLQueryInstantiation instOP = new SPARQLQueryInstantiation(); - childQueryCopyInstanciated = instOP.instantiateVarTemplate(childQueryCopy, tv, - node.asNode()); - - String entityQo = getEntityQo(tv); - String 
entityQr = node.asNode().getURI(); // as it is the name of a concrete node and not of - // a variable; - VarTemplateAndEntityQoQr item = new VarTemplateAndEntityQoQr(tv, entityQo, entityQr); - templVarEntityQoQrInstanciatedList.add(item); + if (node != null && node.asNode().isURI()) { + // XXX The operator is stateful so we have to re-instantiate it... + SPARQLQueryInstantiation op_inst = new SPARQLQueryInstantiation(); + childQueryCopyInstanciated = op_inst.instantiateVarTemplate(childQueryCopy, tv, + node.asNode()); + String entityQo = getEntityQo(tv); + String entityQr = node.asNode().getURI(); // Expected to be concrete and named + VarTemplateAndEntityQoQr item = new VarTemplateAndEntityQoQr(tv, entityQo, entityQr); + templVarEntityQoQrInstanciatedList.add(item); + } else log.error("Unexpected state of node {} for template variable '{}'", node, tv); } if (childQueryCopyInstanciated != null) { // 4.1.2. Check if it is alredy indexed and therefore generated - if (!(isQueryIndexed(childQueryCopyInstanciated))) { + if (!isQueryIndexed(childQueryCopyInstanciated)) { // add qWithoutTriple to the index addQueryToIndexIFAbsent(childQueryCopyInstanciated); @@ -399,8 +397,8 @@ public List specialize() { // ===== - log.info("qR score ======" + childNode.getqRScore()); - log.info("qR " + childNode.getTransformedQuery()); + log.debug("qR score ======" + childNode.getqRScore()); + log.debug("qR " + childNode.getTransformedQuery()); notifyQueryRecommendation(childNode.getTransformedQuery(), childNode.getqRScore()); @@ -411,6 +409,8 @@ public List specialize() { } } } + double ratio = (double) c / qSolList.size(); + if (0 == c % 10) log.info(" ... 
{} done ({} of {}) ", format.format(ratio), c, qSolList.size()); } // end for } } diff --git a/squire/src/main/java/uk/ac/open/kmi/squire/evaluation/QueryGPESim.java b/squire/src/main/java/uk/ac/open/kmi/squire/evaluation/QueryGPESim.java index 53535cb..9ea841a 100644 --- a/squire/src/main/java/uk/ac/open/kmi/squire/evaluation/QueryGPESim.java +++ b/squire/src/main/java/uk/ac/open/kmi/squire/evaluation/QueryGPESim.java @@ -14,7 +14,7 @@ import org.apache.jena.sparql.syntax.ElementWalker; import org.slf4j.LoggerFactory; -import uk.ac.open.kmi.squire.sparqlqueryvisitor.SQGraphPatternExpressionVisitor; +import uk.ac.open.kmi.squire.sparqlqueryvisitor.SQGraphPatternExpressionAggregator; /** * @@ -35,15 +35,15 @@ public QueryGPESim() { public float computeQueryPatternsSim(Query qO, Query qR) { // ...get the GPE of qOri - SQGraphPatternExpressionVisitor gpeVisitorO = new SQGraphPatternExpressionVisitor(); + SQGraphPatternExpressionAggregator gpeVisitorO = new SQGraphPatternExpressionAggregator(); ElementWalker.walk(qO.getQueryPattern(), gpeVisitorO); - Set qOGPE = gpeVisitorO.getQueryGPE(); + Set qOGPE = gpeVisitorO.getMembersInQuery(); // log.info("qOGPE : " +qOGPE.toString()); // ...get the GPE of qRec - SQGraphPatternExpressionVisitor gpeVisitorR = new SQGraphPatternExpressionVisitor(); + SQGraphPatternExpressionAggregator gpeVisitorR = new SQGraphPatternExpressionAggregator(); ElementWalker.walk(qR.getQueryPattern(), gpeVisitorR); - Set qRGPE = gpeVisitorR.getQueryGPE(); + Set qRGPE = gpeVisitorR.getMembersInQuery(); // log.info("qRGPE : " +qRGPE.toString()); // this is as it was before 13-04-2017 @@ -59,15 +59,15 @@ public float computeQueryPatternsSim(Query qO, Query qR) { public float computeQueryPatternsSimWithWeighedNonCommonTriplePattern(Query qO, Query qR) { // ...get the GPE of qOri - SQGraphPatternExpressionVisitor gpeVisitorO = new SQGraphPatternExpressionVisitor(); + SQGraphPatternExpressionAggregator gpeVisitorO = new SQGraphPatternExpressionAggregator(); 
ElementWalker.walk(qO.getQueryPattern(), gpeVisitorO); - Set qOGPE = gpeVisitorO.getQueryGPE(); + Set qOGPE = gpeVisitorO.getMembersInQuery(); // log.info("qOGPE : " +qOGPE.toString()); // ...get the GPE of qRec - SQGraphPatternExpressionVisitor gpeVisitorR = new SQGraphPatternExpressionVisitor(); + SQGraphPatternExpressionAggregator gpeVisitorR = new SQGraphPatternExpressionAggregator(); ElementWalker.walk(qR.getQueryPattern(), gpeVisitorR); - Set qRGPE = gpeVisitorR.getQueryGPE(); + Set qRGPE = gpeVisitorR.getMembersInQuery(); // log.info("qRGPE : " +qRGPE.toString()); // //this is as it was before 13-04-2017 diff --git a/squire/src/main/java/uk/ac/open/kmi/squire/evaluation/QueryResultSizeDistance.java b/squire/src/main/java/uk/ac/open/kmi/squire/evaluation/QueryResultSizeDistance.java index 6ad1534..72c3187 100644 --- a/squire/src/main/java/uk/ac/open/kmi/squire/evaluation/QueryResultSizeDistance.java +++ b/squire/src/main/java/uk/ac/open/kmi/squire/evaluation/QueryResultSizeDistance.java @@ -26,9 +26,9 @@ import uk.ac.open.kmi.squire.rdfdataset.FileBasedRDFDataset; import uk.ac.open.kmi.squire.rdfdataset.IRDFDataset; import uk.ac.open.kmi.squire.rdfdataset.SparqlIndexedDataset; -import uk.ac.open.kmi.squire.sparqlqueryvisitor.SQClassVisitor; -import uk.ac.open.kmi.squire.sparqlqueryvisitor.SQDatatypePropertyVisitor; -import uk.ac.open.kmi.squire.sparqlqueryvisitor.SQObjectPropertyVisitor; +import uk.ac.open.kmi.squire.sparqlqueryvisitor.SQClassAggregator; +import uk.ac.open.kmi.squire.sparqlqueryvisitor.SQDatatypePropertyAggregator; +import uk.ac.open.kmi.squire.sparqlqueryvisitor.SQObjectPropertyAggregator; /** * @@ -389,32 +389,32 @@ private int computeSPARQLEndPointQueryResultSetCardinality(Query qOri, IRDFDatas } private Set extractQueryClassSet(Query qOri, IRDFDataset d1) { - SQClassVisitor v = new SQClassVisitor(d1); + SQClassAggregator v = new SQClassAggregator(d1); // This will walk through all parts of the query 
ElementWalker.walk(qOri.getQueryPattern(), v); System.out.println("[QueryResultSizeSimilarity::extractQueryClassSet] v.getQueryClassSet() " - + v.getQueryClassSet().toString()); + + v.getMembersInQuery().toString()); - return v.getQueryClassSet(); + return v.getMembersInQuery(); } private Set extractQueryDatatypePropertySet(Query qOri, IRDFDataset d1) { - SQDatatypePropertyVisitor v = new SQDatatypePropertyVisitor(d1); + SQDatatypePropertyAggregator v = new SQDatatypePropertyAggregator(d1); // This will walk through all parts of the query ElementWalker.walk(qOri.getQueryPattern(), v); System.out.println("[QueryResultSizeSimilarity::extractQueryDatatypePropertySet] v.ObjectProperty() " - + v.getQueryDatatypePropertySet().toString()); + + v.getMembersInQuery().toString()); - return v.getQueryDatatypePropertySet(); + return v.getMembersInQuery(); } private Set extractQueryObjectPropertySet(Query qOri, IRDFDataset d1) { - SQObjectPropertyVisitor v = new SQObjectPropertyVisitor(d1); + SQObjectPropertyAggregator v = new SQObjectPropertyAggregator(d1); // This will walk through all parts of the query ElementWalker.walk(qOri.getQueryPattern(), v); System.out.println("[QueryResultSizeSimilarity::extractQueryObjectPropertySet] v.ObjectProperty() " - + v.getQueryObjectPropertySet().toString()); - return v.getQueryObjectPropertySet(); + + v.getMembersInQuery().toString()); + return v.getMembersInQuery(); } } diff --git a/squire/src/main/java/uk/ac/open/kmi/squire/evaluation/QuerySpecificityDistance.java b/squire/src/main/java/uk/ac/open/kmi/squire/evaluation/QuerySpecificityDistance.java index fb798f2..0116255 100644 --- a/squire/src/main/java/uk/ac/open/kmi/squire/evaluation/QuerySpecificityDistance.java +++ b/squire/src/main/java/uk/ac/open/kmi/squire/evaluation/QuerySpecificityDistance.java @@ -1,8 +1,3 @@ -/* - * To change this license header, choose License Headers in Project Properties. 
- * To change this template file, choose Tools | Templates - * and open the template in the editor. - */ package uk.ac.open.kmi.squire.evaluation; import java.util.List; @@ -12,8 +7,8 @@ import org.apache.jena.sparql.core.TriplePath; import org.apache.jena.sparql.syntax.ElementWalker; -import uk.ac.open.kmi.squire.sparqlqueryvisitor.SQGraphPatternExpressionVisitor; -import uk.ac.open.kmi.squire.sparqlqueryvisitor.SQVariableVisitor; +import uk.ac.open.kmi.squire.sparqlqueryvisitor.SQGraphPatternExpressionAggregator; +import uk.ac.open.kmi.squire.sparqlqueryvisitor.SQVariableAggregator; /** * @@ -21,44 +16,44 @@ */ public class QuerySpecificityDistance { + public float computeQSDwrtQueryTP(Query originalQuery, Query qR) { + float dist = 0; + + // QueryGPESim simQuery= new QueryGPESim(); + // sim = simQuery.computeQueryPatternsSim(originalQuery, qR); + // + //// sim=simQuery.computeQueryPatternsSimWithWeighedNonCommonTriplePattern(qR, + // qR); + // return sim; + + // ...get the GPE of qOri + SQGraphPatternExpressionAggregator gpeVisitorO = new SQGraphPatternExpressionAggregator(); + ElementWalker.walk(originalQuery.getQueryPattern(), gpeVisitorO); + Set qOGPE = gpeVisitorO.getMembersInQuery(); + + // ...get the GPE of qRec + SQGraphPatternExpressionAggregator gpeVisitorR = new SQGraphPatternExpressionAggregator(); + ElementWalker.walk(qR.getQueryPattern(), gpeVisitorR); + Set qRGPE = gpeVisitorR.getMembersInQuery(); + + dist = 1 - tpOverlapRate(qOGPE, qRGPE); + + return dist; + + } + public float computeQSDwrtQueryVariable(Query qO, Query qR) { float dist; - List qOvarList = computeQueryVariableSet(qO); + Set qOvarList = computeQueryVariableSet(qO); // log.info("qOvarList " +qOvarList.toString()); - List qRvarList = computeQueryVariableSet(qR); + Set qRvarList = computeQueryVariableSet(qR); // log.info("qRvarList " +qRvarList.toString()); // dist = computeQSsim(qOvarList, qRvarList); dist = 1 - varOverlapRate(qOvarList, qRvarList); return dist; } - private List 
computeQueryVariableSet(Query qO) { - SQVariableVisitor v = new SQVariableVisitor(); - // ... This will walk through all parts of the query - ElementWalker.walk(qO.getQueryPattern(), v); - return v.getQueryVariableSet(); - } - - private float varOverlapRate(List qOvarList, List qRvarList) { - float overlapRate = 0; - if (qOvarList.size() > 0 && qRvarList.size() > 0) { - - // compute the intersectionVarCardinality - int intersectionVarCardinality = 0; - for (String st : qOvarList) { - if (qRvarList.contains(st)) { - intersectionVarCardinality = intersectionVarCardinality + 1; - } - } - - float unionVarCardinality = computeUnionCardinalityVar(qOvarList, qRvarList, intersectionVarCardinality); - overlapRate = (float) ((1.0 * intersectionVarCardinality) / unionVarCardinality); - return overlapRate; - } - return overlapRate; - } - private float computeQSsim(List qOvarList, List qRvarList) { float sim = 0; if (qOvarList.size() > 0 && qRvarList.size() > 0) { @@ -69,30 +64,29 @@ private float computeQSsim(List qOvarList, List qRvarList) { return sim; } - public float computeQSDwrtQueryTP(Query originalQuery, Query qR) { - float dist = 0; - - // QueryGPESim simQuery= new QueryGPESim(); - // sim = simQuery.computeQueryPatternsSim(originalQuery, qR); - // - //// sim=simQuery.computeQueryPatternsSimWithWeighedNonCommonTriplePattern(qR, - // qR); - // return sim; + private Set computeQueryVariableSet(Query qO) { + SQVariableAggregator v = new SQVariableAggregator(); + // ... 
This will walk through all parts of the query + ElementWalker.walk(qO.getQueryPattern(), v); + return v.getMembersInQuery(); + } - // ...get the GPE of qOri - SQGraphPatternExpressionVisitor gpeVisitorO = new SQGraphPatternExpressionVisitor(); - ElementWalker.walk(originalQuery.getQueryPattern(), gpeVisitorO); - Set qOGPE = gpeVisitorO.getQueryGPE(); + private int computeUnionCardinalityTP(Set qOGPE, Set qRGPE, int intersectionTPCardinality) { + return (qOGPE.size() + qRGPE.size()) - intersectionTPCardinality; - // ...get the GPE of qRec - SQGraphPatternExpressionVisitor gpeVisitorR = new SQGraphPatternExpressionVisitor(); - ElementWalker.walk(qR.getQueryPattern(), gpeVisitorR); - Set qRGPE = gpeVisitorR.getQueryGPE(); + } - dist = 1 - tpOverlapRate(qOGPE, qRGPE); + private float computeUnionCardinalityVar(Set qOvarList, Set qRvarList, + int intersectionVarCardinality) { + return qOvarList.size() + qRvarList.size() - intersectionVarCardinality; - return dist; + } + private boolean contains(Set qRGPE, TriplePath tp) { + String tpAsString = tp.toString(); + for (TriplePath tp1 : qRGPE) + if (tp1.toString().compareTo(tpAsString) == 0) return true; + return false; } private float tpOverlapRate(Set qOGPE, Set qRGPE) { @@ -116,25 +110,23 @@ private float tpOverlapRate(Set qOGPE, Set qRGPE) { return overlapRate; } - private int computeUnionCardinalityTP(Set qOGPE, Set qRGPE, int intersectionTPCardinality) { - return (qOGPE.size() + qRGPE.size()) - intersectionTPCardinality; - - } + private float varOverlapRate(Set qOvarList, Set qRvarList) { + float overlapRate = 0; + if (qOvarList.size() > 0 && qRvarList.size() > 0) { - private boolean contains(Set qRGPE, TriplePath tp) { - String tpAsString = tp.toString(); - for (TriplePath tp1 : qRGPE) { - if (tp1.toString().compareTo(tpAsString) == 0) { - return true; + // compute the intersectionVarCardinality + int intersectionVarCardinality = 0; + for (String st : qOvarList) { + if (qRvarList.contains(st)) { + 
intersectionVarCardinality = intersectionVarCardinality + 1; + } } - } - return false; - } - - private float computeUnionCardinalityVar(List qOvarList, List qRvarList, - int intersectionVarCardinality) { - return (qOvarList.size() + qRvarList.size()) - intersectionVarCardinality; + float unionVarCardinality = computeUnionCardinalityVar(qOvarList, qRvarList, intersectionVarCardinality); + overlapRate = (float) ((1.0 * intersectionVarCardinality) / unionVarCardinality); + return overlapRate; + } + return overlapRate; } } diff --git a/squire/src/main/java/uk/ac/open/kmi/squire/operation/InstantiateTemplateVar.java b/squire/src/main/java/uk/ac/open/kmi/squire/operation/InstantiateTemplateVar.java index 3fb0728..1b747de 100644 --- a/squire/src/main/java/uk/ac/open/kmi/squire/operation/InstantiateTemplateVar.java +++ b/squire/src/main/java/uk/ac/open/kmi/squire/operation/InstantiateTemplateVar.java @@ -10,7 +10,7 @@ * * @author carloallocca */ -public class InstantiateTemplateVar implements Operation { +public class InstantiateTemplateVar implements Operator { private Logger log = LoggerFactory.getLogger(getClass()); diff --git a/squire/src/main/java/uk/ac/open/kmi/squire/operation/Operation.java b/squire/src/main/java/uk/ac/open/kmi/squire/operation/Operation.java deleted file mode 100644 index b6aa203..0000000 --- a/squire/src/main/java/uk/ac/open/kmi/squire/operation/Operation.java +++ /dev/null @@ -1,28 +0,0 @@ -package uk.ac.open.kmi.squire.operation; - -/** - * An operation that, once applied, returns an object that can be assimilated - * with a query (e.g. a Query or set of queries). - * - * @author carloallocca, alessandro.adamou - */ -public interface Operation { - - /** - * Performs the operation. The operands are taken from the object that - * implements the operation. - * - * @return the result of applying the operation (can be e.g. a query or set - * thereof). 
- */ - public Q apply(); - - /** - * Returns the list of operands in the order supplied to the constructor of this - * Operation. - * - * @return the list of parameters. - */ - public Object[] getOperands(); - -} diff --git a/squire/src/main/java/uk/ac/open/kmi/squire/operation/Operator.java b/squire/src/main/java/uk/ac/open/kmi/squire/operation/Operator.java new file mode 100644 index 0000000..3bbcd4a --- /dev/null +++ b/squire/src/main/java/uk/ac/open/kmi/squire/operation/Operator.java @@ -0,0 +1,34 @@ +package uk.ac.open.kmi.squire.operation; + +import uk.ac.open.kmi.squire.core4.QueryTransform; + +/** + * An operator that, once applied, returns an object that can be assimilated + * with a query (e.g. a Jena Query or set of queries). + * + * In general, operators do not implement policies, i.e. they do not "decide" + * when to be applied and when not to. Operators blindly apply what is decided + * by a {@link QueryTransform}. + * + * @author carloallocca, Alessandro Adamou + */ +public interface Operator { + + /** + * Performs the operation. The operands are taken from the object that + * implements the operation (e.g. passed to the its constructor). + * + * @return the result of applying the operation (can be e.g. a query or set + * thereof). + */ + public Q apply(); + + /** + * Returns the list of operands in the order supplied to the constructor of this + * Operation. + * + * @return the list of parameters. 
+ */ + public Object[] getOperands(); + +} diff --git a/squire/src/main/java/uk/ac/open/kmi/squire/operation/RemoveTriple.java b/squire/src/main/java/uk/ac/open/kmi/squire/operation/RemoveTriple.java index f535944..daefb8c 100644 --- a/squire/src/main/java/uk/ac/open/kmi/squire/operation/RemoveTriple.java +++ b/squire/src/main/java/uk/ac/open/kmi/squire/operation/RemoveTriple.java @@ -11,13 +11,13 @@ import org.apache.jena.sparql.syntax.syntaxtransform.QueryTransformOps; import uk.ac.open.kmi.squire.sparqlqueryvisitor.RemoveOpTransform; -import uk.ac.open.kmi.squire.sparqlqueryvisitor.SQGraphPatternExpressionVisitor; +import uk.ac.open.kmi.squire.sparqlqueryvisitor.SQGraphPatternExpressionAggregator; /** * * @author carloallocca */ -public class RemoveTriple implements Operation { +public class RemoveTriple implements Operator { private Query query; @@ -30,9 +30,9 @@ public RemoveTriple(Query q, Triple tp) { @Override public Query apply() { - SQGraphPatternExpressionVisitor gpeVisitorO = new SQGraphPatternExpressionVisitor(); + SQGraphPatternExpressionAggregator gpeVisitorO = new SQGraphPatternExpressionAggregator(); ElementWalker.walk(this.query.getQueryPattern(), gpeVisitorO); - if (gpeVisitorO.getQueryGPE().size() <= 1) return this.query; + if (gpeVisitorO.getMembersInQuery().size() <= 1) return this.query; RemoveOpTransform rOpTransform = new RemoveOpTransform(this.query, this.triple); Query qPostOp = QueryTransformOps.transform(this.query, rOpTransform); // if(qPostOp.getGroupBy()!=null){ diff --git a/squire/src/main/java/uk/ac/open/kmi/squire/operation/SPARQLQuerySatisfiable.java b/squire/src/main/java/uk/ac/open/kmi/squire/operation/SPARQLQuerySatisfiable.java index b9c8ace..aa1358f 100644 --- a/squire/src/main/java/uk/ac/open/kmi/squire/operation/SPARQLQuerySatisfiable.java +++ b/squire/src/main/java/uk/ac/open/kmi/squire/operation/SPARQLQuerySatisfiable.java @@ -9,6 +9,7 @@ import java.io.FileNotFoundException; import java.util.ArrayList; import 
java.util.List; +import java.util.Set; import org.apache.jena.ontology.OntModel; import org.apache.jena.ontology.OntModelSpec; @@ -28,7 +29,7 @@ import uk.ac.open.kmi.squire.core4.AbstractQueryRecommendationObservable; import uk.ac.open.kmi.squire.rdfdataset.IRDFDataset; import uk.ac.open.kmi.squire.rdfdataset.SparqlIndexedDataset; -import uk.ac.open.kmi.squire.sparqlqueryvisitor.SQVariableVisitor; +import uk.ac.open.kmi.squire.sparqlqueryvisitor.SQVariableAggregator; import uk.ac.open.kmi.squire.utils.SparqlUtils; import uk.ac.open.kmi.squire.utils.SparqlUtils.SparqlException; @@ -62,7 +63,6 @@ public boolean isSatisfiable(Query q, IRDFDataset d) { // ResultSet results = qexec.execSelect(); // resList = ResultSetFormatter.toList(results); //.out(, results, q); // return resList.size() >= 1; - try { QueryExecution qexec = QueryExecutionFactory.sparqlService(datasetPath, qTMP, (String) d.getGraph()); ResultSet results = qexec.execSelect(); @@ -70,7 +70,10 @@ public boolean isSatisfiable(Query q, IRDFDataset d) { resList = ResultSetFormatter.toList(results); // .out(, results, q); return resList.size() >= 1; } catch (Exception ex) { - log.error("", ex); + log.error("Query failed"); + log.error("Dataset path was: <{}>", datasetPath); + log.error("Query was:\r\n{}", qTMP); + log.error("Exception stack trace follows.", ex); return false; } @@ -109,7 +112,7 @@ public boolean isSatisfiable(Query q, IRDFDataset d) { public boolean isSatisfiableWRTProjectVar(Query qRec) { - List qOvarList = computeQueryVariableSet(qRec); + Set qOvarList = computeQueryVariableSet(qRec); // System.out.println("[SPARQLQuerySatisfiable::isSatisfiableWRTProjectVar] 1 " // + qOvarList.toString()); @@ -142,17 +145,21 @@ public boolean isSatisfiableWrtResults(Query q, IRDFDataset rdfd2) { return cond; } catch (SparqlException e) { log.warn("Satisfiability query failed. 
Reason follows.", e); + log.error("Query failed"); + log.error("Dataset path was: <{}>", datasetPath); + log.error("Query was:\r\n{}", qTMP); + log.error("Reason:", e); return false; } } - private List computeQueryVariableSet(Query qO) { - SQVariableVisitor v = new SQVariableVisitor(); + private Set computeQueryVariableSet(Query qO) { + SQVariableAggregator v = new SQVariableAggregator(); // ... This will walk through all parts of the query ElementWalker.walk(qO.getQueryPattern(), v); // System.out.println("[QuerySpecificityDistance::computeQueryVariableSet] // v.getQueryClassSet() " + v.getQueryClassSet().toString()); - return v.getQueryVariableSet(); + return v.getMembersInQuery(); } } diff --git a/squire/src/main/java/uk/ac/open/kmi/squire/querytemplate/SQTVisitor.java b/squire/src/main/java/uk/ac/open/kmi/squire/querytemplate/SQTVisitor.java index fd16dc3..cf84ecd 100644 --- a/squire/src/main/java/uk/ac/open/kmi/squire/querytemplate/SQTVisitor.java +++ b/squire/src/main/java/uk/ac/open/kmi/squire/querytemplate/SQTVisitor.java @@ -5,9 +5,9 @@ */ package uk.ac.open.kmi.squire.querytemplate; -import static uk.ac.open.kmi.squire.core4.QueryOperator.TEMPLATE_VAR_CLASS; -import static uk.ac.open.kmi.squire.core4.QueryOperator.TEMPLATE_VAR_INDIVIDUAL; -import static uk.ac.open.kmi.squire.core4.QueryOperator.TEMPLATE_VAR_LITERAL; +import static uk.ac.open.kmi.squire.core4.QueryTransform.TEMPLATE_VAR_CLASS; +import static uk.ac.open.kmi.squire.core4.QueryTransform.TEMPLATE_VAR_INDIVIDUAL; +import static uk.ac.open.kmi.squire.core4.QueryTransform.TEMPLATE_VAR_LITERAL; import java.util.ListIterator; diff --git a/squire/src/main/java/uk/ac/open/kmi/squire/rdfdataset/AbstractRdfDataset.java b/squire/src/main/java/uk/ac/open/kmi/squire/rdfdataset/AbstractRdfDataset.java index ebdfb4d..d5d0314 100644 --- a/squire/src/main/java/uk/ac/open/kmi/squire/rdfdataset/AbstractRdfDataset.java +++ b/squire/src/main/java/uk/ac/open/kmi/squire/rdfdataset/AbstractRdfDataset.java @@ -1,8 
+1,10 @@ package uk.ac.open.kmi.squire.rdfdataset; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Map; +import java.util.Map.Entry; import java.util.Set; /** @@ -17,6 +19,9 @@ public abstract class AbstractRdfDataset implements IRDFDataset { protected Set individualSet = new HashSet<>(); protected Set literalSet = new HashSet<>(); protected Set objectPropertySet = new HashSet<>(); + + protected Map> propertyCoOc = new HashMap<>(); + protected Set rdfVocabulary = new HashSet<>(); @Override @@ -31,7 +36,7 @@ public void clear() { @Override public Set getClassSet() { - return classSignatures.keySet(); + return Collections.unmodifiableSet(classSignatures.keySet()); } @Override @@ -39,24 +44,39 @@ public Map getClassSignatures() { return this.classSignatures; } + @Override + public int getCoOccurrences(String property1, String property2) { + if (propertyCoOc.containsKey(property1) && propertyCoOc.get(property1).containsKey(property2)) + return propertyCoOc.get(property1).get(property2).intValue(); + else if (propertyCoOc.containsKey(property2) && propertyCoOc.get(property2).containsKey(property1)) + return propertyCoOc.get(property2).get(property1).intValue(); + return 0; + } + + @Override + public Map getCoOccurringProperties(String property) { + if (propertyCoOc.containsKey(property)) return Collections.unmodifiableMap(propertyCoOc.get(property)); + return Collections.emptyMap(); + } + @Override public Set getDatatypePropertySet() { - return datatypePropertySet; + return Collections.unmodifiableSet(datatypePropertySet); } @Override public Set getIndividualSet() { - return individualSet; + return Collections.unmodifiableSet(individualSet); } @Override public Set getLiteralSet() { - return literalSet; + return Collections.unmodifiableSet(literalSet); } @Override public Set getObjectPropertySet() { - return objectPropertySet; + return Collections.unmodifiableSet(objectPropertySet); } /** @@ -69,12 +89,12 @@ public Set 
getPropertySet() { Set result = new HashSet<>(); result.addAll(getDatatypePropertySet()); result.addAll(getObjectPropertySet()); - return result; + return Collections.unmodifiableSet(result); } @Override public Set getRDFVocabulary() { - return rdfVocabulary; + return Collections.unmodifiableSet(rdfVocabulary); } @Override @@ -112,4 +132,32 @@ public boolean isInRDFVocabulary(String rdfEntity) { return rdfVocabulary.contains(rdfEntity); } + protected Map> buildPropertyCoOccurrence() { + Map> res = new HashMap<>(); + // Co-occurrence with details on the classes. + Map>> cooc = new HashMap<>(); + for (Entry entry : classSignatures.entrySet()) { + ClassSignature sign = entry.getValue(); + for (String p1 : sign.listPathOrigins()) { + if (!cooc.containsKey(p1)) cooc.put(p1, new HashMap<>()); + for (String p2 : sign.listPathOrigins()) + if (p1 != p2) { + if (!cooc.get(p1).containsKey(p2)) cooc.get(p1).put(p2, new HashSet<>()); + cooc.get(p1).get(p2).add(sign.getOwlClass()); + if (!cooc.containsKey(p2)) cooc.put(p2, new HashMap<>()); + if (!cooc.get(p2).containsKey(p1)) cooc.get(p2).put(p1, new HashSet<>()); + cooc.get(p2).get(p1).add(sign.getOwlClass()); + } + + } + } + for (Entry>> entry : cooc.entrySet()) { + res.put(entry.getKey(), new HashMap<>()); + Map row = res.get(entry.getKey()); + for (Entry> e2 : entry.getValue().entrySet()) + if (!row.containsKey(e2.getKey())) row.put(e2.getKey(), e2.getValue().size()); + } + return res; + } + } diff --git a/squire/src/main/java/uk/ac/open/kmi/squire/rdfdataset/FileBasedRDFDataset.java b/squire/src/main/java/uk/ac/open/kmi/squire/rdfdataset/FileBasedRDFDataset.java index 2c477ec..40c2517 100644 --- a/squire/src/main/java/uk/ac/open/kmi/squire/rdfdataset/FileBasedRDFDataset.java +++ b/squire/src/main/java/uk/ac/open/kmi/squire/rdfdataset/FileBasedRDFDataset.java @@ -34,8 +34,8 @@ public class FileBasedRDFDataset extends AbstractRdfDataset { */ private Object datasetPath; - private final Logger log = 
LoggerFactory.getLogger(getClass()); private OntModel inf = null; + private final Logger log = LoggerFactory.getLogger(getClass()); public FileBasedRDFDataset(String rdfDatasetFilePath) { InputStream in = null; @@ -157,6 +157,11 @@ public boolean isIndexed() { throw new UnsupportedOperationException("Not supported yet."); } + @Override + public void rebuildPropertyCoOccurrenceMap() { + this.propertyCoOc = buildPropertyCoOccurrence(); + } + @Override public void run() { throw new UnsupportedOperationException("This class is not associated to a process."); diff --git a/squire/src/main/java/uk/ac/open/kmi/squire/rdfdataset/IRDFDataset.java b/squire/src/main/java/uk/ac/open/kmi/squire/rdfdataset/IRDFDataset.java index a707975..5903718 100644 --- a/squire/src/main/java/uk/ac/open/kmi/squire/rdfdataset/IRDFDataset.java +++ b/squire/src/main/java/uk/ac/open/kmi/squire/rdfdataset/IRDFDataset.java @@ -39,6 +39,10 @@ public interface IRDFDataset extends Runnable { public Map getClassSignatures(); + public int getCoOccurrences(String property1, String property2); + + public Map getCoOccurringProperties(String property1); + public Set getDatatypePropertySet(); public Object getEndPointURL(); @@ -71,6 +75,8 @@ public interface IRDFDataset extends Runnable { public boolean isInRDFVocabulary(String rdfEntity); + public void rebuildPropertyCoOccurrenceMap(); + public void setGraph(Object path); public void setPath(Object path); diff --git a/squire/src/main/java/uk/ac/open/kmi/squire/rdfdataset/InMemoryRdfDataset.java b/squire/src/main/java/uk/ac/open/kmi/squire/rdfdataset/InMemoryRdfDataset.java new file mode 100644 index 0000000..19f663a --- /dev/null +++ b/squire/src/main/java/uk/ac/open/kmi/squire/rdfdataset/InMemoryRdfDataset.java @@ -0,0 +1,132 @@ +package uk.ac.open.kmi.squire.rdfdataset; + +/** + * compute() methods do nothing. The dataset can only be populated through add, + * remove and clear methods. 
+ * + * @author alessandro + * + */ +public class InMemoryRdfDataset extends AbstractRdfDataset implements WritableRdfDataset { + + @Override + public void addDatatypeProperty(String uri) { + this.datatypePropertySet.add(uri); + } + + @Override + public void addIndividual(String uri) { + this.individualSet.add(uri); + } + + @Override + public void addLiteral(String value) { + this.literalSet.add(value); + } + + @Override + public void addObjectProperty(String uri) { + this.objectPropertySet.add(uri); + } + + @Override + public void clearDatatypeProperties() { + this.datatypePropertySet.clear(); + } + + @Override + public void clearIndividuals() { + this.individualSet.clear(); + } + + @Override + public void clearLiterals() { + this.literalSet.clear(); + } + + @Override + public void clearObjectProperties() { + this.objectPropertySet.clear(); + } + + @Override + public void computeClassSet() { + } + + @Override + public void computeDataTypePropertySet() throws BootedException { + } + + @Override + public void computeIndividualSet() { + } + + @Override + public void computeLiteralSet() { + } + + @Override + public void computeObjectPropertySet() throws BootedException { + } + + @Override + public void computePropertySet() { + } + + @Override + public void computeRDFVocabularySet() { + } + + @Override + public Object getEndPointURL() { + return null; + } + + @Override + public Object getGraph() { + return null; + } + + @Override + public boolean isIndexed() { + return true; + } + + @Override + public void rebuildPropertyCoOccurrenceMap() { + this.propertyCoOc = buildPropertyCoOccurrence(); + } + + @Override + public void removeDatatypeProperty(String uri) { + this.datatypePropertySet.remove(uri); + } + + @Override + public void removeIndividual(String uri) { + this.individualSet.remove(uri); + } + + @Override + public void removeLiteral(String value) { + this.literalSet.remove(value); + } + + @Override + public void removeObjectProperty(String uri) { + 
this.objectPropertySet.remove(uri); + } + + @Override + public void run() { + } + + @Override + public void setGraph(Object path) { + } + + @Override + public void setPath(Object path) { + } + +} diff --git a/squire/src/main/java/uk/ac/open/kmi/squire/rdfdataset/SparqlIndexedDataset.java b/squire/src/main/java/uk/ac/open/kmi/squire/rdfdataset/SparqlIndexedDataset.java index 32c1d9e..e96d3e0 100644 --- a/squire/src/main/java/uk/ac/open/kmi/squire/rdfdataset/SparqlIndexedDataset.java +++ b/squire/src/main/java/uk/ac/open/kmi/squire/rdfdataset/SparqlIndexedDataset.java @@ -75,6 +75,7 @@ public SparqlIndexedDataset(String urlAddress, String graphName, boolean replaci RDFDatasetIndexer instance = RDFDatasetIndexer.getInstance(); this.signatureDoc = instance.getSignature(this.endpointURL, this.graphName); loadAll(); + this.propertyCoOc = buildPropertyCoOccurrence(); } @Override @@ -269,6 +270,13 @@ public boolean isInPropertySet(String propertyUri) { return super.isInPropertySet(propertyUri); } + @Override + public void rebuildPropertyCoOccurrenceMap() { + throw new UnsupportedOperationException( + "Class " + getClass().getName() + " does not support programmatic rebuilding of its indices." 
+ + " They are fixed once built by the constructor method."); + } + @Override public void run() { System.out.println("[SPARQLEndPoint:run()] run is in execution...."); diff --git a/squire/src/main/java/uk/ac/open/kmi/squire/rdfdataset/WritableRdfDataset.java b/squire/src/main/java/uk/ac/open/kmi/squire/rdfdataset/WritableRdfDataset.java new file mode 100644 index 0000000..9655c4c --- /dev/null +++ b/squire/src/main/java/uk/ac/open/kmi/squire/rdfdataset/WritableRdfDataset.java @@ -0,0 +1,29 @@ +package uk.ac.open.kmi.squire.rdfdataset; + +public interface WritableRdfDataset extends IRDFDataset { + + public void addDatatypeProperty(String uri); + + public void addIndividual(String uri); + + public void addLiteral(String value); + + public void addObjectProperty(String uri); + + public void clearDatatypeProperties(); + + public void clearIndividuals(); + + public void clearLiterals(); + + public void clearObjectProperties(); + + public void removeDatatypeProperty(String uri); + + public void removeIndividual(String uri); + + public void removeLiteral(String value); + + public void removeObjectProperty(String uri); + +} diff --git a/squire/src/main/java/uk/ac/open/kmi/squire/sparqlqueryvisitor/AbstractSQAggregator.java b/squire/src/main/java/uk/ac/open/kmi/squire/sparqlqueryvisitor/AbstractSQAggregator.java new file mode 100644 index 0000000..65955d3 --- /dev/null +++ b/squire/src/main/java/uk/ac/open/kmi/squire/sparqlqueryvisitor/AbstractSQAggregator.java @@ -0,0 +1,19 @@ +package uk.ac.open.kmi.squire.sparqlqueryvisitor; + +import java.util.Collection; +import java.util.HashSet; +import java.util.Set; + +import org.apache.jena.sparql.syntax.ElementVisitorBase; + +public abstract class AbstractSQAggregator extends ElementVisitorBase { + + protected Collection datasetEntitySet; + + protected Set queryEntitySet = new HashSet<>(); + + public Set getMembersInQuery() { + return queryEntitySet; + } + +} diff --git 
a/squire/src/main/java/uk/ac/open/kmi/squire/sparqlqueryvisitor/SQClassAggregator.java b/squire/src/main/java/uk/ac/open/kmi/squire/sparqlqueryvisitor/SQClassAggregator.java new file mode 100644 index 0000000..ae947f4 --- /dev/null +++ b/squire/src/main/java/uk/ac/open/kmi/squire/sparqlqueryvisitor/SQClassAggregator.java @@ -0,0 +1,39 @@ +package uk.ac.open.kmi.squire.sparqlqueryvisitor; + +import java.util.ListIterator; + +import org.apache.jena.graph.Node; +import org.apache.jena.sparql.core.TriplePath; +import org.apache.jena.sparql.syntax.ElementPathBlock; + +import uk.ac.open.kmi.squire.rdfdataset.IRDFDataset; + +/** + * + * @author carloallocca + */ +public class SQClassAggregator extends AbstractSQAggregator { + + public SQClassAggregator(IRDFDataset d1) { + if (d1 == null) throw new IllegalArgumentException("The dataset cannot be null"); + this.datasetEntitySet = d1.getClassSet(); + } + + @Override + public void visit(ElementPathBlock el) { + if (el == null) throw new IllegalArgumentException("The ElementPathBlock is null"); + ListIterator it = el.getPattern().iterator(); + while (it.hasNext()) { + final TriplePath tp = it.next(); + Node subject = tp.getSubject(); + // SUBJECT + if (subject.isURI() && this.datasetEntitySet.contains(subject.getURI())) + this.queryEntitySet.add(subject.getURI()); + // OBJECT + Node object = tp.getObject(); + if (object.isURI() && this.datasetEntitySet.contains(object.getURI())) + this.queryEntitySet.add(object.getURI()); + } + } + +} diff --git a/squire/src/main/java/uk/ac/open/kmi/squire/sparqlqueryvisitor/SQClassVisitor.java b/squire/src/main/java/uk/ac/open/kmi/squire/sparqlqueryvisitor/SQClassVisitor.java deleted file mode 100644 index 2301c31..0000000 --- a/squire/src/main/java/uk/ac/open/kmi/squire/sparqlqueryvisitor/SQClassVisitor.java +++ /dev/null @@ -1,163 +0,0 @@ -/* - * To change this license header, choose License Headers in Project Properties. 
- * To change this template file, choose Tools | Templates - * and open the template in the editor. - */ -package uk.ac.open.kmi.squire.sparqlqueryvisitor; - -import java.util.Collection; -import java.util.HashSet; -import java.util.ListIterator; -import java.util.Set; - -import org.apache.jena.graph.Node; -import org.apache.jena.sparql.core.TriplePath; -import org.apache.jena.sparql.syntax.ElementAssign; -import org.apache.jena.sparql.syntax.ElementBind; -import org.apache.jena.sparql.syntax.ElementData; -import org.apache.jena.sparql.syntax.ElementDataset; -import org.apache.jena.sparql.syntax.ElementExists; -import org.apache.jena.sparql.syntax.ElementFilter; -import org.apache.jena.sparql.syntax.ElementGroup; -import org.apache.jena.sparql.syntax.ElementMinus; -import org.apache.jena.sparql.syntax.ElementNamedGraph; -import org.apache.jena.sparql.syntax.ElementNotExists; -import org.apache.jena.sparql.syntax.ElementOptional; -import org.apache.jena.sparql.syntax.ElementPathBlock; -import org.apache.jena.sparql.syntax.ElementService; -import org.apache.jena.sparql.syntax.ElementSubQuery; -import org.apache.jena.sparql.syntax.ElementTriplesBlock; -import org.apache.jena.sparql.syntax.ElementUnion; -import org.apache.jena.sparql.syntax.ElementVisitorBase; - -import uk.ac.open.kmi.squire.rdfdataset.IRDFDataset; - -/** - * - * @author carloallocca - */ -public class SQClassVisitor extends ElementVisitorBase { - - private IRDFDataset d; - private Collection datasetClassSet; - - private Set queryClassSet = new HashSet(); - - public SQClassVisitor(IRDFDataset d1) { - if (d1 == null) { - throw new IllegalStateException("[SQClassVisitor]The IRDFDataset d1 is null!!"); - } - this.d = d1; - this.datasetClassSet = d1.getClassSet(); - } - - @Override - public void visit(ElementPathBlock el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementPathBlock el)] "); - if (el == null) { - throw new IllegalStateException( - "[SQClassVisitor::visit(ElementPathBlock el)] 
The ElementPathBlock is null!!"); - } - ListIterator it = el.getPattern().iterator(); - while (it.hasNext()) { - final TriplePath tp = it.next(); - // System.out.println("The triple ==> " + tp.toString()); - Node subject = tp.getSubject(); - // SUBJECT - if (subject.isURI()) { - if (this.datasetClassSet.contains(subject.getURI())) { - this.queryClassSet.add(subject.getURI()); - } - } - // OBJECT - Node object = tp.getObject(); - if (object.isURI()) { - if (this.datasetClassSet.contains(object.getURI())) { - this.queryClassSet.add(object.getURI()); - } - } - } - } - - @Override - public void visit(ElementAssign el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementAssign el))] "); - - } - - @Override - public void visit(ElementBind el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementBind el)] "); - - } - - @Override - public void visit(ElementSubQuery el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementSubQuery el)] "); - } - - public Set getQueryClassSet() { - return queryClassSet; - } - - @Override - public void visit(ElementService el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementService el)] "); - } - - @Override - public void visit(ElementMinus el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementMinus el)] "); - } - - @Override - public void visit(ElementNotExists el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementNotExists el)] "); - } - - @Override - public void visit(ElementExists el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementExists el)] "); - } - - @Override - public void visit(ElementNamedGraph el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementNamedGraph el)] "); - } - - @Override - public void visit(ElementGroup el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementGroup el)] "); - } - - @Override - public void visit(ElementOptional el) { - // 
System.out.println("[SQInstantiationVisitor::visit(ElementOptional el)] "); - } - - @Override - public void visit(ElementDataset el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementDataset el)] "); - } - - @Override - public void visit(ElementUnion el) { - } - - @Override - public void visit(ElementData el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementData el)] "); - } - - @Override - public void visit(ElementFilter el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementFilter el)] "); - } - - @Override - public void visit(ElementTriplesBlock el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementTriplesBlock el)] - // "); - } - -} diff --git a/squire/src/main/java/uk/ac/open/kmi/squire/sparqlqueryvisitor/SQDatatypePropertyAggregator.java b/squire/src/main/java/uk/ac/open/kmi/squire/sparqlqueryvisitor/SQDatatypePropertyAggregator.java new file mode 100644 index 0000000..7077357 --- /dev/null +++ b/squire/src/main/java/uk/ac/open/kmi/squire/sparqlqueryvisitor/SQDatatypePropertyAggregator.java @@ -0,0 +1,35 @@ +package uk.ac.open.kmi.squire.sparqlqueryvisitor; + +import java.util.ListIterator; + +import org.apache.jena.graph.Node; +import org.apache.jena.sparql.core.TriplePath; +import org.apache.jena.sparql.syntax.ElementPathBlock; + +import uk.ac.open.kmi.squire.rdfdataset.IRDFDataset; + +/** + * + * @author carloallocca + */ +public class SQDatatypePropertyAggregator extends AbstractSQAggregator { + + public SQDatatypePropertyAggregator(IRDFDataset d1) { + if (d1 == null) throw new IllegalArgumentException("The dataset cannot be null"); + this.datasetEntitySet = d1.getDatatypePropertySet(); + } + + @Override + public void visit(ElementPathBlock el) { + if (el == null) throw new IllegalArgumentException("The ElementPathBlock is null"); + ListIterator it = el.getPattern().iterator(); + while (it.hasNext()) { + final TriplePath tp = it.next(); + Node predicate = tp.getPredicate(); + // PREDICATE + if 
(predicate.isURI() && this.datasetEntitySet.contains(predicate.getURI())) + this.queryEntitySet.add(predicate.getURI()); + } + } + +} diff --git a/squire/src/main/java/uk/ac/open/kmi/squire/sparqlqueryvisitor/SQDatatypePropertyVisitor.java b/squire/src/main/java/uk/ac/open/kmi/squire/sparqlqueryvisitor/SQDatatypePropertyVisitor.java deleted file mode 100644 index 8620153..0000000 --- a/squire/src/main/java/uk/ac/open/kmi/squire/sparqlqueryvisitor/SQDatatypePropertyVisitor.java +++ /dev/null @@ -1,157 +0,0 @@ -/* - * To change this license header, choose License Headers in Project Properties. - * To change this template file, choose Tools | Templates - * and open the template in the editor. - */ -package uk.ac.open.kmi.squire.sparqlqueryvisitor; - -import java.util.Collection; -import java.util.HashSet; -import java.util.ListIterator; -import java.util.Set; - -import org.apache.jena.graph.Node; -import org.apache.jena.sparql.core.TriplePath; -import org.apache.jena.sparql.syntax.ElementAssign; -import org.apache.jena.sparql.syntax.ElementBind; -import org.apache.jena.sparql.syntax.ElementData; -import org.apache.jena.sparql.syntax.ElementDataset; -import org.apache.jena.sparql.syntax.ElementExists; -import org.apache.jena.sparql.syntax.ElementFilter; -import org.apache.jena.sparql.syntax.ElementGroup; -import org.apache.jena.sparql.syntax.ElementMinus; -import org.apache.jena.sparql.syntax.ElementNamedGraph; -import org.apache.jena.sparql.syntax.ElementNotExists; -import org.apache.jena.sparql.syntax.ElementOptional; -import org.apache.jena.sparql.syntax.ElementPathBlock; -import org.apache.jena.sparql.syntax.ElementService; -import org.apache.jena.sparql.syntax.ElementSubQuery; -import org.apache.jena.sparql.syntax.ElementTriplesBlock; -import org.apache.jena.sparql.syntax.ElementUnion; -import org.apache.jena.sparql.syntax.ElementVisitorBase; - -import uk.ac.open.kmi.squire.rdfdataset.IRDFDataset; - -/** - * - * @author carloallocca - */ -public class 
SQDatatypePropertyVisitor extends ElementVisitorBase { - - private IRDFDataset d; - private Collection datasetDatatypePropertySet; - - private final Set queryDatatypePropertySet = new HashSet(); - - public SQDatatypePropertyVisitor(IRDFDataset d1) { - if (d1 == null) { - throw new IllegalStateException("[SQDatatypePropertyVisitor]The IRDFDataset d1 is null!!"); - } - this.d = d1; - this.datasetDatatypePropertySet = d1.getDatatypePropertySet(); - } - - @Override - public void visit(ElementPathBlock el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementPathBlock el)] "); - if (el == null) { - throw new IllegalStateException( - "[SQObjectPropertyVisitor::visit(ElementPathBlock el)] The ElementPathBlock is null!!"); - } - ListIterator it = el.getPattern().iterator(); - while (it.hasNext()) { - final TriplePath tp = it.next(); - // System.out.println("The triple ==> " + tp.toString()); - Node predicate = tp.getPredicate(); - - // PREDICATE - if (predicate.isURI()) { - if (this.datasetDatatypePropertySet.contains(predicate.getURI())) { - this.queryDatatypePropertySet.add(predicate.getURI()); - } - } - } - } - - public Set getQueryDatatypePropertySet() { - return queryDatatypePropertySet; - } - - @Override - public void visit(ElementAssign el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementAssign el))] "); - - } - - @Override - public void visit(ElementBind el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementBind el)] "); - - } - - @Override - public void visit(ElementSubQuery el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementSubQuery el)] "); - } - - @Override - public void visit(ElementService el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementService el)] "); - } - - @Override - public void visit(ElementMinus el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementMinus el)] "); - } - - @Override - public void visit(ElementNotExists el) { - // 
System.out.println("[SQInstantiationVisitor::visit(ElementNotExists el)] "); - } - - @Override - public void visit(ElementExists el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementExists el)] "); - } - - @Override - public void visit(ElementNamedGraph el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementNamedGraph el)] "); - } - - @Override - public void visit(ElementGroup el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementGroup el)] "); - } - - @Override - public void visit(ElementOptional el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementOptional el)] "); - } - - @Override - public void visit(ElementDataset el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementDataset el)] "); - } - - @Override - public void visit(ElementUnion el) { - } - - @Override - public void visit(ElementData el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementData el)] "); - } - - @Override - public void visit(ElementFilter el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementFilter el)] "); - } - - @Override - public void visit(ElementTriplesBlock el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementTriplesBlock el)] - // "); - } - -} diff --git a/squire/src/main/java/uk/ac/open/kmi/squire/sparqlqueryvisitor/SQGraphPatternExpressionVisitor.java b/squire/src/main/java/uk/ac/open/kmi/squire/sparqlqueryvisitor/SQGraphPatternExpressionAggregator.java similarity index 66% rename from squire/src/main/java/uk/ac/open/kmi/squire/sparqlqueryvisitor/SQGraphPatternExpressionVisitor.java rename to squire/src/main/java/uk/ac/open/kmi/squire/sparqlqueryvisitor/SQGraphPatternExpressionAggregator.java index a2a648b..1dfada6 100644 --- a/squire/src/main/java/uk/ac/open/kmi/squire/sparqlqueryvisitor/SQGraphPatternExpressionVisitor.java +++ b/squire/src/main/java/uk/ac/open/kmi/squire/sparqlqueryvisitor/SQGraphPatternExpressionAggregator.java @@ -5,32 +5,23 @@ */ package 
uk.ac.open.kmi.squire.sparqlqueryvisitor; -import java.util.HashSet; import java.util.ListIterator; -import java.util.Set; import org.apache.jena.sparql.core.TriplePath; import org.apache.jena.sparql.syntax.ElementPathBlock; -import org.apache.jena.sparql.syntax.ElementVisitorBase; /** * * @author carloallocca */ -public class SQGraphPatternExpressionVisitor extends ElementVisitorBase { - - private Set queryGPE = new HashSet<>(); - - public Set getQueryGPE() { - return queryGPE; - } +public class SQGraphPatternExpressionAggregator extends AbstractSQAggregator { @Override public void visit(ElementPathBlock el) { if (el == null) throw new IllegalArgumentException("The element path block must not be null."); ListIterator it = el.getPattern().iterator(); while (it.hasNext()) - queryGPE.add(it.next()); + queryEntitySet.add(it.next()); } } diff --git a/squire/src/main/java/uk/ac/open/kmi/squire/sparqlqueryvisitor/SQObjectPropertyAggregator.java b/squire/src/main/java/uk/ac/open/kmi/squire/sparqlqueryvisitor/SQObjectPropertyAggregator.java new file mode 100644 index 0000000..3cd26dc --- /dev/null +++ b/squire/src/main/java/uk/ac/open/kmi/squire/sparqlqueryvisitor/SQObjectPropertyAggregator.java @@ -0,0 +1,35 @@ +package uk.ac.open.kmi.squire.sparqlqueryvisitor; + +import java.util.ListIterator; + +import org.apache.jena.graph.Node; +import org.apache.jena.sparql.core.TriplePath; +import org.apache.jena.sparql.syntax.ElementPathBlock; + +import uk.ac.open.kmi.squire.rdfdataset.IRDFDataset; + +/** + * + * @author carloallocca + */ +public class SQObjectPropertyAggregator extends AbstractSQAggregator { + + public SQObjectPropertyAggregator(IRDFDataset d1) { + if (d1 == null) throw new IllegalArgumentException("The dataset cannot be null"); + this.datasetEntitySet = d1.getObjectPropertySet(); + } + + @Override + public void visit(ElementPathBlock el) { + if (el == null) throw new IllegalArgumentException("The ElementPathBlock is null"); + ListIterator it = 
el.getPattern().iterator(); + while (it.hasNext()) { + final TriplePath tp = it.next(); + Node predicate = tp.getPredicate(); + // PREDICATE + if (predicate.isURI() && this.datasetEntitySet.contains(predicate.getURI())) + this.queryEntitySet.add(predicate.getURI()); + } + } + +} diff --git a/squire/src/main/java/uk/ac/open/kmi/squire/sparqlqueryvisitor/SQObjectPropertyVisitor.java b/squire/src/main/java/uk/ac/open/kmi/squire/sparqlqueryvisitor/SQObjectPropertyVisitor.java deleted file mode 100644 index c09106f..0000000 --- a/squire/src/main/java/uk/ac/open/kmi/squire/sparqlqueryvisitor/SQObjectPropertyVisitor.java +++ /dev/null @@ -1,157 +0,0 @@ -/* - * To change this license header, choose License Headers in Project Properties. - * To change this template file, choose Tools | Templates - * and open the template in the editor. - */ -package uk.ac.open.kmi.squire.sparqlqueryvisitor; - -import java.util.Collection; -import java.util.HashSet; -import java.util.ListIterator; -import java.util.Set; - -import org.apache.jena.graph.Node; -import org.apache.jena.sparql.core.TriplePath; -import org.apache.jena.sparql.syntax.ElementAssign; -import org.apache.jena.sparql.syntax.ElementBind; -import org.apache.jena.sparql.syntax.ElementData; -import org.apache.jena.sparql.syntax.ElementDataset; -import org.apache.jena.sparql.syntax.ElementExists; -import org.apache.jena.sparql.syntax.ElementFilter; -import org.apache.jena.sparql.syntax.ElementGroup; -import org.apache.jena.sparql.syntax.ElementMinus; -import org.apache.jena.sparql.syntax.ElementNamedGraph; -import org.apache.jena.sparql.syntax.ElementNotExists; -import org.apache.jena.sparql.syntax.ElementOptional; -import org.apache.jena.sparql.syntax.ElementPathBlock; -import org.apache.jena.sparql.syntax.ElementService; -import org.apache.jena.sparql.syntax.ElementSubQuery; -import org.apache.jena.sparql.syntax.ElementTriplesBlock; -import org.apache.jena.sparql.syntax.ElementUnion; -import 
org.apache.jena.sparql.syntax.ElementVisitorBase; - -import uk.ac.open.kmi.squire.rdfdataset.IRDFDataset; - -/** - * - * @author carloallocca - */ -public class SQObjectPropertyVisitor extends ElementVisitorBase { - - private IRDFDataset d; - private Collection datasetObjectPropertySet; - - private Set queryObjectPropertySet = new HashSet(); - - public SQObjectPropertyVisitor(IRDFDataset d1) { - if (d1 == null) { - throw new IllegalStateException("[SQObjectPropertyVisitor]The IRDFDataset d1 is null!!"); - } - this.d = d1; - this.datasetObjectPropertySet = d1.getObjectPropertySet(); - } - - @Override - public void visit(ElementPathBlock el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementPathBlock el)] "); - if (el == null) { - throw new IllegalStateException( - "[SQObjectPropertyVisitor::visit(ElementPathBlock el)] The ElementPathBlock is null!!"); - } - ListIterator it = el.getPattern().iterator(); - while (it.hasNext()) { - final TriplePath tp = it.next(); - // System.out.println("The triple ==> " + tp.toString()); - Node predicate = tp.getPredicate(); - - // PREDICATE - if (predicate.isURI()) { - if (this.datasetObjectPropertySet.contains(predicate.getURI())) { - this.queryObjectPropertySet.add(predicate.getURI()); - } - } - } - } - - public Set getQueryObjectPropertySet() { - return queryObjectPropertySet; - } - - @Override - public void visit(ElementAssign el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementAssign el))] "); - - } - - @Override - public void visit(ElementBind el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementBind el)] "); - - } - - @Override - public void visit(ElementSubQuery el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementSubQuery el)] "); - } - - @Override - public void visit(ElementService el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementService el)] "); - } - - @Override - public void visit(ElementMinus el) { - // 
System.out.println("[SQInstantiationVisitor::visit(ElementMinus el)] "); - } - - @Override - public void visit(ElementNotExists el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementNotExists el)] "); - } - - @Override - public void visit(ElementExists el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementExists el)] "); - } - - @Override - public void visit(ElementNamedGraph el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementNamedGraph el)] "); - } - - @Override - public void visit(ElementGroup el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementGroup el)] "); - } - - @Override - public void visit(ElementOptional el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementOptional el)] "); - } - - @Override - public void visit(ElementDataset el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementDataset el)] "); - } - - @Override - public void visit(ElementUnion el) { - } - - @Override - public void visit(ElementData el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementData el)] "); - } - - @Override - public void visit(ElementFilter el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementFilter el)] "); - } - - @Override - public void visit(ElementTriplesBlock el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementTriplesBlock el)] - // "); - } - -} diff --git a/squire/src/main/java/uk/ac/open/kmi/squire/sparqlqueryvisitor/SQRemoveTripleVisitor.java b/squire/src/main/java/uk/ac/open/kmi/squire/sparqlqueryvisitor/SQRemoveTripleVisitor.java index 723d5ed..9c6de1a 100644 --- a/squire/src/main/java/uk/ac/open/kmi/squire/sparqlqueryvisitor/SQRemoveTripleVisitor.java +++ b/squire/src/main/java/uk/ac/open/kmi/squire/sparqlqueryvisitor/SQRemoveTripleVisitor.java @@ -1,8 +1,3 @@ -/* - * To change this license header, choose License Headers in Project Properties. 
- * To change this template file, choose Tools | Templates - * and open the template in the editor. - */ package uk.ac.open.kmi.squire.sparqlqueryvisitor; import java.util.HashSet; @@ -15,22 +10,9 @@ import org.apache.jena.sparql.core.Var; import org.apache.jena.sparql.expr.Expr; import org.apache.jena.sparql.syntax.Element; -import org.apache.jena.sparql.syntax.ElementAssign; -import org.apache.jena.sparql.syntax.ElementBind; -import org.apache.jena.sparql.syntax.ElementData; -import org.apache.jena.sparql.syntax.ElementDataset; -import org.apache.jena.sparql.syntax.ElementExists; import org.apache.jena.sparql.syntax.ElementFilter; -import org.apache.jena.sparql.syntax.ElementGroup; -import org.apache.jena.sparql.syntax.ElementMinus; -import org.apache.jena.sparql.syntax.ElementNamedGraph; -import org.apache.jena.sparql.syntax.ElementNotExists; import org.apache.jena.sparql.syntax.ElementOptional; import org.apache.jena.sparql.syntax.ElementPathBlock; -import org.apache.jena.sparql.syntax.ElementService; -import org.apache.jena.sparql.syntax.ElementSubQuery; -import org.apache.jena.sparql.syntax.ElementTriplesBlock; -import org.apache.jena.sparql.syntax.ElementUnion; import org.apache.jena.sparql.syntax.ElementVisitorBase; import org.apache.jena.sparql.syntax.ElementWalker; @@ -42,157 +24,49 @@ public class SQRemoveTripleVisitor extends ElementVisitorBase { private Triple tp; // This is the triple pattern that we need to remove from the given query. 
- // private subjVar - public SQRemoveTripleVisitor() { - super(); - } - public SQRemoveTripleVisitor(Triple triplePattern) { this.tp = triplePattern; // triplePattern.getObject().isVariable() } - @Override - public void visit(ElementAssign el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementAssign el))] "); - - } - - @Override - public void visit(ElementBind el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementBind el)] "); - - } - - @Override - public void visit(ElementSubQuery el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementSubQuery el)] "); - } - - @Override - public void visit(ElementService el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementService el)] "); - } - - @Override - public void visit(ElementMinus el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementMinus el)] "); - } - - @Override - public void visit(ElementNotExists el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementNotExists el)] "); - } - - @Override - public void visit(ElementExists el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementExists el)] "); - } - - @Override - public void visit(ElementNamedGraph el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementNamedGraph el)] "); - } - - @Override - public void visit(ElementGroup el) { - - System.out.println("[SQRemoveTripleVisitor::visit(ElementGroup el)] " + el.toString()); - System.out.println(""); - } - @Override public void visit(ElementOptional el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementOptional el)] "); - // get optional elements and walk them Element optionalQP = el.getOptionalElement(); ElementWalker.walk(optionalQP, this); - } @Override public void visit(ElementPathBlock el) { - if (el == null) { - throw new IllegalStateException( - "[SQRemoveTripleVisitor::visit(ElementPathBlock el)] The ElementPathBlock is null!!"); - } - + if (el == null) throw new 
IllegalArgumentException("The ElementPathBlock is null"); ListIterator it = el.getPattern().iterator(); while (it.hasNext()) { final TriplePath tp1 = it.next(); - System.out - .println("[SQRemoveTripleVisitor::11111111111111111] TriplePath tp1 " + tp1.asTriple().toString()); - - if (this.tp != null) { - if (this.tp.matches(tp1.asTriple())) { - System.out - .println("[SQRemoveTripleVisitor::3333333333333] this.tp.toString() " + this.tp.toString()); - - it.remove(); - } else { - System.out.println( - "[SQRemoveTripleVisitor::222222222222222222] this.tp.toString() " + this.tp.toString()); - - } - } + if (this.tp != null && this.tp.matches(tp1.asTriple())) it.remove(); // queryGPE.add(tp.toString()); // System.out.println(tp.toString()); } - System.out.println("[SQRemoveTripleVisitor::visit(ElementPathBlock el)] " + el.toString()); - System.out.println(""); - - } - - @Override - public void visit(ElementDataset el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementDataset el)] "); - } - - @Override - public void visit(ElementUnion el) { - System.out.println("[SQRemoveTripleVisitor::visit(ElementUnion el)] " + el.toString()); - System.out.println(""); - - } - - @Override - public void visit(ElementData el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementData el)] "); } @Override public void visit(ElementFilter el) { - System.out.println("[SQRemoveTripleVisitor::visit(ElementFilter el)] " + el.toString()); - System.out.println(""); - // el.getExpr() - // ...get the variables of the FILTER expression Expr filterExp = el.getExpr();// .getVarsMentioned().contains(el); Set expVars = filterExp.getVarsMentioned(); // ...get the variables of the triple pattern that we want to delete - Set tpVars = new HashSet(); + Set tpVars = new HashSet<>(); Node subj = this.tp.getSubject(); - if (subj.isVariable()) { - tpVars.add((Var) subj); - } + if (subj.isVariable()) tpVars.add((Var) subj); Node pred = this.tp.getPredicate(); - if (pred.isVariable()) { - 
tpVars.add((Var) pred); - } + if (pred.isVariable()) tpVars.add((Var) pred); Node obj = this.tp.getObject(); - if (obj.isVariable()) { - tpVars.add((Var) obj); - } + if (obj.isVariable()) tpVars.add((Var) obj); // ...check whether the FILTER expression contains any of the triple pattern // variable for (Var var : expVars) { // ..if it does then we have to delete the entire FILTER expression if (tpVars.contains(var)) { - System.out.println("[SQRemoveTripleVisitor::visit(ElementFilter el)] YESssssssssssssssss "); - // filterExp. - // UpdateRequest updates = UpdateFactory.create(); // // // set ?username to "test" @@ -202,16 +76,9 @@ public void visit(ElementFilter el) { // UpdateRequest transform = UpdateTransformOps.transform(updates, // varNodeHashMap); // System.out.println("--- TRANSFORMED ---\n"+transform.toString()); - } } } - @Override - public void visit(ElementTriplesBlock el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementTriplesBlock el)] - // "); - } - } diff --git a/squire/src/main/java/uk/ac/open/kmi/squire/sparqlqueryvisitor/SQVariableAggregator.java b/squire/src/main/java/uk/ac/open/kmi/squire/sparqlqueryvisitor/SQVariableAggregator.java new file mode 100644 index 0000000..5884ea3 --- /dev/null +++ b/squire/src/main/java/uk/ac/open/kmi/squire/sparqlqueryvisitor/SQVariableAggregator.java @@ -0,0 +1,32 @@ +package uk.ac.open.kmi.squire.sparqlqueryvisitor; + +import java.util.ListIterator; + +import org.apache.jena.graph.Node; +import org.apache.jena.sparql.core.TriplePath; +import org.apache.jena.sparql.syntax.ElementPathBlock; + +/** + * + * @author carloallocca + */ +public class SQVariableAggregator extends AbstractSQAggregator { + + @Override + public void visit(ElementPathBlock el) { + if (el == null) throw new IllegalArgumentException("The ElementPathBlock is null"); + ListIterator it = el.getPattern().iterator(); + while (it.hasNext()) { + final TriplePath tp = it.next(); + handle(tp.getSubject()); + handle(tp.getPredicate()); + 
handle(tp.getObject()); + + } + } + + private void handle(Node n) { + if (n.isVariable() && !this.queryEntitySet.contains(n.getName())) this.queryEntitySet.add(n.getName()); + } + +} diff --git a/squire/src/main/java/uk/ac/open/kmi/squire/sparqlqueryvisitor/SQVariableVisitor.java b/squire/src/main/java/uk/ac/open/kmi/squire/sparqlqueryvisitor/SQVariableVisitor.java deleted file mode 100644 index 5e8007f..0000000 --- a/squire/src/main/java/uk/ac/open/kmi/squire/sparqlqueryvisitor/SQVariableVisitor.java +++ /dev/null @@ -1,159 +0,0 @@ -/* - * To change this license header, choose License Headers in Project Properties. - * To change this template file, choose Tools | Templates - * and open the template in the editor. - */ -package uk.ac.open.kmi.squire.sparqlqueryvisitor; - -import java.util.ArrayList; -import java.util.ListIterator; - -import org.apache.jena.graph.Node; -import org.apache.jena.sparql.core.TriplePath; -import org.apache.jena.sparql.syntax.ElementAssign; -import org.apache.jena.sparql.syntax.ElementBind; -import org.apache.jena.sparql.syntax.ElementData; -import org.apache.jena.sparql.syntax.ElementDataset; -import org.apache.jena.sparql.syntax.ElementExists; -import org.apache.jena.sparql.syntax.ElementFilter; -import org.apache.jena.sparql.syntax.ElementGroup; -import org.apache.jena.sparql.syntax.ElementMinus; -import org.apache.jena.sparql.syntax.ElementNamedGraph; -import org.apache.jena.sparql.syntax.ElementNotExists; -import org.apache.jena.sparql.syntax.ElementOptional; -import org.apache.jena.sparql.syntax.ElementPathBlock; -import org.apache.jena.sparql.syntax.ElementService; -import org.apache.jena.sparql.syntax.ElementSubQuery; -import org.apache.jena.sparql.syntax.ElementTriplesBlock; -import org.apache.jena.sparql.syntax.ElementUnion; -import org.apache.jena.sparql.syntax.ElementVisitorBase; - -/** - * - * @author carloallocca - */ -public class SQVariableVisitor extends ElementVisitorBase { - - private final ArrayList queryVariableSet = 
new ArrayList(); - - public SQVariableVisitor() { - super(); - } - - public ArrayList getQueryVariableSet() { - return queryVariableSet; - } - - @Override - public void visit(ElementPathBlock el) { - if (el == null) { - throw new IllegalStateException( - "[SQVariableVisitor::visit(ElementPathBlock el)] The ElementPathBlock is null!!"); - } - ListIterator it = el.getPattern().iterator(); - while (it.hasNext()) { - final TriplePath tp = it.next(); - // System.out.println("The triple ==> " + tp.toString()); - Node subject = tp.getSubject(); - // SUBJECT - if (subject.isVariable()) { - if (!this.queryVariableSet.contains(subject.getName())) { - this.queryVariableSet.add(subject.getName()); - } - - } - // PREDICATE - Node predicate = tp.getPredicate(); - if (predicate.isVariable()) { - if (!this.queryVariableSet.contains(predicate.getName())) { - this.queryVariableSet.add(predicate.getName()); - } - } - // OBJECT - Node object = tp.getObject(); - if (object.isVariable()) { - if (!this.queryVariableSet.contains(object.getName())) { - this.queryVariableSet.add(object.getName()); - } - } - } - } - - @Override - public void visit(ElementAssign el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementAssign el))] "); - - } - - @Override - public void visit(ElementBind el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementBind el)] "); - - } - - @Override - public void visit(ElementSubQuery el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementSubQuery el)] "); - } - - @Override - public void visit(ElementService el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementService el)] "); - } - - @Override - public void visit(ElementMinus el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementMinus el)] "); - } - - @Override - public void visit(ElementNotExists el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementNotExists el)] "); - } - - @Override - public void visit(ElementExists el) { - // 
System.out.println("[SQInstantiationVisitor::visit(ElementExists el)] "); - } - - @Override - public void visit(ElementNamedGraph el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementNamedGraph el)] "); - } - - @Override - public void visit(ElementGroup el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementGroup el)] "); - } - - @Override - public void visit(ElementOptional el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementOptional el)] "); - } - - @Override - public void visit(ElementDataset el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementDataset el)] "); - } - - @Override - public void visit(ElementUnion el) { - } - - @Override - public void visit(ElementData el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementData el)] "); - } - - @Override - public void visit(ElementFilter el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementFilter el)] "); - } - - @Override - public void visit(ElementTriplesBlock el) { - // System.out.println("[SQInstantiationVisitor::visit(ElementTriplesBlock el)] - // "); - } - -} diff --git a/squire/src/main/java/uk/ac/open/kmi/squire/sparqlqueryvisitor/TemplateVariableScanner.java b/squire/src/main/java/uk/ac/open/kmi/squire/sparqlqueryvisitor/TemplateVariableScanner.java index a01c7c7..64bc35b 100644 --- a/squire/src/main/java/uk/ac/open/kmi/squire/sparqlqueryvisitor/TemplateVariableScanner.java +++ b/squire/src/main/java/uk/ac/open/kmi/squire/sparqlqueryvisitor/TemplateVariableScanner.java @@ -5,9 +5,9 @@ */ package uk.ac.open.kmi.squire.sparqlqueryvisitor; -import static uk.ac.open.kmi.squire.core4.QueryOperator.TEMPLATE_VAR_CLASS; -import static uk.ac.open.kmi.squire.core4.QueryOperator.TEMPLATE_VAR_PROP_DT; -import static uk.ac.open.kmi.squire.core4.QueryOperator.TEMPLATE_VAR_PROP_OBJ; +import static uk.ac.open.kmi.squire.core4.QueryTransform.TEMPLATE_VAR_CLASS; +import static 
uk.ac.open.kmi.squire.core4.QueryTransform.TEMPLATE_VAR_PROP_DT; +import static uk.ac.open.kmi.squire.core4.QueryTransform.TEMPLATE_VAR_PROP_OBJ; import java.util.HashSet; import java.util.ListIterator; diff --git a/squire/src/main/java/uk/ac/open/kmi/squire/treequerypatterns/QTTree.java b/squire/src/main/java/uk/ac/open/kmi/squire/treequerypatterns/QTTree.java index 53fc42c..c63214e 100644 --- a/squire/src/main/java/uk/ac/open/kmi/squire/treequerypatterns/QTTree.java +++ b/squire/src/main/java/uk/ac/open/kmi/squire/treequerypatterns/QTTree.java @@ -5,11 +5,11 @@ */ package uk.ac.open.kmi.squire.treequerypatterns; -import static uk.ac.open.kmi.squire.core4.QueryOperator.TEMPLATE_VAR_CLASS; -import static uk.ac.open.kmi.squire.core4.QueryOperator.TEMPLATE_VAR_INDIVIDUAL; -import static uk.ac.open.kmi.squire.core4.QueryOperator.TEMPLATE_VAR_LITERAL; -import static uk.ac.open.kmi.squire.core4.QueryOperator.TEMPLATE_VAR_PROP_DT; -import static uk.ac.open.kmi.squire.core4.QueryOperator.TEMPLATE_VAR_PROP_OBJ; +import static uk.ac.open.kmi.squire.core4.QueryTransform.TEMPLATE_VAR_CLASS; +import static uk.ac.open.kmi.squire.core4.QueryTransform.TEMPLATE_VAR_INDIVIDUAL; +import static uk.ac.open.kmi.squire.core4.QueryTransform.TEMPLATE_VAR_LITERAL; +import static uk.ac.open.kmi.squire.core4.QueryTransform.TEMPLATE_VAR_PROP_DT; +import static uk.ac.open.kmi.squire.core4.QueryTransform.TEMPLATE_VAR_PROP_OBJ; import java.util.ArrayList; import java.util.Collections; diff --git a/squire/src/main/java/uk/ac/open/kmi/squire/treequerypatterns/QueryRecommendation.java b/squire/src/main/java/uk/ac/open/kmi/squire/treequerypatterns/QueryRecommendation.java index 82ed55f..562f765 100644 --- a/squire/src/main/java/uk/ac/open/kmi/squire/treequerypatterns/QueryRecommendation.java +++ b/squire/src/main/java/uk/ac/open/kmi/squire/treequerypatterns/QueryRecommendation.java @@ -5,11 +5,11 @@ */ package uk.ac.open.kmi.squire.treequerypatterns; -import static 
uk.ac.open.kmi.squire.core4.QueryOperator.TEMPLATE_VAR_CLASS; -import static uk.ac.open.kmi.squire.core4.QueryOperator.TEMPLATE_VAR_INDIVIDUAL; -import static uk.ac.open.kmi.squire.core4.QueryOperator.TEMPLATE_VAR_LITERAL; -import static uk.ac.open.kmi.squire.core4.QueryOperator.TEMPLATE_VAR_PROP_DT; -import static uk.ac.open.kmi.squire.core4.QueryOperator.TEMPLATE_VAR_PROP_OBJ; +import static uk.ac.open.kmi.squire.core4.QueryTransform.TEMPLATE_VAR_CLASS; +import static uk.ac.open.kmi.squire.core4.QueryTransform.TEMPLATE_VAR_INDIVIDUAL; +import static uk.ac.open.kmi.squire.core4.QueryTransform.TEMPLATE_VAR_LITERAL; +import static uk.ac.open.kmi.squire.core4.QueryTransform.TEMPLATE_VAR_PROP_DT; +import static uk.ac.open.kmi.squire.core4.QueryTransform.TEMPLATE_VAR_PROP_OBJ; import java.util.ArrayList; import java.util.Collection; diff --git a/squire/src/test/java/uk/ac/open/kmi/squire/core4/DummyDatasets.java b/squire/src/test/java/uk/ac/open/kmi/squire/core4/DummyDatasets.java new file mode 100644 index 0000000..8c902a2 --- /dev/null +++ b/squire/src/test/java/uk/ac/open/kmi/squire/core4/DummyDatasets.java @@ -0,0 +1,58 @@ +package uk.ac.open.kmi.squire.core4; + +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; + +import org.apache.jena.atlas.json.JSON; +import org.apache.jena.atlas.json.JsonObject; +import org.apache.jena.atlas.json.JsonValue; + +import uk.ac.open.kmi.squire.evaluation.TestGoldStandard; +import uk.ac.open.kmi.squire.rdfdataset.ClassSignature; +import uk.ac.open.kmi.squire.rdfdataset.IRDFDataset; +import uk.ac.open.kmi.squire.rdfdataset.InMemoryRdfDataset; +import uk.ac.open.kmi.squire.rdfdataset.WritableRdfDataset; + +/** + * Utility methods to initialize the fake {@link IRDFDataset}s used in the unit + * tests. 
+ * + * @author alessandro + * + */ +public class DummyDatasets { + + public static Map populate(String path) { + final Map datasets = new HashMap<>(); + JsonObject testdata = JSON.parse(TestGoldStandard.class.getResourceAsStream(path)); + if (!testdata.isObject()) + throw new IllegalArgumentException("Could not parse a JSON object from resource at " + path); + for (String endpoint : testdata.keys()) { + if (!datasets.containsKey(endpoint)) datasets.put(endpoint, new InMemoryRdfDataset()); + WritableRdfDataset ds = datasets.get(endpoint); + JsonObject jds = testdata.get(endpoint).getAsObject(); + for (String clazz : jds.keys()) { + if (!ds.getClassSignatures().containsKey(clazz)) + ds.getClassSignatures().put(clazz, new ClassSignature(clazz)); + ClassSignature sign = ds.getClassSignatures().get(clazz); + JsonObject jClazz = jds.get(clazz).getAsObject(); + if (jClazz.hasKey("dps")) + for (Iterator it = jClazz.get("dps").getAsArray().iterator(); it.hasNext();) { + String p = it.next().getAsString().value(); + sign.addProperty(p); + ds.addDatatypeProperty(p); + } + if (jClazz.hasKey("ops")) + for (Iterator it = jClazz.get("ops").getAsArray().iterator(); it.hasNext();) { + String p = it.next().getAsString().value(); + sign.addProperty(p); + ds.addObjectProperty(p); + } + } + ds.rebuildPropertyCoOccurrenceMap(); + } + return datasets; + } + +} diff --git a/squire/src/test/java/uk/ac/open/kmi/squire/core4/TestClassSignatureGeneralizer.java b/squire/src/test/java/uk/ac/open/kmi/squire/core4/TestClassSignatureGeneralizer.java new file mode 100644 index 0000000..c434f26 --- /dev/null +++ b/squire/src/test/java/uk/ac/open/kmi/squire/core4/TestClassSignatureGeneralizer.java @@ -0,0 +1,345 @@ +package uk.ac.open.kmi.squire.core4; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.util.Iterator; +import java.util.Map; +import java.util.Set; + +import org.apache.jena.graph.Node; 
+import org.apache.jena.graph.NodeFactory; +import org.apache.jena.query.Query; +import org.apache.jena.query.QueryFactory; +import org.apache.jena.sparql.core.TriplePath; +import org.apache.jena.sparql.core.Var; +import org.apache.jena.sparql.syntax.Element; +import org.apache.jena.sparql.syntax.ElementGroup; +import org.apache.jena.sparql.syntax.ElementPathBlock; +import org.apache.jena.sparql.syntax.ElementVisitorBase; +import org.apache.jena.sparql.vocabulary.FOAF; +import org.apache.jena.vocabulary.DC; +import org.apache.jena.vocabulary.DCTerms; +import org.apache.jena.vocabulary.RDF; +import org.apache.jena.vocabulary.RDFS; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +import uk.ac.open.kmi.squire.rdfdataset.IRDFDataset; +import uk.ac.open.kmi.squire.rdfdataset.WritableRdfDataset; + +public class TestClassSignatureGeneralizer { + + /** + * Node or node type. Utility class to unify the check by type or by value. + * + * @author alessandro + * + */ + private class NorT { + + private Object me; + + public NorT(Class type) { + this.me = type; + } + + public NorT(Node value) { + this.me = value; + } + + @SuppressWarnings("unchecked") + public Class asClass() { + if (!isClass()) + throw new UnsupportedOperationException("This is not a Class but a " + me.getClass().getName()); + return (Class) me; + } + + public Node asNode() { + if (!isNode()) + throw new UnsupportedOperationException("This is not a Node but a " + me.getClass().getName()); + return (Node) me; + } + + public boolean isClass() { + return me instanceof Class; + } + + public boolean isNode() { + return me instanceof Node; + } + + } + + private static Map datasets; + + private static final String prefix = "http://example.org/dataset/"; + + /** + * Constructs the testbed and checks that it has what the tests need. 
+ */ + @BeforeClass + public static void setUp() throws Exception { + String patent = "http://purl.org/ontology/bibo/Patent"; + datasets = DummyDatasets.populate("/signatures2.json"); + assertEquals(2, datasets.size()); + assertTrue(datasets.containsKey(prefix + "1")); + IRDFDataset ds = datasets.get(prefix + "1"); + assertTrue(ds.getClassSet().contains(FOAF.Document.getURI())); + assertEquals(2, ds.getClassSignatures().get(FOAF.Document.getURI()).listPathOrigins().size()); + assertTrue(ds.getClassSet().contains(patent)); + assertEquals(5, ds.getClassSignatures().get(patent).listPathOrigins().size()); + assertTrue(datasets.containsKey(prefix + "2")); + ds = datasets.get(prefix + "2"); + assertTrue(ds.getClassSet().contains(FOAF.Document.getURI())); + assertEquals(6, ds.getClassSignatures().get(FOAF.Document.getURI()).listPathOrigins().size()); + assertTrue(ds.getClassSet().contains(patent)); + assertEquals(9, ds.getClassSignatures().get(patent).listPathOrigins().size()); + } + + private Generalizer _op; + + private final NorT bibo_Patent = new NorT(NodeFactory.createURI("http://purl.org/ontology/bibo/Patent")); + + private final NorT ol_Article = new NorT( + NodeFactory.createURI("http://data.open.ac.uk/openlearn/ontology/OpenLearnArticle")); + + private final NorT rdf_type = new NorT(NodeFactory.createURI(RDF.type.getURI())); + + /** + * Denotes "a variable" when we don't care which. + */ + private final NorT type_var = new NorT(Var.class); + + private final NorT var_date = new NorT(NodeFactory.createVariable("date")); + + private final NorT var_s = new NorT(NodeFactory.createVariable("s")); + + private final NorT var_title = new NorT(NodeFactory.createVariable("title")); + + @Before + public void before() throws Exception { + _op = new ClassSignatureGeneralizer(datasets.get(prefix + "1"), datasets.get(prefix + "2")); + } + + /** + * Class and one of two properties exists in both datasets, regardless of its + * usage. 
+ * + * The TP with the property in common stays, while the other is replaced + */ + @Test + public void classAndOnePropertyInCommon() throws Exception { + String q = "PREFIX bibo: " + " SELECT DISTINCT ?author ?title WHERE {" + + "?s a bibo:Patent ; ?author" + + " ; ?title" + " }"; + Set qG = _op.generalize(QueryFactory.create(q)); + assertEquals(1, qG.size()); + Query q1 = qG.iterator().next(); + assertTrue(find(q1, var_s, rdf_type, bibo_Patent)); + assertTrue(find(q1, var_s, new NorT(DCTerms.title.asNode()), var_title)); + assertFalse(find(q1, var_s, new NorT(DC.contributor.asNode()), type_var)); + assertTrue(find(q1, var_s, type_var, null)); + } + + /** + * Class and two properties exist in both datasets, regardless of their usage. + * + * All the TPs stay as they are + */ + @Test + public void classAndTwoPropertiesInCommon() throws Exception { + String q = "PREFIX bibo: " + " SELECT DISTINCT ?author ?title WHERE {" + + "?s a bibo:Patent ; bibo:authorList ?author ; ?title" + " }"; + Set qG = _op.generalize(QueryFactory.create(q)); + assertEquals(1, qG.size()); + Query q1 = qG.iterator().next(); + assertTrue(find(q1, var_s, rdf_type, bibo_Patent)); + assertTrue(find(q1, var_s, new NorT(NodeFactory.createURI("http://purl.org/ontology/bibo/authorList")), + new NorT(NodeFactory.createVariable("author")))); + assertTrue(find(q1, var_s, new NorT(DCTerms.title.asNode()), var_title)); + assertFalse(find(q1, var_s, type_var, null)); + } + + /** + * Class and one of two properties exists in both datasets, regardless of its + * usage. 
+ * + * The TP with the property in common stays, while the other is replaced + */ + @Test + public void classNoAndOnePropertyInCommon() throws Exception { + // dc:title is present, dc:subject is not + String q = "PREFIX dc: " + " SELECT DISTINCT ?author ?subject WHERE {" + "?s a <" + + ol_Article.asNode() + "> ; dc:title ?title" + " ; dc:subject ?subject" + " }"; + Set qG = _op.generalize(QueryFactory.create(q)); + assertEquals(1, qG.size()); + Query q1 = qG.iterator().next(); + assertFalse(find(q1, var_s, rdf_type, ol_Article)); + assertTrue(find(q1, var_s, rdf_type, type_var)); + assertTrue(find(q1, var_s, new NorT(DCTerms.title.asNode()), var_title)); + assertFalse(find(q1, var_s, new NorT(DCTerms.subject.asNode()), type_var)); + assertTrue(find(q1, var_s, type_var, null)); + } + + /** + * One RDF type not present in the target dataset. + * + * It must be replaced by a generalized TP. + */ + // @Test + public void oneTypeNotPresent() throws Exception { + String q = "PREFIX foaf: " + "SELECT DISTINCT ?author WHERE {" + "?s a <" + + ol_Article.asNode() + "> ; foaf:maker ?author" + " }"; + Set qG = _op.generalize(QueryFactory.create(q)); + assertEquals(1, qG.size()); + Query q1 = qG.iterator().next(); + assertFalse(find(q1, var_s, rdf_type, ol_Article)); + assertTrue(find(q1, var_s, rdf_type, type_var)); + } + + /** + * One RDF type present in the target dataset as well. + * + * It must stay as it is. + */ + @Test + public void oneTypePresent() throws Exception { + String q = "PREFIX foaf: " + "SELECT DISTINCT ?author WHERE {" + + "?s a foaf:Document . 
?s foaf:maker ?author" + " }"; + Set qG = _op.generalize(QueryFactory.create(q)); + assertEquals(1, qG.size()); + Query q1 = qG.iterator().next(); + assertTrue(find(q1, var_s, rdf_type, new NorT(NodeFactory.createURI(FOAF.Document.getURI())))); + assertFalse(find(q1, var_s, rdf_type, type_var)); + } + + /** + * One type not appearing in the target dataset; two properties, both used in + * the target dataset but never for the same type. + * + * The type assertion is generalized, and two queries are produced, one for each + * property. + */ + @Test + public void twoPropertiesNeverTogether() throws Exception { + String q = "SELECT DISTINCT ?title ?pic WHERE {" + " ?s a <" + ol_Article.asNode() + ">" + + " ; ?title" + " ; ?pic" + " }"; + Set qG = _op.generalize(QueryFactory.create(q)); + assertEquals(2, qG.size()); + NorT varc = new NorT(Var.class); + for (Iterator it = qG.iterator(); it.hasNext();) { + Query q1 = it.next(); + assertFalse(find(q1, var_s, rdf_type, ol_Article)); + assertTrue(find(q1, var_s, rdf_type, varc)); + } + } + + /** + * One type appearing in the target dataset; two properties, both used in the + * target dataset but only one for that type. + * + * The generalization must preserve the type and common property, and generalize + * the other. 
+ */ + @Test + public void twoPropertiesNotForSameTypePresent() throws Exception { + String q = "SELECT DISTINCT ?title ?date WHERE {" + " ?s a " + + " ; ?title" + " ; ?date" + + " }"; + Set qG = _op.generalize(QueryFactory.create(q)); + assertEquals(1, qG.size()); + Query q1 = qG.iterator().next(); + assertTrue(find(q1, var_s, rdf_type, bibo_Patent)); + assertTrue(find(q1, var_s, type_var, var_title)); + assertFalse(find(q1, var_s, new NorT(NodeFactory.createURI(RDFS.label.getURI())), var_title)); + assertTrue(find(q1, var_s, new NorT(DCTerms.date.asNode()), var_date)); + assertFalse(find(q1, var_s, type_var, var_date)); + } + + /** + * Two RDF types for a subject, both are present in the target dataset as well. + * + * Both RDF type assertions must remain as they are. + */ + @Test + public void twoTypesBothPresent() throws Exception { + String q = "PREFIX bibo: " + " PREFIX foaf: " + + " SELECT DISTINCT ?author WHERE {" + " ?s a bibo:Patent , foaf:Document ; foaf:maker ?author" + " }"; + Set qG = _op.generalize(QueryFactory.create(q)); + assertEquals(1, qG.size()); + Query q1 = qG.iterator().next(); + assertTrue(find(q1, var_s, rdf_type, bibo_Patent)); + assertTrue(find(q1, var_s, rdf_type, new NorT(NodeFactory.createURI(FOAF.Document.getURI())))); + assertFalse(find(q1, var_s, rdf_type, type_var)); + } + + /** + * Two RDF types for a subject, but neither is present in the target dataset. + * + * There must be a single generalized rdf:type triple pattern. 
+ */ + @Test + public void twoTypesNeitherPresent() throws Exception { + String q = "PREFIX ol: " + + " PREFIX foaf: " + " SELECT DISTINCT ?author WHERE {" + + " ?s a ol:OpenLearnArticle , ol:Podcast ; foaf:maker ?author" + " }"; + Set qG = _op.generalize(QueryFactory.create(q)); + assertEquals(1, qG.size()); + Query q1 = qG.iterator().next(); + assertFalse(find(q1, var_s, rdf_type, + new NorT(NodeFactory.createURI("http://data.open.ac.uk/openlearn/ontology/Podcast")))); + assertFalse(find(q1, var_s, rdf_type, ol_Article)); + assertTrue(find(q1, var_s, rdf_type, type_var)); + } + + /** + * Two RDF types for a subject, but only one is present in the target dataset as + * well. + * + * There must be one type assertion on the common class only + one generalized + * triple pattern. + */ + @Test + public void twoTypesOnePresent() throws Exception { + String q = "SELECT DISTINCT ?title WHERE {" + "?s a <" + ol_Article.asNode() + + ">, " + " ; ?title" + " }"; + Set qG = _op.generalize(QueryFactory.create(q)); + assertEquals(1, qG.size()); + Query q1 = qG.iterator().next(); + assertTrue(find(q1, var_s, rdf_type, new NorT(NodeFactory.createVariable("ct1")))); + assertTrue(find(q1, var_s, rdf_type, new NorT(NodeFactory.createURI(FOAF.Document.getURI())))); + assertFalse(find(q1, var_s, rdf_type, ol_Article)); + } + + private boolean find(Query q, NorT s, NorT p, NorT o) { + final boolean[] found = new boolean[] { false }; + q.getQueryPattern().visit(new ElementVisitorBase() { + @Override + public void visit(ElementGroup el) { + for (Element el2 : el.getElements()) + el2.visit(this); + } + + @Override + public void visit(ElementPathBlock el) { + for (Iterator it = el.patternElts(); it.hasNext();) { + TriplePath tp = it.next(); + if (check(s, tp.getSubject()) && check(p, tp.getPredicate()) && check(o, tp.getObject())) { + found[0] = true; + return; + } + } + } + + private boolean check(NorT nort, Node node) { + return nort == null || nort.isClass() && 
nort.asClass().isAssignableFrom(node.getClass()) + || nort.isNode() && nort.asNode().equals(node); + } + }); + return found[0]; + } + +} diff --git a/squire/src/test/java/uk/ac/open/kmi/squire/utils/TestJenaAssumptions.java b/squire/src/test/java/uk/ac/open/kmi/squire/utils/TestJenaAssumptions.java index 72437b7..ba891e3 100644 --- a/squire/src/test/java/uk/ac/open/kmi/squire/utils/TestJenaAssumptions.java +++ b/squire/src/test/java/uk/ac/open/kmi/squire/utils/TestJenaAssumptions.java @@ -53,7 +53,7 @@ public void qSolExists() throws Exception { } /* - * Two triple paths made only of the same variables are equal. + * Two triple paths made only of the same variables are equivalent. */ @Test public void tpEquals() throws Exception { @@ -63,7 +63,7 @@ public void tpEquals() throws Exception { } /** - * Two sets of triple paths made only of the same variables are equal. + * Two sets of triple paths made only of the same variables are equivalent. * * @throws Exception */ diff --git a/squire/src/test/java/uk/ac/open/kmi/squire/utils/TreeNode.java b/squire/src/test/java/uk/ac/open/kmi/squire/utils/TreeNode.java deleted file mode 100644 index 6da5d61..0000000 --- a/squire/src/test/java/uk/ac/open/kmi/squire/utils/TreeNode.java +++ /dev/null @@ -1,54 +0,0 @@ -/* - * To change this license header, choose License Headers in Project Properties. - * To change this template file, choose Tools | Templates - * and open the template in the editor. - */ -package uk.ac.open.kmi.squire.utils; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; - -/** - * - * @author callocca - * - * @param =List - * but the next version is going to be the Query. 
- */ -public class TreeNode { - - private T data = null; - - private List> children = new ArrayList<>(); - - public TreeNode(T data) { - this(data, Collections.emptyList()); - } - - public TreeNode(T data, List> children) { - if (data != null) this.data = data; - if (children != null) this.children.addAll(children); - } - - public void addChild(TreeNode child) { - if (this.children == null) this.children = new ArrayList<>(); - this.children.add(child);// .addSibling(childNode); - } - - public List> getChildren() { - return children; - } - - public T getData() { - return data; - } - - public void setChildren(List> children) { - this.children = children; - } - - public void setData(T data) { - this.data = data; - } -} diff --git a/squire/src/test/resources/goldstandard.json b/squire/src/test/resources/goldstandard.json index f944303..1e52edb 100644 --- a/squire/src/test/resources/goldstandard.json +++ b/squire/src/test/resources/goldstandard.json @@ -64,9 +64,10 @@ }, { "description": "List the labels and denominations of all schools.", - "original": "SELECT DISTINCT ?school ?label ?notation WHERE { ?school rdf:type . ?school ?label . ?school ?notation }", + "note":"A case where 2+ properties co-exist in the target dataset, but not for the class that maps best (none of them in fact!). Ideally both properties should be lost.", + "original": "SELECT DISTINCT ?school ?name ?denomination WHERE { ?school a ; ?name ; ?denomination }", "expected": [ - "SELECT DISTINCT ?school ?denomination ?label WHERE { ?school a . ?school ?notation . ?school ?label }" + "SELECT DISTINCT ?school ?name ?denomination WHERE { ?school a ; ?name ; ?denomination }" ] }, { @@ -147,10 +148,11 @@ ] }, { - "description": "Data about catalogue patents", - "original": "PREFIX dc: PREFIX bibo: SELECT DISTINCT ?patent ?label ?date ?title ?creator ?status ?authorList WHERE { ?patent a bibo:Patent . ?patent ?label . ?patent dc:date ?date .?patent dc:title ?title .?patent dc:creator ?creator . 
?patent bibo:status ?status . ?patent bibo:authorList ?authorList }", + "description": "Data about catalogued patents", + "note": "An instance of the common properties not being present together in the target dataset. Note that dce:contributor is also used by Aalto but as a datatype/annotation property. Also note that ideally the recommended query should lose the '?status' binding and create a rather complex chain on '?authorList'", + "original": "PREFIX dc: PREFIX bibo: SELECT DISTINCT ?title ?date ?author ?status WHERE { ?patent a bibo:Patent ; ?title ; dc:date ?date ; dc:creator ?author . ?patent bibo:status ?status }", "expected": [ - "PREFIX dc: PREFIX bibo: SELECT DISTINCT ?patent ?label ?date ?title ?creator ?status ?authorList WHERE { ?patent a bibo:Patent . ?patent dc:date ?date . ?patent dc:title ?title . ?patent bibo:authorList ?authorList }" + "PREFIX dc: PREFIX bibo: SELECT DISTINCT ?title ?date ?author WHERE { ?patent a bibo:Patent ; dc:title ?title ; dc:date ?date ; bibo:authorList/(rdf:first|(rdf:rest/rdf:first)+) ?creator }" ] }, { diff --git a/squire/src/test/resources/signatures.json b/squire/src/test/resources/signatures.json new file mode 100644 index 0000000..833ff87 --- /dev/null +++ b/squire/src/test/resources/signatures.json @@ -0,0 +1,32 @@ +{ + "http://data.aalto.fi/sparql" : { + "http://purl.org/ontology/bibo/Patent" : { + "dps": [ + "http://purl.org/dc/elements/1.1/contributor", + "http://purl.org/dc/terms/date", + "http://purl.org/dc/terms/title" + ], + "ops": [ + "http://purl.org/ontology/bibo/authorList" + ] + } + }, + "http://data.open.ac.uk/query" : { + "http://purl.org/ontology/bibo/Patent" : { + "dps": [ + "http://purl.org/dc/terms/date", + "http://purl.org/dc/terms/title", + "http://purl.org/ontology/bibo/abstract", + "http://purl.org/ontology/bibo/uri", + "http://www.w3.org/2000/01/rdf-schema#label" + ], + "ops": [ + "http://purl.org/dc/terms/creator", + "http://purl.org/dc/terms/isPartOf", + 
"http://purl.org/ontology/bibo/authorList", + "http://purl.org/ontology/bibo/status", + "http://rdfs.org/ns/void#inDataset" + ] + } + } +} \ No newline at end of file diff --git a/squire/src/test/resources/signatures2.json b/squire/src/test/resources/signatures2.json new file mode 100644 index 0000000..0b7f5a1 --- /dev/null +++ b/squire/src/test/resources/signatures2.json @@ -0,0 +1,68 @@ +{ + "http://example.org/dataset/1": { + "http://purl.org/ontology/bibo/Patent": { + "dps": [ + "http://purl.org/dc/elements/1.1/contributor", + "http://purl.org/dc/terms/date", + "http://purl.org/dc/terms/title", + "http://www.w3.org/2000/01/rdf-schema#label" + ], + "ops": [ + "http://purl.org/ontology/bibo/authorList" + ] + }, + "http://xmlns.com/foaf/0.1/Document": { + "dps": [ + "http://purl.org/dc/elements/1.1/contributor", + "http://purl.org/dc/terms/title" + ] + }, + "http://data.open.ac.uk/openlearn/ontology/OpenLearnArticle": { + "dps": [ + "http://purl.org/dc/terms/title" + ], + "ops": [ + "http://purl.org/dc/terms/subject", + "http://xmlns.com/foaf/0.1/depiction" + ] + }, + "http://data.open.ac.uk/openlearn/ontology/Podcast": { + + } + }, + "http://example.org/dataset/2": { + "http://purl.org/ontology/bibo/Patent": { + "dps": [ + "http://purl.org/dc/terms/date", + "http://purl.org/dc/terms/title", + "http://purl.org/ontology/bibo/abstract", + "http://purl.org/ontology/bibo/uri" + ], + "ops": [ + "http://purl.org/dc/terms/creator", + "http://purl.org/dc/terms/isPartOf", + "http://purl.org/ontology/bibo/authorList", + "http://purl.org/ontology/bibo/status", + "http://rdfs.org/ns/void#inDataset" + ] + }, + "http://xmlns.com/foaf/0.1/Document": { + "dps": [ + "http://purl.org/dc/terms/date", + "http://purl.org/dc/terms/title" + ], + "ops": [ + "http://purl.org/dc/terms/creator", + "http://purl.org/dc/terms/isPartOf", + "http://purl.org/ontology/bibo/authorList", + "http://rdfs.org/ns/void#inDataset" + ] + }, + "http://xmlns.com/foaf/0.1/Person": { + "dps": [ + 
"http://www.w3.org/2000/01/rdf-schema#label" + ], + "ops": ["http://xmlns.com/foaf/0.1/depiction"] + } + } +} \ No newline at end of file