From 8fa48c51c6abaa1ab7a9943396fdbbbc62d4e757 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Fri, 8 Nov 2024 20:46:13 +0100 Subject: [PATCH] GH-5189 cache Value objects retrieved from parent sail --- .../rdf4j/sail/shacl/ast/ValidationQuery.java | 3 +- .../ast/planNodes/AllTargetsPlanNode.java | 3 +- .../ast/planNodes/FilterByPredicate.java | 20 ++--- .../planNodes/FilterByPredicateObject.java | 58 +++++++------ .../shacl/ast/planNodes/LeftOuterJoin.java | 6 +- .../ast/planNodes/NonUniqueTargetLang.java | 3 +- .../shacl/wrapper/data/ConnectionsGroup.java | 84 ++++++++++++++++++- 7 files changed, 133 insertions(+), 44 deletions(-) diff --git a/core/sail/shacl/src/main/java/org/eclipse/rdf4j/sail/shacl/ast/ValidationQuery.java b/core/sail/shacl/src/main/java/org/eclipse/rdf4j/sail/shacl/ast/ValidationQuery.java index f3b6fdd26b6..959b9b04c4b 100644 --- a/core/sail/shacl/src/main/java/org/eclipse/rdf4j/sail/shacl/ast/ValidationQuery.java +++ b/core/sail/shacl/src/main/java/org/eclipse/rdf4j/sail/shacl/ast/ValidationQuery.java @@ -232,8 +232,9 @@ private String getFullQueryString() { } else { extraVariablesString = ""; } - } else + } else { extraVariablesString = ""; + } if (scope_validationReport == ConstraintComponent.Scope.propertyShape && propertyShapeWithValue_validationReport) { diff --git a/core/sail/shacl/src/main/java/org/eclipse/rdf4j/sail/shacl/ast/planNodes/AllTargetsPlanNode.java b/core/sail/shacl/src/main/java/org/eclipse/rdf4j/sail/shacl/ast/planNodes/AllTargetsPlanNode.java index 7aacec352ac..4067c60f46b 100644 --- a/core/sail/shacl/src/main/java/org/eclipse/rdf4j/sail/shacl/ast/planNodes/AllTargetsPlanNode.java +++ b/core/sail/shacl/src/main/java/org/eclipse/rdf4j/sail/shacl/ast/planNodes/AllTargetsPlanNode.java @@ -71,8 +71,9 @@ protected void init() { @Override public void localClose() { - if (iterator != null) + if (iterator != null) { iterator.close(); + } } @Override diff --git a/core/sail/shacl/src/main/java/org/eclipse/rdf4j/sail/shacl/ast/planNodes/FilterByPredicate.java b/core/sail/shacl/src/main/java/org/eclipse/rdf4j/sail/shacl/ast/planNodes/FilterByPredicate.java index 8c5b49b0218..b69d024bbb5 100644 --- a/core/sail/shacl/src/main/java/org/eclipse/rdf4j/sail/shacl/ast/planNodes/FilterByPredicate.java +++ b/core/sail/shacl/src/main/java/org/eclipse/rdf4j/sail/shacl/ast/planNodes/FilterByPredicate.java @@ -21,7 +21,6 @@ import org.eclipse.rdf4j.common.iteration.CloseableIteration; import org.eclipse.rdf4j.model.IRI; import org.eclipse.rdf4j.model.Resource; -import org.eclipse.rdf4j.model.Statement; import org.eclipse.rdf4j.model.Value; import org.eclipse.rdf4j.sail.SailConnection; import org.eclipse.rdf4j.sail.memory.MemoryStoreConnection; @@ -36,6 +35,7 @@ public class FilterByPredicate implements PlanNode { private final Set filterOnPredicates; final PlanNode parent; private final On on; + private final ConnectionsGroup connectionsGroup; private boolean printed = false; private ValidationExecutionLogger validationExecutionLogger; private final Resource[] dataGraph; @@ -53,6 +53,7 @@ public FilterByPredicate(SailConnection connection, Set filterOnPredicates, assert this.connection != null; this.filterOnPredicates = filterOnPredicates; this.on = on; + this.connectionsGroup = connectionsGroup; } @Override @@ -77,18 +78,11 @@ void calculateNext() { return; } - filterOnPredicates = FilterByPredicate.this.filterOnPredicates.stream() - .map(predicate -> { - try (var stream = connection - .getStatements(null, predicate, null, true, dataGraph) - .stream()) { - return stream.map(Statement::getPredicate) - .findAny() - .orElse(null); - } - } - ) - .filter(Objects::nonNull) + filterOnPredicates = FilterByPredicate.this.filterOnPredicates + .stream() + .map(iri -> connectionsGroup.getSailSpecificValue(iri, + ConnectionsGroup.StatementPosition.predicate, connection + )) .collect(Collectors.toList()); } diff --git a/core/sail/shacl/src/main/java/org/eclipse/rdf4j/sail/shacl/ast/planNodes/FilterByPredicateObject.java b/core/sail/shacl/src/main/java/org/eclipse/rdf4j/sail/shacl/ast/planNodes/FilterByPredicateObject.java index 9a44baba198..50dd91eb17e 100644 --- a/core/sail/shacl/src/main/java/org/eclipse/rdf4j/sail/shacl/ast/planNodes/FilterByPredicateObject.java +++ b/core/sail/shacl/src/main/java/org/eclipse/rdf4j/sail/shacl/ast/planNodes/FilterByPredicateObject.java @@ -20,8 +20,8 @@ import org.eclipse.rdf4j.common.iteration.CloseableIteration; import org.eclipse.rdf4j.model.IRI; import org.eclipse.rdf4j.model.Resource; -import org.eclipse.rdf4j.model.Statement; import org.eclipse.rdf4j.model.Value; +import org.eclipse.rdf4j.model.vocabulary.RDF; import org.eclipse.rdf4j.sail.SailConnection; import org.eclipse.rdf4j.sail.SailException; import org.eclipse.rdf4j.sail.memory.MemoryStoreConnection; @@ -42,10 +42,12 @@ public class FilterByPredicateObject implements PlanNode { private final FilterOn filterOn; private final PlanNode parent; private final boolean returnMatching; + private final ConnectionsGroup connectionsGroup; private StackTraceElement[] stackTrace; private boolean printed = false; private ValidationExecutionLogger validationExecutionLogger; private final Resource[] dataGraph; + boolean typeFilterWithInference; private final Cache cache; @@ -68,6 +70,11 @@ public FilterByPredicateObject(SailConnection connection, Resource[] dataGraph, cache = CacheBuilder.newBuilder().maximumSize(10000).build(); } + this.connectionsGroup = connectionsGroup; + if (includeInferred && connectionsGroup.getRdfsSubClassOfReasoner() != null + && RDF.TYPE.equals(filterOnPredicate)) { + typeFilterWithInference = true; + } // this.stackTrace = Thread.currentThread().getStackTrace(); } @@ -148,31 +155,32 @@ void calculateNext() { private void internResources() { if (filterOnObject == null) { - - try (var stream = connection - .getStatements(null, FilterByPredicateObject.this.filterOnPredicate, null, includeInferred, - dataGraph) - .stream()) { - filterOnPredicate = stream.map(Statement::getPredicate).findAny().orElse(null); - } - + filterOnPredicate = connectionsGroup.getSailSpecificValue( + FilterByPredicateObject.this.filterOnPredicate, + ConnectionsGroup.StatementPosition.predicate, connection + ); if (filterOnPredicate == null) { filterOnObject = new Resource[0]; } else { - filterOnObject = FilterByPredicateObject.this.filterOnObject.stream() - .map(object -> { - try (var stream = connection - .getStatements(null, filterOnPredicate, object, includeInferred, dataGraph) - .stream()) { - return stream.map(Statement::getObject) - .map(o -> ((Resource) o)) - .findAny() - .orElse(null); - } - } - ) - .filter(Objects::nonNull) - .toArray(Resource[]::new); + if (typeFilterWithInference) { + filterOnObject = FilterByPredicateObject.this.filterOnObject.stream() + .flatMap(type -> connectionsGroup.getRdfsSubClassOfReasoner() + .backwardsChain(type) + .stream()) + .distinct() + .map(object -> connectionsGroup.getSailSpecificValue(object, + ConnectionsGroup.StatementPosition.object, connection + )) + .filter(Objects::nonNull) + .toArray(Resource[]::new); + } else { + filterOnObject = FilterByPredicateObject.this.filterOnObject.stream() + .map(object -> connectionsGroup.getSailSpecificValue(object, + ConnectionsGroup.StatementPosition.object, connection + )) + .filter(Objects::nonNull) + .toArray(Resource[]::new); + } } } @@ -237,8 +245,8 @@ private Boolean matchesCached(Resource subject, IRI filterOnPredicate, Resource[ private boolean matchesUnCached(Resource subject, IRI filterOnPredicate, Resource[] filterOnObject) { for (Resource object : filterOnObject) { - if (connection.hasStatement(subject, filterOnPredicate, object, includeInferred, - dataGraph)) { + if (connection.hasStatement(subject, filterOnPredicate, object, + includeInferred && !typeFilterWithInference, dataGraph)) { return true; } } diff --git a/core/sail/shacl/src/main/java/org/eclipse/rdf4j/sail/shacl/ast/planNodes/LeftOuterJoin.java b/core/sail/shacl/src/main/java/org/eclipse/rdf4j/sail/shacl/ast/planNodes/LeftOuterJoin.java index bfe8c47d520..9384889e0d0 100644 --- a/core/sail/shacl/src/main/java/org/eclipse/rdf4j/sail/shacl/ast/planNodes/LeftOuterJoin.java +++ b/core/sail/shacl/src/main/java/org/eclipse/rdf4j/sail/shacl/ast/planNodes/LeftOuterJoin.java @@ -119,11 +119,13 @@ void calculateNext() { @Override public void localClose() { try { - if (leftIterator != null) + if (leftIterator != null) { leftIterator.close(); + } } finally { - if (rightIterator != null) + if (rightIterator != null) { rightIterator.close(); + } } } diff --git a/core/sail/shacl/src/main/java/org/eclipse/rdf4j/sail/shacl/ast/planNodes/NonUniqueTargetLang.java b/core/sail/shacl/src/main/java/org/eclipse/rdf4j/sail/shacl/ast/planNodes/NonUniqueTargetLang.java index 6ebeb1716ce..eab7562ecac 100644 --- a/core/sail/shacl/src/main/java/org/eclipse/rdf4j/sail/shacl/ast/planNodes/NonUniqueTargetLang.java +++ b/core/sail/shacl/src/main/java/org/eclipse/rdf4j/sail/shacl/ast/planNodes/NonUniqueTargetLang.java @@ -166,8 +166,9 @@ private void calculateNext() { @Override public void localClose() { - if (parentIterator != null) + if (parentIterator != null) { parentIterator.close(); + } } @Override diff --git a/core/sail/shacl/src/main/java/org/eclipse/rdf4j/sail/shacl/wrapper/data/ConnectionsGroup.java b/core/sail/shacl/src/main/java/org/eclipse/rdf4j/sail/shacl/wrapper/data/ConnectionsGroup.java index 38461cb1edf..a12aab4c84c 100644 --- a/core/sail/shacl/src/main/java/org/eclipse/rdf4j/sail/shacl/wrapper/data/ConnectionsGroup.java +++ b/core/sail/shacl/src/main/java/org/eclipse/rdf4j/sail/shacl/wrapper/data/ConnectionsGroup.java @@ -13,11 +13,18 @@ import java.util.Map; import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutionException; import org.eclipse.rdf4j.common.annotation.InternalUseOnly; import org.eclipse.rdf4j.common.transaction.IsolationLevels; +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.Resource; +import org.eclipse.rdf4j.model.Statement; +import org.eclipse.rdf4j.model.Value; +import org.eclipse.rdf4j.model.util.Values; import org.eclipse.rdf4j.sail.Sail; import org.eclipse.rdf4j.sail.SailConnection; +import org.eclipse.rdf4j.sail.SailException; import org.eclipse.rdf4j.sail.shacl.ShaclSailConnection; import org.eclipse.rdf4j.sail.shacl.Stats; import org.eclipse.rdf4j.sail.shacl.ast.planNodes.BufferedSplitter; @@ -27,6 +34,9 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.google.common.cache.Cache; +import com.google.common.cache.CacheBuilder; + /** * @apiNote since 3.0. This feature is for internal use only: its existence, signature or behavior may change without * warning from one release to the next. @@ -52,6 +62,11 @@ public class ConnectionsGroup implements AutoCloseable { // used to cache Select plan nodes so that we don't query a store for the same data during the same validation step. private final Map nodeCache = new ConcurrentHashMap<>(); + private final Cache INTERNED_VALUE_CACHE = CacheBuilder.newBuilder() + .concurrencyLevel(Runtime.getRuntime().availableProcessors() * 2) + .maximumSize(10000) + .build(); + public ConnectionsGroup(SailConnection baseConnection, SailConnection previousStateConnection, Sail addedStatements, Sail removedStatements, Stats stats, RdfsSubClassOfReasonerProvider rdfsSubClassOfReasonerProvider, @@ -95,6 +110,67 @@ public SailConnection getRemovedStatements() { return removedStatements; } + public enum StatementPosition { + subject, + predicate, + object + } + + /** + * This method is a performance optimization for converting a more general value object, like RDF.TYPE, to the + * specific Value object that the underlying sail would use for that node. It uses a cache to avoid querying the + * store for the same value multiple times during the same validation. + * + * @param value the value object to be converted + * @param statementPosition the position of the statement (subject, predicate, or object) + * @param connection the SailConnection used to retrieve the specific Value object + * @param the type of the value + * @return the specific Value object used by the underlying sail, or the original value if no specific Value is + * found + * @throws SailException if an error occurs while retrieving the specific Value object + */ + public T getSailSpecificValue(T value, StatementPosition statementPosition, + SailConnection connection) { + try { + + Value t = INTERNED_VALUE_CACHE.get(value, () -> { + + switch (statementPosition) { + case subject: + try (var statements = connection.getStatements(((Resource) value), null, null, false).stream()) { + Resource ret = statements.map(Statement::getSubject).findAny().orElse(null); + if (ret == null) { + return value; + } + return ret; + } + case predicate: + try (var statements = connection.getStatements(null, ((IRI) value), null, false).stream()) { + IRI ret = statements.map(Statement::getPredicate).findAny().orElse(null); + if (ret == null) { + return value; + } + return ret; + } + case object: + try (var statements = connection.getStatements(null, null, value, false).stream()) { + Value ret = statements.map(Statement::getObject).findAny().orElse(null); + if (ret == null) { + return value; + } + return ret; + } + } + + throw new IllegalStateException("Unknown statement position: " + statementPosition); + + }); + return ((T) t); + } catch (ExecutionException e) { + throw new SailException(e); + } + } + @Override public void close() { if (addedStatements != null) { @@ -143,9 +219,15 @@ public PlanNode getCachedNodeFor(PlanNode planNode) { } + /** + * Returns the RdfsSubClassOfReasoner if it is enabled. If it is not enabled this method will return null. + * + * @return RdfsSubClassOfReasoner or null + */ public RdfsSubClassOfReasoner getRdfsSubClassOfReasoner() { - if (rdfsSubClassOfReasonerProvider == null) + if (rdfsSubClassOfReasonerProvider == null) { return null; + } return rdfsSubClassOfReasonerProvider.getRdfsSubClassOfReasoner(); }