Skip to content

Commit

Permalink
GH-5182 improve performance of SHACL sh:pattern
Browse files Browse the repository at this point in the history
  • Loading branch information
hmottestad committed Nov 7, 2024
1 parent 3e4f94f commit 32d167b
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 48 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import java.util.Objects;
import java.util.Set;
import java.util.function.Function;
import java.util.regex.Pattern;

import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Literal;
Expand All @@ -30,11 +31,16 @@
import org.eclipse.rdf4j.sail.shacl.ast.planNodes.PatternFilter;
import org.eclipse.rdf4j.sail.shacl.ast.planNodes.PlanNode;
import org.eclipse.rdf4j.sail.shacl.wrapper.data.ConnectionsGroup;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class PatternConstraintComponent extends AbstractSimpleConstraintComponent {

private static final Logger logger = LoggerFactory.getLogger(PatternConstraintComponent.class);

String pattern;
String flags;
private final Pattern compiledPattern;

public PatternConstraintComponent(String pattern, String flags) {
super();
Expand All @@ -44,6 +50,52 @@ public PatternConstraintComponent(String pattern, String flags) {
if (flags == null) {
this.flags = "";
}

if (flags != null && !flags.isEmpty()) {
int flag = 0b0;

if (flags.contains("i")) {
flag = flag | Pattern.CASE_INSENSITIVE;
logger.trace("PatternFilter constructed with case insensitive flag");
}

if (flags.contains("d")) {
flag = flag | Pattern.UNIX_LINES;
logger.trace("PatternFilter constructed with UNIX lines flag");
}

if (flags.contains("m")) {
flag = flag | Pattern.MULTILINE;
logger.trace("PatternFilter constructed with multiline flag");
}

if (flags.contains("s")) {
flag = flag | Pattern.DOTALL;
logger.trace("PatternFilter constructed with dotall flag");
}

if (flags.contains("u")) {
flag = flag | Pattern.UNICODE_CASE;
logger.trace("PatternFilter constructed with unicode case flag");
}

if (flags.contains("x")) {
flag = flag | Pattern.COMMENTS;
logger.trace("PatternFilter constructed with comments flag");
}

if (flags.contains("U")) {
flag = flag | Pattern.UNICODE_CHARACTER_CLASS;
logger.trace("PatternFilter constructed with unicode character class flag");
}

this.compiledPattern = Pattern.compile(pattern, flag);
logger.trace("PatternFilter constructed with pattern: {} and flags: {}", pattern, flags);

} else {
this.compiledPattern = Pattern.compile(pattern, 0b0);
logger.trace("PatternFilter constructed with pattern: {} and no flags", pattern);
}
}

@Override
Expand Down Expand Up @@ -87,7 +139,7 @@ public ConstraintComponent deepClone() {

@Override
Function<PlanNode, FilterPlanNode> getFilterAttacher(ConnectionsGroup connectionsGroup) {
- return (parent) -> new PatternFilter(parent, pattern, flags, connectionsGroup);
+ return (parent) -> new PatternFilter(parent, compiledPattern, connectionsGroup);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,53 +34,9 @@ public class PatternFilter extends FilterPlanNode {

private final Pattern pattern;

- public PatternFilter(PlanNode parent, String pattern, String flags, ConnectionsGroup connectionsGroup) {
+ public PatternFilter(PlanNode parent, Pattern pattern, ConnectionsGroup connectionsGroup) {
  super(parent, connectionsGroup);
- if (flags != null && !flags.isEmpty()) {
- int flag = 0b0;
-
- if (flags.contains("i")) {
- flag = flag | Pattern.CASE_INSENSITIVE;
- logger.trace("PatternFilter constructed with case insensitive flag");
- }
-
- if (flags.contains("d")) {
- flag = flag | Pattern.UNIX_LINES;
- logger.trace("PatternFilter constructed with UNIX lines flag");
- }
-
- if (flags.contains("m")) {
- flag = flag | Pattern.MULTILINE;
- logger.trace("PatternFilter constructed with multiline flag");
- }
-
- if (flags.contains("s")) {
- flag = flag | Pattern.DOTALL;
- logger.trace("PatternFilter constructed with dotall flag");
- }
-
- if (flags.contains("u")) {
- flag = flag | Pattern.UNICODE_CASE;
- logger.trace("PatternFilter constructed with unicode case flag");
- }
-
- if (flags.contains("x")) {
- flag = flag | Pattern.COMMENTS;
- logger.trace("PatternFilter constructed with comments flag");
- }
-
- if (flags.contains("U")) {
- flag = flag | Pattern.UNICODE_CHARACTER_CLASS;
- logger.trace("PatternFilter constructed with unicode character class flag");
- }
-
- this.pattern = Pattern.compile(pattern, flag);
- logger.trace("PatternFilter constructed with pattern: {} and flags: {}", pattern, flags);
-
- } else {
- this.pattern = Pattern.compile(pattern, 0b0);
- logger.trace("PatternFilter constructed with pattern: {} and no flags", pattern);
- }
+ this.pattern = pattern;
  }

private static Literal str(Value argValue, ValueFactory valueFactory) {
Expand All @@ -104,8 +60,9 @@ boolean checkTuple(Reference t) {
Value literal = t.get().getValue();
literal = str(literal, SimpleValueFactory.getInstance());

- if (literal == null)
+ if (literal == null) {
  return false;
+ }

if (QueryEvaluationUtility.isStringLiteral(literal)) {
boolean result = pattern.matcher(((Literal) literal).getLabel()).find();
Expand Down

0 comments on commit 32d167b

Please sign in to comment.