diff --git a/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java b/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java index 4b44b11b..b4820c2b 100644 --- a/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java +++ b/src/main/java/org/aksw/iguana/cc/query/handler/QueryHandler.java @@ -25,17 +25,14 @@ import java.net.URI; import java.nio.file.Files; import java.nio.file.Path; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Objects; +import java.util.*; import java.util.function.Supplier; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * The QueryHandler is used by every worker that extends the AbstractWorker. - * It initializes the QuerySource, QuerySelector, QueryList and, if needed, PatternHandler. + * It initializes the QuerySource, QuerySelector, QueryList and, if needed, TemplateHandler. * After the initialization, it provides the next query to the worker using the generated QuerySource * and the order given by the QuerySelector. * @@ -71,9 +68,9 @@ public record Config ( Order order, Long seed, Language lang, - Pattern pattern + Template template ) { - public Config(@JsonProperty(required = true) String path, Format format, String separator, Boolean caching, Order order, Long seed, Language lang, Pattern pattern) { + public Config(@JsonProperty(required = true) String path, Format format, String separator, Boolean caching, Order order, Long seed, Language lang, Template template) { this.path = path; this.format = (format == null ? Format.ONE_PER_LINE : format); this.caching = (caching == null || caching); @@ -81,7 +78,7 @@ public Config(@JsonProperty(required = true) String path, Format format, String this.seed = (seed == null ? 0 : seed); this.lang = (lang == null ? Language.SPARQL : lang); this.separator = (separator == null ? 
"" : separator); - this.pattern = pattern; + this.template = template; } public Config(@JsonProperty(required = true) String path, Format format, String separator, Boolean caching, Order order, Long seed, Language lang) { @@ -137,8 +134,8 @@ public String value() { } } - public record Pattern(@JsonProperty(required = true) URI endpoint, Long limit, Boolean save) { - public Pattern(URI endpoint, Long limit, Boolean save) { + public record Template(@JsonProperty(required = true) URI endpoint, Long limit, Boolean save) { + public Template(URI endpoint, Long limit, Boolean save) { this.endpoint = endpoint; this.limit = limit == null ? 2000 : limit; this.save = save == null || save; @@ -176,17 +173,17 @@ public QueryHandler(Config config) throws IOException { this.config = config; var querySource = createQuerySource(Path.of(config.path)); - if (config.pattern() != null) { + if (config.template() != null) { final var originalPath = querySource.getPath(); - Path instancePath = Files.isDirectory(originalPath) ? + final Path instancePath = Files.isDirectory(originalPath) ? originalPath.resolveSibling(originalPath.getFileName() + "_instances.txt") : originalPath.resolveSibling(originalPath.getFileName().toString().split("\\.")[0] + "_instances.txt"); if (Files.exists(instancePath)) { - LOGGER.info("Already existing query pattern instances have been found and will be reused. Delete the following file to regenerate them: {}", instancePath.toAbsolutePath()); + LOGGER.info("Already existing query template instances have been found and will be reused. 
Delete the following file to regenerate them: {}", instancePath.toAbsolutePath()); querySource = createQuerySource(instancePath); } else { - final List instances = instantiatePatternQueries(querySource, config.pattern); - if (config.pattern.save) { + final List instances = instantiateTemplateQueries(querySource, config.template); + if (config.template.save) { Files.createFile(instancePath); try (var writer = Files.newBufferedWriter(instancePath)) { for (String instance : instances) { @@ -287,55 +284,56 @@ public Config getConfig() { /** - * Instantiates pattern queries from the given query source by querying a SPARQL endpoint. - * A query pattern is a SPARQL 1.1 Query, which can have additional variables in the regex form of - * %%var[0-9]+%% in the Basic Graph Pattern. + * Instantiates template queries from the given query source by querying a SPARQL endpoint. + * A query template is a SPARQL 1.1 Query, which can have additional variables in the regex form of + * %%var[0-9]+%% in the Basic Graph Pattern. *

- * Exemplary pattern:
+ * Exemplary template:
* SELECT * WHERE {?s %%var1%% ?o . ?o <http://exa.com> %%var2%%}
- * This pattern will then be converted to:
+ * This template will then be converted to:
* SELECT ?var1 ?var2 {?s ?var1 ?o . ?o <http://exa.com> ?var2}
* and will request query solutions from the given sparql endpoint (e.g DBpedia).
- * The solutions will then be instantiated into the query pattern. + * The solutions will then be instantiated into the query template. * The result may look like the following:
* SELECT * WHERE {?s <http://prop/1> ?o . ?o <http://exa.com> "123"}
* SELECT * WHERE {?s <http://prop/1> ?o . ?o <http://exa.com> "12"}
* SELECT * WHERE {?s <http://prop/2> ?o . ?o <http://exa.com> "1234"}
*/ - private static List instantiatePatternQueries(QuerySource querySource, Config.Pattern config) throws IOException { - final var patternQueries = new FileCachingQueryList(querySource); - final Pattern pattern = Pattern.compile("%%var\\d+%%"); + private static List instantiateTemplateQueries(QuerySource querySource, Config.Template config) throws IOException { + final var templateQueries = new FileCachingQueryList(querySource); + final Pattern variablePattern = Pattern.compile("%%var[0-9]+%%"); final var instances = new ArrayList(); - for (int i = 0; i < patternQueries.size(); i++) { - // replace all variables in the query pattern with SPARQL variables + for (int i = 0; i < templateQueries.size(); i++) { + // replace all variables in the query template with SPARQL variables // and store the variable names - var patternQueryString = patternQueries.getQuery(i); - final Matcher matcher = pattern.matcher(patternQueryString); - final var variables = new ArrayList(); + var templateQueryString = templateQueries.getQuery(i); + final Matcher matcher = variablePattern.matcher(templateQueryString); + final var variables = new LinkedHashSet(); while (matcher.find()) { final var match = matcher.group(); final var variable = "?" 
+ match.replaceAll("%%", ""); variables.add(variable); - patternQueryString = patternQueryString.replaceAll(match, variable); + templateQueryString = templateQueryString.replace(match, variable); } // build SELECT query for finding bindings for the variables - final var patternQuery = QueryFactory.create(patternQueryString); - final var whereClause = "WHERE " + patternQuery.getQueryPattern(); + final var templateQuery = QueryFactory.create(templateQueryString); + final var whereClause = "WHERE " + templateQuery.getQueryPattern(); final var selectQueryString = new ParameterizedSparqlString(); selectQueryString.setCommandText("SELECT DISTINCT " + String.join(" ", variables)); selectQueryString.append(" " + whereClause); selectQueryString.append(" LIMIT " + config.limit()); - selectQueryString.setNsPrefixes(patternQuery.getPrefixMapping()); + selectQueryString.setNsPrefixes(templateQuery.getPrefixMapping()); - // send request to SPARQL endpoint and instantiate the pattern based on results + // send request to SPARQL endpoint and instantiate the template based on results try (QueryExecution exec = QueryExecutionFactory.createServiceRequest(config.endpoint().toString(), selectQueryString.asQuery())) { ResultSet resultSet = exec.execSelect(); if (!resultSet.hasNext()) { - LOGGER.warn("No results for query pattern: {}", patternQueryString); + LOGGER.warn("No results for query template: {}", templateQueryString); } - while (resultSet.hasNext()) { - var instance = new ParameterizedSparqlString(patternQueryString); + int count = 0; + while (resultSet.hasNext() && count++ < config.limit()) { + var instance = new ParameterizedSparqlString(templateQueryString); QuerySolution solution = resultSet.next(); for (String var : resultSet.getResultVars()) { instance.clearParam(var); diff --git a/src/test/java/org/aksw/iguana/cc/query/handler/QueryHandlerConfigTest.java b/src/test/java/org/aksw/iguana/cc/query/handler/QueryHandlerConfigTest.java index 131e94b5..71e25605 100644 --- 
a/src/test/java/org/aksw/iguana/cc/query/handler/QueryHandlerConfigTest.java +++ b/src/test/java/org/aksw/iguana/cc/query/handler/QueryHandlerConfigTest.java @@ -73,10 +73,10 @@ private static Stream testDeserializationData() { QueryHandler.Config.Order.RANDOM, 42L, QueryHandler.Config.Language.SPARQL, - new QueryHandler.Config.Pattern(URI.create("http://example.org/sparql"), 2000L, true) + new QueryHandler.Config.Template(URI.create("http://example.org/sparql"), 2000L, true) ), """ - {"path":"some.queries","format":"separator", "separator": "\\n", "caching":true,"order":"random","seed":42,"lang":"SPARQL","pattern": {"endpoint": "http://example.org/sparql"}} + {"path":"some.queries","format":"separator", "separator": "\\n", "caching":true,"order":"random","seed":42,"lang":"SPARQL","template": {"endpoint": "http://example.org/sparql"}} """ ) ); diff --git a/src/test/java/org/aksw/iguana/cc/query/handler/QueryHandlerTest.java b/src/test/java/org/aksw/iguana/cc/query/handler/QueryHandlerTest.java index 3b3b33a9..aa083231 100644 --- a/src/test/java/org/aksw/iguana/cc/query/handler/QueryHandlerTest.java +++ b/src/test/java/org/aksw/iguana/cc/query/handler/QueryHandlerTest.java @@ -29,7 +29,6 @@ public class QueryHandlerTest { static Path tempDir; static Path tempFileSep; static Path tempFileLine; - static Path tempPatternFile; static List queries; static List folderQueries; diff --git a/src/test/java/org/aksw/iguana/cc/query/handler/PatternQueriesTest.java b/src/test/java/org/aksw/iguana/cc/query/handler/TemplateQueriesTest.java similarity index 61% rename from src/test/java/org/aksw/iguana/cc/query/handler/PatternQueriesTest.java rename to src/test/java/org/aksw/iguana/cc/query/handler/TemplateQueriesTest.java index 327aaab5..e52710e4 100644 --- a/src/test/java/org/aksw/iguana/cc/query/handler/PatternQueriesTest.java +++ b/src/test/java/org/aksw/iguana/cc/query/handler/TemplateQueriesTest.java @@ -4,6 +4,7 @@ import com.github.tomakehurst.wiremock.core.Options; import 
com.github.tomakehurst.wiremock.core.WireMockConfiguration; import com.github.tomakehurst.wiremock.junit5.WireMockExtension; +import org.apache.jena.query.QueryParseException; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.condition.DisabledInNativeImage; @@ -13,11 +14,14 @@ import java.net.URI; import java.nio.charset.StandardCharsets; import java.nio.file.Files; +import java.nio.file.Path; import static com.github.tomakehurst.wiremock.client.WireMock.*; @DisabledInNativeImage // WireMock is not supported in native image -public class PatternQueriesTest extends QueryHandlerTest { +public class TemplateQueriesTest extends QueryHandlerTest { + + public static Path tempTemplateFile; private static final String RESPONSE_JSON = """ { @@ -46,26 +50,26 @@ public class PatternQueriesTest extends QueryHandlerTest { .options(new WireMockConfiguration() .useChunkedTransferEncoding(Options.ChunkedEncodingPolicy.NEVER) .dynamicPort() - .notifier(new ConsoleNotifier(true)) + .notifier(new ConsoleNotifier(false)) .jettyIdleTimeout(2000L) .jettyStopTimeout(2000L) .timeout(2000)) .build(); @Test - public void testPatternQueries() throws IOException { - String patternQuery = "PREFIX rdf: SELECT * WHERE {?s rdf:type %%var0%% ; %%var1%% %%var2%%. %%var2%% ?p }"; - tempPatternFile = Files.createTempFile(parentFolder, "Pattern", ".txt"); - Files.writeString(tempPatternFile, patternQuery, StandardCharsets.UTF_8); + public void testTemplateQueries() throws IOException { + String templateQuery = "PREFIX rdf: SELECT * WHERE {?s rdf:type %%var0%% ; %%var1%% %%var2%%. 
%%var2%% ?p }"; + tempTemplateFile = Files.createTempFile(parentFolder, "Template", ".txt"); + Files.writeString(tempTemplateFile, templateQuery, StandardCharsets.UTF_8); final var queryHandlerConfig = new QueryHandler.Config( - tempPatternFile.toString(), + tempTemplateFile.toString(), QueryHandler.Config.Format.ONE_PER_LINE, null, true, QueryHandler.Config.Order.LINEAR, null, QueryHandler.Config.Language.SPARQL, - new QueryHandler.Config.Pattern(URI.create("http://localhost:" + wm.getPort()), 2000L, false) + new QueryHandler.Config.Template(URI.create("http://localhost:" + wm.getPort()), 2000L, false) ); wm.stubFor(get(anyUrl()) .withQueryParam("query", matching("PREFIX\\s+rdf:\\s+\\s+SELECT\\s+DISTINCT\\s+\\?var0\\s+\\?var1\\s+\\?var2\\s+WHERE\\s+\\{\\s*\\?s\\s+rdf:type\\s+\\?var0\\s*;\\s*\\?var1\\s+\\?var2\\s*\\.\\s*\\?var2\\s+\\?p\\s+\\s*}\\s+LIMIT\\s+2000\\s*")) @@ -82,4 +86,29 @@ public void testPatternQueries() throws IOException { } } + @Test + public void testMalformedTemplateQuery() throws IOException { + String template = "SELECT * WHERE {%%var0%% %%var0%% %%var0%% %%var0%%}"; + tempTemplateFile = Files.createTempFile(parentFolder, "Template", ".txt"); + Files.writeString(tempTemplateFile, template, StandardCharsets.UTF_8); + final var queryHandlerConfig = new QueryHandler.Config( + tempTemplateFile.toString(), + QueryHandler.Config.Format.ONE_PER_LINE, + null, + true, + QueryHandler.Config.Order.LINEAR, + null, + QueryHandler.Config.Language.SPARQL, + new QueryHandler.Config.Template(URI.create("http://localhost:" + wm.getPort()), 2000L, false) + ); + wm.stubFor(get(anyUrl()) + .withQueryParam("query", matching("PREFIX\\s+rdf:\\s+\\s+SELECT\\s+DISTINCT\\s+\\?var0\\s+\\?var1\\s+\\?var2\\s+WHERE\\s+\\{\\s*\\?s\\s+rdf:type\\s+\\?var0\\s*;\\s*\\?var1\\s+\\?var2\\s*\\.\\s*\\?var2\\s+\\?p\\s+\\s*}\\s+LIMIT\\s+2000\\s*")) + .willReturn(aResponse() + .withStatus(200) + .withHeader("Content-Type", "application/sparql-results+json") + .withBody(RESPONSE_JSON))); 
+ + Assertions.assertThrows(QueryParseException.class, () -> new QueryHandler(queryHandlerConfig)); + } + }