Skip to content

Commit fcae47a

Browse files
Lorenz BuehmannAklakan
Lorenz Buehmann
authored and committed
apacheGH-3026: spatial index per graph and kryo serialization
1 parent 9fa2fe0 commit fcae47a

File tree

53 files changed

+3670
-203
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

53 files changed

+3670
-203
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
package org.apache.jena.system;
19+
20+
import java.util.Objects;
21+
22+
import org.apache.jena.query.ReadWrite;
23+
import org.apache.jena.query.TxnType;
24+
import org.apache.jena.sparql.core.Transactional;
25+
26+
/**
27+
* Txn variant for use with try-with-resources. Allows raising
28+
* checked exceptions in an idiomatic way. Closing the TxnCtl
29+
* instance will abort the transaction unless it
30+
* has been manually committed.
31+
* <p>
32+
*
33+
* Usage example:
34+
* <pre>
35+
* public void myMethod() throws IOException {
36+
* try (TxnCtl txn = TxnCtl.begin(dataset, TxnType.WRITE)) {
37+
* // Do work
38+
* if (someError) {
39+
* throw new IOException();
40+
* }
41+
* // Must manually call commit on success.
42+
* txn.commit();
43+
* }
44+
* }
45+
* </pre>
46+
*/
47+
public class TxnCtl
48+
implements AutoCloseable
49+
{
50+
private Transactional txn;
51+
private boolean b;
52+
53+
private TxnCtl(Transactional txn, boolean b) {
54+
super();
55+
this.txn = txn;
56+
this.b = b;
57+
}
58+
59+
public static TxnCtl begin(Transactional txn, ReadWrite readWrite) {
60+
return begin(txn, TxnType.convert(readWrite));
61+
}
62+
63+
public static TxnCtl begin(Transactional txn, TxnType txnType) {
64+
Objects.requireNonNull(txn);
65+
Objects.requireNonNull(txnType);
66+
boolean b = txn.isInTransaction();
67+
if ( b )
68+
TxnOp.compatibleWithPromote(txnType, txn);
69+
else
70+
txn.begin(txnType);
71+
return new TxnCtl(txn, b);
72+
}
73+
74+
public void commit() {
75+
if ( txn.isInTransaction() ) {
76+
77+
// May have been explicit commit or abort.
78+
txn.commit();
79+
}
80+
}
81+
82+
@Override
83+
public void close() {
84+
if ( !b ) {
85+
if ( txn.isInTransaction() )
86+
// May have been explicit commit or abort.
87+
txn.abort();
88+
txn.end();
89+
}
90+
}
91+
}

jena-benchmarks/jena-benchmarks-jmh/pom.xml

+7
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,13 @@
7171
<scope>test</scope>
7272
</dependency>
7373

74+
<dependency>
75+
<groupId>org.apache.jena</groupId>
76+
<artifactId>jena-geosparql</artifactId>
77+
<version>5.4.0-SNAPSHOT</version>
78+
<scope>test</scope>
79+
</dependency>
80+
7481
<dependency>
7582
<groupId>org.apache.jena</groupId>
7683
<artifactId>jena-arq</artifactId>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
package org.apache.jena.geosparql.spatial.index;
19+
20+
import java.io.ByteArrayOutputStream;
21+
import java.nio.charset.StandardCharsets;
22+
import java.time.LocalDateTime;
23+
import java.time.format.DateTimeFormatter;
24+
import java.util.Map;
25+
import java.util.concurrent.TimeUnit;
26+
27+
import org.apache.jena.geosparql.implementation.vocabulary.SRS_URI;
28+
import org.apache.jena.geosparql.spatial.index.v2.GeometryGenerator;
29+
import org.apache.jena.geosparql.spatial.index.v2.GeometryGenerator.GeometryType;
30+
import org.apache.jena.graph.Graph;
31+
import org.apache.jena.riot.RDFDataMgr;
32+
import org.apache.jena.riot.RDFFormat;
33+
import org.apache.jena.sparql.graph.GraphFactory;
34+
import org.locationtech.jts.geom.Envelope;
35+
import org.openjdk.jmh.annotations.Benchmark;
36+
import org.openjdk.jmh.annotations.Level;
37+
import org.openjdk.jmh.annotations.Mode;
38+
import org.openjdk.jmh.annotations.Param;
39+
import org.openjdk.jmh.annotations.Scope;
40+
import org.openjdk.jmh.annotations.Setup;
41+
import org.openjdk.jmh.annotations.State;
42+
import org.openjdk.jmh.annotations.TearDown;
43+
import org.openjdk.jmh.results.format.ResultFormatType;
44+
import org.openjdk.jmh.runner.Runner;
45+
import org.openjdk.jmh.runner.RunnerException;
46+
import org.openjdk.jmh.runner.options.ChainedOptionsBuilder;
47+
import org.openjdk.jmh.runner.options.Options;
48+
import org.openjdk.jmh.runner.options.OptionsBuilder;
49+
import org.openjdk.jmh.runner.options.TimeValue;
50+
51+
/**
52+
* Benchmarking of the spatial index.
53+
* Evaluates the time it takes to load an index from disk.
54+
*/
55+
@State(Scope.Benchmark)
56+
public class BenchmarkSpatialIndex {
57+
@Param({
58+
"current",
59+
"5.1.0"
60+
})
61+
public String param0_jenaVersion;
62+
63+
@Param({
64+
"1000",
65+
"10000",
66+
"100000",
67+
})
68+
public long param1_geometryMixes;
69+
70+
@Param({
71+
// "",
72+
SRS_URI.DEFAULT_WKT_CRS84
73+
})
74+
public String param2_srs;
75+
76+
private SpatialIndexLifeCycle task;
77+
78+
@Benchmark
79+
public void task() throws Exception {
80+
task.load();
81+
}
82+
83+
@Setup(Level.Trial)
84+
public void setupTrial() throws Exception {
85+
Envelope envelope = new Envelope(-175, 175, -85, 85);
86+
Map<GeometryType, Number> config = GeometryGenerator.createConfig(param1_geometryMixes);
87+
Graph graph = GraphFactory.createDefaultGraph();
88+
GeometryGenerator.generateGraph(graph, envelope, config);
89+
90+
String data;
91+
RDFFormat fmt = RDFFormat.TURTLE_PRETTY;
92+
try (ByteArrayOutputStream out = new ByteArrayOutputStream()) {
93+
RDFDataMgr.write(out, graph, fmt);
94+
out.flush();
95+
data = new String(out.toByteArray(), StandardCharsets.UTF_8);
96+
}
97+
98+
String srs = param2_srs.isEmpty() ? null : param2_srs;
99+
100+
switch (param0_jenaVersion) {
101+
case "current":
102+
task = SpatialIndexCurrent.setup(data, envelope, srs, false);
103+
break;
104+
case "5.1.0":
105+
task = SpatialIndex510.setup(data, envelope, srs, false);
106+
break;
107+
default:
108+
throw new RuntimeException("No task registered for this jena version:" + param0_jenaVersion);
109+
}
110+
111+
task.findSrs();
112+
task.build();
113+
}
114+
115+
@TearDown(Level.Trial)
116+
public void tearDownTrial() throws Exception {
117+
task.close();
118+
}
119+
120+
public static ChainedOptionsBuilder getDefaults(Class<?> c) {
121+
return new OptionsBuilder()
122+
// Specify which benchmarks to run.
123+
// You can be more specific if you'd like to run only one benchmark per test.
124+
.include(c.getName())
125+
// Set the following options as needed
126+
.mode(Mode.AverageTime)
127+
.timeUnit(TimeUnit.SECONDS)
128+
.warmupTime(TimeValue.NONE)
129+
.warmupIterations(5)
130+
.measurementIterations(5)
131+
.measurementTime(TimeValue.NONE)
132+
.threads(1)
133+
.forks(1)
134+
.shouldFailOnError(true)
135+
.shouldDoGC(true)
136+
//.jvmArgs("-XX:+UnlockDiagnosticVMOptions", "-XX:+PrintInlining")
137+
.jvmArgs("-Xmx8G")
138+
//.addProfiler(WinPerfAsmProfiler.class)
139+
.resultFormat(ResultFormatType.JSON)
140+
.result(c.getSimpleName() + "_" + LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMddHHmmss")) + ".json");
141+
}
142+
143+
public static void main(String[] args) throws RunnerException {
144+
Options opt = getDefaults(BenchmarkSpatialIndex.class).build();
145+
new Runner(opt).run();
146+
}
147+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
package org.apache.jena.geosparql.spatial.index;
2+
3+
import java.nio.file.Files;
4+
import java.nio.file.Path;
5+
6+
import org.apache.shadedJena510.geosparql.configuration.GeoSPARQLOperations;
7+
import org.apache.shadedJena510.geosparql.spatial.SpatialIndex;
8+
import org.apache.shadedJena510.geosparql.spatial.SpatialIndexException;
9+
import org.apache.shadedJena510.query.Dataset;
10+
import org.apache.shadedJena510.riot.Lang;
11+
import org.apache.shadedJena510.riot.RDFParserBuilder;
12+
import org.junit.Assert;
13+
import org.locationtech.jts.geom.Envelope;
14+
15+
public class SpatialIndex510
16+
implements SpatialIndexLifeCycle
17+
{
18+
protected Dataset ds;
19+
protected Envelope envelope;
20+
protected String srs;
21+
protected boolean validate;
22+
23+
public SpatialIndex510(Dataset ds, Envelope envelope, String srs, boolean validate) {
24+
super();
25+
this.ds = ds;
26+
this.envelope = envelope;
27+
this.validate = validate;
28+
this.srs = srs;
29+
}
30+
31+
public static SpatialIndex510 setup(String data, Envelope envelope, String srs, boolean validate) throws SpatialIndexException {
32+
Dataset ds = RDFParserBuilder.create().fromString(data).lang(Lang.TURTLE).toDataset();
33+
return new SpatialIndex510(ds, envelope, srs, validate);
34+
}
35+
36+
protected Path indexFile;
37+
protected String finalSrs = null;
38+
protected SpatialIndex indexA;
39+
protected SpatialIndex indexB;
40+
41+
@Override
42+
public void init() {
43+
ds.getContext().remove(SpatialIndex.SPATIAL_INDEX_SYMBOL);
44+
}
45+
46+
@Override
47+
public void findSrs() {
48+
finalSrs = srs == null
49+
? GeoSPARQLOperations.findModeSRS(ds)
50+
: srs;
51+
}
52+
53+
@Override
54+
public void build() throws Exception {
55+
indexFile = Files.createTempFile("jena-", ".spatial-index");
56+
Files.deleteIfExists(indexFile); // buildSpatialIndex in v1 will attempt to load the file first
57+
58+
indexA = SpatialIndex.buildSpatialIndex(ds, finalSrs, indexFile.toFile());
59+
}
60+
61+
@Override
62+
public void load() throws Exception {
63+
indexB = SpatialIndex.load(indexFile.toFile());
64+
}
65+
66+
@Override
67+
public void close() throws Exception {
68+
Files.deleteIfExists(indexFile);
69+
70+
if (validate) {
71+
int itemCountA = indexA.query(envelope).size();
72+
int itemCountB = indexB.query(envelope).size();
73+
// Assert.assertTrue(itemCountA > 0);
74+
Assert.assertEquals(itemCountA, itemCountB);
75+
}
76+
}
77+
}

0 commit comments

Comments
 (0)