Skip to content

Commit

Permalink
Avoid deserializing entire geometry just to determine type (#898)
Browse files Browse the repository at this point in the history
  • Loading branch information
msbarry authored May 26, 2024
1 parent f8e64a4 commit 9dbd5d3
Show file tree
Hide file tree
Showing 4 changed files with 139 additions and 39 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

import com.fasterxml.jackson.annotation.JsonProperty;
import com.onthegomap.planetiler.expression.Expression;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.Locale;
import java.util.regex.Pattern;
import org.locationtech.jts.geom.Geometry;
import org.locationtech.jts.geom.Lineal;
import org.locationtech.jts.geom.Polygonal;
Expand Down Expand Up @@ -41,6 +45,39 @@ public static GeometryType valueOf(VectorTileProto.Tile.GeomType geomType) {
};
}

/**
 * Returns the type of a WKB-encoded geometry without needing to deserialize the whole thing.
 * <p>
 * Only the 5-byte header is inspected: a byte-order marker followed by a 32-bit geometry type word. The type word is
 * normalized so that ISO WKB dimension variants (for example 1001 = Point Z, 2002 = LineString M, 3003 = Polygon ZM)
 * and EWKB flag bits (Z / M / SRID stored in the high bits) resolve to the same base type as their plain 2D
 * counterparts, instead of falling through to {@link GeometryType#UNKNOWN}.
 *
 * @param wkb the WKB-encoded geometry bytes
 * @return {@link GeometryType#POINT}, {@link GeometryType#LINE} or {@link GeometryType#POLYGON} for (multi)point,
 *         (multi)linestring and (multi)polygon geometries; {@link GeometryType#UNKNOWN} for every other type code
 *         and for {@code null} or truncated input
 */
public static GeometryType fromWKB(byte[] wkb) {
  // Header is 1 byte-order byte + 4 type bytes; anything shorter cannot be sniffed.
  if (wkb == null || wkb.length < 5) {
    return GeometryType.UNKNOWN;
  }
  var bb = ByteBuffer.wrap(wkb);
  byte byteOrder = bb.get();
  int typeWord = bb.order(byteOrder == 1 ? ByteOrder.LITTLE_ENDIAN : ByteOrder.BIG_ENDIAN).getInt();
  // Drop EWKB flag bits (upper 16 bits), then drop the ISO +1000/+2000/+3000 dimension offset.
  int geomType = (typeWord & 0xffff) % 1000;
  return switch (geomType) {
    case 1, 4 -> GeometryType.POINT;
    case 2, 5 -> GeometryType.LINE;
    case 3, 6 -> GeometryType.POLYGON;
    default -> GeometryType.UNKNOWN;
  };
}

/** Matches the leading geometry keyword of a WKT string, allowing leading whitespace and a {@code multi} prefix. */
private static final Pattern TYPE_PATTERN =
  Pattern.compile("^\\s*(multi)?(point|line|polygon)", Pattern.CASE_INSENSITIVE);

/**
 * Returns the type of a WKT-encoded geometry without needing to deserialize the whole thing.
 * <p>
 * Only the leading keyword is examined; text that does not start with a recognized keyword yields
 * {@link GeometryType#UNKNOWN}.
 *
 * @param wkt the WKT text to inspect
 * @return the sniffed geometry type, or {@link GeometryType#UNKNOWN} when the keyword is not recognized
 */
public static GeometryType fromWKT(String wkt) {
  var keyword = TYPE_PATTERN.matcher(wkt);
  if (!keyword.find()) {
    return GeometryType.UNKNOWN;
  }
  // Group 2 is mandatory in the pattern, so it is always present once the match succeeds.
  String base = keyword.group(2).toLowerCase(Locale.ROOT);
  if (base.equals("point")) {
    return GeometryType.POINT;
  }
  if (base.equals("line")) {
    return GeometryType.LINE;
  }
  if (base.equals("polygon")) {
    return GeometryType.POLYGON;
  }
  return GeometryType.UNKNOWN;
}

/**
 * Looks up the vector tile protobuf geometry type with the given number and converts it with
 * {@code valueOf(VectorTileProto.Tile.GeomType)}.
 *
 * @param val numeric geometry type code
 * @return the corresponding geometry type
 */
public static GeometryType valueOf(byte val) {
  var protoType = VectorTileProto.Tile.GeomType.forNumber(val);
  return valueOf(protoType);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,40 +2,59 @@

import com.onthegomap.planetiler.geo.GeoUtils;
import com.onthegomap.planetiler.geo.GeometryException;
import com.onthegomap.planetiler.geo.GeometryType;
import com.onthegomap.planetiler.reader.WithTags;
import com.onthegomap.planetiler.util.FunctionThatThrows;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import org.locationtech.jts.geom.Geometry;

/**
* Decodes geometries from a parquet record based on the {@link GeoParquetMetadata} provided.
*/
class GeometryReader {

private final Map<String, FunctionThatThrows<Object, Geometry>> converters = new HashMap<>();
private final Map<String, FormatHandler> converters = new HashMap<>();
final String geometryColumn;

/**
 * The pair of operations registered for one geometry column encoding.
 *
 * @param parse     decodes a raw column value into a {@link Geometry}; may throw while decoding
 * @param sniffType reports the {@link GeometryType} of a raw column value without building the geometry
 */
private record FormatHandler(
FunctionThatThrows<Object, Geometry> parse,
Function<Object, GeometryType> sniffType
) {}

/**
 * Builds the handler for a list-based (GeoArrow) encoding whose geometry type is fixed by the encoding itself.
 * <p>
 * Parsing hands the value to {@code parser} when it is a {@link List} and yields {@code null} otherwise; type
 * sniffing ignores the value and always reports {@code type}.
 *
 * @param type   geometry type implied by the encoding
 * @param parser converts the nested list structure into a {@link Geometry}
 * @param <L>    list shape expected by {@code parser}
 * @return a handler combining the guarded parser with the constant type
 */
private static <L extends List<?>> FormatHandler arrowHandler(GeometryType type,
  FunctionThatThrows<L, Geometry> parser) {
  FunctionThatThrows<Object, Geometry> guardedParse = value -> {
    if (value instanceof List<?> list) {
      return parser.apply((L) list);
    }
    return null;
  };
  Function<Object, GeometryType> constantType = ignored -> type;
  return new FormatHandler(guardedParse, constantType);
}

GeometryReader(GeoParquetMetadata geoparquet) {
this.geometryColumn = geoparquet.primaryColumn();
for (var entry : geoparquet.columns().entrySet()) {
String column = entry.getKey();
GeoParquetMetadata.ColumnMetadata columnInfo = entry.getValue();
FunctionThatThrows<Object, Geometry> converter = switch (columnInfo.encoding()) {
case "WKB" -> obj -> obj instanceof byte[] bytes ? GeoUtils.wkbReader().read(bytes) : null;
case "WKT" -> obj -> obj instanceof String string ? GeoUtils.wktReader().read(string) : null;
FormatHandler converter = switch (columnInfo.encoding()) {
case "WKB" -> new FormatHandler(
obj -> obj instanceof byte[] bytes ? GeoUtils.wkbReader().read(bytes) : null,
obj -> obj instanceof byte[] bytes ? GeometryType.fromWKB(bytes) : GeometryType.UNKNOWN
);
case "WKT" -> new FormatHandler(
obj -> obj instanceof String string ? GeoUtils.wktReader().read(string) : null,
obj -> obj instanceof String string ? GeometryType.fromWKT(string) : GeometryType.UNKNOWN
);
case "multipolygon", "geoarrow.multipolygon" ->
obj -> obj instanceof List<?> list ? GeoArrow.multipolygon((List<List<List<Object>>>) list) : null;
arrowHandler(GeometryType.POLYGON, GeoArrow::multipolygon);
case "polygon", "geoarrow.polygon" ->
obj -> obj instanceof List<?> list ? GeoArrow.polygon((List<List<Object>>) list) : null;
arrowHandler(GeometryType.POLYGON, GeoArrow::polygon);
case "multilinestring", "geoarrow.multilinestring" ->
obj -> obj instanceof List<?> list ? GeoArrow.multilinestring((List<List<Object>>) list) : null;
arrowHandler(GeometryType.LINE, GeoArrow::multilinestring);
case "linestring", "geoarrow.linestring" ->
obj -> obj instanceof List<?> list ? GeoArrow.linestring((List<Object>) list) : null;
arrowHandler(GeometryType.LINE, GeoArrow::linestring);
case "multipoint", "geoarrow.multipoint" ->
obj -> obj instanceof List<?> list ? GeoArrow.multipoint((List<Object>) list) : null;
case "point", "geoarrow.point" -> GeoArrow::point;
arrowHandler(GeometryType.POINT, GeoArrow::multipoint);
case "point", "geoarrow.point" ->
arrowHandler(GeometryType.POINT, GeoArrow::point);
default -> throw new IllegalArgumentException("Unhandled type: " + columnInfo.encoding());
};
converters.put(column, converter);
Expand All @@ -58,9 +77,17 @@ Geometry parseGeometry(Object value, String column) throws GeometryException {
throw new GeometryException("no_converter", "No geometry converter for " + column);
}
try {
return converter.apply(value);
return converter.parse.apply(value);
} catch (Exception e) {
throw new GeometryException("error_reading", "Error reading " + column, e);
}
}

/**
 * Determines the geometry type of a raw column value using the type sniffer registered for {@code column}.
 *
 * @param value  raw column value, may be {@code null}
 * @param column name of the geometry column the value came from
 * @return the sniffed type, or {@link GeometryType#UNKNOWN} when {@code value} is {@code null} or no handler is
 *         registered for {@code column}
 */
GeometryType sniffGeometryType(Object value, String column) {
  var handler = converters.get(column);
  if (value == null || handler == null) {
    return GeometryType.UNKNOWN;
  }
  return handler.sniffType.apply(value);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import com.onthegomap.planetiler.geo.GeoUtils;
import com.onthegomap.planetiler.geo.GeometryException;
import com.onthegomap.planetiler.geo.GeometryType;
import com.onthegomap.planetiler.reader.SourceFeature;
import com.onthegomap.planetiler.reader.Struct;
import java.nio.file.Path;
Expand All @@ -25,6 +26,7 @@ public class ParquetFeature extends SourceFeature {
private Geometry latLon;
private Geometry world;
private Struct struct = null;
private GeometryType geometryType = null;

ParquetFeature(String source, String sourceLayer, long id, GeometryReader geometryParser,
Map<String, Object> tags, Path path, MessageType schema) {
Expand Down Expand Up @@ -56,31 +58,39 @@ public Geometry worldGeometry() throws GeometryException {
(world = GeoUtils.sortPolygonsByAreaDescending(GeoUtils.latLonToWorldCoords(latLonGeometry())));
}

/**
 * Returns the geometry type of this feature, computing it on first use and caching it in {@code geometryType}.
 * <p>
 * The raw value is sniffed first via the geometry parser. Only when that yields {@link GeometryType#UNKNOWN} is the
 * full geometry decoded and classified by its JTS interface.
 *
 * @return the geometry type, or {@link GeometryType#UNKNOWN} when it cannot be determined
 * @throws IllegalStateException if the fallback decoding of the geometry fails
 */
private GeometryType geometryType() {
  if (geometryType != null) {
    return geometryType;
  }
  geometryType = geometryParser.sniffGeometryType(rawGeometry, geometryParser.geometryColumn);
  if (geometryType == GeometryType.UNKNOWN) {
    try {
      Geometry geometry = latLonGeometry();
      // NOTE(review): the parser's handlers return null for values of an unexpected Java type, so the decoded
      // geometry is presumably nullable here. A pattern switch on null throws NullPointerException, so guard
      // explicitly to stay as null-tolerant as an instanceof check.
      geometryType = geometry == null ? GeometryType.UNKNOWN : switch (geometry) {
        case Puntal ignored -> GeometryType.POINT;
        case Lineal ignored -> GeometryType.LINE;
        case Polygonal ignored -> GeometryType.POLYGON;
        default -> GeometryType.UNKNOWN;
      };
    } catch (GeometryException e) {
      throw new IllegalStateException(e);
    }
  }
  return geometryType;
}

@Override
public boolean isPoint() {
try {
return latLonGeometry() instanceof Puntal;
} catch (GeometryException e) {
throw new IllegalStateException(e);
}
return geometryType() == GeometryType.POINT;
}

@Override
public boolean canBePolygon() {
try {
return latLonGeometry() instanceof Polygonal;
} catch (GeometryException e) {
throw new IllegalStateException(e);
}
return geometryType() == GeometryType.POLYGON;
}

@Override
public boolean canBeLine() {
try {
return latLonGeometry() instanceof Lineal;
} catch (GeometryException e) {
throw new IllegalStateException(e);
}
return geometryType() == GeometryType.LINE;
}

private Struct cachedStruct() {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,20 @@
package com.onthegomap.planetiler.geo;

import static java.util.Collections.emptyList;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;

import com.onthegomap.planetiler.TestUtils;
import com.onthegomap.planetiler.reader.SimpleFeature;
import java.util.Map;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.CsvSource;
import org.locationtech.jts.io.ParseException;
import org.locationtech.jts.io.WKBWriter;
import org.locationtech.jts.io.WKTReader;

class GeometryTypeTest {

Expand All @@ -22,20 +30,38 @@ void testGeometryFactory() {
SimpleFeature.createFakeOsmFeature(TestUtils.newPolygon(0, 0, 1, 0, 1, 1, 0, 0), tags, "osm", null, 1,
emptyList());

Assertions.assertTrue(GeometryType.LINE.featureTest().evaluate(line));
Assertions.assertFalse(GeometryType.LINE.featureTest().evaluate(point));
Assertions.assertFalse(GeometryType.LINE.featureTest().evaluate(poly));
assertTrue(GeometryType.LINE.featureTest().evaluate(line));
assertFalse(GeometryType.LINE.featureTest().evaluate(point));
assertFalse(GeometryType.LINE.featureTest().evaluate(poly));

Assertions.assertFalse(GeometryType.POINT.featureTest().evaluate(line));
Assertions.assertTrue(GeometryType.POINT.featureTest().evaluate(point));
Assertions.assertFalse(GeometryType.POINT.featureTest().evaluate(poly));
assertFalse(GeometryType.POINT.featureTest().evaluate(line));
assertTrue(GeometryType.POINT.featureTest().evaluate(point));
assertFalse(GeometryType.POINT.featureTest().evaluate(poly));

Assertions.assertFalse(GeometryType.POLYGON.featureTest().evaluate(line));
Assertions.assertFalse(GeometryType.POLYGON.featureTest().evaluate(point));
Assertions.assertTrue(GeometryType.POLYGON.featureTest().evaluate(poly));
assertFalse(GeometryType.POLYGON.featureTest().evaluate(line));
assertFalse(GeometryType.POLYGON.featureTest().evaluate(point));
assertTrue(GeometryType.POLYGON.featureTest().evaluate(poly));

Assertions.assertThrows(Exception.class, () -> GeometryType.UNKNOWN.featureTest().evaluate(point));
Assertions.assertThrows(Exception.class, () -> GeometryType.UNKNOWN.featureTest().evaluate(line));
Assertions.assertThrows(Exception.class, () -> GeometryType.UNKNOWN.featureTest().evaluate(poly));
assertThrows(Exception.class, () -> GeometryType.UNKNOWN.featureTest().evaluate(point));
assertThrows(Exception.class, () -> GeometryType.UNKNOWN.featureTest().evaluate(line));
assertThrows(Exception.class, () -> GeometryType.UNKNOWN.featureTest().evaluate(poly));
}

/**
 * Checks that the geometry type sniffed from WKT text, and from the WKB bytes produced for the same geometry, both
 * equal the expected type.
 */
@ParameterizedTest
@CsvSource(value = {
  "POINT; POINT EMPTY",
  "POINT; POINT(1 1)",
  "POINT; MULTIPOINT(1 1, 2 2)",
  "LINE; lineString(1 1, 2 2)",
  "LINE; LINESTRING ZM(1 1 2 3, 2 2 4 5)",
  "LINE; multiLineString((1 1, 2 2))",
  "POLYGON; POLYGON((0 0, 0 1, 1 0, 0 0))",
  "POLYGON; MULTIPOLYGON(((0 0, 0 1, 1 0, 0 0)))",
  "UNKNOWN; GEOMETRYCOLLECTION EMPTY",
}, delimiter = ';')
void testSniffTypes(GeometryType expected, String wkt) throws ParseException {
  // Text form.
  GeometryType sniffedFromText = GeometryType.fromWKT(wkt);
  assertEquals(expected, sniffedFromText);

  // Binary form of the same geometry.
  var parsed = new WKTReader().read(wkt);
  byte[] encoded = new WKBWriter().write(parsed);
  GeometryType sniffedFromBinary = GeometryType.fromWKB(encoded);
  assertEquals(expected, sniffedFromBinary);
}
}

0 comments on commit 9dbd5d3

Please sign in to comment.