diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/parquet/GeoParquetMetadata.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/parquet/GeoParquetMetadata.java
index 563176a426..ac7f31b94e 100644
--- a/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/parquet/GeoParquetMetadata.java
+++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/parquet/GeoParquetMetadata.java
@@ -5,12 +5,21 @@
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.fasterxml.jackson.databind.PropertyNamingStrategies;
 import com.fasterxml.jackson.databind.annotation.JsonNaming;
+import com.onthegomap.planetiler.config.Bounds;
+import com.onthegomap.planetiler.geo.GeoUtils;
 import java.io.IOException;
 import java.util.List;
 import java.util.Map;
+import java.util.Objects;
+import java.util.function.BiFunction;
+import org.apache.parquet.filter2.predicate.FilterApi;
+import org.apache.parquet.filter2.predicate.FilterPredicate;
+import org.apache.parquet.filter2.predicate.Filters;
 import org.apache.parquet.hadoop.metadata.FileMetaData;
+import org.apache.parquet.schema.MessageType;
 import org.apache.parquet.schema.PrimitiveType;
 import org.apache.parquet.schema.Type;
+import org.locationtech.jts.geom.Envelope;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -31,6 +40,11 @@ public record GeoParquetMetadata(
   private static final ObjectMapper mapper =
     new ObjectMapper().configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
 
+  public ColumnMetadata primaryColumnMetadata() {
+    return Objects.requireNonNull(columns.get(primaryColumn),
+      "No geoparquet metadata for primary column " + primaryColumn);
+  }
+
   @JsonNaming(PropertyNamingStrategies.SnakeCaseStrategy.class)
   public record ColumnMetadata(
     String encoding,
@@ -49,6 +63,78 @@ public record ColumnMetadata(
     ColumnMetadata(String encoding, List<String> geometryTypes) {
       this(encoding, geometryTypes, null, null, null, null, null, null);
     }
+
+    public Envelope envelope() {
+      return (bbox == null || bbox.size() != 4) ? GeoUtils.WORLD_LAT_LON_BOUNDS :
+        new Envelope(bbox.get(0), bbox.get(2), bbox.get(1), bbox.get(3));
+    }
+
+    /**
+     * Returns a parquet filter that filters records read to only those where the covering bbox overlaps {@code bounds}
+     * or null if unable to infer that from the metadata.
+     * <p>
+     * If covering bbox metadata is missing from geoparquet metadata, it will try to use bbox.xmin, bbox.xmax,
+     * bbox.ymin, and bbox.ymax if present.
+     */
+    public FilterPredicate bboxFilter(MessageType schema, Bounds bounds) {
+      if (!bounds.isWorld()) {
+        var covering = covering();
+        // if covering metadata missing, use default bbox:{xmin,xmax,ymin,ymax}
+        if (covering == null) {
+          if (hasNumericField(schema, "bbox.xmin") &&
+            hasNumericField(schema, "bbox.xmax") &&
+            hasNumericField(schema, "bbox.ymin") &&
+            hasNumericField(schema, "bbox.ymax")) {
+            covering = new GeoParquetMetadata.Covering(new GeoParquetMetadata.CoveringBbox(
+              List.of("bbox.xmin"),
+              List.of("bbox.ymin"),
+              List.of("bbox.xmax"),
+              List.of("bbox.ymax")
+            ));
+          } else if (hasNumericField(schema, "bbox", "xmin") &&
+            hasNumericField(schema, "bbox", "xmax") &&
+            hasNumericField(schema, "bbox", "ymin") &&
+            hasNumericField(schema, "bbox", "ymax")) {
+            covering = new GeoParquetMetadata.Covering(new GeoParquetMetadata.CoveringBbox(
+              List.of("bbox", "xmin"),
+              List.of("bbox", "ymin"),
+              List.of("bbox", "xmax"),
+              List.of("bbox", "ymax")
+            ));
+          }
+        }
+        if (covering != null) {
+          var latLonBounds = bounds.latLon();
+          // TODO apply projection
+          var coveringBbox = covering.bbox();
+          var coordinateType =
+            schema.getColumnDescription(coveringBbox.xmax().toArray(String[]::new))
+              .getPrimitiveType()
+              .getPrimitiveTypeName();
+          BiFunction<List<String>, Number, FilterPredicate> gtEq = switch (coordinateType) {
+            case DOUBLE -> (p, v) -> FilterApi.gtEq(Filters.doubleColumn(p), v.doubleValue());
+            case FLOAT -> (p, v) -> FilterApi.gtEq(Filters.floatColumn(p), v.floatValue());
+            default -> throw new UnsupportedOperationException();
+          };
+          BiFunction<List<String>, Number, FilterPredicate> ltEq = switch (coordinateType) {
+            case DOUBLE -> (p, v) -> FilterApi.ltEq(Filters.doubleColumn(p), v.doubleValue());
+            case FLOAT -> (p, v) -> FilterApi.ltEq(Filters.floatColumn(p), v.floatValue());
+            default -> throw new UnsupportedOperationException();
+          };
+          return FilterApi.and(
+            FilterApi.and(
+              gtEq.apply(coveringBbox.xmax(), latLonBounds.getMinX()),
+              ltEq.apply(coveringBbox.xmin(), latLonBounds.getMaxX())
+            ),
+            FilterApi.and(
+              gtEq.apply(coveringBbox.ymax(), latLonBounds.getMinY()),
+              ltEq.apply(coveringBbox.ymin(), latLonBounds.getMaxY())
+            )
+          );
+        }
+      }
+      return null;
+    }
   }
 
   public record CoveringBbox(
@@ -63,6 +149,10 @@ public record Covering(
     CoveringBbox bbox
   ) {}
 
+  /**
+   * Extracts geoparquet metadata from the {@code "geo"} key value metadata field for the file, or tries to generate a
+   * default one if missing that uses geometry, wkb_geometry, or wkt_geometry column.
+   */
   public static GeoParquetMetadata parse(FileMetaData metadata) throws IOException {
     String string = metadata.getKeyValueMetaData().get("geo");
     if (string != null) {
@@ -95,4 +185,16 @@ public static GeoParquetMetadata parse(FileMetaData metadata) throws IOException
       "No valid geometry columns found: " + metadata.getSchema().asGroupType().getFields().stream().map(
         Type::getName).toList());
   }
+
+  private static boolean hasNumericField(MessageType root, String... path) {
+    if (root.containsPath(path)) {
+      var type = root.getType(path);
+      if (!type.isPrimitive()) {
+        return false;
+      }
+      var typeName = type.asPrimitiveType().getPrimitiveTypeName();
+      return typeName == PrimitiveType.PrimitiveTypeName.DOUBLE || typeName == PrimitiveType.PrimitiveTypeName.FLOAT;
+    }
+    return false;
+  }
 }
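
Illustrative usage, not part of the patch: a minimal sketch of how the two new ColumnMetadata helpers are meant to compose, assuming a ParquetFileReader named `file` and a Bounds named `bounds` are in scope (both names are placeholders); ParquetInputFile below wires the same calls into its constructor.

    var geoparquet = GeoParquetMetadata.parse(file.getFooter().getFileMetaData());
    var column = geoparquet.primaryColumnMetadata();
    if (!column.envelope().intersects(bounds.latLon())) {
      // the whole file sits outside the requested bounds, so it can be skipped entirely
    } else {
      // null means no covering bbox columns could be found; callers fall back to post-filtering
      FilterPredicate bboxFilter = column.bboxFilter(file.getFooter().getFileMetaData().getSchema(), bounds);
    }
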
diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/parquet/GeometryReader.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/parquet/GeometryReader.java
new file mode 100644
index 0000000000..90f65b0f78
--- /dev/null
+++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/parquet/GeometryReader.java
@@ -0,0 +1,64 @@
+package com.onthegomap.planetiler.reader.parquet;
+
+import com.onthegomap.planetiler.geo.GeoUtils;
+import com.onthegomap.planetiler.reader.WithTags;
+import com.onthegomap.planetiler.util.FunctionThatThrows;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import org.locationtech.jts.geom.Geometry;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+class GeometryReader {
+  private static final Logger LOGGER = LoggerFactory.getLogger(GeometryReader.class);
+  private final Map<String, FunctionThatThrows<Object, Geometry>> converters = new HashMap<>();
+  private final String geometryColumn;
+
+  GeometryReader(GeoParquetMetadata geoparquet) {
+    this.geometryColumn = geoparquet.primaryColumn();
+    for (var entry : geoparquet.columns().entrySet()) {
+      String column = entry.getKey();
+      GeoParquetMetadata.ColumnMetadata columnInfo = entry.getValue();
+      FunctionThatThrows<Object, Geometry> converter = switch (columnInfo.encoding()) {
+        case "WKB" -> obj -> obj instanceof byte[] bytes ? GeoUtils.wkbReader().read(bytes) : null;
+        case "WKT" -> obj -> obj instanceof String string ? GeoUtils.wktReader().read(string) : null;
+        case "multipolygon", "geoarrow.multipolygon" ->
+          obj -> obj instanceof List<?> list ? GeoArrow.multipolygon((List<List<List<Object>>>) list) : null;
+        case "polygon", "geoarrow.polygon" ->
+          obj -> obj instanceof List<?> list ? GeoArrow.polygon((List<List<Object>>) list) : null;
+        case "multilinestring", "geoarrow.multilinestring" ->
+          obj -> obj instanceof List<?> list ? GeoArrow.multilinestring((List<List<Object>>) list) : null;
+        case "linestring", "geoarrow.linestring" ->
+          obj -> obj instanceof List<?> list ? GeoArrow.linestring((List<Object>) list) : null;
+        case "multipoint", "geoarrow.multipoint" ->
+          obj -> obj instanceof List<?> list ? GeoArrow.multipoint((List<Object>) list) : null;
+        case "point", "geoarrow.point" -> GeoArrow::point;
+        default -> throw new IllegalArgumentException("Unhandled type: " + columnInfo.encoding());
+      };
+
+      converters.put(column, converter);
+    }
+  }
+
+  Geometry readPrimaryGeometry(WithTags tags) {
+    return readGeometry(tags, geometryColumn);
+  }
+
+  Geometry readGeometry(WithTags tags, String column) {
+    var value = tags.getTag(column);
+    var converter = converters.get(column);
+    if (value == null) {
+      LOGGER.warn("Missing {} column", column);
+      return GeoUtils.EMPTY_GEOMETRY;
+    } else if (converter == null) {
+      throw new IllegalArgumentException("No geometry converter for " + column);
+    }
+    try {
+      return converter.apply(value);
+    } catch (Exception e) {
+      LOGGER.warn("Error reading geometry {}", column, e);
+      return GeoUtils.EMPTY_GEOMETRY;
+    }
+  }
+}
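
Illustrative usage, not part of the patch: GeometryReader picks the converter registered for a column by its geoparquet encoding and decodes the raw tag value. This sketch decodes a WKB-encoded point stored under the primary column name, mirroring GeoParquetMetadataTest further down; the `geoparquet` variable is assumed to hold parsed metadata whose primary column is "geometry".

    GeometryReader reader = new GeometryReader(geoparquet);
    Geometry geom = reader.readPrimaryGeometry(WithTags.from(Map.of(
      "geometry", new WKBWriter().write(GeoUtils.point(1, 2)) // raw WKB bytes stored under the primary column name
    )));
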
diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/parquet/ParquetInputFile.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/parquet/ParquetInputFile.java
index 592287ae5a..b654c4a75d 100644
--- a/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/parquet/ParquetInputFile.java
+++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/parquet/ParquetInputFile.java
@@ -3,11 +3,8 @@
 import blue.strategic.parquet.ParquetReader;
 import com.google.common.collect.Iterators;
 import com.onthegomap.planetiler.config.Bounds;
-import com.onthegomap.planetiler.geo.GeoUtils;
 import com.onthegomap.planetiler.geo.GeometryException;
 import com.onthegomap.planetiler.reader.SourceFeature;
-import com.onthegomap.planetiler.reader.WithTags;
-import com.onthegomap.planetiler.util.FunctionThatThrows;
 import com.onthegomap.planetiler.util.Hashing;
 import java.io.Closeable;
 import java.io.IOException;
@@ -16,13 +13,10 @@
 import java.nio.charset.StandardCharsets;
 import java.nio.file.Path;
 import java.util.Collections;
-import java.util.HashMap;
 import java.util.Iterator;
-import java.util.List;
 import java.util.Map;
 import java.util.NoSuchElementException;
 import java.util.Objects;
-import java.util.function.BiFunction;
 import java.util.function.Function;
 import java.util.function.ToLongFunction;
 import java.util.stream.IntStream;
@@ -31,16 +25,12 @@
 import org.apache.parquet.filter2.compat.FilterCompat;
 import org.apache.parquet.filter2.predicate.FilterApi;
 import org.apache.parquet.filter2.predicate.FilterPredicate;
-import org.apache.parquet.filter2.predicate.Filters;
 import org.apache.parquet.hadoop.ParquetFileReader;
 import org.apache.parquet.hadoop.metadata.ParquetMetadata;
 import org.apache.parquet.io.ColumnIOFactory;
 import org.apache.parquet.io.InputFile;
 import org.apache.parquet.io.MessageColumnIO;
-import org.apache.parquet.schema.MessageType;
-import org.apache.parquet.schema.PrimitiveType;
 import org.locationtech.jts.geom.Envelope;
-import org.locationtech.jts.geom.Geometry;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -56,12 +46,11 @@ public class ParquetInputFile {
   private final Path path;
   private final FilterCompat.Filter filter;
   private final String source;
-  private final String geometryColumn;
-  private final Map<String, FunctionThatThrows<Object, Geometry>> converters = new HashMap<>();
   private final ToLongFunction<Map<String, Object>> idGenerator;
   private final String layer;
   private final long count;
   private final int blockCount;
+  private final GeometryReader geometryReader;
   private Envelope postFilterBounds = null;
   private boolean outOfBounds = false;
   private final Map<String, Object> extraFields;
@@ -70,18 +59,6 @@ public ParquetInputFile(String source, String layer, Path path) {
     this(source, layer, path, null, Bounds.WORLD, null, null);
   }
 
-  private static boolean hasNumericField(MessageType root, String... path) {
-    if (root.containsPath(path)) {
-      var type = root.getType(path);
-      if (!type.isPrimitive()) {
-        return false;
-      }
-      var typeName = type.asPrimitiveType().getPrimitiveTypeName();
-      return typeName == PrimitiveType.PrimitiveTypeName.DOUBLE || typeName == PrimitiveType.PrimitiveTypeName.FLOAT;
-    }
-    return false;
-  }
-
   public ParquetInputFile(String source, String layer, Path path, FilterPredicate filter, Bounds bounds,
     Map<String, Object> extraFields, Function<Map<String, Object>, Object> idGenerator) {
     this.idGenerator = idGenerator == null ? null : map -> hashToLong(idGenerator.apply(map));
@@ -94,103 +71,17 @@ public ParquetInputFile(String source, String layer, Path path, FilterPredicate
       metadata = file.getFooter();
      var fileMetadata = metadata.getFileMetaData();
      var geoparquet = GeoParquetMetadata.parse(fileMetadata);
-      geometryColumn = geoparquet.primaryColumn();
-      for (var entry : geoparquet.columns().entrySet()) {
-        String column = entry.getKey();
-        GeoParquetMetadata.ColumnMetadata columnInfo = entry.getValue();
-        FunctionThatThrows<Object, Geometry> converter = switch (columnInfo.encoding()) {
-          case "WKB" -> obj -> obj instanceof byte[] bytes ? GeoUtils.wkbReader().read(bytes) : null;
-          case "WKT" -> obj -> obj instanceof String string ? GeoUtils.wktReader().read(string) : null;
-          case "multipolygon", "geoarrow.multipolygon" ->
-            obj -> obj instanceof List<?> list ? GeoArrow.multipolygon((List<List<List<Object>>>) list) : null;
-          case "polygon", "geoarrow.polygon" ->
-            obj -> obj instanceof List<?> list ? GeoArrow.polygon((List<List<Object>>) list) : null;
-          case "multilinestring", "geoarrow.multilinestring" ->
-            obj -> obj instanceof List<?> list ? GeoArrow.multilinestring((List<List<Object>>) list) : null;
-          case "linestring", "geoarrow.linestring" ->
-            obj -> obj instanceof List<?> list ? GeoArrow.linestring((List<Object>) list) : null;
-          case "multipoint", "geoarrow.multipoint" ->
-            obj -> obj instanceof List<?> list ? GeoArrow.multipoint((List<Object>) list) : null;
-          case "point", "geoarrow.point" -> GeoArrow::point;
-          default -> throw new IllegalArgumentException("Unhandled type: " + columnInfo.encoding());
-        };
-
-        converters.put(column, converter);
-
-        if (columnInfo.crs() != null) {
-          // TODO handle projjson
-          LOGGER.warn("Custom CRS not supported in {}", path);
-        }
-
-        if (column.equals(geometryColumn)) {
-          if (columnInfo.bbox() != null && columnInfo.bbox().size() == 4) {
-            var bbox = columnInfo.bbox();
-            Envelope env = new Envelope(bbox.get(0), bbox.get(2), bbox.get(1), bbox.get(3));
-            // TODO apply projection
-            if (!bounds.latLon().intersects(env)) {
-              this.outOfBounds = true;
-            }
-          }
-          if (!this.outOfBounds && !bounds.isWorld()) {
-            var covering = columnInfo.covering();
-            // if covering metadata missing, use default bbox:{xmin,xmax,ymin,ymax}
-            if (covering == null) {
-              var root = metadata.getFileMetaData().getSchema();
-              if (hasNumericField(root, "bbox.xmin") &&
-                hasNumericField(root, "bbox.xmax") &&
-                hasNumericField(root, "bbox.ymin") &&
-                hasNumericField(root, "bbox.ymax")) {
-                covering = new GeoParquetMetadata.Covering(new GeoParquetMetadata.CoveringBbox(
-                  List.of("bbox.xmin"),
-                  List.of("bbox.ymin"),
-                  List.of("bbox.xmax"),
-                  List.of("bbox.ymax")
-                ));
-              } else if (hasNumericField(root, "bbox", "xmin") &&
-                hasNumericField(root, "bbox", "xmax") &&
-                hasNumericField(root, "bbox", "ymin") &&
-                hasNumericField(root, "bbox", "ymax")) {
-                covering = new GeoParquetMetadata.Covering(new GeoParquetMetadata.CoveringBbox(
-                  List.of("bbox", "xmin"),
-                  List.of("bbox", "ymin"),
-                  List.of("bbox", "xmax"),
-                  List.of("bbox", "ymax")
-                ));
-              }
-            }
-            if (covering != null) {
-              var latLonBounds = bounds.latLon();
-              // TODO apply projection
-              var coveringBbox = covering.bbox();
-              var coordinateType =
-                fileMetadata.getSchema().getColumnDescription(coveringBbox.xmax().toArray(String[]::new))
-                  .getPrimitiveType()
-                  .getPrimitiveTypeName();
-              BiFunction<List<String>, Number, FilterPredicate> gtEq = switch (coordinateType) {
-                case DOUBLE -> (p, v) -> FilterApi.gtEq(Filters.doubleColumn(p), v.doubleValue());
-                case FLOAT -> (p, v) -> FilterApi.gtEq(Filters.floatColumn(p), v.floatValue());
-                default -> throw new UnsupportedOperationException();
-              };
-              BiFunction<List<String>, Number, FilterPredicate> ltEq = switch (coordinateType) {
-                case DOUBLE -> (p, v) -> FilterApi.ltEq(Filters.doubleColumn(p), v.doubleValue());
-                case FLOAT -> (p, v) -> FilterApi.ltEq(Filters.floatColumn(p), v.floatValue());
-                default -> throw new UnsupportedOperationException();
-              };
-              var bboxFilter = FilterApi.and(
-                FilterApi.and(
-                  gtEq.apply(coveringBbox.xmax(), latLonBounds.getMinX()),
-                  ltEq.apply(coveringBbox.xmin(), latLonBounds.getMaxX())
-                ),
-                FilterApi.and(
-                  gtEq.apply(coveringBbox.ymax(), latLonBounds.getMinY()),
-                  ltEq.apply(coveringBbox.ymin(), latLonBounds.getMaxY())
-                )
-              );
-              filter = filter == null ? bboxFilter : FilterApi.and(filter, bboxFilter);
-            } else {
-              LOGGER.warn("No covering column specified in geoparquet metadata, fall back to post-filtering");
-              postFilterBounds = bounds.latLon();
-            }
+      this.geometryReader = new GeometryReader(geoparquet);
+      if (!bounds.isWorld()) {
+        if (!geoparquet.primaryColumnMetadata().envelope().intersects(bounds.latLon())) {
+          outOfBounds = true;
+        } else {
+          var bboxFilter = geoparquet.primaryColumnMetadata().bboxFilter(fileMetadata.getSchema(), bounds);
+          if (bboxFilter != null) {
+            filter = filter == null ? bboxFilter : FilterApi.and(filter, bboxFilter);
+          } else {
+            LOGGER.warn("No covering column specified in geoparquet metadata, fall back to post-filtering");
+            postFilterBounds = bounds.latLon();
          }
        }
      }
@@ -299,7 +190,7 @@ public ParquetFeature next() {
           path,
           idGenerator != null ? idGenerator.applyAsLong(item) :
             Hashing.fnv1a64(blockHash, ByteBuffer.allocate(8).putLong(i).array()),
-          ParquetInputFile.this::readPrimaryGeometry,
+          geometryReader::readPrimaryGeometry,
           item
         );
 
@@ -328,27 +219,6 @@ private ParquetFileReader open() throws IOException {
       .build());
   }
 
-  private Geometry readPrimaryGeometry(WithTags tags) {
-    return readGeometry(tags, geometryColumn);
-  }
-
-  private Geometry readGeometry(WithTags tags, String column) {
-    var value = tags.getTag(column);
-    var converter = converters.get(column);
-    if (value == null) {
-      LOGGER.warn("Missing {} column", column);
-      return GeoUtils.EMPTY_GEOMETRY;
-    } else if (converter == null) {
-      throw new IllegalArgumentException("No geometry converter for " + column);
-    }
-    try {
-      return converter.apply(value);
-    } catch (Exception e) {
-      LOGGER.warn("Error reading geometry {}", column, e);
-      return GeoUtils.EMPTY_GEOMETRY;
-    }
-  }
-
   public long getCount() {
     return count;
   }
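
Illustrative only, not part of the patch: how the derived bbox predicate might be combined with an optional caller-supplied predicate, mirroring the constructor logic above. `customFilter` is a placeholder name, and wrapping the combined predicate with FilterCompat.get(...) is only a sketch of the usual way a FilterPredicate is handed to parquet's readers; the exact wiring inside ParquetInputFile may differ.

    FilterPredicate combined = customFilter == null ? bboxFilter : FilterApi.and(customFilter, bboxFilter);
    FilterCompat.Filter rowGroupFilter = FilterCompat.get(combined);
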
"coordinate_system": { + "axis": [ + { + "abbreviation": "Lon", + "direction": "east", + "name": "Geodetic longitude", + "unit": "degree" + }, + { + "abbreviation": "Lat", + "direction": "north", + "name": "Geodetic latitude", + "unit": "degree" + } + ], + "subtype": "ellipsoidal" + }, + "datum_ensemble": { + "accuracy": "2.0", + "ellipsoid": { + "inverse_flattening": 298.257223563, + "name": "WGS 84", + "semi_major_axis": 6378137 + }, + "id": { + "authority": "EPSG", + "code": 6326 + }, + "members": [ + { + "id": { + "authority": "EPSG", + "code": 1166 + }, + "name": "World Geodetic System 1984 (Transit)" + }, + { + "id": { + "authority": "EPSG", + "code": 1152 + }, + "name": "World Geodetic System 1984 (G730)" + }, + { + "id": { + "authority": "EPSG", + "code": 1153 + }, + "name": "World Geodetic System 1984 (G873)" + }, + { + "id": { + "authority": "EPSG", + "code": 1154 + }, + "name": "World Geodetic System 1984 (G1150)" + }, + { + "id": { + "authority": "EPSG", + "code": 1155 + }, + "name": "World Geodetic System 1984 (G1674)" + }, + { + "id": { + "authority": "EPSG", + "code": 1156 + }, + "name": "World Geodetic System 1984 (G1762)" + }, + { + "id": { + "authority": "EPSG", + "code": 1309 + }, + "name": "World Geodetic System 1984 (G2139)" + } + ], + "name": "World Geodetic System 1984 ensemble" + }, + "id": { + "authority": "OGC", + "code": "CRS84" + }, + "name": "WGS 84 (CRS84)", + "scope": "Not known.", + "type": "GeographicCRS" + }, + "edges": "planar", + "encoding": "WKB", + "geometry_types": [ + "Polygon", + "MultiPolygon" + ] + } + }, + "primary_column": "geometry", + "version": "1.1.0-dev" + } + """; + + @Test + void testParseBasicMetadata() throws IOException { + var parsed = GeoParquetMetadata.parse(new FileMetaData( + Types.buildMessage() + .required(PrimitiveType.PrimitiveTypeName.BINARY) + .named("geometry") + .named("root"), + Map.of("geo", EXAMPLE_METADATA), + "")); + assertEquals("geometry", parsed.primaryColumn()); + assertEquals("1.1.0-dev", parsed.version()); + assertEquals("planar", parsed.primaryColumnMetadata().edges()); + assertEquals("WKB", parsed.primaryColumnMetadata().encoding()); + assertEquals(new Envelope(-180.0, 180.0, -90.0, 83.6451), parsed.primaryColumnMetadata().envelope()); + assertEquals(new GeoParquetMetadata.CoveringBbox( + List.of("bbox", "xmin"), + List.of("bbox", "ymin"), + List.of("bbox", "xmax"), + List.of("bbox", "ymax") + ), parsed.primaryColumnMetadata().covering().bbox()); + assertEquals(List.of("Polygon", "MultiPolygon"), parsed.primaryColumnMetadata().geometryTypes()); + assertTrue(parsed.primaryColumnMetadata().crs() instanceof Map); + } + + @Test + void testFailsWhenNoGeometry() { + var fileMetadata = new FileMetaData( + Types.buildMessage() + .required(PrimitiveType.PrimitiveTypeName.BINARY) + .named("not_geometry") + .named("root"), + Map.of(), + ""); + assertThrows(IOException.class, () -> GeoParquetMetadata.parse(fileMetadata)); + } + + @Test + void testFailsWhenBadGeometryType() { + var fileMetadata = new FileMetaData( + Types.buildMessage() + .required(PrimitiveType.PrimitiveTypeName.INT32) + .named("geometry") + .named("root"), + Map.of(), + ""); + assertThrows(IOException.class, () -> GeoParquetMetadata.parse(fileMetadata)); + } + + @Test + void testInfersDefaultGeometry() throws IOException { + var parsed = GeoParquetMetadata.parse(new FileMetaData( + Types.buildMessage() + .required(PrimitiveType.PrimitiveTypeName.BINARY) + .named("geometry") + .named("root"), + Map.of(), + "")); + assertEquals("geometry", 
parsed.primaryColumn()); + assertEquals("WKB", parsed.primaryColumnMetadata().encoding()); + assertEquals(Bounds.WORLD.latLon(), parsed.primaryColumnMetadata().envelope()); + assertNull(parsed.primaryColumnMetadata().covering()); + } + + @Test + void testGeometryReaderFromMetadata() throws IOException { + var parsed = GeoParquetMetadata.parse(new FileMetaData( + Types.buildMessage() + .required(PrimitiveType.PrimitiveTypeName.BINARY) + .named("geometry") + .named("root"), + Map.of("geo", EXAMPLE_METADATA), + "")); + assertEquals(point(1, 2), new GeometryReader(parsed).readPrimaryGeometry(WithTags.from(Map.of( + "geometry", new WKBWriter().write(point(1, 2)) + )))); + } + + @Test + void testGeometryReaderFromMetadataDifferentName() throws IOException { + var parsed = GeoParquetMetadata.parse(new FileMetaData( + Types.buildMessage() + .required(PrimitiveType.PrimitiveTypeName.BINARY) + .named("other") + .named("root"), + Map.of("geo", """ + { + "primary_column": "other", + "columns": { + "other": { + "encoding": "WKB" + } + } + } + """), + "")); + assertEquals(point(1, 2), new GeometryReader(parsed).readPrimaryGeometry(WithTags.from(Map.of( + "other", new WKBWriter().write(point(1, 2)) + )))); + } + + @ParameterizedTest + @ValueSource(strings = {"wkb_geometry", "geometry"}) + void testReadWKBGeometryNoMetadata(String name) throws IOException { + var parsed = GeoParquetMetadata.parse(new FileMetaData( + Types.buildMessage() + .required(PrimitiveType.PrimitiveTypeName.BINARY) + .named(name) + .named("root"), + Map.of(), + "")); + assertEquals(point(1, 2), new GeometryReader(parsed).readPrimaryGeometry(WithTags.from(Map.of( + name, new WKBWriter().write(point(1, 2)) + )))); + } + + @Test + void testReadWKTGeometryNoMetadata() throws IOException { + var parsed = GeoParquetMetadata.parse(new FileMetaData( + Types.buildMessage() + .required(PrimitiveType.PrimitiveTypeName.BINARY) + .named("wkt_geometry") + .named("root"), + Map.of(), + "")); + assertEquals(point(1, 2), new GeometryReader(parsed).readPrimaryGeometry(WithTags.from(Map.of( + "wkt_geometry", new WKTWriter().write(point(1, 2)) + )))); + } + + @TestFactory + void testReadGeoArrowPoint() throws IOException { + var parsed = GeoParquetMetadata.parse(new FileMetaData( + Types.buildMessage().named("root"), + Map.of("geo", """ + { + "primary_column": "geoarrow", + "columns": { + "geoarrow": { + "encoding": "point" + } + } + } + """), + "")); + assertEquals(point(1, 2), new GeometryReader(parsed).readPrimaryGeometry(WithTags.from(Map.of( + "geoarrow", Map.of("x", 1, "y", 2) + )))); + } + + @TestFactory + void testReadGeoArrowMultiPoint() throws IOException { + var parsed = GeoParquetMetadata.parse(new FileMetaData( + Types.buildMessage().named("root"), + Map.of("geo", """ + { + "primary_column": "geoarrow", + "columns": { + "geoarrow": { + "encoding": "multipolygon" + } + } + } + """), + "")); + assertEquals(createMultiPoint(List.of(point(1, 2))), + new GeometryReader(parsed).readPrimaryGeometry(WithTags.from(Map.of( + "geoarrow", List.of(Map.of("x", 1, "y", 2)) + )))); + } + + @ParameterizedTest + @CsvSource({ + "bbox, true, DOUBLE", + "bbox, true, FLOAT", + "custom_bbox, true, DOUBLE", + "custom_bbox, true, FLOAT", + "bbox, false, DOUBLE", + "bbox, false, FLOAT", + }) + void testBboxFilterFromMetadata(String bbox, boolean hasMetadata, PrimitiveType.PrimitiveTypeName type) + throws IOException { + var schema = Types.buildMessage() + .required(PrimitiveType.PrimitiveTypeName.BINARY) + .named("geometry") + .requiredGroup() + 
.required(type).named("xmin") + .required(type).named("xmax") + .required(type).named("ymin") + .required(type).named("ymax") + .named(bbox) + .named("root"); + var parsed = GeoParquetMetadata.parse(new FileMetaData( + schema, + hasMetadata ? Map.of("geo", EXAMPLE_METADATA.replaceAll("\"bbox\",", "\"" + bbox + "\",")) : Map.of(), + "")); + var expected = type == PrimitiveType.PrimitiveTypeName.FLOAT ? + and( + and(gtEq(floatColumn(bbox + ".xmax"), 1f), ltEq(floatColumn(bbox + ".xmin"), 2f)), + and(gtEq(floatColumn(bbox + ".ymax"), 3f), ltEq(floatColumn(bbox + ".ymin"), 4f)) + ) : + and( + and(gtEq(doubleColumn(bbox + ".xmax"), 1.0), ltEq(doubleColumn(bbox + ".xmin"), 2.0)), + and(gtEq(doubleColumn(bbox + ".ymax"), 3.0), ltEq(doubleColumn(bbox + ".ymin"), 4.0)) + ); + assertEquals(expected, parsed.primaryColumnMetadata().bboxFilter(schema, new Bounds(new Envelope(1, 2, 3, 4)))); + } + + @ParameterizedTest + @CsvSource({ + "bbox, true, DOUBLE", + "bbox, true, FLOAT", + "custom_bbox, true, DOUBLE", + "custom_bbox, true, FLOAT", + "bbox, false, DOUBLE", + "bbox, false, FLOAT", + }) + void testBboxFilterFromMetadataOldGdalStyle(String bbox, boolean hasMetadata, PrimitiveType.PrimitiveTypeName type) + throws IOException { + var schema = Types.buildMessage() + .required(PrimitiveType.PrimitiveTypeName.BINARY) + .named("geometry") + .required(type).named(bbox + ".xmin") + .required(type).named(bbox + ".xmax") + .required(type).named(bbox + ".ymin") + .required(type).named(bbox + ".ymax") + .named("root"); + var parsed = GeoParquetMetadata.parse(new FileMetaData( + schema, + hasMetadata ? Map.of("geo", """ + { + "primary_column": "geometry", + "columns": { + "geometry": { + "covering": { + "bbox": { + "xmin": ["bbox.xmin"], + "xmax": ["bbox.xmax"], + "ymin": ["bbox.ymin"], + "ymax": ["bbox.ymax"] + } + } + } + } + } + """.replace("bbox.", bbox + ".")) : Map.of(), + "")); + var expected = type == PrimitiveType.PrimitiveTypeName.FLOAT ? + and( + and(gtEq(Filters.floatColumn(List.of(bbox + ".xmax")), 1f), + ltEq(Filters.floatColumn(List.of(bbox + ".xmin")), 2f)), + and(gtEq(Filters.floatColumn(List.of(bbox + ".ymax")), 3f), + ltEq(Filters.floatColumn(List.of(bbox + ".ymin")), 4f)) + ) : + and( + and(gtEq(Filters.doubleColumn(List.of(bbox + ".xmax")), 1.0), + ltEq(Filters.doubleColumn(List.of(bbox + ".xmin")), 2.0)), + and(gtEq(Filters.doubleColumn(List.of(bbox + ".ymax")), 3.0), + ltEq(Filters.doubleColumn(List.of(bbox + ".ymin")), 4.0)) + ); + assertEquals(expected, parsed.primaryColumnMetadata().bboxFilter(schema, new Bounds(new Envelope(1, 2, 3, 4)))); + } + + @Test + void testNoBboxFilterFromDefault() throws IOException { + var schema = Types.buildMessage() + .required(PrimitiveType.PrimitiveTypeName.BINARY) + .named("geometry") + .named("root"); + var parsed = GeoParquetMetadata.parse(new FileMetaData( + schema, + Map.of(), + "")); + assertNull(parsed.primaryColumnMetadata().bboxFilter(schema, new Bounds(new Envelope(1, 2, 3, 4)))); + } +}