From fb22392580160dbb3784ee6477795403183739bc Mon Sep 17 00:00:00 2001 From: Michael Barry Date: Sat, 25 May 2024 05:21:37 -0400 Subject: [PATCH] Structured tag api (#895) --- planetiler-core/pom.xml | 5 + .../planetiler/reader/JsonConversion.java | 46 ++ .../planetiler/reader/SourceFeature.java | 9 - .../onthegomap/planetiler/reader/Struct.java | 586 ++++++++++++++++++ .../planetiler/reader/StructSerializer.java | 24 + .../planetiler/reader/WithTags.java | 55 +- .../reader/parquet/ParquetFeature.java | 43 ++ .../parquet/ParquetRecordConverter.java | 3 +- .../planetiler/reader/StructTest.java | 351 +++++++++++ .../reader/parquet/ParquetConverterTest.java | 48 ++ .../reader/parquet/ParquetInputFileTest.java | 17 + 11 files changed, 1173 insertions(+), 14 deletions(-) create mode 100644 planetiler-core/src/main/java/com/onthegomap/planetiler/reader/JsonConversion.java create mode 100644 planetiler-core/src/main/java/com/onthegomap/planetiler/reader/Struct.java create mode 100644 planetiler-core/src/main/java/com/onthegomap/planetiler/reader/StructSerializer.java create mode 100644 planetiler-core/src/test/java/com/onthegomap/planetiler/reader/StructTest.java diff --git a/planetiler-core/pom.xml b/planetiler-core/pom.xml index 92c66b0805..869965309b 100644 --- a/planetiler-core/pom.xml +++ b/planetiler-core/pom.xml @@ -119,6 +119,11 @@ jackson-dataformat-csv ${jackson.version} + + com.fasterxml.jackson.datatype + jackson-datatype-jsr310 + ${jackson.version} + io.prometheus simpleclient diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/JsonConversion.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/JsonConversion.java new file mode 100644 index 0000000000..41d66c5d04 --- /dev/null +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/JsonConversion.java @@ -0,0 +1,46 @@ +package com.onthegomap.planetiler.reader; + +import com.fasterxml.jackson.core.JsonProcessingException; +import 
com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.SerializationFeature; +import com.fasterxml.jackson.databind.json.JsonMapper; +import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule; +import java.io.UncheckedIOException; + +/** + * Utilities for converting between JSON strings and java objects using Jackson utilities. + *

+ * {@link ObjectMapper} are expensive to construct, but not thread safe, so this class reuses the same object mapper + * within each thread but does not share between threads. + */ +class JsonConversion { + private JsonConversion() {} + + @SuppressWarnings("java:S5164") // ignore not calling remove() on mappers since number of threads is limited + private static final ThreadLocal MAPPERS = ThreadLocal.withInitial(() -> JsonMapper.builder() + .addModule( + new JavaTimeModule().addSerializer(Struct.class, new StructSerializer()) + ) + .configure(SerializationFeature.WRITE_DATES_AS_TIMESTAMPS, false) + .build()); + + public static String writeValueAsString(Object o) { + try { + return o == null ? null : MAPPERS.get().writeValueAsString(o); + } catch (JsonProcessingException e) { + throw new UncheckedIOException(e); + } + } + + public static T convertValue(Object o, Class clazz) { + return o == null ? null : MAPPERS.get().convertValue(o, clazz); + } + + public static T readValue(String string, Class clazz) { + try { + return string == null ? 
null : MAPPERS.get().readValue(string, clazz); + } catch (JsonProcessingException e) { + return null; + } + } +} diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/SourceFeature.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/SourceFeature.java index 478bedae9c..ab1dc9e46a 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/SourceFeature.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/SourceFeature.java @@ -77,15 +77,6 @@ public boolean hasTag(String key) { } - @Override - public Object getTag(String key, Object defaultValue) { - Object val = tags.get(key); - if (val == null) { - return defaultValue; - } - return val; - } - @Override public Map tags() { return tags; diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/Struct.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/Struct.java new file mode 100644 index 0000000000..cb86c7fe15 --- /dev/null +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/Struct.java @@ -0,0 +1,586 @@ +package com.onthegomap.planetiler.reader; + +import com.onthegomap.planetiler.util.Parse; +import java.nio.charset.StandardCharsets; +import java.time.Duration; +import java.time.Instant; +import java.time.LocalDate; +import java.time.LocalTime; +import java.time.format.DateTimeFormatter; +import java.time.format.DateTimeParseException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.UUID; +import java.util.function.UnaryOperator; +import java.util.stream.Collectors; + +/** + * Wrapper for a value that could either be a primitive, list, or map of nested primitives. + *

+ * The APIs are meant to be forgiving, so if you access field a.b.c but "a" is missing from the top-level struct it will + * just return {@link #NULL} instead of throwing an exception. + *

+ * Values are also coerced to other datatypes when possible, for example: + *

+ */ +public interface Struct { + /** Returns a new struct that wraps a primitive java value, or nested {@link List} or {@link Map} of values. */ + static Struct of(Object o) { + return switch (o) { + case null -> NULL; + case Struct struct -> struct; + case Number n -> new Numeric(n); + case Boolean b -> new BooleanStruct(b); + case String s -> new StringStruct(s); + case byte[] b -> new BinaryStruct(b); + case Instant i -> new InstantStruct(i); + case LocalTime i -> new LocalTimeStruct(i); + case LocalDate i -> new LocalDateStruct(i); + case UUID uuid -> new PrimitiveStruct<>(uuid); + case Map map -> { + Map result = LinkedHashMap.newLinkedHashMap(map.size()); + for (var e : map.entrySet()) { + var v = of(e.getValue()); + if (!v.isNull()) { + result.put(e.getKey(), v); + } + } + yield new MapStruct(result); + } + case Collection collection -> { + List result = new ArrayList<>(collection.size()); + for (var d : collection) { + result.add(of(d)); + } + yield new ListStruct(result); + } + default -> throw new IllegalArgumentException("Unable to convert " + o + " (" + o.getClass() + ")"); + }; + } + + /** + * Returns the nested field of a map struct, an element of an array if {@code key} is numeric, or {@link #NULL} when + * called on a primitive value. + */ + default Struct get(Object key) { + return NULL; + } + + /** Shortcut for calling {@link #get(Object)} multiple times to query a value several layers deep. */ + default Struct get(Object first, Object... others) { + Struct struct = first instanceof Number n ? get(n.intValue()) : get(first.toString()); + for (Object other : others) { + struct = other instanceof Number n ? struct.get(n.intValue()) : struct.get(other.toString()); + if (struct.isNull()) { + return Struct.NULL; + } + } + return struct; + } + + /** When this is map, returns a map from key to value struct, otherwise an empty map. 
*/ + default Map asMap() { + return Map.of(); + } + + /** Returns this struct, or {@code fallback} when {@link #NULL} */ + default Struct orElse(Object fallback) { + return this; + } + + /** A missing or empty value. */ + Struct NULL = new Struct() { + @Override + public Object rawValue() { + return null; + } + + @Override + public List asList() { + return List.of(); + } + + @Override + public String asString() { + return null; + } + + @Override + public Struct orElse(Object fallback) { + return of(fallback); + } + + @Override + public String toString() { + return "null"; + } + + @Override + public String asJson() { + return "null"; + } + + @Override + public boolean isNull() { + return true; + } + + @Override + public boolean equals(Object obj) { + return obj == NULL; + } + + @Override + public int hashCode() { + return 0; + } + }; + + /** Returns the nth element of a list, or {@link #NULL} when not a list or {@code index} is out of bounds. */ + default Struct get(int index) { + return NULL; + } + + /** + * Returns the list of nested structs in a list, a list of this single element when this is a primitive, or empty list + * when {@link #NULL}. + */ + default List asList() { + return List.of(this); + } + + /** + * Returns the {@link Number#intValue()} for numeric values, millisecond value for time types, or attempts to parse as + * a number when this is a string. + */ + default Integer asInt() { + return null; + } + + /** + * Returns the {@link Number#longValue()} for numeric values, millisecond value for time types, or attempts to parse + * as a number when this is a string. + */ + default Long asLong() { + return null; + } + + /** + * Returns the {@link Number#doubleValue()} for numeric values, millisecond value for time types, or attempts to + * parse as a double when this is a string. + */ + default Double asDouble() { + return null; + } + + /** + * Returns boolean value of this element, or true for "1", "true", "yes" and false for "0", "false", "no".
+ */ + default Boolean asBoolean() { + return false; + } + + /** Returns a string representation of this value (use {@link #asJson()} for json string). */ + default String asString() { + return rawValue() == null ? null : rawValue().toString(); + } + + + /** + * Returns an {@link Instant} parsed from milliseconds since epoch, or a string with an ISO-8601 encoded time string. + */ + default Instant asTimestamp() { + return null; + } + + /** Returns a byte array value or bytes from a UTF8-encoded string. */ + @SuppressWarnings("java:S1168") + default byte[] asBytes() { + return null; + } + + default boolean isNull() { + return false; + } + + /** Returns true if this is a map with nested key/value pairs, false for lists or primitives. */ + default boolean isStruct() { + return false; + } + + /** Returns the raw primitive, {@link List} or {@link Map} value, with all nested {@link Struct Structs} unwrapped. */ + Object rawValue(); + + /** + * Attempts to marshal this value into a typed java class or record using + * jackson-databind. + *

+ * For example: + * {@snippet : + * record Point(double x, double y) {} + * var point = Struct.of(Map.of("x", 1.5, "y", 2)).as(Point.class); + * System.out.println(point); // "Point[x=1.5, y=2.0]" + * } + */ + default T as(Class clazz) { + return JsonConversion.convertValue(rawValue(), clazz); + } + + /** Returns a JSON string representation of the raw value wrapped by this struct. */ + default String asJson() { + return JsonConversion.writeValueAsString(rawValue()); + } + + /** + * Returns a new list where each element of this list has been expanded to the list of elements returned by + * {@code mapper}. + *

+ * Individual items are treated as a list containing just that item. + */ + default Struct flatMap(UnaryOperator mapper) { + var list = asList().stream() + .flatMap(item -> mapper.apply(item).asList().stream()) + .map(Struct::of) + .toList(); + return list.isEmpty() ? NULL : new ListStruct(list); + } + + class PrimitiveStruct implements Struct { + + final T value; + private String asJson; + + PrimitiveStruct(T value) { + this.value = value; + } + + + @Override + public final Object rawValue() { + return value; + } + + @Override + public String asJson() { + if (this.asJson == null) { + this.asJson = Struct.super.asJson(); + } + return asJson; + } + + @Override + public String asString() { + return value.toString(); + } + + @Override + public String toString() { + return asString(); + } + + @Override + public boolean equals(Object o) { + return this == o || (o instanceof PrimitiveStruct that && value.equals(that.value)); + } + + @Override + public int hashCode() { + return value.hashCode(); + } + } + + class Numeric extends PrimitiveStruct { + + Numeric(Number value) { + super(value); + } + + @Override + public Integer asInt() { + return value.intValue(); + } + + @Override + public Long asLong() { + return value.longValue(); + } + + @Override + public Double asDouble() { + return value.doubleValue(); + } + + @Override + public Instant asTimestamp() { + var raw = Instant.ofEpochMilli(value.longValue()); + if (value instanceof Float || value instanceof Double) { + double doubleValue = value.doubleValue(); + raw = raw.plusNanos((long) ((doubleValue - Math.floor(doubleValue)) * Duration.ofMillis(1).toNanos())); + } + return raw; + } + } + + class BooleanStruct extends PrimitiveStruct { + + BooleanStruct(boolean value) { + super(value); + } + + @Override + public Boolean asBoolean() { + return value == Boolean.TRUE; + } + } + + @SuppressWarnings("java:S2160") // don't need to override equals() for struct since it is derived from value + class StringStruct extends 
PrimitiveStruct { + private Struct struct = null; + + + StringStruct(String value) { + super(value); + } + + @Override + public String asString() { + return value; + } + + @Override + public Integer asInt() { + return Parse.parseIntOrNull(value); + } + + @Override + public Long asLong() { + return Parse.parseLongOrNull(value); + } + + @Override + public Double asDouble() { + return Parse.parseDoubleOrNull(value); + } + + @Override + public Boolean asBoolean() { + return Parse.bool(value); + } + + @Override + public Instant asTimestamp() { + try { + return Instant.parse(value); + } catch (DateTimeParseException e) { + Long value = asLong(); + if (value != null) { + return Instant.ofEpochMilli(value); + } + return null; + } + } + + @Override + public Struct get(Object key) { + return parseJson().get(key); + } + + @Override + public Struct get(int index) { + return parseJson().get(index); + } + + @Override + public Map asMap() { + return parseJson().asMap(); + } + + private Struct parseJson() { + return struct != null ? 
struct : (struct = of(JsonConversion.readValue(value, Object.class))); + } + + @Override + public byte[] asBytes() { + return value.getBytes(StandardCharsets.UTF_8); + } + + @Override + public T as(Class clazz) { + return JsonConversion.readValue(value, clazz); + } + } + + class BinaryStruct extends PrimitiveStruct { + + BinaryStruct(byte[] value) { + super(value); + } + + @Override + public String asString() { + return new String(value, StandardCharsets.UTF_8); + } + + @Override + public byte[] asBytes() { + return value; + } + } + + class InstantStruct extends PrimitiveStruct { + + InstantStruct(Instant value) { + super(value); + } + + @Override + public Instant asTimestamp() { + return value; + } + + @Override + public Integer asInt() { + return Math.toIntExact(value.toEpochMilli()); + } + + @Override + public Long asLong() { + return value.toEpochMilli(); + } + + @Override + public Double asDouble() { + return (double) value.toEpochMilli(); + } + } + + class LocalTimeStruct extends PrimitiveStruct { + + LocalTimeStruct(LocalTime value) { + super(value); + } + + @Override + public Integer asInt() { + return Math.toIntExact(Duration.ofNanos(value.toNanoOfDay()).toMillis()); + } + + @Override + public Long asLong() { + return Duration.ofNanos(value.toNanoOfDay()).toMillis(); + } + + @Override + public Double asDouble() { + return value.toNanoOfDay() * 1d / Duration.ofMillis(1).toNanos(); + } + + @Override + public String asString() { + return DateTimeFormatter.ISO_LOCAL_TIME.format(value); + } + } + + class LocalDateStruct extends PrimitiveStruct { + + LocalDateStruct(LocalDate value) { + super(value); + } + + @Override + public Integer asInt() { + return Math.toIntExact(value.toEpochDay()); + } + + @Override + public Long asLong() { + return value.toEpochDay(); + } + + @Override + public Double asDouble() { + return (double) value.toEpochDay(); + } + + @Override + public String asString() { + return DateTimeFormatter.ISO_LOCAL_DATE.format(value); + } + } + + 
class MapStruct extends PrimitiveStruct> { + + MapStruct(Map value) { + super(value); + } + + @Override + public Struct get(Object key) { + var result = value.get(key); + if (result != null) { + return result; + } else if (key instanceof String s && s.contains(".")) { + String[] parts = s.split("\\.", 2); + if (parts.length == 2) { + String firstPart = parts[0]; + return firstPart.endsWith("[]") ? + get(firstPart.substring(0, firstPart.length() - 2)).flatMap(child -> child.get(parts[1])) : + get(firstPart, parts[1]); + } + } + + return NULL; + } + + @Override + public boolean isStruct() { + return true; + } + + @Override + public String asString() { + return super.asJson(); + } + + @Override + public Map asMap() { + return value.entrySet().stream() + .map(e -> Map.entry(e.getKey(), e.getValue())) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + } + } + + class ListStruct extends PrimitiveStruct> { + ListStruct(List value) { + super(value); + } + + @Override + public List asList() { + return value; + } + + @Override + public Struct get(int index) { + return index < value.size() && index >= 0 ? value.get(index) : NULL; + } + + @Override + public Struct get(Object key) { + return key instanceof Number n ? 
get(n.intValue()) : NULL; + } + } + + +} diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/StructSerializer.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/StructSerializer.java new file mode 100644 index 0000000000..c8cd05cb67 --- /dev/null +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/StructSerializer.java @@ -0,0 +1,24 @@ +package com.onthegomap.planetiler.reader; + +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.databind.SerializerProvider; +import com.fasterxml.jackson.databind.ser.std.StdSerializer; +import java.io.IOException; + +class StructSerializer extends StdSerializer { + + public StructSerializer() { + this(null); + } + + public StructSerializer(Class t) { + super(t); + } + + @Override + public void serialize( + Struct value, JsonGenerator jgen, SerializerProvider provider) + throws IOException { + jgen.writePOJO(value.rawValue()); + } +} diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/WithTags.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/WithTags.java index 80dbde4fdc..1fcdfd25a1 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/WithTags.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/WithTags.java @@ -2,7 +2,9 @@ import com.onthegomap.planetiler.util.Imposm3Parsers; import com.onthegomap.planetiler.util.Parse; +import java.util.Arrays; import java.util.Map; +import java.util.Objects; /** An input element with a set of string key/object value pairs. 
*/ public interface WithTags { @@ -14,7 +16,21 @@ static WithTags from(Map tags) { Map tags(); default Object getTag(String key) { - return tags().get(key); + var result = tags().get(key); + if (result != null) { + return result; + } else if (key.contains(".")) { + return getDotted(key).rawValue(); + } + return null; + } + + private Struct getDotted(String key) { + String[] parts = key.split("\\.", 2); + if (parts.length == 2) { + return getStruct(parts[0]).get(parts[1]); + } + return getStruct(parts[0]); } default Object getTag(String key, Object defaultValue) { @@ -26,7 +42,8 @@ default Object getTag(String key, Object defaultValue) { } default boolean hasTag(String key) { - return tags().containsKey(key); + var contains = tags().containsKey(key); + return contains || (key.contains(".") && !getDotted(key).isNull()); } default boolean hasTag(String key, Object value) { @@ -77,8 +94,8 @@ default String getString(String key, String defaultValue) { } /** - * Returns {@code false} if {@code tag}'s {@link Object#toString()} value is empty, "0", "false", or "no" and {@code - * true} otherwise. + * Returns {@code false} if {@code tag}'s {@link Object#toString()} value is empty, "0", "false", or "no" and + * {@code true} otherwise. */ default boolean getBoolean(String key) { return Parse.bool(getTag(key)); @@ -112,5 +129,35 @@ default void setTag(String key, Object value) { tags().put(key, value); } + /** Returns a {@link Struct} wrapper for a field, which can be a primitive or nested list/map. */ + default Struct getStruct(String key) { + return Struct.of(getTag(key)); + } + + /** + * Shortcut for calling {@link Struct#get(Object)} multiple times to get a deeply nested value. + *

+ * Arguments can be strings to get values out of maps, or integers to get an element at a certain index out of a list. When no extra arguments are given, the struct for {@code key} itself is returned. + */ + default Struct getStruct(Object key, Object... others) { + Struct struct = getStruct(Objects.toString(key)); + return others.length == 0 ? struct : struct.get(others[0], Arrays.copyOfRange(others, 1, others.length)); + } + + /** + * Attempts to marshal the properties on this feature into a typed java class or record using + * jackson-databind. + */ + default T as(Class clazz) { + return JsonConversion.convertValue(tags(), clazz); + } + + /** + * Serializes the properties on this feature as a JSON object. + */ + default String asJson() { + return JsonConversion.writeValueAsString(tags()); + } + record OfMap(@Override Map tags) implements WithTags {} } diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/parquet/ParquetFeature.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/parquet/ParquetFeature.java index 03bcaf7ef5..895fb9a662 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/parquet/ParquetFeature.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/parquet/ParquetFeature.java @@ -3,6 +3,7 @@ import com.onthegomap.planetiler.geo.GeoUtils; import com.onthegomap.planetiler.geo.GeometryException; import com.onthegomap.planetiler.reader.SourceFeature; +import com.onthegomap.planetiler.reader.Struct; import java.util.List; import java.util.Map; import org.locationtech.jts.geom.Geometry; @@ -19,6 +20,7 @@ public class ParquetFeature extends SourceFeature { private final Object rawGeometry; private Geometry latLon; private Geometry world; + private Struct struct = null; ParquetFeature(String source, String sourceLayer, long id, GeometryReader geometryParser, Map tags) { @@ -65,6 +67,47 @@ public boolean canBeLine() { } } + private Struct cachedStruct() { + return struct != null ?
struct : (struct = Struct.of(tags())); + } + + @Override + public Struct getStruct(String key) { + return cachedStruct().get(key); + } + + @Override + public Struct getStruct(Object key, Object... others) { + return cachedStruct().get(key, others); + } + + @Override + public Object getTag(String key) { + var value = tags().get(key); + if (value == null) { + String[] parts = key.split("\\.", 2); + if (parts.length == 2) { + return getStruct(parts[0]).get(parts[1]).rawValue(); + } + return getStruct(parts[0]).rawValue(); + } + return value; + } + + @Override + public Object getTag(String key, Object defaultValue) { + var value = getTag(key); + if (value == null) { + value = defaultValue; + } + return value; + } + + @Override + public boolean hasTag(String key) { + return super.hasTag(key) || getTag(key) != null; + } + @Override public String toString() { return tags().toString(); diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/parquet/ParquetRecordConverter.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/parquet/ParquetRecordConverter.java index 2cdae2b7ff..93aeafcd13 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/parquet/ParquetRecordConverter.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/parquet/ParquetRecordConverter.java @@ -64,7 +64,8 @@ private static class ListConverter extends StructConverter { @Override protected Converter makeConverter(Context child) { - if ((child.named("list") || child.named("array")) && child.onlyField("element")) { + if ((child.named("list") || child.named("array")) && + (child.onlyField("element") || child.onlyField("array_element"))) { return new ListElementConverter(child.hoist()); } return super.makeConverter(child); diff --git a/planetiler-core/src/test/java/com/onthegomap/planetiler/reader/StructTest.java b/planetiler-core/src/test/java/com/onthegomap/planetiler/reader/StructTest.java new file mode 100644 index 
0000000000..8e3c9ce675 --- /dev/null +++ b/planetiler-core/src/test/java/com/onthegomap/planetiler/reader/StructTest.java @@ -0,0 +1,351 @@ +package com.onthegomap.planetiler.reader; + +import static org.junit.jupiter.api.Assertions.*; + +import java.nio.charset.StandardCharsets; +import java.time.Duration; +import java.time.Instant; +import java.time.LocalDate; +import java.time.LocalTime; +import java.util.List; +import java.util.Map; +import java.util.UUID; +import org.junit.jupiter.api.Test; + +class StructTest { + @Test + void testGet() { + var struct = WithTags.from(Map.of( + "a", Map.of("b", "c") + )); + var nested = struct.getStruct("a").get("b"); + assertEquals("c", nested.asString()); + assertEquals("c", nested.rawValue()); + assertFalse(nested.isNull()); + + assertTrue(struct.getStruct("a").get("b").get("c").get("d").isNull()); + } + + @Test + void testGetMultilevel() { + var struct = WithTags.from(Map.of( + "a", Map.of("b", "c") + )); + var nested = struct.getStruct("a", "b"); + assertEquals("c", nested.asString()); + assertEquals("c", nested.rawValue()); + assertFalse(nested.isNull()); + + assertTrue(struct.getStruct("a", "b", "c", "d").isNull()); + } + + + @Test + void testGetDottedFromStruct() { + assertEquals("c", Struct.of(Map.of( + "a", Map.of("b", "c") + )).get("a.b").asString()); + assertEquals("c", Struct.of(Map.of( + "a.b", "c" + )).get("a.b").asString()); + assertEquals("d", Struct.of(Map.of( + "a", Map.of("b.c", "d") + )).get("a.b.c").asString()); + assertNull(Struct.of(Map.of( + "a", Map.of("b.c", "d") + )).get("a.b.e").asString()); + } + + @Test + void testGetDottedFromWithTags() { + assertEquals("c", WithTags.from(Map.of( + "a", Map.of("b", "c") + )).getStruct("a.b").asString()); + assertEquals("c", WithTags.from(Map.of( + "a", Map.of("b", "c") + )).getTag("a.b")); + assertTrue(WithTags.from(Map.of( + "a", Map.of("b", "c") + )).hasTag("a.b")); + } + + + @Test + void testListQuery() { + var struct = Struct.of(Map.of( + "a", 
List.of(Map.of("b", "c"), Map.of("b", "d")) + )); + assertEquals("d", struct.get("a").flatMap(elem -> elem.get("b")).get(1).asString()); + assertEquals(Struct.of(List.of("c", "d")), struct.get("a[].b")); + } + + @Test + void testListGet() { + var struct = Struct.of(List.of(1, 2, 3)); + assertEquals(1, struct.get(0).asInt()); + assertEquals(3, struct.get(2).asInt()); + assertTrue(struct.get(4).isNull()); + assertTrue(struct.get(-1).isNull()); + } + + @Test + void testNullInput() { + var struct = Struct.of(null); + assertNull(struct.rawValue()); + assertTrue(struct.isNull()); + assertTrue(struct.get(0).isNull()); + assertTrue(struct.get("nested").isNull()); + assertTrue(struct.get("nested", "level2").isNull()); + assertEquals(Map.of(), struct.asMap()); + assertEquals(List.of(), struct.asList()); + assertEquals("null", struct.toString()); + assertEquals("null", struct.asJson()); + record Type() {} + assertNull(struct.as(Type.class)); + + assertEquals(1, struct.orElse(Struct.of(1)).rawValue()); + } + + private static void assertNotListOrMap(Struct struct) { + assertTrue(struct.get(0).isNull()); + assertTrue(struct.get("nested").isNull()); + assertTrue(struct.get("nested", "level2").isNull()); + assertEquals(Map.of(), struct.asMap()); + assertEquals(List.of(struct), struct.asList()); + } + + @Test + void testBooleanInput() { + var struct = Struct.of(true); + assertEquals(true, struct.rawValue()); + assertEquals(true, struct.asBoolean()); + assertFalse(struct.isNull()); + assertNotListOrMap(struct); + assertEquals(true, struct.asList().get(0).asBoolean()); + assertEquals("true", struct.toString()); + assertEquals("true", struct.asJson()); + + assertEquals(true, struct.orElse(Struct.of(1)).rawValue()); + } + + @Test + void testIntInput() { + var struct = Struct.of(1); + assertEquals(1, struct.rawValue()); + assertEquals(1, struct.asInt()); + assertEquals(1L, struct.asLong()); + assertEquals(1d, struct.asDouble()); + assertFalse(struct.isNull()); + 
assertNotListOrMap(struct); + assertEquals(1, struct.asList().get(0).asInt()); + assertEquals("1", struct.toString()); + assertEquals("1", struct.asJson()); + + assertEquals(1, struct.orElse(Struct.of(2)).rawValue()); + } + + @Test + void testLongInput() { + var struct = Struct.of(1L); + assertEquals(1L, struct.rawValue()); + assertEquals(1, struct.asInt()); + assertEquals(1L, struct.asLong()); + assertEquals(1d, struct.asDouble()); + assertEquals(Instant.ofEpochMilli(1), struct.asTimestamp()); + assertFalse(struct.isNull()); + assertNotListOrMap(struct); + assertEquals(1, struct.asList().get(0).asInt()); + assertEquals("1", struct.toString()); + assertEquals("1", struct.asJson()); + + assertEquals(1L, struct.orElse(Struct.of(2)).rawValue()); + } + + @Test + void testFloatInput() { + var struct = Struct.of(1.3f); + assertEquals(1.3f, struct.rawValue()); + assertEquals(1, struct.asInt()); + assertEquals(1L, struct.asLong()); + assertEquals(1.3d, struct.asDouble(), 1e-2); + assertFalse(struct.isNull()); + assertNotListOrMap(struct); + assertEquals(1.3, struct.asList().get(0).asDouble(), 1e-2); + assertEquals("1.3", struct.toString()); + assertEquals("1.3", struct.asJson()); + + assertEquals(1.3f, struct.orElse(Struct.of(2)).rawValue()); + } + + @Test + void testDoubleInput() { + var struct = Struct.of(1.3d); + assertEquals(1.3d, struct.rawValue()); + assertEquals(1, struct.asInt()); + assertEquals(1L, struct.asLong()); + assertEquals(1.3d, struct.asDouble()); + assertFalse(struct.isNull()); + assertNotListOrMap(struct); + assertEquals(1.3, struct.asList().get(0).asDouble()); + assertEquals("1.3", struct.toString()); + assertEquals("1.3", struct.asJson()); + assertEquals(1.3d, struct.orElse(Struct.of(2)).rawValue()); + } + + @Test + void testNumbersConvertToTimestamps() { + assertEquals(Instant.ofEpochSecond(1, Duration.ofMillis(1).toNanos() / 2), Struct.of(1000.5).asTimestamp()); + assertEquals(Instant.ofEpochMilli(1500), Struct.of(1500L).asTimestamp()); + } + + 
/** Checks a struct wrapping Instant.ofEpochSecond(60): numeric conversions return 60000, asTimestamp returns the instant, toString gives the text 1970-01-01T00:01:00Z, and asJson gives that text quoted. */ @Test + void testInstantInput() { + var struct = Struct.of(Instant.ofEpochSecond(60)); + assertFalse(struct.isNull()); + assertNotListOrMap(struct); + + assertEquals(Instant.ofEpochSecond(60), struct.rawValue()); + assertEquals(60_000, struct.asInt()); + assertEquals(60_000L, struct.asLong()); + assertEquals(60_000d, struct.asDouble()); + assertEquals(Instant.ofEpochSecond(60), struct.asTimestamp()); + assertEquals(60_000d, struct.asList().get(0).asDouble()); + assertEquals("1970-01-01T00:01:00Z", struct.toString()); + assertEquals("\"1970-01-01T00:01:00Z\"", struct.asJson()); + + assertEquals(Instant.ofEpochSecond(60), struct.orElse(Struct.of(2)).rawValue()); + } + + /** Checks a struct wrapping LocalTime 01:02: numeric conversions return the millisecond length of 1 hour 2 minutes, toString gives 01:02:00, and asJson gives that text quoted. */ @Test + void testLocalTimeInput() { + var struct = Struct.of(LocalTime.of(1, 2)); + assertFalse(struct.isNull()); + assertNotListOrMap(struct); + + assertEquals(LocalTime.of(1, 2), struct.rawValue()); + assertEquals((int) Duration.ofHours(1).plusMinutes(2).toMillis(), struct.asInt()); + assertEquals(Duration.ofHours(1).plusMinutes(2).toMillis(), struct.asLong()); + assertEquals((double) Duration.ofHours(1).plusMinutes(2).toMillis(), struct.asDouble()); + assertEquals("01:02:00", struct.toString()); + assertEquals("\"01:02:00\"", struct.asJson()); + } + + /** Checks a struct wrapping LocalDate 0001-02-03: numeric conversions return -719129 (presumably the epoch day of that date, TODO confirm), toString gives 0001-02-03, and asJson gives that text quoted. */ @Test + void testLocalDateInput() { + var struct = Struct.of(LocalDate.of(1, 2, 3)); + assertFalse(struct.isNull()); + assertNotListOrMap(struct); + + assertEquals(LocalDate.of(1, 2, 3), struct.rawValue()); + assertEquals(-719129, struct.asInt()); + assertEquals(-719129L, struct.asLong()); + assertEquals(-719129d, struct.asDouble()); + assertEquals("0001-02-03", struct.toString()); + assertEquals("\"0001-02-03\"", struct.asJson()); + } + + /** Checks a struct wrapping a UUID: rawValue returns the UUID, asString and toString give its hyphenated text form, and asJson gives that text quoted. */ @Test + void testUUIDInput() { + var struct = Struct.of(new UUID(1, 2)); + assertFalse(struct.isNull()); + assertNotListOrMap(struct); + + assertEquals(new UUID(1, 2), struct.rawValue()); + assertEquals("00000000-0000-0001-0000-000000000002", struct.asString()); + assertEquals("00000000-0000-0001-0000-000000000002", 
struct.toString()); + assertEquals("\"00000000-0000-0001-0000-000000000002\"", struct.asJson()); + } + + /** Checks a struct wrapping the text abc: asString returns it, asJson quotes it, asInt, asLong and asDouble return null, asBoolean returns true, and asBytes returns its UTF-8 bytes. */ @Test + void testStringInput() { + var struct = Struct.of("abc"); + assertFalse(struct.isNull()); + assertNotListOrMap(struct); + assertEquals("abc", struct.asString()); + assertEquals("\"abc\"", struct.asJson()); + assertNull(struct.asInt()); + assertNull(struct.asLong()); + assertNull(struct.asDouble()); + assertEquals(true, struct.asBoolean()); + assertArrayEquals("abc".getBytes(StandardCharsets.UTF_8), struct.asBytes()); + } + + /** Checks that the text 1.5 converts to 1 as int and long, to 1.5 as double, to true as boolean, and stays a quoted string in asJson. */ @Test + void testStringToNumber() { + var struct = Struct.of("1.5"); + assertEquals(1, struct.asInt()); + assertEquals(1L, struct.asLong()); + assertEquals(1.5, struct.asDouble(), 1e-2); + assertEquals(true, struct.asBoolean()); + assertEquals("\"1.5\"", struct.asJson()); + } + + /** Checks asBoolean on text: false, 0 and no are false, while true, yes and 1 are true. */ @Test + void testStringToBoolean() { + assertFalse(Struct.of("false").asBoolean()); + assertTrue(Struct.of("true").asBoolean()); + assertTrue(Struct.of("yes").asBoolean()); + assertFalse(Struct.of("0").asBoolean()); + assertTrue(Struct.of("1").asBoolean()); + assertFalse(Struct.of("no").asBoolean()); + } + + /** Checks asTimestamp on text: both the Instant.toString form and the digits 100000 parse to the instant at epoch second 100. */ @Test + void testStringToInstant() { + assertEquals(Instant.ofEpochSecond(100), Struct.of(Instant.ofEpochSecond(100).toString()).asTimestamp()); + assertEquals(Instant.ofEpochSecond(100), Struct.of("100000").asTimestamp()); + } + + /** Checks that a JSON object held in a string can be navigated with get(key, index, key) and mapped onto records with as(Class). NOTE(review): the component of Outer appears as a raw List here; generic type arguments may have been stripped from this text - confirm against the original file. */ @Test + void testJsonStringToStruct() { + record Inner(int b) {} + record Outer(List a) {} + var struct = Struct.of(""" + {"a":[{"b":1}]} + """); + assertEquals(1, struct.get("a", 0, "b").asInt()); + assertEquals(new Outer(List.of(new Inner(1))), struct.as(Outer.class)); + } + + /** Checks that a JSON array held in a string can be indexed like a list struct. */ @Test + void testJsonListToStruct() { + var struct = Struct.of(""" + [1,2,3] + """); + assertEquals(1, struct.get(0).asInt()); + assertEquals(2, struct.get(1).asInt()); + } + + /** Checks that a struct wrapping a byte array returns the same bytes from asBytes. */ @Test + void testBinaryInput() { + var struct = Struct.of(new byte[]{1, 2}); + assertArrayEquals(new byte[]{1, 2}, struct.asBytes()); + } + + /** Checks that a nested map wrapped by WithTags.from maps onto nested records via as(Class), both at the root and from getStruct on a sub-key. */ @Test + void testAsMapper() { + var 
struct = WithTags.from(Map.of( + "a", Map.of("b", "c") + )); + record Inner(String b) {} + record Outer(Inner a) {} + assertEquals(new Outer(new Inner("c")), struct.as(Outer.class)); + assertEquals(new Inner("c"), struct.getStruct("a").as(Inner.class)); + } + + /** Checks asJson at three depths of a nested map: the root object, the nested object from getStruct, and a quoted leaf string. */ @Test + void testAsJson() { + var struct = WithTags.from(Map.of( + "a", Map.of("b", "c") + )); + assertEquals(""" + {"a":{"b":"c"}} + """.strip(), struct.asJson()); + assertEquals(""" + {"b":"c"} + """.strip(), struct.getStruct("a").asJson()); + assertEquals(""" + "c" + """.strip(), struct.getStruct("a").get("b").asJson()); + } +} diff --git a/planetiler-core/src/test/java/com/onthegomap/planetiler/reader/parquet/ParquetConverterTest.java b/planetiler-core/src/test/java/com/onthegomap/planetiler/reader/parquet/ParquetConverterTest.java index 25a6840031..a71e8ddb0d 100644 --- a/planetiler-core/src/test/java/com/onthegomap/planetiler/reader/parquet/ParquetConverterTest.java +++ b/planetiler-core/src/test/java/com/onthegomap/planetiler/reader/parquet/ParquetConverterTest.java @@ -319,6 +319,54 @@ void testListFromListElementStructs() { assertEquals(Map.of("value", Lists.newArrayList(1, null, 3)), materializer.getCurrentRecord()); } + /** Checks conversion of a list-annotated group named value whose repeated group array holds array_element groups with an INT32 someField: starting and ending value with no entries yields an empty list, and two entries yield a list of two single-key maps. */ @Test + void testArray() { + var materializer = new ParquetRecordConverter(Types.buildMessage() + .requiredGroup().as(LogicalTypeAnnotation.listType()) + .repeatedGroup() + .requiredGroup() + .required(PrimitiveType.PrimitiveTypeName.INT32) + .named("someField") + .named("array_element") + .named("array") + .named("value") + .named("message")); + + var root = materializer.getRootConverter(); + var value = root.getConverter(0).asGroupConverter(); + var list = value.getConverter(0).asGroupConverter(); + var element = list.getConverter(0).asGroupConverter(); + var field = element.getConverter(0).asPrimitiveConverter(); + root.start(); + value.start(); + value.end(); + root.end(); + assertEquals(Map.of("value", List.of()), materializer.getCurrentRecord()); + + root.start(); + 
value.start(); + + list.start(); + element.start(); + field.addInt(1); + element.end(); + list.end(); + + + list.start(); + element.start(); + field.addInt(2); + element.end(); + list.end(); + value.end(); + root.end(); + + assertEquals(Map.of("value", List.of( + Map.of("someField", 1), + Map.of("someField", 2) + )), materializer.getCurrentRecord()); + } + @Test void testListRepeatedAtTopAndBottomLevel() { var materializer = new ParquetRecordConverter(Types.buildMessage() diff --git a/planetiler-core/src/test/java/com/onthegomap/planetiler/reader/parquet/ParquetInputFileTest.java b/planetiler-core/src/test/java/com/onthegomap/planetiler/reader/parquet/ParquetInputFileTest.java index d47c5b9666..5920c43ca1 100644 --- a/planetiler-core/src/test/java/com/onthegomap/planetiler/reader/parquet/ParquetInputFileTest.java +++ b/planetiler-core/src/test/java/com/onthegomap/planetiler/reader/parquet/ParquetInputFileTest.java @@ -18,6 +18,7 @@ import java.util.function.Consumer; import org.apache.parquet.filter2.predicate.FilterApi; import org.junit.jupiter.api.DynamicTest; +import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestFactory; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.MethodSource; @@ -185,4 +186,20 @@ private static DynamicTest testEquals(Map map, String key, Objec private static DynamicTest test(Map map, String key, Consumer test) { return dynamicTest(key, () -> test.accept(map.get(key))); } + + /** Reads every item of the boston.parquet test resource and checks the distinct bbox.xmin tag values and the distinct update_time timestamps as epoch milliseconds. NOTE(review): xmins and updateTime are declared as raw Set here; generic type arguments may have been stripped from this text - confirm against the original file. */ @Test + void testReadNested() { + Set xmins = new HashSet<>(); + Set updateTime = new HashSet<>(); + for (var block : new ParquetInputFile("parquet", "layer", + TestUtils.pathToResource("parquet").resolve("boston.parquet")) + .get()) { + for (var item : block) { + xmins.add(item.getTag("bbox.xmin")); + updateTime.add(item.getStruct("update_time").asTimestamp().toEpochMilli()); + } + } + assertEquals(Set.of(-71.0743637084961, -71.07461547851562, -71.07460021972656), xmins); + assertEquals(Set.of(1596647976000L, 
1624238059000L, 1625971545000L), updateTime); + } }