diff --git a/NOTICE.md b/NOTICE.md index abd9d1bb15..25f81916d1 100644 --- a/NOTICE.md +++ b/NOTICE.md @@ -28,6 +28,7 @@ The `planetiler-core` module includes the following software: - mil.nga.geopackage:geopackage (MIT license) - org.snakeyaml:snakeyaml-engine (Apache license) - org.commonmark:commonmark (BSD 2-clause license) + - org.tukaani:xz (public domain) - Adapted code: - `DouglasPeuckerSimplifier` from [JTS](https://github.com/locationtech/jts) (EDL) - `OsmMultipolygon` from [imposm3](https://github.com/omniscale/imposm3) (Apache license) diff --git a/layerstats/README.md b/layerstats/README.md new file mode 100644 index 0000000000..ac9892198c --- /dev/null +++ b/layerstats/README.md @@ -0,0 +1,175 @@ +Layer Stats +=========== + +This page describes how to generate and analyze layer stats data to find ways to optimize tile size. + +### Generating Layer Stats + +Run planetiler with `--output-layerstats` to generate an extra `.layerstats.tsv.gz` file with a row for each +layer in each tile that can be used to analyze tile sizes. 
You can also get stats for an existing archive by running: + +```bash +java -jar planetiler.jar stats --input=<path to pmtiles or mbtiles> --output=layerstats.tsv.gz +``` + +The output is a gzipped tsv with a row per layer on each tile and the following columns: + +| column | description | +|---------------------|-------------------------------------------------------------------------------------------------------------------------------------------------| +| z | tile zoom | +| x | tile x | +| y | tile y | +| hilbert | tile hilbert ID (defines [pmtiles](https://protomaps.com/docs/pmtiles) order) | +| archived_tile_bytes | stored tile size (usually gzipped) | +| layer | layer name | +| layer_bytes | encoded size of this layer on this tile | +| layer_features | number of features in this layer | +| layer_attr_bytes | encoded size of the [attribute key/value pairs](https://github.com/mapbox/vector-tile-spec/tree/master/2.1#44-feature-attributes) in this layer | +| layer_attr_keys | number of distinct attribute keys in this layer on this tile | +| layer_attr_values | number of distinct attribute values in this layer on this tile | + +### Analyzing Layer Stats + +Load a layer stats file in [duckdb](https://duckdb.org/): + +```sql +CREATE TABLE layerstats AS SELECT * FROM 'output.pmtiles.layerstats.tsv.gz'; +``` + +Then get the biggest layers: + +```sql +SELECT * FROM layerstats ORDER BY layer_bytes DESC LIMIT 2; +``` + +| z | x | y | hilbert | archived_tile_bytes | layer | layer_bytes | layer_features | layer_attr_bytes | layer_attr_keys | layer_attr_values | +|----|-------|------|-----------|---------------------|-------------|-------------|----------------|------------------|-----------------|-------------------| +| 14 | 13722 | 7013 | 305278258 | 1261474 | housenumber | 2412464 | 108384 | 30764 | 1 | 3021 | +| 14 | 13723 | 7014 | 305278256 | 1064044 | housenumber | 1848990 | 83038 | 26022 | 1 | 2542 | + +To get a table of biggest layers by zoom: + +```sql +PIVOT ( + SELECT z, layer, 
(max(layer_bytes)/1000)::int size FROM layerstats GROUP BY z, layer ORDER BY z ASC +) ON printf('%2d', z) USING sum(size); +-- duckdb sorts columns lexicographically, so left-pad the zoom so 2 comes before 10 +``` + +| layer | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | +|---------------------|-----|-----|-----|-----|-----|-----|-----|-----|-----|-----|-----|-----|-----|-----|------| +| boundary | 10 | 75 | 85 | 53 | 44 | 25 | 18 | 15 | 15 | 29 | 24 | 18 | 32 | 18 | 10 | +| landcover | 2 | 1 | 8 | 5 | 3 | 31 | 18 | 584 | 599 | 435 | 294 | 175 | 166 | 111 | 334 | +| place | 116 | 314 | 833 | 830 | 525 | 270 | 165 | 80 | 51 | 54 | 63 | 70 | 50 | 122 | 221 | +| water | 8 | 4 | 11 | 9 | 15 | 13 | 89 | 114 | 126 | 109 | 133 | 94 | 167 | 116 | 91 | +| water_name | 7 | 19 | 25 | 15 | 11 | 6 | 6 | 4 | 3 | 6 | 5 | 4 | 4 | 4 | 29 | +| waterway | | | | 1 | 4 | 2 | 18 | 13 | 10 | 28 | 20 | 16 | 60 | 66 | 73 | +| park | | | | | 54 | 135 | 89 | 76 | 72 | 82 | 90 | 56 | 48 | 19 | 50 | +| landuse | | | | | 3 | 2 | 33 | 67 | 95 | 107 | 177 | 132 | 66 | 313 | 109 | +| transportation | | | | | 384 | 425 | 259 | 240 | 287 | 284 | 165 | 95 | 313 | 187 | 133 | +| transportation_name | | | | | | | 32 | 20 | 18 | 13 | 30 | 18 | 65 | 59 | 169 | +| mountain_peak | | | | | | | | 13 | 13 | 12 | 15 | 12 | 12 | 317 | 235 | +| aerodrome_label | | | | | | | | | 5 | 4 | 5 | 4 | 4 | 4 | 4 | +| aeroway | | | | | | | | | | | 16 | 26 | 35 | 31 | 18 | +| poi | | | | | | | | | | | | | 35 | 18 | 811 | +| building | | | | | | | | | | | | | | 94 | 1761 | +| housenumber | | | | | | | | | | | | | | | 2412 | + +To get biggest tiles: + +```sql +CREATE TABLE tilestats AS SELECT +z, x, y, +any_value(archived_tile_bytes) gzipped, +sum(layer_bytes) raw +FROM layerstats GROUP BY z, x, y; + +SELECT +z, x, y, +format_bytes(gzipped::int) gzipped, +format_bytes(raw::int) raw, +FROM tilestats ORDER BY gzipped DESC LIMIT 2; +``` + +NOTE: this group by uses a lot of memory so you need to be running in 
file-backed +mode `duckdb analysis.duckdb` (not in-memory mode) + +| z | x | y | gzipped | raw | +|----|------|------|---------|------| +| 13 | 2286 | 3211 | 9KB | 12KB | +| 13 | 2340 | 2961 | 9KB | 12KB | + +To make it easier to look at these tiles on a map, you can define the following macros that convert z/x/y coordinates to +lat/lons: + +```sql +CREATE MACRO lon(z, x) AS (x/2**z) * 360 - 180; +CREATE MACRO lat_n(z, y) AS pi() - 2 * pi() * y/2**z; +CREATE MACRO lat(z, y) AS degrees(atan(0.5*(exp(lat_n(z, y)) - exp(-lat_n(z, y))))); +CREATE MACRO debug_url(z, x, y) as concat( + 'https://protomaps.github.io/PMTiles/#map=', + z + 0.5, '/', + round(lat(z, y + 0.5), 5), '/', + round(lon(z, x + 0.5), 5) +); + +SELECT z, x, y, debug_url(z, x, y), layer, format_bytes(layer_bytes) size +FROM layerstats ORDER BY layer_bytes DESC LIMIT 2; +``` + +| z | x | y | debug_url(z, x, y) | layer | size | +|----|-------|------|-------------------------------------------------------------------|-------------|-------| +| 14 | 13722 | 7013 | https://protomaps.github.io/PMTiles/#map=14.5/25.05575/121.51978 | housenumber | 2.4MB | +| 14 | 13723 | 7014 | https://protomaps.github.io/PMTiles/#map=14.5/25.03584/121.54175 | housenumber | 1.8MB | + +Drag and drop your pmtiles archive to the pmtiles debugger to see the large tiles on a map. You can also switch to the +"inspect" tab to inspect an individual tile. + +#### Computing Weighted Average Tile Sizes + +If you compute a straight average tile size, it will be dominated by ocean tiles that no one looks at. You can compute a +weighted average based on actual usage by joining with a `z, x, y, loads` tile source. For +convenience, [top_osm_tiles.tsv.gz](top_osm_tiles.tsv.gz) has the top 1 million tiles from 90 days +of [OSM tile logs](https://planet.openstreetmap.org/tile_logs/) from summer 2023. 
+ +You can load these sample weights using duckdb's [httpfs module](https://duckdb.org/docs/extensions/httpfs.html): + +```sql +INSTALL httpfs; +CREATE TABLE weights AS SELECT z, x, y, loads FROM 'https://raw.githubusercontent.com/onthegomap/planetiler/main/layerstats/top_osm_tiles.tsv.gz'; +``` + +Then compute the weighted average tile size: + +```sql +SELECT +format_bytes((sum(gzipped * loads) / sum(loads))::int) gzipped_avg, +format_bytes((sum(raw * loads) / sum(loads))::int) raw_avg, +FROM tilestats JOIN weights USING (z, x, y); +``` + +| gzipped_avg | raw_avg | +|-------------|---------| +| 81KB | 132KB | + +If you are working with an extract, then the low-zoom tiles will dominate, so you can make the weighted average respect +the per-zoom weights that appear globally: + +```sql +WITH zoom_weights AS ( + SELECT z, sum(loads) loads FROM weights GROUP BY z +), +zoom_avgs AS ( + SELECT + z, + sum(gzipped * loads) / sum(loads) gzipped, + sum(raw * loads) / sum(loads) raw, + FROM tilestats JOIN weights USING (z, x, y) + GROUP BY z +) +SELECT +format_bytes((sum(gzipped * loads) / sum(loads))::int) gzipped_avg, +format_bytes((sum(raw * loads) / sum(loads))::int) raw_avg, +FROM zoom_avgs JOIN zoom_weights USING (z); +``` + diff --git a/layerstats/top_osm_tiles.tsv.gz b/layerstats/top_osm_tiles.tsv.gz new file mode 100644 index 0000000000..2baf3ed26e Binary files /dev/null and b/layerstats/top_osm_tiles.tsv.gz differ diff --git a/planetiler-core/pom.xml b/planetiler-core/pom.xml index e5a0554d29..33e7d41e01 100644 --- a/planetiler-core/pom.xml +++ b/planetiler-core/pom.xml @@ -49,6 +49,11 @@ jts-core 1.19.0 + + org.tukaani + xz + 1.9 + org.geotools gt-shapefile @@ -109,6 +114,11 @@ jackson-dataformat-xml ${jackson.version} + + com.fasterxml.jackson.dataformat + jackson-dataformat-csv + ${jackson.version} + io.prometheus simpleclient diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/Planetiler.java 
b/planetiler-core/src/main/java/com/onthegomap/planetiler/Planetiler.java index 5c081087af..b36d417d6b 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/Planetiler.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/Planetiler.java @@ -29,6 +29,8 @@ import com.onthegomap.planetiler.util.Geofabrik; import com.onthegomap.planetiler.util.LogUtil; import com.onthegomap.planetiler.util.ResourceUsage; +import com.onthegomap.planetiler.util.TileSizeStats; +import com.onthegomap.planetiler.util.TopOsmTiles; import com.onthegomap.planetiler.util.Translations; import com.onthegomap.planetiler.util.Wikidata; import com.onthegomap.planetiler.worker.RunnableThatThrows; @@ -38,6 +40,7 @@ import java.nio.file.Path; import java.util.ArrayList; import java.util.List; +import java.util.Optional; import java.util.function.Function; import java.util.stream.IntStream; import org.slf4j.Logger; @@ -101,6 +104,7 @@ public class Planetiler { private boolean useWikidata = false; private boolean onlyFetchWikidata = false; private boolean fetchWikidata = false; + private final boolean fetchOsmTileStats; private TileArchiveMetadata tileArchiveMetadata; private Planetiler(Arguments arguments) { @@ -111,10 +115,11 @@ private Planetiler(Arguments arguments) { if (config.color() != null) { AnsiColors.setUseColors(config.color()); } - tmpDir = arguments.file("tmpdir", "temp directory", Path.of("data", "tmp")); + tmpDir = config.tmpDir(); onlyDownloadSources = arguments.getBoolean("only_download", "download source data then exit", false); downloadSources = onlyDownloadSources || arguments.getBoolean("download", "download sources", false); - + fetchOsmTileStats = + arguments.getBoolean("download_osm_tile_weights", "download OSM tile weights file", downloadSources); nodeDbPath = arguments.file("temp_nodes", "temp node db location", tmpDir.resolve("node.db")); multipolygonPath = arguments.file("temp_multipolygons", "temp multipolygon db location", 
tmpDir.resolve("multipolygon.db")); @@ -666,6 +671,10 @@ public void run() throws Exception { output.uri() + " already exists, use the --force argument to overwrite or --append."); } + Path layerStatsPath = arguments.file("layer_stats", "layer stats output path", + // default to .layerstats.tsv.gz + TileSizeStats.getDefaultLayerstatsPath(Optional.ofNullable(output.getLocalPath()).orElse(Path.of("output")))); + if (config.tileWriteThreads() < 1) { throw new IllegalArgumentException("require tile_write_threads >= 1"); } @@ -715,6 +724,9 @@ public void run() throws Exception { if (!toDownload.isEmpty()) { download(); } + if (fetchOsmTileStats) { + TopOsmTiles.downloadPrecomputed(config, stats); + } ensureInputFilesExist(); if (fetchWikidata) { @@ -762,8 +774,8 @@ public void run() throws Exception { featureGroup.prepare(); - TileArchiveWriter.writeOutput(featureGroup, archive, output::size, tileArchiveMetadata, - config, stats); + TileArchiveWriter.writeOutput(featureGroup, archive, output::size, tileArchiveMetadata, layerStatsPath, config, + stats); } catch (IOException e) { throw new IllegalStateException("Unable to write to " + output, e); } diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/VectorTile.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/VectorTile.java index bed7c518de..f115074fb0 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/VectorTile.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/VectorTile.java @@ -441,11 +441,9 @@ public VectorTile addLayerFeatures(String layerName, List fea } /** - * Creates a vector tile protobuf with all features in this tile and serializes it as a byte array. - *

- * Does not compress the result. + * Returns a vector tile protobuf object with all features in this tile. */ - public byte[] encode() { + public VectorTileProto.Tile toProto() { VectorTileProto.Tile.Builder tile = VectorTileProto.Tile.newBuilder(); for (Map.Entry e : layers.entrySet()) { String layerName = e.getKey(); @@ -492,7 +490,16 @@ public byte[] encode() { tile.addLayers(tileLayer.build()); } - return tile.build().toByteArray(); + return tile.build(); + } + + /** + * Creates a vector tile protobuf with all features in this tile and serializes it as a byte array. + *

+ * Does not compress the result. + */ + public byte[] encode() { + return toProto().toByteArray(); } /** diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/archive/ReadableTileArchive.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/archive/ReadableTileArchive.java index 51b0b0e0c2..c6d2d06d2e 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/archive/ReadableTileArchive.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/archive/ReadableTileArchive.java @@ -40,6 +40,10 @@ default byte[] getTile(TileCoord coord) { */ CloseableIterator getAllTileCoords(); + default CloseableIterator getAllTiles() { + return getAllTileCoords().map(coord -> new Tile(coord, getTile(coord))); + } + /** * Returns the metadata stored in this archive. */ diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/archive/Tile.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/archive/Tile.java new file mode 100644 index 0000000000..b580069a76 --- /dev/null +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/archive/Tile.java @@ -0,0 +1,32 @@ +package com.onthegomap.planetiler.archive; + +import com.onthegomap.planetiler.geo.TileCoord; +import java.util.Arrays; +import java.util.Objects; + +/** A tile stored in an archive with coordinate {@code coord} and archived {@code bytes}. 
*/ +public record Tile(TileCoord coord, byte[] bytes) implements Comparable { + + @Override + public boolean equals(Object o) { + return (this == o) || + (o instanceof Tile other && Objects.equals(coord, other.coord) && Arrays.equals(bytes, other.bytes)); + } + + @Override + public int hashCode() { + int result = coord.hashCode(); + result = 31 * result + Arrays.hashCode(bytes); + return result; + } + + @Override + public String toString() { + return "Tile{coord=" + coord + ", data=byte[" + bytes.length + "]}"; + } + + @Override + public int compareTo(Tile o) { + return coord.compareTo(o.coord); + } +} diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/archive/TileArchiveMetadata.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/archive/TileArchiveMetadata.java index 0e210c3fbe..8c626646e0 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/archive/TileArchiveMetadata.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/archive/TileArchiveMetadata.java @@ -19,7 +19,7 @@ import com.onthegomap.planetiler.config.PlanetilerConfig; import com.onthegomap.planetiler.geo.GeoUtils; import com.onthegomap.planetiler.util.BuildInfo; -import com.onthegomap.planetiler.util.LayerStats; +import com.onthegomap.planetiler.util.LayerAttrStats; import java.io.IOException; import java.util.HashMap; import java.util.LinkedHashMap; @@ -43,7 +43,7 @@ public record TileArchiveMetadata( @JsonProperty(ZOOM_KEY) Double zoom, @JsonProperty(MINZOOM_KEY) Integer minzoom, @JsonProperty(MAXZOOM_KEY) Integer maxzoom, - @JsonIgnore List vectorLayers, + @JsonIgnore List vectorLayers, @JsonAnyGetter @JsonDeserialize(using = EmptyMapIfNullDeserializer.class) Map others, @JsonProperty(COMPRESSION_KEY) TileCompression tileCompression ) { @@ -73,7 +73,7 @@ public TileArchiveMetadata(Profile profile, PlanetilerConfig config) { this(profile, config, null); } - public TileArchiveMetadata(Profile profile, PlanetilerConfig config, List vectorLayers) 
{ + public TileArchiveMetadata(Profile profile, PlanetilerConfig config, List vectorLayers) { this( getString(config, NAME_KEY, profile.name()), getString(config, DESCRIPTION_KEY, profile.description()), @@ -145,7 +145,7 @@ public Map toMap() { } /** Returns a copy of this instance with {@link #vectorLayers} set to {@code layerStats}. */ - public TileArchiveMetadata withLayerStats(List layerStats) { + public TileArchiveMetadata withLayerStats(List layerStats) { return new TileArchiveMetadata(name, description, attribution, version, type, format, bounds, center, zoom, minzoom, maxzoom, layerStats, others, tileCompression); } diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/archive/TileArchiveWriter.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/archive/TileArchiveWriter.java index 93afc98c13..60ebcc1a14 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/archive/TileArchiveWriter.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/archive/TileArchiveWriter.java @@ -15,21 +15,24 @@ import com.onthegomap.planetiler.util.DiskBacked; import com.onthegomap.planetiler.util.Format; import com.onthegomap.planetiler.util.Hashing; +import com.onthegomap.planetiler.util.TileSizeStats; +import com.onthegomap.planetiler.util.TileWeights; +import com.onthegomap.planetiler.util.TilesetSummaryStatistics; import com.onthegomap.planetiler.worker.WorkQueue; import com.onthegomap.planetiler.worker.Worker; import com.onthegomap.planetiler.worker.WorkerPipeline; import java.io.IOException; -import java.util.ArrayDeque; +import java.nio.file.Path; +import java.text.NumberFormat; import java.util.ArrayList; import java.util.LinkedHashMap; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.OptionalLong; -import java.util.Queue; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutionException; import java.util.concurrent.atomic.AtomicReference; -import 
java.util.concurrent.atomic.LongAccumulator; import java.util.function.Consumer; import java.util.function.LongSupplier; import java.util.stream.IntStream; @@ -52,14 +55,14 @@ public class TileArchiveWriter { private final PlanetilerConfig config; private final Stats stats; private final Counter.Readable[] tilesByZoom; - private final Counter.Readable[] totalTileSizesByZoom; - private final LongAccumulator[] maxTileSizesByZoom; private final Iterable inputTiles; private final AtomicReference lastTileWritten = new AtomicReference<>(); private final TileArchiveMetadata tileArchiveMetadata; + private final TilesetSummaryStatistics tileStats; private TileArchiveWriter(Iterable inputTiles, WriteableTileArchive archive, PlanetilerConfig config, TileArchiveMetadata tileArchiveMetadata, Stats stats) { + this.tileStats = new TilesetSummaryStatistics(TileWeights.readFromFile(config.tileWeights())); this.inputTiles = inputTiles; this.archive = archive; this.config = config; @@ -68,12 +71,6 @@ private TileArchiveWriter(Iterable inputTiles, Writea tilesByZoom = IntStream.rangeClosed(0, config.maxzoom()) .mapToObj(i -> Counter.newSingleThreadCounter()) .toArray(Counter.Readable[]::new); - totalTileSizesByZoom = IntStream.rangeClosed(0, config.maxzoom()) - .mapToObj(i -> Counter.newMultiThreadCounter()) - .toArray(Counter.Readable[]::new); - maxTileSizesByZoom = IntStream.rangeClosed(0, config.maxzoom()) - .mapToObj(i -> new LongAccumulator(Long::max, 0)) - .toArray(LongAccumulator[]::new); memoizedTiles = stats.longCounter("archive_memoized_tiles"); featuresProcessed = stats.longCounter("archive_features_processed"); Map countsByZoom = new LinkedHashMap<>(); @@ -85,7 +82,7 @@ private TileArchiveWriter(Iterable inputTiles, Writea /** Reads all {@code features}, encodes them in parallel, and writes to {@code output}. 
*/ public static void writeOutput(FeatureGroup features, WriteableTileArchive output, DiskBacked fileSize, - TileArchiveMetadata tileArchiveMetadata, PlanetilerConfig config, Stats stats) { + TileArchiveMetadata tileArchiveMetadata, Path layerStatsPath, PlanetilerConfig config, Stats stats) { var timer = stats.startStage("archive"); int readThreads = config.featureReadThreads(); @@ -122,8 +119,6 @@ public static void writeOutput(FeatureGroup features, WriteableTileArchive outpu (int) (5_000d * ProcessInfo.getMaxMemoryBytes() / 100_000_000_000d) ); - WorkerPipeline encodeBranch, writeBranch = null; - /* * To emit tiles in order, fork the input queue and send features to both the encoder and writer. The writer * waits on them to be encoded in the order they were received, and the encoder processes them in parallel. @@ -135,23 +130,36 @@ public static void writeOutput(FeatureGroup features, WriteableTileArchive outpu * So some of the restrictions could be lifted then. */ WorkQueue writerQueue = new WorkQueue<>("archive_writer_queue", queueSize, 1, stats); - encodeBranch = pipeline + WorkQueue layerStatsQueue = new WorkQueue<>("archive_layerstats_queue", queueSize, 1, stats); + WorkerPipeline encodeBranch = pipeline .fromGenerator(secondStageName, next -> { - var writerEnqueuer = writerQueue.threadLocalWriter(); - writer.readFeaturesAndBatch(batch -> { - next.accept(batch); - writerEnqueuer.accept(batch); // also send immediately to writer - }); - writerQueue.close(); + try (writerQueue; layerStatsQueue) { + var writerEnqueuer = writerQueue.threadLocalWriter(); + var statsEnqueuer = layerStatsQueue.threadLocalWriter(); + writer.readFeaturesAndBatch(batch -> { + next.accept(batch); + writerEnqueuer.accept(batch); // also send immediately to writer + if (config.outputLayerStats()) { + statsEnqueuer.accept(batch); + } + }); + } // use only 1 thread since readFeaturesAndBatch needs to be single-threaded }, 1) .addBuffer("reader_queue", queueSize) .sinkTo("encode", 
processThreads, writer::tileEncoderSink); // the tile writer will wait on the result of each batch to ensure tiles are written in order - writeBranch = pipeline.readFromQueue(writerQueue) + WorkerPipeline writeBranch = pipeline.readFromQueue(writerQueue) .sinkTo("write", tileWriteThreads, writer::tileWriter); + WorkerPipeline layerStatsBranch = null; + + if (config.outputLayerStats()) { + layerStatsBranch = pipeline.readFromQueue(layerStatsQueue) + .sinkTo("stats", 1, tileStatsWriter(layerStatsPath)); + } + var loggers = ProgressLoggers.create() .addRatePercentCounter("features", features.numFeaturesWritten(), writer.featuresProcessed, true) .addFileSize(features) @@ -164,16 +172,37 @@ public static void writeOutput(FeatureGroup features, WriteableTileArchive outpu loggers.addThreadPoolStats("read", readWorker); } loggers.addPipelineStats(encodeBranch) - .addPipelineStats(writeBranch) - .newLine() + .addPipelineStats(writeBranch); + if (layerStatsBranch != null) { + loggers.addPipelineStats(layerStatsBranch); + } + loggers.newLine() .add(writer::getLastTileLogDetails); - var doneFuture = writeBranch == null ? encodeBranch.done() : joinFutures(writeBranch.done(), encodeBranch.done()); + var doneFuture = joinFutures( + writeBranch.done(), + layerStatsBranch == null ? 
CompletableFuture.completedFuture(null) : layerStatsBranch.done(), + encodeBranch.done()); loggers.awaitAndLog(doneFuture, config.logInterval()); writer.printTileStats(); timer.stop(); } + private static WorkerPipeline.SinkStep tileStatsWriter(Path layerStatsPath) { + return prev -> { + try (var statsWriter = TileSizeStats.newWriter(layerStatsPath)) { + statsWriter.write(TileSizeStats.headerRow()); + for (var batch : prev) { + for (var encodedTile : batch.out().get()) { + for (var line : encodedTile.layerStats()) { + statsWriter.write(line); + } + } + } + } + }; + } + private String getLastTileLogDetails() { TileCoord lastTile = lastTileWritten.get(); String blurb; @@ -184,7 +213,7 @@ private String getLastTileLogDetails() { lastTile.z(), lastTile.x(), lastTile.y(), lastTile.z(), Format.defaultInstance().percent(archive.tileOrder().progressOnLevel(lastTile, config.bounds().tileExtents())), - lastTile.getDebugUrl() + lastTile.getDebugUrl(config.debugUrlPattern()) ); } return "last tile: " + blurb; @@ -220,12 +249,6 @@ private void readFeaturesAndBatch(Consumer next) { } private void tileEncoderSink(Iterable prev) throws IOException { - tileEncoder(prev, batch -> { - // no next step - }); - } - - private void tileEncoder(Iterable prev, Consumer next) throws IOException { /* * To optimize emitting many identical consecutive tiles (like large ocean areas), memoize output to avoid * recomputing if the input hasn't changed. 
@@ -233,40 +256,48 @@ private void tileEncoder(Iterable prev, Consumer next) thr byte[] lastBytes = null, lastEncoded = null; Long lastTileDataHash = null; boolean lastIsFill = false; + List lastLayerStats = null; boolean skipFilled = config.skipFilledTiles(); + var tileStatsUpdater = tileStats.threadLocalUpdater(); for (TileBatch batch : prev) { - Queue result = new ArrayDeque<>(batch.size()); + List result = new ArrayList<>(batch.size()); FeatureGroup.TileFeatures last = null; - // each batch contains tile ordered by z asc, x asc, y desc + // each batch contains tile ordered by tile-order ID ascending for (int i = 0; i < batch.in.size(); i++) { FeatureGroup.TileFeatures tileFeatures = batch.in.get(i); featuresProcessed.incBy(tileFeatures.getNumFeaturesProcessed()); byte[] bytes, encoded; + List layerStats; Long tileDataHash; if (tileFeatures.hasSameContents(last)) { bytes = lastBytes; encoded = lastEncoded; tileDataHash = lastTileDataHash; + layerStats = lastLayerStats; memoizedTiles.inc(); } else { VectorTile en = tileFeatures.getVectorTileEncoder(); if (skipFilled && (lastIsFill = en.containsOnlyFills())) { encoded = null; + layerStats = null; bytes = null; } else { - encoded = en.encode(); + var proto = en.toProto(); + encoded = proto.toByteArray(); bytes = switch (config.tileCompression()) { case GZIP -> gzip(encoded); case NONE -> encoded; case UNKNWON -> throw new IllegalArgumentException("cannot compress \"UNKNOWN\""); }; + layerStats = TileSizeStats.computeTileStats(proto); if (encoded.length > config.tileWarningSizeBytes()) { LOGGER.warn("{} {}kb uncompressed", tileFeatures.tileCoord(), encoded.length / 1024); } } + lastLayerStats = layerStats; lastEncoded = encoded; lastBytes = bytes; last = tileFeatures; @@ -277,25 +308,30 @@ private void tileEncoder(Iterable prev, Consumer next) thr } lastTileDataHash = tileDataHash; } - if (skipFilled && lastIsFill) { - continue; + if ((!skipFilled || !lastIsFill) && bytes != null) { + 
tileStatsUpdater.recordTile(tileFeatures.tileCoord(), bytes.length, layerStats); + List layerStatsRows = config.outputLayerStats() ? + TileSizeStats.formatOutputRows(tileFeatures.tileCoord(), bytes.length, layerStats) : + List.of(); + result.add( + new TileEncodingResult( + tileFeatures.tileCoord(), + bytes, + encoded.length, + tileDataHash == null ? OptionalLong.empty() : OptionalLong.of(tileDataHash), + layerStatsRows + ) + ); } - int zoom = tileFeatures.tileCoord().z(); - int encodedLength = encoded == null ? 0 : encoded.length; - totalTileSizesByZoom[zoom].incBy(encodedLength); - maxTileSizesByZoom[zoom].accumulate(encodedLength); - result.add( - new TileEncodingResult(tileFeatures.tileCoord(), bytes, - tileDataHash == null ? OptionalLong.empty() : OptionalLong.of(tileDataHash)) - ); } // hand result off to writer batch.out.complete(result); - next.accept(batch); } } private void tileWriter(Iterable tileBatches) throws ExecutionException, InterruptedException { + var f = NumberFormat.getNumberInstance(Locale.getDefault()); + f.setMaximumFractionDigits(5); archive.initialize(tileArchiveMetadata); var order = archive.tileOrder(); @@ -305,9 +341,7 @@ private void tileWriter(Iterable tileBatches) throws ExecutionExcepti int currentZ = Integer.MIN_VALUE; try (var tileWriter = archive.newTileWriter()) { for (TileBatch batch : tileBatches) { - Queue encodedTiles = batch.out.get(); - TileEncodingResult encodedTile; - while ((encodedTile = encodedTiles.poll()) != null) { + for (var encodedTile : batch.out.get()) { TileCoord tileCoord = encodedTile.coord(); assert lastTile == null || order.encode(tileCoord) > order.encode(lastTile) : "Tiles out of order %s before %s" @@ -325,7 +359,7 @@ private void tileWriter(Iterable tileBatches) throws ExecutionExcepti } tileWriter.write(encodedTile); - stats.wroteTile(z, encodedTile.tileData() == null ? 
0 : encodedTile.tileData().length); + stats.wroteTile(z, encodedTile.tileData().length); tilesByZoom[z].inc(); } lastTileWritten.set(lastTile); @@ -337,35 +371,14 @@ private void tileWriter(Iterable tileBatches) throws ExecutionExcepti LOGGER.info("Finished z{} in {}", currentZ, time.stop()); } - archive.finish(tileArchiveMetadata); } + @SuppressWarnings("java:S2629") private void printTileStats() { - if (LOGGER.isDebugEnabled()) { - Format format = Format.defaultInstance(); - LOGGER.debug("Tile stats:"); - long sumSize = 0; - long sumCount = 0; - long maxMax = 0; - for (int z = config.minzoom(); z <= config.maxzoom(); z++) { - long totalCount = tilesByZoom[z].get(); - long totalSize = totalTileSizesByZoom[z].get(); - sumSize += totalSize; - sumCount += totalCount; - long maxSize = maxTileSizesByZoom[z].get(); - maxMax = Math.max(maxMax, maxSize); - LOGGER.debug("z{} avg:{} max:{}", - z, - format.storage(totalCount == 0 ? 0 : (totalSize / totalCount), false), - format.storage(maxSize, false)); - } - LOGGER.debug("all avg:{} max:{}", - format.storage(sumCount == 0 ? 
0 : (sumSize / sumCount), false), - format.storage(maxMax, false)); - LOGGER.debug(" # features: {}", format.integer(featuresProcessed.get())); - LOGGER.debug(" # tiles: {}", format.integer(this.tilesEmitted())); - } + Format format = Format.defaultInstance(); + tileStats.printStats(config.debugUrlPattern()); + LOGGER.debug(" # features: {}", format.integer(featuresProcessed.get())); } private long tilesEmitted() { @@ -393,7 +406,7 @@ public static long generateContentHash(byte[] bytes) { */ private record TileBatch( List in, - CompletableFuture> out + CompletableFuture> out ) { TileBatch() { diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/archive/TileEncodingResult.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/archive/TileEncodingResult.java index 5df7c0a4c4..8716c214fe 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/archive/TileEncodingResult.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/archive/TileEncodingResult.java @@ -2,15 +2,26 @@ import com.onthegomap.planetiler.geo.TileCoord; import java.util.Arrays; +import java.util.List; import java.util.Objects; import java.util.OptionalLong; +import javax.annotation.Nonnull; public record TileEncodingResult( TileCoord coord, - byte[] tileData, - /** will always be empty in non-compact mode and might also be empty in compact mode */ - OptionalLong tileDataHash + @Nonnull byte[] tileData, + int rawTileSize, + /* will always be empty in non-compact mode and might also be empty in compact mode */ + OptionalLong tileDataHash, + List layerStats ) { + public TileEncodingResult( + TileCoord coord, + byte[] tileData, + OptionalLong tileDataHash + ) { + this(coord, tileData, tileData.length, tileDataHash, List.of()); + } @Override public int hashCode() { @@ -23,15 +34,10 @@ public int hashCode() { @Override public boolean equals(Object obj) { - if (this == obj) { - return true; - } - if (!(obj instanceof TileEncodingResult)) { - return false; - } - 
TileEncodingResult other = (TileEncodingResult) obj; - return Objects.equals(coord, other.coord) && Arrays.equals(tileData, other.tileData) && - Objects.equals(tileDataHash, other.tileDataHash); + return this == obj || (obj instanceof TileEncodingResult other && + Objects.equals(coord, other.coord) && + Arrays.equals(tileData, other.tileData) && + Objects.equals(tileDataHash, other.tileDataHash)); } @Override @@ -39,5 +45,4 @@ public String toString() { return "TileEncodingResult [coord=" + coord + ", tileData=" + Arrays.toString(tileData) + ", tileDataHash=" + tileDataHash + "]"; } - } diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/collection/ExternalMergeSort.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/collection/ExternalMergeSort.java index 05ba1db7df..c63269e723 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/collection/ExternalMergeSort.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/collection/ExternalMergeSort.java @@ -10,6 +10,7 @@ import com.onthegomap.planetiler.util.BinPack; import com.onthegomap.planetiler.util.ByteBufferUtil; import com.onthegomap.planetiler.util.CloseableConsumer; +import com.onthegomap.planetiler.util.FastGzipOutputStream; import com.onthegomap.planetiler.util.FileUtils; import com.onthegomap.planetiler.worker.WorkerPipeline; import java.io.BufferedInputStream; @@ -41,9 +42,7 @@ import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; import java.util.function.Supplier; -import java.util.zip.Deflater; import java.util.zip.GZIPInputStream; -import java.util.zip.GZIPOutputStream; import javax.annotation.concurrent.NotThreadSafe; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -282,15 +281,6 @@ private interface Reader extends Closeable, Iterator { void close(); } - /** Compresses bytes with minimal impact on write performance. 
Equivalent to {@code gzip -1} */ - private static class FastGzipOutputStream extends GZIPOutputStream { - - public FastGzipOutputStream(OutputStream out) throws IOException { - super(out); - def.setLevel(Deflater.BEST_SPEED); - } - } - /** Read all features from a chunk file using a {@link BufferedInputStream}. */ private static class ReaderBuffered extends BaseReader { diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/collection/FeatureGroup.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/collection/FeatureGroup.java index 6e415f4e0a..a4601f7daf 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/collection/FeatureGroup.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/collection/FeatureGroup.java @@ -13,7 +13,7 @@ import com.onthegomap.planetiler.util.CloseableConsumer; import com.onthegomap.planetiler.util.CommonStringEncoder; import com.onthegomap.planetiler.util.DiskBacked; -import com.onthegomap.planetiler.util.LayerStats; +import com.onthegomap.planetiler.util.LayerAttrStats; import com.onthegomap.planetiler.worker.Worker; import java.io.Closeable; import java.io.IOException; @@ -59,7 +59,7 @@ public final class FeatureGroup implements Iterable, private final CommonStringEncoder.AsByte commonLayerStrings = new CommonStringEncoder.AsByte(); private final CommonStringEncoder commonValueStrings = new CommonStringEncoder(100_000); private final Stats stats; - private final LayerStats layerStats = new LayerStats(); + private final LayerAttrStats layerStats = new LayerAttrStats(); private volatile boolean prepared = false; private final TileOrder tileOrder; @@ -156,7 +156,7 @@ static byte encodeGeomTypeAndScale(VectorTile.VectorGeometry geometry) { * Returns statistics about each layer written through {@link #newRenderedFeatureEncoder()} including min/max zoom, * features on elements in that layer, and their types. 
*/ - public LayerStats layerStats() { + public LayerAttrStats layerStats() { return layerStats; } diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/config/PlanetilerConfig.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/config/PlanetilerConfig.java index ea2ba38504..13ed394b67 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/config/PlanetilerConfig.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/config/PlanetilerConfig.java @@ -53,7 +53,11 @@ public record PlanetilerConfig( int tileWarningSizeBytes, Boolean color, boolean keepUnzippedSources, - TileCompression tileCompression + TileCompression tileCompression, + boolean outputLayerStats, + String debugUrlPattern, + Path tmpDir, + Path tileWeights ) { public static final int MIN_MINZOOM = 0; @@ -116,6 +120,8 @@ public static PlanetilerConfig from(Arguments arguments) { int renderMaxzoom = arguments.getInteger("render_maxzoom", "maximum rendering zoom level up to " + MAX_MAXZOOM, Math.max(maxzoom, DEFAULT_MAXZOOM)); + Path tmpDir = arguments.file("tmpdir", "temp directory", Path.of("data", "tmp")); + return new PlanetilerConfig( arguments, bounds, @@ -190,7 +196,13 @@ public static PlanetilerConfig from(Arguments arguments) { .fromId(arguments.getString("tile_compression", "the tile compression, one of " + TileCompression.availableValues().stream().map(TileCompression::id).toList(), - "gzip")) + "gzip")), + arguments.getBoolean("output_layerstats", "output a tsv.gz file for each tile/layer size", false), + arguments.getString("debug_url", "debug url to use for displaying tiles with {z} {lat} {lon} placeholders", + "https://onthegomap.github.io/planetiler-demo/#{z}/{lat}/{lon}"), + tmpDir, + arguments.file("tile_weights", "tsv.gz file with columns z,x,y,loads to generate weighted average tile size stat", + tmpDir.resolveSibling("tile_weights.tsv.gz")) ); } diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/geo/TileCoord.java 
b/planetiler-core/src/main/java/com/onthegomap/planetiler/geo/TileCoord.java index 902128805c..6d6370a7f9 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/geo/TileCoord.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/geo/TileCoord.java @@ -2,8 +2,10 @@ import static com.onthegomap.planetiler.config.PlanetilerConfig.MAX_MAXZOOM; -import com.onthegomap.planetiler.util.Format; import com.onthegomap.planetiler.util.Hilbert; +import java.text.DecimalFormat; +import java.text.DecimalFormatSymbols; +import java.util.Locale; import javax.annotation.concurrent.Immutable; import org.locationtech.jts.geom.Coordinate; import org.locationtech.jts.geom.CoordinateXY; @@ -126,19 +128,25 @@ public int compareTo(TileCoord o) { } /** Returns the latitude/longitude of the northwest corner of this tile. */ - public Coordinate getLatLon() { + public Envelope getEnvelope() { double worldWidthAtZoom = Math.pow(2, z); - return new CoordinateXY( + return new Envelope( GeoUtils.getWorldLon(x / worldWidthAtZoom), + GeoUtils.getWorldLon((x + 1) / worldWidthAtZoom), + GeoUtils.getWorldLat((y + 1) / worldWidthAtZoom), GeoUtils.getWorldLat(y / worldWidthAtZoom) ); } /** Returns a URL that displays the openstreetmap data for this tile. */ - public String getDebugUrl() { - Coordinate coord = getLatLon(); - return Format.osmDebugUrl(z, coord); + public String getDebugUrl(String pattern) { + Coordinate center = getEnvelope().centre(); + DecimalFormat format = new DecimalFormat("0.#####", DecimalFormatSymbols.getInstance(Locale.US)); + return pattern + .replaceAll("\\{(lat|latitude)}", format.format(center.y)) + .replaceAll("\\{(lon|longitude)}", format.format(center.x)) + .replaceAll("\\{(z|zoom)}", z + ".5"); } /** Returns the pixel coordinate on this tile of a given latitude/longitude (assuming 256x256 px tiles). 
*/ diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/mbtiles/Mbtiles.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/mbtiles/Mbtiles.java index 601692782c..aeb3393e41 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/mbtiles/Mbtiles.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/mbtiles/Mbtiles.java @@ -9,6 +9,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.datatype.jdk8.Jdk8Module; import com.onthegomap.planetiler.archive.ReadableTileArchive; +import com.onthegomap.planetiler.archive.Tile; import com.onthegomap.planetiler.archive.TileArchiveMetadata; import com.onthegomap.planetiler.archive.TileCompression; import com.onthegomap.planetiler.archive.TileEncodingResult; @@ -19,7 +20,7 @@ import com.onthegomap.planetiler.reader.FileFormatException; import com.onthegomap.planetiler.util.CloseableIterator; import com.onthegomap.planetiler.util.Format; -import com.onthegomap.planetiler.util.LayerStats; +import com.onthegomap.planetiler.util.LayerAttrStats; import com.onthegomap.planetiler.util.Parse; import java.io.IOException; import java.nio.file.Path; @@ -201,6 +202,13 @@ private static Connection newConnection(String url, SQLiteConfig defaults, Argum } } + private static TileCoord getResultCoord(ResultSet rs) throws SQLException { + int z = rs.getInt(TILES_COL_Z); + int rawy = rs.getInt(TILES_COL_Y); + int x = rs.getInt(TILES_COL_X); + return TileCoord.ofXYZ(x, (1 << z) - 1 - rawy, z); + } + @Override public boolean deduplicates() { return compactDb; @@ -423,7 +431,22 @@ public byte[] getTile(int x, int y, int z) { @Override public CloseableIterator getAllTileCoords() { - return new TileCoordIterator(); + return new QueryIterator<>( + statement -> statement.executeQuery( + "select %s, %s, %s from %s".formatted(TILES_COL_Z, TILES_COL_X, TILES_COL_Y, TILES_TABLE) + ), + Mbtiles::getResultCoord + ); + } + + @Override + public CloseableIterator getAllTiles() { + 
return new QueryIterator<>( + statement -> statement.executeQuery( + "select %s, %s, %s, %s from %s".formatted(TILES_COL_Z, TILES_COL_X, TILES_COL_Y, TILES_COL_DATA, TILES_TABLE) + ), + rs -> new Tile(getResultCoord(rs), rs.getBytes(TILES_COL_DATA)) + ); } public Connection connection() { @@ -438,6 +461,11 @@ public boolean compactDb() { return compactDb; } + @FunctionalInterface + private interface SqlFunction { + O apply(I t) throws SQLException; + } + /** * Data contained in the {@code json} row of the metadata table * @@ -446,10 +474,10 @@ public boolean compactDb() { */ // TODO add tilestats public record MetadataJson( - @JsonProperty("vector_layers") List vectorLayers + @JsonProperty("vector_layers") List vectorLayers ) { - public MetadataJson(LayerStats.VectorLayer... layers) { + public MetadataJson(LayerAttrStats.VectorLayer... layers) { this(List.of(layers)); } @@ -470,7 +498,12 @@ public String toJson() { } } - /** Contents of a row of the tiles table, or in case of compact mode in the tiles view. */ + /** + * Contents of a row of the tiles table, or in case of compact mode in the tiles view. + * + * @deprecated Use {@link Tile} instead + */ + @Deprecated(forRemoval = true) public record TileEntry(TileCoord tile, byte[] bytes) implements Comparable { @Override @@ -543,19 +576,21 @@ public boolean equals(Object obj) { } } - /** Iterates through tile coordinates one at a time without materializing the entire list in memory. */ - private class TileCoordIterator implements CloseableIterator { - + /** Iterates through the results of a query one at a time without materializing the entire list in memory. 
*/ + private class QueryIterator implements CloseableIterator { private final Statement statement; private final ResultSet rs; + private final SqlFunction rowMapper; private boolean hasNext = false; - private TileCoordIterator() { + private QueryIterator( + SqlFunction query, + SqlFunction rowMapper + ) { + this.rowMapper = rowMapper; try { this.statement = connection.createStatement(); - this.rs = statement.executeQuery( - "select %s, %s, %s, %s from %s".formatted(TILES_COL_Z, TILES_COL_X, TILES_COL_Y, TILES_COL_DATA, TILES_TABLE) - ); + this.rs = query.apply(statement); hasNext = rs.next(); } catch (SQLException e) { throw new FileFormatException("Could not read tile coordinates from mbtiles file", e); @@ -581,15 +616,12 @@ public boolean hasNext() { } @Override - public TileCoord next() { + public T next() { if (!hasNext()) { throw new NoSuchElementException(); } try { - int z = rs.getInt(TILES_COL_Z); - int rawy = rs.getInt(TILES_COL_Y); - int x = rs.getInt(TILES_COL_X); - var result = TileCoord.ofXYZ(x, (1 << z) - 1 - rawy, z); + T result = rowMapper.apply(rs); hasNext = rs.next(); if (!hasNext) { close(); @@ -688,7 +720,7 @@ public void close() { } - private class BatchedTileTableWriter extends BatchedTableWriterBase { + private class BatchedTileTableWriter extends BatchedTableWriterBase { private static final List COLUMNS = List.of(TILES_COL_Z, TILES_COL_X, TILES_COL_Y, TILES_COL_DATA); @@ -697,10 +729,10 @@ private class BatchedTileTableWriter extends BatchedTableWriterBase { } @Override - protected int setParamsInStatementForItem(int positionOffset, PreparedStatement statement, TileEntry tile) + protected int setParamsInStatementForItem(int positionOffset, PreparedStatement statement, Tile tile) throws SQLException { - TileCoord coord = tile.tile(); + TileCoord coord = tile.coord(); int x = coord.x(); int y = coord.y(); int z = coord.z(); @@ -765,7 +797,7 @@ private class BatchedNonCompactTileWriter implements TileWriter { @Override public void 
write(TileEncodingResult encodingResult) { - tableWriter.write(new TileEntry(encodingResult.coord(), encodingResult.tileData())); + tableWriter.write(new Tile(encodingResult.coord(), encodingResult.tileData())); } @Override diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/pmtiles/Pmtiles.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/pmtiles/Pmtiles.java index 8908b26649..c175d9747d 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/pmtiles/Pmtiles.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/pmtiles/Pmtiles.java @@ -11,7 +11,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.datatype.jdk8.Jdk8Module; import com.onthegomap.planetiler.reader.FileFormatException; -import com.onthegomap.planetiler.util.LayerStats; +import com.onthegomap.planetiler.util.LayerAttrStats; import com.onthegomap.planetiler.util.VarInt; import java.io.IOException; import java.nio.BufferUnderflowException; @@ -362,12 +362,12 @@ public static List directoryFromBytes(byte[] bytes) { * stores name, attribution, created_at, planetiler build SHA, vector_layers, etc. 
*/ public record JsonMetadata( - @JsonProperty("vector_layers") List vectorLayers, + @JsonProperty("vector_layers") List vectorLayers, @JsonAnyGetter Map otherMetadata ) { @JsonCreator - public JsonMetadata(@JsonProperty("vector_layers") List vectorLayers) { + public JsonMetadata(@JsonProperty("vector_layers") List vectorLayers) { this(vectorLayers, new HashMap<>()); } diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/pmtiles/ReadablePmtiles.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/pmtiles/ReadablePmtiles.java index 51dc4d5a3a..b2a625ed32 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/pmtiles/ReadablePmtiles.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/pmtiles/ReadablePmtiles.java @@ -1,6 +1,7 @@ package com.onthegomap.planetiler.pmtiles; import com.onthegomap.planetiler.archive.ReadableTileArchive; +import com.onthegomap.planetiler.archive.Tile; import com.onthegomap.planetiler.archive.TileArchiveMetadata; import com.onthegomap.planetiler.archive.TileCompression; import com.onthegomap.planetiler.geo.TileCoord; @@ -13,7 +14,6 @@ import java.nio.channels.SeekableByteChannel; import java.nio.file.Path; import java.nio.file.StandardOpenOption; -import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.stream.IntStream; @@ -152,61 +152,52 @@ public TileArchiveMetadata metadata() { } } - private static class TileCoordIterator implements CloseableIterator { - private final Stream stream; - private final Iterator iterator; - - public TileCoordIterator(Stream stream) { - this.stream = stream; - this.iterator = stream.iterator(); - } - - @Override - public void close() { - stream.close(); - } - - @Override - public boolean hasNext() { - return this.iterator.hasNext(); - } - - @Override - public TileCoord next() { - return this.iterator.next(); - } - } - - private List readDir(long offset, int length) throws IOException { - var buf = getBytes(offset, 
length); - if (header.internalCompression() == Pmtiles.Compression.GZIP) { - buf = Gzip.gunzip(buf); + private List readDir(long offset, int length) { + try { + var buf = getBytes(offset, length); + if (header.internalCompression() == Pmtiles.Compression.GZIP) { + buf = Gzip.gunzip(buf); + } + return Pmtiles.directoryFromBytes(buf); + } catch (IOException e) { + throw new UncheckedIOException(e); } - return Pmtiles.directoryFromBytes(buf); } // Warning: this will only work on z15 or less pmtiles which planetiler creates private Stream getTileCoords(List dir) { - return dir.stream().flatMap(entry -> { + return dir.stream().flatMap(entry -> entry.runLength() == 0 ? + getTileCoords(readDir(header.leafDirectoriesOffset() + entry.offset(), entry.length())) : IntStream + .range((int) entry.tileId(), (int) entry.tileId() + entry.runLength()).mapToObj(TileCoord::hilbertDecode)); + } + + private Stream getTiles(List dir) { + return dir.stream().mapMulti((entry, next) -> { try { - return entry.runLength() == 0 ? 
- getTileCoords(readDir(header.leafDirectoriesOffset() + entry.offset(), entry.length())) : IntStream - .range((int) entry.tileId(), (int) entry.tileId() + entry.runLength()).mapToObj(TileCoord::hilbertDecode); + if (entry.runLength == 0) { + getTiles(readDir(header.leafDirectoriesOffset() + entry.offset(), entry.length())).forEach(next); + } else { + var data = getBytes(header.tileDataOffset() + entry.offset(), entry.length()); + for (int i = 0; i < entry.runLength(); i++) { + next.accept(new Tile(TileCoord.hilbertDecode((int) (entry.tileId() + i)), data)); + } + } } catch (IOException e) { - throw new IllegalStateException(e); + throw new IllegalStateException("Failed to iterate through pmtiles archive ", e); } }); } @Override public CloseableIterator getAllTileCoords() { - List rootDir; - try { - rootDir = readDir(header.rootDirOffset(), (int) header.rootDirLength()); - return new TileCoordIterator(getTileCoords(rootDir)); - } catch (IOException e) { - throw new IllegalStateException(e); - } + List rootDir = readDir(header.rootDirOffset(), (int) header.rootDirLength()); + return CloseableIterator.of(getTileCoords(rootDir)); + } + + @Override + public CloseableIterator getAllTiles() { + List rootDir = readDir(header.rootDirOffset(), (int) header.rootDirLength()); + return CloseableIterator.of(getTiles(rootDir)); } @Override diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/stream/WriteableJsonStreamArchive.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/stream/WriteableJsonStreamArchive.java index 9ba98eb573..40f389ce1e 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/stream/WriteableJsonStreamArchive.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/stream/WriteableJsonStreamArchive.java @@ -16,7 +16,7 @@ import com.onthegomap.planetiler.archive.TileArchiveMetadata; import com.onthegomap.planetiler.archive.TileEncodingResult; import com.onthegomap.planetiler.geo.TileCoord; -import 
com.onthegomap.planetiler.util.LayerStats; +import com.onthegomap.planetiler.util.LayerAttrStats; import java.io.BufferedOutputStream; import java.io.IOException; import java.io.OutputStream; @@ -218,7 +218,7 @@ private interface TileArchiveMetadataMixin { CoordinateXY center(); @JsonIgnore(false) - List vectorLayers(); + List vectorLayers(); } @JsonIncludeProperties({"minX", "maxX", "minY", "maxY"}) diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/stream/WriteableProtoStreamArchive.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/stream/WriteableProtoStreamArchive.java index 5688124720..ebe9f96528 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/stream/WriteableProtoStreamArchive.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/stream/WriteableProtoStreamArchive.java @@ -5,7 +5,7 @@ import com.onthegomap.planetiler.archive.TileEncodingResult; import com.onthegomap.planetiler.geo.TileCoord; import com.onthegomap.planetiler.proto.StreamArchiveProto; -import com.onthegomap.planetiler.util.LayerStats.VectorLayer; +import com.onthegomap.planetiler.util.LayerAttrStats.VectorLayer; import java.io.IOException; import java.io.OutputStream; import java.io.UncheckedIOException; diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/util/CloseableIterator.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/CloseableIterator.java index 151ca9c14a..4efe6257de 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/util/CloseableIterator.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/CloseableIterator.java @@ -3,15 +3,57 @@ import java.io.Closeable; import java.util.Iterator; import java.util.Spliterators; +import java.util.function.Function; import java.util.stream.Stream; import java.util.stream.StreamSupport; public interface CloseableIterator extends Closeable, Iterator { + static CloseableIterator of(Stream stream) { + return new 
CloseableIterator<>() { + private final Iterator iter = stream.iterator(); + + @Override + public boolean hasNext() { + return iter.hasNext(); + } + + @Override + public T next() { + return iter.next(); + } + + @Override + public void close() { + stream.close(); + } + }; + } + @Override void close(); default Stream stream() { return StreamSupport.stream(Spliterators.spliteratorUnknownSize(this, 0), false).onClose(this::close); } + + default CloseableIterator map(Function mapper) { + var parent = this; + return new CloseableIterator<>() { + @Override + public void close() { + parent.close(); + } + + @Override + public boolean hasNext() { + return parent.hasNext(); + } + + @Override + public O next() { + return mapper.apply(parent.next()); + } + }; + } } diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/util/Downloader.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/Downloader.java index 47b289ad43..fa9c77781a 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/util/Downloader.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/Downloader.java @@ -12,7 +12,6 @@ import java.io.InputStream; import java.io.UncheckedIOException; import java.net.URI; -import java.net.URL; import java.net.URLConnection; import java.net.http.HttpClient; import java.net.http.HttpHeaders; @@ -99,7 +98,7 @@ public static Downloader create(PlanetilerConfig config, Stats stats) { } private static URLConnection getUrlConnection(String urlString, PlanetilerConfig config) throws IOException { - var url = new URL(urlString); + var url = URI.create(urlString).toURL(); var connection = url.openConnection(); connection.setConnectTimeout((int) config.httpTimeout().toMillis()); connection.setReadTimeout((int) config.httpTimeout().toMillis()); diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/util/FastGzipOutputStream.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/FastGzipOutputStream.java 
new file mode 100644 index 0000000000..4d2136fe27 --- /dev/null +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/FastGzipOutputStream.java @@ -0,0 +1,18 @@ +package com.onthegomap.planetiler.util; + +import java.io.IOException; +import java.io.OutputStream; +import java.util.zip.Deflater; +import java.util.zip.GZIPOutputStream; + +/** + * A version of {@link GZIPOutputStream} that uses {@link Deflater#BEST_SPEED} (level 1) instead of + * {@link Deflater#DEFAULT_COMPRESSION} (-1). + */ +public class FastGzipOutputStream extends GZIPOutputStream { + + public FastGzipOutputStream(OutputStream out) throws IOException { + super(out); + def.setLevel(Deflater.BEST_SPEED); + } +} diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/util/LayerStats.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/LayerAttrStats.java similarity index 98% rename from planetiler-core/src/main/java/com/onthegomap/planetiler/util/LayerStats.java rename to planetiler-core/src/main/java/com/onthegomap/planetiler/util/LayerAttrStats.java index 4d3ec988d7..9685f0fe35 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/util/LayerStats.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/LayerAttrStats.java @@ -27,7 +27,7 @@ * @see MBtiles spec */ @ThreadSafe -public class LayerStats implements Consumer { +public class LayerAttrStats implements Consumer { /* * This utility is called for billions of features by multiple threads when processing the planet which can make * access to shared data structures a bottleneck. 
So give each thread an individual ThreadLocalLayerStatsHandler to diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/util/TileSizeStats.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/TileSizeStats.java new file mode 100644 index 0000000000..2b07a5bd07 --- /dev/null +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/TileSizeStats.java @@ -0,0 +1,280 @@ +package com.onthegomap.planetiler.util; + +import static com.onthegomap.planetiler.worker.Worker.joinFutures; + +import com.fasterxml.jackson.annotation.JsonPropertyOrder; +import com.fasterxml.jackson.databind.ObjectWriter; +import com.fasterxml.jackson.databind.PropertyNamingStrategies; +import com.fasterxml.jackson.databind.annotation.JsonNaming; +import com.fasterxml.jackson.dataformat.csv.CsvMapper; +import com.fasterxml.jackson.dataformat.csv.CsvSchema; +import com.onthegomap.planetiler.archive.Tile; +import com.onthegomap.planetiler.archive.TileArchiveConfig; +import com.onthegomap.planetiler.archive.TileArchives; +import com.onthegomap.planetiler.config.Arguments; +import com.onthegomap.planetiler.config.PlanetilerConfig; +import com.onthegomap.planetiler.geo.TileCoord; +import com.onthegomap.planetiler.stats.ProgressLoggers; +import com.onthegomap.planetiler.stats.Stats; +import com.onthegomap.planetiler.worker.WorkQueue; +import com.onthegomap.planetiler.worker.WorkerPipeline; +import java.io.BufferedOutputStream; +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.io.Writer; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Comparator; +import java.util.List; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.atomic.AtomicLong; +import vector_tile.VectorTileProto; + +/** + * Utilities for extracting tile and layer size summaries from encoded vector tiles. + *

+ * {@link #computeTileStats(VectorTileProto.Tile)} extracts statistics about each layer in a tile and + * {@link #formatOutputRows(TileCoord, int, List)} formats them as rows of a TSV file to write. + *

+ * To generate a tsv.gz file with stats for each tile, you can add the {@code --output-layerstats} option when generating an + * archive, or run the following on an existing archive: + * + *

+ * {@code
+ * java -jar planetiler.jar stats --input=<path to archive> --output=layerstats.tsv.gz
+ * }
+ * 
+ */ +public class TileSizeStats { + + private static final int BATCH_SIZE = 1_000; + private static final CsvMapper MAPPER = new CsvMapper(); + private static final CsvSchema SCHEMA = MAPPER + .schemaFor(OutputRow.class) + .withoutHeader() + .withColumnSeparator('\t') + .withLineSeparator("\n"); + private static final ObjectWriter WRITER = MAPPER.writer(SCHEMA); + + /** Returns the default path that a layerstats file should go relative to an existing archive. */ + public static Path getDefaultLayerstatsPath(Path archive) { + return archive.resolveSibling(archive.getFileName() + ".layerstats.tsv.gz"); + } + + public static void main(String... args) { + var arguments = Arguments.fromArgsOrConfigFile(args); + var config = PlanetilerConfig.from(arguments); + var stats = Stats.inMemory(); + var download = arguments.getBoolean("download_osm_tile_weights", "download OSM tile weights file", true); + if (download && !Files.exists(config.tileWeights())) { + TopOsmTiles.downloadPrecomputed(config, stats); + } + var tileStats = new TilesetSummaryStatistics(TileWeights.readFromFile(config.tileWeights())); + var inputString = arguments.getString("input", "input file"); + var input = TileArchiveConfig.from(inputString); + var localPath = input.getLocalPath(); + var output = localPath == null ? 
+ arguments.file("output", "output file") : + arguments.file("output", "output file", getDefaultLayerstatsPath(localPath)); + var counter = new AtomicLong(0); + var timer = stats.startStage("tilestats"); + record Batch(List tiles, CompletableFuture> stats) {} + WorkQueue writerQueue = new WorkQueue<>("tilestats_write_queue", 1_000, 1, stats); + var pipeline = WorkerPipeline.start("tilestats", stats); + var readBranch = pipeline + .fromGenerator("enumerate", next -> { + try ( + var reader = TileArchives.newReader(input, config); + var tiles = reader.getAllTiles(); + writerQueue + ) { + var writer = writerQueue.threadLocalWriter(); + List batch = new ArrayList<>(BATCH_SIZE); + while (tiles.hasNext()) { + var tile = tiles.next(); + if (batch.size() >= BATCH_SIZE) { + var result = new Batch(batch, new CompletableFuture<>()); + writer.accept(result); + next.accept(result); + batch = new ArrayList<>(BATCH_SIZE); + } + batch.add(tile); + counter.incrementAndGet(); + } + if (!batch.isEmpty()) { + var result = new Batch(batch, new CompletableFuture<>()); + writer.accept(result); + next.accept(result); + } + } + }) + .addBuffer("coords", 1_000) + .sinkTo("process", config.featureProcessThreads(), prev -> { + byte[] zipped = null; + byte[] unzipped; + VectorTileProto.Tile decoded; + List layerStats = null; + + var updater = tileStats.threadLocalUpdater(); + for (var batch : prev) { + List lines = new ArrayList<>(batch.tiles.size()); + for (var tile : batch.tiles) { + if (!Arrays.equals(zipped, tile.bytes())) { + zipped = tile.bytes(); + unzipped = Gzip.gunzip(tile.bytes()); + decoded = VectorTileProto.Tile.parseFrom(unzipped); + layerStats = computeTileStats(decoded); + } + updater.recordTile(tile.coord(), zipped.length, layerStats); + lines.addAll(TileSizeStats.formatOutputRows(tile.coord(), zipped.length, layerStats)); + } + batch.stats.complete(lines); + } + }); + + var writeBranch = pipeline.readFromQueue(writerQueue) + .sinkTo("write", 1, prev -> { + try (var writer = 
newWriter(output)) { + writer.write(headerRow()); + for (var batch : prev) { + for (var line : batch.stats.get()) { + writer.write(line); + } + } + } + }); + ProgressLoggers loggers = ProgressLoggers.create() + .addRateCounter("tiles", counter) + .newLine() + .addPipelineStats(readBranch) + .addPipelineStats(writeBranch) + .newLine() + .addProcessStats(); + loggers.awaitAndLog(joinFutures(readBranch.done(), writeBranch.done()), config.logInterval()); + + timer.stop(); + tileStats.printStats(config.debugUrlPattern()); + stats.printSummary(); + } + + /** Returns the TSV rows to output for all the layers in a tile. */ + public static List formatOutputRows(TileCoord tileCoord, int archivedBytes, List layerStats) + throws IOException { + int hilbert = tileCoord.hilbertEncoded(); + List result = new ArrayList<>(layerStats.size()); + for (var layer : layerStats) { + result.add(lineToString(new OutputRow( + tileCoord.z(), + tileCoord.x(), + tileCoord.y(), + hilbert, + archivedBytes, + layer.layer, + layer.layerBytes, + layer.layerFeatures, + layer.layerAttrBytes, + layer.layerAttrKeys, + layer.layerAttrValues + ))); + } + return result; + } + + /** + * Opens a new gzip (level 1/fast) writer to {@code path}, creating a new one or replacing an existing file at that + * path. + */ + public static Writer newWriter(Path path) throws IOException { + return new OutputStreamWriter( + new FastGzipOutputStream(new BufferedOutputStream(Files.newOutputStream(path, + StandardOpenOption.WRITE, StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.CREATE)))); + } + + /** Returns {@code output} encoded as a TSV row string. */ + public static String lineToString(OutputRow output) throws IOException { + return WRITER.writeValueAsString(output); + } + + /** Returns the header row for the output TSV file. 
*/ + public static String headerRow() { + return String.join( + String.valueOf(SCHEMA.getColumnSeparator()), + SCHEMA.getColumnNames() + ) + new String(SCHEMA.getLineSeparator()); + } + + /** + * Returns the size and statistics for each layer in {@code proto}, sorted by layer name. + * + * Returns an empty list when {@code proto} is {@code null}. + */ + public static List computeTileStats(VectorTileProto.Tile proto) { + if (proto == null) { + return List.of(); + } + List result = new ArrayList<>(proto.getLayersCount()); + for (var layer : proto.getLayersList()) { + /* attribute size = byte length of every key plus serialized size of every value in this layer */ int attrSize = 0; + for (var key : layer.getKeysList().asByteStringList()) { + attrSize += key.size(); + } + for (var value : layer.getValuesList()) { + attrSize += value.getSerializedSize(); + } + result.add(new LayerStats( + layer.getName(), + layer.getSerializedSize(), + layer.getFeaturesCount(), + attrSize, + layer.getKeysCount(), + layer.getValuesCount() + )); + } + /* LayerStats compares by layer name, so this orders the rows by layer */ result.sort(Comparator.naturalOrder()); + return result; + } + + /** Model for the data contained in each row in the TSV. */ + @JsonPropertyOrder({ + "z", + "x", + "y", + "hilbert", + "archived_tile_bytes", + "layer", + "layer_bytes", + "layer_features", + "layer_attr_bytes", + "layer_attr_keys", + "layer_attr_values" + }) + @JsonNaming(PropertyNamingStrategies.SnakeCaseStrategy.class) + public record OutputRow( + int z, + int x, + int y, + int hilbert, + int archivedTileBytes, + String layer, + int layerBytes, + int layerFeatures, + int layerAttrBytes, + int layerAttrKeys, + int layerAttrValues + ) {} + + /** Stats extracted from a layer in a vector tile. 
*/ + public record LayerStats( + String layer, + int layerBytes, + int layerFeatures, + int layerAttrBytes, + int layerAttrKeys, + int layerAttrValues + ) implements Comparable { + + /** Orders stats by layer name. */ @Override + public int compareTo(LayerStats o) { + return layer.compareTo(o.layer); + } + } +} diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/util/TileWeights.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/TileWeights.java new file mode 100644 index 0000000000..e6f0c1ab3f --- /dev/null +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/TileWeights.java @@ -0,0 +1,136 @@ +package com.onthegomap.planetiler.util; + +import static java.nio.file.StandardOpenOption.CREATE; +import static java.nio.file.StandardOpenOption.TRUNCATE_EXISTING; +import static java.nio.file.StandardOpenOption.WRITE; + +import com.fasterxml.jackson.annotation.JsonPropertyOrder; +import com.fasterxml.jackson.databind.ObjectReader; +import com.fasterxml.jackson.databind.ObjectWriter; +import com.fasterxml.jackson.databind.RuntimeJsonMappingException; +import com.fasterxml.jackson.dataformat.csv.CsvMapper; +import com.fasterxml.jackson.dataformat.csv.CsvSchema; +import com.onthegomap.planetiler.config.PlanetilerConfig; +import com.onthegomap.planetiler.geo.TileCoord; +import java.io.BufferedInputStream; +import java.io.BufferedOutputStream; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Comparator; +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; +import java.util.stream.Collectors; +import java.util.zip.GZIPInputStream; +import java.util.zip.GZIPOutputStream; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Holds tile weights to compute weighted average tile sizes. + *

+ * {@link TopOsmTiles} can be used to get tile weights from 90 days of openstreetmap.org tile traffic. + */ +public class TileWeights { + private static final Logger LOGGER = LoggerFactory.getLogger(TileWeights.class); + private static final CsvMapper MAPPER = new CsvMapper(); + private static final CsvSchema SCHEMA = MAPPER + .schemaFor(Row.class) + .withHeader() + .withColumnSeparator('\t') + .withLineSeparator("\n"); + private static final ObjectWriter WRITER = MAPPER.writer(SCHEMA); + private static final ObjectReader READER = MAPPER.readerFor(Row.class).with(SCHEMA); + private final Map byZoom = new HashMap<>(); + private final Map weights = new HashMap<>(); + + /** Returns the weight recorded for {@code coord}, or 0 when none has been added. */ public long getWeight(TileCoord coord) { + return weights.getOrDefault(coord, 0L); + } + + /** Returns the sum of all tile weights at a specific zoom */ + public long getZoomWeight(int zoom) { + return byZoom.getOrDefault(zoom, 0L); + } + + /** Adds {@code weight} to the current weight for {@code coord} and returns this modified instance. */ + public TileWeights put(TileCoord coord, long weight) { + weights.merge(coord, weight, Long::sum); + byZoom.merge(coord.z(), weight, Long::sum); + return this; + } + + /** + * Write tile weights to a gzipped TSV file with {@code z, x, y, loads} columns. + */ + public void writeToFile(Path path) throws IOException { + try ( + var output = new GZIPOutputStream( + new BufferedOutputStream(Files.newOutputStream(path, CREATE, TRUNCATE_EXISTING, WRITE))); + var writer = WRITER.writeValues(output) + ) { + /* rows are written in ascending order of the encoded tile ID */ var sorted = weights.entrySet().stream() + .sorted(Comparator.comparingInt(e -> e.getKey().encoded())) + .iterator(); + while (sorted.hasNext()) { + var entry = sorted.next(); + TileCoord coord = entry.getKey(); + writer.write(new Row(coord.z(), coord.x(), coord.y(), entry.getValue())); + } + } + } + + /** + * Load tile weights from a gzipped TSV file with {@code z, x, y, loads} columns. + *

+ * Duplicate entries will be added together. Rows whose zoom is outside the supported range are skipped. If the file + * cannot be read or parsed, a warning is logged and an empty instance is returned. + */ + public static TileWeights readFromFile(Path path) { + TileWeights result = new TileWeights(); + try ( + var input = new GZIPInputStream(new BufferedInputStream(Files.newInputStream(path))); + var reader = READER.readValues(input) + ) { + while (reader.hasNext()) { + var row = reader.next(); + if (row.z >= PlanetilerConfig.MIN_MINZOOM && row.z <= PlanetilerConfig.MAX_MAXZOOM) { + /* reduce x and y modulo the number of tiles per axis at this zoom (1 << z) */ int x = row.x % (1 << row.z); + int y = row.y % (1 << row.z); + result.put(TileCoord.ofXYZ(x, y, row.z()), row.loads()); + } + } + } catch (IOException | RuntimeJsonMappingException e) { + LOGGER.warn("Unable to load tile weights from {}, will fall back to unweighted average: {}", path, e); + return new TileWeights(); + } + return result; + } + + /** Returns true when no zoom level has a positive total weight. */ public boolean isEmpty() { + return byZoom.values().stream().noneMatch(e -> e > 0); + } + + /** One row of the TSV file: a tile coordinate and its load count. */ @JsonPropertyOrder({"z", "x", "y", "loads"}) + private record Row(int z, int x, int y, long loads) {} + + @Override + public String toString() { + return "TileWeights{\n" + + weights.entrySet().stream() + .map(result -> result.getKey() + ": " + result.getValue()) + .collect(Collectors.joining("\n")) + .indent(2) + + '}'; + } + + @Override + public boolean equals(Object o) { + return o == this || (o instanceof TileWeights other && Objects.equals(other.weights, weights)); + } + + @Override + public int hashCode() { + return weights.hashCode(); + } +} diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/util/TilesetSummaryStatistics.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/TilesetSummaryStatistics.java new file mode 100644 index 0000000000..dfab3ad1c5 --- /dev/null +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/TilesetSummaryStatistics.java @@ -0,0 +1,439 @@ +package com.onthegomap.planetiler.util; + +import com.onthegomap.planetiler.config.PlanetilerConfig; +import com.onthegomap.planetiler.geo.TileCoord; +import java.util.Comparator; +import 
java.util.HashMap; +import java.util.List; +import java.util.LongSummaryStatistics; +import java.util.Map; +import java.util.PriorityQueue; +import java.util.concurrent.CopyOnWriteArrayList; +import java.util.function.Function; +import java.util.stream.Collectors; +import java.util.stream.IntStream; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Utility that computes min/max/average sizes for each vector tile layers at each zoom level, then computes combined + * summary statistics at the end. + *

+ * Provide a {@link TileWeights} instance to compute weighted average tile sizes based on actual tile traffic. + */ +public class TilesetSummaryStatistics { + + private static final int TOP_N_TILES = 10; + private static final int WARN_BYTES = 100_000; + private static final int ERROR_BYTES = 500_000; + private static final Logger LOGGER = LoggerFactory.getLogger(TilesetSummaryStatistics.class); + private final TileWeights tileWeights; + + // instead of threads updating concurrent data structures, each thread gets a thread-local + // Summary instance it can update without contention that are combined at the end. + private final List

summaries = new CopyOnWriteArrayList<>(); + + /** Creates statistics that use {@code tileWeights} to compute weighted averages. */ public TilesetSummaryStatistics(TileWeights tileWeights) { + this.tileWeights = tileWeights; + } + + /** Creates statistics with an empty {@link TileWeights}. */ public TilesetSummaryStatistics() { + this(new TileWeights()); + } + + /** Formats the layer(s) tied for the largest size in {@code tile} as comma-separated {@code name:size} pairs. */ private static String tileBiggestLayers(Format formatter, TileSummary tile) { + /* despite its name this holds the largest layer size, so the filter below keeps only the biggest layer(s) */ int minSize = tile.layers.stream().mapToInt(l -> l.layerBytes()).max().orElse(0); + return tile.layers.stream() + .filter(d -> d.layerBytes() >= minSize) + .sorted(Comparator.comparingInt(d -> -d.layerBytes())) + .map(d -> d.layer() + ":" + formatter.storage(d.layerBytes())) + .collect(Collectors.joining(", ")); + } + + /** Returns a combined {@link Summary} from each thread's {@link Updater}. */ + public Summary summary() { + return summaries.stream().reduce(new Summary(), Summary::mergeIn); + } + + /** Logs biggest tiles, max layer size by zoom, and weighted average tile sizes. */ + @SuppressWarnings("java:S2629") + public void printStats(String debugUrlPattern) { + LOGGER.debug("Tile stats:"); + Summary result = summary(); + var overallStats = result.get(); + var formatter = Format.defaultInstance(); + LOGGER.debug("Biggest tiles (gzipped)\n{}", overallStats.formatBiggestTiles(debugUrlPattern)); + var alreadyListed = overallStats.biggestTiles().stream() + .map(TileSummary::coord) + .collect(Collectors.toSet()); + /* the single biggest tile of each layer, if not already listed above and larger than WARN_BYTES */ var otherTiles = result.layers().stream() + .flatMap(layer -> result.get(layer).biggestTiles().stream().limit(1)) + .filter(tile -> !alreadyListed.contains(tile.coord) && tile.archivedSize > WARN_BYTES) + .toList(); + if (!otherTiles.isEmpty()) { + LOGGER.info("Other tiles with large layers\n{}", + otherTiles.stream() + .map(tile -> "%d/%d/%d (%s) %s (%s)".formatted( + tile.coord.z(), + tile.coord.x(), + tile.coord.y(), + formatter.storage(tile.archivedSize), + tile.coord.getDebugUrl(debugUrlPattern), + tileBiggestLayers(formatter, tile) + )).collect(Collectors.joining("\n"))); + } + + LOGGER.debug("Max tile sizes\n{}\n{}\n{}", + result.formatTable(n -> { + 
String string = " " + formatter.storage(n, true); + /* red above ERROR_BYTES, yellow above WARN_BYTES, otherwise uncolored */ return n.intValue() > ERROR_BYTES ? AnsiColors.red(string) : + n.intValue() > WARN_BYTES ? AnsiColors.yellow(string) : + string; + }, Cell::maxSize), + result.formatRow("full tile", + formatter::storage, + z -> result.get(z).maxSize(), + result.get().maxSize() + ), + result.formatRow("gzipped", + formatter::storage, + z -> result.get(z).maxArchivedSize(), + result.get().maxArchivedSize() + ) + ); + LOGGER.debug(" Max tile: {} (gzipped: {})", + formatter.storage(overallStats.maxSize()), + formatter.storage(overallStats.maxArchivedSize())); + LOGGER.debug(" Avg tile: {} (gzipped: {}) {}", + formatter.storage(overallStats.weightedAverageSize()), + formatter.storage(overallStats.weightedAverageArchivedSize()), + tileWeights.isEmpty() ? + "no tile weights, use --download-osm-tile-weights for weighted average" : + "using weighted average based on OSM traffic"); + LOGGER.debug(" # tiles: {}", formatter.integer(overallStats.numTiles())); + } + + /** + * Returns an {@link Updater} that accepts individual tile layer stats from a thread that will eventually be combined + * into the final tileset report. + */ + public Updater threadLocalUpdater() { + return new Updater(); + } + + /** Aggregated statistics for a layer/zoom, layer, zoom, or entire tileset. */ + public static class Cell { + private final LongSummaryStatistics archivedBytes = new LongSummaryStatistics(); + private final LongSummaryStatistics bytes = new LongSummaryStatistics(); + /* TileSummary's natural ordering puts the smallest tile at the head, so poll() evicts the smallest */ private final PriorityQueue topTiles = new PriorityQueue<>(); + private long weightedBytesSum; + private long weightedArchivedBytesSum; + private long totalWeight; + /* tiles smaller than this are not offered to topTiles */ private int bigTileCutoff = 0; + + /** Returns a new cell with {@code a} and {@code b} merged into it. */ private static Cell combine(Cell a, Cell b) { + return new Cell().mergeIn(a).mergeIn(b); + } + + /** Max raw layer bytes (or tile size when aggregated over all layers). 
*/ + public long maxSize() { + /* getMax() is Long.MIN_VALUE when nothing has been recorded, so clamp to 0 */ return Math.max(0, bytes.getMax()); + } + + /** Max gzipped tile bytes (or 0 when broken-out by layer). */ + public long maxArchivedSize() { + return Math.max(0, archivedBytes.getMax()); + } + + /** Total tiles included in this aggregation. */ + public long numTiles() { + return bytes.getCount(); + } + + /** + * Returns the biggest tiles in this aggregation by gzipped size (when aggregated over all layers) or raw size + * within an individual layer. + */ + public List biggestTiles() { + return topTiles.stream().sorted(Comparator.comparingLong(s -> -s.archivedSize)).toList(); + } + + /** + * Returns average gzipped tile size in this aggregation, weighted by the {@link TileWeights} instance provided. + *

+ * When multiple zoom-levels are combined, the weighted average respects the weight-per-zoom-level from + * {@link TileWeights} so that low zoom tiles are not overweighted when analyzing a small extract. + */ + public double weightedAverageArchivedSize() { + /* no weighted tiles: fall back to the plain average */ return totalWeight == 0 ? archivedBytes.getAverage() : (weightedArchivedBytesSum * 1d / totalWeight); + } + + /** + * Returns average raw (not gzipped) tile size in this aggregation, weighted by the {@link TileWeights} instance + * provided. + * + * @see #weightedAverageArchivedSize() + */ + public double weightedAverageSize() { + return totalWeight == 0 ? bytes.getAverage() : (weightedBytesSum * 1d / totalWeight); + } + + /** Merges {@code other} into this cell without rescaling its weights. */ private Cell mergeIn(Cell other) { + return mergeIn(other, 1); + } + + /** + * Adds {@code other} to this cell and returns this: weights and weighted sums are multiplied by {@code weight}, + * while size statistics and biggest tiles are merged as-is. + */ private Cell mergeIn(Cell other, double weight) { + /* long += double narrows the result back to long */ totalWeight += other.totalWeight * weight; + weightedBytesSum += other.weightedBytesSum * weight; + weightedArchivedBytesSum += other.weightedArchivedBytesSum * weight; + archivedBytes.combine(other.archivedBytes); + bytes.combine(other.bytes); + for (var bigTile : other.topTiles) { + acceptBigTile(bigTile.coord, bigTile.archivedSize, bigTile.layers); + } + return this; + } + + /** Offers a tile to {@code topTiles}, evicting the smallest entries beyond TOP_N_TILES and raising the cutoff. */ private void acceptBigTile(TileCoord coord, int archivedBytes, List layerStats) { + if (archivedBytes >= bigTileCutoff) { + topTiles.offer(new TileSummary(coord, archivedBytes, layerStats)); + while (topTiles.size() > TOP_N_TILES) { + topTiles.poll(); + var min = topTiles.peek(); + if (min != null) { + bigTileCutoff = min.archivedSize(); + } + } + } + } + + /** Returns the biggest tiles as a numbered list with one tile per line. */ String formatBiggestTiles(String debugUrlPattern) { + var biggestTiles = biggestTiles(); + var formatter = Format.defaultInstance(); + return IntStream.range(0, biggestTiles.size()) + .mapToObj(index -> { + var tile = biggestTiles.get(index); + return "%d. 
%d/%d/%d (%s) %s (%s)".formatted( + index + 1, + tile.coord.z(), + tile.coord.x(), + tile.coord.y(), + formatter.storage(tile.archivedSize), + tile.coord.getDebugUrl(debugUrlPattern), + tileBiggestLayers(formatter, tile) + ); + }).collect(Collectors.joining("\n")); + } + } + + /** Statistics for a tile and its layers. */ + public record TileSummary(TileCoord coord, int archivedSize, List layers) + implements Comparable { + + @Override + public int compareTo(TileSummary o) { + int result = Integer.compare(archivedSize, o.archivedSize); + if (result == 0) { + result = Integer.compare(coord.encoded(), o.coord.encoded()); + } + return result; + } + + TileSummary withSize(int newSize) { + return new TileSummary(coord, newSize, layers); + } + } + + /** Overall summary statistics for a tileset, aggregated from all {@link Updater Updaters}. */ + public class Summary { + + private final List byTile = + IntStream.rangeClosed(PlanetilerConfig.MIN_MINZOOM, PlanetilerConfig.MAX_MAXZOOM) + .mapToObj(i -> new Cell()) + .toList(); + + private final List> byLayer = + IntStream.rangeClosed(PlanetilerConfig.MIN_MINZOOM, PlanetilerConfig.MAX_MAXZOOM) + .>mapToObj(i -> new HashMap<>()) + .toList(); + + /** All the layers that appear in the tileset. */ + public List layers() { + return byLayer.stream().flatMap(e -> e.keySet().stream()).distinct().sorted().toList(); + } + + /** Returns the summary statistics for a layer at a zoom level. */ + public Cell get(int z, String layer) { + return byLayer.get(z).getOrDefault(layer, new Cell()); + } + + /** Returns the summary statistics for a layer from all zoom levels. */ + public Cell get(String layer) { + return combineZooms(byLayer.stream() + .map(e -> e.getOrDefault(layer, new Cell())) + .toList()); + } + + /** Returns the summary statistics for a zoom level from all layers. */ + public Cell get(int z) { + return byTile.get(z); + } + + /** Returns the summary statistics for the entire dataset by aggregating all layers and zoom-levels. 
*/ + public Cell get() { + return combineZooms(byTile); + } + + /** Returns the minimum zoom a tile appears at in the tileset. */ + public int minZoomWithData() { + return IntStream.range(0, byTile.size()) + .filter(i -> byTile.get(i).numTiles() > 0) + .min() + .orElse(PlanetilerConfig.MAX_MAXZOOM); + } + + /** Returns the maximum zoom a tile appears at in the tileset. */ + public int maxZoomWithData() { + return IntStream.range(0, byTile.size()) + .filter(i -> byTile.get(i).numTiles() > 0) + .max() + .orElse(PlanetilerConfig.MAX_MAXZOOM); + } + + /** Returns the minimum zoom a specific layer appears at in the tileset. */ + public int minZoomWithData(String layer) { + return IntStream.range(0, byLayer.size()) + .filter(i -> byLayer.get(i).containsKey(layer)) + .min() + .orElse(PlanetilerConfig.MAX_MAXZOOM); + } + + /** Merges each zoom's per-tile and per-layer cells from {@code other} into this summary and returns this. */ + private Summary mergeIn(Summary other) { + for (int z = PlanetilerConfig.MIN_MINZOOM; z <= PlanetilerConfig.MAX_MAXZOOM; z++) { + byTile.get(z).mergeIn(other.byTile.get(z)); + } + for (int z = PlanetilerConfig.MIN_MINZOOM; z <= PlanetilerConfig.MAX_MAXZOOM; z++) { + var ourMap = byLayer.get(z); + var theirMap = other.byLayer.get(z); + theirMap.forEach((layer, stats) -> ourMap.merge(layer, stats, Cell::combine)); + } + return this; + } + + /** + * Merges one cell per zoom level into a single cell, rescaling each zoom so that its share of the combined weight + * matches that zoom's share of the total weight in {@code tileWeights}. When no zoom has both data and weight, + * every cell is merged unscaled. + */ + private Cell combineZooms(List byTile) { + /* aggregate Cells over zoom levels, but respect the global zoom-level weights + * from TileWeights */ + double sumWeight = 0; + double preSumWeight = 0; + for (int z = 0; z < byTile.size(); z++) { + var cell = byTile.get(z); + long zoomWeight = tileWeights.getZoomWeight(z); + if (cell.numTiles() > 0 && zoomWeight > 0) { + sumWeight += zoomWeight; + preSumWeight += cell.totalWeight; + } + } + boolean noData = sumWeight == 0 || preSumWeight == 0; + Cell result = new Cell(); + for (int z = 0; z < byTile.size(); z++) { + var cell = byTile.get(z); + long zoomWeight = tileWeights.getZoomWeight(z); + if ((cell.numTiles() > 0 && zoomWeight > 0) || noData) { + /* a zoom whose tiles carry no weight adds nothing to the weighted sums, but dividing by its zero totalWeight + * gives an infinite factor whose NaN product resets the running sums to 0 when mergeIn narrows back to long, + * so such cells are merged unscaled */ + double weight = noData || cell.totalWeight == 0 ? 
1 : (zoomWeight / sumWeight) / (cell.totalWeight / preSumWeight); + result.mergeIn(cell, weight); + } + } + return result; + } + + /** Formats one table row whose per-zoom cells are extracted by {@code extractCells} and rendered by {@code formatter}. */ + String formatRow( + String firstColumn, + Function formatter, + Function extractCells, + Number lastColumn + ) { + return formatRow(firstColumn, extractCells.andThen(formatter), formatter.apply(lastColumn)); + } + + /** + * Formats one table row: the label padded to the longest layer name (at least 9 characters), one cell for each + * zoom from the minimum to the maximum zoom with data, then {@code lastColumn}. + */ + String formatRow( + String firstColumn, + Function extractStat, + String lastColumn + ) { + StringBuilder builder = new StringBuilder(); + int minZoom = minZoomWithData(); + int maxZoom = maxZoomWithData(); + List layers = layers().stream() + .sorted(Comparator.comparingInt(this::minZoomWithData)) + .toList(); + int maxLayerLength = Math.max(9, layers.stream().mapToInt(String::length).max().orElse(0)); + String cellFormat = "%1$5s"; + String layerFormat = "%1$" + maxLayerLength + "s"; + + builder.append(layerFormat.formatted(firstColumn)); + for (int z = minZoom; z <= maxZoom; z++) { + builder.append(cellFormat.formatted(extractStat.apply(z))); + builder.append(' '); + } + builder.append(cellFormat.formatted(lastColumn)); + return builder.toString(); + } + + /** Formats a header row followed by one row per layer, ordered by the first zoom each layer appears at. */ + String formatTable(Function formatter, + Function extractStat) { + StringBuilder builder = new StringBuilder(); + List layers = layers().stream() + .sorted(Comparator.comparingInt(this::minZoomWithData)) + .toList(); + + /* header: 0 1 2 3 4 ... 15 */ + builder.append(formatRow("", z -> "z" + z, "all")).append('\n'); + + /* each row: layer */ + for (var layer : layers) { + builder.append(formatRow( + layer, + formatter, + z -> extractStat.apply(get(z, layer)), + extractStat.apply(get(layer)) + )).append('\n'); + } + return builder.toString().stripTrailing(); + } + } + + /** Thread local updater that accepts individual statistics for each tile. 
*/ + public class Updater { + private final Summary summary = new Summary(); + + /** Registers this updater's summary so that it is included when summaries are combined. */ private Updater() { + summaries.add(summary); + } + + /** Cell factory for {@code computeIfAbsent}; the layer key is unused. */ private static Cell newCell(String layer) { + return new Cell(); + } + + /** + * Records one tile: its archived size and weight go into the per-zoom cell, each layer's size goes into the + * per-layer cell for that zoom, and the tile's raw size is taken as the sum of its layers' bytes. + */ public void recordTile(TileCoord coord, int archivedBytes, List layerStats) { + var tileStat = summary.byTile.get(coord.z()); + var layerStat = summary.byLayer.get(coord.z()); + tileStat.archivedBytes.accept(archivedBytes); + tileStat.acceptBigTile(coord, archivedBytes, layerStats); + long weight = tileWeights.getWeight(coord); + tileStat.totalWeight += weight; + tileStat.weightedArchivedBytesSum += weight * archivedBytes; + + int sum = 0; + for (var layer : layerStats) { + var cell = layerStat.computeIfAbsent(layer.layer(), Updater::newCell); + cell.bytes.accept(layer.layerBytes()); + /* within a layer cell, tiles are ranked by this layer's size rather than the archived tile size */ cell.acceptBigTile(coord, layer.layerBytes(), layerStats); + sum += layer.layerBytes(); + cell.weightedBytesSum += weight * layer.layerBytes(); + cell.totalWeight += weight; + } + tileStat.weightedBytesSum += weight * sum; + tileStat.bytes.accept(sum); + } + } +} diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/util/TopOsmTiles.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/TopOsmTiles.java new file mode 100644 index 0000000000..652235e4a5 --- /dev/null +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/TopOsmTiles.java @@ -0,0 +1,189 @@ +package com.onthegomap.planetiler.util; + +import static com.onthegomap.planetiler.util.Exceptions.throwFatalException; + +import com.google.common.io.LineReader; +import com.onthegomap.planetiler.config.Arguments; +import com.onthegomap.planetiler.config.PlanetilerConfig; +import com.onthegomap.planetiler.geo.TileCoord; +import com.onthegomap.planetiler.stats.ProgressLoggers; +import com.onthegomap.planetiler.stats.Stats; +import com.onthegomap.planetiler.worker.WorkerPipeline; +import java.io.BufferedInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import 
java.io.InputStreamReader; +import java.io.Reader; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.time.LocalDate; +import java.time.Period; +import java.time.ZoneOffset; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.atomic.AtomicLong; +import java.util.regex.Pattern; +import java.util.stream.IntStream; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.tukaani.xz.XZInputStream; + +/** + * A utility for computing {@link TileWeights} from historic openstreetmap.org tile traffic. + *

+ * To download raw data from OSM tile logs, run with: + * + *

+ * {@code
+ * java -jar planetiler.jar top-osm-tiles --days=<# days to fetch> --top=<# tiles to include> --output=output.tsv.gz
+ * }
+ * 
+ *

+ * You can also fetch precomputed top-1m tile stats from summer 2023 using + * {@link #downloadPrecomputed(PlanetilerConfig, Stats)} + */ +public class TopOsmTiles { + + private static final String DOWNLOAD_URL = + "https://raw.githubusercontent.com/onthegomap/planetiler/main/layerstats/top_osm_tiles.tsv.gz"; + private static final Logger LOGGER = LoggerFactory.getLogger(TopOsmTiles.class); + private final Stats stats; + private final PlanetilerConfig config; + private final Downloader downloader; + + /** Creates a utility that downloads tile logs with a {@link Downloader} built from {@code config} and {@code stats}. */ + TopOsmTiles(PlanetilerConfig config, Stats stats) { + this.config = config; + this.stats = stats; + downloader = Downloader.create(config, stats); + } + + /** + * Opens the xz-compressed tile log for {@code date} from planet.openstreetmap.org as a UTF-8 reader. The caller is + * responsible for closing the returned reader. + */ + Reader fetch(LocalDate date) throws IOException { + String url = "https://planet.openstreetmap.org/tile_logs/tiles-%4d-%02d-%02d.txt.xz".formatted( + date.getYear(), + date.getMonthValue(), + date.getDayOfMonth() + ); + /* decode explicitly as UTF-8 so parsing does not depend on the JVM default charset */ + return new InputStreamReader(new XZInputStream(new BufferedInputStream(downloader.openStream(url))), + StandardCharsets.UTF_8); + } + + /** + * Reads the tile logs for every date in {@code toDownload} using {@code threads} download workers, sums the loads + * per tile up to {@code maxZoom}, and returns the {@code topN} most-loaded tiles as {@link TileWeights}. + */ + @SuppressWarnings("java:S2142") + TileWeights run(int threads, int topN, int maxZoom, List toDownload) { + CompletableFuture result = new CompletableFuture<>(); + var timer = stats.startStage("top-osm-tiles"); + + AtomicLong downloaded = new AtomicLong(); + + var pipeline = WorkerPipeline.start("top-osm-tiles", stats) + .readFromTiny("urls", toDownload).>addWorker("download", threads, + (prev, next) -> { + for (var date : prev) { + for (var line : readFile(maxZoom, date)) { + next.accept(line); + } + downloaded.incrementAndGet(); + } + }) + .addBuffer("lines", 100_000, 1_000) + .sinkTo("collect", 1, lines -> { + Map counts = new HashMap<>(); + for (var line : lines) { + counts.merge(line.getKey(), line.getValue(), Long::sum); + } + LOGGER.info("Extracting top {} tiles from {} tiles", topN, counts.size()); + var tileWeights = new TileWeights(); + counts.entrySet().stream() + .sorted(Comparator.comparingLong(e -> -e.getValue())) + .limit(topN) + .forEach(entry -> 
tileWeights.put(TileCoord.decode(entry.getKey()), entry.getValue())); + result.complete(tileWeights); + }); + + ProgressLoggers progress = ProgressLoggers.create() + .addPercentCounter("files", toDownload.size(), downloaded) + .newLine() + .addPipelineStats(pipeline) + .newLine() + .addProcessStats(); + + pipeline.awaitAndLog(progress, config.logInterval()); + timer.stop(); + stats.printSummary(); + try { + return result.get(); + } catch (InterruptedException | ExecutionException e) { + return throwFatalException(e); + } + } + + /** + * Parses the tile log for {@code date} into (encoded tile ID, loads) entries for zooms up to {@code maxZoom}. + * An {@link IOException} is retried until {@code config.httpRetries()} retries have been used. Returns an empty + * list when the log does not exist or every attempt fails. + */ + private List> readFile(int maxZoom, LocalDate date) { + var splitter = Pattern.compile("[/ ]"); + for (int i = 0; i <= config.httpRetries(); i++) { + List> result = new ArrayList<>(); + try (var reader = fetch(date)) { + LineReader lines = new LineReader(reader); + String line; + while ((line = lines.readLine()) != null) { + String[] parts = splitter.split(line); + if (parts.length == 4) { + /* adjust osm tiles (256x256px) to vector (512x512px) by moving up one zoom level */ + int z = Integer.parseInt(parts[0]) - 1; + if (z >= 0 && z <= maxZoom) { + int x = Integer.parseInt(parts[1]) >> 1; + int y = Integer.parseInt(parts[2]) >> 1; + long loads = Long.parseLong(parts[3]); + result.add(Map.entry(TileCoord.ofXYZ(x, y, z).encoded(), loads)); + } + } + } + return result; + } catch (FileNotFoundException e) { + LOGGER.info("No data for {}", date); + break; + } catch (IOException e) { + if (i == config.httpRetries()) { + LOGGER.warn("Failed getting {} {}", date, e); + } + } + } + return List.of(); + } + + /** + * Command-line entry point: computes the top tiles from the last {@code days} days of tile logs and writes them to + * the {@code output} file. + */ + public static void main(String[] args) throws IOException { + Arguments arguments = Arguments.fromArgsOrConfigFile(args).orElse(Arguments.of(Map.of( + "http-retries", "3" + ))); + var config = PlanetilerConfig.from(arguments); + var stats = arguments.getStats(); + var days = arguments.getInteger("days", "number of days into the past to look", 90); + var maxZoom = arguments.getInteger("maxzoom", "max zoom", 15); + var topN = 
arguments.getInteger("top", "top n", 1_000_000); + var output = arguments.file("output", "output", Path.of("top_osm_tiles.tsv.gz")); + var threads = arguments.getInteger("download-threads", "number of threads to use for downloading/parsing", + Math.min(10, arguments.threads())); + + var date = LocalDate.now(ZoneOffset.UTC); + var toDownload = IntStream.range(0, days) + .mapToObj(i -> date.minus(Period.ofDays(i))) + .toList(); + + new TopOsmTiles(config, stats) + .run(threads, topN, maxZoom, toDownload) + .writeToFile(output); + } + + /** + * Download precomputed top-1m tile stats from 90 days of openstreetmap.org tile logs to + * {@link PlanetilerConfig#tileWeights()} path if they don't already exist. + */ + public static void downloadPrecomputed(PlanetilerConfig config, Stats stats) { + if (!Files.exists(config.tileWeights())) { + Downloader.create(config, stats) + .downloadIfNecessary(new Downloader.ResourceToDownload("osm-tile-weights", DOWNLOAD_URL, config.tileWeights())); + } + } +} diff --git a/planetiler-core/src/test/java/com/onthegomap/planetiler/PlanetilerTests.java b/planetiler-core/src/test/java/com/onthegomap/planetiler/PlanetilerTests.java index cae697ef26..55e7ee2703 100644 --- a/planetiler-core/src/test/java/com/onthegomap/planetiler/PlanetilerTests.java +++ b/planetiler-core/src/test/java/com/onthegomap/planetiler/PlanetilerTests.java @@ -3,7 +3,8 @@ import static com.onthegomap.planetiler.TestUtils.*; import static org.junit.jupiter.api.Assertions.*; -import com.onthegomap.planetiler.TestUtils.OsmXml; +import com.fasterxml.jackson.dataformat.csv.CsvMapper; +import com.fasterxml.jackson.dataformat.csv.CsvSchema; import com.onthegomap.planetiler.archive.ReadableTileArchive; import com.onthegomap.planetiler.archive.TileArchiveConfig; import com.onthegomap.planetiler.archive.TileArchiveMetadata; @@ -31,7 +32,10 @@ import com.onthegomap.planetiler.stats.Stats; import com.onthegomap.planetiler.stream.InMemoryStreamArchive; import 
com.onthegomap.planetiler.util.BuildInfo; +import com.onthegomap.planetiler.util.Gzip; +import com.onthegomap.planetiler.util.TileSizeStats; import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; @@ -151,8 +155,7 @@ private PlanetilerResults run( featureGroup.prepare(); try (Mbtiles db = Mbtiles.newInMemoryDatabase(config.arguments())) { TileArchiveWriter.writeOutput(featureGroup, db, () -> 0L, new TileArchiveMetadata(profile, config), - config, - stats); + null, config, stats); var tileMap = TestUtils.getTileMap(db); tileMap.values().forEach(fs -> fs.forEach(f -> f.geometry().validate())); int tileDataCount = db.compactDb() ? TestUtils.getTilesDataCount(db) : 0; @@ -1805,7 +1808,8 @@ private static TileCompression extractTileCompression(String args) { "--output-format=pbf", "--output-format=json", "--tile-compression=none", - "--tile-compression=gzip" + "--tile-compression=gzip", + "--output-layerstats" }) void testPlanetilerRunner(String args) throws Exception { Path originalOsm = TestUtils.pathToResource("monaco-latest.osm.pbf"); @@ -1877,6 +1881,57 @@ public void processFeature(SourceFeature source, FeatureCollector features) { ), db.metadata().toMap()); } } + + final Path layerstats = output.resolveSibling(output.getFileName().toString() + ".layerstats.tsv.gz"); + if (args.contains("--output-layerstats")) { + assertTrue(Files.exists(layerstats)); + byte[] data = Files.readAllBytes(layerstats); + byte[] uncompressed = Gzip.gunzip(data); + String[] lines = new String(uncompressed, StandardCharsets.UTF_8).split("\n"); + assertEquals(12, lines.length); + + assertEquals(List.of( + "z", + "x", + "y", + "hilbert", + "archived_tile_bytes", + "layer", + "layer_bytes", + "layer_features", + "layer_attr_bytes", + "layer_attr_keys", + "layer_attr_values" + ), List.of(lines[0].split("\t")), lines[0]); + + var mapper = new CsvMapper(); + var reader = mapper + 
.readerFor(Map.class) + .with(CsvSchema.emptySchema().withColumnSeparator('\t').withLineSeparator("\n").withHeader()); + try (var items = reader.readValues(uncompressed)) { + while (items.hasNext()) { + @SuppressWarnings("unchecked") Map next = (Map) items.next(); + int z = Integer.parseInt(next.get("z")); + int x = Integer.parseInt(next.get("x")); + int y = Integer.parseInt(next.get("y")); + int hilbert = Integer.parseInt(next.get("hilbert")); + assertEquals(hilbert, TileCoord.ofXYZ(x, y, z).hilbertEncoded()); + assertTrue(Integer.parseInt(next.get("z")) <= 14, "bad z: " + next); + } + } + + // ensure tilestats standalone executable produces same output + var standaloneLayerstatsOutput = tempDir.resolve("layerstats2.tsv.gz"); + TileSizeStats.main("--input=" + output, "--output=" + standaloneLayerstatsOutput); + byte[] standaloneData = Files.readAllBytes(standaloneLayerstatsOutput); + byte[] standaloneUncompressed = Gzip.gunzip(standaloneData); + assertEquals( + new String(uncompressed, StandardCharsets.UTF_8), + new String(standaloneUncompressed, StandardCharsets.UTF_8) + ); + } else { + assertFalse(Files.exists(layerstats)); + } } @Test diff --git a/planetiler-core/src/test/java/com/onthegomap/planetiler/TestUtils.java b/planetiler-core/src/test/java/com/onthegomap/planetiler/TestUtils.java index 21a075d1a4..ad235a0ff1 100644 --- a/planetiler-core/src/test/java/com/onthegomap/planetiler/TestUtils.java +++ b/planetiler-core/src/test/java/com/onthegomap/planetiler/TestUtils.java @@ -16,6 +16,7 @@ import com.fasterxml.jackson.dataformat.xml.annotation.JacksonXmlRootElement; import com.fasterxml.jackson.datatype.jdk8.Jdk8Module; import com.onthegomap.planetiler.archive.ReadableTileArchive; +import com.onthegomap.planetiler.archive.Tile; import com.onthegomap.planetiler.archive.TileCompression; import com.onthegomap.planetiler.config.PlanetilerConfig; import com.onthegomap.planetiler.geo.GeoUtils; @@ -210,7 +211,7 @@ public static Map> getTileMap(ReadableTileArc 
TileCompression tileCompression) throws IOException { Map> tiles = new TreeMap<>(); - for (var tile : getAllTiles(db)) { + for (var tile : getTiles(db)) { var bytes = switch (tileCompression) { case GZIP -> gunzip(tile.bytes()); case NONE -> tile.bytes(); @@ -218,7 +219,7 @@ public static Map> getTileMap(ReadableTileArc }; var decoded = VectorTile.decode(bytes).stream() .map(feature -> feature(decodeSilently(feature.geometry()), feature.attrs())).toList(); - tiles.put(tile.tile(), decoded); + tiles.put(tile.coord(), decoded); } return tiles; } @@ -231,10 +232,13 @@ public static Geometry decodeSilently(VectorTile.VectorGeometry geom) { } } + @Deprecated(forRemoval = true) public static Set getAllTiles(ReadableTileArchive db) { - return db.getAllTileCoords().stream() - .map(coord -> new Mbtiles.TileEntry(coord, db.getTile(coord))) - .collect(Collectors.toSet()); + return db.getAllTiles().stream().map(t -> new Mbtiles.TileEntry(t.coord(), t.bytes())).collect(Collectors.toSet()); + } + + public static Set getTiles(ReadableTileArchive db) { + return db.getAllTiles().stream().collect(Collectors.toSet()); } public static int getTilesDataCount(Mbtiles db) throws SQLException { diff --git a/planetiler-core/src/test/java/com/onthegomap/planetiler/geo/TileCoordTest.java b/planetiler-core/src/test/java/com/onthegomap/planetiler/geo/TileCoordTest.java index 433b126658..30e561da18 100644 --- a/planetiler-core/src/test/java/com/onthegomap/planetiler/geo/TileCoordTest.java +++ b/planetiler-core/src/test/java/com/onthegomap/planetiler/geo/TileCoordTest.java @@ -6,6 +6,7 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.CsvSource; +import org.locationtech.jts.geom.Envelope; class TileCoordTest { @@ -120,4 +121,26 @@ void testTileProgressOnLevelHilbert(int x, int y, int z, double p) { TileExtents.computeFromWorldBounds(15, GeoUtils.WORLD_BOUNDS)); assertEquals(p, progress); } + + @ParameterizedTest + 
@CsvSource({ + "0,0,0,0.5/0/0", + "0,0,1,1.5/42.52556/-90", + "123,123,14,14.5/84.81142/-177.28638", + }) + void testDebugUrl(int x, int y, int z, String expected) { + assertEquals(expected, TileCoord.ofXYZ(x, y, z).getDebugUrl("{z}/{lat}/{lon}")); + } + + @Test + void testEnvelope() { + assertEquals(new Envelope( + -180, 180, + -85.0511287798066, 85.0511287798066 + ), TileCoord.ofXYZ(0, 0, 0).getEnvelope()); + assertEquals(new Envelope( + 0, 180, + -85.0511287798066, 0 + ), TileCoord.ofXYZ(1, 1, 1).getEnvelope()); + } } diff --git a/planetiler-core/src/test/java/com/onthegomap/planetiler/mbtiles/MbtilesTest.java b/planetiler-core/src/test/java/com/onthegomap/planetiler/mbtiles/MbtilesTest.java index 4e6b71e1ee..faecf965fd 100644 --- a/planetiler-core/src/test/java/com/onthegomap/planetiler/mbtiles/MbtilesTest.java +++ b/planetiler-core/src/test/java/com/onthegomap/planetiler/mbtiles/MbtilesTest.java @@ -5,12 +5,13 @@ import com.google.common.math.IntMath; import com.onthegomap.planetiler.TestUtils; +import com.onthegomap.planetiler.archive.Tile; import com.onthegomap.planetiler.archive.TileArchiveMetadata; import com.onthegomap.planetiler.archive.TileCompression; import com.onthegomap.planetiler.archive.TileEncodingResult; import com.onthegomap.planetiler.config.Arguments; import com.onthegomap.planetiler.geo.TileCoord; -import com.onthegomap.planetiler.util.LayerStats; +import com.onthegomap.planetiler.util.LayerAttrStats; import java.io.IOException; import java.math.RoundingMode; import java.nio.file.Path; @@ -50,18 +51,18 @@ private static void testWriteTiles(Path path, int howMany, boolean skipIndexCrea } assertNull(db.getTile(0, 0, 0)); - Set expected = new TreeSet<>(); + Set expected = new TreeSet<>(); try (var writer = db.newTileWriter()) { for (int i = 0; i < howMany; i++) { var dataHash = i - (i % 2); var dataBase = howMany + dataHash; - var entry = new Mbtiles.TileEntry(TileCoord.ofXYZ(i, i + 1, 14), new byte[]{ + var entry = new Tile(TileCoord.ofXYZ(i, 
i + 1, 14), new byte[]{ (byte) dataBase, (byte) (dataBase >> 8), (byte) (dataBase >> 16), (byte) (dataBase >> 24) }); - writer.write(new TileEncodingResult(entry.tile(), entry.bytes(), OptionalLong.of(dataHash))); + writer.write(new TileEncodingResult(entry.coord(), entry.bytes(), OptionalLong.of(dataHash))); expected.add(entry); } } @@ -69,13 +70,14 @@ private static void testWriteTiles(Path path, int howMany, boolean skipIndexCrea if (optimize) { db.vacuumAnalyze(); } - var all = TestUtils.getAllTiles(db); + var all = TestUtils.getTiles(db); assertEquals(howMany, all.size()); assertEquals(expected, all); - assertEquals(expected.stream().map(Mbtiles.TileEntry::tile).collect(Collectors.toSet()), + assertEquals(expected.stream().map(Tile::coord).collect(Collectors.toSet()), db.getAllTileCoords().stream().collect(Collectors.toSet())); + assertEquals(expected, db.getAllTiles().stream().collect(Collectors.toSet())); for (var expectedEntry : expected) { - var tile = expectedEntry.tile(); + var tile = expectedEntry.coord(); byte[] data = db.getTile(tile.x(), tile.y(), tile.z()); assertArrayEquals(expectedEntry.bytes(), data); } @@ -157,7 +159,7 @@ void testRoundTripMetadata() throws IOException { 7d, 8, 9, - List.of(new LayerStats.VectorLayer("MyLayer", Map.of())), + List.of(new LayerAttrStats.VectorLayer("MyLayer", Map.of())), Map.of("other key", "other value"), TileCompression.GZIP )); @@ -178,7 +180,7 @@ void testMetadataWithoutCompressionAssumesGzip() throws IOException { 7d, 8, 9, - List.of(new LayerStats.VectorLayer("MyLayer", Map.of())), + List.of(new LayerAttrStats.VectorLayer("MyLayer", Map.of())), Map.of("other key", "other value"), null ); @@ -195,7 +197,7 @@ void testMetadataWithoutCompressionAssumesGzip() throws IOException { 7d, 8, 9, - List.of(new LayerStats.VectorLayer("MyLayer", Map.of())), + List.of(new LayerAttrStats.VectorLayer("MyLayer", Map.of())), Map.of("other key", "other value"), TileCompression.GZIP ); @@ -250,17 +252,17 @@ void 
testMetadataJsonNoLayers() throws IOException { @Test void testFullMetadataJson() throws IOException { testMetadataJson(new Mbtiles.MetadataJson( - new LayerStats.VectorLayer( + new LayerAttrStats.VectorLayer( "full", Map.of( - "NUMBER_FIELD", LayerStats.FieldType.NUMBER, - "STRING_FIELD", LayerStats.FieldType.STRING, - "boolean field", LayerStats.FieldType.BOOLEAN + "NUMBER_FIELD", LayerAttrStats.FieldType.NUMBER, + "STRING_FIELD", LayerAttrStats.FieldType.STRING, + "boolean field", LayerAttrStats.FieldType.BOOLEAN ) ).withDescription("full description") .withMinzoom(0) .withMaxzoom(5), - new LayerStats.VectorLayer( + new LayerAttrStats.VectorLayer( "partial", Map.of() ) diff --git a/planetiler-core/src/test/java/com/onthegomap/planetiler/pmtiles/PmtilesTest.java b/planetiler-core/src/test/java/com/onthegomap/planetiler/pmtiles/PmtilesTest.java index f0a4a178ef..08384a9425 100644 --- a/planetiler-core/src/test/java/com/onthegomap/planetiler/pmtiles/PmtilesTest.java +++ b/planetiler-core/src/test/java/com/onthegomap/planetiler/pmtiles/PmtilesTest.java @@ -5,13 +5,14 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.onthegomap.planetiler.Profile; import com.onthegomap.planetiler.TestUtils; +import com.onthegomap.planetiler.archive.Tile; import com.onthegomap.planetiler.archive.TileArchiveMetadata; import com.onthegomap.planetiler.archive.TileCompression; import com.onthegomap.planetiler.archive.TileEncodingResult; import com.onthegomap.planetiler.config.PlanetilerConfig; import com.onthegomap.planetiler.geo.TileCoord; import com.onthegomap.planetiler.reader.FileFormatException; -import com.onthegomap.planetiler.util.LayerStats; +import com.onthegomap.planetiler.util.LayerAttrStats; import com.onthegomap.planetiler.util.SeekableInMemoryByteChannel; import java.io.IOException; import java.nio.ByteBuffer; @@ -22,6 +23,7 @@ import java.util.Map; import java.util.OptionalLong; import java.util.Set; +import java.util.TreeSet; import 
java.util.stream.Collectors; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; @@ -201,7 +203,11 @@ void testWritePmtilesSingleEntry() throws IOException { assertNull(reader.getTile(0, 0, 2)); Set coordset = reader.getAllTileCoords().stream().collect(Collectors.toSet()); - assertEquals(1, coordset.size()); + assertEquals(Set.of(TileCoord.ofXYZ(0, 0, 1)), coordset); + Set tileset = reader.getAllTiles().stream().collect(Collectors.toSet()); + assertEquals(Set.of( + new Tile(TileCoord.ofXYZ(0, 0, 1), new byte[]{0xa, 0x2}) + ), tileset); } } @@ -219,7 +225,7 @@ void testRoundtripMetadata() throws IOException { 7d, 8, 9, - List.of(new LayerStats.VectorLayer("MyLayer", Map.of())), + List.of(new LayerAttrStats.VectorLayer("MyLayer", Map.of())), Map.of("other key", "other value"), TileCompression.GZIP )); @@ -310,7 +316,17 @@ void testWritePmtilesDuplication() throws IOException { assertArrayEquals(new byte[]{0xa, 0x2}, reader.getTile(0, 0, 2)); Set coordset = reader.getAllTileCoords().stream().collect(Collectors.toSet()); - assertEquals(3, coordset.size()); + assertEquals(Set.of( + TileCoord.ofXYZ(0, 0, 0), + TileCoord.ofXYZ(0, 0, 1), + TileCoord.ofXYZ(0, 0, 2) + ), coordset); + var tileset = reader.getAllTiles().stream().collect(Collectors.toSet()); + assertEquals(Set.of( + new Tile(TileCoord.ofXYZ(0, 0, 0), new byte[]{0xa, 0x2}), + new Tile(TileCoord.ofXYZ(0, 0, 1), new byte[]{0xa, 0x2}), + new Tile(TileCoord.ofXYZ(0, 0, 2), new byte[]{0xa, 0x2}) + ), tileset); } } @@ -337,7 +353,15 @@ void testWritePmtilesUnclustered() throws IOException { assertArrayEquals(new byte[]{0xa, 0x2}, reader.getTile(0, 0, 1)); Set coordset = reader.getAllTileCoords().stream().collect(Collectors.toSet()); - assertEquals(2, coordset.size()); + assertEquals(Set.of( + TileCoord.ofXYZ(0, 0, 0), + TileCoord.ofXYZ(0, 0, 1) + ), coordset); + var tileset = reader.getAllTiles().stream().collect(Collectors.toSet()); + assertEquals(Set.of( + new 
Tile(TileCoord.ofXYZ(0, 0, 0), new byte[]{0xa, 0x2}), + new Tile(TileCoord.ofXYZ(0, 0, 1), new byte[]{0xa, 0x2}) + ), tileset); } } @@ -352,10 +376,15 @@ void testWritePmtilesLeafDirectories() throws IOException { var writer = in.newTileWriter(); int ENTRIES = 20000; + Set expectedCoords = new TreeSet<>(); + Set expectedTiles = new TreeSet<>(); for (int i = 0; i < ENTRIES; i++) { - writer.write(new TileEncodingResult(TileCoord.hilbertDecode(i), ByteBuffer.allocate(4).putInt(i).array(), - OptionalLong.empty())); + var coord = TileCoord.hilbertDecode(i); + var data = ByteBuffer.allocate(4).putInt(i).array(); + expectedCoords.add(coord); + expectedTiles.add(new Tile(coord, data)); + writer.write(new TileEncodingResult(coord, data, OptionalLong.empty())); } in.finish(metadata); @@ -372,8 +401,11 @@ void testWritePmtilesLeafDirectories() throws IOException { "tileCoord=%s did not match".formatted(coord.toString())); } - Set coordset = reader.getAllTileCoords().stream().collect(Collectors.toSet()); - assertEquals(ENTRIES, coordset.size()); + Set coordset = reader.getAllTileCoords().stream().collect(Collectors.toCollection(TreeSet::new)); + assertEquals(expectedCoords, coordset); + + Set tileset = reader.getAllTiles().stream().collect(Collectors.toCollection(TreeSet::new)); + assertEquals(expectedTiles, tileset); for (int i = 0; i < ENTRIES; i++) { var coord = TileCoord.hilbertDecode(i); @@ -381,5 +413,4 @@ void testWritePmtilesLeafDirectories() throws IOException { } } } - } diff --git a/planetiler-core/src/test/java/com/onthegomap/planetiler/stream/WriteableJsonStreamArchiveTest.java b/planetiler-core/src/test/java/com/onthegomap/planetiler/stream/WriteableJsonStreamArchiveTest.java index 31ae6c24f8..21005e4ffe 100644 --- a/planetiler-core/src/test/java/com/onthegomap/planetiler/stream/WriteableJsonStreamArchiveTest.java +++ b/planetiler-core/src/test/java/com/onthegomap/planetiler/stream/WriteableJsonStreamArchiveTest.java @@ -12,7 +12,7 @@ import 
com.onthegomap.planetiler.archive.TileEncodingResult; import com.onthegomap.planetiler.config.Arguments; import com.onthegomap.planetiler.geo.TileCoord; -import com.onthegomap.planetiler.util.LayerStats; +import com.onthegomap.planetiler.util.LayerAttrStats; import java.io.IOException; import java.io.UncheckedIOException; import java.nio.file.Files; @@ -36,11 +36,11 @@ class WriteableJsonStreamArchiveTest { new TileArchiveMetadata("name", "description", "attribution", "version", "type", "format", new Envelope(0, 1, 2, 3), new CoordinateXY(1.3, 3.7), 1.0, 2, 3, List.of( - new LayerStats.VectorLayer("vl0", - ImmutableMap.of("1", LayerStats.FieldType.BOOLEAN, "2", LayerStats.FieldType.NUMBER, "3", - LayerStats.FieldType.STRING), + new LayerAttrStats.VectorLayer("vl0", + ImmutableMap.of("1", LayerAttrStats.FieldType.BOOLEAN, "2", LayerAttrStats.FieldType.NUMBER, "3", + LayerAttrStats.FieldType.STRING), Optional.of("description"), OptionalInt.of(1), OptionalInt.of(2)), - new LayerStats.VectorLayer("vl1", + new LayerAttrStats.VectorLayer("vl1", Map.of(), Optional.empty(), OptionalInt.empty(), OptionalInt.empty()) ), diff --git a/planetiler-core/src/test/java/com/onthegomap/planetiler/stream/WriteableProtoStreamArchiveTest.java b/planetiler-core/src/test/java/com/onthegomap/planetiler/stream/WriteableProtoStreamArchiveTest.java index e30ff9a6ff..b69f3a5013 100644 --- a/planetiler-core/src/test/java/com/onthegomap/planetiler/stream/WriteableProtoStreamArchiveTest.java +++ b/planetiler-core/src/test/java/com/onthegomap/planetiler/stream/WriteableProtoStreamArchiveTest.java @@ -8,7 +8,7 @@ import com.onthegomap.planetiler.archive.TileEncodingResult; import com.onthegomap.planetiler.geo.TileCoord; import com.onthegomap.planetiler.proto.StreamArchiveProto; -import com.onthegomap.planetiler.util.LayerStats; +import com.onthegomap.planetiler.util.LayerAttrStats; import java.io.IOException; import java.io.InputStream; import java.nio.file.Files; @@ -33,10 +33,11 @@ class 
WriteableProtoStreamArchiveTest { new TileArchiveMetadata("name", "description", "attribution", "version", "type", "format", new Envelope(0, 1, 2, 3), new CoordinateXY(1.3, 3.7), 1.0, 2, 3, List.of( - new LayerStats.VectorLayer("vl0", - Map.of("1", LayerStats.FieldType.BOOLEAN, "2", LayerStats.FieldType.NUMBER, "3", LayerStats.FieldType.STRING), + new LayerAttrStats.VectorLayer("vl0", + Map.of("1", LayerAttrStats.FieldType.BOOLEAN, "2", LayerAttrStats.FieldType.NUMBER, "3", + LayerAttrStats.FieldType.STRING), Optional.of("description"), OptionalInt.of(1), OptionalInt.of(2)), - new LayerStats.VectorLayer("vl1", + new LayerAttrStats.VectorLayer("vl1", Map.of(), Optional.empty(), OptionalInt.empty(), OptionalInt.empty()) ), diff --git a/planetiler-core/src/test/java/com/onthegomap/planetiler/util/LayerStatsTest.java b/planetiler-core/src/test/java/com/onthegomap/planetiler/util/LayerAttrStatsTest.java similarity index 73% rename from planetiler-core/src/test/java/com/onthegomap/planetiler/util/LayerStatsTest.java rename to planetiler-core/src/test/java/com/onthegomap/planetiler/util/LayerAttrStatsTest.java index 0ea8a97ac8..8835d68c85 100644 --- a/planetiler-core/src/test/java/com/onthegomap/planetiler/util/LayerStatsTest.java +++ b/planetiler-core/src/test/java/com/onthegomap/planetiler/util/LayerAttrStatsTest.java @@ -11,13 +11,13 @@ import java.util.Optional; import org.junit.jupiter.api.Test; -class LayerStatsTest { +class LayerAttrStatsTest { - final LayerStats layerStats = new LayerStats(); + final LayerAttrStats layerStats = new LayerAttrStats(); @Test void testEmptyLayerStats() { - assertEquals(Arrays.asList(new LayerStats.VectorLayer[]{}), layerStats.getTileStats()); + assertEquals(Arrays.asList(new LayerAttrStats.VectorLayer[]{}), layerStats.getTileStats()); } @Test @@ -33,11 +33,11 @@ void testEmptyLayerStatsOneLayer() { 1, Optional.empty() )); - assertEquals(Arrays.asList(new LayerStats.VectorLayer[]{ - new LayerStats.VectorLayer("layer1", Map.of( - "a", 
LayerStats.FieldType.NUMBER, - "b", LayerStats.FieldType.STRING, - "c", LayerStats.FieldType.BOOLEAN + assertEquals(Arrays.asList(new LayerAttrStats.VectorLayer[]{ + new LayerAttrStats.VectorLayer("layer1", Map.of( + "a", LayerAttrStats.FieldType.NUMBER, + "b", LayerAttrStats.FieldType.STRING, + "c", LayerAttrStats.FieldType.BOOLEAN ), 3, 3) }), layerStats.getTileStats()); } @@ -77,13 +77,13 @@ void testEmptyLayerStatsTwoLayers() { 1, Optional.empty() )); - assertEquals(Arrays.asList(new LayerStats.VectorLayer[]{ - new LayerStats.VectorLayer("layer1", Map.of( + assertEquals(Arrays.asList(new LayerAttrStats.VectorLayer[]{ + new LayerAttrStats.VectorLayer("layer1", Map.of( ), 3, 3), - new LayerStats.VectorLayer("layer2", Map.of( - "a", LayerStats.FieldType.NUMBER, - "b", LayerStats.FieldType.BOOLEAN, - "c", LayerStats.FieldType.STRING + new LayerAttrStats.VectorLayer("layer2", Map.of( + "a", LayerAttrStats.FieldType.NUMBER, + "b", LayerAttrStats.FieldType.BOOLEAN, + "c", LayerAttrStats.FieldType.STRING ), 1, 4) }), layerStats.getTileStats()); } @@ -116,9 +116,9 @@ void testMergeFromMultipleThreads() throws InterruptedException { t2.start(); t1.join(); t2.join(); - assertEquals(Arrays.asList(new LayerStats.VectorLayer[]{ - new LayerStats.VectorLayer("layer1", Map.of( - "a", LayerStats.FieldType.STRING + assertEquals(Arrays.asList(new LayerAttrStats.VectorLayer[]{ + new LayerAttrStats.VectorLayer("layer1", Map.of( + "a", LayerAttrStats.FieldType.STRING ), 3, 4) }), layerStats.getTileStats()); } diff --git a/planetiler-core/src/test/java/com/onthegomap/planetiler/util/TileSizeStatsTest.java b/planetiler-core/src/test/java/com/onthegomap/planetiler/util/TileSizeStatsTest.java new file mode 100644 index 0000000000..a35c7adf57 --- /dev/null +++ b/planetiler-core/src/test/java/com/onthegomap/planetiler/util/TileSizeStatsTest.java @@ -0,0 +1,99 @@ +package com.onthegomap.planetiler.util; + +import static com.onthegomap.planetiler.TestUtils.newPoint; +import static 
org.junit.jupiter.api.Assertions.assertEquals; + +import com.onthegomap.planetiler.VectorTile; +import com.onthegomap.planetiler.geo.TileCoord; +import java.io.IOException; +import java.util.List; +import java.util.Map; +import org.junit.jupiter.api.Test; + +class TileSizeStatsTest { + @Test + void computeStatsEmpty() { + var stats = TileSizeStats.computeTileStats(new VectorTile().toProto()); + assertEquals(0, stats.size()); + } + + @Test + void computeStatsOneFeature() throws IOException { + var stats = TileSizeStats.computeTileStats(new VectorTile() + .addLayerFeatures("layer", List.of(new VectorTile.Feature( + "layer", + 1, + VectorTile.encodeGeometry(newPoint(0, 0)), + Map.of("key1", "value1", "key2", 2) + ))) + .toProto()); + assertEquals(1, stats.size()); + var entry1 = stats.get(0); + assertEquals("layer", entry1.layer()); + assertEquals(1, entry1.layerFeatures()); + assertEquals(55, entry1.layerBytes()); + + assertEquals(18, entry1.layerAttrBytes()); + assertEquals(2, entry1.layerAttrKeys()); + assertEquals(2, entry1.layerAttrValues()); + + var formatted = TileSizeStats.formatOutputRows(TileCoord.ofXYZ(1, 2, 3), 999, stats); + assertEquals( + """ + z x y hilbert archived_tile_bytes layer layer_bytes layer_features layer_attr_bytes layer_attr_keys layer_attr_values + 3 1 2 34 999 layer 55 1 18 2 2 + """ + .trim(), + (TileSizeStats.headerRow() + String.join("", formatted)).trim()); + } + + @Test + void computeStats2Features() throws IOException { + var stats = TileSizeStats.computeTileStats(new VectorTile() + .addLayerFeatures("b", List.of( + new VectorTile.Feature( + "b", + 1, + VectorTile.encodeGeometry(newPoint(0, 0)), + Map.of() + ) + )) + .addLayerFeatures("a", List.of( + new VectorTile.Feature( + "a", + 1, + VectorTile.encodeGeometry(newPoint(0, 0)), + Map.of("key1", "value1", "key2", 2) + ), + new VectorTile.Feature( + "a", + 2, + VectorTile.encodeGeometry(newPoint(1, 1)), + Map.of("key1", 2, "key2", 3) + ) + )) + .toProto()); + assertEquals(2, 
stats.size()); + var entry1 = stats.get(0); + assertEquals("a", entry1.layer()); + assertEquals(2, entry1.layerFeatures()); + assertEquals(72, entry1.layerBytes()); + + assertEquals(20, entry1.layerAttrBytes()); + assertEquals(2, entry1.layerAttrKeys()); + assertEquals(3, entry1.layerAttrValues()); + var entry2 = stats.get(1); + assertEquals("b", entry2.layer()); + assertEquals(1, entry2.layerFeatures()); + + var formatted = TileSizeStats.formatOutputRows(TileCoord.ofXYZ(1, 2, 3), 999, stats); + assertEquals( + """ + z x y hilbert archived_tile_bytes layer layer_bytes layer_features layer_attr_bytes layer_attr_keys layer_attr_values + 3 1 2 34 999 a 72 2 20 2 3 + 3 1 2 34 999 b 19 1 0 0 0 + """ + .trim(), + (TileSizeStats.headerRow() + String.join("", formatted)).trim()); + } +} diff --git a/planetiler-core/src/test/java/com/onthegomap/planetiler/util/TileWeightsTest.java b/planetiler-core/src/test/java/com/onthegomap/planetiler/util/TileWeightsTest.java new file mode 100644 index 0000000000..78aa7bed6f --- /dev/null +++ b/planetiler-core/src/test/java/com/onthegomap/planetiler/util/TileWeightsTest.java @@ -0,0 +1,95 @@ +package com.onthegomap.planetiler.util; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import com.onthegomap.planetiler.geo.TileCoord; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.zip.GZIPInputStream; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +class TileWeightsTest { + @Test + void test() { + var weights = new TileWeights(); + assertEquals(0, weights.getWeight(TileCoord.ofXYZ(0, 0, 0))); + assertEquals(0, weights.getZoomWeight(0)); + assertEquals(0, weights.getWeight(TileCoord.ofXYZ(0, 0, 1))); + assertEquals(0, weights.getWeight(TileCoord.ofXYZ(1, 0, 1))); + 
assertEquals(0, weights.getZoomWeight(1)); + assertTrue(weights.isEmpty()); + + weights.put(TileCoord.ofXYZ(0, 0, 0), 1); + weights.put(TileCoord.ofXYZ(0, 0, 0), 2); + weights.put(TileCoord.ofXYZ(0, 0, 1), 3); + weights.put(TileCoord.ofXYZ(1, 0, 1), 4); + + assertFalse(weights.isEmpty()); + assertEquals(3, weights.getWeight(TileCoord.ofXYZ(0, 0, 0))); + assertEquals(3, weights.getZoomWeight(0)); + assertEquals(3, weights.getWeight(TileCoord.ofXYZ(0, 0, 1))); + assertEquals(4, weights.getWeight(TileCoord.ofXYZ(1, 0, 1))); + assertEquals(7, weights.getZoomWeight(1)); + } + + @Test + void testWriteToFileEmpty(@TempDir Path path) throws IOException { + Path file = path.resolve("test.tsv.gz"); + new TileWeights().writeToFile(file); + var read = TileWeights.readFromFile(file); + assertEquals(0, read.getWeight(TileCoord.ofXYZ(0, 0, 0))); + } + + @Test + void testWriteToFile(@TempDir Path path) throws IOException { + Path file = path.resolve("test.tsv.gz"); + new TileWeights() + .put(TileCoord.ofXYZ(0, 0, 1), 1) + .put(TileCoord.ofXYZ(0, 0, 1), 1) + .put(TileCoord.ofXYZ(0, 0, 0), 1) + .writeToFile(file); + var read = TileWeights.readFromFile(file); + assertEquals(""" + z x y loads + 0 0 0 1 + 1 0 0 2 + """, new String(new GZIPInputStream(Files.newInputStream(file)).readAllBytes())); + assertEquals(1, read.getWeight(TileCoord.ofXYZ(0, 0, 0))); + assertEquals(2, read.getWeight(TileCoord.ofXYZ(0, 0, 1))); + } + + @Test + void testReadCorruptFile(@TempDir Path path) throws IOException { + Path file = path.resolve("test.tsv.gz"); + var result = TileWeights.readFromFile(file); + assertEquals(0, result.getWeight(TileCoord.ofXYZ(0, 0, 0))); + + Files.write(file, Gzip.gzip(""" + garbage + """.getBytes(StandardCharsets.UTF_8))); + assertEquals(0, TileWeights.readFromFile(file).getWeight(TileCoord.ofXYZ(0, 0, 0))); + + Files.write(file, Gzip.gzip(""" + z x y loads + a b c d + """.getBytes(StandardCharsets.UTF_8))); + assertEquals(0, 
TileWeights.readFromFile(file).getWeight(TileCoord.ofXYZ(0, 0, 0))); + + Files.write(file, Gzip.gzip(""" + z x d loads + 1 2 3 4 + """.getBytes(StandardCharsets.UTF_8))); + assertEquals(0, TileWeights.readFromFile(file).getWeight(TileCoord.ofXYZ(0, 0, 0))); + Files.write(file, Gzip.gzip(""" + z x y loads + -1 2 -3 4 + 1 2 5 4 + """.getBytes(StandardCharsets.UTF_8))); + assertEquals(4, TileWeights.readFromFile(file).getWeight(TileCoord.ofXYZ(0, 1, 1))); + } +} diff --git a/planetiler-core/src/test/java/com/onthegomap/planetiler/util/TilesetSummaryStatisticsTest.java b/planetiler-core/src/test/java/com/onthegomap/planetiler/util/TilesetSummaryStatisticsTest.java new file mode 100644 index 0000000000..f41b00b32a --- /dev/null +++ b/planetiler-core/src/test/java/com/onthegomap/planetiler/util/TilesetSummaryStatisticsTest.java @@ -0,0 +1,242 @@ +package com.onthegomap.planetiler.util; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import com.onthegomap.planetiler.geo.TileCoord; +import java.util.ArrayList; +import java.util.List; +import java.util.Set; +import org.junit.jupiter.api.Test; + +class TilesetSummaryStatisticsTest { + @Test + void aggregateTileStats() { + var tileStats = new TilesetSummaryStatistics(); + var updater1 = tileStats.threadLocalUpdater(); + var updater2 = tileStats.threadLocalUpdater(); + updater1.recordTile(TileCoord.ofXYZ(0, 0, 1), 123, List.of( + new TileSizeStats.LayerStats("a", 1, 2, 3, 4, 5), + new TileSizeStats.LayerStats("b", 6, 7, 8, 9, 10) + )); + updater2.recordTile(TileCoord.ofXYZ(0, 1, 1), 345, List.of( + new TileSizeStats.LayerStats("b", 1, 2, 3, 4, 5), + new TileSizeStats.LayerStats("c", 6, 7, 8, 9, 10) + )); + var summary = tileStats.summary(); + assertEquals(Set.of("a", "b", "c"), Set.copyOf(summary.layers())); + assertEquals(0, summary.get(0).maxSize()); + assertEquals(0, summary.get(0).numTiles()); + assertEquals(7, summary.get(1).maxSize()); + assertEquals(2, summary.get(1).numTiles()); + + assertEquals(0, 
summary.get(0, "a").maxSize()); + assertEquals(1, summary.get(1, "a").maxSize()); + assertEquals(6, summary.get(1, "b").maxSize()); + assertEquals(6, summary.get(1, "c").maxSize()); + + assertEquals(0, summary.get(0, "a").numTiles()); + assertEquals(1, summary.get(1, "a").numTiles()); + assertEquals(2, summary.get(1, "b").numTiles()); + assertEquals(1, summary.get(1, "c").numTiles()); + + + assertEquals(1, summary.get("a").maxSize()); + assertEquals(6, summary.get("b").maxSize()); + assertEquals(6, summary.get("c").maxSize()); + assertEquals(1, summary.get("a").numTiles()); + assertEquals(2, summary.get("b").numTiles()); + assertEquals(1, summary.get("c").numTiles()); + + assertEquals(7, summary.get().maxSize()); + assertEquals(2, summary.get().numTiles()); + + updater1.recordTile(TileCoord.ofXYZ(0, 0, 2), 0, List.of( + new TileSizeStats.LayerStats("c", 10, 7, 8, 9, 10) + )); + assertEquals(""" + z1 z2 all + a 1 0 1 + b 6 0 6 + c 6 10 10 + """.stripTrailing(), tileStats.summary().formatTable(Number::toString, cell -> cell.maxSize())); + + assertEquals(""" + z1 z2 all + a 1 0 1 + b 2 0 2 + c 1 1 2 + """.stripTrailing(), tileStats.summary().formatTable(Number::toString, cell -> cell.numTiles())); + } + + @Test + void topGzippedTiles() { + var tileStats = new TilesetSummaryStatistics(); + var updater1 = tileStats.threadLocalUpdater(); + var updater2 = tileStats.threadLocalUpdater(); + for (int i = 0; i < 20; i++) { + (i % 2 == 0 ? 
updater1 : updater2).recordTile(TileCoord.decode(i), i, List.of()); + } + assertEquals( + List.of( + new TilesetSummaryStatistics.TileSummary(TileCoord.decode(19), 19, List.of()), + new TilesetSummaryStatistics.TileSummary(TileCoord.decode(18), 18, List.of()), + new TilesetSummaryStatistics.TileSummary(TileCoord.decode(17), 17, List.of()), + new TilesetSummaryStatistics.TileSummary(TileCoord.decode(16), 16, List.of()), + new TilesetSummaryStatistics.TileSummary(TileCoord.decode(15), 15, List.of()), + new TilesetSummaryStatistics.TileSummary(TileCoord.decode(14), 14, List.of()), + new TilesetSummaryStatistics.TileSummary(TileCoord.decode(13), 13, List.of()), + new TilesetSummaryStatistics.TileSummary(TileCoord.decode(12), 12, List.of()), + new TilesetSummaryStatistics.TileSummary(TileCoord.decode(11), 11, List.of()), + new TilesetSummaryStatistics.TileSummary(TileCoord.decode(10), 10, List.of()) + ), + tileStats.summary().get().biggestTiles() + ); + } + + @Test + void topLayerTiles() { + var tileStats = new TilesetSummaryStatistics(); + var updater1 = tileStats.threadLocalUpdater(); + var updater2 = tileStats.threadLocalUpdater(); + List summaries = new ArrayList<>(); + for (int i = 0; i < 20; i++) { + var summary = new TilesetSummaryStatistics.TileSummary(TileCoord.decode(i), i, List.of( + new TileSizeStats.LayerStats("a", i * 2, i, 0, 0, 0), + new TileSizeStats.LayerStats("b", i * 3, i, 0, 0, 0) + )); + summaries.add(0, summary); + (i % 2 == 0 ? updater1 : updater2).recordTile(summary.coord(), summary.archivedSize(), summary.layers()); + } + assertEquals( + summaries.stream().map(d -> d.withSize(d.coord().encoded() * 2)).limit(10).toList(), + tileStats.summary().get("a").biggestTiles() + ); + assertEquals( + summaries.stream().map(d -> d.withSize(d.coord().encoded() * 3)).limit(10).toList(), + tileStats.summary().get("b").biggestTiles() + ); + assertEquals(""" + 1. 2/3/1 (19) 2.5/33.25663/135 (b:57) + 2. 2/3/2 (18) 2.5/-33.25663/135 (b:54) + 3. 
2/3/3 (17) 2.5/-75.78219/135 (b:51) + 4. 2/2/0 (16) 2.5/75.78219/45 (b:48) + 5. 2/2/1 (15) 2.5/33.25663/45 (b:45) + 6. 2/2/2 (14) 2.5/-33.25663/45 (b:42) + 7. 2/2/3 (13) 2.5/-75.78219/45 (b:39) + 8. 2/1/0 (12) 2.5/75.78219/-45 (b:36) + 9. 2/1/1 (11) 2.5/33.25663/-45 (b:33) + 10. 2/1/2 (10) 2.5/-33.25663/-45 (b:30) + """.trim(), tileStats.summary().get().formatBiggestTiles("{z}/{lat}/{lon}")); + } + + @Test + void tileWeights() { + var tileStats = new TilesetSummaryStatistics(new TileWeights() + .put(TileCoord.ofXYZ(0, 0, 0), 2) + .put(TileCoord.ofXYZ(0, 0, 1), 1)); + var updater1 = tileStats.threadLocalUpdater(); + var updater2 = tileStats.threadLocalUpdater(); + + updater1.recordTile( + TileCoord.ofXYZ(0, 0, 0), + 100, + List.of(new TileSizeStats.LayerStats("a", 10, 0, 0, 0, 0)) + ); + updater2.recordTile( + TileCoord.ofXYZ(0, 0, 1), + 200, + List.of( + new TileSizeStats.LayerStats("a", 20, 0, 0, 0, 0), + new TileSizeStats.LayerStats("b", 30, 0, 0, 0, 0) + ) + ); + updater2.recordTile( + TileCoord.ofXYZ(0, 0, 2), // no stats + 400, + List.of( + new TileSizeStats.LayerStats("c", 40, 0, 0, 0, 0) + ) + ); + + assertEquals( + (100 * 2 + 200) / 3d, + tileStats.summary().get().weightedAverageArchivedSize() + ); + assertEquals( + (10 * 2 + 20) / 3d, + tileStats.summary().get("a").weightedAverageSize() + ); + assertEquals( + 30d, + tileStats.summary().get("b").weightedAverageSize() + ); + assertEquals( + 40d, + tileStats.summary().get("c").weightedAverageSize() + ); + } + + @Test + void tileWeightsScaledByZoom() { + var tileStats = new TilesetSummaryStatistics(new TileWeights() + .put(TileCoord.ofXYZ(0, 0, 0), 90) + .put(TileCoord.ofXYZ(0, 0, 1), 8) + .put(TileCoord.ofXYZ(1, 0, 1), 2) + .put(TileCoord.ofXYZ(1, 0, 2), 50)); + var updater1 = tileStats.threadLocalUpdater(); + var updater2 = tileStats.threadLocalUpdater(); + + updater1.recordTile( + TileCoord.ofXYZ(0, 0, 0), + 100, + List.of(new TileSizeStats.LayerStats("a", 10, 0, 0, 0, 0)) + ); + updater2.recordTile( + 
TileCoord.ofXYZ(0, 0, 1), + 200, + List.of( + new TileSizeStats.LayerStats("a", 20, 0, 0, 0, 0), + new TileSizeStats.LayerStats("b", 30, 0, 0, 0, 0) + ) + ); + + // z0 90% 100/10 (a:10) + // z1 10% (all) + // 8% 0,0 200/50 (a:20, b:30) + // z2 - ignore z2 since we don't have an tiles there + + // even though we're missing some tiles at z1, z1 should still get 10% + assertEquals( + 0.9 * 100 + 0.1 * 200, + tileStats.summary().get().weightedAverageArchivedSize(), + 1.5d + ); + assertEquals( + 0.9 * 10 + 0.1 * 50, + tileStats.summary().get().weightedAverageSize(), + 0.2 + ); + + assertEquals( + 0.9 * 10 + 0.1 * 20, + tileStats.summary().get("a").weightedAverageSize(), + 0.2 + ); + assertEquals( + 30, + tileStats.summary().get("b").weightedAverageSize(), + 1e-5 + ); + + assertEquals( + 200, + tileStats.summary().get(1).weightedAverageArchivedSize(), + 1e-5 + ); + assertEquals( + 50, + tileStats.summary().get(1).weightedAverageSize(), + 1e-5 + ); + } +} diff --git a/planetiler-core/src/test/java/com/onthegomap/planetiler/util/TopOsmTilesTest.java b/planetiler-core/src/test/java/com/onthegomap/planetiler/util/TopOsmTilesTest.java new file mode 100644 index 0000000000..3eda9a63c6 --- /dev/null +++ b/planetiler-core/src/test/java/com/onthegomap/planetiler/util/TopOsmTilesTest.java @@ -0,0 +1,122 @@ +package com.onthegomap.planetiler.util; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import com.onthegomap.planetiler.config.Arguments; +import com.onthegomap.planetiler.config.PlanetilerConfig; +import com.onthegomap.planetiler.geo.TileCoord; +import com.onthegomap.planetiler.stats.Stats; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.Reader; +import java.io.StringReader; +import java.time.LocalDate; +import java.util.List; +import java.util.Map; +import org.junit.jupiter.api.Test; + +class TopOsmTilesTest { + @Test + void fetchTopOsmTiles() { + var config = PlanetilerConfig.defaults(); + var stats = Stats.inMemory(); 
+ var day1 = LocalDate.ofEpochDay(0); + var day2 = LocalDate.ofEpochDay(1); + var day3 = LocalDate.ofEpochDay(2); /// no data + var topOsmTiles = new TopOsmTiles(config, stats) { + @Override + Reader fetch(LocalDate date) throws IOException { + if (date.equals(day1)) { + return new StringReader(""" + 0/0/0 1 + 1/0/0 2 + 1/1/0 3 + 2/0/0 4 + """); + } else if (date.equals(day2)) { + return new StringReader(""" + 0/0/0 2 + 1/0/0 4 + 1/1/0 6 + 2/0/0 8 + 3/0/0 1 + """); + } else { + throw new FileNotFoundException(); + } + } + }; + + var result = topOsmTiles.run(2, 2, 4, List.of(day1, day2, day3)); + assertEquals(new TileWeights() + .put(TileCoord.ofXYZ(0, 0, 0), 15) + .put(TileCoord.ofXYZ(0, 0, 1), 12), + result + ); + } + + @Test + void retries() { + var config = PlanetilerConfig.from(Arguments.of(Map.of( + "http-retries", "3" + ))); + var stats = Stats.inMemory(); + var day1 = LocalDate.ofEpochDay(0); + var topOsmTiles = new TopOsmTiles(config, stats) { + int tries = 3; + + @Override + Reader fetch(LocalDate date) throws IOException { + if (date.equals(day1)) { + if (tries-- > 0) { + throw new IOException("Injected download failure"); + } + return new StringReader(""" + 1/0/0 2 + """); + } else { + throw new IOException("other failure"); + } + } + }; + + var result = topOsmTiles.run(2, 2, 4, List.of(day1)); + assertEquals(new TileWeights() + .put(TileCoord.ofXYZ(0, 0, 0), 2), + result + ); + } + + @Test + void exhaustRetries() { + var config = PlanetilerConfig.from(Arguments.of(Map.of( + "http-retries", "3" + ))); + var stats = Stats.inMemory(); + var day1 = LocalDate.ofEpochDay(0); + var topOsmTiles = new TopOsmTiles(config, stats) { + int tries = 4; + + @Override + Reader fetch(LocalDate date) throws IOException { + if (date.equals(day1)) { + if (tries-- > 0) { + throw new IOException("Injected download failure"); + } + return new StringReader(""" + 1/0/0 2 + """); + } else { + throw new IOException("other failure"); + } + } + }; + + var result = topOsmTiles.run(2, 
2, 4, List.of(day1)); + + assertEquals( + new TileWeights(), + result + ); + } +} diff --git a/planetiler-custommap/src/test/java/com/onthegomap/planetiler/custommap/ConfiguredMapTest.java b/planetiler-custommap/src/test/java/com/onthegomap/planetiler/custommap/ConfiguredMapTest.java index d7c8527a15..8b2bffa1fd 100644 --- a/planetiler-custommap/src/test/java/com/onthegomap/planetiler/custommap/ConfiguredMapTest.java +++ b/planetiler-custommap/src/test/java/com/onthegomap/planetiler/custommap/ConfiguredMapTest.java @@ -12,7 +12,6 @@ import java.io.IOException; import java.nio.file.Path; import java.util.Map; -import java.util.Set; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; @@ -73,7 +72,7 @@ void testMetadata() { @Test void ensureValidGeometries() throws Exception { - Set parsedTiles = TestUtils.getAllTiles(mbtiles); + var parsedTiles = TestUtils.getTiles(mbtiles); for (var tileEntry : parsedTiles) { var decoded = VectorTile.decode(gunzip(tileEntry.bytes())); for (VectorTile.Feature feature : decoded) { diff --git a/planetiler-dist/src/main/java/com/onthegomap/planetiler/Main.java b/planetiler-dist/src/main/java/com/onthegomap/planetiler/Main.java index dd13ec05ec..3a8fcf2732 100644 --- a/planetiler-dist/src/main/java/com/onthegomap/planetiler/Main.java +++ b/planetiler-dist/src/main/java/com/onthegomap/planetiler/Main.java @@ -11,6 +11,8 @@ import com.onthegomap.planetiler.examples.ToiletsOverlay; import com.onthegomap.planetiler.examples.ToiletsOverlayLowLevelApi; import com.onthegomap.planetiler.mbtiles.Verify; +import com.onthegomap.planetiler.util.TileSizeStats; +import com.onthegomap.planetiler.util.TopOsmTiles; import java.util.Arrays; import java.util.Locale; import java.util.Map; @@ -50,7 +52,9 @@ public class Main { entry("benchmark-longlongmap", LongLongMapBench::main), entry("verify-mbtiles", Verify::main), - entry("verify-monaco", VerifyMonaco::main) + entry("verify-monaco", 
VerifyMonaco::main), + entry("stats", TileSizeStats::main), + entry("top-osm-tiles", TopOsmTiles::main) ); private static EntryPoint bundledSchema(String path) { diff --git a/planetiler-examples/src/main/java/com/onthegomap/planetiler/examples/ToiletsOverlayLowLevelApi.java b/planetiler-examples/src/main/java/com/onthegomap/planetiler/examples/ToiletsOverlayLowLevelApi.java index 801489afdc..142e745c5b 100644 --- a/planetiler-examples/src/main/java/com/onthegomap/planetiler/examples/ToiletsOverlayLowLevelApi.java +++ b/planetiler-examples/src/main/java/com/onthegomap/planetiler/examples/ToiletsOverlayLowLevelApi.java @@ -2,6 +2,7 @@ import com.onthegomap.planetiler.Planetiler; import com.onthegomap.planetiler.Profile; +import com.onthegomap.planetiler.archive.TileArchiveConfig; import com.onthegomap.planetiler.archive.TileArchiveMetadata; import com.onthegomap.planetiler.archive.TileArchiveWriter; import com.onthegomap.planetiler.archive.TileArchives; @@ -113,9 +114,10 @@ static void run(Path input, Path tmpDir, Path output) throws IOException { // then process rendered features, grouped by tile, encoding them into binary vector tile format // and writing to the output mbtiles file. - try (WriteableTileArchive db = TileArchives.newWriter(output, config)) { - TileArchiveWriter.writeOutput(featureGroup, db, () -> FileUtils.fileSize(output), tileArchiveMetadata, config, - stats); + var archiveConfig = TileArchiveConfig.from(output.toString()); + try (WriteableTileArchive db = TileArchives.newWriter(archiveConfig, config)) { + TileArchiveWriter.writeOutput(featureGroup, db, () -> FileUtils.fileSize(output), tileArchiveMetadata, + archiveConfig.getLocalPath(), config, stats); } catch (IOException e) { throw new IllegalStateException("Unable to write to " + output, e); }