diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/archive/TileArchiveWriter.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/archive/TileArchiveWriter.java index f8739e2675..708f96e079 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/archive/TileArchiveWriter.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/archive/TileArchiveWriter.java @@ -266,79 +266,78 @@ private void tileEncoderSink(Iterable prev) throws IOException { List lastLayerStats = null; boolean skipFilled = config.skipFilledTiles(); - try (var tileStatsUpdater = tileStats.threadLocalUpdater()) { - for (TileBatch batch : prev) { - List result = new ArrayList<>(batch.size()); - FeatureGroup.TileFeatures last = null; - // each batch contains tile ordered by tile-order ID ascending - for (int i = 0; i < batch.in.size(); i++) { - FeatureGroup.TileFeatures tileFeatures = batch.in.get(i); - featuresProcessed.incBy(tileFeatures.getNumFeaturesProcessed()); - byte[] bytes, encoded; - List layerStats; - Long tileDataHash; - if (tileFeatures.hasSameContents(last)) { - bytes = lastBytes; - encoded = lastEncoded; - tileDataHash = lastTileDataHash; - layerStats = lastLayerStats; - memoizedTiles.inc(); + var tileStatsUpdater = tileStats.threadLocalUpdater(); + for (TileBatch batch : prev) { + List result = new ArrayList<>(batch.size()); + FeatureGroup.TileFeatures last = null; + // each batch contains tile ordered by tile-order ID ascending + for (int i = 0; i < batch.in.size(); i++) { + FeatureGroup.TileFeatures tileFeatures = batch.in.get(i); + featuresProcessed.incBy(tileFeatures.getNumFeaturesProcessed()); + byte[] bytes, encoded; + List layerStats; + Long tileDataHash; + if (tileFeatures.hasSameContents(last)) { + bytes = lastBytes; + encoded = lastEncoded; + tileDataHash = lastTileDataHash; + layerStats = lastLayerStats; + memoizedTiles.inc(); + } else { + VectorTile en = tileFeatures.getVectorTileEncoder(); + if (skipFilled && (lastIsFill = en.containsOnlyFills())) { + encoded = null; + layerStats = null; + bytes = null; } else { - VectorTile en = tileFeatures.getVectorTileEncoder(); - if (skipFilled && (lastIsFill = en.containsOnlyFills())) { - encoded = null; - layerStats = null; - bytes = null; - } else { - var proto = en.toProto(); - encoded = proto.toByteArray(); - bytes = switch (config.tileCompression()) { - case GZIP -> gzip(encoded); - case NONE -> encoded; - case UNKNWON -> throw new IllegalArgumentException("cannot compress \"UNKNOWN\""); - }; - layerStats = TileStats.computeTileStats(proto); - if (encoded.length > config.tileWarningSizeBytes()) { - LOGGER.warn("{} {}kb uncompressed", - tileFeatures.tileCoord(), - encoded.length / 1024); - } - } - lastLayerStats = layerStats; - lastEncoded = encoded; - lastBytes = bytes; - last = tileFeatures; - if (archive.deduplicates() && en.likelyToBeDuplicated() && bytes != null) { - tileDataHash = generateContentHash(bytes); - } else { - tileDataHash = null; + var proto = en.toProto(); + encoded = proto.toByteArray(); + bytes = switch (config.tileCompression()) { + case GZIP -> gzip(encoded); + case NONE -> encoded; + case UNKNWON -> throw new IllegalArgumentException("cannot compress \"UNKNOWN\""); + }; + layerStats = TileStats.computeTileStats(proto); + if (encoded.length > config.tileWarningSizeBytes()) { + LOGGER.warn("{} {}kb uncompressed", + tileFeatures.tileCoord(), + encoded.length / 1024); } - lastTileDataHash = tileDataHash; } - if (skipFilled && lastIsFill) { - continue; + lastLayerStats = layerStats; + lastEncoded = encoded; + lastBytes = bytes; + last = tileFeatures; + if (archive.deduplicates() && en.likelyToBeDuplicated() && bytes != null) { + tileDataHash = generateContentHash(bytes); + } else { + tileDataHash = null; } - int zoom = tileFeatures.tileCoord().z(); - int encodedLength = encoded == null ? 0 : encoded.length; - totalTileSizesByZoom[zoom].incBy(encodedLength); - maxTileSizesByZoom[zoom].accumulate(encodedLength); - tileStatsUpdater.recordTile(tileFeatures.tileCoord(), bytes.length, layerStats); - List layerStatsRows = config.outputLayerStats() ? - TileStats.formatOutputRows(tileFeatures.tileCoord(), bytes.length, layerStats) : - List.of(); - result.add( - new TileEncodingResult( - tileFeatures.tileCoord(), - bytes, - encoded == null ? 0 : encoded.length, - tileDataHash == null ? OptionalLong.empty() : OptionalLong.of(tileDataHash), - layerStatsRows - ) - ); + lastTileDataHash = tileDataHash; + } + if (skipFilled && lastIsFill) { + continue; } - // hand result off to writer - batch.out.complete(result); + int zoom = tileFeatures.tileCoord().z(); + int encodedLength = encoded == null ? 0 : encoded.length; + totalTileSizesByZoom[zoom].incBy(encodedLength); + maxTileSizesByZoom[zoom].accumulate(encodedLength); + tileStatsUpdater.recordTile(tileFeatures.tileCoord(), bytes.length, layerStats); + List layerStatsRows = config.outputLayerStats() ? + TileStats.formatOutputRows(tileFeatures.tileCoord(), bytes.length, layerStats) : + List.of(); + result.add( + new TileEncodingResult( + tileFeatures.tileCoord(), + bytes, + encoded == null ? 0 : encoded.length, + tileDataHash == null ? OptionalLong.empty() : OptionalLong.of(tileDataHash), + layerStatsRows + ) + ); } + // hand result off to writer + batch.out.complete(result); } } diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/util/TileStats.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/TileStats.java index 04116c6475..968930ffb0 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/util/TileStats.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/TileStats.java @@ -28,9 +28,15 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Comparator; +import java.util.HashMap; import java.util.List; +import java.util.LongSummaryStatistics; +import java.util.Map; import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.atomic.AtomicLong; +import java.util.function.Function; +import java.util.stream.IntStream; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import vector_tile.VectorTileProto; @@ -47,6 +53,7 @@ public class TileStats { .withColumnSeparator('\t') .withLineSeparator("\n"); public static final ObjectWriter WRITER = MAPPER.writer(SCHEMA); + private final List summaries = new CopyOnWriteArrayList<>(); public TileStats() { // TODO load OSM tile weights @@ -106,21 +113,20 @@ record Batch(List tiles, CompletableFuture> stats) {} VectorTileProto.Tile decoded; List layerStats = null; - try (var updater = tileStats.threadLocalUpdater()) { - for (var batch : prev) { - List lines = new ArrayList<>(batch.tiles.size()); - for (var tile : batch.tiles) { - if (!Arrays.equals(zipped, tile.bytes())) { - zipped = tile.bytes(); - unzipped = Gzip.gunzip(tile.bytes()); - decoded = VectorTileProto.Tile.parseFrom(unzipped); - layerStats = computeTileStats(decoded); - } - updater.recordTile(tile.coord(), zipped.length, layerStats); - lines.addAll(TileStats.formatOutputRows(tile.coord(), zipped.length, layerStats)); + var updater = tileStats.threadLocalUpdater(); + for (var batch : prev) { + List lines = new ArrayList<>(batch.tiles.size()); + for (var tile : batch.tiles) { + if (!Arrays.equals(zipped, tile.bytes())) { + zipped = tile.bytes(); + unzipped = Gzip.gunzip(tile.bytes()); + decoded = VectorTileProto.Tile.parseFrom(unzipped); + layerStats = computeTileStats(decoded); } - batch.stats.complete(lines); + updater.recordTile(tile.coord(), zipped.length, layerStats); + lines.addAll(TileStats.formatOutputRows(tile.coord(), zipped.length, layerStats)); } + batch.stats.complete(lines); } }); @@ -205,8 +211,8 @@ public static List computeTileStats(VectorTileProto.Tile proto) { } result.add(new LayerStats( layer.getName(), - layer.getFeaturesCount(), layer.getSerializedSize(), + layer.getFeaturesCount(), attrSize, layer.getKeysCount(), layer.getValuesCount() @@ -218,34 +224,206 @@ public static List computeTileStats(VectorTileProto.Tile proto) { public void printStats() { if (LOGGER.isDebugEnabled()) { - LOGGER.debug("Tile stats:"); + Summary result = summary(); + var overallStats = result.get(); + var formatter = Format.defaultInstance(); + // TODO top 10 biggest tiles + // TODO other notably large tiles + LOGGER.debug("Max tile sizes:\n{}\n{}", + writeStatsTable(result, formatter::storage, SummaryCell::maxSize), + writeStatsRow(result, "gzipped", + formatter::storage, + z -> result.get(z).maxArchivedSize(), + result.get().maxArchivedSize() + ) + ); + LOGGER.debug(" # tiles: {}", formatter.integer(overallStats.numTiles())); + LOGGER.debug(" Max tile: {} (gzipped: {})", + formatter.storage(overallStats.maxSize()), + formatter.storage(overallStats.maxArchivedSize())); + // TODO weighted average tile size } - // TODO - // long sumSize = 0; - // long sumCount = 0; - // long maxMax = 0; - // for (int z = config.minzoom(); z <= config.maxzoom(); z++) { - // long totalCount = tilesByZoom[z].get(); - // long totalSize = totalTileSizesByZoom[z].get(); - // sumSize += totalSize; - // sumCount += totalCount; - // long maxSize = maxTileSizesByZoom[z].get(); - // maxMax = Math.max(maxMax, maxSize); - // LOGGER.debug("z{} avg:{} max:{}", - // z, - // format.storage(totalCount == 0 ? 0 : (totalSize / totalCount), false), - // format.storage(maxSize, false)); - // } - // LOGGER.debug("all avg:{} max:{}", - // format.storage(sumCount == 0 ? 0 : (sumSize / sumCount), false), - // format.storage(maxMax, false)); - // LOGGER.debug(" # tiles: {}", format.integer(this.tilesEmitted())); + } + + private static String writeStatsRow( + Summary result, + String firstColumn, + Function formatter, + Function extractCells, + Number lastColumn + ) { + return writeStatsRow(result, firstColumn, extractCells.andThen(formatter), formatter.apply(lastColumn)); + } + + private static String writeStatsRow( + Summary result, + String firstColumn, + Function extractStat, + String lastColumn + ) { + StringBuilder builder = new StringBuilder(); + int minZoom = result.minZoomWithData(); + int maxZoom = result.maxZoomWithData(); + List layers = result.layers().stream() + .sorted(Comparator.comparingInt(result::minZoomWithData)) + .toList(); + int maxLayerLength = Math.max(9, layers.stream().mapToInt(String::length).max().orElse(0)); + String cellFormat = "%1$5s"; + String layerFormat = "%1$" + maxLayerLength + "s"; + + builder.append(layerFormat.formatted(firstColumn)); + for (int z = minZoom; z <= maxZoom; z++) { + builder.append(cellFormat.formatted(extractStat.apply(z))); + builder.append(' '); + } + builder.append(cellFormat.formatted(lastColumn)); + return builder.toString(); + } + + private static String writeStatsTable(Summary result, Function formatter, + Function extractStat) { + StringBuilder builder = new StringBuilder(); + List layers = result.layers().stream() + .sorted(Comparator.comparingInt(result::minZoomWithData)) + .toList(); + + // header: 0 1 2 3 4 ... 15 + builder.append(writeStatsRow(result, "", z -> "z" + z, "all")).append('\n'); + + // each row: layer + for (var layer : layers) { + builder.append(writeStatsRow( + result, + layer, + formatter, + z -> extractStat.apply(result.get(z, layer)), + extractStat.apply(result.get(layer)) + )).append('\n'); + } + + // last layer: total sizes + builder.append(writeStatsRow( + result, + "full tile", + formatter, + z -> extractStat.apply(result.get(z)), + extractStat.apply(result.get()) + )); + return builder.toString(); } public Updater threadLocalUpdater() { return new Updater(); } + public static class Summary { + private final List byTile = + IntStream.rangeClosed(PlanetilerConfig.MIN_MINZOOM, PlanetilerConfig.MAX_MAXZOOM) + .mapToObj(i -> new SummaryCell()) + .toList(); + + private final List> byLayer = + IntStream.rangeClosed(PlanetilerConfig.MIN_MINZOOM, PlanetilerConfig.MAX_MAXZOOM) + .>mapToObj(i -> new HashMap<>()) + .toList(); + + public Summary merge(Summary other) { + for (int z = PlanetilerConfig.MIN_MINZOOM; z <= PlanetilerConfig.MAX_MAXZOOM; z++) { + byTile.get(z).merge(other.byTile.get(z)); + } + for (int z = PlanetilerConfig.MIN_MINZOOM; z <= PlanetilerConfig.MAX_MAXZOOM; z++) { + var ourMap = byLayer.get(z); + var theirMap = other.byLayer.get(z); + theirMap.forEach((layer, stats) -> ourMap.merge(layer, stats, SummaryCell::combine)); + } + return this; + } + + public static Summary combine(Summary a, Summary b) { + return new Summary().merge(a).merge(b); + } + + + public List layers() { + return byLayer.stream().flatMap(e -> e.keySet().stream()).distinct().sorted().toList(); + } + + public SummaryCell get(int z, String layer) { + return byLayer.get(z).getOrDefault(layer, new SummaryCell()); + } + + public SummaryCell get(String layer) { + return byLayer.stream() + .map(e -> e.getOrDefault(layer, new SummaryCell())) + .reduce(new SummaryCell(), SummaryCell::combine); + } + + public SummaryCell get(int z) { + return byTile.get(z); + } + + public SummaryCell get() { + return byTile.stream().reduce(new SummaryCell(), SummaryCell::combine); + } + + public int minZoomWithData() { + return IntStream.range(0, byTile.size()) + .filter(i -> byTile.get(i).numTiles() > 0) + .min() + .orElse(PlanetilerConfig.MAX_MAXZOOM); + } + + public int maxZoomWithData() { + return IntStream.range(0, byTile.size()) + .filter(i -> byTile.get(i).numTiles() > 0) + .max() + .orElse(PlanetilerConfig.MAX_MAXZOOM); + } + + public int minZoomWithData(String layer) { + return IntStream.range(0, byLayer.size()) + .filter(i -> byLayer.get(i).containsKey(layer)) + .min() + .orElse(PlanetilerConfig.MAX_MAXZOOM); + } + } + + public static class SummaryCell { + private final LongSummaryStatistics archivedBytes = new LongSummaryStatistics(); + private final LongSummaryStatistics bytes = new LongSummaryStatistics(); + + SummaryCell(String layer) {} + + SummaryCell() {} + + public long maxSize() { + return Math.max(0, bytes.getMax()); + } + + public long maxArchivedSize() { + return Math.max(0, archivedBytes.getMax()); + } + + public long numTiles() { + return bytes.getCount(); + } + + public SummaryCell merge(SummaryCell other) { + archivedBytes.combine(other.archivedBytes); + bytes.combine(other.bytes); + return this; + } + + public static SummaryCell combine(SummaryCell a, SummaryCell b) { + return new SummaryCell().merge(a).merge(b); + } + } + + + public Summary summary() { + return summaries.stream().reduce(new Summary(), Summary::merge); + } + @JsonPropertyOrder({ "z", "x", @@ -289,15 +467,24 @@ public int compareTo(LayerStats o) { } } - public class Updater implements AutoCloseable { + public class Updater { + private final Summary summary = new Summary(); - @Override - public void close() { - // TODO report to parent + private Updater() { + summaries.add(summary); } public void recordTile(TileCoord coord, int archivedBytes, List layerStats) { - // TODO + var tileStat = summary.byTile.get(coord.z()); + var layerStat = summary.byLayer.get(coord.z()); + tileStat.archivedBytes.accept(archivedBytes); + int sum = 0; + for (var layer : layerStats) { + var cell = layerStat.computeIfAbsent(layer.layer, SummaryCell::new); + cell.bytes.accept(layer.layerBytes); + sum += layer.layerBytes; + } + tileStat.bytes.accept(sum); } } } diff --git a/planetiler-core/src/test/java/com/onthegomap/planetiler/PlanetilerTests.java b/planetiler-core/src/test/java/com/onthegomap/planetiler/PlanetilerTests.java index 6bd67bafce..dcc42bbb91 100644 --- a/planetiler-core/src/test/java/com/onthegomap/planetiler/PlanetilerTests.java +++ b/planetiler-core/src/test/java/com/onthegomap/planetiler/PlanetilerTests.java @@ -33,6 +33,7 @@ import com.onthegomap.planetiler.stream.InMemoryStreamArchive; import com.onthegomap.planetiler.util.BuildInfo; import com.onthegomap.planetiler.util.Gzip; +import com.onthegomap.planetiler.util.TileStats; import java.io.IOException; import java.nio.charset.StandardCharsets; import java.nio.file.Files; @@ -1918,6 +1919,16 @@ public void processFeature(SourceFeature source, FeatureCollector features) { assertTrue(Integer.parseInt(next.get("z")) <= 14, "bad z: " + next); } } + + // ensure tilestats standalone executable produces same output + var standaloneLayerstatsOutput = tempDir.resolve("layerstats2.tsv.gz"); + TileStats.main("--input=" + output, "--output=" + standaloneLayerstatsOutput); + byte[] standaloneData = Files.readAllBytes(standaloneLayerstatsOutput); + byte[] standaloneUncompressed = Gzip.gunzip(standaloneData); + assertEquals( + new String(uncompressed, StandardCharsets.UTF_8), + new String(standaloneUncompressed, StandardCharsets.UTF_8) + ); } else { assertFalse(Files.exists(layerstats)); } diff --git a/planetiler-core/src/test/java/com/onthegomap/planetiler/util/TileStatsTest.java b/planetiler-core/src/test/java/com/onthegomap/planetiler/util/TileStatsTest.java new file mode 100644 index 0000000000..88f3594e91 --- /dev/null +++ b/planetiler-core/src/test/java/com/onthegomap/planetiler/util/TileStatsTest.java @@ -0,0 +1,142 @@ +package com.onthegomap.planetiler.util; + +import static com.onthegomap.planetiler.TestUtils.newPoint; +import static org.junit.jupiter.api.Assertions.assertEquals; + +import com.onthegomap.planetiler.VectorTile; +import com.onthegomap.planetiler.geo.TileCoord; +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.Set; +import org.junit.jupiter.api.Test; + +class TileStatsTest { + @Test + void computeStatsEmpty() { + var stats = TileStats.computeTileStats(new VectorTile().toProto()); + assertEquals(0, stats.size()); + } + + @Test + void computeStatsOneFeature() throws IOException { + var stats = TileStats.computeTileStats(new VectorTile() + .addLayerFeatures("layer", List.of(new VectorTile.Feature( + "layer", + 1, + VectorTile.encodeGeometry(newPoint(0, 0)), + Map.of("key1", "value1", "key2", 2) + ))) + .toProto()); + assertEquals(1, stats.size()); + var entry1 = stats.get(0); + assertEquals("layer", entry1.layer()); + assertEquals(1, entry1.layerFeatures()); + assertEquals(55, entry1.layerBytes()); + + assertEquals(18, entry1.layerAttrBytes()); + assertEquals(2, entry1.layerAttrKeys()); + assertEquals(2, entry1.layerAttrValues()); + + var formatted = TileStats.formatOutputRows(TileCoord.ofXYZ(1, 2, 3), 999, stats); + assertEquals( + """ + z x y hilbert archived_tile_bytes layer layer_bytes layer_features layer_attr_bytes layer_attr_keys layer_attr_values + 3 1 2 34 999 layer 55 1 18 2 2 + """ + .trim(), + (TileStats.headerRow() + String.join("", formatted)).trim()); + } + + @Test + void computeStats2Features() throws IOException { + var stats = TileStats.computeTileStats(new VectorTile() + .addLayerFeatures("b", List.of( + new VectorTile.Feature( + "b", + 1, + VectorTile.encodeGeometry(newPoint(0, 0)), + Map.of() + ) + )) + .addLayerFeatures("a", List.of( + new VectorTile.Feature( + "a", + 1, + VectorTile.encodeGeometry(newPoint(0, 0)), + Map.of("key1", "value1", "key2", 2) + ), + new VectorTile.Feature( + "a", + 2, + VectorTile.encodeGeometry(newPoint(1, 1)), + Map.of("key1", 2, "key2", 3) + ) + )) + .toProto()); + assertEquals(2, stats.size()); + var entry1 = stats.get(0); + assertEquals("a", entry1.layer()); + assertEquals(2, entry1.layerFeatures()); + assertEquals(72, entry1.layerBytes()); + + assertEquals(20, entry1.layerAttrBytes()); + assertEquals(2, entry1.layerAttrKeys()); + assertEquals(3, entry1.layerAttrValues()); + var entry2 = stats.get(1); + assertEquals("b", entry2.layer()); + assertEquals(1, entry2.layerFeatures()); + + var formatted = TileStats.formatOutputRows(TileCoord.ofXYZ(1, 2, 3), 999, stats); + assertEquals( + """ + z x y hilbert archived_tile_bytes layer layer_bytes layer_features layer_attr_bytes layer_attr_keys layer_attr_values + 3 1 2 34 999 a 72 2 20 2 3 + 3 1 2 34 999 b 19 1 0 0 0 + """ + .trim(), + (TileStats.headerRow() + String.join("", formatted)).trim()); + } + + @Test + void aggregateTileStats() { + var tileStats = new TileStats(); + var updater1 = tileStats.threadLocalUpdater(); + var updater2 = tileStats.threadLocalUpdater(); + updater1.recordTile(TileCoord.ofXYZ(0, 0, 1), 123, List.of( + new TileStats.LayerStats("a", 1, 2, 3, 4, 5), + new TileStats.LayerStats("b", 6, 7, 8, 9, 10) + )); + updater2.recordTile(TileCoord.ofXYZ(0, 1, 1), 345, List.of( + new TileStats.LayerStats("b", 1, 2, 3, 4, 5), + new TileStats.LayerStats("c", 6, 7, 8, 9, 10) + )); + var summary = tileStats.summary(); + assertEquals(Set.of("a", "b", "c"), Set.copyOf(summary.layers())); + assertEquals(0, summary.get(0).maxSize()); + assertEquals(0, summary.get(0).numTiles()); + assertEquals(7, summary.get(1).maxSize()); + assertEquals(2, summary.get(1).numTiles()); + + assertEquals(0, summary.get(0, "a").maxSize()); + assertEquals(1, summary.get(1, "a").maxSize()); + assertEquals(6, summary.get(1, "b").maxSize()); + assertEquals(6, summary.get(1, "c").maxSize()); + + assertEquals(0, summary.get(0, "a").numTiles()); + assertEquals(1, summary.get(1, "a").numTiles()); + assertEquals(2, summary.get(1, "b").numTiles()); + assertEquals(1, summary.get(1, "c").numTiles()); + + + assertEquals(1, summary.get("a").maxSize()); + assertEquals(6, summary.get("b").maxSize()); + assertEquals(6, summary.get("c").maxSize()); + assertEquals(1, summary.get("a").numTiles()); + assertEquals(2, summary.get("b").numTiles()); + assertEquals(1, summary.get("c").numTiles()); + + assertEquals(7, summary.get().maxSize()); + assertEquals(2, summary.get().numTiles()); + } +}