diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/archive/TileArchiveWriter.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/archive/TileArchiveWriter.java index a871947f0b..60ebcc1a14 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/archive/TileArchiveWriter.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/archive/TileArchiveWriter.java @@ -374,12 +374,11 @@ private void tileWriter(Iterable tileBatches) throws ExecutionExcepti archive.finish(tileArchiveMetadata); } + @SuppressWarnings("java:S2629") private void printTileStats() { - if (LOGGER.isDebugEnabled()) { - Format format = Format.defaultInstance(); - tileStats.printStats(config.debugUrlPattern()); - LOGGER.debug(" # features: {}", format.integer(featuresProcessed.get())); - } + Format format = Format.defaultInstance(); + tileStats.printStats(config.debugUrlPattern()); + LOGGER.debug(" # features: {}", format.integer(featuresProcessed.get())); } private long tilesEmitted() { diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/util/TileSizeStats.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/TileSizeStats.java index 3b14c9ba67..d7c532e458 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/util/TileSizeStats.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/TileSizeStats.java @@ -159,9 +159,7 @@ record Batch(List tiles, CompletableFuture> stats) {} loggers.awaitAndLog(joinFutures(readBranch.done(), writeBranch.done()), config.logInterval()); timer.stop(); - if (LOGGER.isDebugEnabled()) { - tileStats.printStats(config.debugUrlPattern()); - } + tileStats.printStats(config.debugUrlPattern()); stats.printSummary(); } diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/util/TilesetSummaryStatistics.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/TilesetSummaryStatistics.java index ab6c266c7c..095c24db27 100644 --- 
a/planetiler-core/src/main/java/com/onthegomap/planetiler/util/TilesetSummaryStatistics.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/TilesetSummaryStatistics.java @@ -15,13 +15,22 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +/** + * Utility that computes min/max/average sizes for each vector tile layer at each zoom level, then computes combined + * summary statistics at the end. + *

+ * Provide a {@link TileWeights} instance to compute weighted average tile sizes based on actual tile traffic. + */ public class TilesetSummaryStatistics { - private final TileWeights tileWeights; private static final int TOP_N_TILES = 10; private static final int WARN_BYTES = 100_000; private static final int ERROR_BYTES = 500_000; private static final Logger LOGGER = LoggerFactory.getLogger(TilesetSummaryStatistics.class); + private final TileWeights tileWeights; + + // instead of threads updating concurrent data structures, each thread gets a thread-local + // Summary instance it can update without contention; the instances are combined at the end. private final List

summaries = new CopyOnWriteArrayList<>(); public TilesetSummaryStatistics(TileWeights tileWeights) { @@ -32,81 +41,6 @@ public TilesetSummaryStatistics() { this(new TileWeights()); } - public Summary summary() { - return summaries.stream().reduce(new Summary(), Summary::mergeIn); - } - - public void printStats(String debugUrlPattern) { - if (LOGGER.isDebugEnabled()) { - LOGGER.debug("Tile stats:"); - Summary result = summary(); - var overallStats = result.get(); - var formatter = Format.defaultInstance(); - var biggestTiles = overallStats.biggestTiles(); - LOGGER.debug("Biggest tiles (gzipped)\n{}", - IntStream.range(0, biggestTiles.size()) - .mapToObj(index -> { - var tile = biggestTiles.get(index); - return "%d. %d/%d/%d (%s) %s (%s)".formatted( - index + 1, - tile.coord.z(), - tile.coord.x(), - tile.coord.y(), - formatter.storage(tile.size), - tile.coord.getDebugUrl(debugUrlPattern), - tileBiggestLayers(formatter, tile) - ); - }).collect(Collectors.joining("\n")) - ); - var alreadyListed = biggestTiles.stream().map(TileSummary::coord).collect(Collectors.toSet()); - var otherTiles = result.layers().stream() - .flatMap(layer -> result.get(layer).biggestTiles().stream().limit(1)) - .filter(tile -> !alreadyListed.contains(tile.coord) && tile.size > WARN_BYTES) - .toList(); - if (!otherTiles.isEmpty()) { - LOGGER.info("Other tiles with large layers\n{}", - otherTiles.stream() - .map(tile -> "%d/%d/%d (%s) %s (%s)".formatted( - tile.coord.z(), - tile.coord.x(), - tile.coord.y(), - formatter.storage(tile.size), - tile.coord.getDebugUrl(debugUrlPattern), - tileBiggestLayers(formatter, tile) - )).collect(Collectors.joining("\n"))); - } - - LOGGER.debug("Max tile sizes\n{}\n{}\n{}", - writeStatsTable(result, n -> { - String string = " " + formatter.storage(n, true); - return n.intValue() > ERROR_BYTES ? AnsiColors.red(string) : - n.intValue() > WARN_BYTES ? 
AnsiColors.yellow(string) : - string; - }, Cell::maxSize), - writeStatsRow(result, "full tile", - formatter::storage, - z -> result.get(z).maxSize(), - result.get().maxSize() - ), - writeStatsRow(result, "gzipped", - formatter::storage, - z -> result.get(z).maxArchivedSize(), - result.get().maxArchivedSize() - ) - ); - LOGGER.debug(" Max tile: {} (gzipped: {})", - formatter.storage(overallStats.maxSize()), - formatter.storage(overallStats.maxArchivedSize())); - LOGGER.debug(" Avg tile: {} (gzipped: {}) {}", - formatter.storage(overallStats.weightedAverageSize()), - formatter.storage(overallStats.weightedAverageArchivedSize()), - overallStats.totalWeight <= 0 ? - "no tile weights, use --download-osm-tile-weights for weighted average" : - "using weighted average based on OSM traffic"); - LOGGER.debug(" # tiles: {}", formatter.integer(overallStats.numTiles())); - } - } - private static String tileBiggestLayers(Format formatter, TileSummary tile) { int minSize = tile.layers.stream().mapToInt(l -> l.layerBytes()).max().orElse(0); return tile.layers.stream() @@ -116,17 +50,17 @@ private static String tileBiggestLayers(Format formatter, TileSummary tile) { .collect(Collectors.joining(", ")); } - private static String writeStatsRow( + private static String writeStatsTableRow( Summary result, String firstColumn, Function formatter, Function extractCells, Number lastColumn ) { - return writeStatsRow(result, firstColumn, extractCells.andThen(formatter), formatter.apply(lastColumn)); + return writeStatsTableRow(result, firstColumn, extractCells.andThen(formatter), formatter.apply(lastColumn)); } - private static String writeStatsRow( + private static String writeStatsTableRow( Summary result, String firstColumn, Function extractStat, @@ -159,11 +93,11 @@ private static String writeStatsTable(Summary result, Function f .toList(); // header: 0 1 2 3 4 ... 
15 - builder.append(writeStatsRow(result, "", z -> "z" + z, "all")).append('\n'); + builder.append(writeStatsTableRow(result, "", z -> "z" + z, "all")).append('\n'); // each row: layer for (var layer : layers) { - builder.append(writeStatsRow( + builder.append(writeStatsTableRow( result, layer, formatter, @@ -174,127 +108,147 @@ private static String writeStatsTable(Summary result, Function f return builder.toString().stripTrailing(); } - public Updater threadLocalUpdater() { - return new Updater(); + /** Returns a combined {@link Summary} from each thread's {@link Updater}. */ + public Summary summary() { + return summaries.stream().reduce(new Summary(), Summary::mergeIn); } - public class Summary { - - private final List byTile = - IntStream.rangeClosed(PlanetilerConfig.MIN_MINZOOM, PlanetilerConfig.MAX_MAXZOOM) - .mapToObj(i -> new Cell()) - .toList(); - - private final List> byLayer = - IntStream.rangeClosed(PlanetilerConfig.MIN_MINZOOM, PlanetilerConfig.MAX_MAXZOOM) - .>mapToObj(i -> new HashMap<>()) - .toList(); - - public Summary mergeIn(Summary other) { - for (int z = PlanetilerConfig.MIN_MINZOOM; z <= PlanetilerConfig.MAX_MAXZOOM; z++) { - byTile.get(z).mergeIn(other.byTile.get(z)); - } - for (int z = PlanetilerConfig.MIN_MINZOOM; z <= PlanetilerConfig.MAX_MAXZOOM; z++) { - var ourMap = byLayer.get(z); - var theirMap = other.byLayer.get(z); - theirMap.forEach((layer, stats) -> ourMap.merge(layer, stats, Cell::combine)); - } - return this; - } - - public List layers() { - return byLayer.stream().flatMap(e -> e.keySet().stream()).distinct().sorted().toList(); - } - - public Cell get(int z, String layer) { - return byLayer.get(z).getOrDefault(layer, new Cell()); - } - - public Cell get(String layer) { - return combineZooms(byLayer.stream() - .map(e -> e.getOrDefault(layer, new Cell())) - .toList()); - } - - public Cell get(int z) { - return byTile.get(z); - } - - public Cell get() { - return combineZooms(byTile); - } - - private Cell combineZooms(List byTile) 
{ - double sumWeight = 0; - double preSumWeight = 0; - for (int z = 0; z < byTile.size(); z++) { - var cell = byTile.get(z); - long zoomWeight = tileWeights.getZoomWeight(z); - if (cell.numTiles() > 0 && zoomWeight > 0) { - sumWeight += zoomWeight; - preSumWeight += cell.totalWeight; - } - } - boolean noData = sumWeight == 0 || preSumWeight == 0; - Cell result = new Cell(); - for (int z = 0; z < byTile.size(); z++) { - var cell = byTile.get(z); - long zoomWeight = tileWeights.getZoomWeight(z); - if ((cell.numTiles() > 0 && zoomWeight > 0) || noData) { - double weight = noData ? 1 : (zoomWeight / sumWeight) / (cell.totalWeight / preSumWeight); - result.mergeIn(cell, weight); - } - } - return result; - } - - public int minZoomWithData() { - return IntStream.range(0, byTile.size()) - .filter(i -> byTile.get(i).numTiles() > 0) - .min() - .orElse(PlanetilerConfig.MAX_MAXZOOM); + /** Logs biggest tiles, max layer size by zoom, and weighted average tile sizes. */ + @SuppressWarnings("java:S2629") + public void printStats(String debugUrlPattern) { + LOGGER.debug("Tile stats:"); + Summary result = summary(); + var overallStats = result.get(); + var formatter = Format.defaultInstance(); + var biggestTiles = overallStats.biggestTiles(); + LOGGER.debug("Biggest tiles (gzipped)\n{}", + IntStream.range(0, biggestTiles.size()) + .mapToObj(index -> { + var tile = biggestTiles.get(index); + return "%d. 
%d/%d/%d (%s) %s (%s)".formatted( + index + 1, + tile.coord.z(), + tile.coord.x(), + tile.coord.y(), + formatter.storage(tile.archivedSize), + tile.coord.getDebugUrl(debugUrlPattern), + tileBiggestLayers(formatter, tile) + ); + }).collect(Collectors.joining("\n")) + ); + var alreadyListed = biggestTiles.stream().map(TileSummary::coord).collect(Collectors.toSet()); + var otherTiles = result.layers().stream() + .flatMap(layer -> result.get(layer).biggestTiles().stream().limit(1)) + .filter(tile -> !alreadyListed.contains(tile.coord) && tile.archivedSize > WARN_BYTES) + .toList(); + if (!otherTiles.isEmpty()) { + LOGGER.info("Other tiles with large layers\n{}", + otherTiles.stream() + .map(tile -> "%d/%d/%d (%s) %s (%s)".formatted( + tile.coord.z(), + tile.coord.x(), + tile.coord.y(), + formatter.storage(tile.archivedSize), + tile.coord.getDebugUrl(debugUrlPattern), + tileBiggestLayers(formatter, tile) + )).collect(Collectors.joining("\n"))); } - public int maxZoomWithData() { - return IntStream.range(0, byTile.size()) - .filter(i -> byTile.get(i).numTiles() > 0) - .max() - .orElse(PlanetilerConfig.MAX_MAXZOOM); - } + LOGGER.debug("Max tile sizes\n{}\n{}\n{}", + writeStatsTable(result, n -> { + String string = " " + formatter.storage(n, true); + return n.intValue() > ERROR_BYTES ? AnsiColors.red(string) : + n.intValue() > WARN_BYTES ? 
AnsiColors.yellow(string) : + string; + }, Cell::maxSize), + writeStatsTableRow(result, "full tile", + formatter::storage, + z -> result.get(z).maxSize(), + result.get().maxSize() + ), + writeStatsTableRow(result, "gzipped", + formatter::storage, + z -> result.get(z).maxArchivedSize(), + result.get().maxArchivedSize() + ) + ); + LOGGER.debug(" Max tile: {} (gzipped: {})", + formatter.storage(overallStats.maxSize()), + formatter.storage(overallStats.maxArchivedSize())); + LOGGER.debug(" Avg tile: {} (gzipped: {}) {}", + formatter.storage(overallStats.weightedAverageSize()), + formatter.storage(overallStats.weightedAverageArchivedSize()), + overallStats.totalWeight <= 0 ? + "no tile weights, use --download-osm-tile-weights for weighted average" : + "using weighted average based on OSM traffic"); + LOGGER.debug(" # tiles: {}", formatter.integer(overallStats.numTiles())); + } - public int minZoomWithData(String layer) { - return IntStream.range(0, byLayer.size()) - .filter(i -> byLayer.get(i).containsKey(layer)) - .min() - .orElse(PlanetilerConfig.MAX_MAXZOOM); - } + /** + * Returns an {@link Updater} that accepts individual tile layer stats from a thread that will eventually be combined + * into the final tileset report. + */ + public Updater threadLocalUpdater() { + return new Updater(); } + /** Aggregated statistics for a layer/zoom, layer, zoom, or entire tileset. 
*/ public static class Cell { - private long weightedBytesSum; - private long weightedArchivedBytesSum; - private long totalWeight; private final LongSummaryStatistics archivedBytes = new LongSummaryStatistics(); private final LongSummaryStatistics bytes = new LongSummaryStatistics(); private final PriorityQueue topTiles = new PriorityQueue<>(); + private long weightedBytesSum; + private long weightedArchivedBytesSum; + private long totalWeight; private int bigTileCutoff = 0; private static Cell combine(Cell a, Cell b) { return new Cell().mergeIn(a).mergeIn(b); } + /** Max raw layer bytes (or tile size when aggregated over all layers). */ public long maxSize() { return Math.max(0, bytes.getMax()); } + /** Max gzipped tile bytes (or 0 when broken-out by layer). */ public long maxArchivedSize() { return Math.max(0, archivedBytes.getMax()); } + /** Total tiles included in this aggregation. */ public long numTiles() { return bytes.getCount(); } + /** + * Returns the biggest tiles in this aggregation by gzipped size (when aggregated over all layers) or raw size + * within an individual layer. + */ + public List biggestTiles() { + return topTiles.stream().sorted(Comparator.comparingLong(s -> -s.archivedSize)).toList(); + } + + /** + * Returns average gzipped tile size in this aggregation, weighted by the {@link TileWeights} instance provided. + *

+ * When multiple zoom-levels are combined, the weighted average respects the weight-per-zoom-level from + * {@link TileWeights} so that low zoom tiles are not overweighted when analyzing a small extract. + */ + public double weightedAverageArchivedSize() { + return totalWeight == 0 ? archivedBytes.getAverage() : (weightedArchivedBytesSum * 1d / totalWeight); + } + + /** + * Returns average raw (not gzipped) tile size in this aggregation, weighted by the {@link TileWeights} instance + * provided. + * + * @see #weightedAverageArchivedSize() + */ + public double weightedAverageSize() { + return totalWeight == 0 ? bytes.getAverage() : (weightedBytesSum * 1d / totalWeight); + } + private Cell mergeIn(Cell other) { totalWeight += other.totalWeight; weightedBytesSum += other.weightedBytesSum; @@ -302,7 +256,7 @@ private Cell mergeIn(Cell other) { archivedBytes.combine(other.archivedBytes); bytes.combine(other.bytes); for (var bigTile : other.topTiles) { - acceptBigTile(bigTile.coord, bigTile.size, bigTile.layers); + acceptBigTile(bigTile.coord, bigTile.archivedSize, bigTile.layers); } return this; } @@ -314,12 +268,11 @@ private Cell mergeIn(Cell other, double weight) { archivedBytes.combine(other.archivedBytes); bytes.combine(other.bytes); for (var bigTile : other.topTiles) { - acceptBigTile(bigTile.coord, bigTile.size, bigTile.layers); + acceptBigTile(bigTile.coord, bigTile.archivedSize, bigTile.layers); } return this; } - private void acceptBigTile(TileCoord coord, int archivedBytes, List layerStats) { if (archivedBytes >= bigTileCutoff) { topTiles.offer(new TileSummary(coord, archivedBytes, layerStats)); @@ -327,31 +280,20 @@ private void acceptBigTile(TileCoord coord, int archivedBytes, List biggestTiles() { - return topTiles.stream().sorted(Comparator.comparingLong(s -> -s.size)).toList(); - } - - public double weightedAverageArchivedSize() { - return totalWeight == 0 ? 
archivedBytes.getAverage() : (weightedArchivedBytesSum * 1d / totalWeight); - } - - public double weightedAverageSize() { - return totalWeight == 0 ? bytes.getAverage() : (weightedBytesSum * 1d / totalWeight); - } } - public record TileSummary(TileCoord coord, int size, List layers) + /** Statistics for a tile and its layers. */ + public record TileSummary(TileCoord coord, int archivedSize, List layers) implements Comparable { @Override public int compareTo(TileSummary o) { - int result = Integer.compare(size, o.size); + int result = Integer.compare(archivedSize, o.archivedSize); if (result == 0) { result = Integer.compare(coord.encoded(), o.coord.encoded()); } @@ -363,6 +305,110 @@ TileSummary withSize(int newSize) { } } + /** Overall summary statistics for a tileset, aggregated from all {@link Updater Updaters}. */ + public class Summary { + + private final List byTile = + IntStream.rangeClosed(PlanetilerConfig.MIN_MINZOOM, PlanetilerConfig.MAX_MAXZOOM) + .mapToObj(i -> new Cell()) + .toList(); + + private final List> byLayer = + IntStream.rangeClosed(PlanetilerConfig.MIN_MINZOOM, PlanetilerConfig.MAX_MAXZOOM) + .>mapToObj(i -> new HashMap<>()) + .toList(); + + /** All the layers that appear in the tileset. */ + public List layers() { + return byLayer.stream().flatMap(e -> e.keySet().stream()).distinct().sorted().toList(); + } + + /** Returns the summary statistics for a layer at a zoom level. */ + public Cell get(int z, String layer) { + return byLayer.get(z).getOrDefault(layer, new Cell()); + } + + /** Returns the summary statistics for a layer from all zoom levels. */ + public Cell get(String layer) { + return combineZooms(byLayer.stream() + .map(e -> e.getOrDefault(layer, new Cell())) + .toList()); + } + + /** Returns the summary statistics for a zoom level from all layers. */ + public Cell get(int z) { + return byTile.get(z); + } + + /** Returns the summary statistics for the entire dataset by aggregating all layers and zoom-levels. 
*/ + public Cell get() { + return combineZooms(byTile); + } + + /** Returns the minimum zoom a tile appears at in the tileset. */ + public int minZoomWithData() { + return IntStream.range(0, byTile.size()) + .filter(i -> byTile.get(i).numTiles() > 0) + .min() + .orElse(PlanetilerConfig.MAX_MAXZOOM); + } + + /** Returns the maximum zoom a tile appears at in the tileset. */ + public int maxZoomWithData() { + return IntStream.range(0, byTile.size()) + .filter(i -> byTile.get(i).numTiles() > 0) + .max() + .orElse(PlanetilerConfig.MAX_MAXZOOM); + } + + /** Returns the minimum zoom a specific layer appears at in the tileset. */ + public int minZoomWithData(String layer) { + return IntStream.range(0, byLayer.size()) + .filter(i -> byLayer.get(i).containsKey(layer)) + .min() + .orElse(PlanetilerConfig.MAX_MAXZOOM); + } + + private Summary mergeIn(Summary other) { + for (int z = PlanetilerConfig.MIN_MINZOOM; z <= PlanetilerConfig.MAX_MAXZOOM; z++) { + byTile.get(z).mergeIn(other.byTile.get(z)); + } + for (int z = PlanetilerConfig.MIN_MINZOOM; z <= PlanetilerConfig.MAX_MAXZOOM; z++) { + var ourMap = byLayer.get(z); + var theirMap = other.byLayer.get(z); + theirMap.forEach((layer, stats) -> ourMap.merge(layer, stats, Cell::combine)); + } + return this; + } + + private Cell combineZooms(List byTile) { + // aggregate Cells over zoom levels, but respect the global zoom-level weights + // from TileWeights + double sumWeight = 0; + double preSumWeight = 0; + for (int z = 0; z < byTile.size(); z++) { + var cell = byTile.get(z); + long zoomWeight = tileWeights.getZoomWeight(z); + if (cell.numTiles() > 0 && zoomWeight > 0) { + sumWeight += zoomWeight; + preSumWeight += cell.totalWeight; + } + } + boolean noData = sumWeight == 0 || preSumWeight == 0; + Cell result = new Cell(); + for (int z = 0; z < byTile.size(); z++) { + var cell = byTile.get(z); + long zoomWeight = tileWeights.getZoomWeight(z); + if ((cell.numTiles() > 0 && zoomWeight > 0) || noData) { + double weight = noData ? 
1 : (zoomWeight / sumWeight) / (cell.totalWeight / preSumWeight); + result.mergeIn(cell, weight); + } + } + return result; + } + } + + /** Thread local updater that accepts individual statistics for each tile. */ public class Updater { private final Summary summary = new Summary(); @@ -370,6 +416,10 @@ private Updater() { summaries.add(summary); } + private static Cell newCell(String layer) { + return new Cell(); + } + public void recordTile(TileCoord coord, int archivedBytes, List layerStats) { var tileStat = summary.byTile.get(coord.z()); var layerStat = summary.byLayer.get(coord.z()); @@ -391,9 +441,5 @@ public void recordTile(TileCoord coord, int archivedBytes, List toDownload) { CompletableFuture result = new CompletableFuture<>(); var timer = stats.startStage("top-osm-tiles"); diff --git a/planetiler-core/src/test/java/com/onthegomap/planetiler/util/TilesetSummaryStatisticsTest.java b/planetiler-core/src/test/java/com/onthegomap/planetiler/util/TilesetSummaryStatisticsTest.java index b6d18b6127..0b23cfd9dd 100644 --- a/planetiler-core/src/test/java/com/onthegomap/planetiler/util/TilesetSummaryStatisticsTest.java +++ b/planetiler-core/src/test/java/com/onthegomap/planetiler/util/TilesetSummaryStatisticsTest.java @@ -88,7 +88,7 @@ void topLayerTiles() { new TileSizeStats.LayerStats("b", i * 3, i, 0, 0, 0) )); summaries.add(0, summary); - (i % 2 == 0 ? updater1 : updater2).recordTile(summary.coord(), summary.size(), summary.layers()); + (i % 2 == 0 ? updater1 : updater2).recordTile(summary.coord(), summary.archivedSize(), summary.layers()); } assertEquals( summaries.stream().map(d -> d.withSize(d.coord().encoded() * 2)).limit(10).toList(),