From 42d8cf50cbed49798a4fea4d088135af07a248e7 Mon Sep 17 00:00:00 2001 From: Mike Barry Date: Sat, 16 Sep 2023 14:30:03 -0400 Subject: [PATCH] biggest tiles --- .../onthegomap/planetiler/util/TileStats.java | 90 ++++++++++++++++++- .../planetiler/util/TileStatsTest.java | 50 +++++++++++ 2 files changed, 138 insertions(+), 2 deletions(-) diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/util/TileStats.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/TileStats.java index 968930ffb0..7c05ae9f7f 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/util/TileStats.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/TileStats.java @@ -32,10 +32,12 @@ import java.util.List; import java.util.LongSummaryStatistics; import java.util.Map; +import java.util.PriorityQueue; import java.util.concurrent.CompletableFuture; import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.atomic.AtomicLong; import java.util.function.Function; +import java.util.stream.Collectors; import java.util.stream.IntStream; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -53,6 +55,7 @@ public class TileStats { .withColumnSeparator('\t') .withLineSeparator("\n"); public static final ObjectWriter WRITER = MAPPER.writer(SCHEMA); + private static final int TOP_N_TILES = 10; private final List summaries = new CopyOnWriteArrayList<>(); public TileStats() { @@ -227,8 +230,40 @@ public void printStats() { Summary result = summary(); var overallStats = result.get(); var formatter = Format.defaultInstance(); - // TODO top 10 biggest tiles - // TODO other notably large tiles + var biggestTiles = overallStats.biggestTiles(); + LOGGER.debug("Biggest tiles (gzipped):\n{}", + IntStream.range(0, biggestTiles.size()) + .mapToObj(index -> { + var tile = biggestTiles.get(index); + return "%d. %d/%d/%d (%s) %s (%s)".formatted( + index + 1, + tile.coord.z(), + tile.coord.x(), + tile.coord.y(), + formatter.storage(tile.size), + tile.coord.getDebugUrl(), + tileBiggestLayers(formatter, tile) + ); + }).collect(Collectors.joining("\n")) + ); + var alreadyListed = biggestTiles.stream().map(TileSummary::coord).collect(Collectors.toSet()); + var otherTiles = result.layers().stream() + .flatMap(layer -> result.get(layer).biggestTiles().stream().limit(1)) + .filter(tile -> !alreadyListed.contains(tile.coord) && tile.size > 100_000) + .toList(); + if (!otherTiles.isEmpty()) { + LOGGER.info("Other tiles with large layers:\n{}", + otherTiles.stream() + .map(tile -> "%d/%d/%d (%s) %s (%s)".formatted( + tile.coord.z(), + tile.coord.x(), + tile.coord.y(), + formatter.storage(tile.size), + tile.coord.getDebugUrl(), + tileBiggestLayers(formatter, tile) + )).collect(Collectors.joining("\n"))); + } + LOGGER.debug("Max tile sizes:\n{}\n{}", writeStatsTable(result, formatter::storage, SummaryCell::maxSize), writeStatsRow(result, "gzipped", @@ -245,6 +280,16 @@ public void printStats() { } } + private static String tileBiggestLayers(Format formatter, TileSummary tile) { + return tile.layers.stream() + .filter( + d -> d.layerBytes > Math.min(100_000, + tile.layers.stream().mapToInt(l -> l.layerBytes).max().orElse(0))) + .sorted(Comparator.comparingInt(d -> -d.layerBytes)) + .map(d -> d.layer + ":" + formatter.storage(d.layerBytes)) + .collect(Collectors.joining(", ")); + } + private static String writeStatsRow( Summary result, String firstColumn, @@ -391,6 +436,8 @@ public int minZoomWithData(String layer) { public static class SummaryCell { private final LongSummaryStatistics archivedBytes = new LongSummaryStatistics(); private final LongSummaryStatistics bytes = new LongSummaryStatistics(); + private int bigTileCutoff = 0; + private final PriorityQueue topTiles = new PriorityQueue<>(); SummaryCell(String layer) {} @@ -411,12 +458,32 @@ public long numTiles() { public SummaryCell merge(SummaryCell other) { archivedBytes.combine(other.archivedBytes); bytes.combine(other.bytes); + for (var bigTile : other.topTiles) { + acceptBigTile(bigTile.coord, bigTile.size, bigTile.layers); + } return this; } + private void acceptBigTile(TileCoord coord, int archivedBytes, List layerStats) { + if (archivedBytes >= bigTileCutoff) { + topTiles.offer(new TileSummary(coord, archivedBytes, layerStats)); + while (topTiles.size() > TOP_N_TILES) { + topTiles.poll(); + var min = topTiles.peek(); + if (min != null) { + bigTileCutoff = min.size(); + } + } + } + } + public static SummaryCell combine(SummaryCell a, SummaryCell b) { return new SummaryCell().merge(a).merge(b); } + + public List biggestTiles() { + return topTiles.stream().sorted(Comparator.comparingLong(s -> -s.size)).toList(); + } } @@ -467,6 +534,22 @@ public int compareTo(LayerStats o) { } } + record TileSummary(TileCoord coord, int size, List layers) implements Comparable { + + @Override + public int compareTo(TileSummary o) { + int result = Integer.compare(size, o.size); + if (result == 0) { + result = Integer.compare(coord.encoded(), o.coord.encoded()); + } + return result; + } + + TileSummary withSize(int newSize) { + return new TileSummary(coord, newSize, layers); + } + } + public class Updater { private final Summary summary = new Summary(); @@ -478,10 +561,13 @@ public void recordTile(TileCoord coord, int archivedBytes, List laye var tileStat = summary.byTile.get(coord.z()); var layerStat = summary.byLayer.get(coord.z()); tileStat.archivedBytes.accept(archivedBytes); + tileStat.acceptBigTile(coord, archivedBytes, layerStats); + int sum = 0; for (var layer : layerStats) { var cell = layerStat.computeIfAbsent(layer.layer, SummaryCell::new); cell.bytes.accept(layer.layerBytes); + cell.acceptBigTile(coord, layer.layerBytes, layerStats); sum += layer.layerBytes; } tileStat.bytes.accept(sum); diff --git a/planetiler-core/src/test/java/com/onthegomap/planetiler/util/TileStatsTest.java b/planetiler-core/src/test/java/com/onthegomap/planetiler/util/TileStatsTest.java index 88f3594e91..65592aea65 100644 --- a/planetiler-core/src/test/java/com/onthegomap/planetiler/util/TileStatsTest.java +++ b/planetiler-core/src/test/java/com/onthegomap/planetiler/util/TileStatsTest.java @@ -6,6 +6,7 @@ import com.onthegomap.planetiler.VectorTile; import com.onthegomap.planetiler.geo.TileCoord; import java.io.IOException; +import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.Set; @@ -139,4 +140,53 @@ void aggregateTileStats() { assertEquals(7, summary.get().maxSize()); assertEquals(2, summary.get().numTiles()); } + + @Test + void topGzippedTiles() { + var tileStats = new TileStats(); + var updater1 = tileStats.threadLocalUpdater(); + var updater2 = tileStats.threadLocalUpdater(); + for (int i = 0; i < 20; i++) { + (i % 2 == 0 ? updater1 : updater2).recordTile(TileCoord.decode(i), i, List.of()); + } + assertEquals( + List.of( + new TileStats.TileSummary(TileCoord.decode(19), 19, List.of()), + new TileStats.TileSummary(TileCoord.decode(18), 18, List.of()), + new TileStats.TileSummary(TileCoord.decode(17), 17, List.of()), + new TileStats.TileSummary(TileCoord.decode(16), 16, List.of()), + new TileStats.TileSummary(TileCoord.decode(15), 15, List.of()), + new TileStats.TileSummary(TileCoord.decode(14), 14, List.of()), + new TileStats.TileSummary(TileCoord.decode(13), 13, List.of()), + new TileStats.TileSummary(TileCoord.decode(12), 12, List.of()), + new TileStats.TileSummary(TileCoord.decode(11), 11, List.of()), + new TileStats.TileSummary(TileCoord.decode(10), 10, List.of()) + ), + tileStats.summary().get().biggestTiles() + ); + } + + @Test + void topLayerTiles() { + var tileStats = new TileStats(); + var updater1 = tileStats.threadLocalUpdater(); + var updater2 = tileStats.threadLocalUpdater(); + List summaries = new ArrayList<>(); + for (int i = 0; i < 20; i++) { + var summary = new TileStats.TileSummary(TileCoord.decode(i), i, List.of( + new TileStats.LayerStats("a", i * 2, i, 0, 0, 0), + new TileStats.LayerStats("b", i * 3, i, 0, 0, 0) + )); + summaries.add(0, summary); + (i % 2 == 0 ? updater1 : updater2).recordTile(summary.coord(), summary.size(), summary.layers()); + } + assertEquals( + summaries.stream().map(d -> d.withSize(d.coord().encoded() * 2)).limit(10).toList(), + tileStats.summary().get("a").biggestTiles() + ); + assertEquals( + summaries.stream().map(d -> d.withSize(d.coord().encoded() * 3)).limit(10).toList(), + tileStats.summary().get("b").biggestTiles() + ); + } }