From c079ed047e8eebfd928923547095bf42c60046c7 Mon Sep 17 00:00:00 2001 From: Mike Barry Date: Thu, 11 Jan 2024 06:24:41 -0500 Subject: [PATCH] add tie breaker to long merger --- .../collection/BenchmarkKWayMerge.java | 2 +- .../com/onthegomap/planetiler/VectorTile.java | 3 +- .../collection/ArrayLongMinHeap.java | 131 ++++++++++-------- .../planetiler/collection/LongMerger.java | 9 +- .../planetiler/collection/LongMinHeap.java | 6 +- .../planetiler/util/CompareArchives.java | 4 +- .../collection/LongMinHeapTest.java | 29 +++- 7 files changed, 116 insertions(+), 68 deletions(-) diff --git a/planetiler-benchmarks/src/main/java/com/onthegomap/planetiler/collection/BenchmarkKWayMerge.java b/planetiler-benchmarks/src/main/java/com/onthegomap/planetiler/collection/BenchmarkKWayMerge.java index 6ad42302fb..20c9548a28 100644 --- a/planetiler-benchmarks/src/main/java/com/onthegomap/planetiler/collection/BenchmarkKWayMerge.java +++ b/planetiler-benchmarks/src/main/java/com/onthegomap/planetiler/collection/BenchmarkKWayMerge.java @@ -15,7 +15,7 @@ public class BenchmarkKWayMerge { public static void main(String[] args) { for (int i = 0; i < 4; i++) { System.err.println(); - testMinHeap("quaternary", LongMinHeap::newArrayHeap); + testMinHeap("quaternary", n -> LongMinHeap.newArrayHeap(n, (a, b) -> 0)); System.err.println(String.join("\t", "priorityqueue", Long.toString(testPriorityQueue(10).toMillis()), diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/VectorTile.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/VectorTile.java index 1e46cf5e42..c2c9cac2cc 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/VectorTile.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/VectorTile.java @@ -35,6 +35,7 @@ import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.TreeMap; import java.util.function.Consumer; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -80,7 +81,7 @@ public class VectorTile { // TODO make these configurable private static final int EXTENT = 4096; private static final double SIZE = 256d; - private final Map layers = new LinkedHashMap<>(); + private final Map layers = new TreeMap<>(); private LayerAttrStats.Updater.ForZoom layerStatsTracker = LayerAttrStats.Updater.ForZoom.NOOP; private static int[] getCommands(Geometry input, int scale) { diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/collection/ArrayLongMinHeap.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/collection/ArrayLongMinHeap.java index 2fa8f1db50..213d781287 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/collection/ArrayLongMinHeap.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/collection/ArrayLongMinHeap.java @@ -18,6 +18,7 @@ package com.onthegomap.planetiler.collection; import java.util.Arrays; +import java.util.function.IntBinaryOperator; /** * A min-heap stored in an array where each element has 4 children. @@ -38,24 +39,26 @@ */ class ArrayLongMinHeap implements LongMinHeap { protected static final int NOT_PRESENT = -1; - protected final int[] tree; - protected final int[] positions; - protected final long[] vals; + protected final int[] posToId; + protected final int[] idToPos; + protected final long[] posToValue; protected final int max; protected int size; + private final IntBinaryOperator tieBreaker; /** * @param elements the number of elements that can be stored in this heap. Currently the heap cannot be resized or * shrunk/trimmed after initial creation. elements-1 is the maximum id that can be stored in this heap */ - ArrayLongMinHeap(int elements) { + ArrayLongMinHeap(int elements, IntBinaryOperator tieBreaker) { // we use an offset of one to make the arithmetic a bit simpler/more efficient, the 0th elements are not used! - tree = new int[elements + 1]; - positions = new int[elements + 1]; - Arrays.fill(positions, NOT_PRESENT); - vals = new long[elements + 1]; - vals[0] = Long.MIN_VALUE; + posToId = new int[elements + 1]; + idToPos = new int[elements + 1]; + Arrays.fill(idToPos, NOT_PRESENT); + posToValue = new long[elements + 1]; + posToValue[0] = Long.MIN_VALUE; this.max = elements; + this.tieBreaker = tieBreaker; } private static int firstChild(int index) { @@ -87,58 +90,62 @@ public void push(int id, long value) { " was pushed already, you need to use the update method if you want to change its value"); } size++; - tree[size] = id; - positions[id] = size; - vals[size] = value; + posToId[size] = id; + idToPos[id] = size; + posToValue[size] = value; percolateUp(size); } @Override public boolean contains(int id) { checkIdInRange(id); - return positions[id] != NOT_PRESENT; + return idToPos[id] != NOT_PRESENT; } @Override public void update(int id, long value) { checkIdInRange(id); - int index = positions[id]; + int index = idToPos[id]; if (index < 0) { throw new IllegalStateException( "The heap does not contain: " + id + ". Use the contains method to check this before calling update"); } - long prev = vals[index]; - vals[index] = value; - if (value > prev) { // TODO + long prev = posToValue[index]; + posToValue[index] = value; + int cmp = Long.compare(value, prev); + if (cmp == 0 && value != Long.MIN_VALUE) { + cmp = tieBreaker.applyAsInt(id, posToId[index]); + } + if (cmp > 0) { percolateDown(index); - } else if (value < prev) { // TODO + } else if (cmp < 0) { percolateUp(index); } } @Override public void updateHead(long value) { - vals[1] = value; + posToValue[1] = value; percolateDown(1); } @Override public int peekId() { - return tree[1]; + return posToId[1]; } @Override public long peekValue() { - return vals[1]; + return posToValue[1]; } @Override public int poll() { int id = peekId(); - tree[1] = tree[size]; - vals[1] = vals[size]; - positions[tree[1]] = 1; - positions[id] = NOT_PRESENT; + posToId[1] = posToId[size]; + posToValue[1] = posToValue[size]; + idToPos[posToId[1]] = 1; + idToPos[id] = NOT_PRESENT; size--; percolateDown(1); return id; @@ -147,29 +154,30 @@ public int poll() { @Override public void clear() { for (int i = 1; i <= size; i++) { - positions[tree[i]] = NOT_PRESENT; + idToPos[posToId[i]] = NOT_PRESENT; } size = 0; } - private void percolateUp(int index) { - assert index != 0; - if (index == 1) { + private void percolateUp(int pos) { + assert pos != 0; + if (pos == 1) { return; } - final int el = tree[index]; - final long val = vals[index]; + final int id = posToId[pos]; + final long val = posToValue[pos]; // the finish condition (index==0) is covered here automatically because we set vals[0]=-inf int parent; long parentValue; - while (val < (parentValue = vals[parent = parent(index)])) { // TODO - vals[index] = parentValue; - positions[tree[index] = tree[parent]] = index; - index = parent; + while (val < (parentValue = posToValue[parent = parent(pos)]) || + (val == parentValue && val != Long.MIN_VALUE && tieBreaker.applyAsInt(id, posToId[parent]) < 0)) { + posToValue[pos] = parentValue; + idToPos[posToId[pos] = posToId[parent]] = pos; + pos = parent; } - tree[index] = el; - vals[index] = val; - positions[tree[index]] = index; + posToId[pos] = id; + posToValue[pos] = val; + idToPos[posToId[pos]] = pos; } private void checkIdInRange(int id) { @@ -178,45 +186,52 @@ private void checkIdInRange(int id) { } } - private void percolateDown(int index) { + private void percolateDown(int pos) { if (size == 0) { return; } - assert index > 0; - assert index <= size; - final int el = tree[index]; - final long val = vals[index]; + assert pos > 0; + assert pos <= size; + final int id = posToId[pos]; + final long value = posToValue[pos]; int child; - while ((child = firstChild(index)) <= size) { + while ((child = firstChild(pos)) <= size) { // optimization: this is a very hot code path for performance of k-way merging, // so manually-unroll the loop over the 4 child elements to find the minimum value int minChild = child; - long minValue = vals[child], value; + long minValue = posToValue[child], childValue; if (++child <= size) { - if ((value = vals[child]) < minValue) { // TODO + if ((childValue = posToValue[child]) < minValue || + (childValue == minValue && childValue != Long.MIN_VALUE && + tieBreaker.applyAsInt(posToId[child], posToId[minChild]) < 0)) { minChild = child; - minValue = value; + minValue = childValue; } if (++child <= size) { - if ((value = vals[child]) < minValue) { // TODO + if ((childValue = posToValue[child]) < minValue || + (childValue == minValue && childValue != Long.MIN_VALUE && + tieBreaker.applyAsInt(posToId[child], posToId[minChild]) < 0)) { minChild = child; - minValue = value; + minValue = childValue; } - if (++child <= size && (value = vals[child]) < minValue) { // TODO + if (++child <= size && ((childValue = posToValue[child]) < minValue || + (childValue == minValue && childValue != Long.MIN_VALUE && + tieBreaker.applyAsInt(posToId[child], posToId[minChild]) < 0))) { minChild = child; - minValue = value; + minValue = childValue; } } } - if (minValue >= val) { // TODO ??? + if (minValue > value || + (minValue == value && minValue != Long.MIN_VALUE && tieBreaker.applyAsInt(posToId[minChild], id) >= 0)) { break; } - vals[index] = minValue; - positions[tree[index] = tree[minChild]] = index; - index = minChild; + posToValue[pos] = minValue; + idToPos[posToId[pos] = posToId[minChild]] = pos; + pos = minChild; } - tree[index] = el; - vals[index] = val; - positions[el] = index; + posToId[pos] = id; + posToValue[pos] = value; + idToPos[id] = pos; } } diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/collection/LongMerger.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/collection/LongMerger.java index 3918e299c7..9a8951c7f9 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/collection/LongMerger.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/collection/LongMerger.java @@ -30,7 +30,7 @@ public static Iterator mergeIterators(List iterators.get(0); case 2 -> new TwoWayMerge<>(iterators.get(0), iterators.get(1), tieBreaker); case 3 -> new ThreeWayMerge<>(iterators.get(0), iterators.get(1), iterators.get(2), tieBreaker); - default -> new KWayMerge<>(iterators); + default -> new KWayMerge<>(iterators, tieBreaker); }; } @@ -182,10 +182,13 @@ private static class KWayMerge implements Iterator private final LongMinHeap heap; @SuppressWarnings("unchecked") - KWayMerge(List> inputIterators) { + KWayMerge(List> inputIterators, Comparator tieBreaker) { this.iterators = new Iterator[inputIterators.size()]; this.items = (T[]) new HasLongSortKey[inputIterators.size()]; - this.heap = LongMinHeap.newArrayHeap(inputIterators.size()); + final int size = inputIterators.size(); + this.heap = LongMinHeap.newArrayHeap(inputIterators.size(), (a, b) -> { + return a >= size || b >= size ? 0 : tieBreaker.compare(items[a], items[b]); + }); int outIdx = 0; for (Iterator iter : inputIterators) { if (iter.hasNext()) { diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/collection/LongMinHeap.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/collection/LongMinHeap.java index f29985e995..2b47b75b18 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/collection/LongMinHeap.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/collection/LongMinHeap.java @@ -17,6 +17,8 @@ */ package com.onthegomap.planetiler.collection; +import java.util.function.IntBinaryOperator; + /** * API for min-heaps that keeps track of {@code int} keys in a range from {@code [0, size)} ordered by {@code long} * values. @@ -31,8 +33,8 @@ public interface LongMinHeap { *

* This is slightly faster than a traditional binary min heap due to a shallower, more cache-friendly memory layout. */ - static LongMinHeap newArrayHeap(int elements) { - return new ArrayLongMinHeap(elements); + static LongMinHeap newArrayHeap(int elements, IntBinaryOperator tieBreaker) { + return new ArrayLongMinHeap(elements, tieBreaker); } int size(); diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/util/CompareArchives.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/CompareArchives.java index 0a73c7e521..e7406971ea 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/util/CompareArchives.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/CompareArchives.java @@ -205,8 +205,8 @@ private void compareLayer(VectorTileProto.Tile.Layer layer1, VectorTileProto.Til compareList(name, "keys list", layer1.getKeysList(), layer2.getKeysList()); compareList(name, "values list", layer1.getValuesList(), layer2.getValuesList()); if (compareValues(name, "features count", layer1.getFeaturesCount(), layer2.getFeaturesCount())) { - var ids1 = layer1.getFeaturesList().stream().map(f -> f.getId()); - var ids2 = layer1.getFeaturesList().stream().map(f -> f.getId()); + var ids1 = layer1.getFeaturesList().stream().map(f -> f.getId()).toList(); + var ids2 = layer1.getFeaturesList().stream().map(f -> f.getId()).toList(); if (compareValues(name, "feature ids", Set.of(ids1), Set.of(ids2)) && compareValues(name, "feature order", ids1, ids2)) { for (int i = 0; i < layer1.getFeaturesCount() && i < layer2.getFeaturesCount(); i++) { diff --git a/planetiler-core/src/test/java/com/onthegomap/planetiler/collection/LongMinHeapTest.java b/planetiler-core/src/test/java/com/onthegomap/planetiler/collection/LongMinHeapTest.java index 7c3d787f8e..65ece71268 100644 --- a/planetiler-core/src/test/java/com/onthegomap/planetiler/collection/LongMinHeapTest.java +++ b/planetiler-core/src/test/java/com/onthegomap/planetiler/collection/LongMinHeapTest.java @@ -29,6 +29,8 @@ import java.util.PriorityQueue; import java.util.Random; import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.CsvSource; /** @@ -42,7 +44,7 @@ class LongMinHeapTest { protected LongMinHeap heap; void create(int capacity) { - heap = LongMinHeap.newArrayHeap(capacity); + heap = LongMinHeap.newArrayHeap(capacity, Integer::compare); } @Test @@ -77,6 +79,31 @@ void duplicateElements() { assertThrows(IllegalStateException.class, () -> heap.push(2, 4L)); } + @ParameterizedTest + @CsvSource({ + "0, 1, 2, 3, 4, 5", + "5, 4, 3, 2, 1, 0", + "0, 1, 2, 5, 4, 3", + "0, 1, 5, 2, 4, 3", + "0, 5, 1, 2, 4, 3", + "5, 0, 1, 2, 4, 3", + }) + void tieBreaker(int a, int b, int c, int d, int e, int f) { + heap = LongMinHeap.newArrayHeap(6, (id1, id2) -> -Integer.compare(id1, id2)); + heap.push(a, 0L); + heap.push(b, 0L); + heap.push(c, 0L); + heap.push(d, 0L); + heap.push(e, 0L); + heap.push(f, 0L); + assertEquals(5, heap.poll()); + assertEquals(4, heap.poll()); + assertEquals(3, heap.poll()); + assertEquals(2, heap.poll()); + assertEquals(1, heap.poll()); + assertEquals(0, heap.poll()); + } + @Test void testContains() { create(4);