Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

215 deterministic ordering v2 #3

Closed
wants to merge 19 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
0ec539d
added unit test to cover equals() and compareTo() in SortableFeature
phanecak-maptiler Oct 19, 2022
3bed0d3
include value if keys match exactly (see issue #214 and #215)
phanecak-maptiler Oct 19, 2022
b6b53b2
LongMerger&co. refactored into SortableFeatureMerger&co. + tests exte…
phanecak-maptiler Oct 19, 2022
3c7ec26
clean-up/dedup of recent additions (1/x)
phanecak-maptiler Oct 19, 2022
70e4626
clean-up/dedup of recent additions (2/x)
phanecak-maptiler Oct 19, 2022
9dff7ed
clean-up/dedup of recent additions (3/x)
phanecak-maptiler Oct 19, 2022
1737ece
clean-up/dedup of recent additions (4/4)
phanecak-maptiler Oct 19, 2022
169dfeb
clean-up
phanecak-maptiler Oct 19, 2022
97f45f4
clean-up: isLessThanParent() variant used only once and small -> refa…
phanecak-maptiler Oct 20, 2022
e0e3795
clean-up: removed comments
phanecak-maptiler Oct 20, 2022
58d8c2a
FeatureGroup tests extended with features which have save SortableFea…
phanecak-maptiler Oct 31, 2022
f9617aa
preliminary fix for random ordering of attributes in SortableFeature'…
phanecak-maptiler Oct 31, 2022
74167d2
previous fix reworked: avoid sort(), but DO stick with TreeMap for at…
phanecak-maptiler Oct 31, 2022
0ffc974
use ne_id as ID for natrural earth features
phanecak-maptiler Nov 4, 2022
2cfe2bc
use source ID to avoid random IDs (yes, idGenerator gives us a sequen…
phanecak-maptiler Nov 4, 2022
2356e92
Merge branch '215-SortableFeatures-deterministic_value' into xxx-215-…
phanecak-maptiler Nov 4, 2022
8413e1c
Merge branch 'main' into 215-deterministic_ordering-v2
phanecak-maptiler Nov 8, 2022
0426da8
added unit test updateWithEqualKeys() to increase overage of adjusted…
phanecak-maptiler Nov 9, 2022
a9c8a2c
clean-up: tagsCount no longer used after previous change
phanecak-maptiler Nov 9, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,21 +1,23 @@
package com.onthegomap.planetiler.collection;

import java.nio.ByteBuffer;
import java.time.Duration;
import java.util.PriorityQueue;
import java.util.Random;
import java.util.function.IntFunction;
import java.util.stream.IntStream;

/**
* Performance tests for {@link LongMinHeap} implementations.
* Performance tests for {@link SortableFeatureMinHeap} implementations.
*
* Times how long it takes to merge N sorted lists of random elements.
*/
public class BenchmarkKWayMerge {

public static void main(String[] args) {
for (int i = 0; i < 4; i++) {
System.err.println();
testMinHeap("quaternary", LongMinHeap::newArrayHeap);
testMinHeap("quaternary", SortableFeatureMinHeap::newArrayHeap);
System.err.println(String.join("\t",
"priorityqueue",
Long.toString(testPriorityQueue(10).toMillis()),
Expand All @@ -25,7 +27,7 @@ public static void main(String[] args) {
}
}

private static void testMinHeap(String name, IntFunction<LongMinHeap> constructor) {
private static void testMinHeap(String name, IntFunction<SortableFeatureMinHeap> constructor) {
System.err.println(String.join("\t",
name,
Long.toString(testUpdates(10, constructor).toMillis()),
Expand All @@ -36,20 +38,28 @@ private static void testMinHeap(String name, IntFunction<LongMinHeap> constructo

private static final Random random = new Random();

private static long[][] getVals(int size) {
final static ByteBuffer byteBuffer = ByteBuffer.allocate(Long.BYTES);

private static SortableFeature newVal(long i) {
byteBuffer.clear().putLong(i);
return new SortableFeature(i, byteBuffer.array());
}

private static SortableFeature[][] getVals(int size) {
int num = 10_000_000;
return IntStream.range(0, size)
.mapToObj(i -> random
.longs(0, 1_000_000_000)
.limit(num / size)
.sorted()
.toArray()
).toArray(long[][]::new);
.mapToObj(BenchmarkKWayMerge::newVal)
.toArray(SortableFeature[]::new)
).toArray(SortableFeature[][]::new);
}

private static Duration testUpdates(int size, IntFunction<LongMinHeap> heapFn) {
private static Duration testUpdates(int size, IntFunction<SortableFeatureMinHeap> heapFn) {
int[] indexes = new int[size];
long[][] vals = getVals(size);
SortableFeature[][] vals = getVals(size);
var heap = heapFn.apply(size);
for (int i = 0; i < size; i++) {
heap.push(i, vals[i][indexes[i]++]);
Expand All @@ -58,7 +68,7 @@ private static Duration testUpdates(int size, IntFunction<LongMinHeap> heapFn) {
while (!heap.isEmpty()) {
int id = heap.peekId();
int index = indexes[id]++;
long[] valList = vals[id];
SortableFeature[] valList = vals[id];
if (index < valList.length) {
heap.updateHead(valList[index]);
} else {
Expand All @@ -68,52 +78,24 @@ private static Duration testUpdates(int size, IntFunction<LongMinHeap> heapFn) {
return Duration.ofNanos(System.nanoTime() - start);
}

static class Item implements Comparable<Item> {
long value;
int id;

@Override
public int compareTo(Item o) {
return Long.compare(value, o.value);
}

@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}

Item item = (Item) o;

return value == item.value;
}

@Override
public int hashCode() {
return (int) (value ^ (value >>> 32));
}
}

private static Duration testPriorityQueue(int size) {
long[][] vals = getVals(size);
SortableFeature[][] vals = getVals(size);
int[] indexes = new int[size];
PriorityQueue<Item> heap = new PriorityQueue<>();
PriorityQueue<SortableFeature> heap = new PriorityQueue<>();
for (int i = 0; i < size; i++) {
Item item = new Item();
item.id = i;
item.value = vals[i][indexes[i]++];
byteBuffer.clear().putLong(i);
SortableFeature temp = vals[i][indexes[i]++];
SortableFeature item = new SortableFeature(temp.key(), byteBuffer.array());
heap.offer(item);
}
var start = System.nanoTime();
while (!heap.isEmpty()) {
var item = heap.poll();
int index = indexes[item.id]++;
long[] valList = vals[item.id];
int id = (int) byteBuffer.clear().put(item.value()).rewind().getLong();
int index = indexes[id]++;
SortableFeature[] valList = vals[id];
if (index < valList.length) {
item.value = valList[index];
item = valList[index];
heap.offer(item);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,10 @@
import com.onthegomap.planetiler.geo.MutableCoordinateSequence;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import javax.annotation.concurrent.NotThreadSafe;
Expand Down Expand Up @@ -366,8 +366,7 @@ public static List<Feature> decode(byte[] encoded) {
}

for (VectorTileProto.Tile.Feature feature : layer.getFeaturesList()) {
int tagsCount = feature.getTagsCount();
Map<String, Object> attrs = new HashMap<>(tagsCount / 2);
Map<String, Object> attrs = new TreeMap<>();
int tagIdx = 0;
while (tagIdx < feature.getTagsCount()) {
String key = keys.get(feature.getTags(tagIdx++));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,25 +36,27 @@
*
* @see <a href="https://en.wikipedia.org/wiki/D-ary_heap">d-ary heap (wikipedia)</a>
*/
class ArrayLongMinHeap implements LongMinHeap {
class ArraySortableFeatureMinHeap implements SortableFeatureMinHeap {
protected static final int NOT_PRESENT = -1;
protected final int[] tree;
protected final int[] positions;
protected final long[] vals;
protected final SortableFeature[] sortableFeatures;
protected final int max;
protected int size;

/**
* @param elements the number of elements that can be stored in this heap. Currently the heap cannot be resized or
* shrunk/trimmed after initial creation. elements-1 is the maximum id that can be stored in this heap
*/
ArrayLongMinHeap(int elements) {
ArraySortableFeatureMinHeap(int elements) {
// we use an offset of one to make the arithmetic a bit simpler/more efficient, the 0th elements are not used!
tree = new int[elements + 1];
positions = new int[elements + 1];
Arrays.fill(positions, NOT_PRESENT);
vals = new long[elements + 1];
vals[0] = Long.MIN_VALUE;
sortableFeatures = new SortableFeature[elements + 1];
this.max = elements;
}

Expand All @@ -77,7 +79,7 @@ public boolean isEmpty() {
}

@Override
public void push(int id, long value) {
public void push(int id, SortableFeature sf) {
checkIdInRange(id);
if (size == max) {
throw new IllegalStateException("Cannot push anymore, the heap is already full. size: " + size);
Expand All @@ -89,7 +91,8 @@ public void push(int id, long value) {
size++;
tree[size] = id;
positions[id] = size;
vals[size] = value;
vals[size] = sf.key();
sortableFeatures[size] = sf;
percolateUp(size);
}

Expand All @@ -100,25 +103,39 @@ public boolean contains(int id) {
}

@Override
public void update(int id, long value) {
public void update(int id, SortableFeature sf) {
checkIdInRange(id);
int index = positions[id];
if (index < 0) {
throw new IllegalStateException(
"The heap does not contain: " + id + ". Use the contains method to check this before calling update");
}
long prev = vals[index];
long value = sf.key();
vals[index] = value;
if (value > prev) {
sortableFeatures[index] = sf;
percolateDown(index);
} else if (value < prev) {
sortableFeatures[index] = sf;
percolateUp(index);
} else {
byte[] bytes = sf.value();
byte[] prevBytes = sortableFeatures[index].value();
sortableFeatures[index] = sf;
int compareResult = Arrays.compare(bytes, prevBytes);
if (compareResult > 0) {
percolateDown(index);
} else {
percolateUp(index);
}
}
}

@Override
public void updateHead(long value) {
vals[1] = value;
public void updateHead(SortableFeature sf) {
vals[1] = sf.key();
sortableFeatures[1] = sf;
percolateDown(1);
}

Expand All @@ -128,15 +145,17 @@ public int peekId() {
}

@Override
public long peekValue() {
return vals[1];
public SortableFeature peekValue() {
return sortableFeatures[1];
}

@Override
public int poll() {
int id = peekId();
tree[1] = tree[size];
vals[1] = vals[size];
sortableFeatures[1] = sortableFeatures[size];
sortableFeatures[size] = null;
positions[tree[1]] = 1;
positions[id] = NOT_PRESENT;
size--;
Expand All @@ -152,6 +171,26 @@ public void clear() {
size = 0;
}

private void switchSortableFeatures(int index1, int index2) {
final SortableFeature temp = sortableFeatures[index1];
sortableFeatures[index1] = sortableFeatures[index2];
sortableFeatures[index2] = temp;
}

private byte[] getValue(SortableFeature sf) {
if (sf == null) {
return null;
}
return sf.value();
}

private boolean isLessThanParent(int index, int parent, long val, long parentValue) {
if (val == parentValue) {
return Arrays.compare(getValue(sortableFeatures[index]), getValue(sortableFeatures[parent])) < 0;
}
return val < parentValue;
}

private void percolateUp(int index) {
assert index != 0;
if (index == 1) {
Expand All @@ -162,8 +201,9 @@ private void percolateUp(int index) {
// the finish condition (index==0) is covered here automatically because we set vals[0]=-inf
int parent;
long parentValue;
while (val < (parentValue = vals[parent = parent(index)])) {
while (isLessThanParent(index, parent = parent(index), val, parentValue = vals[parent])) {
vals[index] = parentValue;
switchSortableFeatures(index, parent);
positions[tree[index] = tree[parent]] = index;
index = parent;
}
Expand Down Expand Up @@ -208,10 +248,14 @@ private void percolateDown(int index) {
}
}
}
if (minValue >= val) {
if (minValue > val) {
break;
} else if (minValue == val &&
Arrays.compare(sortableFeatures[minChild].value(), sortableFeatures[index].value()) >= 0) {
break;
}
vals[index] = minValue;
switchSortableFeatures(index, minChild);
positions[tree[index] = tree[minChild]] = index;
index = minChild;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ public Iterator<SortableFeature> iterator(int shard, int shards) {
}
}

return LongMerger.mergeIterators(iterators);
return SortableFeatureMerger.mergeIterators(iterators);
}

public int chunks() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,11 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.TreeMap;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.Consumer;
import java.util.function.Function;
Expand Down Expand Up @@ -421,7 +421,7 @@ private VectorTile.Feature decodeVectorTileFeature(SortableFeature entry) {
GeometryType geomType = decodeGeomType(geomTypeAndScale);
int scale = decodeScale(geomTypeAndScale);
int mapSize = unpacker.unpackMapHeader();
Map<String, Object> attrs = new HashMap<>(mapSize);
Map<String, Object> attrs = new TreeMap<>();
for (int i = 0; i < mapSize; i++) {
String key = commonValueStrings.decode(unpacker.unpackInt());
Value v = unpacker.unpackValue();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ default ParallelIterator parallelIterator(Stats stats, int threads) {
}
}
});
return new ParallelIterator(reader, LongMerger.mergeSuppliers(queues));
return new ParallelIterator(reader, SortableFeatureMerger.mergeSuppliers(queues));
}

record ParallelIterator(Worker reader, @Override Iterator<SortableFeature> iterator)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
* An item with a {@code long key} that can be used for sorting/grouping.
*
* These items can be sorted or grouped by {@link FeatureSort}/{@link FeatureGroup} implementations. Sorted lists can
* also be merged using {@link LongMerger}.
* also be merged using {@link SortableFeatureMerger}.
*/
public interface HasLongSortKey {
/** Value to sort/group items by. */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,11 @@ public record SortableFeature(@Override long key, byte[] value) implements Compa

@Override
public int compareTo(SortableFeature o) {
return Long.compare(key, o.key);
int result = Long.compare(key, o.key);
if (result == 0) {
result = Arrays.compare(value, o.value);
}
return result;
}

@Override
Expand Down
Loading