// Review summary of the patch below. This text sits ahead of the first `diff --git` header, so it is not part of any hunk.
// NOTE(review): this copy of the patch looks extraction-damaged -- line breaks are collapsed and the pom.xml tags and
//   Java generic type arguments seem to be missing -- so it presumably cannot be applied as-is; confirm against the original.
//
// jvector-base/pom.xml: adds lines naming org.jctools / jctools-core / 4.0.1 (presumably a new dependency entry).
// OnHeapGraphIndex.java:
//   - adds imports for NonBlockingHashMap, NonBlockingHashMapLong, Arrays and AtomicLongFieldUpdater;
//   - replaces the AtomicReference `entryPoint` field with `volatile long entryPointVal`, accessed through the static
//     AtomicLongFieldUpdater `entryPointUpdater`; entry() and entryNode() cast the value back to int;
//   - replaces the ConcurrentHashMap `nodes` with a NonBlockingHashMapLong constructed with the argument 1024;
//   - getNodes() now iterates Arrays.stream(nodes.keySetLong()).iterator() and returns keysInts.next().intValue().
// Bench.java: diskGrid changes from List.of(false, true) to List.of( true).
//
// NOTE(review): keysInts.next().intValue() goes through the boxed next(); if keySetLong() returns long[] (TODO confirm),
//   `(int) keysInts.nextLong()` would avoid the boxing.
// NOTE(review): the iterator size comes from nodes.size() while the keys come from a separate keySetLong() call;
//   verify the two cannot disagree while nodes are being added concurrently.
// NOTE(review): only NonBlockingHashMapLong is used in the visible hunks; the NonBlockingHashMap import and the retained
//   ConcurrentHashMap / AtomicReference imports may be unused after this change -- check against the full file.
// NOTE(review): narrowing diskGrid in Bench looks like a local benchmarking tweak -- confirm it is meant to be committed.
diff --git a/jvector-base/pom.xml b/jvector-base/pom.xml index cbf849544..9ac247f76 100644 --- a/jvector-base/pom.xml +++ b/jvector-base/pom.xml @@ -11,4 +11,12 @@ jvector-base Base + + + + org.jctools + jctools-core + 4.0.1 + + diff --git a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/OnHeapGraphIndex.java b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/OnHeapGraphIndex.java index 2bc014af4..3eb4314d1 100644 --- a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/OnHeapGraphIndex.java +++ b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/OnHeapGraphIndex.java @@ -26,8 +26,12 @@ import io.github.jbellis.jvector.util.Accountable; import io.github.jbellis.jvector.util.RamUsageEstimator; +import org.jctools.maps.NonBlockingHashMap; +import org.jctools.maps.NonBlockingHashMapLong; +import java.util.Arrays; import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicLongFieldUpdater; import java.util.concurrent.atomic.AtomicReference; import java.util.function.BiFunction; @@ -39,10 +43,12 @@ * and `nextNeighbor` operations. */ public final class OnHeapGraphIndex implements GraphIndex, Accountable { + private static final AtomicLongFieldUpdater entryPointUpdater = AtomicLongFieldUpdater.newUpdater(OnHeapGraphIndex.class, "entryPointVal"); + // the current graph entry node on the top level. 
-1 if not set - private final AtomicReference entryPoint; + private volatile long entryPointVal; - private final ConcurrentHashMap nodes; + private final NonBlockingHashMapLong nodes; // max neighbors/edges per node final int nsize0; @@ -51,11 +57,10 @@ public final class OnHeapGraphIndex implements GraphIndex, Accountable { OnHeapGraphIndex( int M, BiFunction neighborFactory) { this.neighborFactory = neighborFactory; - this.entryPoint = - new AtomicReference<>(-1); // Entry node should be negative until a node is added + this.entryPointVal = -1; // Entry node should be negative until a node is added this.nsize0 = 2 * M; - this.nodes = new ConcurrentHashMap<>(); + this.nodes = new NonBlockingHashMapLong<>(1024); } /** @@ -91,7 +96,7 @@ public void addNode(int node) { /** must be called after addNode once neighbors are linked in all levels. */ void markComplete(int node) { - entryPoint.accumulateAndGet( + entryPointUpdater.accumulateAndGet(this, node, (oldEntry, newEntry) -> { if (oldEntry >= 0) { @@ -103,7 +108,7 @@ void markComplete(int node) { } public void updateEntryNode(int node) { - entryPoint.set(node); + entryPointUpdater.set(this, node); } @Override @@ -112,7 +117,7 @@ public int maxDegree() { } int entry() { - return entryPoint.get(); + return (int) entryPointUpdater.get(this); } @Override @@ -120,11 +125,11 @@ public NodesIterator getNodes() { // We avoid the temptation to optimize this by using ArrayNodesIterator. // This is because, while the graph will contain sequential ordinals once the graph is complete, // we should not assume that that is the only time it will be called. 
- var keysInts = nodes.keySet().stream().mapToInt(Integer::intValue).iterator(); + var keysInts = Arrays.stream(nodes.keySetLong()).iterator(); return new NodesIterator(nodes.size()) { @Override public int nextInt() { - return keysInts.nextInt(); + return keysInts.next().intValue(); } @Override @@ -210,7 +215,7 @@ private static long concurrentHashMapRamUsed(int externalSize) { @Override public String toString() { - return String.format("OnHeapGraphIndex(size=%d, entryPoint=%d)", size(), entryPoint.get()); + return String.format("OnHeapGraphIndex(size=%d, entryPoint=%d)", size(), entryPointUpdater.get(this)); } @Override @@ -232,7 +237,7 @@ void validateEntryNode() { if (size() == 0) { return; } - var en = entryPoint.get(); + var en = entryPointUpdater.get(this); if (!(en >= 0 && nodes.containsKey(en))) { throw new IllegalStateException("Entry node was incompletely added! " + en); } @@ -255,12 +260,12 @@ public int size() { @Override public int entryNode() { - return OnHeapGraphIndex.this.entryPoint.get(); + return (int) entryPointUpdater.get(OnHeapGraphIndex.this); } @Override public String toString() { - return "OnHeapGraphIndexView(size=" + size() + ", entryPoint=" + entryPoint.get(); + return "OnHeapGraphIndexView(size=" + size() + ", entryPoint=" + entryPointUpdater.get(OnHeapGraphIndex.this); } @Override diff --git a/jvector-examples/src/main/java/io/github/jbellis/jvector/example/Bench.java b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/Bench.java index 25a1e25c2..8acd0abe0 100644 --- a/jvector-examples/src/main/java/io/github/jbellis/jvector/example/Bench.java +++ b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/Bench.java @@ -137,7 +137,7 @@ public static void main(String[] args) throws IOException { var mGrid = List.of(8, 12, 16, 24, 32, 48, 64); var efConstructionGrid = List.of(60, 80, 100, 120, 160, 200, 400, 600, 800); var efSearchGrid = List.of(1, 2); - var diskGrid = 
List.of( true); // this dataset contains more than 10k query vectors, so we limit it with .subList var adaSet = loadWikipediaData();