Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Deletes #117

Merged
merged 42 commits into from
Oct 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
4545399
rename
jbellis Oct 6, 2023
596d133
new node value won't change, pull it out of loop
jbellis Oct 6, 2023
28db65e
TODO
jbellis Oct 6, 2023
430f057
wip
jbellis Oct 6, 2023
4c81c1e
Add markNodeDeleted
jbellis Oct 6, 2023
3b22f17
remove View.getSortedNodes
jbellis Oct 6, 2023
73525a1
wip
jbellis Oct 6, 2023
a7e258e
merge
jbellis Oct 6, 2023
9ec01ff
merge and get it building
jbellis Oct 7, 2023
b0f21d3
formatting
jbellis Oct 8, 2023
38bd370
replace validateGraph with assertGraphEquals
jbellis Oct 8, 2023
118c5b1
clean out vestigial document cruft from mock vectorvalues
jbellis Oct 8, 2023
4600ccf
format
jbellis Oct 8, 2023
18b99db
r/m numVectors field (always equal to array length)
jbellis Oct 8, 2023
f37bd49
createRandom[]Vectors no longer leaves null entries that need to be c…
jbellis Oct 8, 2023
b7db4a3
formatting
jbellis Oct 8, 2023
2cec2e2
first test for deletions
jbellis Oct 8, 2023
3abdfa9
wiring in the purge. almost passes tests
jbellis Oct 8, 2023
35bf868
fix mergeNeighbors to not add duplicate nodes, and fix test to check …
jbellis Oct 8, 2023
cb6ac31
- fix removeDeletedNeighbors
jbellis Oct 8, 2023
557d9d6
- fix removeDeletedNeighbors
jbellis Oct 8, 2023
291aefe
merge from main
jbellis Oct 11, 2023
9437684
finish implementing renumbering for writes
jbellis Oct 11, 2023
e05cd0c
rename nsize0 -> maxDegree
jbellis Oct 12, 2023
5cf1d74
show input vectors when assert fails
jbellis Oct 12, 2023
feec7f7
re-use buildSequentially
jbellis Oct 12, 2023
cc33203
encapsulate OHGI better
jbellis Oct 12, 2023
31a54e9
instead of renumbering implicitly, let caller provide remapper
jbellis Oct 12, 2023
1788350
add save and load methods for OHGI
jbellis Oct 12, 2023
15003b0
Merge remote-tracking branch 'origin/main' into deletes
jbellis Oct 12, 2023
d01d737
r/m unused CNS.insert method with confusing semantics
jbellis Oct 13, 2023
14ace18
Merge remote-tracking branch 'origin/deletes' into deletes
jbellis Oct 13, 2023
be714fd
fix insertDiverse ignoring current neighbors
jbellis Oct 13, 2023
bdda8a2
ram freed is proportional to nodes removed
jbellis Oct 13, 2023
952fe1a
merge ConcurrentNeighborArray into NeighborArray
jbellis Oct 13, 2023
fea613b
fix node-present check
jbellis Oct 13, 2023
bf989c1
make getSequentialRenumbering public
jbellis Oct 13, 2023
046c799
add failing testRenumberingOnDelete
jbellis Oct 13, 2023
ff77ff5
refactor to take Map instead of Function; sort writes by new ordinal …
jbellis Oct 13, 2023
2e0c63f
fix ci bitching about javadoc
jbellis Oct 13, 2023
a0fa7ac
fix typos
jbellis Oct 19, 2023
7020849
merge
jbellis Oct 19, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import io.github.jbellis.jvector.graph.GraphIndex;
import io.github.jbellis.jvector.graph.NodesIterator;
import io.github.jbellis.jvector.util.Accountable;
import io.github.jbellis.jvector.util.Bits;

import java.io.IOException;
import java.io.UncheckedIOException;
Expand Down Expand Up @@ -106,13 +107,8 @@ public int entryNode() {
}

@Override
public int[] getSortedNodes() {
return View.super.getSortedNodes();
}

@Override
public int getNeighborCount(int node) {
return View.super.getNeighborCount(node);
public Bits liveNodes() {
return view.liveNodes();
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,19 @@

import io.github.jbellis.jvector.graph.GraphIndex;
import io.github.jbellis.jvector.graph.NodesIterator;
import io.github.jbellis.jvector.graph.OnHeapGraphIndex;
import io.github.jbellis.jvector.graph.RandomAccessVectorValues;
import io.github.jbellis.jvector.util.Accountable;
import io.github.jbellis.jvector.util.Bits;

import java.io.DataOutput;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;
import java.util.stream.IntStream;

public class OnDiskGraphIndex<T> implements GraphIndex<T>, AutoCloseable, Accountable
{
Expand All @@ -49,6 +56,26 @@ public OnDiskGraphIndex(ReaderSupplier readerSupplier, long offset)
}
}

/**
 * Computes a compacting renumbering of the nodes present in {@code graph}.
 * <p>
 * Every ordinal in {@code [0, view.getIdUpperBound())} for which
 * {@code graph.containsNode} is true is assigned the next unused ordinal, counting up from 0.
 * Because old ordinals are visited in ascending order, relative ordering is kept: for all
 * present node ids i and j, if i &lt; j then map[i] &lt; map[j].
 *
 * @param graph the graph whose node ordinals should be renumbered
 * @return a Map of old to new graph ordinals; the new ordinals are sequential starting at 0
 * @throws RuntimeException wrapping any exception raised while using or closing the graph view
 */
public static <T> Map<Integer, Integer> getSequentialRenumbering(GraphIndex<T> graph) {
    try (var view = graph.getView()) {
        Map<Integer, Integer> renumbering = new HashMap<>();
        int oldOrdinal = 0;
        while (oldOrdinal < view.getIdUpperBound()) {
            if (graph.containsNode(oldOrdinal)) {
                // keys are distinct, so the current map size is exactly the next free new ordinal
                renumbering.put(oldOrdinal, renumbering.size());
            }
            oldOrdinal++;
        }
        return renumbering;
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}

@Override
public int size() {
return size;
Expand Down Expand Up @@ -118,6 +145,11 @@ public int entryNode() {
return OnDiskGraphIndex.this.entryNode;
}

/**
 * Returns {@link Bits#ALL} unconditionally.
 * NOTE(review): presumably this means every node in the on-disk graph is treated as live
 * (no deletion tracking here) -- confirm against the {@code Bits} contract.
 */
@Override
public Bits liveNodes() {
return Bits.ALL;
}

@Override
public void close() throws IOException {
reader.close();
Expand All @@ -127,7 +159,7 @@ public void close() throws IOException {
@Override
public NodesIterator getNodes()
{
throw new UnsupportedOperationException();
return NodesIterator.fromPrimitiveIterator(IntStream.range(0, size).iterator(), size);
}

@Override
Expand All @@ -139,37 +171,92 @@ public void close() throws IOException {
readerSupplier.close();
}

// takes Graph and Vectors separately since I'm reluctant to introduce a Vectors reference
// to OnHeapGraphIndex just for this method. Maybe that will end up the best solution,
// but I'm not sure yet.
public static <T> void write(GraphIndex<T> graph, RandomAccessVectorValues<T> vectors, DataOutput out) throws IOException {
assert graph.size() == vectors.size() : String.format("graph size %d != vectors size %d", graph.size(), vectors.size());

var view = graph.getView();

// graph-level properties
out.writeInt(graph.size());
out.writeInt(vectors.dimension());
out.writeInt(view.entryNode());
out.writeInt(graph.maxDegree());

// for each graph node, write the associated vector and its neighbors
for (int node = 0; node < graph.size(); node++) {
out.writeInt(node); // unnecessary, but a reasonable sanity check
Io.writeFloats(out, (float[]) vectors.vectorValue(node));

var neighbors = view.getNeighborsIterator(node);
out.writeInt(neighbors.size());
int n = 0;
for ( ; n < neighbors.size(); n++) {
out.writeInt(neighbors.nextInt());
/**
 * Writes {@code graph} and its vectors to {@code out}, numbering the nodes with
 * {@code getSequentialRenumbering(graph)}.
 * <p>
 * If any nodes have been deleted, you must use the overload specifying
 * {@code oldToNewOrdinals} instead.
 *
 * @param graph the graph to write
 * @param vectors the vectors associated with each node
 * @param out the output to write to
 * @throws IllegalArgumentException if the view's id upper bound exceeds {@code graph.size()},
 *         i.e. the graph contains deletes
 * @throws IOException if using or closing the graph view fails, or if the delegated write fails
 */
public static <T> void write(GraphIndex<T> graph, RandomAccessVectorValues<T> vectors, DataOutput out)
        throws IOException
{
    int idUpperBound;
    try (var view = graph.getView()) {
        idUpperBound = view.getIdUpperBound();
    } catch (Exception e) {
        throw new IOException(e);
    }
    // Checked outside the try block on purpose: inside it, the broad catch above would
    // swallow the IllegalArgumentException and rethrow it wrapped in an IOException.
    if (idUpperBound > graph.size()) {
        throw new IllegalArgumentException("Graph contains deletes, must specify oldToNewOrdinals map");
    }
    write(graph, vectors, getSequentialRenumbering(graph), out);
}

/**
* @param graph the graph to write
* @param vectors the vectors associated with each node
* @param oldToNewOrdinals A map from old to new ordinals. If ordinal numbering does not matter,
* you can use `getSequentialRenumbering`, which will "fill in" holes left by
* any deleted nodes.
* @param out the output to write to
*/
public static <T> void write(GraphIndex<T> graph,
RandomAccessVectorValues<T> vectors,
Map<Integer, Integer> oldToNewOrdinals,
DataOutput out)
throws IOException
{
if (graph instanceof OnHeapGraphIndex) {
var ohgi = (OnHeapGraphIndex<T>) graph;
if (ohgi.getDeletedNodes().cardinality() > 0) {
throw new IllegalArgumentException("Run builder.cleanup() before writing the graph");
}
assert !neighbors.hasNext();
}
if (oldToNewOrdinals.size() != graph.size()) {
throw new IllegalArgumentException(String.format("ordinalMapper size %d does not match graph size %d",
oldToNewOrdinals.size(), graph.size()));
}

var entriesByNewOrdinal = new ArrayList<>(oldToNewOrdinals.entrySet());
entriesByNewOrdinal.sort(Comparator.comparingInt(Map.Entry::getValue));
// the last new ordinal should be size-1
if (graph.size() > 0 && entriesByNewOrdinal.get(entriesByNewOrdinal.size() - 1).getValue() != graph.size() - 1) {
throw new IllegalArgumentException("oldToNewOrdinals produced out-of-range entries");
}

try (var view = graph.getView()) {
// graph-level properties
out.writeInt(graph.size());
out.writeInt(vectors.dimension());
out.writeInt(view.entryNode());
out.writeInt(graph.maxDegree());

// for each graph node, write the associated vector and its neighbors
for (int i = 0; i < oldToNewOrdinals.size(); i++) {
var entry = entriesByNewOrdinal.get(i);
int originalOrdinal = entry.getKey();
int newOrdinal = entry.getValue();
if (!graph.containsNode(originalOrdinal)) {
continue;
}

// pad out to maxEdgesPerNode
for (; n < graph.maxDegree(); n++) {
out.writeInt(-1);
out.writeInt(newOrdinal); // unnecessary, but a reasonable sanity check
Io.writeFloats(out, (float[]) vectors.vectorValue(originalOrdinal));

var neighbors = view.getNeighborsIterator(originalOrdinal);
out.writeInt(neighbors.size());
int n = 0;
for (; n < neighbors.size(); n++) {
out.writeInt(oldToNewOrdinals.get(neighbors.nextInt()));
}
assert !neighbors.hasNext();

// pad out to maxEdgesPerNode
for (; n < graph.maxDegree(); n++) {
out.writeInt(-1);
}
}
} catch (Exception e) {
throw new IOException(e);
}
}
}
Loading
Loading