Skip to content

Commit

Permalink
Non-uniform vector quantization (#374)
Browse files Browse the repository at this point in the history
Adds support for Non-uniform Vector Quantization (NVQ, pronounced as "new vec"). This new technique quantizes the values in each vector with high accuracy by first applying a nonlinear transformation that is individually fit to each vector. These nonlinearities are designed to be lightweight and have a negligible impact on distance computation performance.

---------

Co-authored-by: Joel Knighton <[email protected]>
Co-authored-by: Ted Willke <[email protected]>
  • Loading branch information
3 people authored Jan 2, 2025
1 parent 431538e commit e8d5c3c
Show file tree
Hide file tree
Showing 47 changed files with 2,810 additions and 87 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ The graph is represented by an on-disk adjacency list per node, with additional

The second pass can be performed with
* Full resolution float32 vectors
* NVQ, which uses a non-uniform technique to quantize vectors with high accuracy

[This two-pass design reduces memory usage and reduces latency while preserving accuracy](https://thenewstack.io/why-vector-size-matters/).

Expand Down
6 changes: 6 additions & 0 deletions UPGRADING.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,12 @@
`CompressedVectors` directly from `encodeAll()`.
- `PQVectors::getProductQuantization` is removed; it duplicated `CompressedVectors::getCompressor` unnecessarily

## New features
- Support for Non-uniform Vector Quantization (NVQ, pronounced as "new vec"). This new technique quantizes the values
in each vector with high accuracy by first applying a nonlinear transformation that is individually fit to each
vector. These nonlinearities are designed to be lightweight and have a negligible impact on distance computation
performance.

# Upgrading from 2.0.x to 3.0.x

## Critical API changes
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -211,20 +211,20 @@ public static GraphIndexBuilder rescore(GraphIndexBuilder other, BuildScoreProvi
var neighbors = other.graph.getNeighbors(i);
var sf = newProvider.searchProviderFor(i).scoreFunction();
var newNeighbors = new NodeArray(neighbors.size());

// Copy edges, compute new scores
for (var it = neighbors.iterator(); it.hasNext(); ) {
int neighbor = it.nextInt();
// since we're using a different score provider, use insertSorted instead of addInOrder
newNeighbors.insertSorted(neighbor, sf.similarityTo(neighbor));
}

newBuilder.graph.addNode(i, newNeighbors);
}

// Set the entry node
newBuilder.graph.updateEntryNode(other.graph.entry());

return newBuilder;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@
*/
public enum FeatureId {
INLINE_VECTORS(InlineVectors::load),
FUSED_ADC(FusedADC::load);
FUSED_ADC(FusedADC::load),
NVQ_VECTORS(NVQ::load);

public static final Set<FeatureId> ALL = Collections.unmodifiableSet(EnumSet.allOf(FeatureId.class));

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@
import io.github.jbellis.jvector.disk.RandomAccessReader;
import io.github.jbellis.jvector.graph.GraphIndex;
import io.github.jbellis.jvector.graph.similarity.ScoreFunction;
import io.github.jbellis.jvector.pq.FusedADCPQDecoder;
import io.github.jbellis.jvector.pq.PQVectors;
import io.github.jbellis.jvector.pq.ProductQuantization;
import io.github.jbellis.jvector.quantization.FusedADCPQDecoder;
import io.github.jbellis.jvector.quantization.PQVectors;
import io.github.jbellis.jvector.quantization.ProductQuantization;
import io.github.jbellis.jvector.util.ExplicitThreadLocal;
import io.github.jbellis.jvector.vector.VectorSimilarityFunction;
import io.github.jbellis.jvector.vector.VectorizationProvider;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
/*
* Copyright DataStax, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package io.github.jbellis.jvector.graph.disk;

import io.github.jbellis.jvector.disk.RandomAccessReader;
import io.github.jbellis.jvector.graph.similarity.ScoreFunction;
import io.github.jbellis.jvector.quantization.NVQScorer;
import io.github.jbellis.jvector.quantization.NVQuantization;
import io.github.jbellis.jvector.quantization.NVQuantization.QuantizedVector;
import io.github.jbellis.jvector.vector.VectorSimilarityFunction;
import io.github.jbellis.jvector.vector.types.VectorFloat;

import java.io.DataOutput;
import java.io.IOException;
import java.io.UncheckedIOException;

/**
 * Implements the storage of NuVeQ vectors in an on-disk graph index. These can be used for reranking.
 * <p>
 * The feature header carries the serialized {@link NVQuantization}; the inline payload written for a
 * node carries that node's {@link QuantizedVector}.
 */
public class NVQ implements Feature {
    private final NVQuantization nvq;
    private final NVQScorer scorer;
    // One scratch QuantizedVector per thread; it is overwritten each time a reranker scores a node.
    private final ThreadLocal<QuantizedVector> reusableQuantizedVector;

    /**
     * Creates the feature around an existing quantizer.
     *
     * @param nvq the quantizer whose parameters are written to the header and used for scoring
     */
    public NVQ(NVQuantization nvq) {
        this.nvq = nvq;
        scorer = new NVQScorer(this.nvq);
        reusableQuantizedVector = ThreadLocal.withInitial(() -> NVQuantization.QuantizedVector.createEmpty(nvq.subvectorSizesAndOffsets, nvq.bitsPerDimension));
    }

    /** @return {@link FeatureId#NVQ_VECTORS} */
    @Override
    public FeatureId id() {
        return FeatureId.NVQ_VECTORS;
    }

    /** @return the header size, as reported by {@code NVQuantization.compressorSize()} */
    @Override
    public int headerSize() {
        return nvq.compressorSize();
    }

    /** @return the per-entry inline size, as reported by {@code NVQuantization.compressedVectorSize()} */
    @Override
    public int inlineSize() {
        return nvq.compressedVectorSize();
    }

    /** @return the length of the quantizer's global mean vector */
    public int dimension() {
        return nvq.globalMean.length();
    }

    /**
     * Reads the quantizer from {@code reader} and wraps it in a new feature instance.
     *
     * @param header not consulted by this feature
     * @param reader source positioned at the serialized {@link NVQuantization}
     * @return the loaded feature
     * @throws UncheckedIOException if reading the quantizer fails
     */
    static NVQ load(CommonHeader header, RandomAccessReader reader) {
        try {
            return new NVQ(NVQuantization.load(reader));
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
    }

    /**
     * Writes the quantizer to {@code out}, tagged with {@code OnDiskGraphIndex.CURRENT_VERSION}.
     *
     * @param out destination for the header bytes
     * @throws IOException if the write fails
     */
    @Override
    public void writeHeader(DataOutput out) throws IOException {
        nvq.write(out, OnDiskGraphIndex.CURRENT_VERSION);
    }

    /**
     * Writes the quantized vector held by {@code state_} to {@code out}.
     *
     * @param out    destination for the inline bytes
     * @param state_ must be an {@link NVQ.State}; any other type fails the cast
     * @throws IOException if the write fails
     */
    @Override
    public void writeInline(DataOutput out, Feature.State state_) throws IOException {
        var state = (NVQ.State) state_;
        state.vector.write(out);
    }

    /** Carries the quantized vector to be written inline by {@link #writeInline}. */
    public static class State implements Feature.State {
        public final QuantizedVector vector;

        public State(QuantizedVector vector) {
            this.vector = vector;
        }
    }

    /**
     * Builds a score function that rates nodes against {@code queryVector} using their stored
     * NVQ vectors.
     * <p>
     * For each node, the returned function reads the node's inline NVQ data from {@code source}
     * into the calling thread's scratch vector and scores that vector.
     *
     * @param queryVector the query to compare against
     * @param vsf         the similarity function to apply
     * @param source      provides the inline reader for a node's NVQ data
     * @return a score function over node ordinals
     * @throws UncheckedIOException (from the returned function) if reading a node's data fails
     */
    ScoreFunction.ExactScoreFunction rerankerFor(VectorFloat<?> queryVector,
                                                 VectorSimilarityFunction vsf,
                                                 FeatureSource source) {
        var function = scorer.scoreFunctionFor(queryVector, vsf);

        return node2 -> {
            // Fetch the thread-local scratch vector once and reuse it for both load and scoring.
            var scratch = reusableQuantizedVector.get();
            try {
                var reader = source.inlineReaderForNode(node2, FeatureId.NVQ_VECTORS);
                QuantizedVector.loadInto(reader, scratch);
            } catch (IOException e) {
                // Same wrapping as load(); still a RuntimeException for existing callers.
                throw new UncheckedIOException(e);
            }
            return function.similarityTo(scratch);
        };
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -262,10 +262,13 @@ public void close() throws IOException {

@Override
public ScoreFunction.ExactScoreFunction rerankerFor(VectorFloat<?> queryVector, VectorSimilarityFunction vsf) {
if (!features.containsKey(FeatureId.INLINE_VECTORS)) {
throw new UnsupportedOperationException("No inline vectors in this graph");
if (features.containsKey(FeatureId.INLINE_VECTORS)) {
return RandomAccessVectorValues.super.rerankerFor(queryVector, vsf);
} else if (features.containsKey(FeatureId.NVQ_VECTORS)) {
return ((NVQ) features.get(FeatureId.NVQ_VECTORS)).rerankerFor(queryVector, vsf, this);
} else {
throw new UnsupportedOperationException("No reranker available for this graph");
}
return RandomAccessVectorValues.super.rerankerFor(queryVector, vsf);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -324,8 +324,10 @@ public OnDiskGraphIndexWriter build() throws IOException {
int dimension;
if (features.containsKey(FeatureId.INLINE_VECTORS)) {
dimension = ((InlineVectors) features.get(FeatureId.INLINE_VECTORS)).dimension();
} else if (features.containsKey(FeatureId.NVQ_VECTORS)) {
dimension = ((NVQ) features.get(FeatureId.NVQ_VECTORS)).dimension();
} else {
throw new IllegalArgumentException("Inline vectors must be provided.");
throw new IllegalArgumentException("Inline or NVQ vectors must be provided.");
}

if (ordinalMapper == null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
package io.github.jbellis.jvector.graph.similarity;

import io.github.jbellis.jvector.graph.RandomAccessVectorValues;
import io.github.jbellis.jvector.pq.BQVectors;
import io.github.jbellis.jvector.pq.PQVectors;
import io.github.jbellis.jvector.quantization.BQVectors;
import io.github.jbellis.jvector.quantization.PQVectors;
import io.github.jbellis.jvector.vector.VectorSimilarityFunction;
import io.github.jbellis.jvector.vector.VectorUtil;
import io.github.jbellis.jvector.vector.VectorizationProvider;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
package io.github.jbellis.jvector.graph.similarity;

import io.github.jbellis.jvector.graph.RandomAccessVectorValues;
import io.github.jbellis.jvector.pq.PQVectors;
import io.github.jbellis.jvector.quantization.PQVectors;
import io.github.jbellis.jvector.vector.types.VectorFloat;
import org.agrona.collections.Int2ObjectHashMap;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
* limitations under the License.
*/

package io.github.jbellis.jvector.pq;
package io.github.jbellis.jvector.quantization;

import io.github.jbellis.jvector.disk.RandomAccessReader;
import io.github.jbellis.jvector.graph.similarity.ScoreFunction;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
* limitations under the License.
*/

package io.github.jbellis.jvector.pq;
package io.github.jbellis.jvector.quantization;

import io.github.jbellis.jvector.disk.RandomAccessReader;
import io.github.jbellis.jvector.graph.RandomAccessVectorValues;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
* limitations under the License.
*/

package io.github.jbellis.jvector.pq;
package io.github.jbellis.jvector.quantization;

import io.github.jbellis.jvector.graph.disk.OnDiskGraphIndex;
import io.github.jbellis.jvector.graph.similarity.ScoreFunction;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
* limitations under the License.
*/

package io.github.jbellis.jvector.pq;
package io.github.jbellis.jvector.quantization;

import io.github.jbellis.jvector.graph.disk.FusedADC;
import io.github.jbellis.jvector.graph.similarity.ScoreFunction;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
* limitations under the License.
*/

package io.github.jbellis.jvector.pq;
package io.github.jbellis.jvector.quantization;

public class ImmutableBQVectors extends BQVectors {
public ImmutableBQVectors(BinaryQuantization bq, long[][] compressedVectors) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
* limitations under the License.
*/

package io.github.jbellis.jvector.pq;
package io.github.jbellis.jvector.quantization;

import io.github.jbellis.jvector.vector.types.ByteSequence;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
* limitations under the License.
*/

package io.github.jbellis.jvector.pq;
package io.github.jbellis.jvector.quantization;

import io.github.jbellis.jvector.vector.Matrix;
import io.github.jbellis.jvector.vector.VectorUtil;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
* limitations under the License.
*/

package io.github.jbellis.jvector.pq;
package io.github.jbellis.jvector.quantization;

public class MutableBQVectors extends BQVectors implements MutableCompressedVectors<long[]> {
private static final int INITIAL_CAPACITY = 1024;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
* limitations under the License.
*/

package io.github.jbellis.jvector.pq;
package io.github.jbellis.jvector.quantization;

public interface MutableCompressedVectors<T> extends CompressedVectors {
/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
* limitations under the License.
*/

package io.github.jbellis.jvector.pq;
package io.github.jbellis.jvector.quantization;

import io.github.jbellis.jvector.vector.VectorizationProvider;
import io.github.jbellis.jvector.vector.types.ByteSequence;
Expand Down
Loading

0 comments on commit e8d5c3c

Please sign in to comment.