Skip to content

Commit

Permalink
feat: maxsim operator and indexing on maxsim operator
Browse files Browse the repository at this point in the history
Signed-off-by: usamoi <[email protected]>
  • Loading branch information
usamoi committed Feb 24, 2025
1 parent d16d469 commit 1299f7f
Show file tree
Hide file tree
Showing 22 changed files with 683 additions and 897 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ pg17 = ["pgrx/pg17", "pgrx-catalog/pg17"]

[dependencies]
algorithm = { path = "./crates/algorithm" }
always_equal = { path = "./crates/always_equal" }
distance = { path = "./crates/distance" }
k_means = { path = "./crates/k_means" }
random_orthogonal_matrix = { path = "./crates/random_orthogonal_matrix" }
Expand Down
2 changes: 1 addition & 1 deletion crates/algorithm/src/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use crate::operator::{Accessor2, Operator, Vector};
use crate::tape::TapeWriter;
use crate::tuples::*;
use crate::types::*;
use crate::{Branch, DerefMut, Page, PageGuard, RelationWrite};
use crate::{Branch, DerefMut, IndexPointer, Page, PageGuard, RelationWrite};
use simd::fast_scan::{any_pack, padding_pack};
use vector::VectorOwned;

Expand Down
2 changes: 1 addition & 1 deletion crates/algorithm/src/insert.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use crate::operator::*;
use crate::select_heap::SelectHeap;
use crate::tuples::*;
use crate::vectors::{self};
use crate::{Page, RelationWrite, tape};
use crate::{IndexPointer, Page, RelationWrite, tape};
use always_equal::AlwaysEqual;
use distance::Distance;
use std::cmp::Reverse;
Expand Down
7 changes: 6 additions & 1 deletion crates/algorithm/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ pub use prewarm::prewarm;
pub use rerank::{rerank_heap, rerank_index};
pub use search::search;

use crate::tuples::IndexPointer;
use std::ops::{Deref, DerefMut};
use zerocopy_derive::{FromBytes, Immutable, IntoBytes, KnownLayout};

Expand Down Expand Up @@ -89,3 +88,9 @@ pub(crate) struct Branch<T> {
pub signs: Vec<bool>,
pub extra: T,
}

/// Newtype wrapper around a raw `u64`.
///
/// `#[repr(transparent)]` gives the wrapper the same layout as the inner
/// `u64`, and the zerocopy derives (`IntoBytes`, `FromBytes`, `Immutable`,
/// `KnownLayout`) let it be viewed as, or built from, plain bytes.
///
/// NOTE(review): the name suggests the value identifies a location inside the
/// index; the encoding of the `u64` is not visible here — confirm at call sites.
#[repr(transparent)]
#[derive(
Debug, Default, Clone, Copy, PartialEq, Eq, Hash, IntoBytes, FromBytes, Immutable, KnownLayout,
)]
pub struct IndexPointer(pub u64);
3 changes: 1 addition & 2 deletions crates/algorithm/src/rerank.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
use crate::operator::*;
use crate::tuples::*;
use crate::{RelationRead, vectors};
use crate::{IndexPointer, RelationRead, vectors};
use always_equal::AlwaysEqual;
use distance::Distance;
use std::cmp::Reverse;
Expand Down
2 changes: 1 addition & 1 deletion crates/algorithm/src/search.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::linked_vec::LinkedVec;
use crate::operator::*;
use crate::tuples::*;
use crate::{Page, RelationRead, RerankMethod, tape, vectors};
use crate::{IndexPointer, Page, RelationRead, RerankMethod, tape, vectors};
use always_equal::AlwaysEqual;
use distance::Distance;
use std::cmp::Reverse;
Expand Down
2 changes: 1 addition & 1 deletion crates/algorithm/src/tape.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::operator::Accessor1;
use crate::tuples::*;
use crate::{Page, PageGuard, RelationRead, RelationWrite};
use crate::{IndexPointer, Page, PageGuard, RelationRead, RelationWrite};
use std::marker::PhantomData;
use std::num::NonZeroU64;
use std::ops::DerefMut;
Expand Down
7 changes: 1 addition & 6 deletions crates/algorithm/src/tuples.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use crate::IndexPointer;
use crate::operator::Vector;
use std::marker::PhantomData;
use std::num::{NonZeroU8, NonZeroU64};
Expand Down Expand Up @@ -989,12 +990,6 @@ const fn soundness_check() {
)]
pub struct ZeroU8(Option<NonZeroU8>);

/// Newtype wrapper around a raw `u64`.
///
/// `#[repr(transparent)]` gives the wrapper the same layout as the inner
/// `u64`, and the zerocopy derives (`IntoBytes`, `FromBytes`, `Immutable`,
/// `KnownLayout`) let it be viewed as, or built from, plain bytes.
#[repr(transparent)]
#[derive(
Debug, Default, Clone, Copy, PartialEq, Eq, Hash, IntoBytes, FromBytes, Immutable, KnownLayout,
)]
pub struct IndexPointer(pub u64);

#[repr(transparent)]
#[derive(
Debug,
Expand Down
2 changes: 1 addition & 1 deletion crates/algorithm/src/vectors.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::operator::*;
use crate::tuples::*;
use crate::{Page, PageGuard, RelationRead, RelationWrite, tape};
use crate::{IndexPointer, Page, PageGuard, RelationRead, RelationWrite, tape};
use std::num::NonZeroU64;
use vector::VectorOwned;

Expand Down
16 changes: 16 additions & 0 deletions src/datatype/memory_vector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,22 @@ impl IntoDatum for VectorOutput {

// UnboxDatum

// Lets a `VectorInput` be unboxed directly from a datum; the unboxed value is
// tied to the (shorter) lifetime `'src` of the datum it was read from.
unsafe impl<'a> pgrx::datum::UnboxDatum for VectorInput<'a> {
    type As<'src>
        = VectorInput<'src>
    where
        'a: 'src;

    #[inline]
    unsafe fn unbox<'src>(datum: pgrx::datum::Datum<'src>) -> Self::As<'src>
    where
        Self: 'src,
    {
        // Strip the lifetime, reinterpret the datum as a pointer, and insist
        // that it is non-null; a null pointer panics via `unwrap`.
        let non_null = NonNull::new(datum.sans_lifetime().cast_mut_ptr()).unwrap();
        // SAFETY: relies on the contract of this `unsafe fn` — the caller is
        // expected to pass a datum that really holds a value of this type.
        // NOTE(review): the exact requirements of `from_ptr` are not visible
        // here — confirm.
        unsafe { Self::from_ptr(non_null) }
    }
}

unsafe impl pgrx::datum::UnboxDatum for VectorOutput {
type As<'src> = VectorOutput;
#[inline]
Expand Down
41 changes: 41 additions & 0 deletions src/datatype/operators_vector.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
use crate::datatype::memory_vector::{VectorInput, VectorOutput};
use distance::Distance;
use pgrx::Array;
use std::num::NonZero;
use vector::VectorBorrowed;
use vector::vect::VectBorrowed;
Expand Down Expand Up @@ -74,3 +76,42 @@ fn _vchord_vector_sphere_cosine_in(
let d = VectBorrowed::operator_cos(lhs, center).to_f32();
d < radius
}

// Smallest `operator_l2` distance between `rhs` and the entries of `lhs`,
// returned as the square root of the `f32` form of that minimum.
//
// Entries the array iterator yields as `None` are dropped by `flatten`. The
// running minimum starts at `Distance::INFINITY`, so that is the value that
// gets converted when no entry is visited.
#[pgrx::pg_extern(immutable, strict, parallel_safe)]
fn _vchord_vector_operator_maxsim_l2(lhs: Array<'_, VectorInput<'_>>, rhs: VectorInput<'_>) -> f32 {
    let nearest = lhs
        .iter()
        .flatten()
        .map(|candidate| VectBorrowed::operator_l2(candidate.as_borrowed(), rhs.as_borrowed()))
        .fold(Distance::INFINITY, |best, current| best.min(current));
    nearest.to_f32().sqrt()
}

// Smallest `operator_dot` distance between `rhs` and the entries of `lhs`,
// returned in its `f32` form.
//
// Entries the array iterator yields as `None` are dropped by `flatten`. The
// running minimum starts at `Distance::INFINITY`, so that is the value that
// gets converted when no entry is visited.
#[pgrx::pg_extern(immutable, strict, parallel_safe)]
fn _vchord_vector_operator_maxsim_ip(lhs: Array<'_, VectorInput<'_>>, rhs: VectorInput<'_>) -> f32 {
    let nearest = lhs
        .iter()
        .flatten()
        .map(|candidate| VectBorrowed::operator_dot(candidate.as_borrowed(), rhs.as_borrowed()))
        .fold(Distance::INFINITY, |best, current| best.min(current));
    nearest.to_f32()
}

// Smallest `operator_cos` distance between `rhs` and the entries of `lhs`,
// returned in its `f32` form.
//
// Entries the array iterator yields as `None` are dropped by `flatten`. The
// running minimum starts at `Distance::INFINITY`, so that is the value that
// gets converted when no entry is visited.
#[pgrx::pg_extern(immutable, strict, parallel_safe)]
fn _vchord_vector_operator_maxsim_cosine(
    lhs: Array<'_, VectorInput<'_>>,
    rhs: VectorInput<'_>,
) -> f32 {
    let nearest = lhs
        .iter()
        .flatten()
        .map(|candidate| VectBorrowed::operator_cos(candidate.as_borrowed(), rhs.as_borrowed()))
        .fold(Distance::INFINITY, |best, current| best.min(current));
    nearest.to_f32()
}
179 changes: 179 additions & 0 deletions src/index/algorithm.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
use super::opclass::Opfamily;
use crate::index::am::am_build::InternalBuild;
use algorithm::operator::{Dot, L2, Op};
use algorithm::types::*;
use algorithm::{RelationRead, RelationWrite};
use half::f16;
use std::num::NonZeroU64;
use vector::VectorOwned;
use vector::vect::{VectBorrowed, VectOwned};

/// Runs `algorithm::prewarm` with the operator type selected by `opfamily`.
///
/// The operator family's vector kind and distance kind choose one of four
/// `Op<VectOwned<_>, _>` instantiations; `index`, `height` and `check` are
/// forwarded unchanged and the callee's `String` is returned.
pub fn prewarm(
    opfamily: Opfamily,
    index: impl RelationRead,
    height: i32,
    check: impl Fn(),
) -> String {
    type F32L2 = Op<VectOwned<f32>, L2>;
    type F32Dot = Op<VectOwned<f32>, Dot>;
    type F16L2 = Op<VectOwned<f16>, L2>;
    type F16Dot = Op<VectOwned<f16>, Dot>;

    let vector_kind = opfamily.vector_kind();
    let distance_kind = opfamily.distance_kind();
    match vector_kind {
        VectorKind::Vecf32 => match distance_kind {
            DistanceKind::L2 => algorithm::prewarm::<F32L2>(index, height, check),
            DistanceKind::Dot => algorithm::prewarm::<F32Dot>(index, height, check),
        },
        VectorKind::Vecf16 => match distance_kind {
            DistanceKind::L2 => algorithm::prewarm::<F16L2>(index, height, check),
            DistanceKind::Dot => algorithm::prewarm::<F16Dot>(index, height, check),
        },
    }
}

/// Runs `algorithm::bulkdelete` with the operator type selected by `opfamily`.
///
/// The operator family's vector kind and distance kind choose one of four
/// `Op<VectOwned<_>, _>` instantiations; `index`, `check` and `callback` are
/// forwarded unchanged.
pub fn bulkdelete(
    opfamily: Opfamily,
    index: impl RelationWrite,
    check: impl Fn(),
    callback: impl Fn(NonZeroU64) -> bool,
) {
    type F32L2 = Op<VectOwned<f32>, L2>;
    type F32Dot = Op<VectOwned<f32>, Dot>;
    type F16L2 = Op<VectOwned<f16>, L2>;
    type F16Dot = Op<VectOwned<f16>, Dot>;

    let vector_kind = opfamily.vector_kind();
    let distance_kind = opfamily.distance_kind();
    match vector_kind {
        VectorKind::Vecf32 => match distance_kind {
            DistanceKind::L2 => algorithm::bulkdelete::<F32L2>(index, check, callback),
            DistanceKind::Dot => algorithm::bulkdelete::<F32Dot>(index, check, callback),
        },
        VectorKind::Vecf16 => match distance_kind {
            DistanceKind::L2 => algorithm::bulkdelete::<F16L2>(index, check, callback),
            DistanceKind::Dot => algorithm::bulkdelete::<F16Dot>(index, check, callback),
        },
    }
}

/// Runs `algorithm::maintain` with the operator type selected by `opfamily`.
///
/// The operator family's vector kind and distance kind choose one of four
/// `Op<VectOwned<_>, _>` instantiations; `index` and `check` are forwarded
/// unchanged.
pub fn maintain(opfamily: Opfamily, index: impl RelationWrite, check: impl Fn()) {
    type F32L2 = Op<VectOwned<f32>, L2>;
    type F32Dot = Op<VectOwned<f32>, Dot>;
    type F16L2 = Op<VectOwned<f16>, L2>;
    type F16Dot = Op<VectOwned<f16>, Dot>;

    let vector_kind = opfamily.vector_kind();
    let distance_kind = opfamily.distance_kind();
    match vector_kind {
        VectorKind::Vecf32 => match distance_kind {
            DistanceKind::L2 => algorithm::maintain::<F32L2>(index, check),
            DistanceKind::Dot => algorithm::maintain::<F32Dot>(index, check),
        },
        VectorKind::Vecf16 => match distance_kind {
            DistanceKind::L2 => algorithm::maintain::<F16L2>(index, check),
            DistanceKind::Dot => algorithm::maintain::<F16Dot>(index, check),
        },
    }
}

/// Runs `algorithm::build` with the operator type selected by `vector_options`.
///
/// `vector_options.v` and `vector_options.d` choose one of four
/// `Op<VectOwned<_>, _>` instantiations. Before the call, every mean in
/// `structures` (given as `Vec<f32>`) is converted with
/// `InternalBuild::build_from_vecf32`; the options and `index` are forwarded
/// unchanged.
pub fn build(
    vector_options: VectorOptions,
    vchordrq_options: VchordrqIndexOptions,
    index: impl RelationWrite,
    structures: Vec<Structure<Vec<f32>>>,
) {
    type F32L2 = Op<VectOwned<f32>, L2>;
    type F32Dot = Op<VectOwned<f32>, Dot>;
    type F16L2 = Op<VectOwned<f16>, L2>;
    type F16Dot = Op<VectOwned<f16>, Dot>;

    // Read both selectors up front; `vector_options` itself is moved into the callee.
    let (vector_kind, distance_kind) = (vector_options.v, vector_options.d);
    match vector_kind {
        VectorKind::Vecf32 => match distance_kind {
            DistanceKind::L2 => algorithm::build::<F32L2>(
                vector_options,
                vchordrq_options,
                index,
                map_structures(structures, |mean| InternalBuild::build_from_vecf32(&mean)),
            ),
            DistanceKind::Dot => algorithm::build::<F32Dot>(
                vector_options,
                vchordrq_options,
                index,
                map_structures(structures, |mean| InternalBuild::build_from_vecf32(&mean)),
            ),
        },
        VectorKind::Vecf16 => match distance_kind {
            DistanceKind::L2 => algorithm::build::<F16L2>(
                vector_options,
                vchordrq_options,
                index,
                map_structures(structures, |mean| InternalBuild::build_from_vecf32(&mean)),
            ),
            DistanceKind::Dot => algorithm::build::<F16Dot>(
                vector_options,
                vchordrq_options,
                index,
                map_structures(structures, |mean| InternalBuild::build_from_vecf32(&mean)),
            ),
        },
    }
}

/// Runs `algorithm::insert` with the operator type selected by the vector's
/// variant and by `opfamily.distance_kind()`.
///
/// The vector is passed through `RandomProject::project` before insertion;
/// `index` and `payload` are forwarded unchanged.
///
/// # Panics
///
/// Panics if the variant of `vector` does not match `opfamily.vector_kind()`.
pub fn insert(
    opfamily: Opfamily,
    index: impl RelationWrite,
    payload: NonZeroU64,
    vector: OwnedVector,
) {
    let distance_kind = opfamily.distance_kind();
    match vector {
        OwnedVector::Vecf32(vector) => {
            assert!(opfamily.vector_kind() == VectorKind::Vecf32);
            let projected = RandomProject::project(vector.as_borrowed());
            match distance_kind {
                DistanceKind::L2 => {
                    algorithm::insert::<Op<VectOwned<f32>, L2>>(index, payload, projected)
                }
                DistanceKind::Dot => {
                    algorithm::insert::<Op<VectOwned<f32>, Dot>>(index, payload, projected)
                }
            }
        }
        OwnedVector::Vecf16(vector) => {
            assert!(opfamily.vector_kind() == VectorKind::Vecf16);
            let projected = RandomProject::project(vector.as_borrowed());
            match distance_kind {
                DistanceKind::L2 => {
                    algorithm::insert::<Op<VectOwned<f16>, L2>>(index, payload, projected)
                }
                DistanceKind::Dot => {
                    algorithm::insert::<Op<VectOwned<f16>, Dot>>(index, payload, projected)
                }
            }
        }
    }
}

/// Converts every mean of every structure with `f`, preserving order and
/// carrying each structure's `children` over untouched.
fn map_structures<T, U>(x: Vec<Structure<T>>, f: impl Fn(T) -> U + Copy) -> Vec<Structure<U>> {
    let mut converted = Vec::with_capacity(x.len());
    for Structure { means, children } in x {
        converted.push(Structure {
            // `f` is `Copy`, so each structure gets its own copy of the mapper.
            means: means.into_iter().map(f).collect(),
            children,
        });
    }
    converted
}

/// Conversion that consumes a value and yields its projected form.
///
/// NOTE(review): judging by the name and the implementations in this file,
/// this applies the index's random projection to a borrowed vector — confirm
/// against `crate::index::projection`.
pub trait RandomProject {
/// Type produced by [`RandomProject::project`].
type Output;
/// Consumes `self` and returns the projected value.
fn project(self) -> Self::Output;
}

impl RandomProject for VectBorrowed<'_, f32> {
    type Output = VectOwned<f32>;

    /// Feeds the borrowed `f32` components through
    /// `crate::index::projection::project` and wraps the result in an owned
    /// vector.
    fn project(self) -> Self::Output {
        use crate::index::projection::project;
        VectOwned::new(project(self.slice()))
    }
}

impl RandomProject for VectBorrowed<'_, f16> {
    type Output = VectOwned<f16>;

    /// Converts the `f16` components to `f32`, feeds them through
    /// `crate::index::projection::project`, and converts the result back to
    /// `f16` before wrapping it in an owned vector.
    fn project(self) -> Self::Output {
        use crate::index::projection::project;
        use simd::Floating;
        let widened = f16::vector_to_f32(self.slice());
        let projected = project(&widened);
        VectOwned::new(f16::vector_from_f32(&projected))
    }
}
Loading

0 comments on commit 1299f7f

Please sign in to comment.