Skip to content

Commit

Permalink
make ALP-RD work for f32, more compute fns
Browse files Browse the repository at this point in the history
  • Loading branch information
a10y committed Oct 1, 2024
1 parent 4667a02 commit c51ecc4
Show file tree
Hide file tree
Showing 9 changed files with 331 additions and 54 deletions.
91 changes: 68 additions & 23 deletions encodings/alp/src/alp_rd/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,13 @@ use vortex_dtype::{DType, PType};
use vortex_error::{vortex_bail, VortexExpect, VortexResult};

use crate::alp_rd::alp_rd_decode;
use crate::{RealDouble, RealFloat};

impl_encoding!("vortex.alprd", ids::ALP_RD, ALPRD);

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ALPRDMetadata {
is_f32: bool,
right_bit_width: u8,
// left_bit_width is implicit from the dict_len.
dict_len: u8,
Expand Down Expand Up @@ -67,10 +69,13 @@ impl ALPRDArray {
dict[idx] = *v;
}

let is_f32 = dtype.ptype() == Some(PType::F32);

Self::try_from_parts(
dtype,
len,
ALPRDMetadata {
is_f32,
right_bit_width,
dict_len: left_parts_dict.as_ref().len() as u8,
dict,
Expand All @@ -94,10 +99,16 @@ impl ALPRDArray {

/// The rightmost (least significant) bits of the floating point values stored in the array.
pub fn right_parts(&self) -> Array {
let uint_ptype = if self.metadata().is_f32 {
PType::U32
} else {
PType::U64
};

self.as_ref()
.child(
1,
&DType::Primitive(PType::U64, self.metadata().left_parts_dtype.nullability()),
&DType::Primitive(uint_ptype, self.metadata().left_parts_dtype.nullability()),
self.len(),
)
.vortex_expect("ALPRDArray: right_parts child")
Expand Down Expand Up @@ -156,17 +167,31 @@ impl IntoCanonical for ALPRDArray {
exc_u16 = PrimitiveArray::from(Vec::<u16>::new());
}

let decoded = alp_rd_decode(
left_parts.maybe_null_slice::<u16>(),
left_parts_dict,
self.metadata().right_bit_width,
right_parts.maybe_null_slice::<u64>(),
&exc_pos,
exc_u16.maybe_null_slice::<u16>(),
);

let decoded_array =
PrimitiveArray::from_vec(decoded, self.logical_validity().into_validity());
let decoded_array = if self.metadata().is_f32 {
PrimitiveArray::from_vec(
alp_rd_decode::<RealFloat>(
left_parts.maybe_null_slice::<u16>(),
left_parts_dict,
self.metadata().right_bit_width,
right_parts.maybe_null_slice::<u32>(),
&exc_pos,
exc_u16.maybe_null_slice::<u16>(),
),
self.logical_validity().into_validity(),
)
} else {
PrimitiveArray::from_vec(
alp_rd_decode::<RealDouble>(
left_parts.maybe_null_slice::<u16>(),
left_parts_dict,
self.metadata().right_bit_width,
right_parts.maybe_null_slice::<u64>(),
&exc_pos,
exc_u16.maybe_null_slice::<u16>(),
),
self.logical_validity().into_validity(),
)
};

Ok(Canonical::Primitive(decoded_array))
}
Expand Down Expand Up @@ -204,23 +229,43 @@ mod test {
use vortex::array::PrimitiveArray;
use vortex::{IntoArray, IntoCanonical};

use crate::alp_rd;
use crate::{alp_rd, RealDouble, RealFloat};

// Test helper: builds a `Vec` of `$n` floating point values starting at `$seed`.
// Each subsequent element is the previous one advanced with `next_up()`, so the
// first element is `$seed` itself. Written as a macro (not a fn) so the same body
// can be expanded for both the `f32` and `f64` seeds used by the tests in this module.
macro_rules! n_reals {
($seed:expr, $n:expr) => {
(0..$n)
.scan($seed, |state, _| {
// Yield the current value, then step the running state to the next float up.
let prev = *state;
*state = state.next_up();
Some(prev)
})
.collect::<Vec<_>>()
};
}

#[test]
fn test_array_encode_f32() {
const SEED: f32 = 0.1f32.next_up();
let reals = n_reals!(SEED, 1024);

let real_floats = PrimitiveArray::from(reals.clone());
let encoder = alp_rd::Encoder::<RealFloat>::new(&[SEED]);
let rd_array = encoder.encode(&real_floats);
let decoded = rd_array
.into_array()
.into_canonical()
.unwrap()
.into_primitive()
.unwrap();

fn real_doubles(seed: f64, n: usize) -> Vec<f64> {
(0..n)
.scan(seed, |state, _| {
let prev = *state;
*state = state.next_up();
Some(prev)
})
.collect()
assert_eq!(decoded.maybe_null_slice::<f32>(), &reals);
}

#[test]
fn test_array_encode_with_nulls_and_exceptions() {
const SEED: f64 = 1.123_848_591_110_992_f64;
// Create a vector of 1024 "real" doubles
let reals = real_doubles(SEED, 1024);
let reals = n_reals!(SEED, 1024);
// Null out some of the values.
let mut reals: Vec<Option<f64>> = reals.into_iter().map(Some).collect();
reals[1] = None;
Expand All @@ -231,7 +276,7 @@ mod test {
let real_doubles = PrimitiveArray::from_nullable_vec(reals.clone());

// Pick a seed that we know will trigger lots of exceptions.
let encoder = alp_rd::Encoder::new(&[100.0f64]);
let encoder: alp_rd::Encoder<RealDouble> = alp_rd::Encoder::new(&[100.0f64]);

let rd_array = encoder.encode(&real_doubles);

Expand Down
24 changes: 24 additions & 0 deletions encodings/alp/src/alp_rd/compute/filter.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
use vortex::compute::{filter, FilterFn};
use vortex::{Array, ArrayDType, IntoArray};
use vortex_error::VortexResult;

use crate::ALPRDArray;

impl FilterFn for ALPRDArray {
    /// Applies `predicate` to each per-element child of this array (left parts, right
    /// parts and, when present, the left-parts exceptions) and rebuilds an `ALPRDArray`
    /// from the filtered children.
    ///
    /// The dtype, the left-parts dictionary and the right bit width are passed through
    /// unchanged.
    ///
    /// # Errors
    ///
    /// Propagates any error returned while filtering a child or while constructing the
    /// resulting array via `ALPRDArray::try_new`.
    fn filter(&self, predicate: &Array) -> VortexResult<Array> {
        // Exceptions are optional: filter them only when the array carries any.
        let filtered_exceptions = self
            .left_parts_exceptions()
            .map(|exceptions| filter(&exceptions, predicate))
            .transpose()?;

        let dtype = self.dtype().clone();
        let filtered_left = filter(self.left_parts(), predicate)?;
        let dict = self.left_parts_dict();
        let filtered_right = filter(self.right_parts(), predicate)?;
        let right_bit_width = self.right_bit_width();

        let filtered = ALPRDArray::try_new(
            dtype,
            filtered_left,
            dict,
            filtered_right,
            right_bit_width,
            filtered_exceptions,
        )?;

        Ok(filtered.into_array())
    }
}
18 changes: 17 additions & 1 deletion encodings/alp/src/alp_rd/compute/mod.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,27 @@
use vortex::compute::ArrayCompute;
use vortex::compute::unary::ScalarAtFn;
use vortex::compute::{ArrayCompute, FilterFn, SliceFn, TakeFn};

use crate::ALPRDArray;

mod filter;
mod scalar_at;
mod slice;
mod take;

/// Registers the compute functions available for `ALPRDArray`.
///
/// Each accessor returns `Some(self)`, i.e. the array itself serves as the
/// implementation of the corresponding compute trait.
impl ArrayCompute for ALPRDArray {
/// `ALPRDArray` provides its own `FilterFn` implementation.
fn filter(&self) -> Option<&dyn FilterFn> {
Some(self)
}

/// `ALPRDArray` provides its own `ScalarAtFn` implementation.
fn scalar_at(&self) -> Option<&dyn ScalarAtFn> {
Some(self)
}

/// `ALPRDArray` provides its own `SliceFn` implementation.
fn slice(&self) -> Option<&dyn SliceFn> {
Some(self)
}

/// `ALPRDArray` provides its own `TakeFn` implementation.
fn take(&self) -> Option<&dyn TakeFn> {
Some(self)
}
}
4 changes: 2 additions & 2 deletions encodings/alp/src/alp_rd/compute/scalar_at.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ mod test {
use vortex::array::PrimitiveArray;
use vortex::compute::unary::scalar_at;

use crate::Encoder;
use crate::{Encoder, RealDouble};

#[test]
fn test_scalar_at() {
Expand All @@ -43,7 +43,7 @@ mod test {
let outlier = 3e100f64.next_up();

let array = PrimitiveArray::from(vec![a, b, outlier]);
let encoded = Encoder::new(&[a, b]).encode(&array);
let encoded = Encoder::<RealDouble>::new(&[a, b]).encode(&array);

// Make sure that we're testing the exception pathway.
assert!(encoded.left_parts_exceptions().is_some());
Expand Down
52 changes: 52 additions & 0 deletions encodings/alp/src/alp_rd/compute/slice.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
use vortex::compute::{slice, SliceFn};
use vortex::{Array, ArrayDType, IntoArray};
use vortex_error::VortexResult;

use crate::ALPRDArray;

impl SliceFn for ALPRDArray {
    /// Slices each per-element child of this array (left parts, right parts and, when
    /// present, the left-parts exceptions) with the same `start`/`stop` bounds and
    /// rebuilds an `ALPRDArray` from the sliced children.
    ///
    /// The dtype, the left-parts dictionary and the right bit width are passed through
    /// unchanged.
    ///
    /// # Errors
    ///
    /// Propagates any error returned while slicing a child or while constructing the
    /// resulting array via `ALPRDArray::try_new`.
    fn slice(&self, start: usize, stop: usize) -> VortexResult<Array> {
        // Exceptions are optional: slice them only when the array carries any.
        let sliced_exceptions = self
            .left_parts_exceptions()
            .map(|exceptions| slice(&exceptions, start, stop))
            .transpose()?;

        let dtype = self.dtype().clone();
        let sliced_left = slice(self.left_parts(), start, stop)?;
        let dict = self.left_parts_dict();
        let sliced_right = slice(self.right_parts(), start, stop)?;
        let right_bit_width = self.right_bit_width();

        let sliced = ALPRDArray::try_new(
            dtype,
            sliced_left,
            dict,
            sliced_right,
            right_bit_width,
            sliced_exceptions,
        )?;

        Ok(sliced.into_array())
    }
}

#[cfg(test)]
mod test {
use vortex::array::PrimitiveArray;
use vortex::compute::slice;
use vortex::IntoArrayVariant;

use crate::{Encoder, RealDouble};

// Encodes three doubles, slices off the first element, and checks that the
// remaining two decode back to their original values.
#[test]
fn test_slice() {
let a = 0.1f64.next_up();
let b = 0.2f64.next_up();
let outlier = 3e100f64.next_up();

// The encoder is built from `a` and `b` only; `outlier` is present in the
// encoded array but not in the sample handed to the encoder.
let array = PrimitiveArray::from(vec![a, b, outlier]);
let encoded = Encoder::<RealDouble>::new(&[a, b]).encode(&array);

// Make sure that we're testing the exception pathway.
assert!(encoded.left_parts_exceptions().is_some());

// Slice with start = 1, stop = 3; the expected result below is the last two values.
let decoded = slice(encoded.as_ref(), 1, 3)
.unwrap()
.into_primitive()
.unwrap();

assert_eq!(decoded.maybe_null_slice::<f64>(), &[b, outlier]);
}
}
52 changes: 52 additions & 0 deletions encodings/alp/src/alp_rd/compute/take.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
use vortex::compute::{take, TakeFn};
use vortex::{Array, ArrayDType, IntoArray};
use vortex_error::VortexResult;

use crate::ALPRDArray;

impl TakeFn for ALPRDArray {
    /// Takes `indices` from each per-element child of this array (left parts, right
    /// parts and, when present, the left-parts exceptions) and rebuilds an `ALPRDArray`
    /// from the results.
    ///
    /// The dtype, the left-parts dictionary and the right bit width are passed through
    /// unchanged.
    ///
    /// # Errors
    ///
    /// Propagates any error returned while taking from a child or while constructing
    /// the resulting array via `ALPRDArray::try_new`.
    fn take(&self, indices: &Array) -> VortexResult<Array> {
        // Exceptions are optional: take from them only when the array carries any.
        let taken_exceptions = self
            .left_parts_exceptions()
            .map(|exceptions| take(&exceptions, indices))
            .transpose()?;

        let dtype = self.dtype().clone();
        let taken_left = take(self.left_parts(), indices)?;
        let dict = self.left_parts_dict();
        let taken_right = take(self.right_parts(), indices)?;
        let right_bit_width = self.right_bit_width();

        let taken = ALPRDArray::try_new(
            dtype,
            taken_left,
            dict,
            taken_right,
            right_bit_width,
            taken_exceptions,
        )?;

        Ok(taken.into_array())
    }
}

#[cfg(test)]
mod test {
use vortex::array::PrimitiveArray;
use vortex::compute::take;
use vortex::IntoArrayVariant;

use crate::{Encoder, RealDouble};

// Encodes three doubles, takes positions 0 and 2, and checks that the taken
// values decode back to the originals at those positions.
#[test]
fn test_take() {
let a = 0.1f64.next_up();
let b = 0.2f64.next_up();
let outlier = 3e100f64.next_up();

// The encoder is built from `a` and `b` only; `outlier` is present in the
// encoded array but not in the sample handed to the encoder.
let array = PrimitiveArray::from(vec![a, b, outlier]);
let encoded = Encoder::<RealDouble>::new(&[a, b]).encode(&array);

// Make sure that we're testing the exception pathway.
assert!(encoded.left_parts_exceptions().is_some());

// Indices 0 and 2 select `a` and `outlier`, skipping `b`.
let taken = take(encoded.as_ref(), PrimitiveArray::from(vec![0, 2]).as_ref())
.unwrap()
.into_primitive()
.unwrap();

assert_eq!(taken.maybe_null_slice::<f64>(), &[a, outlier]);
}
}
Loading

0 comments on commit c51ecc4

Please sign in to comment.