Skip to content

Commit

Permalink
cleanups
Browse files: browse the repository at this point in the history
  • Loading branch information
a10y committed Oct 2, 2024
1 parent 6dfa5b3 commit bc46516
Show file tree
Hide file tree
Showing 9 changed files with 103 additions and 148 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion encodings/alp/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ readme = { workspace = true }
workspace = true

[dependencies]
fastlanes = { workspace = true }
vortex-fastlanes = { workspace = true }
itertools = { workspace = true }
num-traits = { workspace = true }
Expand All @@ -30,6 +29,7 @@ vortex-scalar = { workspace = true }
[dev-dependencies]
arrow = { workspace = true }
divan = { workspace = true }
rstest = { workspace = true }

[[bench]]
name = "alp_compress"
Expand Down
4 changes: 2 additions & 2 deletions encodings/alp/benches/alp_compress.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,13 @@ fn main() {
}

#[divan::bench(types = [f32, f64], args = [100_000, 10_000_000])]
fn alp_compress<T: ALPFloat>(n: usize) -> (Exponents, Vec<T::ALPInt>, Vec<u64>, Vec<T>) {
fn compress_alp<T: ALPFloat>(n: usize) -> (Exponents, Vec<T::ALPInt>, Vec<u64>, Vec<T>) {
let values: Vec<T> = vec![T::from(1.234).unwrap(); n];
T::encode(values.as_slice(), None)
}

#[divan::bench(types = [f32, f64], args = [100_000, 10_000_000])]
fn rd_compress<T: ALPRDFloat>(bencher: Bencher, n: usize) {
fn compress_rd<T: ALPRDFloat>(bencher: Bencher, n: usize) {
let values: Vec<T> = vec![T::from(1.23).unwrap(); n];
let primitive = PrimitiveArray::from(values);
let encoder = Encoder::new(&[T::from(1.23).unwrap()]);
Expand Down
83 changes: 34 additions & 49 deletions encodings/alp/src/alp_rd/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -225,57 +225,42 @@ impl ArrayTrait for ALPRDArray {}

#[cfg(test)]
mod test {
use rstest::rstest;
use vortex::array::PrimitiveArray;
use vortex::{IntoArray, IntoCanonical};

use crate::alp_rd;

macro_rules! n_reals {
($seed:expr, $n:expr) => {
(0..$n)
.scan($seed, |state, _| {
let prev = *state;
*state = state.next_up();
Some(prev)
})
.collect::<Vec<_>>()
};
}

macro_rules! test_encode_nulls_excs_generic {
($typ:ty, $seed:expr) => {{
// Create a vector of 1024 "real" doubles
let reals = n_reals!($seed, 1024);
// Null out some of the values.
let mut reals: Vec<Option<$typ>> = reals.into_iter().map(Some).collect();
reals[1] = None;
reals[5] = None;
reals[90] = None;

// Create a new array from this.
let real_array = PrimitiveArray::from_nullable_vec(reals.clone());

// Pick a seed that we know will trigger lots of exceptions.
let encoder: alp_rd::Encoder = alp_rd::Encoder::new(&[$seed / 100.0]);

let rd_array = encoder.encode(&real_array);

let decoded = rd_array
.into_array()
.into_canonical()
.unwrap()
.into_primitive()
.unwrap();

let maybe_null_reals: Vec<$typ> =
reals.into_iter().map(|v| v.unwrap_or_default()).collect();
assert_eq!(decoded.maybe_null_slice::<$typ>(), &maybe_null_reals);
}};
}

#[test]
fn test_array_encode_with_nulls_and_exceptions() {
test_encode_nulls_excs_generic!(f32, 1.123_848_f32);
test_encode_nulls_excs_generic!(f64, 1.123_848_591_110_992_f64);
use crate::{alp_rd, ALPRDFloat};

/// Round-trips a nullable float array through ALP-RD encoding and back to a primitive array.
///
/// Runs once per `#[case]`: one `f32` fixture and one `f64` fixture, each a 1024-element
/// vector holding a single repeated value. Three positions are replaced with nulls before
/// encoding. After decoding, the raw value buffer is expected to match the input values, with
/// `T::default()` in the positions that were nulled out.
#[rstest]
#[case(vec![0.1f32.next_up(); 1024], 1.123_848_f32)]
#[case(vec![0.1f64.next_up(); 1024], 1.123_848_591_110_992_f64)]
fn test_array_encode_with_nulls_and_exceptions<T: ALPRDFloat>(
#[case] reals: Vec<T>,
#[case] seed: T,
) {
// Guard the fixture size: the hard-coded indices below (up to 900) must be in bounds.
assert_eq!(reals.len(), 1024, "test expects 1024-length fixture");
// Null out some of the values.
let mut reals: Vec<Option<T>> = reals.into_iter().map(Some).collect();
reals[1] = None;
reals[5] = None;
reals[900] = None;

// Create a new nullable array from this; `reals` is cloned because it is reused below to
// build the expected output.
let real_array = PrimitiveArray::from_nullable_vec(reals.clone());

// Build the encoder from a single value derived from `seed` (not from the fixture values).
// The intent is to pick an input that we know will trigger lots of exceptions.
let encoder: alp_rd::Encoder = alp_rd::Encoder::new(&[seed.powi(-2)]);

let rd_array = encoder.encode(&real_array);

// Decode by converting the encoded array back to its canonical primitive form.
let decoded = rd_array
.into_array()
.into_canonical()
.unwrap()
.into_primitive()
.unwrap();

// Expected raw buffer: original values, with the type's default wherever a null was placed.
let maybe_null_reals: Vec<T> = reals.into_iter().map(|v| v.unwrap_or_default()).collect();
assert_eq!(decoded.maybe_null_slice::<T>(), &maybe_null_reals);
}
}
40 changes: 16 additions & 24 deletions encodings/alp/src/alp_rd/compute/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,36 +25,28 @@ impl FilterFn for ALPRDArray {

#[cfg(test)]
mod test {
use rstest::rstest;
use vortex::array::{BoolArray, PrimitiveArray};
use vortex::compute::filter;
use vortex::IntoArrayVariant;

use crate::Encoder;
use crate::{ALPRDFloat, Encoder};

macro_rules! test_filter_generic {
($typ:ty, $rd:ty) => {
let a: $typ = (0.1 as $typ).next_up();
let b: $typ = (0.2 as $typ).next_up();
let outlier: $typ = (3e25 as $typ).next_up();
#[rstest]
#[case(0.1f32, 0.2f32, 3e25f32)]
#[case(0.1f64, 0.2f64, 3e100f64)]
fn test_filter<T: ALPRDFloat>(#[case] a: T, #[case] b: T, #[case] outlier: T) {
let array = PrimitiveArray::from(vec![a, b, outlier]);
let encoded = Encoder::new(&[a, b]).encode(&array);

let array = PrimitiveArray::from(vec![a, b, outlier]);
let encoded = Encoder::new(&[a, b]).encode(&array);
// Make sure that we're testing the exception pathway.
assert!(encoded.left_parts_exceptions().is_some());

// Make sure that we're testing the exception pathway.
assert!(encoded.left_parts_exceptions().is_some());

// The first two values need no patching
let filtered = filter(encoded.as_ref(), BoolArray::from(vec![true, false, true]))
.unwrap()
.into_primitive()
.unwrap();
assert_eq!(filtered.maybe_null_slice::<$typ>(), &[a, outlier]);
};
}

#[test]
fn test_filter() {
test_filter_generic!(f32, RealFloat);
test_filter_generic!(f64, RealDouble);
// The first two values need no patching
let filtered = filter(encoded.as_ref(), BoolArray::from(vec![true, false, true]))
.unwrap()
.into_primitive()
.unwrap();
assert_eq!(filtered.maybe_null_slice::<T>(), &[a, outlier]);
}
}
43 changes: 20 additions & 23 deletions encodings/alp/src/alp_rd/compute/scalar_at.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,35 +38,32 @@ impl ScalarAtFn for ALPRDArray {

#[cfg(test)]
mod test {
use rstest::rstest;
use vortex::array::PrimitiveArray;
use vortex::compute::unary::scalar_at;
use vortex_scalar::Scalar;

use crate::Encoder;
use crate::{ALPRDFloat, Encoder};

macro_rules! test_scalar_at_generic {
($typ:ty) => {
let a: $typ = (0.1 as $typ).next_up();
let b: $typ = (0.2 as $typ).next_up();
let outlier: $typ = (3e30 as $typ).next_up();
#[rstest]
#[case(0.1f32, 0.2f32, 3e25f32)]
#[case(0.1f64, 0.2f64, 3e100f64)]
fn test_scalar_at<T: ALPRDFloat + Into<Scalar>>(
#[case] a: T,
#[case] b: T,
#[case] outlier: T,
) {
let array = PrimitiveArray::from(vec![a, b, outlier]);
let encoded = Encoder::new(&[a, b]).encode(&array);

let array = PrimitiveArray::from(vec![a, b, outlier]);
let encoded = Encoder::new(&[a, b]).encode(&array);
// Make sure that we're testing the exception pathway.
assert!(encoded.left_parts_exceptions().is_some());

// Make sure that we're testing the exception pathway.
assert!(encoded.left_parts_exceptions().is_some());
// The first two values need no patching
assert_eq!(scalar_at(encoded.as_ref(), 0).unwrap(), a.into());
assert_eq!(scalar_at(encoded.as_ref(), 1).unwrap(), b.into());

// The first two values need no patching
assert_eq!(scalar_at(encoded.as_ref(), 0).unwrap(), a.into());
assert_eq!(scalar_at(encoded.as_ref(), 1).unwrap(), b.into());

// The right value hits the left_part_exceptions
assert_eq!(scalar_at(encoded.as_ref(), 2).unwrap(), outlier.into());
};
}

#[test]
fn test_scalar_at() {
test_scalar_at_generic!(f32);
test_scalar_at_generic!(f64);
// The right value hits the left_part_exceptions
assert_eq!(scalar_at(encoded.as_ref(), 2).unwrap(), outlier.into());
}
}
36 changes: 14 additions & 22 deletions encodings/alp/src/alp_rd/compute/slice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,35 +25,27 @@ impl SliceFn for ALPRDArray {

#[cfg(test)]
mod test {
use rstest::rstest;
use vortex::array::PrimitiveArray;
use vortex::compute::slice;
use vortex::IntoArrayVariant;

use crate::Encoder;
use crate::{ALPRDFloat, Encoder};

macro_rules! test_slice_generic {
($typ:ty) => {
let a: $typ = (0.1 as $typ).next_up();
let b: $typ = (0.2 as $typ).next_up();
let outlier: $typ = (3e30 as $typ).next_up();
#[rstest]
#[case(0.1f32, 0.2f32, 3e25f32)]
#[case(0.1f64, 0.2f64, 3e100f64)]
fn test_slice<T: ALPRDFloat>(#[case] a: T, #[case] b: T, #[case] outlier: T) {
let array = PrimitiveArray::from(vec![a, b, outlier]);
let encoded = Encoder::new(&[a, b]).encode(&array);

let array = PrimitiveArray::from(vec![a, b, outlier]);
let encoded = Encoder::new(&[a, b]).encode(&array);
assert!(encoded.left_parts_exceptions().is_some());

assert!(encoded.left_parts_exceptions().is_some());
let decoded = slice(encoded.as_ref(), 1, 3)
.unwrap()
.into_primitive()
.unwrap();

let decoded = slice(encoded.as_ref(), 1, 3)
.unwrap()
.into_primitive()
.unwrap();

assert_eq!(decoded.maybe_null_slice::<$typ>(), &[b, outlier]);
};
}

#[test]
fn test_slice() {
test_slice_generic!(f32);
test_slice_generic!(f64);
assert_eq!(decoded.maybe_null_slice::<T>(), &[b, outlier]);
}
}
36 changes: 14 additions & 22 deletions encodings/alp/src/alp_rd/compute/take.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,35 +25,27 @@ impl TakeFn for ALPRDArray {

#[cfg(test)]
mod test {
use rstest::rstest;
use vortex::array::PrimitiveArray;
use vortex::compute::take;
use vortex::IntoArrayVariant;

use crate::Encoder;
use crate::{ALPRDFloat, Encoder};

macro_rules! test_take_generic {
($typ:ty) => {
let a: $typ = (0.1 as $typ).next_up();
let b: $typ = (0.2 as $typ).next_up();
let outlier: $typ = (3e30 as $typ).next_up();
#[rstest]
#[case(0.1f32, 0.2f32, 3e25f32)]
#[case(0.1f64, 0.2f64, 3e100f64)]
fn test_take<T: ALPRDFloat>(#[case] a: T, #[case] b: T, #[case] outlier: T) {
let array = PrimitiveArray::from(vec![a, b, outlier]);
let encoded = Encoder::new(&[a, b]).encode(&array);

let array = PrimitiveArray::from(vec![a, b, outlier]);
let encoded = Encoder::new(&[a, b]).encode(&array);
assert!(encoded.left_parts_exceptions().is_some());

assert!(encoded.left_parts_exceptions().is_some());
let taken = take(encoded.as_ref(), PrimitiveArray::from(vec![0, 2]).as_ref())
.unwrap()
.into_primitive()
.unwrap();

let taken = take(encoded.as_ref(), PrimitiveArray::from(vec![0, 2]).as_ref())
.unwrap()
.into_primitive()
.unwrap();

assert_eq!(taken.maybe_null_slice::<$typ>(), &[a, outlier]);
};
}

#[test]
fn test_take() {
test_take_generic!(f32);
test_take_generic!(f64);
assert_eq!(taken.maybe_null_slice::<T>(), &[a, outlier]);
}
}
5 changes: 1 addition & 4 deletions encodings/alp/src/alp_rd/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -196,8 +196,6 @@ impl Encoder {
// SparseArray for exceptions.
let exceptions = (!exceptions_pos.is_empty()).then(|| {
let max_exc_pos = exceptions_pos.last().copied().unwrap_or_default();
// Add one to get next power of two as well here.
// If we're going to be doing more of this, it just works.
let bw = (max_exc_pos + 1).next_power_of_two().ilog2() as usize;

let exc_pos_array = PrimitiveArray::from(exceptions_pos);
Expand Down Expand Up @@ -256,7 +254,7 @@ pub fn alp_rd_decode<T: ALPRDFloat>(
left_parts_decoded.push(<T as ALPRDFloat>::from_u16(dict[*code as usize]));
}

// Apply the exception patches. Only applies for the left-parts
// Apply the exception patches to left_parts
for (pos, val) in exc_pos.iter().zip(exceptions.iter()) {
left_parts_decoded[*pos as usize] = <T as ALPRDFloat>::from_u16(*val);
}
Expand Down Expand Up @@ -364,7 +362,6 @@ fn estimate_compression_size(
struct ALPRDDictionary {
/// Items in the dictionary are bit patterns, along with their 16-bit encoding.
dictionary: HashMap<u16, u16>,
/// Recreate the dictionary by encoding the hash instead.
/// The (compressed) left bit width. This is after bit-packing the dictionary codes.
left_bit_width: u8,
/// The right bit width. This is the bit-packed width of each of the "real double" values.
Expand Down

0 comments on commit bc46516

Please sign in to comment.