cleanups

spiraldb · Oct 2, 2024 · bc46516 · bc46516
1 parent 6dfa5b3
commit bc46516
Show file tree

Hide file tree

Showing 9 changed files with 103 additions and 148 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/encodings/alp/Cargo.toml b/encodings/alp/Cargo.toml
@@ -17,7 +17,6 @@ readme = { workspace = true }
 workspace = true
 
 [dependencies]
-fastlanes = { workspace = true }
 vortex-fastlanes = { workspace = true }
 itertools = { workspace = true }
 num-traits = { workspace = true }
@@ -30,6 +29,7 @@ vortex-scalar = { workspace = true }
 [dev-dependencies]
 arrow = { workspace = true }
 divan = { workspace = true }
+rstest = { workspace = true }
 
 [[bench]]
 name = "alp_compress"

diff --git a/encodings/alp/benches/alp_compress.rs b/encodings/alp/benches/alp_compress.rs
@@ -15,13 +15,13 @@ fn main() {
 }
 
 #[divan::bench(types = [f32, f64], args = [100_000, 10_000_000])]
-fn alp_compress<T: ALPFloat>(n: usize) -> (Exponents, Vec<T::ALPInt>, Vec<u64>, Vec<T>) {
+fn compress_alp<T: ALPFloat>(n: usize) -> (Exponents, Vec<T::ALPInt>, Vec<u64>, Vec<T>) {
     let values: Vec<T> = vec![T::from(1.234).unwrap(); n];
     T::encode(values.as_slice(), None)
 }
 
 #[divan::bench(types = [f32, f64], args = [100_000, 10_000_000])]
-fn rd_compress<T: ALPRDFloat>(bencher: Bencher, n: usize) {
+fn compress_rd<T: ALPRDFloat>(bencher: Bencher, n: usize) {
     let values: Vec<T> = vec![T::from(1.23).unwrap(); n];
     let primitive = PrimitiveArray::from(values);
     let encoder = Encoder::new(&[T::from(1.23).unwrap()]);

diff --git a/encodings/alp/src/alp_rd/array.rs b/encodings/alp/src/alp_rd/array.rs
@@ -225,57 +225,42 @@ impl ArrayTrait for ALPRDArray {}
 
 #[cfg(test)]
 mod test {
+    use rstest::rstest;
     use vortex::array::PrimitiveArray;
     use vortex::{IntoArray, IntoCanonical};
 
-    use crate::alp_rd;
-
-    macro_rules! n_reals {
-        ($seed:expr, $n:expr) => {
-            (0..$n)
-                .scan($seed, |state, _| {
-                    let prev = *state;
-                    *state = state.next_up();
-                    Some(prev)
-                })
-                .collect::<Vec<_>>()
-        };
-    }
-
-    macro_rules! test_encode_nulls_excs_generic {
-        ($typ:ty, $seed:expr) => {{
-            // Create a vector of 1024 "real" doubles
-            let reals = n_reals!($seed, 1024);
-            // Null out some of the values.
-            let mut reals: Vec<Option<$typ>> = reals.into_iter().map(Some).collect();
-            reals[1] = None;
-            reals[5] = None;
-            reals[90] = None;
-
-            // Create a new array from this.
-            let real_array = PrimitiveArray::from_nullable_vec(reals.clone());
-
-            // Pick a seed that we know will trigger lots of exceptions.
-            let encoder: alp_rd::Encoder = alp_rd::Encoder::new(&[$seed / 100.0]);
-
-            let rd_array = encoder.encode(&real_array);
-
-            let decoded = rd_array
-                .into_array()
-                .into_canonical()
-                .unwrap()
-                .into_primitive()
-                .unwrap();
-
-            let maybe_null_reals: Vec<$typ> =
-                reals.into_iter().map(|v| v.unwrap_or_default()).collect();
-            assert_eq!(decoded.maybe_null_slice::<$typ>(), &maybe_null_reals);
-        }};
-    }
-
-    #[test]
-    fn test_array_encode_with_nulls_and_exceptions() {
-        test_encode_nulls_excs_generic!(f32, 1.123_848_f32);
-        test_encode_nulls_excs_generic!(f64, 1.123_848_591_110_992_f64);
+    use crate::{alp_rd, ALPRDFloat};
+
+    #[rstest]
+    #[case(vec![0.1f32.next_up(); 1024], 1.123_848_f32)]
+    #[case(vec![0.1f64.next_up(); 1024], 1.123_848_591_110_992_f64)]
+    fn test_array_encode_with_nulls_and_exceptions<T: ALPRDFloat>(
+        #[case] reals: Vec<T>,
+        #[case] seed: T,
+    ) {
+        assert_eq!(reals.len(), 1024, "test expects 1024-length fixture");
+        // Null out some of the values.
+        let mut reals: Vec<Option<T>> = reals.into_iter().map(Some).collect();
+        reals[1] = None;
+        reals[5] = None;
+        reals[900] = None;
+
+        // Create a new array from this.
+        let real_array = PrimitiveArray::from_nullable_vec(reals.clone());
+
+        // Build the encoder from a seed-derived sample known to trigger lots of exceptions.
+        let encoder: alp_rd::Encoder = alp_rd::Encoder::new(&[seed.powi(-2)]);
+
+        let rd_array = encoder.encode(&real_array);
+
+        let decoded = rd_array
+            .into_array()
+            .into_canonical()
+            .unwrap()
+            .into_primitive()
+            .unwrap();
+
+        let maybe_null_reals: Vec<T> = reals.into_iter().map(|v| v.unwrap_or_default()).collect();
+        assert_eq!(decoded.maybe_null_slice::<T>(), &maybe_null_reals);
     }
 }
diff --git a/encodings/alp/src/alp_rd/compute/filter.rs b/encodings/alp/src/alp_rd/compute/filter.rs
@@ -25,36 +25,28 @@ impl FilterFn for ALPRDArray {
 
 #[cfg(test)]
 mod test {
+    use rstest::rstest;
     use vortex::array::{BoolArray, PrimitiveArray};
     use vortex::compute::filter;
     use vortex::IntoArrayVariant;
 
-    use crate::Encoder;
+    use crate::{ALPRDFloat, Encoder};
 
-    macro_rules! test_filter_generic {
-        ($typ:ty, $rd:ty) => {
-            let a: $typ = (0.1 as $typ).next_up();
-            let b: $typ = (0.2 as $typ).next_up();
-            let outlier: $typ = (3e25 as $typ).next_up();
+    #[rstest]
+    #[case(0.1f32, 0.2f32, 3e25f32)]
+    #[case(0.1f64, 0.2f64, 3e100f64)]
+    fn test_filter<T: ALPRDFloat>(#[case] a: T, #[case] b: T, #[case] outlier: T) {
+        let array = PrimitiveArray::from(vec![a, b, outlier]);
+        let encoded = Encoder::new(&[a, b]).encode(&array);
 
-            let array = PrimitiveArray::from(vec![a, b, outlier]);
-            let encoded = Encoder::new(&[a, b]).encode(&array);
+        // Make sure that we're testing the exception pathway.
+        assert!(encoded.left_parts_exceptions().is_some());
 
-            // Make sure that we're testing the exception pathway.
-            assert!(encoded.left_parts_exceptions().is_some());
-
-            // The first two values need no patching
-            let filtered = filter(encoded.as_ref(), BoolArray::from(vec![true, false, true]))
-                .unwrap()
-                .into_primitive()
-                .unwrap();
-            assert_eq!(filtered.maybe_null_slice::<$typ>(), &[a, outlier]);
-        };
-    }
-
-    #[test]
-    fn test_filter() {
-        test_filter_generic!(f32, RealFloat);
-        test_filter_generic!(f64, RealDouble);
+        // Keep `a` (no patching needed) and `outlier` (expected to hit the exception path)
+        let filtered = filter(encoded.as_ref(), BoolArray::from(vec![true, false, true]))
+            .unwrap()
+            .into_primitive()
+            .unwrap();
+        assert_eq!(filtered.maybe_null_slice::<T>(), &[a, outlier]);
     }
 }
diff --git a/encodings/alp/src/alp_rd/compute/scalar_at.rs b/encodings/alp/src/alp_rd/compute/scalar_at.rs
@@ -38,35 +38,32 @@ impl ScalarAtFn for ALPRDArray {
 
 #[cfg(test)]
 mod test {
+    use rstest::rstest;
     use vortex::array::PrimitiveArray;
     use vortex::compute::unary::scalar_at;
+    use vortex_scalar::Scalar;
 
-    use crate::Encoder;
+    use crate::{ALPRDFloat, Encoder};
 
-    macro_rules! test_scalar_at_generic {
-        ($typ:ty) => {
-            let a: $typ = (0.1 as $typ).next_up();
-            let b: $typ = (0.2 as $typ).next_up();
-            let outlier: $typ = (3e30 as $typ).next_up();
+    #[rstest]
+    #[case(0.1f32, 0.2f32, 3e25f32)]
+    #[case(0.1f64, 0.2f64, 3e100f64)]
+    fn test_scalar_at<T: ALPRDFloat + Into<Scalar>>(
+        #[case] a: T,
+        #[case] b: T,
+        #[case] outlier: T,
+    ) {
+        let array = PrimitiveArray::from(vec![a, b, outlier]);
+        let encoded = Encoder::new(&[a, b]).encode(&array);
 
-            let array = PrimitiveArray::from(vec![a, b, outlier]);
-            let encoded = Encoder::new(&[a, b]).encode(&array);
+        // Make sure that we're testing the exception pathway.
+        assert!(encoded.left_parts_exceptions().is_some());
 
-            // Make sure that we're testing the exception pathway.
-            assert!(encoded.left_parts_exceptions().is_some());
+        // The first two values need no patching
+        assert_eq!(scalar_at(encoded.as_ref(), 0).unwrap(), a.into());
+        assert_eq!(scalar_at(encoded.as_ref(), 1).unwrap(), b.into());
 
-            // The first two values need no patching
-            assert_eq!(scalar_at(encoded.as_ref(), 0).unwrap(), a.into());
-            assert_eq!(scalar_at(encoded.as_ref(), 1).unwrap(), b.into());
-
-            // The right value hits the left_part_exceptions
-            assert_eq!(scalar_at(encoded.as_ref(), 2).unwrap(), outlier.into());
-        };
-    }
-
-    #[test]
-    fn test_scalar_at() {
-        test_scalar_at_generic!(f32);
-        test_scalar_at_generic!(f64);
+        // The last value (the outlier) hits the left_parts_exceptions
+        assert_eq!(scalar_at(encoded.as_ref(), 2).unwrap(), outlier.into());
     }
 }
diff --git a/encodings/alp/src/alp_rd/compute/slice.rs b/encodings/alp/src/alp_rd/compute/slice.rs
@@ -25,35 +25,27 @@ impl SliceFn for ALPRDArray {
 
 #[cfg(test)]
 mod test {
+    use rstest::rstest;
     use vortex::array::PrimitiveArray;
     use vortex::compute::slice;
     use vortex::IntoArrayVariant;
 
-    use crate::Encoder;
+    use crate::{ALPRDFloat, Encoder};
 
-    macro_rules! test_slice_generic {
-        ($typ:ty) => {
-            let a: $typ = (0.1 as $typ).next_up();
-            let b: $typ = (0.2 as $typ).next_up();
-            let outlier: $typ = (3e30 as $typ).next_up();
+    #[rstest]
+    #[case(0.1f32, 0.2f32, 3e25f32)]
+    #[case(0.1f64, 0.2f64, 3e100f64)]
+    fn test_slice<T: ALPRDFloat>(#[case] a: T, #[case] b: T, #[case] outlier: T) {
+        let array = PrimitiveArray::from(vec![a, b, outlier]);
+        let encoded = Encoder::new(&[a, b]).encode(&array);
 
-            let array = PrimitiveArray::from(vec![a, b, outlier]);
-            let encoded = Encoder::new(&[a, b]).encode(&array);
+        assert!(encoded.left_parts_exceptions().is_some());
 
-            assert!(encoded.left_parts_exceptions().is_some());
+        let decoded = slice(encoded.as_ref(), 1, 3)
+            .unwrap()
+            .into_primitive()
+            .unwrap();
 
-            let decoded = slice(encoded.as_ref(), 1, 3)
-                .unwrap()
-                .into_primitive()
-                .unwrap();
-
-            assert_eq!(decoded.maybe_null_slice::<$typ>(), &[b, outlier]);
-        };
-    }
-
-    #[test]
-    fn test_slice() {
-        test_slice_generic!(f32);
-        test_slice_generic!(f64);
+        assert_eq!(decoded.maybe_null_slice::<T>(), &[b, outlier]);
     }
 }
diff --git a/encodings/alp/src/alp_rd/compute/take.rs b/encodings/alp/src/alp_rd/compute/take.rs
@@ -25,35 +25,27 @@ impl TakeFn for ALPRDArray {
 
 #[cfg(test)]
 mod test {
+    use rstest::rstest;
     use vortex::array::PrimitiveArray;
     use vortex::compute::take;
     use vortex::IntoArrayVariant;
 
-    use crate::Encoder;
+    use crate::{ALPRDFloat, Encoder};
 
-    macro_rules! test_take_generic {
-        ($typ:ty) => {
-            let a: $typ = (0.1 as $typ).next_up();
-            let b: $typ = (0.2 as $typ).next_up();
-            let outlier: $typ = (3e30 as $typ).next_up();
+    #[rstest]
+    #[case(0.1f32, 0.2f32, 3e25f32)]
+    #[case(0.1f64, 0.2f64, 3e100f64)]
+    fn test_take<T: ALPRDFloat>(#[case] a: T, #[case] b: T, #[case] outlier: T) {
+        let array = PrimitiveArray::from(vec![a, b, outlier]);
+        let encoded = Encoder::new(&[a, b]).encode(&array);
 
-            let array = PrimitiveArray::from(vec![a, b, outlier]);
-            let encoded = Encoder::new(&[a, b]).encode(&array);
+        assert!(encoded.left_parts_exceptions().is_some());
 
-            assert!(encoded.left_parts_exceptions().is_some());
+        let taken = take(encoded.as_ref(), PrimitiveArray::from(vec![0, 2]).as_ref())
+            .unwrap()
+            .into_primitive()
+            .unwrap();
 
-            let taken = take(encoded.as_ref(), PrimitiveArray::from(vec![0, 2]).as_ref())
-                .unwrap()
-                .into_primitive()
-                .unwrap();
-
-            assert_eq!(taken.maybe_null_slice::<$typ>(), &[a, outlier]);
-        };
-    }
-
-    #[test]
-    fn test_take() {
-        test_take_generic!(f32);
-        test_take_generic!(f64);
+        assert_eq!(taken.maybe_null_slice::<T>(), &[a, outlier]);
     }
 }
diff --git a/encodings/alp/src/alp_rd/mod.rs b/encodings/alp/src/alp_rd/mod.rs
@@ -196,8 +196,6 @@ impl Encoder {
         // SparseArray for exceptions.
         let exceptions = (!exceptions_pos.is_empty()).then(|| {
             let max_exc_pos = exceptions_pos.last().copied().unwrap_or_default();
-            // Add one to get next power of two as well here.
-            // If we're going to be doing more of this, it just works.
             let bw = (max_exc_pos + 1).next_power_of_two().ilog2() as usize;
 
             let exc_pos_array = PrimitiveArray::from(exceptions_pos);
@@ -256,7 +254,7 @@ pub fn alp_rd_decode<T: ALPRDFloat>(
         left_parts_decoded.push(<T as ALPRDFloat>::from_u16(dict[*code as usize]));
     }
 
-    // Apply the exception patches. Only applies for the left-parts
+    // Apply the exception patches to left_parts
     for (pos, val) in exc_pos.iter().zip(exceptions.iter()) {
         left_parts_decoded[*pos as usize] = <T as ALPRDFloat>::from_u16(*val);
     }
@@ -364,7 +362,6 @@ fn estimate_compression_size(
 struct ALPRDDictionary {
     /// Items in the dictionary are bit patterns, along with their 16-bit encoding.
     dictionary: HashMap<u16, u16>,
-    /// Recreate the dictionary by encoding the hash instead.
     /// The (compressed) left bit width. This is after bit-packing the dictionary codes.
     left_bit_width: u8,
     /// The right bit width. This is the bit-packed width of each of the "real double" values.