feat: implement ALP-RD compression (#947)

Fixes #10: Add ALP-RD compression.

Currently our only floating point compression algorithm is standard ALP, which targets floats/doubles that are originally decimal, and thus have some natural integer they can round to when you undo the exponent. For science/math datasets, there are a lot of "real doubles", i.e. floating point numbers that use most/all of their available precision. These do not compress with standard ALP. The ALP paper authors had a solution for this called "ALP for 'Real' Doubles" / ALP-RD, which is implemented in this PR.

## Basics

The key insight of ALP-RD is that even for dense floating point numbers, within a column they often share the front bits (exponent + first few bits of mantissa). We try to find the best cut-point within the leftmost 16 bits. There are generally a small number of unique values for the leftmost bits, so you can create a dictionary of fixed size (here we use the choice of 8 from the C++ implementation) which naturally bit-packs down to 3 bits. If you compress perfectly without exceptions, you can store doubles in 53 bits/value instead of 64, i.e. ~17% compression. In practice the amount varies. In the comments below you can see a test with the POI dataset referenced in the ALP paper, and we replicate their results of 55 and 56 bits/value respectively.

## List of changes

* Reorganized the `vortex-alp` crate. I created two top-level modules, `alp` and `alp_rd`, and moved the previous implementation into the `alp` module
* Added new `ALPRDArray` in the `alp_rd` module. It supports both f32 and f64, and all major compute functions are implemented (save for `MaybeCompareFn` and the Accessors; I will file an issue to implement these in a follow-up if that's alright, as this PR is already quite large)
* Added corresponding `ALPRDCompressor` and wired the CompressorRef everywhere I could find ALPCompressor
* New benchmark for RD compression in the existing ALP benchmarks suite
spiraldb · Oct 2, 2024 · 389e6a4 · 389e6a4
1 parent 251d3ed
commit 389e6a4
Show file tree

Hide file tree

Showing 24 changed files with 1,101 additions and 14 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/bench-vortex/src/lib.rs b/bench-vortex/src/lib.rs
@@ -20,6 +20,7 @@ use vortex::{Array, Context, IntoArray};
 use vortex_dtype::DType;
 use vortex_fastlanes::DeltaEncoding;
 use vortex_sampling_compressor::compressors::alp::ALPCompressor;
+use vortex_sampling_compressor::compressors::alp_rd::ALPRDCompressor;
 use vortex_sampling_compressor::compressors::bitpacked::BitPackedCompressor;
 use vortex_sampling_compressor::compressors::date_time_parts::DateTimePartsCompressor;
 use vortex_sampling_compressor::compressors::dict::DictCompressor;
@@ -54,6 +55,7 @@ lazy_static! {
 lazy_static! {
     pub static ref COMPRESSORS: HashSet<CompressorRef<'static>> = [
         &ALPCompressor as CompressorRef<'static>,
+        &ALPRDCompressor,
         &DictCompressor,
         &BitPackedCompressor,
         &FoRCompressor,

diff --git a/bench-vortex/src/reader.rs b/bench-vortex/src/reader.rs
@@ -89,7 +89,7 @@ pub async fn rewrite_parquet_as_vortex<W: VortexWrite>(
     Ok(())
 }
 
-pub fn read_parquet_to_vortex(parquet_path: &Path) -> VortexResult<ChunkedArray> {
+pub fn read_parquet_to_vortex<P: AsRef<Path>>(parquet_path: P) -> VortexResult<ChunkedArray> {
     let taxi_pq = File::open(parquet_path)?;
     let builder = ParquetRecordBatchReaderBuilder::try_new(taxi_pq)?;
     // FIXME(ngates): #157 the compressor should handle batch size.

diff --git a/encodings/alp/Cargo.toml b/encodings/alp/Cargo.toml
@@ -17,6 +17,7 @@ readme = { workspace = true }
 workspace = true
 
 [dependencies]
+vortex-fastlanes = { workspace = true }
 itertools = { workspace = true }
 num-traits = { workspace = true }
 serde = { workspace = true, features = ["derive"] }
@@ -28,6 +29,7 @@ vortex-scalar = { workspace = true }
 [dev-dependencies]
 arrow = { workspace = true }
 divan = { workspace = true }
+rstest = { workspace = true }
 
 [[bench]]
 name = "alp_compress"

diff --git a/encodings/alp/benches/alp_compress.rs b/encodings/alp/benches/alp_compress.rs
@@ -7,19 +7,28 @@ use vortex::array::PrimitiveArray;
 use vortex::validity::Validity;
 use vortex::variants::PrimitiveArrayTrait;
 use vortex::IntoCanonical;
-use vortex_alp::{alp_encode_components, ALPArray, ALPFloat, Exponents};
+use vortex_alp::{alp_encode_components, ALPArray, ALPFloat, ALPRDFloat, Exponents, RDEncoder};
 use vortex_dtype::NativePType;
 
 fn main() {
     divan::main();
 }
 
 #[divan::bench(types = [f32, f64], args = [100_000, 10_000_000])]
-fn alp_compress<T: ALPFloat>(n: usize) -> (Exponents, Vec<T::ALPInt>, Vec<u64>, Vec<T>) {
+fn compress_alp<T: ALPFloat>(n: usize) -> (Exponents, Vec<T::ALPInt>, Vec<u64>, Vec<T>) {
     let values: Vec<T> = vec![T::from(1.234).unwrap(); n];
     T::encode(values.as_slice(), None)
 }
 
+#[divan::bench(types = [f32, f64], args = [100_000, 10_000_000])]
+fn compress_rd<T: ALPRDFloat>(bencher: Bencher, n: usize) {
+    let values: Vec<T> = vec![T::from(1.23).unwrap(); n];
+    let primitive = PrimitiveArray::from(values);
+    let encoder = RDEncoder::new(&[T::from(1.23).unwrap()]);
+
+    bencher.bench_local(|| encoder.encode(&primitive));
+}
+
 #[divan::bench(types = [f32, f64], args = [100_000, 1_000_000, 10_000_000])]
 fn alp_iter<T>(bencher: Bencher, n: usize)
 where

diff --git a/encodings/alp/src/array.rs → encodings/alp/src/alp/array.rs b/encodings/alp/src/array.rs → encodings/alp/src/alp/array.rs
@@ -15,8 +15,7 @@ use vortex::{
 use vortex_dtype::{DType, PType};
 use vortex_error::{vortex_bail, vortex_panic, VortexExpect as _, VortexResult};
 
-use crate::alp::Exponents;
-use crate::compress::{alp_encode, decompress};
+use crate::alp::{alp_encode, decompress, Exponents};
 use crate::ALPFloat;
 
 impl_encoding!("vortex.alp", ids::ALP, ALP);

diff --git a/encodings/alp/src/compress.rs → encodings/alp/src/alp/compress.rs b/encodings/alp/src/compress.rs → encodings/alp/src/alp/compress.rs
@@ -5,8 +5,7 @@ use vortex_dtype::{NativePType, PType};
 use vortex_error::{vortex_bail, VortexExpect as _, VortexResult};
 use vortex_scalar::ScalarValue;
 
-use crate::alp::ALPFloat;
-use crate::array::ALPArray;
+use crate::alp::{ALPArray, ALPFloat};
 use crate::Exponents;
 
 #[macro_export]

diff --git a/encodings/alp/src/compute.rs → encodings/alp/src/alp/compute.rs b/encodings/alp/src/compute.rs → encodings/alp/src/alp/compute.rs
diff --git a/encodings/alp/src/alp.rs → encodings/alp/src/alp/mod.rs b/encodings/alp/src/alp.rs → encodings/alp/src/alp/mod.rs
@@ -5,6 +5,13 @@ use itertools::Itertools;
 use num_traits::{CheckedSub, Float, PrimInt, ToPrimitive};
 use serde::{Deserialize, Serialize};
 
+mod array;
+mod compress;
+mod compute;
+
+pub use array::*;
+pub use compress::*;
+
 const SAMPLE_SIZE: usize = 32;
 
 #[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize)]
@@ -19,7 +26,14 @@ impl Display for Exponents {
     }
 }
 
-pub trait ALPFloat: Float + Display + 'static {
+mod private {
+    pub trait Sealed {}
+
+    impl Sealed for f32 {}
+    impl Sealed for f64 {}
+}
+
+pub trait ALPFloat: private::Sealed + Float + Display + 'static {
     type ALPInt: PrimInt + Display + ToPrimitive;
 
     const FRACTIONAL_BITS: u8;