From 389e6a40f210c2423b8226b207a7b8b85b3950db Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Wed, 2 Oct 2024 12:07:21 -0400 Subject: [PATCH] feat: implement ALP-RD compression (#947) Fixes #10: Add ALP-RD compression. Currently our only floating point compression algorithm is standard ALP, which targets floats/doubles that are originally decimal, and thus have some natural integer they can round to when you undo the exponent. For science/math datasets, there are a lot of "real doubles", i.e. floating point numbers that use most/all of their available precision. These do not compress with standard ALP. The ALP paper authors had a solution for this called "ALP for 'Real' Doubles" / ALP-RD, which is implemented in this PR. ## Basics The key insight of ALP-RD is that even for dense floating point numbers, within a column they often share the front bits (exponent + first few bits of mantissa). We try and find the best cut-point within the leftmost 16-bits. There are generally a small number of unique values for the leftmost bits, so you can create a dictionary of fixed size (here we use the choice of 8 from the C++ implementation) which naturally bit-packs down to 3 bits. If you compress perfectly without exceptions, you can store 53 bits/value, ~17% compression. In practice the amount varies. In the comments below you can see a test with the POI dataset referenced in the ALP paper, and we replicate their results of 55 and 56 bits/value respectively. ## List of changes * Reorganized the `vortex-alp` crate. I created two top-level modules, `alp` and `alp_rd`, and moved the previous implementation into the `alp` module * Added new `ALPRDArray` in the `alp_rd` module. 
It supports both f32 and f64, and all major compute functions are implemented (save for `MaybeCompareFn` and the Accessors I will file an issue to implement these in a FLUP if alright, this PR is already quite large) * Added corresponding `ALPRDCompressor` and wired the CompressorRef everywhere I could find ALPCompressor * New benchmark for RD compression in the existing ALP benchmarks suite --- Cargo.lock | 2 + bench-vortex/src/lib.rs | 2 + bench-vortex/src/reader.rs | 2 +- encodings/alp/Cargo.toml | 2 + encodings/alp/benches/alp_compress.rs | 13 +- encodings/alp/src/{ => alp}/array.rs | 3 +- encodings/alp/src/{ => alp}/compress.rs | 3 +- encodings/alp/src/{ => alp}/compute.rs | 0 encodings/alp/src/{alp.rs => alp/mod.rs} | 16 +- encodings/alp/src/alp_rd/array.rs | 270 ++++++++++++ encodings/alp/src/alp_rd/compute/filter.rs | 52 +++ encodings/alp/src/alp_rd/compute/mod.rs | 27 ++ encodings/alp/src/alp_rd/compute/scalar_at.rs | 68 +++ encodings/alp/src/alp_rd/compute/slice.rs | 51 +++ encodings/alp/src/alp_rd/compute/take.rs | 51 +++ encodings/alp/src/alp_rd/mod.rs | 404 ++++++++++++++++++ encodings/alp/src/alp_rd/variants.rs | 15 + encodings/alp/src/lib.rs | 24 +- .../fastlanes/src/bitpacking/compress.rs | 24 ++ vortex-array/src/encoding.rs | 1 + .../src/compressors/alp_rd.rs | 78 ++++ .../src/compressors/mod.rs | 1 + vortex-sampling-compressor/src/lib.rs | 4 +- vortex-sampling-compressor/tests/smoketest.rs | 2 + 24 files changed, 1101 insertions(+), 14 deletions(-) rename encodings/alp/src/{ => alp}/array.rs (99%) rename encodings/alp/src/{ => alp}/compress.rs (99%) rename encodings/alp/src/{ => alp}/compute.rs (100%) rename encodings/alp/src/{alp.rs => alp/mod.rs} (97%) create mode 100644 encodings/alp/src/alp_rd/array.rs create mode 100644 encodings/alp/src/alp_rd/compute/filter.rs create mode 100644 encodings/alp/src/alp_rd/compute/mod.rs create mode 100644 encodings/alp/src/alp_rd/compute/scalar_at.rs create mode 100644 
encodings/alp/src/alp_rd/compute/slice.rs create mode 100644 encodings/alp/src/alp_rd/compute/take.rs create mode 100644 encodings/alp/src/alp_rd/mod.rs create mode 100644 encodings/alp/src/alp_rd/variants.rs create mode 100644 vortex-sampling-compressor/src/compressors/alp_rd.rs diff --git a/Cargo.lock b/Cargo.lock index 4ea23ee2a3..3cc43cab3c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4280,10 +4280,12 @@ dependencies = [ "divan", "itertools 0.13.0", "num-traits", + "rstest", "serde", "vortex-array", "vortex-dtype", "vortex-error", + "vortex-fastlanes", "vortex-scalar", ] diff --git a/bench-vortex/src/lib.rs b/bench-vortex/src/lib.rs index 04f8b364db..2dc8d871e6 100644 --- a/bench-vortex/src/lib.rs +++ b/bench-vortex/src/lib.rs @@ -20,6 +20,7 @@ use vortex::{Array, Context, IntoArray}; use vortex_dtype::DType; use vortex_fastlanes::DeltaEncoding; use vortex_sampling_compressor::compressors::alp::ALPCompressor; +use vortex_sampling_compressor::compressors::alp_rd::ALPRDCompressor; use vortex_sampling_compressor::compressors::bitpacked::BitPackedCompressor; use vortex_sampling_compressor::compressors::date_time_parts::DateTimePartsCompressor; use vortex_sampling_compressor::compressors::dict::DictCompressor; @@ -54,6 +55,7 @@ lazy_static! { lazy_static! { pub static ref COMPRESSORS: HashSet> = [ &ALPCompressor as CompressorRef<'static>, + &ALPRDCompressor, &DictCompressor, &BitPackedCompressor, &FoRCompressor, diff --git a/bench-vortex/src/reader.rs b/bench-vortex/src/reader.rs index 77e6dcca08..23eb7c2bfd 100644 --- a/bench-vortex/src/reader.rs +++ b/bench-vortex/src/reader.rs @@ -89,7 +89,7 @@ pub async fn rewrite_parquet_as_vortex( Ok(()) } -pub fn read_parquet_to_vortex(parquet_path: &Path) -> VortexResult { +pub fn read_parquet_to_vortex>(parquet_path: P) -> VortexResult { let taxi_pq = File::open(parquet_path)?; let builder = ParquetRecordBatchReaderBuilder::try_new(taxi_pq)?; // FIXME(ngates): #157 the compressor should handle batch size. 
diff --git a/encodings/alp/Cargo.toml b/encodings/alp/Cargo.toml index 8ea9a66e2b..1e1f502e51 100644 --- a/encodings/alp/Cargo.toml +++ b/encodings/alp/Cargo.toml @@ -17,6 +17,7 @@ readme = { workspace = true } workspace = true [dependencies] +vortex-fastlanes = { workspace = true } itertools = { workspace = true } num-traits = { workspace = true } serde = { workspace = true, features = ["derive"] } @@ -28,6 +29,7 @@ vortex-scalar = { workspace = true } [dev-dependencies] arrow = { workspace = true } divan = { workspace = true } +rstest = { workspace = true } [[bench]] name = "alp_compress" diff --git a/encodings/alp/benches/alp_compress.rs b/encodings/alp/benches/alp_compress.rs index d88728557f..c571de4018 100644 --- a/encodings/alp/benches/alp_compress.rs +++ b/encodings/alp/benches/alp_compress.rs @@ -7,7 +7,7 @@ use vortex::array::PrimitiveArray; use vortex::validity::Validity; use vortex::variants::PrimitiveArrayTrait; use vortex::IntoCanonical; -use vortex_alp::{alp_encode_components, ALPArray, ALPFloat, Exponents}; +use vortex_alp::{alp_encode_components, ALPArray, ALPFloat, ALPRDFloat, Exponents, RDEncoder}; use vortex_dtype::NativePType; fn main() { @@ -15,11 +15,20 @@ fn main() { } #[divan::bench(types = [f32, f64], args = [100_000, 10_000_000])] -fn alp_compress(n: usize) -> (Exponents, Vec, Vec, Vec) { +fn compress_alp(n: usize) -> (Exponents, Vec, Vec, Vec) { let values: Vec = vec![T::from(1.234).unwrap(); n]; T::encode(values.as_slice(), None) } +#[divan::bench(types = [f32, f64], args = [100_000, 10_000_000])] +fn compress_rd(bencher: Bencher, n: usize) { + let values: Vec = vec![T::from(1.23).unwrap(); n]; + let primitive = PrimitiveArray::from(values); + let encoder = RDEncoder::new(&[T::from(1.23).unwrap()]); + + bencher.bench_local(|| encoder.encode(&primitive)); +} + #[divan::bench(types = [f32, f64], args = [100_000, 1_000_000, 10_000_000])] fn alp_iter(bencher: Bencher, n: usize) where diff --git a/encodings/alp/src/array.rs 
b/encodings/alp/src/alp/array.rs similarity index 99% rename from encodings/alp/src/array.rs rename to encodings/alp/src/alp/array.rs index d14fc41407..4717431557 100644 --- a/encodings/alp/src/array.rs +++ b/encodings/alp/src/alp/array.rs @@ -15,8 +15,7 @@ use vortex::{ use vortex_dtype::{DType, PType}; use vortex_error::{vortex_bail, vortex_panic, VortexExpect as _, VortexResult}; -use crate::alp::Exponents; -use crate::compress::{alp_encode, decompress}; +use crate::alp::{alp_encode, decompress, Exponents}; use crate::ALPFloat; impl_encoding!("vortex.alp", ids::ALP, ALP); diff --git a/encodings/alp/src/compress.rs b/encodings/alp/src/alp/compress.rs similarity index 99% rename from encodings/alp/src/compress.rs rename to encodings/alp/src/alp/compress.rs index 12e8388fa9..d0c0aaad96 100644 --- a/encodings/alp/src/compress.rs +++ b/encodings/alp/src/alp/compress.rs @@ -5,8 +5,7 @@ use vortex_dtype::{NativePType, PType}; use vortex_error::{vortex_bail, VortexExpect as _, VortexResult}; use vortex_scalar::ScalarValue; -use crate::alp::ALPFloat; -use crate::array::ALPArray; +use crate::alp::{ALPArray, ALPFloat}; use crate::Exponents; #[macro_export] diff --git a/encodings/alp/src/compute.rs b/encodings/alp/src/alp/compute.rs similarity index 100% rename from encodings/alp/src/compute.rs rename to encodings/alp/src/alp/compute.rs diff --git a/encodings/alp/src/alp.rs b/encodings/alp/src/alp/mod.rs similarity index 97% rename from encodings/alp/src/alp.rs rename to encodings/alp/src/alp/mod.rs index 710b8b25b3..7297fc0497 100644 --- a/encodings/alp/src/alp.rs +++ b/encodings/alp/src/alp/mod.rs @@ -5,6 +5,13 @@ use itertools::Itertools; use num_traits::{CheckedSub, Float, PrimInt, ToPrimitive}; use serde::{Deserialize, Serialize}; +mod array; +mod compress; +mod compute; + +pub use array::*; +pub use compress::*; + const SAMPLE_SIZE: usize = 32; #[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize)] @@ -19,7 +26,14 @@ impl Display for Exponents { } } -pub 
trait ALPFloat: Float + Display + 'static { +mod private { + pub trait Sealed {} + + impl Sealed for f32 {} + impl Sealed for f64 {} +} + +pub trait ALPFloat: private::Sealed + Float + Display + 'static { type ALPInt: PrimInt + Display + ToPrimitive; const FRACTIONAL_BITS: u8; diff --git a/encodings/alp/src/alp_rd/array.rs b/encodings/alp/src/alp_rd/array.rs new file mode 100644 index 0000000000..d942a104ee --- /dev/null +++ b/encodings/alp/src/alp_rd/array.rs @@ -0,0 +1,270 @@ +use serde::{Deserialize, Serialize}; +use vortex::array::{PrimitiveArray, SparseArray}; +use vortex::encoding::ids; +use vortex::stats::{ArrayStatisticsCompute, StatsSet}; +use vortex::validity::{ArrayValidity, LogicalValidity}; +use vortex::visitor::{AcceptArrayVisitor, ArrayVisitor}; +use vortex::{impl_encoding, Array, ArrayDType, ArrayDef, ArrayTrait, Canonical, IntoCanonical}; +use vortex_dtype::{DType, PType}; +use vortex_error::{vortex_bail, VortexExpect, VortexResult}; + +use crate::alp_rd::alp_rd_decode; + +impl_encoding!("vortex.alprd", ids::ALP_RD, ALPRD); + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ALPRDMetadata { + right_bit_width: u8, + dict_len: u8, + dict: [u16; 8], + left_parts_dtype: DType, + has_exceptions: bool, +} + +impl ALPRDArray { + pub fn try_new( + dtype: DType, + left_parts: Array, + left_parts_dict: impl AsRef<[u16]>, + right_parts: Array, + right_bit_width: u8, + left_parts_exceptions: Option, + ) -> VortexResult { + if !dtype.is_float() { + vortex_bail!("ALPRDArray given invalid DType ({dtype})"); + } + + if left_parts.len() != right_parts.len() { + vortex_bail!("left_parts and right_parts must be of same length"); + } + + let len = left_parts.len(); + + if !left_parts.dtype().is_unsigned_int() { + vortex_bail!("left_parts dtype must be uint"); + } + + let left_parts_dtype = left_parts.dtype().clone(); + + if !right_parts.dtype().is_unsigned_int() { + vortex_bail!("right_parts dtype must be uint"); + } + + let mut children = vec![left_parts, 
right_parts]; + let has_exceptions = left_parts_exceptions.is_some(); + + if let Some(exceptions) = left_parts_exceptions { + // Enforce that the exceptions are SparseArray so that we have access to indices and values. + if exceptions.encoding().id().code() != ids::SPARSE { + vortex_bail!("left_parts_exceptions must be SparseArray encoded"); + } + children.push(exceptions); + } + + let mut dict = [0u16; 8]; + for (idx, v) in left_parts_dict.as_ref().iter().enumerate() { + dict[idx] = *v; + } + + Self::try_from_parts( + dtype, + len, + ALPRDMetadata { + right_bit_width, + dict_len: left_parts_dict.as_ref().len() as u8, + dict, + left_parts_dtype, + has_exceptions, + }, + children.into(), + StatsSet::new(), + ) + } + + /// Returns true if logical type of the array values is f32. + /// + /// Returns false if the logical type of the array values is f64. + #[inline] + pub fn is_f32(&self) -> bool { + PType::try_from(self.dtype()).vortex_expect("ALPRDArray must have primitive type") + == PType::F32 + } + + /// The leftmost (most significant) bits of the floating point values stored in the array. + /// + /// These are bit-packed and dictionary encoded, and cannot directly be interpreted without + /// the metadata of this array. + pub fn left_parts(&self) -> Array { + self.as_ref() + .child(0, &self.metadata().left_parts_dtype, self.len()) + .vortex_expect("ALPRDArray: left_parts child") + } + + /// The rightmost (least significant) bits of the floating point values stored in the array. + pub fn right_parts(&self) -> Array { + let uint_ptype = if self.is_f32() { + PType::U32 + } else { + PType::U64 + }; + + self.as_ref() + .child( + 1, + &DType::Primitive(uint_ptype, self.metadata().left_parts_dtype.nullability()), + self.len(), + ) + .vortex_expect("ALPRDArray: right_parts child") + } + + /// Patches of left-most bits. 
+ pub fn left_parts_exceptions(&self) -> Option { + self.metadata().has_exceptions.then(|| { + self.as_ref() + .child( + 2, + &self.metadata().left_parts_dtype.as_nullable(), + self.len(), + ) + .vortex_expect("ALPRDArray: left_parts_exceptions child") + }) + } + + /// The dictionary that maps the codes in `left_parts` into bit patterns. + #[inline] + pub fn left_parts_dict(&self) -> &[u16] { + &self.metadata().dict[0..self.metadata().dict_len as usize] + } + + #[inline] + pub(crate) fn right_bit_width(&self) -> u8 { + self.metadata().right_bit_width + } +} + +impl IntoCanonical for ALPRDArray { + fn into_canonical(self) -> VortexResult { + let left_parts = self.left_parts().into_canonical()?.into_primitive()?; + let right_parts = self.right_parts().into_canonical()?.into_primitive()?; + + // Decode the left_parts using our builtin dictionary. + let left_parts_dict = &self.metadata().dict[0..self.metadata().dict_len as usize]; + + let exc_pos: Vec; + let exc_u16: PrimitiveArray; + + if let Some(left_parts_exceptions) = self.left_parts_exceptions() { + let left_parts_exceptions = SparseArray::try_from(left_parts_exceptions) + .vortex_expect("ALPRDArray: exceptions must be SparseArray encoded"); + exc_pos = left_parts_exceptions + .resolved_indices() + .into_iter() + .map(|v| v as _) + .collect(); + exc_u16 = left_parts_exceptions + .values() + .into_canonical()? 
+ .into_primitive()?; + } else { + exc_pos = Vec::new(); + exc_u16 = PrimitiveArray::from(Vec::::new()); + } + + let decoded_array = if self.is_f32() { + PrimitiveArray::from_vec( + alp_rd_decode::( + left_parts.maybe_null_slice::(), + left_parts_dict, + self.metadata().right_bit_width, + right_parts.maybe_null_slice::(), + &exc_pos, + exc_u16.maybe_null_slice::(), + ), + self.logical_validity().into_validity(), + ) + } else { + PrimitiveArray::from_vec( + alp_rd_decode::( + left_parts.maybe_null_slice::(), + left_parts_dict, + self.metadata().right_bit_width, + right_parts.maybe_null_slice::(), + &exc_pos, + exc_u16.maybe_null_slice::(), + ), + self.logical_validity().into_validity(), + ) + }; + + Ok(Canonical::Primitive(decoded_array)) + } +} + +impl ArrayValidity for ALPRDArray { + fn is_valid(&self, index: usize) -> bool { + // Use validity from left_parts + self.left_parts().with_dyn(|a| a.is_valid(index)) + } + + fn logical_validity(&self) -> LogicalValidity { + self.left_parts().with_dyn(|a| a.logical_validity()) + } +} + +impl AcceptArrayVisitor for ALPRDArray { + fn accept(&self, visitor: &mut dyn ArrayVisitor) -> VortexResult<()> { + visitor.visit_child("left_parts", &self.left_parts())?; + visitor.visit_child("right_parts", &self.right_parts())?; + if let Some(left_parts_exceptions) = self.left_parts_exceptions() { + visitor.visit_child("left_parts_exceptions", &left_parts_exceptions) + } else { + Ok(()) + } + } +} + +impl ArrayStatisticsCompute for ALPRDArray {} + +impl ArrayTrait for ALPRDArray {} + +#[cfg(test)] +mod test { + use rstest::rstest; + use vortex::array::PrimitiveArray; + use vortex::{IntoArray, IntoCanonical}; + + use crate::{alp_rd, ALPRDFloat}; + + #[rstest] + #[case(vec![0.1f32.next_up(); 1024], 1.123_848_f32)] + #[case(vec![0.1f64.next_up(); 1024], 1.123_848_591_110_992_f64)] + fn test_array_encode_with_nulls_and_exceptions( + #[case] reals: Vec, + #[case] seed: T, + ) { + assert_eq!(reals.len(), 1024, "test expects 1024-length 
fixture"); + // Null out some of the values. + let mut reals: Vec> = reals.into_iter().map(Some).collect(); + reals[1] = None; + reals[5] = None; + reals[900] = None; + + // Create a new array from this. + let real_array = PrimitiveArray::from_nullable_vec(reals.clone()); + + // Pick a seed that we know will trigger lots of exceptions. + let encoder: alp_rd::RDEncoder = alp_rd::RDEncoder::new(&[seed.powi(-2)]); + + let rd_array = encoder.encode(&real_array); + + let decoded = rd_array + .into_array() + .into_canonical() + .unwrap() + .into_primitive() + .unwrap(); + + let maybe_null_reals: Vec = reals.into_iter().map(|v| v.unwrap_or_default()).collect(); + assert_eq!(decoded.maybe_null_slice::(), &maybe_null_reals); + } +} diff --git a/encodings/alp/src/alp_rd/compute/filter.rs b/encodings/alp/src/alp_rd/compute/filter.rs new file mode 100644 index 0000000000..d35c796867 --- /dev/null +++ b/encodings/alp/src/alp_rd/compute/filter.rs @@ -0,0 +1,52 @@ +use vortex::compute::{filter, FilterFn}; +use vortex::{Array, ArrayDType, IntoArray}; +use vortex_error::VortexResult; + +use crate::ALPRDArray; + +impl FilterFn for ALPRDArray { + fn filter(&self, predicate: &Array) -> VortexResult { + let left_parts_exceptions = self + .left_parts_exceptions() + .map(|array| filter(&array, predicate)) + .transpose()?; + + Ok(ALPRDArray::try_new( + self.dtype().clone(), + filter(self.left_parts(), predicate)?, + self.left_parts_dict(), + filter(self.right_parts(), predicate)?, + self.right_bit_width(), + left_parts_exceptions, + )? 
+ .into_array()) + } +} + +#[cfg(test)] +mod test { + use rstest::rstest; + use vortex::array::{BoolArray, PrimitiveArray}; + use vortex::compute::filter; + use vortex::IntoArrayVariant; + + use crate::{ALPRDFloat, RDEncoder}; + + #[rstest] + #[case(0.1f32, 0.2f32, 3e25f32)] + #[case(0.1f64, 0.2f64, 3e100f64)] + fn test_filter(#[case] a: T, #[case] b: T, #[case] outlier: T) { + let array = PrimitiveArray::from(vec![a, b, outlier]); + let encoded = RDEncoder::new(&[a, b]).encode(&array); + + // Make sure that we're testing the exception pathway. + assert!(encoded.left_parts_exceptions().is_some()); + + // The first two values need no patching + let filtered = filter(encoded.as_ref(), BoolArray::from(vec![true, false, true])) + .unwrap() + .into_primitive() + .unwrap(); + assert_eq!(filtered.maybe_null_slice::(), &[a, outlier]); + } +} diff --git a/encodings/alp/src/alp_rd/compute/mod.rs b/encodings/alp/src/alp_rd/compute/mod.rs new file mode 100644 index 0000000000..5420b362fd --- /dev/null +++ b/encodings/alp/src/alp_rd/compute/mod.rs @@ -0,0 +1,27 @@ +use vortex::compute::unary::ScalarAtFn; +use vortex::compute::{ArrayCompute, FilterFn, SliceFn, TakeFn}; + +use crate::ALPRDArray; + +mod filter; +mod scalar_at; +mod slice; +mod take; + +impl ArrayCompute for ALPRDArray { + fn filter(&self) -> Option<&dyn FilterFn> { + Some(self) + } + + fn scalar_at(&self) -> Option<&dyn ScalarAtFn> { + Some(self) + } + + fn slice(&self) -> Option<&dyn SliceFn> { + Some(self) + } + + fn take(&self) -> Option<&dyn TakeFn> { + Some(self) + } +} diff --git a/encodings/alp/src/alp_rd/compute/scalar_at.rs b/encodings/alp/src/alp_rd/compute/scalar_at.rs new file mode 100644 index 0000000000..65a4bc9433 --- /dev/null +++ b/encodings/alp/src/alp_rd/compute/scalar_at.rs @@ -0,0 +1,68 @@ +use vortex::compute::unary::{scalar_at, ScalarAtFn}; +use vortex_error::{VortexResult, VortexUnwrap}; +use vortex_scalar::Scalar; + +use crate::alp_rd::array::ALPRDArray; + +impl ScalarAtFn for ALPRDArray { 
+ fn scalar_at(&self, index: usize) -> VortexResult { + // The left value can either be a direct value, or an exception. + // The exceptions array represents exception positions with non-null values. + let left: u16 = match self.left_parts_exceptions() { + Some(exceptions) if exceptions.with_dyn(|a| a.is_valid(index)) => { + scalar_at(&exceptions, index)?.try_into()? + } + _ => { + let left_code: u16 = scalar_at(&self.left_parts(), index)?.try_into()?; + self.left_parts_dict()[left_code as usize] + } + }; + + // combine left and right values + if self.is_f32() { + let right: u32 = scalar_at(&self.right_parts(), index)?.try_into()?; + let packed = f32::from_bits((left as u32) << self.right_bit_width() | right); + Ok(packed.into()) + } else { + let right: u64 = scalar_at(&self.right_parts(), index)?.try_into()?; + let packed = f64::from_bits(((left as u64) << self.right_bit_width()) | right); + Ok(packed.into()) + } + } + + fn scalar_at_unchecked(&self, index: usize) -> Scalar { + self.scalar_at(index).vortex_unwrap() + } +} + +#[cfg(test)] +mod test { + use rstest::rstest; + use vortex::array::PrimitiveArray; + use vortex::compute::unary::scalar_at; + use vortex_scalar::Scalar; + + use crate::{ALPRDFloat, RDEncoder}; + + #[rstest] + #[case(0.1f32, 0.2f32, 3e25f32)] + #[case(0.1f64, 0.2f64, 3e100f64)] + fn test_scalar_at>( + #[case] a: T, + #[case] b: T, + #[case] outlier: T, + ) { + let array = PrimitiveArray::from(vec![a, b, outlier]); + let encoded = RDEncoder::new(&[a, b]).encode(&array); + + // Make sure that we're testing the exception pathway. 
+ assert!(encoded.left_parts_exceptions().is_some()); + + // The first two values need no patching + assert_eq!(scalar_at(encoded.as_ref(), 0).unwrap(), a.into()); + assert_eq!(scalar_at(encoded.as_ref(), 1).unwrap(), b.into()); + + // The right value hits the left_part_exceptions + assert_eq!(scalar_at(encoded.as_ref(), 2).unwrap(), outlier.into()); + } +} diff --git a/encodings/alp/src/alp_rd/compute/slice.rs b/encodings/alp/src/alp_rd/compute/slice.rs new file mode 100644 index 0000000000..827e30528e --- /dev/null +++ b/encodings/alp/src/alp_rd/compute/slice.rs @@ -0,0 +1,51 @@ +use vortex::compute::{slice, SliceFn}; +use vortex::{Array, ArrayDType, IntoArray}; +use vortex_error::VortexResult; + +use crate::ALPRDArray; + +impl SliceFn for ALPRDArray { + fn slice(&self, start: usize, stop: usize) -> VortexResult { + let left_parts_exceptions = self + .left_parts_exceptions() + .map(|array| slice(&array, start, stop)) + .transpose()?; + + Ok(ALPRDArray::try_new( + self.dtype().clone(), + slice(self.left_parts(), start, stop)?, + self.left_parts_dict(), + slice(self.right_parts(), start, stop)?, + self.right_bit_width(), + left_parts_exceptions, + )? 
+ .into_array()) + } +} + +#[cfg(test)] +mod test { + use rstest::rstest; + use vortex::array::PrimitiveArray; + use vortex::compute::slice; + use vortex::IntoArrayVariant; + + use crate::{ALPRDFloat, RDEncoder}; + + #[rstest] + #[case(0.1f32, 0.2f32, 3e25f32)] + #[case(0.1f64, 0.2f64, 3e100f64)] + fn test_slice(#[case] a: T, #[case] b: T, #[case] outlier: T) { + let array = PrimitiveArray::from(vec![a, b, outlier]); + let encoded = RDEncoder::new(&[a, b]).encode(&array); + + assert!(encoded.left_parts_exceptions().is_some()); + + let decoded = slice(encoded.as_ref(), 1, 3) + .unwrap() + .into_primitive() + .unwrap(); + + assert_eq!(decoded.maybe_null_slice::(), &[b, outlier]); + } +} diff --git a/encodings/alp/src/alp_rd/compute/take.rs b/encodings/alp/src/alp_rd/compute/take.rs new file mode 100644 index 0000000000..28ce8a6441 --- /dev/null +++ b/encodings/alp/src/alp_rd/compute/take.rs @@ -0,0 +1,51 @@ +use vortex::compute::{take, TakeFn}; +use vortex::{Array, ArrayDType, IntoArray}; +use vortex_error::VortexResult; + +use crate::ALPRDArray; + +impl TakeFn for ALPRDArray { + fn take(&self, indices: &Array) -> VortexResult { + let left_parts_exceptions = self + .left_parts_exceptions() + .map(|array| take(&array, indices)) + .transpose()?; + + Ok(ALPRDArray::try_new( + self.dtype().clone(), + take(self.left_parts(), indices)?, + self.left_parts_dict(), + take(self.right_parts(), indices)?, + self.right_bit_width(), + left_parts_exceptions, + )? 
+ .into_array()) + } +} + +#[cfg(test)] +mod test { + use rstest::rstest; + use vortex::array::PrimitiveArray; + use vortex::compute::take; + use vortex::IntoArrayVariant; + + use crate::{ALPRDFloat, RDEncoder}; + + #[rstest] + #[case(0.1f32, 0.2f32, 3e25f32)] + #[case(0.1f64, 0.2f64, 3e100f64)] + fn test_take(#[case] a: T, #[case] b: T, #[case] outlier: T) { + let array = PrimitiveArray::from(vec![a, b, outlier]); + let encoded = RDEncoder::new(&[a, b]).encode(&array); + + assert!(encoded.left_parts_exceptions().is_some()); + + let taken = take(encoded.as_ref(), PrimitiveArray::from(vec![0, 2]).as_ref()) + .unwrap() + .into_primitive() + .unwrap(); + + assert_eq!(taken.maybe_null_slice::(), &[a, outlier]); + } +} diff --git a/encodings/alp/src/alp_rd/mod.rs b/encodings/alp/src/alp_rd/mod.rs new file mode 100644 index 0000000000..2c22c00929 --- /dev/null +++ b/encodings/alp/src/alp_rd/mod.rs @@ -0,0 +1,404 @@ +pub use array::*; + +mod array; +mod compute; +mod variants; + +use std::collections::HashMap; +use std::ops::{Shl, Shr}; + +use itertools::Itertools; +use num_traits::{Float, One, PrimInt}; +use vortex::array::{PrimitiveArray, SparseArray}; +use vortex::{ArrayDType, IntoArray}; +use vortex_dtype::{DType, NativePType}; +use vortex_error::{VortexExpect, VortexUnwrap}; +use vortex_fastlanes::bitpack_encode_unchecked; +use vortex_scalar::ScalarValue; + +use crate::match_each_alp_float_ptype; + +macro_rules! bit_width { + ($value:expr) => { + if $value == 0 { + 1 + } else { + $value.ilog2().wrapping_add(1) as usize + } + }; +} + +/// Max number of bits to cut from the MSB section of each float. +const CUT_LIMIT: usize = 16; + +const MAX_DICT_SIZE: u8 = 8; + +mod private { + pub trait Sealed {} + + impl Sealed for f32 {} + impl Sealed for f64 {} +} + +/// Main trait for ALP-RD encodable floating point numbers. +/// +/// Like the paper, we limit this to the IEEE7 754 single-precision (`f32`) and double-precision +/// (`f64`) floating point types. 
+pub trait ALPRDFloat: private::Sealed + Float + Copy + NativePType { + /// The unsigned integer type with the same bit-width as the floating-point type. + type UINT: NativePType + PrimInt + One + Copy; + + /// Number of bits the value occupies in registers. + const BITS: usize = size_of::() * 8; + + /// Bit-wise transmute from the unsigned integer type to the floating-point type. + fn from_bits(bits: Self::UINT) -> Self; + + /// Bit-wise transmute into the unsigned integer type. + fn to_bits(value: Self) -> Self::UINT; + + /// Truncating conversion from the unsigned integer type to `u16`. + fn to_u16(bits: Self::UINT) -> u16; + + /// Type-widening conversion from `u16` to the unsigned integer type. + fn from_u16(value: u16) -> Self::UINT; +} + +impl ALPRDFloat for f64 { + type UINT = u64; + + fn from_bits(bits: Self::UINT) -> Self { + f64::from_bits(bits) + } + + fn to_bits(value: Self) -> Self::UINT { + value.to_bits() + } + + fn to_u16(bits: Self::UINT) -> u16 { + bits as u16 + } + + fn from_u16(value: u16) -> Self::UINT { + value as u64 + } +} + +impl ALPRDFloat for f32 { + type UINT = u32; + + fn from_bits(bits: Self::UINT) -> Self { + f32::from_bits(bits) + } + + fn to_bits(value: Self) -> Self::UINT { + value.to_bits() + } + + fn to_u16(bits: Self::UINT) -> u16 { + bits as u16 + } + + fn from_u16(value: u16) -> Self::UINT { + value as u32 + } +} + +/// Encoder for ALP-RD ("real doubles") values. +/// +/// The encoder calculates its parameters from a single sample of floating-point values, +/// and then can be applied to many vectors. +/// +/// ALP-RD uses the algorithm outlined in Section 3.4 of the paper. The crux of it is that the front +/// (most significant) bits of many double vectors tend to be the same, i.e. most doubles in a +/// vector often use the same exponent and front bits. Compression proceeds by finding the best +/// prefix of up to 16 bits that can be collapsed into a dictionary of +/// up to 8 elements. 
Each double can then be broken into the front/left `L` bits, which neatly +/// bit-packs down to 1-3 bits per element (depending on the actual dictionary size). +/// The remaining `R` bits naturally bit-pack. +/// +/// In the ideal case, this scheme allows us to store a sequence of doubles in 49 bits-per-value. +/// +/// Our implementation draws on the MIT-licensed [C++ implementation] provided by the original authors. +/// +/// [C++ implementation]: https://github.com/cwida/ALP/blob/main/include/alp/rd.hpp +pub struct RDEncoder { + right_bit_width: u8, + codes: Vec, +} + +impl RDEncoder { + /// Build a new encoder from a sample of doubles. + pub fn new(sample: &[T]) -> Self + where + T: ALPRDFloat + NativePType, + T::UINT: NativePType, + { + let dictionary = find_best_dictionary::(sample); + + let mut codes = vec![0; dictionary.dictionary.len()]; + dictionary.dictionary.into_iter().for_each(|(bits, code)| { + // write the reverse mapping into the codes vector. + codes[code as usize] = bits + }); + + Self { + right_bit_width: dictionary.right_bit_width, + codes, + } + } + + /// Encode a set of floating point values with ALP-RD. + /// + /// Each value will be split into a left and right component, which are compressed individually. 
+ pub fn encode(&self, array: &PrimitiveArray) -> ALPRDArray { + match_each_alp_float_ptype!(array.ptype(), |$P| { + self.encode_generic::<$P>(array) + }) + } + + fn encode_generic(&self, array: &PrimitiveArray) -> ALPRDArray + where + T: ALPRDFloat + NativePType, + T::UINT: NativePType, + { + assert!( + !self.codes.is_empty(), + "codes lookup table must be populated before RD encoding" + ); + + let doubles = array.maybe_null_slice::(); + + let mut left_parts: Vec = Vec::with_capacity(doubles.len()); + let mut right_parts: Vec = Vec::with_capacity(doubles.len()); + let mut exceptions_pos: Vec = Vec::with_capacity(doubles.len() / 4); + let mut exceptions: Vec = Vec::with_capacity(doubles.len() / 4); + + // mask for right-parts + let right_mask = T::UINT::one().shl(self.right_bit_width as _) - T::UINT::one(); + let max_code = self.codes.len() - 1; + let left_bit_width = bit_width!(max_code); + + for v in doubles.iter().copied() { + right_parts.push(T::to_bits(v) & right_mask); + left_parts.push(::to_u16( + T::to_bits(v).shr(self.right_bit_width as _), + )); + } + + // dict-encode the left-parts, keeping track of exceptions + for (idx, left) in left_parts.iter_mut().enumerate() { + // TODO: revisit if we need to change the branch order for perf. + if let Some(code) = self.codes.iter().position(|v| *v == *left) { + *left = code as u16; + } else { + exceptions.push(*left); + exceptions_pos.push(idx as _); + + *left = 0u16; + } + } + + // Bit-pack down the encoded left-parts array that have been dictionary encoded. + let primitive_left = PrimitiveArray::from_vec(left_parts, array.validity()); + // SAFETY: by construction, all values in left_parts can be packed to left_bit_width. 
+ let packed_left = unsafe { + bitpack_encode_unchecked(primitive_left, left_bit_width as _) + .vortex_unwrap() + .into_array() + }; + + let primitive_right = PrimitiveArray::from_vec(right_parts, array.validity()); + // SAFETY: by construction, all values in right_parts are right_bit_width + leading zeros. + let packed_right = unsafe { + bitpack_encode_unchecked(primitive_right, self.right_bit_width as _) + .vortex_unwrap() + .into_array() + }; + + // Bit-pack the dict-encoded left-parts + // Bit-pack the right-parts + // SparseArray for exceptions. + let exceptions = (!exceptions_pos.is_empty()).then(|| { + let max_exc_pos = exceptions_pos.last().copied().unwrap_or_default(); + let bw = bit_width!(max_exc_pos); + + let exc_pos_array = PrimitiveArray::from(exceptions_pos); + // SAFETY: We calculate bw such that it is wide enough to hold the largest position index. + let packed_pos = unsafe { + bitpack_encode_unchecked(exc_pos_array, bw) + .vortex_unwrap() + .into_array() + }; + + let exc_array = + PrimitiveArray::from_nullable_vec(exceptions.into_iter().map(Some).collect()) + .into_array(); + SparseArray::try_new(packed_pos, exc_array, doubles.len(), ScalarValue::Null) + .vortex_expect("ALP-RD: construction of exceptions SparseArray") + .into_array() + }); + + ALPRDArray::try_new( + DType::Primitive(T::PTYPE, packed_left.dtype().nullability()), + packed_left, + &self.codes, + packed_right, + self.right_bit_width, + exceptions, + ) + .vortex_expect("ALPRDArray construction in encode") + } +} + +/// Decode a vector of ALP-RD encoded values back into their original floating point format. +/// +/// # Panics +/// +/// The function panics if the provided `left_parts` and `right_parts` differ in length. +/// +/// The function panics if the provided `exc_pos` and `exceptions` differ in length. 
+pub fn alp_rd_decode( + left_parts: &[u16], + left_parts_dict: &[u16], + right_bit_width: u8, + right_parts: &[T::UINT], + exc_pos: &[u64], + exceptions: &[u16], +) -> Vec { + assert_eq!( + left_parts.len(), + right_parts.len(), + "alp_rd_decode: left_parts.len != right_parts.len" + ); + + assert_eq!( + exc_pos.len(), + exceptions.len(), + "alp_rd_decode: exc_pos.len != exceptions.len" + ); + + let mut dict = Vec::with_capacity(left_parts_dict.len()); + dict.extend_from_slice(left_parts_dict); + + let mut left_parts_decoded: Vec = Vec::with_capacity(left_parts.len()); + + // Decode with bit-packing and dict unpacking. + for code in left_parts { + left_parts_decoded.push(::from_u16(dict[*code as usize])); + } + + // Apply the exception patches to left_parts + for (pos, val) in exc_pos.iter().zip(exceptions.iter()) { + left_parts_decoded[*pos as usize] = ::from_u16(*val); + } + + // recombine the left-and-right parts, adjusting by the right_bit_width. + left_parts_decoded + .into_iter() + .zip(right_parts.iter().copied()) + .map(|(left, right)| T::from_bits((left << (right_bit_width as usize)) | right)) + .collect() +} + +/// Find the best "cut point" for a set of floating point values such that we can +/// split each value into a dictionary-encodable left part and a bit-packed right part. +fn find_best_dictionary(samples: &[T]) -> ALPRDDictionary { + let mut best_est_size = f64::MAX; + let mut best_dict = ALPRDDictionary::default(); + + for p in 1..=16 { + let candidate_right_bw = (T::BITS - p) as u8; + let (dictionary, exception_count) = + build_left_parts_dictionary::(samples, candidate_right_bw, MAX_DICT_SIZE); + let estimated_size = estimate_compression_size( + dictionary.right_bit_width, + dictionary.left_bit_width, + exception_count, + samples.len(), + ); + if estimated_size < best_est_size { + best_est_size = estimated_size; + best_dict = dictionary; + } + } + + best_dict +} + +/// Build dictionary of the leftmost bits.
+fn build_left_parts_dictionary( + samples: &[T], + right_bw: u8, + max_dict_size: u8, +) -> (ALPRDDictionary, usize) { + assert!( + right_bw >= (T::BITS - CUT_LIMIT) as _, + "left-parts must be <= 16 bits" + ); + + // Count the number of occurrences of each left bit pattern + let counts = samples + .iter() + .copied() + .map(|v| ::to_u16(T::to_bits(v).shr(right_bw as _))) + .counts(); + + // Sorted counts: sort by negative count so that heavy hitters sort first. + let mut sorted_bit_counts: Vec<(u16, usize)> = counts.into_iter().collect_vec(); + sorted_bit_counts.sort_by_key(|(_, count)| count.wrapping_neg()); + + // Assign the most-frequently occurring left-bits as dictionary codes, up to `max_dict_size`... + let mut dictionary = HashMap::with_capacity(max_dict_size as _); + let mut code = 0u16; + while code < (max_dict_size as _) && (code as usize) < sorted_bit_counts.len() { + let (bits, _) = sorted_bit_counts[code as usize]; + dictionary.insert(bits, code); + code += 1; + } + + // ...and the rest are exceptions. + let exception_count: usize = sorted_bit_counts + .iter() + .skip(code as _) + .map(|(_, count)| *count) + .sum(); + + // Left bit-width is determined based on the actual dictionary size. + let max_code = dictionary.len() - 1; + let left_bw = bit_width!(max_code) as u8; + + ( + ALPRDDictionary { + dictionary, + right_bit_width: right_bw, + left_bit_width: left_bw, + }, + exception_count, + ) +} + +/// Estimate the bits-per-value when using these compression settings. +fn estimate_compression_size( + right_bw: u8, + left_bw: u8, + exception_count: usize, + sample_n: usize, +) -> f64 { + const EXC_POSITION_SIZE: usize = 16; // two bytes for exception position. + const EXC_SIZE: usize = 16; // two bytes for each exception (up to 16 front bits).
+ + let exceptions_size = exception_count * (EXC_POSITION_SIZE + EXC_SIZE); + (right_bw as f64) + (left_bw as f64) + ((exceptions_size as f64) / (sample_n as f64)) +} + +/// The ALP-RD dictionary, encoding the "left parts" and their dictionary encoding. +#[derive(Debug, Default)] +struct ALPRDDictionary { + /// Items in the dictionary are bit patterns, along with their 16-bit encoding. + dictionary: HashMap, + /// The (compressed) left bit width. This is after bit-packing the dictionary codes. + left_bit_width: u8, + /// The right bit width. This is the bit-packed width of each of the "real double" values. + right_bit_width: u8, +} diff --git a/encodings/alp/src/alp_rd/variants.rs b/encodings/alp/src/alp_rd/variants.rs new file mode 100644 index 0000000000..127eb8ded4 --- /dev/null +++ b/encodings/alp/src/alp_rd/variants.rs @@ -0,0 +1,15 @@ +use vortex::variants::{ArrayVariants, PrimitiveArrayTrait}; + +use crate::ALPRDArray; + +impl ArrayVariants for ALPRDArray { + fn as_primitive_array(&self) -> Option<&dyn PrimitiveArrayTrait> { + Some(self) + } + + fn as_primitive_array_unchecked(&self) -> &dyn PrimitiveArrayTrait { + self + } +} + +impl PrimitiveArrayTrait for ALPRDArray {} diff --git a/encodings/alp/src/lib.rs b/encodings/alp/src/lib.rs index fde87a65f2..d47da5bdea 100644 --- a/encodings/alp/src/lib.rs +++ b/encodings/alp/src/lib.rs @@ -1,8 +1,22 @@ +#![feature(float_next_up_down)] + +//! This crate contains an implementation of the floating point compression algorithm from the +//! paper ["ALP: Adaptive Lossless floating-Point Compression"][paper] by Afroozeh et al. +//! +//! The compressor has two variants, classic ALP which is well-suited for data that does not use +//! the full precision, and "real doubles", values that do. +//! +//! Classic ALP will return small integers, and it is meant to be cascaded with other integer +//! compression techniques such as bit-packing and frame-of-reference encoding. Combined this allows +//! 
for significant compression on the order of what you can get for integer values. +//! +//! ALP-RD is generally terminal, and in the ideal case it can represent an f64 in just 49 bits, +//! though generally it is closer to 54 bits per value or ~12.5% compression. +//! +//! [paper]: https://ir.cwi.nl/pub/33334/33334.pdf + pub use alp::*; -pub use array::*; -pub use compress::*; +pub use alp_rd::*; mod alp; -mod array; -mod compress; -mod compute; +mod alp_rd; diff --git a/encodings/fastlanes/src/bitpacking/compress.rs b/encodings/fastlanes/src/bitpacking/compress.rs index 3d10335bce..783fe52b71 100644 --- a/encodings/fastlanes/src/bitpacking/compress.rs +++ b/encodings/fastlanes/src/bitpacking/compress.rs @@ -40,6 +40,30 @@ pub fn bitpack_encode(array: PrimitiveArray, bit_width: usize) -> VortexResult VortexResult { + let packed = bitpack(&array, bit_width)?; + + BitPackedArray::try_new( + packed, + array.ptype(), + array.validity(), + None, + bit_width, + array.len(), + ) +} + /// Bitpack a [PrimitiveArray] to the given width. /// /// On success, returns a [Buffer] containing the packed data.
diff --git a/vortex-array/src/encoding.rs b/vortex-array/src/encoding.rs index 77137b4da2..a0991d085f 100644 --- a/vortex-array/src/encoding.rs +++ b/vortex-array/src/encoding.rs @@ -138,6 +138,7 @@ pub mod ids { pub const RUN_END: u16 = 27; pub const RUN_END_BOOL: u16 = 28; pub const ZIGZAG: u16 = 29; + pub const ALP_RD: u16 = 30; } #[cfg(test)] diff --git a/vortex-sampling-compressor/src/compressors/alp_rd.rs b/vortex-sampling-compressor/src/compressors/alp_rd.rs new file mode 100644 index 0000000000..e877d4067a --- /dev/null +++ b/vortex-sampling-compressor/src/compressors/alp_rd.rs @@ -0,0 +1,78 @@ +use std::any::Any; +use std::collections::HashSet; +use std::sync::Arc; + +use vortex::array::PrimitiveArray; +use vortex::encoding::EncodingRef; +use vortex::{Array, ArrayDef, IntoArray, IntoArrayVariant}; +use vortex_alp::{match_each_alp_float_ptype, ALPRDEncoding, RDEncoder as ALPRDEncoder, ALPRD}; +use vortex_dtype::PType; +use vortex_error::{vortex_bail, VortexResult}; +use vortex_fastlanes::BitPackedEncoding; + +use crate::compressors::{CompressedArray, CompressionTree, EncoderMetadata, EncodingCompressor}; +use crate::SamplingCompressor; + +#[derive(Debug)] +pub struct ALPRDCompressor; + +impl EncoderMetadata for ALPRDEncoder { + fn as_any(&self) -> &dyn Any { + self + } +} + +impl EncodingCompressor for ALPRDCompressor { + fn id(&self) -> &str { + ALPRD::ID.as_ref() + } + + fn can_compress(&self, array: &Array) -> Option<&dyn EncodingCompressor> { + // Only support primitive arrays + let parray = PrimitiveArray::try_from(array).ok()?; + + // Only supports f32 and f64 + if !matches!(parray.ptype(), PType::F32 | PType::F64) { + return None; + } + + Some(self) + } + + fn compress<'a>( + &'a self, + array: &Array, + like: Option>, + _ctx: SamplingCompressor<'a>, + ) -> VortexResult> { + let primitive = array.clone().into_primitive()?; + + // Train a new compressor or reuse an existing compressor. 
+ let encoder = like + .clone() + .and_then(|mut tree| tree.metadata()) + .map(VortexResult::Ok) + .unwrap_or_else(|| Ok(Arc::new(alp_rd_new_encoder(&primitive))))?; + + let Some(alp_rd_encoder) = encoder.as_any().downcast_ref::() else { + vortex_bail!("Could not downcast metadata as ALPRDEncoder"); + }; + + let encoded = alp_rd_encoder.encode(&primitive).into_array(); + Ok(CompressedArray::new( + encoded, + Some(CompressionTree::new_with_metadata(self, vec![], encoder)), + )) + } + + fn used_encodings(&self) -> HashSet { + HashSet::from([&ALPRDEncoding as EncodingRef, &BitPackedEncoding]) + } +} + +/// Create a new `ALPRDEncoder` from the given array of samples. +fn alp_rd_new_encoder(array: &PrimitiveArray) -> ALPRDEncoder { + match_each_alp_float_ptype!(array.ptype(), |$P| { + ALPRDEncoder::new(array.maybe_null_slice::<$P>()) + }) +} diff --git a/vortex-sampling-compressor/src/compressors/mod.rs b/vortex-sampling-compressor/src/compressors/mod.rs index f8cac1063a..05d6288721 100644 --- a/vortex-sampling-compressor/src/compressors/mod.rs +++ b/vortex-sampling-compressor/src/compressors/mod.rs @@ -11,6 +11,7 @@ use vortex_error::VortexResult; use crate::SamplingCompressor; pub mod alp; +pub mod alp_rd; pub mod bitpacked; pub mod constant; pub mod date_time_parts; diff --git a/vortex-sampling-compressor/src/lib.rs b/vortex-sampling-compressor/src/lib.rs index f6a7790eb7..bef4e74849 100644 --- a/vortex-sampling-compressor/src/lib.rs +++ b/vortex-sampling-compressor/src/lib.rs @@ -16,6 +16,7 @@ use vortex::{Array, ArrayDType, ArrayDef, IntoArray, IntoCanonical}; use vortex_error::VortexResult; use crate::compressors::alp::ALPCompressor; +use crate::compressors::alp_rd::ALPRDCompressor; use crate::compressors::bitpacked::BitPackedCompressor; use crate::compressors::constant::ConstantCompressor; use crate::compressors::date_time_parts::DateTimePartsCompressor; @@ -35,8 +36,9 @@ pub mod compressors; mod sampling; lazy_static! 
{ - pub static ref ALL_COMPRESSORS: [CompressorRef<'static>; 11] = [ + pub static ref ALL_COMPRESSORS: [CompressorRef<'static>; 12] = [ &ALPCompressor as CompressorRef, + &ALPRDCompressor, &BitPackedCompressor, &DateTimePartsCompressor, &DEFAULT_RUN_END_COMPRESSOR, diff --git a/vortex-sampling-compressor/tests/smoketest.rs b/vortex-sampling-compressor/tests/smoketest.rs index 6477810446..f3c112148b 100644 --- a/vortex-sampling-compressor/tests/smoketest.rs +++ b/vortex-sampling-compressor/tests/smoketest.rs @@ -29,6 +29,7 @@ mod tests { use vortex_datetime_parts::DateTimeParts; use vortex_dict::Dict; use vortex_fastlanes::FoR; + use vortex_sampling_compressor::compressors::alp_rd::ALPRDCompressor; use vortex_sampling_compressor::compressors::fsst::FSSTCompressor; use super::*; @@ -39,6 +40,7 @@ mod tests { let compressor = SamplingCompressor::new_with_options( HashSet::from([ &ALPCompressor as CompressorRef, + &ALPRDCompressor as CompressorRef, &BitPackedCompressor, // TODO(robert): Implement minimal compute for DeltaArrays - scalar_at and slice // &DeltaCompressor,