From b50d382a57812aa152ee5c87a2007521eabf10ed Mon Sep 17 00:00:00 2001 From: Daniel King Date: Mon, 30 Sep 2024 12:21:56 -0400 Subject: [PATCH] FoR-compress the FSST code offsets before delta encoding --- vortex-sampling-compressor/src/compressors/fsst.rs | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/vortex-sampling-compressor/src/compressors/fsst.rs b/vortex-sampling-compressor/src/compressors/fsst.rs index c72fc0a086..844ee828e0 100644 --- a/vortex-sampling-compressor/src/compressors/fsst.rs +++ b/vortex-sampling-compressor/src/compressors/fsst.rs @@ -12,6 +12,7 @@ use vortex_error::{vortex_bail, VortexResult}; use vortex_fsst::{fsst_compress, fsst_train_compressor, FSSTArray, FSSTEncoding, FSST}; use super::delta::DeltaCompressor; +use super::r#for::FoRCompressor; use super::{CompressedArray, CompressionTree, EncoderMetadata, EncodingCompressor}; use crate::SamplingCompressor; @@ -96,15 +97,21 @@ impl EncodingCompressor for FSSTCompressor { let codes_varbin = VarBinArray::try_from(fsst_array.codes())?; let codes_varbin_dtype = codes_varbin.dtype().clone(); - let codes_offsets_delta = DeltaCompressor.compress( + let codes_offsets_for = FoRCompressor.compress( &codes_varbin.offsets(), like.as_ref().and_then(|l| l.child(1).cloned()), - ctx.auxiliary("offsets"), + ctx.auxiliary("offsets_for"), + )?; + + let codes_offsets_delta = DeltaCompressor.compress( + &codes_offsets_for.array, + like.as_ref().and_then(|l| l.child(2).cloned()), + ctx.auxiliary("offsets_for_delta"), )?; let codes_offsets_compressed = codes_compressor.auxiliary("delta_offsets").compress( &codes_offsets_delta.array, - like.as_ref().and_then(|l| l.child(2)), + like.as_ref().and_then(|l| l.child(3)), )?; let codes = VarBinArray::try_new( @@ -128,6 +135,7 @@ impl EncodingCompressor for FSSTCompressor { self, vec![ uncompressed_lengths.path, + codes_offsets_for.path, codes_offsets_delta.path, codes_offsets_compressed.path, ],