From 95255a0d37b48b7eb1e1b429cb0df02e7f115fd2 Mon Sep 17 00:00:00 2001 From: Robert Kruszewski Date: Wed, 4 Sep 2024 17:08:06 -0400 Subject: [PATCH] comments --- .../fastlanes/src/bitpacking/compute/search_sorted.rs | 4 +++- encodings/fastlanes/src/bitpacking/mod.rs | 4 ++-- encodings/fastlanes/src/for/compress.rs | 5 ++--- encodings/fastlanes/src/for/compute.rs | 10 ++++------ .../src/array/primitive/compute/search_sorted.rs | 10 +++++----- vortex-array/src/compute/search_sorted.rs | 3 +++ 6 files changed, 19 insertions(+), 17 deletions(-) diff --git a/encodings/fastlanes/src/bitpacking/compute/search_sorted.rs b/encodings/fastlanes/src/bitpacking/compute/search_sorted.rs index b8eb131947..9a33fe158d 100644 --- a/encodings/fastlanes/src/bitpacking/compute/search_sorted.rs +++ b/encodings/fastlanes/src/bitpacking/compute/search_sorted.rs @@ -33,7 +33,9 @@ where { let unwrapped_value: T = value.cast(array.dtype())?.try_into()?; if let Some(patches_array) = array.patches() { - if unwrapped_value.as_() >= array.max_packed_value() { + // If patches exist they must be the last elements in the array, if the value we're looking for is greater than + // max packed value just search the patches + if unwrapped_value.as_() > array.max_packed_value() { search_sorted(&patches_array, value.clone(), side) } else { Ok(SearchSorted::search_sorted( diff --git a/encodings/fastlanes/src/bitpacking/mod.rs b/encodings/fastlanes/src/bitpacking/mod.rs index faddbb314f..1b94e49fd2 100644 --- a/encodings/fastlanes/src/bitpacking/mod.rs +++ b/encodings/fastlanes/src/bitpacking/mod.rs @@ -1,5 +1,5 @@ -use ::serde::{Deserialize, Serialize}; pub use compress::*; +use ::serde::{Deserialize, Serialize}; use vortex::array::{Primitive, PrimitiveArray}; use vortex::stats::{ArrayStatisticsCompute, StatsSet}; use vortex::validity::{ArrayValidity, LogicalValidity, Validity, ValidityMetadata}; @@ -164,7 +164,7 @@ impl BitPackedArray { #[inline] pub fn max_packed_value(&self) -> usize { - 1 << self.bit_width() + 1 << self.bit_width() - 1 } } diff --git a/encodings/fastlanes/src/for/compress.rs b/encodings/fastlanes/src/for/compress.rs index cdb59b5ff9..95698b4d60 100644 --- a/encodings/fastlanes/src/for/compress.rs +++ b/encodings/fastlanes/src/for/compress.rs @@ -66,12 +66,11 @@ fn compress_primitive( ) -> PrimitiveArray { assert!(shift < T::PTYPE.bit_width() as u8); let values = if shift > 0 { - let shifted_min = min >> shift as usize; parray .maybe_null_slice::() .iter() - .map(|&v| v >> shift as usize) - .map(|v| v.wrapping_sub(&shifted_min)) + .map(|&v| v.wrapping_sub(&min)) + .map(|v| v >> shift as usize) .collect_vec() } else { parray diff --git a/encodings/fastlanes/src/for/compute.rs b/encodings/fastlanes/src/for/compute.rs index 570b1e53b2..ebb3882bc9 100644 --- a/encodings/fastlanes/src/for/compute.rs +++ b/encodings/fastlanes/src/for/compute.rs @@ -97,11 +97,9 @@ where + Into, { let min: T = array.reference().try_into()?; - let shifted_min = min >> array.shift(); - let unwrapped_value: T = value.cast(array.dtype())?.as_ref().try_into()?; - let shifted_value: T = unwrapped_value >> array.shift(); + let primitive_value: T = value.cast(array.dtype())?.as_ref().try_into()?; // Make sure that smaller values are still smaller and not larger than (which they would be after wrapping_sub) - if shifted_value < shifted_min { + if primitive_value < min { return Ok(SearchResult::NotFound(0)); } @@ -110,9 +108,9 @@ where // // For values that are lossy compressed we know they wouldn't be found in the array // in order to find index they would be inserted at we search for next value in the compressed space - let mut translated_value = shifted_value.wrapping_sub(&shifted_min); + let mut translated_value = primitive_value.wrapping_sub(&min) >> array.shift(); let mut lossy_compressed = false; - if (translated_value << array.shift()).wrapping_add(&min) != unwrapped_value { + if (translated_value << array.shift()).wrapping_add(&min) != primitive_value { translated_value += T::from(1).unwrap(); lossy_compressed = true; } diff --git a/vortex-array/src/array/primitive/compute/search_sorted.rs b/vortex-array/src/array/primitive/compute/search_sorted.rs index eccf0b09fe..f63592427f 100644 --- a/vortex-array/src/array/primitive/compute/search_sorted.rs +++ b/vortex-array/src/array/primitive/compute/search_sorted.rs @@ -20,19 +20,19 @@ impl SearchSortedFn for PrimitiveArray { Validity::AllInvalid => Ok(SearchResult::NotFound(0)), Validity::Array(_) => { let pvalue: $T = value.try_into()?; - Ok(NullableSearchSorted::new(self).search_sorted(&pvalue, side)) + Ok(SearchSortedNullsLast::new(self).search_sorted(&pvalue, side)) } } }) } } -struct NullableSearchSorted<'a, T> { +struct SearchSortedNullsLast<'a, T> { values: &'a [T], validity: Validity, } -impl<'a, T: NativePType> NullableSearchSorted<'a, T> { +impl<'a, T: NativePType> SearchSortedNullsLast<'a, T> { pub fn new(array: &'a PrimitiveArray) -> Self { Self { values: array.maybe_null_slice(), @@ -41,7 +41,7 @@ impl<'a, T: NativePType> NullableSearchSorted<'a, T> { } } -impl<'a, T: NativePType> IndexOrd for NullableSearchSorted<'a, T> { +impl<'a, T: NativePType> IndexOrd for SearchSortedNullsLast<'a, T> { fn index_cmp(&self, idx: usize, elem: &T) -> Option { if self.validity.is_null(idx) { return Some(Greater); @@ -51,7 +51,7 @@ impl<'a, T: NativePType> IndexOrd for NullableSearchSorted<'a, T> { } } -impl<'a, T> Len for NullableSearchSorted<'a, T> { +impl<'a, T> Len for SearchSortedNullsLast<'a, T> { fn len(&self) -> usize { self.values.len() } diff --git a/vortex-array/src/compute/search_sorted.rs b/vortex-array/src/compute/search_sorted.rs index a8b567e6a1..5e33eda478 100644 --- a/vortex-array/src/compute/search_sorted.rs +++ b/vortex-array/src/compute/search_sorted.rs @@ -45,6 +45,9 @@ impl SearchResult { } } +/// Searches for value assuming the array is sorted. +/// +/// For nullable arrays we assume that the nulls are sorted last, i.e. they're the greatest value pub trait SearchSortedFn { fn search_sorted(&self, value: &Scalar, side: SearchSortedSide) -> VortexResult; }