diff --git a/bench-vortex/src/bin/notimplemented.rs b/bench-vortex/src/bin/notimplemented.rs index 88e253b155..96062ed275 100644 --- a/bench-vortex/src/bin/notimplemented.rs +++ b/bench-vortex/src/bin/notimplemented.rs @@ -122,7 +122,6 @@ fn enc_impls() -> Vec { RunEndArray::try_new( PrimitiveArray::from(vec![5u32, 8]).into_array(), PrimitiveArray::from(vec![0, 1]).into_array(), - Validity::NonNullable, ) .unwrap() .into_array(), diff --git a/encodings/runend/src/array.rs b/encodings/runend/src/array.rs index f270245352..008754088e 100644 --- a/encodings/runend/src/array.rs +++ b/encodings/runend/src/array.rs @@ -7,9 +7,7 @@ use vortex_array::compute::{ }; use vortex_array::encoding::ids; use vortex_array::stats::{ArrayStatistics, Stat, StatisticsVTable, StatsSet}; -use vortex_array::validity::{ - ArrayValidity, LogicalValidity, Validity, ValidityMetadata, ValidityVTable, -}; +use vortex_array::validity::{ArrayValidity, LogicalValidity, ValidityVTable}; use vortex_array::variants::{BoolArrayTrait, PrimitiveArrayTrait, VariantsVTable}; use vortex_array::visitor::{ArrayVisitor, VisitorVTable}; use vortex_array::{ @@ -26,7 +24,6 @@ impl_encoding!("vortex.runend", ids::RUN_END, RunEnd); #[derive(Debug, Clone, Serialize, Deserialize)] pub struct RunEndMetadata { - validity: ValidityMetadata, ends_ptype: PType, num_runs: usize, offset: usize, @@ -39,19 +36,18 @@ impl Display for RunEndMetadata { } impl RunEndArray { - pub fn try_new(ends: ArrayData, values: ArrayData, validity: Validity) -> VortexResult { + pub fn try_new(ends: ArrayData, values: ArrayData) -> VortexResult { let length = if ends.is_empty() { 0 } else { scalar_at(&ends, ends.len() - 1)?.as_ref().try_into()? }; - Self::with_offset_and_length(ends, values, validity, 0, length) + Self::with_offset_and_length(ends, values, 0, length) } pub(crate) fn with_offset_and_length( ends: ArrayData, values: ArrayData, - validity: Validity, offset: usize, length: usize, ) -> VortexResult { @@ -62,14 +58,6 @@ impl RunEndArray { ); } - if values.dtype().nullability() != validity.nullability() { - vortex_bail!( - "invalid validity {:?} for dtype {}", - validity, - values.dtype() - ); - } - if offset != 0 { let first_run_end: usize = scalar_at(&ends, 0)?.as_ref().try_into()?; if first_run_end <= offset { @@ -86,33 +74,18 @@ impl RunEndArray { let dtype = values.dtype().clone(); let metadata = RunEndMetadata { - validity: validity.to_metadata(length)?, ends_ptype: PType::try_from(ends.dtype())?, num_runs: ends.len(), offset, }; - let stats = if matches!(validity, Validity::AllValid | Validity::NonNullable) { - let ends_len = ends.len(); - let is_constant = ends_len <= 1; - StatsSet::from_iter([ - (Stat::IsConstant, is_constant.into()), - (Stat::RunCount, (ends_len as u64).into()), - ]) - } else if matches!(validity, Validity::AllInvalid) { - StatsSet::nulls(length, &dtype) - } else { - StatsSet::default() - }; - - let mut children = Vec::with_capacity(3); - children.push(ends); - children.push(values); - if let Some(a) = validity.into_array() { - children.push(a) - } - - Self::try_from_parts(dtype, length, metadata, children.into(), stats) + Self::try_from_parts( + dtype, + length, + metadata, + vec![ends, values].into(), + StatsSet::default(), + ) } /// Convert the given logical index to an index into the `values` array @@ -137,21 +110,13 @@ impl RunEndArray { /// Run the array through run-end encoding. pub fn encode(array: ArrayData) -> VortexResult { if let Ok(parray) = PrimitiveArray::try_from(array) { - let (ends, values) = runend_encode(&parray); - Self::try_new(ends.into_array(), values.into_array(), parray.validity()) + let (ends, values) = runend_encode(&parray)?; + Self::try_new(ends.into_array(), values) } else { vortex_bail!("REE can only encode primitive arrays") } } - pub fn validity(&self) -> Validity { - self.metadata().validity.to_validity(|| { - self.as_ref() - .child(2, &Validity::DTYPE, self.len()) - .vortex_expect("RunEndArray: validity child") - }) - } - /// The offset that the `ends` is relative to. /// /// This is generally zero for a "new" array, and non-zero after a slicing operation. @@ -208,11 +173,27 @@ impl BoolArrayTrait for RunEndArray {} impl ValidityVTable for RunEndEncoding { fn is_valid(&self, array: &RunEndArray, index: usize) -> bool { - array.validity().is_valid(index) + let physical_idx = array + .find_physical_index(index) + .vortex_expect("Invalid index"); + array.values().is_valid(physical_idx) } fn logical_validity(&self, array: &RunEndArray) -> LogicalValidity { - array.validity().to_logical(array.len()) + match array.values().logical_validity() { + LogicalValidity::AllValid(_) => LogicalValidity::AllValid(array.len()), + LogicalValidity::AllInvalid(_) => LogicalValidity::AllInvalid(array.len()), + LogicalValidity::Array(validity) => LogicalValidity::Array( + RunEndArray::with_offset_and_length( + array.ends(), + validity, + array.offset(), + array.len(), + ) + .vortex_expect("invalid array") + .into_array(), + ), + } } } @@ -222,12 +203,11 @@ impl IntoCanonical for RunEndArray { match self.dtype() { DType::Bool(_) => { let bools = self.values().into_bool()?; - runend_decode_bools(pends, bools, self.validity(), self.offset(), self.len()) - .map(Canonical::Bool) + runend_decode_bools(pends, bools, self.offset(), self.len()).map(Canonical::Bool) } DType::Primitive(..) => { let pvalues = self.values().into_primitive()?; - runend_decode_primitive(pends, pvalues, self.validity(), self.offset(), self.len()) + runend_decode_primitive(pends, pvalues, self.offset(), self.len()) .map(Canonical::Primitive) } _ => vortex_bail!("Only Primitive and Bool values are supported"), @@ -238,8 +218,7 @@ impl IntoCanonical for RunEndArray { impl VisitorVTable for RunEndEncoding { fn accept(&self, array: &RunEndArray, visitor: &mut dyn ArrayVisitor) -> VortexResult<()> { visitor.visit_child("ends", &array.ends())?; - visitor.visit_child("values", &array.values())?; - visitor.visit_validity(&array.validity()) + visitor.visit_child("values", &array.values()) } } @@ -247,7 +226,6 @@ impl StatisticsVTable for RunEndEncoding { fn compute_statistics(&self, array: &RunEndArray, stat: Stat) -> VortexResult { let maybe_stat = match stat { Stat::Min | Stat::Max => array.values().statistics().compute(stat), - Stat::NullCount => Some(Scalar::from(array.validity().null_count(array.len())?)), Stat::IsSorted => Some(Scalar::from( array .values() @@ -270,7 +248,6 @@ impl StatisticsVTable for RunEndEncoding { #[cfg(test)] mod tests { use vortex_array::compute::scalar_at; - use vortex_array::validity::Validity; use vortex_array::{ArrayDType, ArrayLen, IntoArrayData}; use vortex_dtype::{DType, Nullability, PType}; @@ -281,7 +258,6 @@ mod tests { let arr = RunEndArray::try_new( vec![2u32, 5, 10].into_array(), vec![1i32, 2, 3].into_array(), - Validity::NonNullable, ) .unwrap(); assert_eq!(arr.len(), 10); diff --git a/encodings/runend/src/compress.rs b/encodings/runend/src/compress.rs index 11531e03d7..887015f0ae 100644 --- a/encodings/runend/src/compress.rs +++ b/encodings/runend/src/compress.rs @@ -1,28 +1,50 @@ use arrow_buffer::BooleanBufferBuilder; use itertools::Itertools; -use vortex_array::array::{BoolArray, BooleanBuffer, PrimitiveArray}; -use vortex_array::validity::Validity; +use vortex_array::array::{BoolArray, BooleanBuffer, ConstantArray, PrimitiveArray}; +use vortex_array::validity::{ArrayValidity, LogicalValidity, Validity}; use vortex_array::variants::PrimitiveArrayTrait; -use vortex_array::ArrayDType; +use vortex_array::{ArrayDType, ArrayData, ArrayLen, IntoArrayData, IntoArrayVariant}; use vortex_dtype::{match_each_integer_ptype, match_each_native_ptype, NativePType, Nullability}; -use vortex_error::VortexResult; +use vortex_error::{VortexExpect, VortexResult}; +use vortex_scalar::Scalar; use crate::iter::trimmed_ends_iter; -pub fn runend_encode(array: &PrimitiveArray) -> (PrimitiveArray, PrimitiveArray) { - let validity = if array.dtype().nullability() == Nullability::NonNullable { - Validity::NonNullable - } else { - Validity::AllValid +pub fn runend_encode(array: &PrimitiveArray) -> VortexResult<(PrimitiveArray, ArrayData)> { + let validity = match array.validity() { + Validity::NonNullable => None, + Validity::AllValid => None, + Validity::AllInvalid => { + // We can trivially return an all-null REE array + return Ok(( + PrimitiveArray::from(vec![array.len() as u64]), + ConstantArray::new(Scalar::null(array.dtype().clone()), array.len()).into_array(), + )); + } + Validity::Array(a) => Some(a.into_bool()?.boolean_buffer()), }; - let (compressed_ends, compressed_values) = match_each_native_ptype!(array.ptype(), |$P| { - let (ends, values) = runend_encode_primitive(array.maybe_null_slice::<$P>()); - (PrimitiveArray::from_vec(ends, Validity::NonNullable), PrimitiveArray::from_vec(values, validity)) - }); - - assert_eq!(array.dtype(), compressed_values.dtype()); - (compressed_ends, compressed_values) + Ok(match validity { + None => { + match_each_native_ptype!(array.ptype(), |$P| { + let (ends, values) = runend_encode_primitive(array.maybe_null_slice::<$P>()); + ( + PrimitiveArray::from_vec(ends, Validity::NonNullable), + PrimitiveArray::from_vec(values, array.dtype().nullability().into()).into_array(), + ) + }) + } + Some(validity) => { + match_each_native_ptype!(array.ptype(), |$P| { + let (ends, values) = + runend_encode_nullable_primitive(array.maybe_null_slice::<$P>(), validity); + ( + PrimitiveArray::from_vec(ends, Validity::NonNullable), + values.into_array(), + ) + }) + } + }) } fn runend_encode_primitive(elements: &[T]) -> (Vec, Vec) { @@ -50,20 +72,86 @@ fn runend_encode_primitive(elements: &[T]) -> (Vec, Vec) (ends, values) } +fn runend_encode_nullable_primitive( + elements: &[T], + element_validity: BooleanBuffer, +) -> (Vec, PrimitiveArray) { + let mut ends = Vec::new(); + let mut values = Vec::new(); + let mut validity = BooleanBufferBuilder::new(values.capacity()); + + if elements.is_empty() { + return ( + ends, + PrimitiveArray::from_vec( + values, + Validity::Array(BoolArray::from(validity.finish()).into_array()), + ), + ); + } + + // Run-end encode the values + let mut last = element_validity.value(0).then(|| elements[0]); + let mut end = 1; + for e in elements + .iter() + .zip(element_validity.iter()) + .map(|(&e, is_valid)| is_valid.then_some(e)) + .skip(1) + { + if e != last { + ends.push(end); + match e { + None => { + validity.append(false); + values.push(T::default()); + } + Some(e) => { + validity.append(true); + values.push(e); + } + } + } + last = e; + end += 1; + } + ends.push(end); + + match last { + None => { + validity.append(false); + values.push(T::default()); + } + Some(e) => { + validity.append(true); + values.push(e); + } + } + + ( + ends, + PrimitiveArray::from_vec( + values, + Validity::Array(BoolArray::from(validity.finish()).into_array()), + ), + ) +} + pub fn runend_decode_primitive( ends: PrimitiveArray, values: PrimitiveArray, - validity: Validity, offset: usize, length: usize, ) -> VortexResult { match_each_native_ptype!(values.ptype(), |$P| { match_each_integer_ptype!(ends.ptype(), |$E| { - Ok(PrimitiveArray::from_vec(runend_decode_typed_primitive( + runend_decode_typed_primitive( trimmed_ends_iter(ends.maybe_null_slice::<$E>(), offset, length), values.maybe_null_slice::<$P>(), + values.logical_validity(), + values.dtype().nullability(), length, - ), validity)) + ) }) }) } @@ -71,56 +159,129 @@ pub fn runend_decode_primitive( pub fn runend_decode_bools( ends: PrimitiveArray, values: BoolArray, - validity: Validity, offset: usize, length: usize, ) -> VortexResult { match_each_integer_ptype!(ends.ptype(), |$E| { - BoolArray::try_new(runend_decode_typed_bool( + runend_decode_typed_bool( trimmed_ends_iter(ends.maybe_null_slice::<$E>(), offset, length), values.boolean_buffer(), + values.logical_validity(), + values.dtype().nullability(), length, - ), validity) + ) }) } pub fn runend_decode_typed_primitive( run_ends: impl Iterator, values: &[T], + values_validity: LogicalValidity, + values_nullability: Nullability, length: usize, -) -> Vec { - let mut decoded = Vec::with_capacity(length); - for (end, value) in run_ends.zip_eq(values) { - decoded.extend(std::iter::repeat_n(value, end - decoded.len())); - } - decoded +) -> VortexResult { + Ok(match values_validity { + LogicalValidity::AllValid(_) => { + let mut decoded: Vec = Vec::with_capacity(length); + for (end, value) in run_ends.zip_eq(values) { + decoded.extend(std::iter::repeat_n(value, end - decoded.len())); + } + PrimitiveArray::from_vec(decoded, values_nullability.into()) + } + LogicalValidity::AllInvalid(_) => PrimitiveArray::from_vec( + vec![T::default(); length], + Validity::Array(BoolArray::from(BooleanBuffer::new_unset(length)).into_array()), + ), + LogicalValidity::Array(array) => { + let validity = array.into_bool()?.boolean_buffer(); + let mut decoded = Vec::with_capacity(length); + let mut decoded_validity = BooleanBufferBuilder::new(length); + for (end, value) in run_ends.zip_eq( + values + .iter() + .zip(validity.iter()) + .map(|(&v, is_valid)| is_valid.then_some(v)), + ) { + match value { + None => { + decoded_validity.append_n(end - decoded.len(), false); + decoded.extend(std::iter::repeat_n(T::default(), end - decoded.len())); + } + Some(value) => { + decoded_validity.append_n(end - decoded.len(), true); + decoded.extend(std::iter::repeat_n(value, end - decoded.len())); + } + } + } + PrimitiveArray::from_vec( + decoded, + Validity::Array(BoolArray::from(decoded_validity.finish()).into_array()), + ) + } + }) } pub fn runend_decode_typed_bool( run_ends: impl Iterator, values: BooleanBuffer, + values_validity: LogicalValidity, + values_nullability: Nullability, length: usize, -) -> BooleanBuffer { - let mut decoded = BooleanBufferBuilder::new(length); - for (end, value) in run_ends.zip_eq(values.iter()) { - decoded.append_n(end - decoded.len(), value); - } - decoded.finish() +) -> VortexResult { + Ok(match values_validity { + LogicalValidity::AllValid(_) => { + let mut decoded = BooleanBufferBuilder::new(length); + for (end, value) in run_ends.zip_eq(values.iter()) { + decoded.append_n(end - decoded.len(), value); + } + BoolArray::new(decoded.finish(), values_nullability) + } + LogicalValidity::AllInvalid(_) => BoolArray::try_new( + BooleanBuffer::new_unset(length), + Validity::Array(BoolArray::from(BooleanBuffer::new_unset(length)).into_array()), + ) + .vortex_expect("invalid array"), + LogicalValidity::Array(array) => { + let validity = array.into_bool()?.boolean_buffer(); + let mut decoded = BooleanBufferBuilder::new(length); + let mut decoded_validity = BooleanBufferBuilder::new(length); + for (end, value) in run_ends.zip_eq( + values + .iter() + .zip(validity.iter()) + .map(|(v, is_valid)| is_valid.then_some(v)), + ) { + match value { + None => { + decoded_validity.append_n(end - decoded.len(), false); + decoded.append_n(end - decoded.len(), false); + } + Some(value) => { + decoded_validity.append_n(end - decoded.len(), true); + decoded.append_n(end - decoded.len(), value); + } + } + } + BoolArray::try_new( + decoded.finish(), + Validity::Array(BoolArray::from(decoded_validity.finish()).into_array()), + )? + } + }) } #[cfg(test)] mod test { use vortex_array::array::PrimitiveArray; - use vortex_array::validity::{ArrayValidity, Validity}; - use vortex_array::{ArrayLen, IntoArrayData, IntoArrayVariant}; + use vortex_array::IntoArrayVariant; use crate::compress::{runend_decode_primitive, runend_encode}; - use crate::RunEndArray; #[test] fn encode() { let arr = PrimitiveArray::from(vec![1i32, 1, 2, 2, 2, 3, 3, 3, 3, 3]); - let (ends, values) = runend_encode(&arr); + let (ends, values) = runend_encode(&arr).unwrap(); + let values = values.into_primitive().unwrap(); assert_eq!(ends.maybe_null_slice::(), vec![2, 5, 10]); assert_eq!(values.maybe_null_slice::(), vec![1, 2, 3]); @@ -130,45 +291,11 @@ mod test { fn decode() { let ends = PrimitiveArray::from(vec![2, 5, 10]); let values = PrimitiveArray::from(vec![1i32, 2, 3]); - let decoded = runend_decode_primitive(ends, values, Validity::NonNullable, 0, 10).unwrap(); + let decoded = runend_decode_primitive(ends, values, 0, 10).unwrap(); assert_eq!( decoded.maybe_null_slice::(), vec![1i32, 1, 2, 2, 2, 3, 3, 3, 3, 3] ); } - - #[test] - fn decode_nullable() { - let validity = { - let mut validity = vec![true; 10]; - validity[2] = false; - validity[7] = false; - Validity::from_iter(validity) - }; - let arr = RunEndArray::try_new( - vec![2u32, 5, 10].into_array(), - PrimitiveArray::from_vec(vec![1i32, 2, 3], Validity::AllValid).into_array(), - validity, - ) - .unwrap(); - - let decoded = runend_decode_primitive( - arr.ends().into_primitive().unwrap(), - arr.values().into_primitive().unwrap(), - arr.validity(), - 0, - arr.len(), - ) - .unwrap(); - - assert_eq!( - decoded.maybe_null_slice::(), - vec![1i32, 1, 2, 2, 2, 3, 3, 3, 3, 3].as_slice() - ); - assert_eq!( - decoded.logical_validity().into_validity(), - Validity::from_iter([true, true, false, true, true, true, true, false, true, true,]) - ); - } } diff --git a/encodings/runend/src/compute/compare.rs b/encodings/runend/src/compute/compare.rs index 77388216ff..fe1fef5059 100644 --- a/encodings/runend/src/compute/compare.rs +++ b/encodings/runend/src/compute/compare.rs @@ -20,13 +20,7 @@ impl CompareFn for RunEndEncoding { operator, ) .and_then(|values| { - RunEndArray::with_offset_and_length( - lhs.ends(), - values, - lhs.validity().into_nullable(), - lhs.offset(), - lhs.len(), - ) + RunEndArray::with_offset_and_length(lhs.ends(), values, lhs.offset(), lhs.len()) }) .map(|a| a.into_array()) .map(Some); diff --git a/encodings/runend/src/compute/invert.rs b/encodings/runend/src/compute/invert.rs index d239e3805a..834498d8a4 100644 --- a/encodings/runend/src/compute/invert.rs +++ b/encodings/runend/src/compute/invert.rs @@ -9,7 +9,6 @@ impl InvertFn for RunEndEncoding { RunEndArray::with_offset_and_length( array.ends(), invert(&array.values())?, - array.validity(), array.len(), array.offset(), ) diff --git a/encodings/runend/src/compute/mod.rs b/encodings/runend/src/compute/mod.rs index 6786045f7d..76b508ada9 100644 --- a/encodings/runend/src/compute/mod.rs +++ b/encodings/runend/src/compute/mod.rs @@ -5,14 +5,13 @@ use std::cmp::min; use std::ops::AddAssign; use num_traits::AsPrimitive; -use vortex_array::array::{BooleanBuffer, ConstantArray, PrimitiveArray, SparseArray}; +use vortex_array::array::{BooleanBuffer, PrimitiveArray}; use vortex_array::compute::{ filter, scalar_at, slice, take, CompareFn, ComputeVTable, FilterFn, FilterMask, InvertFn, ScalarAtFn, SliceFn, TakeFn, }; -use vortex_array::validity::Validity; use vortex_array::variants::PrimitiveArrayTrait; -use vortex_array::{ArrayDType, ArrayData, ArrayLen, IntoArrayData, IntoArrayVariant}; +use vortex_array::{ArrayData, ArrayLen, IntoArrayData, IntoArrayVariant}; use vortex_dtype::{match_each_integer_ptype, match_each_unsigned_integer_ptype, NativePType}; use vortex_error::{VortexResult, VortexUnwrap}; use vortex_scalar::Scalar; @@ -75,36 +74,7 @@ impl TakeFn for RunEndEncoding { .map(|idx| idx as u64) .collect::>(); let physical_indices_array = PrimitiveArray::from(physical_indices).into_array(); - let dense_values = take(array.values(), &physical_indices_array)?; - - Ok(match array.validity() { - Validity::NonNullable => dense_values, - Validity::AllValid => dense_values, - Validity::AllInvalid => { - ConstantArray::new(Scalar::null(array.dtype().clone()), indices.len()).into_array() - } - Validity::Array(original_validity) => { - let dense_validity = FilterMask::try_from(take(&original_validity, indices)?)?; - let length = dense_validity.len(); - let dense_nonnull_indices = PrimitiveArray::from( - dense_validity - .iter_indices()? - .map(|idx| idx as u64) - .collect::>(), - ) - .into_array(); - let filtered_values = filter(&dense_values, dense_validity)?; - let dtype = filtered_values.dtype().clone(); - - SparseArray::try_new( - dense_nonnull_indices, - filtered_values, - length, - Scalar::null(dtype), - )? - .into_array() - } - }) + take(array.values(), &physical_indices_array) } } @@ -125,7 +95,6 @@ impl SliceFn for RunEndEncoding { Ok(RunEndArray::with_offset_and_length( slice(array.ends(), slice_begin, slice_end)?, slice(array.values(), slice_begin, slice_end)?, - array.validity().slice(start, stop)?, if new_length == 0 { 0 } else { @@ -139,14 +108,13 @@ impl SliceFn for RunEndEncoding { impl FilterFn for RunEndEncoding { fn filter(&self, array: &RunEndArray, mask: FilterMask) -> VortexResult { - let validity = array.validity().filter(&mask)?; let primitive_run_ends = array.ends().into_primitive()?; let (run_ends, values_mask) = match_each_unsigned_integer_ptype!(primitive_run_ends.ptype(), |$P| { filter_run_ends(primitive_run_ends.maybe_null_slice::<$P>(), array.offset() as u64, array.len() as u64, mask)? }); let values = filter(&array.values(), values_mask)?; - RunEndArray::try_new(run_ends.into_array(), values, validity).map(|a| a.into_array()) + RunEndArray::try_new(run_ends.into_array(), values).map(|a| a.into_array()) } } @@ -190,12 +158,10 @@ fn filter_run_ends + AsPrimitive>( #[cfg(test)] mod test { - use vortex_array::array::{BoolArray, PrimitiveArray}; - use vortex_array::compute::{filter, scalar_at, slice, take, try_cast, FilterMask}; - use vortex_array::validity::{ArrayValidity, Validity}; + use vortex_array::array::PrimitiveArray; + use vortex_array::compute::{filter, scalar_at, slice, take, FilterMask}; use vortex_array::{ArrayDType, ArrayLen, IntoArrayData, IntoArrayVariant, ToArrayData}; use vortex_dtype::{DType, Nullability, PType}; - use vortex_scalar::Scalar; use crate::RunEndArray; @@ -248,55 +214,12 @@ mod test { assert_eq!(scalar, 5.into()); } - #[test] - fn ree_null_scalar() { - let array = ree_array(); - let null_ree = RunEndArray::try_new( - array.ends(), - try_cast(array.values(), &array.values().dtype().as_nullable()).unwrap(), - Validity::AllInvalid, - ) - .unwrap(); - let scalar = scalar_at(null_ree.as_ref(), 11).unwrap(); - assert_eq!(scalar, Scalar::null(null_ree.dtype().clone())); - } - - #[test] - fn slice_with_nulls() { - let array = RunEndArray::try_new( - PrimitiveArray::from(vec![3u32, 6, 8, 12]).into_array(), - PrimitiveArray::from_vec(vec![1, 4, 2, 5], Validity::AllValid).into_array(), - Validity::from_iter([ - false, false, false, false, true, true, false, false, false, false, true, true, - ]), - ) - .unwrap(); - let sliced = slice(array.as_ref(), 4, 10).unwrap(); - let sliced_primitive = sliced.into_primitive().unwrap(); - assert_eq!( - sliced_primitive.maybe_null_slice::(), - vec![4, 4, 2, 2, 5, 5] - ); - assert_eq!( - sliced_primitive - .logical_validity() - .into_array() - .into_bool() - .unwrap() - .boolean_buffer() - .iter() - .collect::>(), - vec![true, true, false, false, false, false] - ) - } - #[test] fn slice_array() { let arr = slice( RunEndArray::try_new( vec![2u32, 5, 10].into_array(), vec![1i32, 2, 3].into_array(), - Validity::NonNullable, ) .unwrap() .as_ref(), @@ -322,7 +245,6 @@ mod test { RunEndArray::try_new( vec![2u32, 5, 10].into_array(), vec![1i32, 2, 3].into_array(), - Validity::NonNullable, ) .unwrap() .as_ref(), @@ -349,7 +271,6 @@ mod test { RunEndArray::try_new( vec![2u32, 5, 10].into_array(), vec![1i32, 2, 3].into_array(), - Validity::NonNullable, ) .unwrap() .as_ref(), @@ -374,7 +295,6 @@ mod test { let arr = RunEndArray::try_new( vec![2u32, 5, 10].into_array(), vec![1i32, 2, 3].into_array(), - Validity::NonNullable, ) .unwrap(); @@ -384,41 +304,11 @@ mod test { ); } - #[test] - fn take_with_nulls() { - let uncompressed = PrimitiveArray::from_vec(vec![1i32, 0, 3], Validity::AllValid); - let validity = BoolArray::from_iter([ - true, true, false, false, false, true, true, true, true, true, - ]); - let arr = RunEndArray::try_new( - vec![2u32, 5, 10].into_array(), - uncompressed.into_array(), - Validity::Array(validity.into_array()), - ) - .unwrap(); - - let test_indices = PrimitiveArray::from_vec(vec![0, 2, 4, 6], Validity::NonNullable); - let taken = take(arr.as_ref(), test_indices.as_ref()).unwrap(); - - assert_eq!(taken.len(), test_indices.len()); - - let parray = taken.into_primitive().unwrap(); - assert_eq!( - (0..4) - .map(|idx| parray.is_valid(idx).then(|| parray.get_as_cast::(idx))) - .collect::>>(), - vec![Some(1), None, None, Some(3),] - ); - } - #[test] fn slice_at_end() { - let re_array = RunEndArray::try_new( - vec![7_u64, 10].into_array(), - vec![2_u64, 3].into_array(), - Validity::NonNullable, - ) - .unwrap(); + let re_array = + RunEndArray::try_new(vec![7_u64, 10].into_array(), vec![2_u64, 3].into_array()) + .unwrap(); assert_eq!(re_array.len(), 10); diff --git a/vortex-array/src/array/primitive/mod.rs b/vortex-array/src/array/primitive/mod.rs index 9e011cbd9a..dfc1a9a323 100644 --- a/vortex-array/src/array/primitive/mod.rs +++ b/vortex-array/src/array/primitive/mod.rs @@ -3,7 +3,7 @@ use std::ptr; use std::sync::Arc; mod accessor; -use arrow_buffer::{ArrowNativeType, Buffer as ArrowBuffer, MutableBuffer}; +use arrow_buffer::{ArrowNativeType, BooleanBufferBuilder, Buffer as ArrowBuffer, MutableBuffer}; use bytes::Bytes; use itertools::Itertools; use serde::{Deserialize, Serialize}; @@ -11,6 +11,7 @@ use vortex_buffer::Buffer; use vortex_dtype::{match_each_native_ptype, DType, NativePType, PType}; use vortex_error::{VortexExpect as _, VortexResult}; +use crate::array::BoolArray; use crate::encoding::ids; use crate::iter::Accessor; use crate::stats::StatsSet; @@ -213,6 +214,31 @@ impl Accessor for PrimitiveArray { impl PrimitiveArrayTrait for PrimitiveArray {} +impl FromIterator> for PrimitiveArray { + fn from_iter>>(iter: I) -> Self { + let iter = iter.into_iter(); + let mut values = Vec::with_capacity(iter.size_hint().0); + let mut validity = BooleanBufferBuilder::new(values.capacity()); + + for i in iter { + match i { + None => { + validity.append(false); + values.push(T::default()); + } + Some(e) => { + validity.append(true); + values.push(e); + } + } + } + Self::from_vec( + values, + Validity::Array(BoolArray::from(validity.finish()).into_array()), + ) + } +} + impl From> for PrimitiveArray { fn from(values: Vec) -> Self { Self::from_vec(values, Validity::NonNullable) diff --git a/vortex-array/src/builders/primitive.rs b/vortex-array/src/builders/primitive.rs index 42f7f6573f..2d6f5cb7d5 100644 --- a/vortex-array/src/builders/primitive.rs +++ b/vortex-array/src/builders/primitive.rs @@ -16,9 +16,9 @@ pub struct PrimitiveBuilder { dtype: DType, } -impl PrimitiveBuilder +impl PrimitiveBuilder where - T: NativePType + 'static, + T: NativePType, ::Native: NativePType, { pub fn new(nullability: Nullability) -> Self { diff --git a/vortex-array/src/validity.rs b/vortex-array/src/validity.rs index 8021eb3a2e..7fbedaa704 100644 --- a/vortex-array/src/validity.rs +++ b/vortex-array/src/validity.rs @@ -5,7 +5,6 @@ use std::ops::BitAnd; use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder, NullBuffer}; use serde::{Deserialize, Serialize}; -use vortex_dtype::Nullability::NonNullable; use vortex_dtype::{DType, Nullability}; use vortex_error::{ vortex_bail, vortex_err, vortex_panic, VortexError, VortexExpect as _, VortexResult, @@ -100,7 +99,7 @@ pub enum Validity { impl Validity { /// The [`DType`] of the underlying validity array (if it exists). - pub const DTYPE: DType = DType::Bool(NonNullable); + pub const DTYPE: DType = DType::Bool(Nullability::NonNullable); pub fn to_metadata(&self, length: usize) -> VortexResult { match self { @@ -161,7 +160,7 @@ impl Validity { pub fn nullability(&self) -> Nullability { match self { - Self::NonNullable => NonNullable, + Self::NonNullable => Nullability::NonNullable, _ => Nullability::Nullable, } } @@ -426,6 +425,15 @@ impl FromIterator for Validity { } } +impl From for Validity { + fn from(value: Nullability) -> Self { + match value { + Nullability::NonNullable => Validity::NonNullable, + Nullability::Nullable => Validity::AllValid, + } + } +} + #[derive(Clone, Debug)] pub enum LogicalValidity { AllValid(usize), diff --git a/vortex-sampling-compressor/src/compressors/runend.rs b/vortex-sampling-compressor/src/compressors/runend.rs index 03933fd245..4f1b19932f 100644 --- a/vortex-sampling-compressor/src/compressors/runend.rs +++ b/vortex-sampling-compressor/src/compressors/runend.rs @@ -51,7 +51,7 @@ impl EncodingCompressor for RunEndCompressor { ctx: SamplingCompressor<'a>, ) -> VortexResult> { let primitive_array = array.clone().into_primitive()?; - let (ends, values) = runend_encode(&primitive_array); + let (ends, values) = runend_encode(&primitive_array)?; let ends = downscale_integer_array(ends.into_array())?.into_primitive()?; let compressed_ends = ctx @@ -60,15 +60,11 @@ impl EncodingCompressor for RunEndCompressor { let compressed_values = ctx .named("values") .excluding(self) - .compress(&values.into_array(), like.as_ref().and_then(|l| l.child(1)))?; + .compress(&values, like.as_ref().and_then(|l| l.child(1)))?; Ok(CompressedArray::compressed( - RunEndArray::try_new( - compressed_ends.array, - compressed_values.array, - ctx.compress_validity(primitive_array.validity())?, - ) - .map(|a| a.into_array())?, + RunEndArray::try_new(compressed_ends.array, compressed_values.array) + .map(|a| a.into_array())?, Some(CompressionTree::new( self, vec![compressed_ends.path, compressed_values.path],