From 4344712ef6da2d6f134faf47b40f690b50db1126 Mon Sep 17 00:00:00 2001 From: Robert Kruszewski Date: Fri, 13 Dec 2024 21:30:16 +0000 Subject: [PATCH] fix: RunEndBool array take respects validity --- encodings/runend-bool/src/compute/mod.rs | 36 ++++++++++++++++++++---- vortex-array/src/array/bool/mod.rs | 3 +- 2 files changed, 31 insertions(+), 8 deletions(-) diff --git a/encodings/runend-bool/src/compute/mod.rs b/encodings/runend-bool/src/compute/mod.rs index 8213dd44a6..74583ad099 100644 --- a/encodings/runend-bool/src/compute/mod.rs +++ b/encodings/runend-bool/src/compute/mod.rs @@ -1,9 +1,10 @@ mod invert; +use arrow_buffer::BooleanBuffer; use vortex_array::array::BoolArray; use vortex_array::compute::{slice, ComputeVTable, InvertFn, ScalarAtFn, SliceFn, TakeFn}; use vortex_array::variants::PrimitiveArrayTrait; -use vortex_array::{ArrayDType, ArrayData, ArrayLen, IntoArrayData, IntoArrayVariant, ToArrayData}; +use vortex_array::{ArrayDType, ArrayData, ArrayLen, IntoArrayData, IntoArrayVariant}; use vortex_dtype::match_each_integer_ptype; use vortex_error::{vortex_bail, VortexResult}; use vortex_scalar::Scalar; @@ -53,10 +54,11 @@ impl TakeFn for RunEndBoolEncoding { .collect::>>()? }); let start = array.start(); - Ok( - BoolArray::from_iter(physical_indices.iter().map(|&it| value_at_index(it, start))) - .to_array(), + BoolArray::try_new( + BooleanBuffer::from_iter(physical_indices.iter().map(|&it| value_at_index(it, start))), + array.validity().take(indices)?, ) + .map(|a| a.into_array()) } } @@ -90,9 +92,11 @@ impl SliceFn for RunEndBoolEncoding { #[cfg(test)] mod tests { - use vortex_array::compute::{scalar_at, slice}; + use arrow_buffer::BooleanBuffer; + use vortex_array::array::PrimitiveArray; + use vortex_array::compute::{scalar_at, slice, take}; use vortex_array::validity::Validity; - use vortex_array::{ArrayLen, IntoArrayData}; + use vortex_array::{ArrayDType, ArrayLen, IntoArrayData, IntoArrayVariant}; use vortex_dtype::Nullability; use vortex_scalar::Scalar; @@ -124,4 +128,24 @@ mod tests { Scalar::bool(false, Nullability::Nullable) ); } + + #[test] + fn take_nullable() { + let re_array = RunEndBoolArray::try_new( + vec![7_u64, 10].into_array(), + false, + Validity::from(BooleanBuffer::from(vec![ + false, false, true, true, true, true, true, true, false, false, + ])), + ) + .unwrap(); + + let taken = take(&re_array, PrimitiveArray::from(vec![6, 9])).unwrap(); + let taken_bool = taken.into_bool().unwrap(); + assert_eq!(taken_bool.dtype(), re_array.dtype()); + assert_eq!( + taken_bool.boolean_buffer(), + BooleanBuffer::from(vec![false, true]) + ); + } } diff --git a/vortex-array/src/array/bool/mod.rs b/vortex-array/src/array/bool/mod.rs index 2113adebe3..bf8eca07a0 100644 --- a/vortex-array/src/array/bool/mod.rs +++ b/vortex-array/src/array/bool/mod.rs @@ -3,7 +3,6 @@ use std::sync::Arc; use arrow_array::BooleanArray; use arrow_buffer::{BooleanBufferBuilder, MutableBuffer}; -use itertools::Itertools; use serde::{Deserialize, Serialize}; use vortex_buffer::Buffer; use vortex_dtype::{DType, Nullability}; @@ -129,7 +128,7 @@ impl BoolArray { first_byte_bit_offset, }), Some(Buffer::from(inner)), - validity.into_array().into_iter().collect_vec().into(), + validity.into_array().into_iter().collect(), StatsSet::default(), )? .try_into()