From 8af1865b55c80c462df9d2732077d56cc4f0a4fa Mon Sep 17 00:00:00 2001 From: Nicholas Gates Date: Sat, 6 Apr 2024 21:52:17 +0100 Subject: [PATCH] Validity --- vortex-array2/src/array/bool/mod.rs | 61 ++++++++++++----------------- vortex-array2/src/validity.rs | 61 +++++++++++++++++------------ 2 files changed, 63 insertions(+), 59 deletions(-) diff --git a/vortex-array2/src/array/bool/mod.rs b/vortex-array2/src/array/bool/mod.rs index e1e25d651e..fc928d3f83 100644 --- a/vortex-array2/src/array/bool/mod.rs +++ b/vortex-array2/src/array/bool/mod.rs @@ -16,14 +16,13 @@ impl_encoding!("vortex.bool", Bool); #[derive(Clone, Debug)] pub struct BoolMetadata { - // TODO(ngates): push option inside the metadata? - validity: Option, + validity: ValidityMetadata, length: usize, } impl BoolMetadata { - pub fn validity(&self) -> Option<&ValidityMetadata> { - self.validity.as_ref() + pub fn validity(&self) -> &ValidityMetadata { + &self.validity } pub fn len(&self) -> usize { @@ -31,6 +30,7 @@ impl BoolMetadata { } } +// TODO(ngates): I think this could be a struct? pub trait BoolArray { fn buffer(&self) -> &Buffer; fn len(&self) -> usize; @@ -38,23 +38,22 @@ pub trait BoolArray { } impl BoolData { - pub fn try_new(buffer: BooleanBuffer, validity: Option) -> Self { + pub fn try_new(buffer: BooleanBuffer, validity: Option) -> VortexResult { if let Some(v) = &validity { assert_eq!(v.len(), buffer.len()); } - Self::new_unchecked( - DType::Bool(validity.is_some().into()), - Arc::new(BoolMetadata { - validity: validity.as_ref().map(|v| ValidityMetadata::from(v)), - length: buffer.len(), - }), + let dtype = DType::Bool(validity.is_some().into()); + let metadata = BoolMetadata { + validity: ValidityMetadata::try_from_validity(validity.as_ref(), &dtype)?, + length: buffer.len(), + }; + let validity_array = validity.and_then(|v| v.into_array_data()); + Ok(Self::new_unchecked( + dtype, + Arc::new(metadata), vec![buffer.into_inner()].into(), - // Hmmmm - vec![validity - .and_then(|v| v.into_array()) - .map(|a| a.to_array_data())] - .into(), - ) + vec![validity_array].into(), + )) } } @@ -68,14 +67,10 @@ impl BoolArray for BoolData { } fn validity(&self) -> Option { - self.metadata().validity().map(|v| { - Validity::try_from_validity_meta( - v, - self.metadata().len(), - self.data().child(0).map(|a| a.to_array()), - ) - .unwrap() - }) + self.metadata().validity().to_validity( + self.metadata().len(), + self.data().child(0).map(|data| data.to_array()), + ) } } @@ -92,16 +87,12 @@ impl BoolArray for BoolView<'_> { } fn validity(&self) -> Option { - self.metadata().validity().map(|v| { - Validity::try_from_validity_meta( - v, - self.metadata().len(), - self.view() - .child(0, &Validity::DTYPE) - .map(|a| a.into_array()), - ) - .unwrap() - }) + self.metadata().validity().to_validity( + self.metadata().len(), + self.view() + .child(0, &Validity::DTYPE) + .map(|a| a.into_array()), + ) } } diff --git a/vortex-array2/src/validity.rs b/vortex-array2/src/validity.rs index 4dc98214d8..3508ff9982 100644 --- a/vortex-array2/src/validity.rs +++ b/vortex-array2/src/validity.rs @@ -1,12 +1,13 @@ -use vortex_error::{vortex_err, VortexResult}; +use vortex_error::{vortex_bail, VortexResult}; use vortex_schema::{DType, Nullability}; use crate::array::validity::ValidityArray; use crate::compute::scalar_at; -use crate::{Array, WithArray}; +use crate::{Array, ArrayData, ToArrayData, WithArray}; pub trait ArrayValidity { fn is_valid(&self, index: usize) -> bool; + // Maybe add to_bool_array() here? } impl ArrayValidity for &dyn ValidityArray { @@ -17,17 +18,43 @@ impl ArrayValidity for &dyn ValidityArray { #[derive(Clone, Debug)] pub enum ValidityMetadata { + NonNullable, Valid, Invalid, Array, } -impl<'v> From<&Validity<'v>> for ValidityMetadata { - fn from(value: &Validity<'v>) -> Self { - match value { - Validity::Valid(_) => ValidityMetadata::Valid, - Validity::Invalid(_) => ValidityMetadata::Invalid, - Validity::Array(_) => ValidityMetadata::Array, +impl ValidityMetadata { + pub fn try_from_validity(validity: Option<&Validity>, dtype: &DType) -> VortexResult { + // We don't really need dtype for this conversion, but it's a good place to check + // that the nullability and validity are consistent. + match validity { + None => { + if dtype.nullability() != Nullability::NonNullable { + vortex_bail!("DType must be NonNullable if validity is absent") + } + Ok(ValidityMetadata::NonNullable) + } + Some(v) => { + if dtype.nullability() != Nullability::Nullable { + vortex_bail!("DType must be Nullable if validity is present") + } + Ok(match v { + Validity::Valid(_) => ValidityMetadata::Valid, + Validity::Invalid(_) => ValidityMetadata::Invalid, + Validity::Array(_) => ValidityMetadata::Array, + }) + } + } + } + + pub fn to_validity<'v>(&self, len: usize, array: Option>) -> Option> { + match self { + ValidityMetadata::NonNullable => None, + ValidityMetadata::Valid => Some(Validity::Valid(len)), + ValidityMetadata::Invalid => Some(Validity::Invalid(len)), + // TODO(ngates): should we return a result for this? + ValidityMetadata::Array => Some(Validity::Array(array.unwrap())), } } } @@ -42,23 +69,9 @@ pub enum Validity<'v> { impl<'v> Validity<'v> { pub const DTYPE: DType = DType::Bool(Nullability::NonNullable); - pub fn try_from_validity_meta( - meta: &ValidityMetadata, - length: usize, - array: Option>, - ) -> VortexResult> { - match meta { - ValidityMetadata::Valid => Ok(Validity::Valid(length)), - ValidityMetadata::Invalid => Ok(Validity::Invalid(length)), - ValidityMetadata::Array => array - .map(|v| Validity::Array(v)) - .ok_or(vortex_err!("Expected validity array")), - } - } - - pub fn into_array(self) -> Option> { + pub fn into_array_data(self) -> Option { match self { - Validity::Array(a) => Some(a), + Validity::Array(a) => Some(a.to_array_data()), _ => None, } }