From 55dcd022d0ceb1aef0aa419628bdecaef1f55aec Mon Sep 17 00:00:00 2001 From: Nicholas Gates Date: Sat, 6 Apr 2024 22:51:10 +0100 Subject: [PATCH 1/4] Use struct not trait --- vortex-array2/Cargo.toml | 2 +- vortex-array2/src/array/bool/compute.rs | 4 +- vortex-array2/src/array/bool/mod.rs | 134 +++++++++++------------- vortex-array2/src/array/mod.rs | 4 +- vortex-array2/src/compute.rs | 26 ++--- vortex-array2/src/data.rs | 29 +++-- vortex-array2/src/implementation.rs | 66 +++++------- vortex-array2/src/lib.rs | 33 +++--- vortex-array2/src/view.rs | 56 ++-------- 9 files changed, 149 insertions(+), 205 deletions(-) diff --git a/vortex-array2/Cargo.toml b/vortex-array2/Cargo.toml index 28c3fdcb49..ffae652ac2 100644 --- a/vortex-array2/Cargo.toml +++ b/vortex-array2/Cargo.toml @@ -21,4 +21,4 @@ vortex-flatbuffers = { path = "../vortex-flatbuffers" } vortex-schema = { path = "../vortex-schema" } [lints] -workspace = true +# workspace = true diff --git a/vortex-array2/src/array/bool/compute.rs b/vortex-array2/src/array/bool/compute.rs index 0355d4d330..4276cddc7f 100644 --- a/vortex-array2/src/array/bool/compute.rs +++ b/vortex-array2/src/array/bool/compute.rs @@ -4,13 +4,13 @@ use vortex_error::VortexResult; use crate::array::bool::BoolArray; use crate::compute::{ArrayCompute, ScalarAtFn}; -impl ArrayCompute for &dyn BoolArray { +impl ArrayCompute for BoolArray<'_> { fn scalar_at(&self) -> Option<&dyn ScalarAtFn> { Some(self) } } -impl ScalarAtFn for &dyn BoolArray { +impl ScalarAtFn for BoolArray<'_> { fn scalar_at(&self, _index: usize) -> VortexResult { todo!() } diff --git a/vortex-array2/src/array/bool/mod.rs b/vortex-array2/src/array/bool/mod.rs index b924384fb7..19fbd5ddb6 100644 --- a/vortex-array2/src/array/bool/mod.rs +++ b/vortex-array2/src/array/bool/mod.rs @@ -1,16 +1,15 @@ mod compute; use arrow_buffer::{BooleanBuffer, Buffer}; -use vortex_error::VortexResult; +use vortex_error::{vortex_err, VortexResult}; use vortex_schema::DType; +use crate::impl_encoding; use crate::validity::Validity; use crate::validity::{ArrayValidity, ValidityMetadata}; -use crate::{impl_encoding, IntoArray}; +use crate::ArrayMetadata; use crate::{ArrayData, TypedArrayData}; -use crate::{ArrayMetadata, TryFromArrayMetadata}; use crate::{ArrayView, ToArrayData}; -use crate::{ToArray, TypedArrayView}; impl_encoding!("vortex.bool", Bool); @@ -31,12 +30,48 @@ impl BoolMetadata { } } -// TODO(ngates): I think this could be a struct? -#[allow(clippy::len_without_is_empty)] -pub trait BoolArray { - fn buffer(&self) -> &Buffer; - fn len(&self) -> usize; - fn validity(&self) -> Option; +impl TryParseArrayMetadata for BoolMetadata { + fn try_parse_metadata(_metadata: Option<&[u8]>) -> VortexResult { + todo!() + } +} + +pub struct BoolArray<'a> { + buffer: &'a Buffer, + validity: Option>, + metadata: BoolMetadata, +} + +impl BoolArray<'_> { + pub fn buffer(&self) -> &Buffer { + &self.buffer + } + + pub fn validity(&self) -> Option<&Validity> { + self.validity.as_ref() + } + + pub fn metadata(&self) -> &BoolMetadata { + &self.metadata + } + + pub fn boolean_buffer(&self) -> BooleanBuffer { + BooleanBuffer::new(self.buffer.clone(), 0, self.metadata.len()) + } +} + +impl<'v> TryFromArrayParts<'v, BoolMetadata> for BoolArray<'v> { + fn try_from_parts(parts: &'v dyn ArrayParts<'v>, metadata: BoolMetadata) -> VortexResult { + Ok(BoolArray { + buffer: parts + .buffer(0) + .ok_or(vortex_err!("BoolArray requires a buffer"))?, + validity: metadata + .validity() + .to_validity(metadata.len(), parts.child(0, &Validity::DTYPE)), + metadata, + }) + } } impl BoolData { @@ -59,82 +94,31 @@ impl BoolData { } } -impl BoolArray for BoolData { - fn buffer(&self) -> &Buffer { - self.data().buffers().first().unwrap() - } - +impl ArrayTrait for BoolArray<'_> { fn len(&self) -> usize { - self.metadata().len() - } - - fn validity(&self) -> Option { - self.metadata().validity().to_validity( - self.metadata().len(), - self.data().child(0).map(|data| data.to_array()), - ) - } -} - -impl BoolArray for BoolView<'_> { - fn buffer(&self) -> &Buffer { - self.view() - .buffers() - .first() - .expect("BoolView must have a single buffer") - } - - fn len(&self) -> usize { - self.metadata().len() - } - - fn validity(&self) -> Option { - self.metadata().validity().to_validity( - self.metadata().len(), - self.view() - .child(0, &Validity::DTYPE) - .map(|a| a.into_array()), - ) - } -} - -impl TryFromArrayMetadata for BoolMetadata { - fn try_from_metadata(_metadata: Option<&[u8]>) -> VortexResult { todo!() } } -impl<'v> TryFromArrayView<'v> for BoolView<'v> { - fn try_from_view(view: &'v ArrayView<'v>) -> VortexResult { - // TODO(ngates): validate the view. - Ok(BoolView::new_unchecked( - view.clone(), - BoolMetadata::try_from_metadata(view.metadata())?, - )) - } -} - -impl TryFromArrayData for BoolData { - fn try_from_data(data: &ArrayData) -> VortexResult { - // TODO(ngates): validate the array data. - Ok(Self::from_data_unchecked(data.clone())) - } -} - -impl ArrayTrait for &dyn BoolArray { - fn len(&self) -> usize { - (**self).len() - } -} - -impl ArrayValidity for &dyn BoolArray { +impl ArrayValidity for BoolArray<'_> { fn is_valid(&self, index: usize) -> bool { self.validity().map(|v| v.is_valid(index)).unwrap_or(true) } } -impl ToArrayData for &dyn BoolArray { +impl ToArrayData for BoolArray<'_> { fn to_array_data(&self) -> ArrayData { todo!() } } + +#[cfg(test)] +mod tests { + use crate::array::bool::BoolData; + + #[test] + fn bool_array() { + let arr = BoolData::try_new(vec![true, false, true].into(), None).unwrap(); + println!("Array {:?}", &arr); + } +} diff --git a/vortex-array2/src/array/mod.rs b/vortex-array2/src/array/mod.rs index b125b0cc74..588bdd8a28 100644 --- a/vortex-array2/src/array/mod.rs +++ b/vortex-array2/src/array/mod.rs @@ -1,3 +1,3 @@ pub mod bool; -pub mod primitive; -pub mod ree; +// pub mod primitive; +// pub mod ree; diff --git a/vortex-array2/src/compute.rs b/vortex-array2/src/compute.rs index e404bddea9..636a5825b9 100644 --- a/vortex-array2/src/compute.rs +++ b/vortex-array2/src/compute.rs @@ -2,7 +2,7 @@ use vortex::scalar::Scalar; use vortex_error::{vortex_err, VortexResult}; use crate::array::bool::BoolData; -use crate::array::primitive::PrimitiveData; +// use crate::array::primitive::PrimitiveData; use crate::{Array, WithArray}; pub trait ArrayCompute { @@ -32,7 +32,7 @@ pub trait FlattenFn { pub enum FlattenedArray { Bool(BoolData), - Primitive(PrimitiveData), + // Primitive(PrimitiveData), // Just to introduce a second variant for now Other(String), } @@ -44,14 +44,14 @@ pub fn flatten(array: &Array) -> VortexResult { .flatten() }) } - -pub fn flatten_primitive(array: &Array) -> VortexResult { - if let FlattenedArray::Primitive(p) = flatten(array)? { - Ok(p) - } else { - Err(vortex_err!( - "Cannot flatten array {:?} into primitive", - array - )) - } -} +// +// pub fn flatten_primitive(array: &Array) -> VortexResult { +// if let FlattenedArray::Primitive(p) = flatten(array)? { +// Ok(p) +// } else { +// Err(vortex_err!( +// "Cannot flatten array {:?} into primitive", +// array +// )) +// } +// } diff --git a/vortex-array2/src/data.rs b/vortex-array2/src/data.rs index e487f456f2..2f81fca3e5 100644 --- a/vortex-array2/src/data.rs +++ b/vortex-array2/src/data.rs @@ -6,7 +6,7 @@ use vortex_error::{vortex_bail, VortexError, VortexResult}; use vortex_schema::DType; use crate::encoding::EncodingRef; -use crate::{Array, ArrayDef, ArrayMetadata, IntoArray, ToArray}; +use crate::{Array, ArrayDef, ArrayMetadata, ArrayParts, IntoArray, ToArray}; #[allow(dead_code)] #[derive(Clone, Debug)] @@ -36,7 +36,7 @@ impl ArrayData { // Validate here that the metadata correctly parses, so that an encoding can infallibly // implement Encoding::with_data(). - encoding.with_data_mut(&data, &mut |_| Ok(()))?; + // encoding.with_data_mut(&data, &mut |_| Ok(()))?; Ok(data) } @@ -76,15 +76,13 @@ impl IntoArray<'static> for ArrayData { } } +#[derive(Debug)] pub struct TypedArrayData { data: ArrayData, phantom: PhantomData, } -impl TypedArrayData -where - Self: for<'a> AsRef>, -{ +impl TypedArrayData { pub fn new_unchecked( dtype: DType, metadata: Arc, @@ -126,10 +124,6 @@ where .downcast::() .unwrap() } - - pub fn as_array(&self) -> &D::Array<'_> { - self.as_ref() - } } impl ToArray for TypedArrayData { @@ -157,3 +151,18 @@ impl TryFrom for TypedArrayData { }) } } + +impl ArrayParts<'_> for ArrayData { + fn buffer(&self, idx: usize) -> Option<&Buffer> { + self.buffers().get(idx) + } + + fn child(&self, idx: usize, _dtype: &DType) -> Option { + self.child(idx).map(|a| { + let array = a.to_array(); + // FIXME(ngates): can we ask an array its dtype? + // assert_eq!(array.dtype(), dtype); + array + }) + } +} diff --git a/vortex-array2/src/implementation.rs b/vortex-array2/src/implementation.rs index bdbad3fa07..3883de87c3 100644 --- a/vortex-array2/src/implementation.rs +++ b/vortex-array2/src/implementation.rs @@ -1,10 +1,7 @@ -use vortex_error::VortexResult; - use crate::encoding::EncodingId; use crate::encoding::{ArrayEncoding, EncodingRef}; -use crate::ArrayData; -use crate::ArrayMetadata; -use crate::ArrayView; +use crate::ArrayTrait; +use crate::{ArrayMetadata, TryFromArrayParts, TryParseArrayMetadata}; /// Trait the defines the set of types relating to an array. /// Because it has associated types it can't be used as a trait object. @@ -12,47 +9,36 @@ pub trait ArrayDef { const ID: EncodingId; const ENCODING: EncodingRef; - type Array<'a>: ?Sized + 'a; - type Metadata: ArrayMetadata; + type Array<'a>: ArrayTrait + TryFromArrayParts<'a, Self::Metadata> + 'a; + type Metadata: ArrayMetadata + TryParseArrayMetadata; type Encoding: ArrayEncoding; } -pub trait TryFromArrayMetadata: Sized { - fn try_from_metadata(metadata: Option<&[u8]>) -> VortexResult; -} - -pub trait TryFromArrayData: Sized { - fn try_from_data(data: &ArrayData) -> VortexResult; -} - -pub trait TryFromArrayView<'v>: Sized + 'v { - fn try_from_view(view: &'v ArrayView<'v>) -> VortexResult; -} - #[macro_export] macro_rules! impl_encoding { ($id:literal, $Name:ident) => { use paste::paste; paste! { - use $crate::{ArrayDef, TryFromArrayData, TryFromArrayView, ArrayTrait}; + use $crate::{ArrayDef, ArrayParts, ArrayTrait, TryFromArrayParts, TryParseArrayMetadata}; use $crate::encoding::{ArrayEncoding, EncodingId, EncodingRef}; use std::any::Any; + use std::fmt::Debug; use std::sync::Arc; use std::marker::{Send, Sync}; /// The array definition trait + #[derive(Debug)] pub struct [<$Name Def>]; impl ArrayDef for [<$Name Def>] { const ID: EncodingId = EncodingId::new($id); const ENCODING: EncodingRef = &[<$Name Encoding>]; - type Array<'a> = dyn [<$Name Array>] + 'a; + type Array<'a> = [<$Name Array>]<'a>; type Metadata = [<$Name Metadata>]; type Encoding = [<$Name Encoding>]; } pub type [<$Name Data>] = TypedArrayData<[<$Name Def>]>; - pub type [<$Name View>]<'v> = TypedArrayView<'v, [<$Name Def>]>; /// The array encoding pub struct [<$Name Encoding>]; @@ -67,8 +53,9 @@ macro_rules! impl_encoding { f: &mut dyn FnMut(&dyn ArrayTrait) -> VortexResult<()>, ) -> VortexResult<()> { // Convert ArrayView -> PrimitiveArray, then call compute. - let typed_view = <[<$Name View>] as TryFromArrayView>::try_from_view(view)?; - f(&typed_view.as_array()) + let metadata = [<$Name Metadata>]::try_parse_metadata(view.metadata())?; + let array = [<$Name Array>]::try_from_parts(view as &dyn ArrayParts, metadata)?; + f(&array) } fn with_data_mut( @@ -76,8 +63,13 @@ macro_rules! impl_encoding { data: &ArrayData, f: &mut dyn FnMut(&dyn ArrayTrait) -> VortexResult<()>, ) -> VortexResult<()> { - let data = <[<$Name Data>] as TryFromArrayData>::try_from_data(data)?; - f(&data.as_array()) + let metadata = data.metadata() + .as_any() + .downcast_ref::<[<$Name Metadata>]>() + .ok_or_else(|| vortex_err!("Failed to downcast metadata"))? + .clone(); + let array = [<$Name Array>]::try_from_parts(data as &dyn ArrayParts, metadata)?; + f(&array) } } @@ -100,17 +92,17 @@ macro_rules! impl_encoding { } } - /// Implement AsRef for both the data and view types - impl<'a> AsRef] + 'a> for [<$Name Data>] { - fn as_ref(&self) -> &(dyn [<$Name Array>] + 'a) { - self - } - } - impl<'a> AsRef] + 'a> for [<$Name View>]<'a> { - fn as_ref(&self) -> &(dyn [<$Name Array>] + 'a) { - self - } - } + // /// Implement AsRef for both the data and view types + // impl<'a> AsRef<[<$Name Array>]<'a>> for [<$Name Data>] { + // fn as_ref(&self) -> &[<$Name Array>]<'a> { + // self + // } + // } + // impl<'a> AsRef<[<$Name Array>]<'a>> for [<$Name View>]<'a> { + // fn as_ref(&self) -> &[<$Name Array>]<'a> { + // self + // } + // } } }; } diff --git a/vortex-array2/src/lib.rs b/vortex-array2/src/lib.rs index 2231df9126..dc9db7348c 100644 --- a/vortex-array2/src/lib.rs +++ b/vortex-array2/src/lib.rs @@ -12,11 +12,14 @@ mod view; use std::fmt::Debug; +use arrow_buffer::Buffer; pub use context::*; pub use data::*; pub use implementation::*; pub use metadata::*; pub use view::*; +use vortex_error::VortexResult; +use vortex_schema::DType; use crate::compute::ArrayCompute; use crate::validity::ArrayValidity; @@ -44,6 +47,19 @@ pub trait WithArray { fn with_array R>(&self, f: F) -> R; } +pub trait ArrayParts<'a> { + fn buffer(&'a self, idx: usize) -> Option<&'a Buffer>; + fn child(&'a self, idx: usize, dtype: &'a DType) -> Option>; +} + +pub trait TryFromArrayParts<'v, M: ArrayMetadata>: Sized + 'v { + fn try_from_parts(parts: &'v dyn ArrayParts<'v>, metadata: M) -> VortexResult; +} + +pub trait TryParseArrayMetadata: Sized + ArrayMetadata { + fn try_parse_metadata(metadata: Option<&[u8]>) -> VortexResult; +} + /// Collects together the behaviour of an array. pub trait ArrayTrait: ArrayCompute + ArrayValidity + ToArrayData { fn len(&self) -> usize; @@ -73,20 +89,3 @@ impl WithArray for Array<'_> { } } } - -#[cfg(test)] -mod test { - use vortex_error::VortexResult; - - use crate::array::primitive::PrimitiveData; - use crate::compute::*; - use crate::ToArray; - - #[test] - fn test_primitive() -> VortexResult<()> { - let array = PrimitiveData::from_vec(vec![1i32, 2, 3, 4, 5]); - let scalar: i32 = scalar_at(&array.to_array(), 3)?.try_into()?; - assert_eq!(scalar, 4); - Ok(()) - } -} diff --git a/vortex-array2/src/view.rs b/vortex-array2/src/view.rs index f5fbe6c32f..72336142b4 100644 --- a/vortex-array2/src/view.rs +++ b/vortex-array2/src/view.rs @@ -2,12 +2,12 @@ use std::fmt::{Debug, Formatter}; use arrow_buffer::Buffer; use vortex::flatbuffers::array as fb; -use vortex_error::{vortex_bail, vortex_err, VortexError, VortexResult}; +use vortex_error::{vortex_bail, vortex_err, VortexResult}; use vortex_schema::DType; use crate::encoding::EncodingRef; -use crate::{Array, ArrayDef, IntoArray, ToArray}; -use crate::{SerdeContext, TryFromArrayMetadata}; +use crate::{Array, IntoArray, ToArray}; +use crate::{ArrayParts, SerdeContext}; #[derive(Clone)] pub struct ArrayView<'v> { @@ -146,52 +146,12 @@ impl<'v> IntoArray<'v> for ArrayView<'v> { } } -pub struct TypedArrayView<'v, D: ArrayDef> { - view: ArrayView<'v>, - metadata: D::Metadata, -} - -impl<'v, D: ArrayDef> TypedArrayView<'v, D> { - pub fn new_unchecked(view: ArrayView<'v>, metadata: D::Metadata) -> Self { - Self { view, metadata } - } - - pub fn metadata(&self) -> &D::Metadata { - &self.metadata +impl<'v> ArrayParts<'v> for ArrayView<'v> { + fn buffer(&'v self, idx: usize) -> Option<&'v Buffer> { + self.buffers().get(idx) } - pub fn view(&'v self) -> &'v ArrayView<'v> { - &self.view - } - - pub fn as_array(&self) -> &D::Array<'v> - where - Self: AsRef>, - { - self.as_ref() - } -} - -impl ToArray for TypedArrayView<'_, D> { - fn to_array(&self) -> Array { - Array::View(self.view().clone()) - } -} - -/// Convert an ArrayView into a TypedArrayView. -impl<'v, D: ArrayDef> TryFrom> for TypedArrayView<'v, D> -where - D::Metadata: TryFromArrayMetadata, -{ - type Error = VortexError; - - fn try_from(view: ArrayView<'v>) -> Result { - if view.encoding().id() != D::ID { - vortex_bail!("Invalid encoding for array") - } - let metadata = <::Metadata as TryFromArrayMetadata>::try_from_metadata( - view.metadata(), - )?; - Ok(Self { view, metadata }) + fn child(&'v self, idx: usize, dtype: &'v DType) -> Option> { + self.child(idx, dtype).map(|a| a.into_array()) } } From 8f569df9cc9ab66ad6b42f84bd3c71dd7c49e3ec Mon Sep 17 00:00:00 2001 From: Nicholas Gates Date: Sat, 6 Apr 2024 23:00:10 +0100 Subject: [PATCH 2/4] Use struct not trait --- vortex-array2/src/array/bool/compute.rs | 15 ++++++++++++--- vortex-array2/src/array/bool/mod.rs | 20 ++++++++++++++++---- vortex-array2/src/data.rs | 4 ++++ vortex-array2/src/lib.rs | 3 +++ vortex-array2/src/view.rs | 4 ++++ 5 files changed, 39 insertions(+), 7 deletions(-) diff --git a/vortex-array2/src/array/bool/compute.rs b/vortex-array2/src/array/bool/compute.rs index 4276cddc7f..0e172887ad 100644 --- a/vortex-array2/src/array/bool/compute.rs +++ b/vortex-array2/src/array/bool/compute.rs @@ -1,8 +1,10 @@ -use vortex::scalar::Scalar; +use vortex::scalar::{BoolScalar, Scalar}; use vortex_error::VortexResult; use crate::array::bool::BoolArray; use crate::compute::{ArrayCompute, ScalarAtFn}; +use crate::validity::ArrayValidity; +use crate::ArrayTrait; impl ArrayCompute for BoolArray<'_> { fn scalar_at(&self) -> Option<&dyn ScalarAtFn> { @@ -11,7 +13,14 @@ impl ArrayCompute for BoolArray<'_> { } impl ScalarAtFn for BoolArray<'_> { - fn scalar_at(&self, _index: usize) -> VortexResult { - todo!() + fn scalar_at(&self, index: usize) -> VortexResult { + if self.is_valid(index) { + let value = self.boolean_buffer().value(index); + Ok(Scalar::Bool( + BoolScalar::try_new(Some(value), self.dtype().nullability()).unwrap(), + )) + } else { + Ok(Scalar::null(self.dtype())) + } } } diff --git a/vortex-array2/src/array/bool/mod.rs b/vortex-array2/src/array/bool/mod.rs index 19fbd5ddb6..6a5bab78b4 100644 --- a/vortex-array2/src/array/bool/mod.rs +++ b/vortex-array2/src/array/bool/mod.rs @@ -37,6 +37,7 @@ impl TryParseArrayMetadata for BoolMetadata { } pub struct BoolArray<'a> { + dtype: &'a DType, buffer: &'a Buffer, validity: Option>, metadata: BoolMetadata, @@ -44,7 +45,7 @@ pub struct BoolArray<'a> { impl BoolArray<'_> { pub fn buffer(&self) -> &Buffer { - &self.buffer + self.buffer } pub fn validity(&self) -> Option<&Validity> { @@ -63,6 +64,7 @@ impl BoolArray<'_> { impl<'v> TryFromArrayParts<'v, BoolMetadata> for BoolArray<'v> { fn try_from_parts(parts: &'v dyn ArrayParts<'v>, metadata: BoolMetadata) -> VortexResult { Ok(BoolArray { + dtype: parts.dtype(), buffer: parts .buffer(0) .ok_or(vortex_err!("BoolArray requires a buffer"))?, @@ -95,8 +97,12 @@ impl BoolData { } impl ArrayTrait for BoolArray<'_> { + fn dtype(&self) -> &DType { + self.dtype + } + fn len(&self) -> usize { - todo!() + self.metadata().len() } } @@ -115,10 +121,16 @@ impl ToArrayData for BoolArray<'_> { #[cfg(test)] mod tests { use crate::array::bool::BoolData; + use crate::compute::scalar_at; + use crate::IntoArray; #[test] fn bool_array() { - let arr = BoolData::try_new(vec![true, false, true].into(), None).unwrap(); - println!("Array {:?}", &arr); + let arr = BoolData::try_new(vec![true, false, true].into(), None) + .unwrap() + .into_array(); + + let scalar: bool = scalar_at(&arr, 0).unwrap().try_into().unwrap(); + assert_eq!(scalar, true); } } diff --git a/vortex-array2/src/data.rs b/vortex-array2/src/data.rs index 2f81fca3e5..49cab7cbe6 100644 --- a/vortex-array2/src/data.rs +++ b/vortex-array2/src/data.rs @@ -153,6 +153,10 @@ impl TryFrom for TypedArrayData { } impl ArrayParts<'_> for ArrayData { + fn dtype(&'_ self) -> &'_ DType { + &self.dtype + } + fn buffer(&self, idx: usize) -> Option<&Buffer> { self.buffers().get(idx) } diff --git a/vortex-array2/src/lib.rs b/vortex-array2/src/lib.rs index dc9db7348c..d296b9db4e 100644 --- a/vortex-array2/src/lib.rs +++ b/vortex-array2/src/lib.rs @@ -48,6 +48,7 @@ pub trait WithArray { } pub trait ArrayParts<'a> { + fn dtype(&'a self) -> &'a DType; fn buffer(&'a self, idx: usize) -> Option<&'a Buffer>; fn child(&'a self, idx: usize, dtype: &'a DType) -> Option>; } @@ -62,6 +63,8 @@ pub trait TryParseArrayMetadata: Sized + ArrayMetadata { /// Collects together the behaviour of an array. pub trait ArrayTrait: ArrayCompute + ArrayValidity + ToArrayData { + fn dtype(&self) -> &DType; + fn len(&self) -> usize; fn is_empty(&self) -> bool { diff --git a/vortex-array2/src/view.rs b/vortex-array2/src/view.rs index 72336142b4..ba8d3f1436 100644 --- a/vortex-array2/src/view.rs +++ b/vortex-array2/src/view.rs @@ -147,6 +147,10 @@ impl<'v> IntoArray<'v> for ArrayView<'v> { } impl<'v> ArrayParts<'v> for ArrayView<'v> { + fn dtype(&'v self) -> &'v DType { + self.dtype + } + fn buffer(&'v self, idx: usize) -> Option<&'v Buffer> { self.buffers().get(idx) } From c5b4842c3ea07070b8ae26b0b7883c162dbe2cfb Mon Sep 17 00:00:00 2001 From: Nicholas Gates Date: Sat, 6 Apr 2024 23:07:39 +0100 Subject: [PATCH 3/4] Use struct not trait --- vortex-array2/src/array/bool/mod.rs | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/vortex-array2/src/array/bool/mod.rs b/vortex-array2/src/array/bool/mod.rs index 6a5bab78b4..407b3e504f 100644 --- a/vortex-array2/src/array/bool/mod.rs +++ b/vortex-array2/src/array/bool/mod.rs @@ -19,17 +19,6 @@ pub struct BoolMetadata { length: usize, } -#[allow(clippy::len_without_is_empty)] -impl BoolMetadata { - pub fn validity(&self) -> &ValidityMetadata { - &self.validity - } - - pub fn len(&self) -> usize { - self.length - } -} - impl TryParseArrayMetadata for BoolMetadata { fn try_parse_metadata(_metadata: Option<&[u8]>) -> VortexResult { todo!() @@ -41,6 +30,10 @@ pub struct BoolArray<'a> { buffer: &'a Buffer, validity: Option>, metadata: BoolMetadata, + // TODO(ngates): we support statistics by reference to a dyn trait. + // This trait is implemented for ArrayView and ArrayData and is passed into here as part + // of ArrayParts. + // e.g. stats: &dyn Statistics, } impl BoolArray<'_> { @@ -57,7 +50,7 @@ impl BoolArray<'_> { } pub fn boolean_buffer(&self) -> BooleanBuffer { - BooleanBuffer::new(self.buffer.clone(), 0, self.metadata.len()) + BooleanBuffer::new(self.buffer.clone(), 0, self.metadata.length) } } @@ -69,8 +62,8 @@ impl<'v> TryFromArrayParts<'v, BoolMetadata> for BoolArray<'v> { .buffer(0) .ok_or(vortex_err!("BoolArray requires a buffer"))?, validity: metadata - .validity() - .to_validity(metadata.len(), parts.child(0, &Validity::DTYPE)), + .validity + .to_validity(metadata.length, parts.child(0, &Validity::DTYPE)), metadata, }) } @@ -102,7 +95,7 @@ impl ArrayTrait for BoolArray<'_> { } fn len(&self) -> usize { - self.metadata().len() + self.metadata().length } } From 052be794ac91f9030fbc58b7b851f61cd571ff92 Mon Sep 17 00:00:00 2001 From: Nicholas Gates Date: Sat, 6 Apr 2024 23:20:02 +0100 Subject: [PATCH 4/4] Use struct not trait --- vortex-array2/src/array/bool/mod.rs | 12 +- vortex-array2/src/array/mod.rs | 4 +- vortex-array2/src/array/primitive/compute.rs | 5 +- vortex-array2/src/array/primitive/mod.rs | 129 +++++++------------ vortex-array2/src/array/ree/compute.rs | 4 +- vortex-array2/src/array/ree/mod.rs | 81 ++++-------- vortex-array2/src/implementation.rs | 5 +- vortex-array2/src/lib.rs | 12 +- 8 files changed, 100 insertions(+), 152 deletions(-) diff --git a/vortex-array2/src/array/bool/mod.rs b/vortex-array2/src/array/bool/mod.rs index 407b3e504f..ba03e9c872 100644 --- a/vortex-array2/src/array/bool/mod.rs +++ b/vortex-array2/src/array/bool/mod.rs @@ -1,7 +1,7 @@ mod compute; use arrow_buffer::{BooleanBuffer, Buffer}; -use vortex_error::{vortex_err, VortexResult}; +use vortex_error::VortexResult; use vortex_schema::DType; use crate::impl_encoding; @@ -29,7 +29,8 @@ pub struct BoolArray<'a> { dtype: &'a DType, buffer: &'a Buffer, validity: Option>, - metadata: BoolMetadata, + // TODO(ngates): unpack metadata? + metadata: &'a BoolMetadata, // TODO(ngates): we support statistics by reference to a dyn trait. // This trait is implemented for ArrayView and ArrayData and is passed into here as part // of ArrayParts. @@ -46,7 +47,7 @@ impl BoolArray<'_> { } pub fn metadata(&self) -> &BoolMetadata { - &self.metadata + self.metadata } pub fn boolean_buffer(&self) -> BooleanBuffer { @@ -55,7 +56,10 @@ impl BoolArray<'_> { } impl<'v> TryFromArrayParts<'v, BoolMetadata> for BoolArray<'v> { - fn try_from_parts(parts: &'v dyn ArrayParts<'v>, metadata: BoolMetadata) -> VortexResult { + fn try_from_parts( + parts: &'v dyn ArrayParts<'v>, + metadata: &'v BoolMetadata, + ) -> VortexResult { Ok(BoolArray { dtype: parts.dtype(), buffer: parts diff --git a/vortex-array2/src/array/mod.rs b/vortex-array2/src/array/mod.rs index 588bdd8a28..b125b0cc74 100644 --- a/vortex-array2/src/array/mod.rs +++ b/vortex-array2/src/array/mod.rs @@ -1,3 +1,3 @@ pub mod bool; -// pub mod primitive; -// pub mod ree; +pub mod primitive; +pub mod ree; diff --git a/vortex-array2/src/array/primitive/compute.rs b/vortex-array2/src/array/primitive/compute.rs index 363a7bca2d..09763a1db6 100644 --- a/vortex-array2/src/array/primitive/compute.rs +++ b/vortex-array2/src/array/primitive/compute.rs @@ -5,14 +5,15 @@ use vortex_error::VortexResult; use crate::array::primitive::PrimitiveArray; use crate::compute::{ArrayCompute, ScalarAtFn}; use crate::validity::ArrayValidity; +use crate::ArrayTrait; -impl ArrayCompute for &dyn PrimitiveArray { +impl ArrayCompute for PrimitiveArray<'_> { fn scalar_at(&self) -> Option<&dyn ScalarAtFn> { Some(self) } } -impl ScalarAtFn for &dyn PrimitiveArray { +impl ScalarAtFn for PrimitiveArray<'_> { fn scalar_at(&self, index: usize) -> VortexResult { if self.is_valid(index) { match_each_native_ptype!(self.ptype(), |$T| { diff --git a/vortex-array2/src/array/primitive/mod.rs b/vortex-array2/src/array/primitive/mod.rs index 570130689c..de856acfbc 100644 --- a/vortex-array2/src/array/primitive/mod.rs +++ b/vortex-array2/src/array/primitive/mod.rs @@ -5,12 +5,11 @@ use vortex::ptype::{NativePType, PType}; use vortex_error::VortexResult; use vortex_schema::DType; +use crate::impl_encoding; use crate::validity::{ArrayValidity, Validity, ValidityMetadata}; -use crate::{impl_encoding, IntoArray}; +use crate::ArrayMetadata; use crate::{ArrayData, TypedArrayData}; -use crate::{ArrayMetadata, TryFromArrayMetadata}; use crate::{ArrayView, ToArrayData}; -use crate::{ToArray, TypedArrayView}; impl_encoding!("vortex.primitive", Primitive); @@ -20,24 +19,49 @@ pub struct PrimitiveMetadata { validity: ValidityMetadata, } -impl PrimitiveMetadata { +impl TryParseArrayMetadata for PrimitiveMetadata { + fn try_parse_metadata(_metadata: Option<&[u8]>) -> VortexResult { + todo!() + } +} + +pub struct PrimitiveArray<'a> { + ptype: PType, + dtype: &'a DType, + buffer: &'a Buffer, + validity: Option>, +} + +impl PrimitiveArray<'_> { + pub fn buffer(&self) -> &Buffer { + self.buffer + } + + pub fn validity(&self) -> Option<&Validity> { + self.validity.as_ref() + } + pub fn ptype(&self) -> PType { self.ptype } - pub fn validity(&self) -> &ValidityMetadata { - &self.validity - } } -#[allow(clippy::len_without_is_empty)] -pub trait PrimitiveArray { - fn dtype(&self) -> &DType; - fn ptype(&self) -> PType; - fn buffer(&self) -> &Buffer; - fn len(&self) -> usize { - self.buffer().len() / self.ptype().byte_width() +impl<'a> TryFromArrayParts<'a, PrimitiveMetadata> for PrimitiveArray<'a> { + fn try_from_parts( + parts: &'a dyn ArrayParts<'a>, + metadata: &'a PrimitiveMetadata, + ) -> VortexResult { + let buffer = parts.buffer(0).unwrap(); + let length = buffer.len() / metadata.ptype.byte_width(); + Ok(PrimitiveArray { + ptype: metadata.ptype, + dtype: parts.dtype(), + buffer, + validity: metadata + .validity + .to_validity(length, parts.child(0, parts.dtype())), + }) } - fn validity(&self) -> Option; } impl PrimitiveData { @@ -58,88 +82,23 @@ impl PrimitiveData { } } -impl PrimitiveArray for PrimitiveData { +impl ArrayTrait for PrimitiveArray<'_> { fn dtype(&self) -> &DType { - self.data().dtype() + self.dtype } - fn ptype(&self) -> PType { - self.metadata().ptype() - } - - fn buffer(&self) -> &Buffer { - self.data().buffers().first().unwrap() - } - - fn validity(&self) -> Option { - self.metadata() - .validity() - .to_validity(self.len(), self.data().child(0).map(|data| data.to_array())) - } -} - -impl PrimitiveArray for PrimitiveView<'_> { - fn dtype(&self) -> &DType { - self.view().dtype() - } - - fn ptype(&self) -> PType { - self.metadata().ptype() - } - - fn buffer(&self) -> &Buffer { - self.view() - .buffers() - .first() - .expect("PrimitiveView must have a single buffer") - } - - fn validity(&self) -> Option { - self.metadata().validity().to_validity( - self.len(), - self.view() - .child(0, &Validity::DTYPE) - .map(|view| view.into_array()), - ) - } -} - -impl TryFromArrayMetadata for PrimitiveMetadata { - fn try_from_metadata(_metadata: Option<&[u8]>) -> VortexResult { - todo!() - } -} - -impl<'v> TryFromArrayView<'v> for PrimitiveView<'v> { - fn try_from_view(view: &'v ArrayView<'v>) -> VortexResult { - // TODO(ngates): validate the view. - Ok(PrimitiveView::new_unchecked( - view.clone(), - PrimitiveMetadata::try_from_metadata(view.metadata())?, - )) - } -} - -impl TryFromArrayData for PrimitiveData { - fn try_from_data(data: &ArrayData) -> VortexResult { - // TODO(ngates): validate the array data. - Ok(Self::from_data_unchecked(data.clone())) - } -} - -impl ArrayTrait for &dyn PrimitiveArray { fn len(&self) -> usize { - (**self).len() + self.buffer().len() / self.ptype().byte_width() } } -impl ArrayValidity for &dyn PrimitiveArray { +impl ArrayValidity for PrimitiveArray<'_> { fn is_valid(&self, index: usize) -> bool { self.validity().map(|v| v.is_valid(index)).unwrap_or(true) } } -impl ToArrayData for &dyn PrimitiveArray { +impl ToArrayData for PrimitiveArray<'_> { fn to_array_data(&self) -> ArrayData { todo!() } diff --git a/vortex-array2/src/array/ree/compute.rs b/vortex-array2/src/array/ree/compute.rs index d19d0fe7b4..d4e78e7e0f 100644 --- a/vortex-array2/src/array/ree/compute.rs +++ b/vortex-array2/src/array/ree/compute.rs @@ -4,13 +4,13 @@ use vortex_error::VortexResult; use crate::array::ree::REEArray; use crate::compute::{ArrayCompute, ScalarAtFn}; -impl ArrayCompute for &dyn REEArray { +impl ArrayCompute for REEArray<'_> { fn scalar_at(&self) -> Option<&dyn ScalarAtFn> { Some(self) } } -impl ScalarAtFn for &dyn REEArray { +impl ScalarAtFn for REEArray<'_> { fn scalar_at(&self, _index: usize) -> VortexResult { todo!() } diff --git a/vortex-array2/src/array/ree/mod.rs b/vortex-array2/src/array/ree/mod.rs index 57ffe1b262..0f8ae21aa6 100644 --- a/vortex-array2/src/array/ree/mod.rs +++ b/vortex-array2/src/array/ree/mod.rs @@ -5,10 +5,9 @@ use vortex_schema::DType; use crate::impl_encoding; use crate::validity::ArrayValidity; -use crate::{Array, ArrayMetadata, TryFromArrayMetadata}; +use crate::{Array, ArrayMetadata}; use crate::{ArrayData, TypedArrayData}; use crate::{ArrayView, ToArrayData}; -use crate::{IntoArray, TypedArrayView}; impl_encoding!("vortex.ree", REE); @@ -18,19 +17,16 @@ pub struct REEMetadata { ends_dtype: DType, } -#[allow(clippy::len_without_is_empty)] -impl REEMetadata { - pub fn len(&self) -> usize { - self.length - } - pub fn ends_dtype(&self) -> &DType { - &self.ends_dtype +impl TryParseArrayMetadata for REEMetadata { + fn try_parse_metadata(_metadata: Option<&[u8]>) -> VortexResult { + todo!() } } -pub trait REEArray { - fn run_ends(&self) -> Array; - fn values(&self) -> Array; +pub struct REEArray<'a> { + dtype: &'a DType, + values: Array<'a>, + run_ends: Array<'a>, } impl REEData { @@ -52,63 +48,40 @@ impl REEData { } } -impl REEArray for REEData { - fn run_ends(&self) -> Array { - Array::DataRef(self.data().child(0).unwrap()) - } - - fn values(&self) -> Array { - Array::DataRef(self.data().child(1).unwrap()) - } -} - -impl REEArray for REEView<'_> { - fn run_ends(&self) -> Array { - self.view() - .child(0, self.metadata().ends_dtype()) - .unwrap() - .into_array() - } - - fn values(&self) -> Array { - self.view() - .child(1, self.view().dtype()) - .unwrap() - .into_array() +impl<'v> TryFromArrayParts<'v, REEMetadata> for REEArray<'v> { + fn try_from_parts( + parts: &'v dyn ArrayParts<'v>, + metadata: &'v REEMetadata, + ) -> VortexResult { + Ok(REEArray { + dtype: parts.dtype(), + values: parts + .child(0, parts.dtype()) + .ok_or_else(|| vortex_err!("REEArray missing values"))?, + run_ends: parts + .child(1, &metadata.ends_dtype) + .ok_or_else(|| vortex_err!("REEArray missing run_ends"))?, + }) } } -impl TryFromArrayMetadata for REEMetadata { - fn try_from_metadata(_metadata: Option<&[u8]>) -> VortexResult { - todo!() +impl ArrayTrait for REEArray<'_> { + fn dtype(&self) -> &DType { + self.values.dtype() } -} - -impl<'v> TryFromArrayView<'v> for REEView<'v> { - fn try_from_view(_view: &'v ArrayView<'v>) -> VortexResult { - todo!() - } -} - -impl TryFromArrayData for REEData { - fn try_from_data(_data: &ArrayData) -> VortexResult { - todo!() - } -} -impl ArrayTrait for &dyn REEArray { fn len(&self) -> usize { todo!() } } -impl ArrayValidity for &dyn REEArray { +impl ArrayValidity for REEArray<'_> { fn is_valid(&self, _index: usize) -> bool { todo!() } } -impl ToArrayData for &dyn REEArray { +impl ToArrayData for REEArray<'_> { fn to_array_data(&self) -> ArrayData { todo!() } diff --git a/vortex-array2/src/implementation.rs b/vortex-array2/src/implementation.rs index 3883de87c3..3cfc1eed93 100644 --- a/vortex-array2/src/implementation.rs +++ b/vortex-array2/src/implementation.rs @@ -22,6 +22,7 @@ macro_rules! impl_encoding { paste! { use $crate::{ArrayDef, ArrayParts, ArrayTrait, TryFromArrayParts, TryParseArrayMetadata}; use $crate::encoding::{ArrayEncoding, EncodingId, EncodingRef}; + use vortex_error::vortex_err; use std::any::Any; use std::fmt::Debug; use std::sync::Arc; @@ -54,7 +55,7 @@ macro_rules! impl_encoding { ) -> VortexResult<()> { // Convert ArrayView -> PrimitiveArray, then call compute. let metadata = [<$Name Metadata>]::try_parse_metadata(view.metadata())?; - let array = [<$Name Array>]::try_from_parts(view as &dyn ArrayParts, metadata)?; + let array = [<$Name Array>]::try_from_parts(view as &dyn ArrayParts, &metadata)?; f(&array) } @@ -68,7 +69,7 @@ macro_rules! impl_encoding { .downcast_ref::<[<$Name Metadata>]>() .ok_or_else(|| vortex_err!("Failed to downcast metadata"))? .clone(); - let array = [<$Name Array>]::try_from_parts(data as &dyn ArrayParts, metadata)?; + let array = [<$Name Array>]::try_from_parts(data as &dyn ArrayParts, &metadata)?; f(&array) } } diff --git a/vortex-array2/src/lib.rs b/vortex-array2/src/lib.rs index d296b9db4e..79b1de88ba 100644 --- a/vortex-array2/src/lib.rs +++ b/vortex-array2/src/lib.rs @@ -31,6 +31,16 @@ pub enum Array<'v> { View(ArrayView<'v>), } +impl Array<'_> { + pub fn dtype(&self) -> &DType { + match self { + Array::Data(d) => d.dtype(), + Array::DataRef(d) => d.dtype(), + Array::View(v) => v.dtype(), + } + } +} + pub trait ToArray { fn to_array(&self) -> Array; } @@ -54,7 +64,7 @@ pub trait ArrayParts<'a> { } pub trait TryFromArrayParts<'v, M: ArrayMetadata>: Sized + 'v { - fn try_from_parts(parts: &'v dyn ArrayParts<'v>, metadata: M) -> VortexResult; + fn try_from_parts(parts: &'v dyn ArrayParts<'v>, metadata: &'v M) -> VortexResult; } pub trait TryParseArrayMetadata: Sized + ArrayMetadata {