diff --git a/vortex-array2/src/array/bool/compute.rs b/vortex-array2/src/array/bool/compute.rs new file mode 100644 index 0000000000..0e172887ad --- /dev/null +++ b/vortex-array2/src/array/bool/compute.rs @@ -0,0 +1,26 @@ +use vortex::scalar::{BoolScalar, Scalar}; +use vortex_error::VortexResult; + +use crate::array::bool::BoolArray; +use crate::compute::{ArrayCompute, ScalarAtFn}; +use crate::validity::ArrayValidity; +use crate::ArrayTrait; + +impl ArrayCompute for BoolArray<'_> { + fn scalar_at(&self) -> Option<&dyn ScalarAtFn> { + Some(self) + } +} + +impl ScalarAtFn for BoolArray<'_> { + fn scalar_at(&self, index: usize) -> VortexResult { + if self.is_valid(index) { + let value = self.boolean_buffer().value(index); + Ok(Scalar::Bool( + BoolScalar::try_new(Some(value), self.dtype().nullability()).unwrap(), + )) + } else { + Ok(Scalar::null(self.dtype())) + } + } +} diff --git a/vortex-array2/src/array/bool/mod.rs b/vortex-array2/src/array/bool/mod.rs new file mode 100644 index 0000000000..5643bef9d5 --- /dev/null +++ b/vortex-array2/src/array/bool/mod.rs @@ -0,0 +1,133 @@ +mod compute; + +use arrow_buffer::{BooleanBuffer, Buffer}; +use vortex_error::VortexResult; +use vortex_schema::DType; + +use crate::impl_encoding; +use crate::validity::Validity; +use crate::validity::{ArrayValidity, ValidityMetadata}; +use crate::ArrayMetadata; +use crate::{ArrayData, TypedArrayData}; +use crate::{ArrayView, ToArrayData}; + +impl_encoding!("vortex.bool", Bool); + +#[derive(Clone, Debug)] +pub struct BoolMetadata { + validity: ValidityMetadata, + length: usize, +} + +impl TryParseArrayMetadata for BoolMetadata { + fn try_parse_metadata(_metadata: Option<&[u8]>) -> VortexResult { + todo!() + } +} + +pub struct BoolArray<'a> { + dtype: &'a DType, + buffer: &'a Buffer, + validity: Option>, + // TODO(ngates): unpack metadata? + metadata: &'a BoolMetadata, + // TODO(ngates): we support statistics by reference to a dyn trait. + // This trait is implemented for ArrayView and ArrayData and is passed into here as part + // of ArrayParts. + // e.g. stats: &dyn Statistics, +} + +impl BoolArray<'_> { + pub fn buffer(&self) -> &Buffer { + self.buffer + } + + pub fn validity(&self) -> Option<&Validity> { + self.validity.as_ref() + } + + pub fn metadata(&self) -> &BoolMetadata { + self.metadata + } + + pub fn boolean_buffer(&self) -> BooleanBuffer { + BooleanBuffer::new(self.buffer.clone(), 0, self.metadata.length) + } +} + +impl<'v> TryFromArrayParts<'v, BoolMetadata> for BoolArray<'v> { + fn try_from_parts( + parts: &'v dyn ArrayParts<'v>, + metadata: &'v BoolMetadata, + ) -> VortexResult { + Ok(BoolArray { + dtype: parts.dtype(), + buffer: parts + .buffer(0) + .ok_or(vortex_err!("BoolArray requires a buffer"))?, + validity: metadata + .validity + .to_validity(metadata.length, parts.child(0, &Validity::DTYPE)), + metadata, + }) + } +} + +impl BoolData { + pub fn try_new(buffer: BooleanBuffer, validity: Option) -> VortexResult { + if let Some(v) = &validity { + assert_eq!(v.len(), buffer.len()); + } + let dtype = DType::Bool(validity.is_some().into()); + let metadata = BoolMetadata { + validity: ValidityMetadata::try_from_validity(validity.as_ref(), &dtype)?, + length: buffer.len(), + }; + let validity_array = validity.and_then(|v| v.into_array_data()); + Ok(Self::new_unchecked( + dtype, + Arc::new(metadata), + vec![buffer.into_inner()].into(), + vec![validity_array].into(), + )) + } +} + +impl ArrayTrait for BoolArray<'_> { + fn dtype(&self) -> &DType { + self.dtype + } + + fn len(&self) -> usize { + self.metadata().length + } +} + +impl ArrayValidity for BoolArray<'_> { + fn is_valid(&self, index: usize) -> bool { + self.validity().map(|v| v.is_valid(index)).unwrap_or(true) + } +} + +impl ToArrayData for BoolArray<'_> { + fn to_array_data(&self) -> ArrayData { + todo!() + } +} + +#[cfg(test)] +mod tests { + use crate::array::bool::BoolData; + use crate::compute::scalar_at; + use crate::IntoArray; + + #[test] + fn bool_array() { + let arr = BoolData::try_new(vec![true, false, true].into(), None) + .unwrap() + .into_array(); + + let scalar: bool = scalar_at(&arr, 0).unwrap().try_into().unwrap(); + assert!(scalar); + } +} diff --git a/vortex-array2/src/array/mod.rs b/vortex-array2/src/array/mod.rs new file mode 100644 index 0000000000..b125b0cc74 --- /dev/null +++ b/vortex-array2/src/array/mod.rs @@ -0,0 +1,3 @@ +pub mod bool; +pub mod primitive; +pub mod ree; diff --git a/vortex-array2/src/primitive/compute.rs b/vortex-array2/src/array/primitive/compute.rs similarity index 75% rename from vortex-array2/src/primitive/compute.rs rename to vortex-array2/src/array/primitive/compute.rs index 9344f150aa..09763a1db6 100644 --- a/vortex-array2/src/primitive/compute.rs +++ b/vortex-array2/src/array/primitive/compute.rs @@ -2,17 +2,18 @@ use vortex::match_each_native_ptype; use vortex::scalar::Scalar; use vortex_error::VortexResult; +use crate::array::primitive::PrimitiveArray; use crate::compute::{ArrayCompute, ScalarAtFn}; -use crate::primitive::PrimitiveArray; -use crate::ArrayValidity; +use crate::validity::ArrayValidity; +use crate::ArrayTrait; -impl ArrayCompute for &dyn PrimitiveArray { +impl ArrayCompute for PrimitiveArray<'_> { fn scalar_at(&self) -> Option<&dyn ScalarAtFn> { Some(self) } } -impl ScalarAtFn for &dyn PrimitiveArray { +impl ScalarAtFn for PrimitiveArray<'_> { fn scalar_at(&self, index: usize) -> VortexResult { if self.is_valid(index) { match_each_native_ptype!(self.ptype(), |$T| { diff --git a/vortex-array2/src/array/primitive/mod.rs b/vortex-array2/src/array/primitive/mod.rs new file mode 100644 index 0000000000..de856acfbc --- /dev/null +++ b/vortex-array2/src/array/primitive/mod.rs @@ -0,0 +1,105 @@ +mod compute; + +use arrow_buffer::Buffer; +use vortex::ptype::{NativePType, PType}; +use vortex_error::VortexResult; +use vortex_schema::DType; + +use crate::impl_encoding; +use crate::validity::{ArrayValidity, Validity, ValidityMetadata}; +use crate::ArrayMetadata; +use crate::{ArrayData, TypedArrayData}; +use crate::{ArrayView, ToArrayData}; + +impl_encoding!("vortex.primitive", Primitive); + +#[derive(Clone, Debug)] +pub struct PrimitiveMetadata { + ptype: PType, + validity: ValidityMetadata, +} + +impl TryParseArrayMetadata for PrimitiveMetadata { + fn try_parse_metadata(_metadata: Option<&[u8]>) -> VortexResult { + todo!() + } +} + +pub struct PrimitiveArray<'a> { + ptype: PType, + dtype: &'a DType, + buffer: &'a Buffer, + validity: Option>, +} + +impl PrimitiveArray<'_> { + pub fn buffer(&self) -> &Buffer { + self.buffer + } + + pub fn validity(&self) -> Option<&Validity> { + self.validity.as_ref() + } + + pub fn ptype(&self) -> PType { + self.ptype + } +} + +impl<'a> TryFromArrayParts<'a, PrimitiveMetadata> for PrimitiveArray<'a> { + fn try_from_parts( + parts: &'a dyn ArrayParts<'a>, + metadata: &'a PrimitiveMetadata, + ) -> VortexResult { + let buffer = parts.buffer(0).unwrap(); + let length = buffer.len() / metadata.ptype.byte_width(); + Ok(PrimitiveArray { + ptype: metadata.ptype, + dtype: parts.dtype(), + buffer, + validity: metadata + .validity + .to_validity(length, parts.child(0, parts.dtype())), + }) + } +} + +impl PrimitiveData { + pub fn from_vec(values: Vec) -> Self { + ArrayData::try_new( + &PrimitiveEncoding, + DType::from(T::PTYPE), + Arc::new(PrimitiveMetadata { + ptype: T::PTYPE, + validity: ValidityMetadata::NonNullable, + }), + vec![Buffer::from_vec(values)].into(), + vec![].into(), + ) + .unwrap() + .try_into() + .unwrap() + } +} + +impl ArrayTrait for PrimitiveArray<'_> { + fn dtype(&self) -> &DType { + self.dtype + } + + fn len(&self) -> usize { + self.buffer().len() / self.ptype().byte_width() + } +} + +impl ArrayValidity for PrimitiveArray<'_> { + fn is_valid(&self, index: usize) -> bool { + self.validity().map(|v| v.is_valid(index)).unwrap_or(true) + } +} + +impl ToArrayData for PrimitiveArray<'_> { + fn to_array_data(&self) -> ArrayData { + todo!() + } +} diff --git a/vortex-array2/src/ree/compute.rs b/vortex-array2/src/array/ree/compute.rs similarity index 72% rename from vortex-array2/src/ree/compute.rs rename to vortex-array2/src/array/ree/compute.rs index 8301db7c15..d4e78e7e0f 100644 --- a/vortex-array2/src/ree/compute.rs +++ b/vortex-array2/src/array/ree/compute.rs @@ -1,16 +1,16 @@ use vortex::scalar::Scalar; use vortex_error::VortexResult; +use crate::array::ree::REEArray; use crate::compute::{ArrayCompute, ScalarAtFn}; -use crate::ree::REEArray; -impl ArrayCompute for &dyn REEArray { +impl ArrayCompute for REEArray<'_> { fn scalar_at(&self) -> Option<&dyn ScalarAtFn> { Some(self) } } -impl ScalarAtFn for &dyn REEArray { +impl ScalarAtFn for REEArray<'_> { fn scalar_at(&self, _index: usize) -> VortexResult { todo!() } diff --git a/vortex-array2/src/array/ree/mod.rs b/vortex-array2/src/array/ree/mod.rs new file mode 100644 index 0000000000..0f8ae21aa6 --- /dev/null +++ b/vortex-array2/src/array/ree/mod.rs @@ -0,0 +1,88 @@ +mod compute; + +use vortex_error::VortexResult; +use vortex_schema::DType; + +use crate::impl_encoding; +use crate::validity::ArrayValidity; +use crate::{Array, ArrayMetadata}; +use crate::{ArrayData, TypedArrayData}; +use crate::{ArrayView, ToArrayData}; + +impl_encoding!("vortex.ree", REE); + +#[derive(Clone, Debug)] +pub struct REEMetadata { + length: usize, + ends_dtype: DType, +} + +impl TryParseArrayMetadata for REEMetadata { + fn try_parse_metadata(_metadata: Option<&[u8]>) -> VortexResult { + todo!() + } +} + +pub struct REEArray<'a> { + dtype: &'a DType, + values: Array<'a>, + run_ends: Array<'a>, +} + +impl REEData { + pub fn new(ends: ArrayData, values: ArrayData, length: usize) -> Self { + ArrayData::try_new( + &REEEncoding, + values.dtype().clone(), + REEMetadata { + length, + ends_dtype: ends.dtype().clone(), + } + .into_arc(), + vec![].into(), + vec![Some(ends), Some(values)].into(), + ) + .unwrap() + .try_into() + .unwrap() + } +} + +impl<'v> TryFromArrayParts<'v, REEMetadata> for REEArray<'v> { + fn try_from_parts( + parts: &'v dyn ArrayParts<'v>, + metadata: &'v REEMetadata, + ) -> VortexResult { + Ok(REEArray { + dtype: parts.dtype(), + values: parts + .child(0, parts.dtype()) + .ok_or_else(|| vortex_err!("REEArray missing values"))?, + run_ends: parts + .child(1, &metadata.ends_dtype) + .ok_or_else(|| vortex_err!("REEArray missing run_ends"))?, + }) + } +} + +impl ArrayTrait for REEArray<'_> { + fn dtype(&self) -> &DType { + self.values.dtype() + } + + fn len(&self) -> usize { + todo!() + } +} + +impl ArrayValidity for REEArray<'_> { + fn is_valid(&self, _index: usize) -> bool { + todo!() + } +} + +impl ToArrayData for REEArray<'_> { + fn to_array_data(&self) -> ArrayData { + todo!() + } +} diff --git a/vortex-array2/src/compute.rs b/vortex-array2/src/compute.rs index 79f4f539f2..636a5825b9 100644 --- a/vortex-array2/src/compute.rs +++ b/vortex-array2/src/compute.rs @@ -1,7 +1,8 @@ use vortex::scalar::Scalar; use vortex_error::{vortex_err, VortexResult}; -use crate::primitive::PrimitiveData; +use crate::array::bool::BoolData; +// use crate::array::primitive::PrimitiveData; use crate::{Array, WithArray}; pub trait ArrayCompute { @@ -30,7 +31,8 @@ pub trait FlattenFn { } pub enum FlattenedArray { - Primitive(PrimitiveData), + Bool(BoolData), + // Primitive(PrimitiveData), // Just to introduce a second variant for now Other(String), } @@ -42,14 +44,14 @@ pub fn flatten(array: &Array) -> VortexResult { .flatten() }) } - -pub fn flatten_primitive(array: &Array) -> VortexResult { - if let FlattenedArray::Primitive(p) = flatten(array)? { - Ok(p) - } else { - Err(vortex_err!( - "Cannot flatten array {:?} into primitive", - array - )) - } -} +// +// pub fn flatten_primitive(array: &Array) -> VortexResult { +// if let FlattenedArray::Primitive(p) = flatten(array)? { +// Ok(p) +// } else { +// Err(vortex_err!( +// "Cannot flatten array {:?} into primitive", +// array +// )) +// } +// } diff --git a/vortex-array2/src/data.rs b/vortex-array2/src/data.rs index 4772a5f36c..49cab7cbe6 100644 --- a/vortex-array2/src/data.rs +++ b/vortex-array2/src/data.rs @@ -6,7 +6,7 @@ use vortex_error::{vortex_bail, VortexError, VortexResult}; use vortex_schema::DType; use crate::encoding::EncodingRef; -use crate::{Array, ArrayDef, ArrayMetadata, IntoArray, ToArray}; +use crate::{Array, ArrayDef, ArrayMetadata, ArrayParts, IntoArray, ToArray}; #[allow(dead_code)] #[derive(Clone, Debug)] @@ -14,8 +14,8 @@ pub struct ArrayData { encoding: EncodingRef, dtype: DType, metadata: Arc, - buffers: Arc<[Buffer]>, - children: Arc<[ArrayData]>, + buffers: Arc<[Buffer]>, // Should this just be an Option, not an Arc? + children: Arc<[Option]>, } impl ArrayData { @@ -24,7 +24,7 @@ impl ArrayData { dtype: DType, metadata: Arc, buffers: Arc<[Buffer]>, - children: Arc<[ArrayData]>, + children: Arc<[Option]>, ) -> VortexResult { let data = Self { encoding, @@ -36,7 +36,7 @@ impl ArrayData { // Validate here that the metadata correctly parses, so that an encoding can infallibly // implement Encoding::with_data(). - encoding.with_data_mut(&data, &mut |_| Ok(()))?; + // encoding.with_data_mut(&data, &mut |_| Ok(()))?; Ok(data) } @@ -59,8 +59,8 @@ impl ArrayData { &self.buffers } - pub fn children(&self) -> &[ArrayData] { - &self.children + pub fn child(&self, index: usize) -> Option<&ArrayData> { + self.children.get(index).and_then(|c| c.as_ref()) } } @@ -76,16 +76,25 @@ impl IntoArray<'static> for ArrayData { } } +#[derive(Debug)] pub struct TypedArrayData { data: ArrayData, phantom: PhantomData, } -impl TypedArrayData -where - Self: for<'a> AsRef>, -{ - pub fn new_unchecked(data: ArrayData) -> Self { +impl TypedArrayData { + pub fn new_unchecked( + dtype: DType, + metadata: Arc, + buffers: Arc<[Buffer]>, + children: Arc<[Option]>, + ) -> Self { + Self::from_data_unchecked( + ArrayData::try_new(D::ENCODING, dtype, metadata, buffers, children).unwrap(), + ) + } + + pub fn from_data_unchecked(data: ArrayData) -> Self { Self { data, phantom: PhantomData, @@ -115,10 +124,6 @@ where .downcast::() .unwrap() } - - pub fn as_array(&self) -> &D::Array<'_> { - self.as_ref() - } } impl ToArray for TypedArrayData { @@ -146,3 +151,22 @@ impl TryFrom for TypedArrayData { }) } } + +impl ArrayParts<'_> for ArrayData { + fn dtype(&'_ self) -> &'_ DType { + &self.dtype + } + + fn buffer(&self, idx: usize) -> Option<&Buffer> { + self.buffers().get(idx) + } + + fn child(&self, idx: usize, _dtype: &DType) -> Option { + self.child(idx).map(|a| { + let array = a.to_array(); + // FIXME(ngates): can we ask an array its dtype? + // assert_eq!(array.dtype(), dtype); + array + }) + } +} diff --git a/vortex-array2/src/implementation.rs b/vortex-array2/src/implementation.rs index c2385dd30b..3cfc1eed93 100644 --- a/vortex-array2/src/implementation.rs +++ b/vortex-array2/src/implementation.rs @@ -1,30 +1,17 @@ -use vortex_error::VortexResult; - -use crate::encoding::ArrayEncoding; use crate::encoding::EncodingId; -use crate::ArrayData; -use crate::ArrayMetadata; -use crate::ArrayView; +use crate::encoding::{ArrayEncoding, EncodingRef}; +use crate::ArrayTrait; +use crate::{ArrayMetadata, TryFromArrayParts, TryParseArrayMetadata}; /// Trait the defines the set of types relating to an array. /// Because it has associated types it can't be used as a trait object. pub trait ArrayDef { const ID: EncodingId; - type Array<'a>: ?Sized + 'a; - type Metadata: ArrayMetadata; - type Encoding: ArrayEncoding; -} - -pub trait TryFromArrayMetadata: Sized { - fn try_from_metadata(metadata: Option<&[u8]>) -> VortexResult; -} - -pub trait TryFromArrayData: Sized { - fn try_from_data(data: &ArrayData) -> VortexResult; -} + const ENCODING: EncodingRef; -pub trait TryFromArrayView<'v>: Sized + 'v { - fn try_from_view(view: &'v ArrayView<'v>) -> VortexResult; + type Array<'a>: ArrayTrait + TryFromArrayParts<'a, Self::Metadata> + 'a; + type Metadata: ArrayMetadata + TryParseArrayMetadata; + type Encoding: ArrayEncoding; } #[macro_export] @@ -33,23 +20,26 @@ macro_rules! impl_encoding { use paste::paste; paste! { - use $crate::{ArrayDef, TryFromArrayData, TryFromArrayView, ArrayTrait}; - use $crate::encoding::{ArrayEncoding, EncodingId}; + use $crate::{ArrayDef, ArrayParts, ArrayTrait, TryFromArrayParts, TryParseArrayMetadata}; + use $crate::encoding::{ArrayEncoding, EncodingId, EncodingRef}; + use vortex_error::vortex_err; use std::any::Any; + use std::fmt::Debug; use std::sync::Arc; use std::marker::{Send, Sync}; /// The array definition trait + #[derive(Debug)] pub struct [<$Name Def>]; impl ArrayDef for [<$Name Def>] { const ID: EncodingId = EncodingId::new($id); - type Array<'a> = dyn [<$Name Array>] + 'a; + const ENCODING: EncodingRef = &[<$Name Encoding>]; + type Array<'a> = [<$Name Array>]<'a>; type Metadata = [<$Name Metadata>]; type Encoding = [<$Name Encoding>]; } pub type [<$Name Data>] = TypedArrayData<[<$Name Def>]>; - pub type [<$Name View>]<'v> = TypedArrayView<'v, [<$Name Def>]>; /// The array encoding pub struct [<$Name Encoding>]; @@ -64,8 +54,9 @@ macro_rules! impl_encoding { f: &mut dyn FnMut(&dyn ArrayTrait) -> VortexResult<()>, ) -> VortexResult<()> { // Convert ArrayView -> PrimitiveArray, then call compute. - let typed_view = <[<$Name View>] as TryFromArrayView>::try_from_view(view)?; - f(&typed_view.as_array()) + let metadata = [<$Name Metadata>]::try_parse_metadata(view.metadata())?; + let array = [<$Name Array>]::try_from_parts(view as &dyn ArrayParts, &metadata)?; + f(&array) } fn with_data_mut( @@ -73,8 +64,13 @@ macro_rules! impl_encoding { data: &ArrayData, f: &mut dyn FnMut(&dyn ArrayTrait) -> VortexResult<()>, ) -> VortexResult<()> { - let data = <[<$Name Data>] as TryFromArrayData>::try_from_data(data)?; - f(&data.as_array()) + let metadata = data.metadata() + .as_any() + .downcast_ref::<[<$Name Metadata>]>() + .ok_or_else(|| vortex_err!("Failed to downcast metadata"))? + .clone(); + let array = [<$Name Array>]::try_from_parts(data as &dyn ArrayParts, &metadata)?; + f(&array) } } @@ -97,17 +93,17 @@ macro_rules! impl_encoding { } } - /// Implement AsRef for both the data and view types - impl<'a> AsRef] + 'a> for [<$Name Data>] { - fn as_ref(&self) -> &(dyn [<$Name Array>] + 'a) { - self - } - } - impl<'a> AsRef] + 'a> for [<$Name View>]<'a> { - fn as_ref(&self) -> &(dyn [<$Name Array>] + 'a) { - self - } - } + // /// Implement AsRef for both the data and view types + // impl<'a> AsRef<[<$Name Array>]<'a>> for [<$Name Data>] { + // fn as_ref(&self) -> &[<$Name Array>]<'a> { + // self + // } + // } + // impl<'a> AsRef<[<$Name Array>]<'a>> for [<$Name View>]<'a> { + // fn as_ref(&self) -> &[<$Name Array>]<'a> { + // self + // } + // } } }; } diff --git a/vortex-array2/src/lib.rs b/vortex-array2/src/lib.rs index d3d9c7c993..79b1de88ba 100644 --- a/vortex-array2/src/lib.rs +++ b/vortex-array2/src/lib.rs @@ -1,26 +1,28 @@ #![allow(dead_code)] +pub mod array; pub mod compute; mod context; mod data; pub mod encoding; mod implementation; mod metadata; -mod primitive; -mod ree; mod validity; mod view; use std::fmt::Debug; +use arrow_buffer::Buffer; pub use context::*; pub use data::*; pub use implementation::*; pub use metadata::*; -pub use validity::*; pub use view::*; +use vortex_error::VortexResult; +use vortex_schema::DType; use crate::compute::ArrayCompute; +use crate::validity::ArrayValidity; #[derive(Debug, Clone)] pub enum Array<'v> { @@ -29,6 +31,16 @@ pub enum Array<'v> { View(ArrayView<'v>), } +impl Array<'_> { + pub fn dtype(&self) -> &DType { + match self { + Array::Data(d) => d.dtype(), + Array::DataRef(d) => d.dtype(), + Array::View(v) => v.dtype(), + } + } +} + pub trait ToArray { fn to_array(&self) -> Array; } @@ -45,8 +57,24 @@ pub trait WithArray { fn with_array R>(&self, f: F) -> R; } +pub trait ArrayParts<'a> { + fn dtype(&'a self) -> &'a DType; + fn buffer(&'a self, idx: usize) -> Option<&'a Buffer>; + fn child(&'a self, idx: usize, dtype: &'a DType) -> Option>; +} + +pub trait TryFromArrayParts<'v, M: ArrayMetadata>: Sized + 'v { + fn try_from_parts(parts: &'v dyn ArrayParts<'v>, metadata: &'v M) -> VortexResult; +} + +pub trait TryParseArrayMetadata: Sized + ArrayMetadata { + fn try_parse_metadata(metadata: Option<&[u8]>) -> VortexResult; +} + /// Collects together the behaviour of an array. pub trait ArrayTrait: ArrayCompute + ArrayValidity + ToArrayData { + fn dtype(&self) -> &DType; + fn len(&self) -> usize; fn is_empty(&self) -> bool { @@ -74,20 +102,3 @@ impl WithArray for Array<'_> { } } } - -#[cfg(test)] -mod test { - use vortex_error::VortexResult; - - use crate::compute::*; - use crate::primitive::PrimitiveData; - use crate::ToArray; - - #[test] - fn test_primitive() -> VortexResult<()> { - let array = PrimitiveData::from_vec(vec![1i32, 2, 3, 4, 5]); - let scalar: i32 = scalar_at(&array.to_array(), 3)?.try_into()?; - assert_eq!(scalar, 4); - Ok(()) - } -} diff --git a/vortex-array2/src/primitive/mod.rs b/vortex-array2/src/primitive/mod.rs deleted file mode 100644 index 959966c90a..0000000000 --- a/vortex-array2/src/primitive/mod.rs +++ /dev/null @@ -1,138 +0,0 @@ -mod compute; - -use arrow_buffer::Buffer; -use vortex::ptype::{NativePType, PType}; -use vortex_error::VortexResult; -use vortex_schema::{DType, Nullability}; - -use crate::compute::scalar_at; -use crate::impl_encoding; -use crate::{Array, ArrayValidity, IntoArray, Validity}; -use crate::{ArrayData, TypedArrayData}; -use crate::{ArrayMetadata, TryFromArrayMetadata}; -use crate::{ArrayView, ToArrayData}; -use crate::{ToArray, TypedArrayView}; - -impl_encoding!("vortex.primitive", Primitive); - -#[derive(Clone, Debug)] -pub struct PrimitiveMetadata(PType); -impl PrimitiveMetadata { - pub fn ptype(&self) -> PType { - self.0 - } -} - -pub trait PrimitiveArray { - fn dtype(&self) -> &DType; - fn ptype(&self) -> PType; - fn buffer(&self) -> &Buffer; - fn validity(&self) -> Option; -} - -impl PrimitiveData { - pub fn from_vec(values: Vec) -> Self { - ArrayData::try_new( - &PrimitiveEncoding, - DType::from(T::PTYPE), - Arc::new(PrimitiveMetadata(T::PTYPE)), - vec![Buffer::from_vec(values)].into(), - vec![].into(), - ) - .unwrap() - .try_into() - .unwrap() - } -} - -impl PrimitiveArray for PrimitiveData { - fn dtype(&self) -> &DType { - self.data().dtype() - } - - fn ptype(&self) -> PType { - self.metadata().ptype() - } - - fn buffer(&self) -> &Buffer { - self.data().buffers().first().unwrap() - } - - fn validity(&self) -> Option { - match self.dtype().nullability() { - Nullability::NonNullable => None, - Nullability::Nullable => Some(self.data().children().first().unwrap().to_array()), - } - } -} - -impl PrimitiveArray for PrimitiveView<'_> { - fn dtype(&self) -> &DType { - self.view().dtype() - } - - fn ptype(&self) -> PType { - self.metadata().ptype() - } - - fn buffer(&self) -> &Buffer { - self.view() - .buffers() - .first() - .expect("PrimitiveView must have a single buffer") - } - - fn validity(&self) -> Option { - match self.dtype().nullability() { - Nullability::NonNullable => None, - Nullability::Nullable => { - Some(self.view().child(0, &Validity::DTYPE).unwrap().into_array()) - } - } - } -} - -impl TryFromArrayMetadata for PrimitiveMetadata { - fn try_from_metadata(_metadata: Option<&[u8]>) -> VortexResult { - todo!() - } -} - -impl<'v> TryFromArrayView<'v> for PrimitiveView<'v> { - fn try_from_view(view: &'v ArrayView<'v>) -> VortexResult { - // TODO(ngates): validate the view. - Ok(PrimitiveView::new_unchecked( - view.clone(), - PrimitiveMetadata::try_from_metadata(view.metadata())?, - )) - } -} - -impl TryFromArrayData for PrimitiveData { - fn try_from_data(data: &ArrayData) -> VortexResult { - // TODO(ngates): validate the array data. - Ok(Self::new_unchecked(data.clone())) - } -} - -impl ArrayTrait for &dyn PrimitiveArray { - fn len(&self) -> usize { - self.buffer().len() / self.ptype().byte_width() - } -} - -impl ArrayValidity for &dyn PrimitiveArray { - fn is_valid(&self, index: usize) -> bool { - if let Some(v) = self.validity() { - scalar_at(&v, index).unwrap().try_into().unwrap() - } else { - true - } - } -} - -impl ToArrayData for &dyn PrimitiveArray { - fn to_array_data(&self) -> ArrayData { - todo!() - } -} diff --git a/vortex-array2/src/ree/mod.rs b/vortex-array2/src/ree/mod.rs deleted file mode 100644 index 5ee98164b0..0000000000 --- a/vortex-array2/src/ree/mod.rs +++ /dev/null @@ -1,114 +0,0 @@ -mod compute; - -use vortex_error::VortexResult; -use vortex_schema::DType; - -use crate::impl_encoding; -use crate::validity::ArrayValidity; -use crate::{Array, ArrayMetadata, TryFromArrayMetadata}; -use crate::{ArrayData, TypedArrayData}; -use crate::{ArrayView, ToArrayData}; -use crate::{IntoArray, TypedArrayView}; - -impl_encoding!("vortex.ree", REE); - -#[derive(Clone, Debug)] -pub struct REEMetadata { - length: usize, - ends_dtype: DType, -} - -impl REEMetadata { - pub fn len(&self) -> usize { - self.length - } - pub fn ends_dtype(&self) -> &DType { - &self.ends_dtype - } -} - -pub trait REEArray { - fn run_ends(&self) -> Array; - fn values(&self) -> Array; -} - -impl REEData { - pub fn new(ends: ArrayData, values: ArrayData, length: usize) -> Self { - ArrayData::try_new( - &REEEncoding, - values.dtype().clone(), - REEMetadata { - length, - ends_dtype: ends.dtype().clone(), - } - .into_arc(), - vec![].into(), - vec![ends, values].into(), - ) - .unwrap() - .try_into() - .unwrap() - } -} - -impl REEArray for REEData { - fn run_ends(&self) -> Array { - Array::DataRef(self.data().children().first().unwrap()) - } - - fn values(&self) -> Array { - Array::DataRef(self.data().children().get(1).unwrap()) - } -} - -impl REEArray for REEView<'_> { - fn run_ends(&self) -> Array { - self.view() - .child(0, self.metadata().ends_dtype()) - .unwrap() - .into_array() - } - - fn values(&self) -> Array { - self.view() - .child(1, self.view().dtype()) - .unwrap() - .into_array() - } -} - -impl TryFromArrayMetadata for REEMetadata { - fn try_from_metadata(_metadata: Option<&[u8]>) -> VortexResult { - todo!() - } -} - -impl<'v> TryFromArrayView<'v> for REEView<'v> { - fn try_from_view(_view: &'v ArrayView<'v>) -> VortexResult { - todo!() - } -} - -impl TryFromArrayData for REEData { - fn try_from_data(_data: &ArrayData) -> VortexResult { - todo!() - } -} - -impl ArrayTrait for &dyn REEArray { - fn len(&self) -> usize { - todo!() - } -} - -impl ArrayValidity for &dyn REEArray { - fn is_valid(&self, _index: usize) -> bool { - todo!() - } -} - -impl ToArrayData for &dyn REEArray { - fn to_array_data(&self) -> ArrayData { - todo!() - } -} diff --git a/vortex-array2/src/validity.rs b/vortex-array2/src/validity.rs new file mode 100644 index 0000000000..3ca3311e82 --- /dev/null +++ b/vortex-array2/src/validity.rs @@ -0,0 +1,94 @@ +use vortex_error::{vortex_bail, VortexResult}; +use vortex_schema::{DType, Nullability}; + +use crate::compute::scalar_at; +use crate::{Array, ArrayData, ToArrayData, WithArray}; + +pub trait ArrayValidity { + fn is_valid(&self, index: usize) -> bool; + // Maybe add to_bool_array() here? +} + +#[derive(Clone, Debug)] +pub enum ValidityMetadata { + NonNullable, + Valid, + Invalid, + Array, +} + +impl ValidityMetadata { + pub fn try_from_validity(validity: Option<&Validity>, dtype: &DType) -> VortexResult { + // We don't really need dtype for this conversion, but it's a good place to check + // that the nullability and validity are consistent. + match validity { + None => { + if dtype.nullability() != Nullability::NonNullable { + vortex_bail!("DType must be NonNullable if validity is absent") + } + Ok(ValidityMetadata::NonNullable) + } + Some(v) => { + if dtype.nullability() != Nullability::Nullable { + vortex_bail!("DType must be Nullable if validity is present") + } + Ok(match v { + Validity::Valid(_) => ValidityMetadata::Valid, + Validity::Invalid(_) => ValidityMetadata::Invalid, + Validity::Array(_) => ValidityMetadata::Array, + }) + } + } + } + + pub fn to_validity<'v>(&self, len: usize, array: Option>) -> Option> { + match self { + ValidityMetadata::NonNullable => None, + ValidityMetadata::Valid => Some(Validity::Valid(len)), + ValidityMetadata::Invalid => Some(Validity::Invalid(len)), + // TODO(ngates): should we return a result for this? + ValidityMetadata::Array => Some(Validity::Array(array.unwrap())), + } + } +} + +#[derive(Clone, Debug)] +pub enum Validity<'v> { + Valid(usize), + Invalid(usize), + Array(Array<'v>), +} + +impl<'v> Validity<'v> { + pub const DTYPE: DType = DType::Bool(Nullability::NonNullable); + + pub fn into_array_data(self) -> Option { + match self { + Validity::Array(a) => Some(a.to_array_data()), + _ => None, + } + } + + pub fn array(&self) -> Option<&Array> { + match self { + Validity::Array(a) => Some(a), + _ => None, + } + } + + pub fn len(&self) -> usize { + match self { + Validity::Valid(l) => *l, + Validity::Invalid(l) => *l, + Validity::Array(a) => a.with_array(|a| a.len()), + } + } + + pub fn is_valid(&self, index: usize) -> bool { + match self { + Validity::Valid(_) => true, + Validity::Invalid(_) => false, + Validity::Array(a) => scalar_at(a, index).unwrap().try_into().unwrap(), + } + } +} diff --git a/vortex-array2/src/validity/mod.rs b/vortex-array2/src/validity/mod.rs deleted file mode 100644 index a609f26481..0000000000 --- a/vortex-array2/src/validity/mod.rs +++ /dev/null @@ -1,123 +0,0 @@ -use vortex_error::{vortex_bail, VortexResult}; -use vortex_schema::{DType, Nullability}; - -use crate::compute::ArrayCompute; -use crate::impl_encoding; -use crate::TypedArrayView; -use crate::{Array, ArrayMetadata, TryFromArrayMetadata}; -use crate::{ArrayData, TypedArrayData}; -use crate::{ArrayView, ToArrayData}; -use crate::{IntoArray, ToArray}; - -impl_encoding!("vortex.ree", Validity); - -pub trait ArrayValidity { - fn is_valid(&self, index: usize) -> bool; -} - -#[derive(Clone, Debug)] -pub enum ValidityMetadata { - Valid(usize), - Invalid(usize), - Array, -} - -pub enum Validity<'v> { - Valid(usize), - Invalid(usize), - Array(Array<'v>), -} - -impl Validity<'_> { - pub const DTYPE: DType = DType::Bool(Nullability::NonNullable); -} - -pub trait ValidityArray { - fn validity(&self) -> Validity; -} - -impl ValidityData { - pub fn new(validity: Validity) -> Self { - let (meta, children) = match validity { - Validity::Valid(l) => (ValidityMetadata::Valid(l), vec![]), - Validity::Invalid(l) => (ValidityMetadata::Invalid(l), vec![]), - Validity::Array(a) => (ValidityMetadata::Array, vec![a.to_array_data()]), - }; - - ArrayData::try_new( - &ValidityEncoding, - Validity::DTYPE, - meta.into_arc(), - vec![].into(), - children.into(), - ) - .unwrap() - .try_into() - .unwrap() - } -} - -impl ValidityArray for ValidityData { - fn validity(&self) -> Validity { - match self.metadata() { - ValidityMetadata::Valid(l) => Validity::Valid(*l), - ValidityMetadata::Invalid(l) => Validity::Invalid(*l), - ValidityMetadata::Array => { - Validity::Array(self.data().children().first().unwrap().to_array()) - } - } - } -} - -impl ValidityArray for ValidityView<'_> { - fn validity(&self) -> Validity { - match self.metadata() { - ValidityMetadata::Valid(l) => Validity::Valid(*l), - ValidityMetadata::Invalid(l) => Validity::Invalid(*l), - ValidityMetadata::Array => { - Validity::Array(self.view().child(0, &Validity::DTYPE).unwrap().into_array()) - } - } - } -} - -impl TryFromArrayMetadata for ValidityMetadata { - fn try_from_metadata(metadata: Option<&[u8]>) -> VortexResult { - let Some(_bytes) = metadata else { - vortex_bail!("Validity metadata is missing") - }; - todo!() - } -} - -impl<'v> TryFromArrayView<'v> for ValidityView<'v> { - fn try_from_view(_view: &'v ArrayView<'v>) -> VortexResult { - todo!() - } -} - -impl TryFromArrayData for ValidityData { - fn try_from_data(_data: &ArrayData) -> VortexResult { - todo!() - } -} - -impl ArrayTrait for &dyn ValidityArray { - fn len(&self) -> usize { - todo!() - } -} - -impl ArrayValidity for &dyn ValidityArray { - fn is_valid(&self, _index: usize) -> bool { - todo!() - } -} - -impl ToArrayData for &dyn ValidityArray { - fn to_array_data(&self) -> ArrayData { - todo!() - } -} - -impl ArrayCompute for &dyn ValidityArray {} diff --git a/vortex-array2/src/view.rs b/vortex-array2/src/view.rs index f5fbe6c32f..ba8d3f1436 100644 --- a/vortex-array2/src/view.rs +++ b/vortex-array2/src/view.rs @@ -2,12 +2,12 @@ use std::fmt::{Debug, Formatter}; use arrow_buffer::Buffer; use vortex::flatbuffers::array as fb; -use vortex_error::{vortex_bail, vortex_err, VortexError, VortexResult}; +use vortex_error::{vortex_bail, vortex_err, VortexResult}; use vortex_schema::DType; use crate::encoding::EncodingRef; -use crate::{Array, ArrayDef, IntoArray, ToArray}; -use crate::{SerdeContext, TryFromArrayMetadata}; +use crate::{Array, IntoArray, ToArray}; +use crate::{ArrayParts, SerdeContext}; #[derive(Clone)] pub struct ArrayView<'v> { @@ -146,52 +146,16 @@ impl<'v> IntoArray<'v> for ArrayView<'v> { } } -pub struct TypedArrayView<'v, D: ArrayDef> { - view: ArrayView<'v>, - metadata: D::Metadata, -} - -impl<'v, D: ArrayDef> TypedArrayView<'v, D> { - pub fn new_unchecked(view: ArrayView<'v>, metadata: D::Metadata) -> Self { - Self { view, metadata } - } - - pub fn metadata(&self) -> &D::Metadata { - &self.metadata - } - - pub fn view(&'v self) -> &'v ArrayView<'v> { - &self.view - } - - pub fn as_array(&self) -> &D::Array<'v> - where - Self: AsRef>, - { - self.as_ref() +impl<'v> ArrayParts<'v> for ArrayView<'v> { + fn dtype(&'v self) -> &'v DType { + self.dtype } -} -impl ToArray for TypedArrayView<'_, D> { - fn to_array(&self) -> Array { - Array::View(self.view().clone()) + fn buffer(&'v self, idx: usize) -> Option<&'v Buffer> { + self.buffers().get(idx) } -} - -/// Convert an ArrayView into a TypedArrayView. -impl<'v, D: ArrayDef> TryFrom> for TypedArrayView<'v, D> -where - D::Metadata: TryFromArrayMetadata, -{ - type Error = VortexError; - fn try_from(view: ArrayView<'v>) -> Result { - if view.encoding().id() != D::ID { - vortex_bail!("Invalid encoding for array") - } - let metadata = <::Metadata as TryFromArrayMetadata>::try_from_metadata( - view.metadata(), - )?; - Ok(Self { view, metadata }) + fn child(&'v self, idx: usize, dtype: &'v DType) -> Option> { + self.child(idx, dtype).map(|a| a.into_array()) } }