Skip to content

Commit

Permalink
Add bool array
Browse files Browse the repository at this point in the history
  • Loading branch information
gatesn committed Apr 6, 2024
1 parent 025e5c7 commit 2cf2ab3
Show file tree
Hide file tree
Showing 13 changed files with 299 additions and 36 deletions.
17 changes: 17 additions & 0 deletions vortex-array2/src/array/bool/compute.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
use vortex::scalar::Scalar;
use vortex_error::VortexResult;

use crate::array::bool::BoolArray;
use crate::compute::{ArrayCompute, ScalarAtFn};

/// Compute-function dispatch for boolean arrays.
impl ArrayCompute for &dyn BoolArray {
    /// Advertises `scalar_at` support by handing back this array viewed as a
    /// `ScalarAtFn` trait object.
    fn scalar_at(&self) -> Option<&dyn ScalarAtFn> {
        let scalar_at_fn: &dyn ScalarAtFn = self;
        Some(scalar_at_fn)
    }
}

/// `scalar_at` kernel for boolean arrays.
///
/// Not implemented yet: the body is a `todo!()` placeholder, so calling it panics.
impl ScalarAtFn for &dyn BoolArray {
// `_index` is intentionally unused until the kernel is implemented.
fn scalar_at(&self, _index: usize) -> VortexResult<Scalar> {
todo!()
}
}
147 changes: 147 additions & 0 deletions vortex-array2/src/array/bool/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
mod compute;

use arrow_buffer::{BooleanBuffer, Buffer};
use vortex_error::VortexResult;
use vortex_schema::DType;

use crate::validity::Validity;
use crate::validity::{ArrayValidity, ValidityMetadata};
use crate::{impl_encoding, IntoArray};
use crate::{ArrayData, TypedArrayData};
use crate::{ArrayMetadata, TryFromArrayMetadata};
use crate::{ArrayView, ToArrayData};
use crate::{ToArray, TypedArrayView};

impl_encoding!("vortex.bool", Bool);

/// Metadata stored alongside a boolean array.
#[derive(Clone, Debug)]
pub struct BoolMetadata {
// TODO(ngates): push option inside the metadata?
// Description of the array's validity; `None` when the array was built without one.
validity: Option<ValidityMetadata>,
// Element count, recorded from the boolean buffer's length at construction.
length: usize,
}

impl BoolMetadata {
    /// Returns the validity metadata, or `None` if the array carries no validity.
    pub fn validity(&self) -> Option<&ValidityMetadata> {
        self.validity.as_ref()
    }

    /// Returns the number of elements recorded for the array.
    pub fn len(&self) -> usize {
        self.length
    }

    /// Returns `true` when the recorded length is zero.
    ///
    /// Provided alongside `len`, as is conventional for public types that
    /// expose a length.
    pub fn is_empty(&self) -> bool {
        self.length == 0
    }
}

/// Accessors shared by the boolean array representations in this module
/// (`BoolData` and `BoolView`).
pub trait BoolArray {
/// The buffer holding the boolean values.
fn buffer(&self) -> &Buffer;
/// Number of elements in the array.
fn len(&self) -> usize;
/// The array's validity, or `None` if it has none.
fn validity(&self) -> Option<Validity>;
}

impl BoolData {
    /// Builds a `BoolData` from a boolean buffer and an optional validity.
    ///
    /// The dtype's nullability is derived from whether `validity` is present.
    /// The metadata records the validity description together with the buffer
    /// length, and the validity (when it converts to an array) is stored in the
    /// single child slot.
    ///
    /// # Panics
    ///
    /// Panics if `validity` is provided and its length differs from
    /// `buffer.len()`.
    pub fn try_new(buffer: BooleanBuffer, validity: Option<Validity>) -> Self {
        if let Some(v) = &validity {
            assert_eq!(v.len(), buffer.len());
        }

        let dtype = DType::Bool(validity.is_some().into());
        let metadata = Arc::new(BoolMetadata {
            validity: validity.as_ref().map(ValidityMetadata::from),
            length: buffer.len(),
        });
        let buffers = vec![buffer.into_inner()];
        // Exactly one child slot is always emitted; it stays `None` when there
        // is no validity array to store.
        let validity_child = validity
            .and_then(|v| v.into_array())
            .map(|a| a.to_array_data());

        Self::new_unchecked(dtype, metadata, buffers.into(), vec![validity_child].into())
    }
}

/// `BoolArray` accessors backed by owned array data.
impl BoolArray for BoolData {
    /// Returns the first buffer of the underlying data.
    ///
    /// # Panics
    ///
    /// Panics if the underlying data holds no buffers.
    fn buffer(&self) -> &Buffer {
        self.data()
            .buffers()
            .first()
            .expect("BoolData must have a single buffer")
    }

    /// Returns the length recorded in the metadata.
    fn len(&self) -> usize {
        self.metadata().len()
    }

    /// Rebuilds the validity from the metadata and the first child slot.
    ///
    /// Returns `None` when the metadata carries no validity.
    ///
    /// # Panics
    ///
    /// Panics if the validity cannot be reconstructed from the stored metadata
    /// and child.
    fn validity(&self) -> Option<Validity> {
        self.metadata().validity().map(|v| {
            Validity::try_from_validity_meta(
                v,
                self.metadata().len(),
                self.data().child(0).map(|a| a.to_array()),
            )
            .expect("BoolData validity metadata must match its validity child")
        })
    }
}

/// `BoolArray` accessors backed by an array view.
impl BoolArray for BoolView<'_> {
    /// Returns the first buffer of the underlying view.
    ///
    /// # Panics
    ///
    /// Panics if the view holds no buffers.
    fn buffer(&self) -> &Buffer {
        self.view()
            .buffers()
            .first()
            .expect("BoolView must have a single buffer")
    }

    /// Returns the length recorded in the metadata.
    fn len(&self) -> usize {
        self.metadata().len()
    }

    /// Rebuilds the validity from the metadata and the view's first child,
    /// which is read with the validity dtype.
    ///
    /// Returns `None` when the metadata carries no validity.
    ///
    /// # Panics
    ///
    /// Panics if the validity cannot be reconstructed from the stored metadata
    /// and child.
    fn validity(&self) -> Option<Validity> {
        self.metadata().validity().map(|v| {
            Validity::try_from_validity_meta(
                v,
                self.metadata().len(),
                self.view()
                    .child(0, &Validity::DTYPE)
                    .map(|a| a.into_array()),
            )
            .expect("BoolView validity metadata must match its validity child")
        })
    }
}

/// Construction of `BoolMetadata` from optional raw metadata bytes.
///
/// Not implemented yet: the body is a `todo!()` placeholder, so calling it panics.
impl TryFromArrayMetadata for BoolMetadata {
// `_metadata` is intentionally unused until decoding is implemented.
fn try_from_metadata(_metadata: Option<&[u8]>) -> VortexResult<Self> {
todo!()
}
}

impl<'v> TryFromArrayView<'v> for BoolView<'v> {
    /// Wraps a clone of `view` as a `BoolView`, building its metadata with
    /// `BoolMetadata::try_from_metadata`.
    ///
    /// # Errors
    ///
    /// Propagates any error returned while building the metadata.
    fn try_from_view(view: &'v ArrayView<'v>) -> VortexResult<Self> {
        // TODO(ngates): validate the view.
        let owned_view = view.clone();
        let metadata = BoolMetadata::try_from_metadata(view.metadata())?;
        Ok(BoolView::new_unchecked(owned_view, metadata))
    }
}

impl TryFromArrayData for BoolData {
    /// Wraps a clone of `data` as `BoolData` without performing any checks.
    fn try_from_data(data: &ArrayData) -> VortexResult<Self> {
        // TODO(ngates): validate the array data.
        let owned = data.clone();
        Ok(Self::from_data_unchecked(owned))
    }
}

impl ArrayTrait for &dyn BoolArray {
fn len(&self) -> usize {
(**self).len()
}
}

impl ArrayValidity for &dyn BoolArray {
    /// Reports whether the element at `index` is valid.
    ///
    /// An array without validity treats every element as valid.
    fn is_valid(&self, index: usize) -> bool {
        match self.validity() {
            Some(validity) => validity.is_valid(index),
            None => true,
        }
    }
}

/// Conversion of a boolean array into owned `ArrayData`.
///
/// Not implemented yet: the body is a `todo!()` placeholder, so calling it panics.
impl ToArrayData for &dyn BoolArray {
fn to_array_data(&self) -> ArrayData {
todo!()
}
}
4 changes: 4 additions & 0 deletions vortex-array2/src/array/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
pub mod bool;
pub mod primitive;
pub mod ree;
pub mod validity;
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ use vortex::match_each_native_ptype;
use vortex::scalar::Scalar;
use vortex_error::VortexResult;

use crate::array::primitive::PrimitiveArray;
use crate::compute::{ArrayCompute, ScalarAtFn};
use crate::primitive::PrimitiveArray;
use crate::ArrayValidity;
use crate::validity::ArrayValidity;

impl ArrayCompute for &dyn PrimitiveArray {
fn scalar_at(&self) -> Option<&dyn ScalarAtFn> {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@ use vortex::ptype::{NativePType, PType};
use vortex_error::VortexResult;
use vortex_schema::{DType, Nullability};

use crate::array::validity::Validity;
use crate::compute::scalar_at;
use crate::impl_encoding;
use crate::{Array, ArrayValidity, IntoArray, Validity};
use crate::validity::ArrayValidity;
use crate::{Array, IntoArray};
use crate::{ArrayData, TypedArrayData};
use crate::{ArrayMetadata, TryFromArrayMetadata};
use crate::{ArrayView, ToArrayData};
Expand Down Expand Up @@ -61,7 +63,7 @@ impl PrimitiveArray for PrimitiveData {
fn validity(&self) -> Option<Array> {
match self.dtype().nullability() {
Nullability::NonNullable => None,
Nullability::Nullable => Some(self.data().children().first().unwrap().to_array()),
Nullability::Nullable => Some(self.data().child(0).unwrap().to_array()),
}
}
}
Expand Down Expand Up @@ -111,7 +113,7 @@ impl<'v> TryFromArrayView<'v> for PrimitiveView<'v> {
impl TryFromArrayData for PrimitiveData {
fn try_from_data(data: &ArrayData) -> VortexResult<Self> {
// TODO(ngates): validate the array data.
Ok(Self::new_unchecked(data.clone()))
Ok(Self::from_data_unchecked(data.clone()))
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
use vortex::scalar::Scalar;
use vortex_error::VortexResult;

use crate::array::ree::REEArray;
use crate::compute::{ArrayCompute, ScalarAtFn};
use crate::ree::REEArray;

impl ArrayCompute for &dyn REEArray {
fn scalar_at(&self) -> Option<&dyn ScalarAtFn> {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ impl REEData {
}
.into_arc(),
vec![].into(),
vec![ends, values].into(),
vec![Some(ends), Some(values)].into(),
)
.unwrap()
.try_into()
Expand All @@ -53,11 +53,11 @@ impl REEData {

impl REEArray for REEData {
fn run_ends(&self) -> Array {
Array::DataRef(self.data().children().first().unwrap())
Array::DataRef(self.data().child(0).unwrap())
}

fn values(&self) -> Array {
Array::DataRef(self.data().children().get(1).unwrap())
Array::DataRef(self.data().child(1).unwrap())
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,15 @@ use crate::{IntoArray, ToArray};

impl_encoding!("vortex.ree", Validity);

pub trait ArrayValidity {
fn is_valid(&self, index: usize) -> bool;
}

#[derive(Clone, Debug)]
pub enum ValidityMetadata {
Valid(usize),
Invalid(usize),
Array,
}

/// TODO(ngates): I'm not sure validity actually has to be an array itself? I think it could just be
/// something that's returned from ArrayValidity.
pub enum Validity<'v> {
Valid(usize),
Invalid(usize),
Expand All @@ -41,7 +39,7 @@ impl ValidityData {
let (meta, children) = match validity {
Validity::Valid(l) => (ValidityMetadata::Valid(l), vec![]),
Validity::Invalid(l) => (ValidityMetadata::Invalid(l), vec![]),
Validity::Array(a) => (ValidityMetadata::Array, vec![a.to_array_data()]),
Validity::Array(a) => (ValidityMetadata::Array, vec![Some(a.to_array_data())]),
};

ArrayData::try_new(
Expand All @@ -62,9 +60,7 @@ impl ValidityArray for ValidityData {
match self.metadata() {
ValidityMetadata::Valid(l) => Validity::Valid(*l),
ValidityMetadata::Invalid(l) => Validity::Invalid(*l),
ValidityMetadata::Array => {
Validity::Array(self.data().children().first().unwrap().to_array())
}
ValidityMetadata::Array => Validity::Array(self.data().child(0).unwrap().to_array()),
}
}
}
Expand Down Expand Up @@ -108,12 +104,6 @@ impl ArrayTrait for &dyn ValidityArray {
}
}

impl ArrayValidity for &dyn ValidityArray {
fn is_valid(&self, _index: usize) -> bool {
todo!()
}
}

impl ToArrayData for &dyn ValidityArray {
fn to_array_data(&self) -> ArrayData {
todo!()
Expand Down
4 changes: 3 additions & 1 deletion vortex-array2/src/compute.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
use vortex::scalar::Scalar;
use vortex_error::{vortex_err, VortexResult};

use crate::primitive::PrimitiveData;
use crate::array::bool::BoolData;
use crate::array::primitive::PrimitiveData;
use crate::{Array, WithArray};

pub trait ArrayCompute {
Expand Down Expand Up @@ -30,6 +31,7 @@ pub trait FlattenFn {
}

pub enum FlattenedArray {
Bool(BoolData),
Primitive(PrimitiveData),
// Just to introduce a second variant for now
Other(String),
Expand Down
23 changes: 17 additions & 6 deletions vortex-array2/src/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ pub struct ArrayData {
encoding: EncodingRef,
dtype: DType,
metadata: Arc<dyn ArrayMetadata>,
buffers: Arc<[Buffer]>,
children: Arc<[ArrayData]>,
buffers: Arc<[Buffer]>, // Should this just be an Option, not an Arc?
children: Arc<[Option<ArrayData>]>,
}

impl ArrayData {
Expand All @@ -24,7 +24,7 @@ impl ArrayData {
dtype: DType,
metadata: Arc<dyn ArrayMetadata>,
buffers: Arc<[Buffer]>,
children: Arc<[ArrayData]>,
children: Arc<[Option<ArrayData>]>,
) -> VortexResult<Self> {
let data = Self {
encoding,
Expand Down Expand Up @@ -59,8 +59,8 @@ impl ArrayData {
&self.buffers
}

pub fn children(&self) -> &[ArrayData] {
&self.children
pub fn child(&self, index: usize) -> Option<&ArrayData> {
self.children.get(index).and_then(|c| c.as_ref())
}
}

Expand All @@ -85,7 +85,18 @@ impl<D: ArrayDef> TypedArrayData<D>
where
Self: for<'a> AsRef<D::Array<'a>>,
{
pub fn new_unchecked(data: ArrayData) -> Self {
pub fn new_unchecked(
dtype: DType,
metadata: Arc<D::Metadata>,
buffers: Arc<[Buffer]>,
children: Arc<[Option<ArrayData>]>,
) -> Self {
Self::from_data_unchecked(
ArrayData::try_new(D::ENCODING, dtype, metadata, buffers, children).unwrap(),
)
}

pub fn from_data_unchecked(data: ArrayData) -> Self {
Self {
data,
phantom: PhantomData,
Expand Down
Loading

0 comments on commit 2cf2ab3

Please sign in to comment.