Skip to content

Commit

Permalink
Add bool array (#214)
Browse files Browse the repository at this point in the history
Starting to add a few more implementations to the new structure.
Takeaways:

- [x] Validity probably doesn't need to be an array encoding. Now that we
have `Array<'a>`, we don't need Validity + ValidityView either. Instead, we can
provide helper functions based on metadata + array data.

The next two are addressed in #215 
- [x] There's lots of duplication between the `impl &dyn FooArray for
Array{Data, View}`. Mostly it's just accessing children or buffers.
- [x] With the Array enum, we may not need each array to be a trait and
could be a struct that holds Arrays?
  • Loading branch information
gatesn authored Apr 7, 2024
1 parent 025e5c7 commit 4565524
Show file tree
Hide file tree
Showing 16 changed files with 588 additions and 516 deletions.
26 changes: 26 additions & 0 deletions vortex-array2/src/array/bool/compute.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
use vortex::scalar::{BoolScalar, Scalar};
use vortex_error::VortexResult;

use crate::array::bool::BoolArray;
use crate::compute::{ArrayCompute, ScalarAtFn};
use crate::validity::ArrayValidity;
use crate::ArrayTrait;

/// Advertises the compute kernels that `BoolArray` implements itself.
impl ArrayCompute for BoolArray<'_> {
    /// `BoolArray` is its own `ScalarAtFn` implementation, so hand back `self`.
    fn scalar_at(&self) -> Option<&dyn ScalarAtFn> {
        let scalar_at_fn: &dyn ScalarAtFn = self;
        Some(scalar_at_fn)
    }
}

impl ScalarAtFn for BoolArray<'_> {
    /// Returns the element at `index` as a `Scalar`.
    ///
    /// An invalid (null) position yields `Scalar::null` of this array's dtype.
    /// Otherwise the bit is read from the boolean buffer and wrapped in a
    /// `BoolScalar` that carries the array's nullability.
    ///
    /// # Errors
    ///
    /// Propagates any error from `BoolScalar::try_new` instead of panicking.
    fn scalar_at(&self, index: usize) -> VortexResult<Scalar> {
        // Null slots never touch the value buffer.
        if !self.is_valid(index) {
            return Ok(Scalar::null(self.dtype()));
        }

        let value = self.boolean_buffer().value(index);
        let scalar = BoolScalar::try_new(Some(value), self.dtype().nullability())?;
        Ok(Scalar::Bool(scalar))
    }
}
133 changes: 133 additions & 0 deletions vortex-array2/src/array/bool/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
mod compute;

use arrow_buffer::{BooleanBuffer, Buffer};
use vortex_error::VortexResult;
use vortex_schema::DType;

use crate::impl_encoding;
use crate::validity::Validity;
use crate::validity::{ArrayValidity, ValidityMetadata};
use crate::ArrayMetadata;
use crate::{ArrayData, TypedArrayData};
use crate::{ArrayView, ToArrayData};

impl_encoding!("vortex.bool", Bool);

/// Metadata describing a `vortex.bool` array.
#[derive(Clone, Debug)]
pub struct BoolMetadata {
/// Describes how validity is represented; `try_from_parts` converts it back
/// into an optional `Validity` via `to_validity`.
validity: ValidityMetadata,
/// Number of boolean values. Used as the bit length of the backing buffer
/// and reported as the array's `len()`.
length: usize,
}

// Parsing `BoolMetadata` from serialized bytes is not implemented yet.
impl TryParseArrayMetadata for BoolMetadata {
// Stub: calling this currently panics via `todo!()`.
fn try_parse_metadata(_metadata: Option<&[u8]>) -> VortexResult<Self> {
todo!()
}
}

/// A borrowed view over a boolean array: every field is either a reference
/// with lifetime `'a` or a value derived from such references.
pub struct BoolArray<'a> {
/// Logical type of the array.
dtype: &'a DType,
/// Raw buffer holding the boolean values; interpreted as bits by `boolean_buffer()`.
buffer: &'a Buffer,
/// Optional validity; `None` is treated as "every element is valid" by `is_valid`.
validity: Option<Validity<'a>>,
// TODO(ngates): unpack metadata?
metadata: &'a BoolMetadata,
// TODO(ngates): we support statistics by reference to a dyn trait.
// This trait is implemented for ArrayView and ArrayData and is passed into here as part
// of ArrayParts.
// e.g. stats: &dyn Statistics,
}

impl BoolArray<'_> {
pub fn buffer(&self) -> &Buffer {
self.buffer
}

pub fn validity(&self) -> Option<&Validity> {
self.validity.as_ref()
}

pub fn metadata(&self) -> &BoolMetadata {
self.metadata
}

pub fn boolean_buffer(&self) -> BooleanBuffer {
BooleanBuffer::new(self.buffer.clone(), 0, self.metadata.length)
}
}

impl<'v> TryFromArrayParts<'v, BoolMetadata> for BoolArray<'v> {
    /// Assembles a `BoolArray` from generic array parts plus its metadata.
    ///
    /// Buffer 0 holds the boolean values. Child 0 (requested with
    /// `Validity::DTYPE`) is handed to `ValidityMetadata::to_validity` together
    /// with the array length to rebuild the optional validity.
    ///
    /// # Errors
    ///
    /// Returns an error if `parts` has no buffer at index 0.
    fn try_from_parts(
        parts: &'v dyn ArrayParts<'v>,
        metadata: &'v BoolMetadata,
    ) -> VortexResult<Self> {
        // Build the error lazily so the success path does not pay for it.
        let buffer = parts
            .buffer(0)
            .ok_or_else(|| vortex_err!("BoolArray requires a buffer"))?;
        let validity = metadata
            .validity
            .to_validity(metadata.length, parts.child(0, &Validity::DTYPE));

        Ok(BoolArray {
            dtype: parts.dtype(),
            buffer,
            validity,
            metadata,
        })
    }
}

impl BoolData {
pub fn try_new(buffer: BooleanBuffer, validity: Option<Validity>) -> VortexResult<Self> {
if let Some(v) = &validity {
assert_eq!(v.len(), buffer.len());
}
let dtype = DType::Bool(validity.is_some().into());
let metadata = BoolMetadata {
validity: ValidityMetadata::try_from_validity(validity.as_ref(), &dtype)?,
length: buffer.len(),
};
let validity_array = validity.and_then(|v| v.into_array_data());
Ok(Self::new_unchecked(
dtype,
Arc::new(metadata),
vec![buffer.into_inner()].into(),
vec![validity_array].into(),
))
}
}

impl ArrayTrait for BoolArray<'_> {
    /// Logical type of this array.
    fn dtype(&self) -> &DType {
        self.dtype
    }

    /// Number of boolean values, as recorded in the metadata.
    fn len(&self) -> usize {
        self.metadata.length
    }
}

impl ArrayValidity for BoolArray<'_> {
    /// Reports whether the element at `index` is valid (non-null).
    ///
    /// An array without validity treats every element as valid.
    fn is_valid(&self, index: usize) -> bool {
        match self.validity() {
            Some(validity) => validity.is_valid(index),
            None => true,
        }
    }
}

// Conversion of a borrowed `BoolArray` into owned `ArrayData` is not implemented yet.
impl ToArrayData for BoolArray<'_> {
// Stub: calling this currently panics via `todo!()`.
fn to_array_data(&self) -> ArrayData {
todo!()
}
}

#[cfg(test)]
mod tests {
    use crate::array::bool::BoolData;
    use crate::compute::scalar_at;
    use crate::IntoArray;

    /// Round-trips a small non-nullable bool array through `scalar_at`.
    #[test]
    fn bool_array() {
        let data = BoolData::try_new(vec![true, false, true].into(), None).unwrap();
        let arr = data.into_array();

        let first: bool = scalar_at(&arr, 0).unwrap().try_into().unwrap();
        assert!(first);
    }
}
3 changes: 3 additions & 0 deletions vortex-array2/src/array/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
pub mod bool;
pub mod primitive;
pub mod ree;
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,18 @@ use vortex::match_each_native_ptype;
use vortex::scalar::Scalar;
use vortex_error::VortexResult;

use crate::array::primitive::PrimitiveArray;
use crate::compute::{ArrayCompute, ScalarAtFn};
use crate::primitive::PrimitiveArray;
use crate::ArrayValidity;
use crate::validity::ArrayValidity;
use crate::ArrayTrait;

impl ArrayCompute for &dyn PrimitiveArray {
impl ArrayCompute for PrimitiveArray<'_> {
fn scalar_at(&self) -> Option<&dyn ScalarAtFn> {
Some(self)
}
}

impl ScalarAtFn for &dyn PrimitiveArray {
impl ScalarAtFn for PrimitiveArray<'_> {
fn scalar_at(&self, index: usize) -> VortexResult<Scalar> {
if self.is_valid(index) {
match_each_native_ptype!(self.ptype(), |$T| {
Expand Down
105 changes: 105 additions & 0 deletions vortex-array2/src/array/primitive/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
mod compute;

use arrow_buffer::Buffer;
use vortex::ptype::{NativePType, PType};
use vortex_error::VortexResult;
use vortex_schema::DType;

use crate::impl_encoding;
use crate::validity::{ArrayValidity, Validity, ValidityMetadata};
use crate::ArrayMetadata;
use crate::{ArrayData, TypedArrayData};
use crate::{ArrayView, ToArrayData};

impl_encoding!("vortex.primitive", Primitive);

/// Metadata describing a `vortex.primitive` array.
#[derive(Clone, Debug)]
pub struct PrimitiveMetadata {
/// Physical element type; its `byte_width()` is used to derive the element
/// count from the buffer's byte length.
ptype: PType,
/// Describes how validity is represented; `try_from_parts` converts it back
/// into an optional `Validity` via `to_validity`.
validity: ValidityMetadata,
}

// Parsing `PrimitiveMetadata` from serialized bytes is not implemented yet.
impl TryParseArrayMetadata for PrimitiveMetadata {
// Stub: calling this currently panics via `todo!()`.
fn try_parse_metadata(_metadata: Option<&[u8]>) -> VortexResult<Self> {
todo!()
}
}

/// A borrowed view over a primitive array.
pub struct PrimitiveArray<'a> {
/// Physical element type, copied out of the metadata.
ptype: PType,
/// Logical type of the array.
dtype: &'a DType,
/// Raw buffer holding the values; `len()` divides its byte length by the ptype's byte width.
buffer: &'a Buffer,
/// Optional validity; `None` is treated as "every element is valid" by `is_valid`.
validity: Option<Validity<'a>>,
}

impl PrimitiveArray<'_> {
    /// Raw buffer backing the primitive values.
    pub fn buffer(&self) -> &Buffer {
        self.buffer
    }

    /// Validity of the array, if any was attached.
    pub fn validity(&self) -> Option<&Validity> {
        self.validity.as_ref()
    }

    /// Physical element type of the values in the buffer.
    pub fn ptype(&self) -> PType {
        self.ptype
    }
}

impl<'a> TryFromArrayParts<'a, PrimitiveMetadata> for PrimitiveArray<'a> {
fn try_from_parts(
parts: &'a dyn ArrayParts<'a>,
metadata: &'a PrimitiveMetadata,
) -> VortexResult<Self> {
let buffer = parts.buffer(0).unwrap();
let length = buffer.len() / metadata.ptype.byte_width();
Ok(PrimitiveArray {
ptype: metadata.ptype,
dtype: parts.dtype(),
buffer,
validity: metadata
.validity
.to_validity(length, parts.child(0, parts.dtype())),
})
}
}

impl PrimitiveData {
    /// Builds non-nullable primitive array data from a vector of native values.
    ///
    /// The dtype and ptype both come from `T::PTYPE`; the vector becomes the
    /// single buffer and there are no children.
    ///
    /// # Panics
    ///
    /// Panics if `ArrayData::try_new` fails or if the result cannot be
    /// converted into `PrimitiveData`.
    pub fn from_vec<T: NativePType>(values: Vec<T>) -> Self {
        let metadata = PrimitiveMetadata {
            ptype: T::PTYPE,
            validity: ValidityMetadata::NonNullable,
        };

        let array_data = ArrayData::try_new(
            &PrimitiveEncoding,
            DType::from(T::PTYPE),
            Arc::new(metadata),
            vec![Buffer::from_vec(values)].into(),
            vec![].into(),
        )
        .unwrap();

        array_data.try_into().unwrap()
    }
}

impl ArrayTrait for PrimitiveArray<'_> {
    /// Logical type of this array.
    fn dtype(&self) -> &DType {
        self.dtype
    }

    /// Number of elements: buffer byte length divided by the ptype's byte width.
    fn len(&self) -> usize {
        let byte_len = self.buffer.len();
        byte_len / self.ptype.byte_width()
    }
}

impl ArrayValidity for PrimitiveArray<'_> {
    /// Reports whether the element at `index` is valid (non-null).
    ///
    /// An array without validity treats every element as valid.
    fn is_valid(&self, index: usize) -> bool {
        match self.validity() {
            Some(validity) => validity.is_valid(index),
            None => true,
        }
    }
}

// Conversion of a borrowed `PrimitiveArray` into owned `ArrayData` is not implemented yet.
impl ToArrayData for PrimitiveArray<'_> {
// Stub: calling this currently panics via `todo!()`.
fn to_array_data(&self) -> ArrayData {
todo!()
}
}
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
use vortex::scalar::Scalar;
use vortex_error::VortexResult;

use crate::array::ree::REEArray;
use crate::compute::{ArrayCompute, ScalarAtFn};
use crate::ree::REEArray;

impl ArrayCompute for &dyn REEArray {
impl ArrayCompute for REEArray<'_> {
fn scalar_at(&self) -> Option<&dyn ScalarAtFn> {
Some(self)
}
}

impl ScalarAtFn for &dyn REEArray {
impl ScalarAtFn for REEArray<'_> {
fn scalar_at(&self, _index: usize) -> VortexResult<Scalar> {
todo!()
}
Expand Down
Loading

0 comments on commit 4565524

Please sign in to comment.