Skip to content

Commit

Permalink
Struct Array
Browse files Browse the repository at this point in the history
  • Loading branch information
gatesn committed Apr 8, 2024
1 parent 182678d commit 0cf5158
Show file tree
Hide file tree
Showing 15 changed files with 224 additions and 38 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 1 addition & 4 deletions vortex-array2/src/array/bool/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,7 @@ impl BoolArray<'_> {
}

impl<'v> TryFromArrayParts<'v, BoolMetadata> for BoolArray<'v> {
fn try_from_parts(
parts: &'v dyn ArrayParts<'v>,
metadata: &'v BoolMetadata,
) -> VortexResult<Self> {
fn try_from_parts(parts: &'v dyn ArrayParts, metadata: &'v BoolMetadata) -> VortexResult<Self> {
Ok(BoolArray {
dtype: parts.dtype(),
buffer: parts
Expand Down
1 change: 1 addition & 0 deletions vortex-array2/src/array/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
pub mod bool;
pub mod primitive;
pub mod ree;
pub mod r#struct;
2 changes: 1 addition & 1 deletion vortex-array2/src/array/primitive/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ impl PrimitiveArray<'_> {

impl<'a> TryFromArrayParts<'a, PrimitiveMetadata> for PrimitiveArray<'a> {
fn try_from_parts(
parts: &'a dyn ArrayParts<'a>,
parts: &'a dyn ArrayParts,
metadata: &'a PrimitiveMetadata,
) -> VortexResult<Self> {
let buffer = parts.buffer(0).unwrap();
Expand Down
9 changes: 3 additions & 6 deletions vortex-array2/src/array/ree/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,7 @@ impl REEData {
}

impl<'v> TryFromArrayParts<'v, REEMetadata> for REEArray<'v> {
fn try_from_parts(
parts: &'v dyn ArrayParts<'v>,
metadata: &'v REEMetadata,
) -> VortexResult<Self> {
fn try_from_parts(parts: &'v dyn ArrayParts, metadata: &'v REEMetadata) -> VortexResult<Self> {
Ok(REEArray {
dtype: parts.dtype(),
values: parts
Expand Down Expand Up @@ -96,7 +93,7 @@ impl ToArrayData for REEArray<'_> {

impl AcceptArrayVisitor for REEArray<'_> {
fn accept(&self, visitor: &mut dyn ArrayVisitor) -> VortexResult<()> {
visitor.visit_array("values", self.values())?;
visitor.visit_array("run_ends", self.run_ends())
visitor.visit_child("values", self.values())?;
visitor.visit_child("run_ends", self.run_ends())
}
}
17 changes: 17 additions & 0 deletions vortex-array2/src/array/struct/compute.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
use vortex::scalar::Scalar;
use vortex_error::VortexResult;

use crate::array::r#struct::StructArray;
use crate::compute::{ArrayCompute, ScalarAtFn};

// Compute entry points for `StructArray`; `scalar_at` is the only method defined in this impl.
impl ArrayCompute for StructArray<'_> {
    /// Returns `self` as the `ScalarAtFn` implementation for struct arrays.
    fn scalar_at(&self) -> Option<&dyn ScalarAtFn> {
        Some(self)
    }
}

// Scalar lookup for `StructArray`.
impl ScalarAtFn for StructArray<'_> {
    /// Not implemented yet: the body is `todo!()`, so calling this panics.
    /// The `_index` argument is currently unused.
    fn scalar_at(&self, _index: usize) -> VortexResult<Scalar> {
        todo!()
    }
}
115 changes: 115 additions & 0 deletions vortex-array2/src/array/struct/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
mod compute;

use serde::{Deserialize, Serialize};
use vortex_error::{vortex_bail, VortexResult};
use vortex_schema::{DType, FieldNames};

use crate::impl_encoding;
use crate::validity::ArrayValidity;
use crate::visitor::{AcceptArrayVisitor, ArrayVisitor};
use crate::{Array, ArrayMetadata};
use crate::{ArrayData, TypedArrayData};
use crate::{ArrayView, ToArrayData};

impl_encoding!("vortex.struct", Struct);

/// Serializable metadata consumed when building a `StructArray` from its parts.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct StructMetadata {
    /// Array length; `StructArray::try_from_parts` copies this value into the array's
    /// own `length` field.
    length: usize,
}

/// Borrowed struct array whose children are fetched on demand through `parts`.
#[derive(Clone)]
pub struct StructArray<'a> {
    /// Data type of the array; the accessors on this type expect `DType::Struct`.
    dtype: &'a DType,
    // Note(ngates): for arrays with variable-length children, we don't want to
    // allocate a Vec<Array>, so instead we defer child access by storing a reference to the parts.
    parts: &'a dyn ArrayParts,
    /// Length reported by `len()`; populated from `StructMetadata::length`.
    length: usize,
}

impl<'a> StructArray<'a> {
    /// Looks up the child array at position `idx`.
    ///
    /// Returns `None` when the struct dtype has no field at `idx`, or when the
    /// underlying parts do not yield a child for that position.
    ///
    /// # Panics
    ///
    /// Panics if the array's dtype is not `DType::Struct`.
    pub fn child(&'a self, idx: usize) -> Option<Array<'a>> {
        match self.dtype() {
            DType::Struct(_, field_dtypes) => field_dtypes
                .get(idx)
                .and_then(|field_dtype| self.parts.child(idx, field_dtype)),
            _ => unreachable!(),
        }
    }

    /// Field names taken from the struct dtype.
    ///
    /// # Panics
    ///
    /// Panics if the array's dtype is not `DType::Struct`.
    pub fn names(&self) -> &FieldNames {
        match self.dtype() {
            DType::Struct(field_names, _) => field_names,
            _ => unreachable!(),
        }
    }

    /// Field dtypes taken from the struct dtype, as a slice.
    ///
    /// # Panics
    ///
    /// Panics if the array's dtype is not `DType::Struct`.
    pub fn fields(&self) -> &[DType] {
        match self.dtype() {
            DType::Struct(_, field_dtypes) => field_dtypes.as_slice(),
            _ => unreachable!(),
        }
    }

    /// Number of fields in the struct dtype.
    pub fn ncolumns(&self) -> usize {
        let field_dtypes = self.fields();
        field_dtypes.len()
    }

    /// Length stored on this array.
    pub fn len(&self) -> usize {
        self.length
    }
}

impl<'v> TryFromArrayParts<'v, StructMetadata> for StructArray<'v> {
fn try_from_parts(
parts: &'v dyn ArrayParts,
metadata: &'v StructMetadata,
) -> VortexResult<Self> {
let DType::Struct(_names, dtypes) = parts.dtype() else {
unreachable!()
};
if parts.nchildren() != dtypes.len() {
vortex_bail!(
"Expected {} children, found {}",
dtypes.len(),
parts.nchildren()
);
}
Ok(StructArray {
dtype: parts.dtype(),
parts,
length: metadata.length,
})
}
}

// Basic array accessors, both served directly from the stored fields.
impl ArrayTrait for StructArray<'_> {
    /// Returns the stored dtype reference.
    fn dtype(&self) -> &DType {
        self.dtype
    }

    /// Returns the stored length.
    fn len(&self) -> usize {
        self.length
    }
}

// Validity lookup for `StructArray`.
impl ArrayValidity for StructArray<'_> {
    /// Not implemented yet: the body is `todo!()`, so calling this panics.
    fn is_valid(&self, _index: usize) -> bool {
        todo!()
    }
}

// Conversion of `StructArray` into `ArrayData`.
impl ToArrayData for StructArray<'_> {
    /// Not implemented yet: the body is `todo!()`, so calling this panics.
    fn to_array_data(&self) -> ArrayData {
        todo!()
    }
}

impl AcceptArrayVisitor for StructArray<'_> {
fn accept(&self, visitor: &mut dyn ArrayVisitor) -> VortexResult<()> {
for (idx, name) in self.names().iter().enumerate() {
let child = self.child(idx).unwrap();
visitor.visit_column(name, &child)?;
}
Ok(())
}
}
34 changes: 34 additions & 0 deletions vortex-array2/src/batch.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
use arrow_buffer::Buffer;
use vortex::serde::data::ArrayData;
use vortex_error::VortexResult;
use vortex_schema::DType;

use crate::visitor::ArrayVisitor;
use crate::Array;

/// A column batch contains the flattened list of columns.
pub struct ColumnBatch {
    /// Data type associated with the batch.
    dtype: DType,
    /// The columns themselves, held as `'static` arrays.
    columns: Vec<Array<'static>>,
}

/// Visitor-based builder that holds columns as `ArrayData`.
/// NOTE(review): presumably intended to produce a `ColumnBatch`, but no such conversion
/// is visible here; confirm.
pub struct ColumnBatchBuilder {
    /// Columns held by the builder.
    columns: Vec<ArrayData>,
}

// Visitor hooks for `ColumnBatchBuilder`. Every method is still a `todo!()` stub, so
// calling any of them panics.
impl ArrayVisitor for ColumnBatchBuilder {
    /// Not implemented yet (`todo!()`).
    fn visit_column(&mut self, _name: &str, _array: &Array) -> VortexResult<()> {
        todo!()
    }

    /// Not implemented yet (`todo!()`); the open design question is recorded below.
    fn visit_child(&mut self, _name: &str, _array: &Array) -> VortexResult<()> {
        // If the array is a struct, then pull out each column.
        // But we can't do this in case some non-column child is a struct.
        // Can we ask an array for column(idx)? Seems like a lot of work.
        todo!()
    }

    /// Not implemented yet (`todo!()`).
    fn visit_buffer(&mut self, _buffer: &Buffer) -> VortexResult<()> {
        todo!()
    }
}
2 changes: 2 additions & 0 deletions vortex-array2/src/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ use vortex::encoding::EncodingId;

use crate::encoding::EncodingRef;

/// TODO(ngates): I'm not too sure about this construct. Where it should live, or what scope it
/// should have.
#[derive(Debug)]
pub struct SerdeContext {
encodings: Arc<[EncodingRef]>,
Expand Down
16 changes: 8 additions & 8 deletions vortex-array2/src/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,8 +152,8 @@ impl<D: ArrayDef> TryFrom<ArrayData> for TypedArrayData<D> {
}
}

impl ArrayParts<'_> for ArrayData {
fn dtype(&'_ self) -> &'_ DType {
impl ArrayParts for ArrayData {
fn dtype(&self) -> &DType {
&self.dtype
}

Expand All @@ -162,11 +162,11 @@ impl ArrayParts<'_> for ArrayData {
}

fn child(&self, idx: usize, _dtype: &DType) -> Option<Array> {
self.child(idx).map(|a| {
let array = a.to_array();
// FIXME(ngates): can we ask an array its dtype?
// assert_eq!(array.dtype(), dtype);
array
})
// TODO(ngates): validate the DType
self.child(idx).map(move |a| a.to_array())
}

fn nchildren(&self) -> usize {
self.children.len()
}
}
18 changes: 12 additions & 6 deletions vortex-array2/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#![allow(dead_code)]

pub mod array;
mod batch;
pub mod compute;
mod context;
mod data;
Expand Down Expand Up @@ -73,14 +74,15 @@ pub trait WithArray {
fn with_array<R, F: FnMut(&dyn ArrayTrait) -> R>(&self, f: F) -> R;
}

pub trait ArrayParts<'a> {
fn dtype(&'a self) -> &'a DType;
fn buffer(&'a self, idx: usize) -> Option<&'a Buffer>;
fn child(&'a self, idx: usize, dtype: &'a DType) -> Option<Array<'a>>;
pub trait ArrayParts {
fn dtype(&self) -> &DType;
fn buffer(&self, idx: usize) -> Option<&Buffer>;
fn child<'a>(&'a self, idx: usize, dtype: &'a DType) -> Option<Array>;
fn nchildren(&self) -> usize;
}

pub trait TryFromArrayParts<'v, M: ArrayMetadata>: Sized + 'v {
fn try_from_parts(parts: &'v dyn ArrayParts<'v>, metadata: &'v M) -> VortexResult<Self>;
fn try_from_parts(parts: &'v dyn ArrayParts, metadata: &'v M) -> VortexResult<Self>;
}

/// Collects together the behaviour of an array.
Expand All @@ -103,7 +105,11 @@ pub trait ArrayTrait: ArrayCompute + ArrayValidity + AcceptArrayVisitor + ToArra

struct NBytesVisitor(usize);
impl ArrayVisitor for NBytesVisitor {
fn visit_array(&mut self, _name: &str, array: &Array) -> VortexResult<()> {
fn visit_column(&mut self, name: &str, array: &Array) -> VortexResult<()> {
self.visit_child(name, array)
}

fn visit_child(&mut self, _name: &str, array: &Array) -> VortexResult<()> {
self.0 += array.with_array(|a| a.nbytes());
Ok(())
}
Expand Down
8 changes: 6 additions & 2 deletions vortex-array2/src/tree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ impl<'a, 'fmt: 'a> fmt::Display for TreeDisplayWrapper<'a> {
let nbytes = array.with_array(|a| a.nbytes());
let mut array_fmt = TreeFormatter::new(f, "".to_string(), nbytes);
array_fmt
.visit_array("root", array)
.visit_child("root", array)
.map_err(fmt::Error::custom)
}
}
Expand All @@ -41,7 +41,11 @@ pub struct TreeFormatter<'a, 'b: 'a> {
/// TODO(ngates): I think we want to go back to the old explicit style. It gives arrays more
/// control over how their metadata etc is displayed.
impl<'a, 'b: 'a> ArrayVisitor for TreeFormatter<'a, 'b> {
fn visit_array(&mut self, name: &str, array: &Array) -> VortexResult<()> {
fn visit_column(&mut self, name: &str, array: &Array) -> VortexResult<()> {
self.visit_child(name, array)
}

fn visit_child(&mut self, name: &str, array: &Array) -> VortexResult<()> {
array.with_array(|a| {
let nbytes = a.nbytes();
writeln!(
Expand Down
22 changes: 13 additions & 9 deletions vortex-array2/src/view.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ impl<'a> Debug for ArrayView<'a> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_struct("ArrayView")
.field("encoding", &self.encoding)
.field("dtype", &self.dtype)
.field("dtype", self.dtype)
// .field("array", &self.array)
.field("buffers", &self.buffers)
.field("ctx", &self.ctx)
Expand Down Expand Up @@ -75,10 +75,10 @@ impl<'v> ArrayView<'v> {
pub fn metadata(&self) -> Option<&'v [u8]> {
self.array.metadata().map(|m| m.bytes())
}

pub fn nchildren(&self) -> usize {
self.array.children().map(|c| c.len()).unwrap_or_default()
}
//
// pub fn nchildren(&self) -> usize {
// self.array.children().map(|c| c.len()).unwrap_or_default()
// }

pub fn child(&self, idx: usize, dtype: &'v DType) -> Option<ArrayView<'v>> {
let child = self.array_child(idx)?;
Expand Down Expand Up @@ -146,16 +146,20 @@ impl<'v> IntoArray<'v> for ArrayView<'v> {
}
}

impl<'v> ArrayParts<'v> for ArrayView<'v> {
fn dtype(&'v self) -> &'v DType {
impl ArrayParts for ArrayView<'_> {
fn dtype(&self) -> &DType {
self.dtype
}

fn buffer(&'v self, idx: usize) -> Option<&'v Buffer> {
fn buffer(&self, idx: usize) -> Option<&Buffer> {
self.buffers().get(idx)
}

fn child(&'v self, idx: usize, dtype: &'v DType) -> Option<Array<'v>> {
fn child<'a>(&'a self, idx: usize, dtype: &'a DType) -> Option<Array> {
self.child(idx, dtype).map(|a| a.into_array())
}

fn nchildren(&self) -> usize {
self.array.children().map(|c| c.len()).unwrap_or_default()
}
}
11 changes: 9 additions & 2 deletions vortex-array2/src/visitor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,20 @@ pub trait AcceptArrayVisitor {

// TODO(ngates): maybe we make this more like the inverse of TryFromParts?
pub trait ArrayVisitor {
fn visit_array(&mut self, name: &str, array: &Array) -> VortexResult<()>;
/// Visit a child column of this array.
fn visit_column(&mut self, name: &str, array: &Array) -> VortexResult<()>;

/// Visit a child of this array.
fn visit_child(&mut self, name: &str, array: &Array) -> VortexResult<()>;

/// Utility for visiting Array validity.
fn visit_validity(&mut self, validity: &Validity) -> VortexResult<()> {
if let Some(v) = validity.array() {
self.visit_array("validity", v)
self.visit_child("validity", v)
} else {
Ok(())
}
}

fn visit_buffer(&mut self, buffer: &Buffer) -> VortexResult<()>;
}
Loading

0 comments on commit 0cf5158

Please sign in to comment.