Skip to content

Commit

Permalink
Array2: ChunkedArray (#229)
Browse files Browse the repository at this point in the history
  • Loading branch information
gatesn authored Apr 12, 2024
1 parent b8bd7e6 commit b1226da
Show file tree
Hide file tree
Showing 19 changed files with 412 additions and 44 deletions.
2 changes: 1 addition & 1 deletion vortex-array2/src/array/bool/compute/as_contiguous.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use vortex_error::VortexResult;
use crate::array::bool::BoolArray;
use crate::compute::as_contiguous::AsContiguousFn;
use crate::validity::Validity;
use crate::{Array, ArrayTrait, IntoArray};
use crate::{Array, IntoArray};

impl AsContiguousFn for BoolArray<'_> {
fn as_contiguous(&self, arrays: &[Array]) -> VortexResult<Array<'static>> {
Expand Down
2 changes: 1 addition & 1 deletion vortex-array2/src/array/bool/compute/fill.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use vortex_schema::Nullability;
use crate::array::bool::BoolArray;
use crate::compute::fill::FillForwardFn;
use crate::validity::ArrayValidity;
use crate::{Array, ArrayTrait, IntoArray, ToArrayData};
use crate::{Array, IntoArray, ToArrayData};

impl FillForwardFn for BoolArray<'_> {
fn fill_forward(&self) -> VortexResult<Array<'static>> {
Expand Down
1 change: 0 additions & 1 deletion vortex-array2/src/array/bool/compute/scalar_at.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ use vortex_error::VortexResult;
use crate::array::bool::BoolArray;
use crate::compute::scalar_at::ScalarAtFn;
use crate::validity::ArrayValidity;
use crate::ArrayTrait;

impl ScalarAtFn for BoolArray<'_> {
fn scalar_at(&self, index: usize) -> VortexResult<Scalar> {
Expand Down
11 changes: 0 additions & 11 deletions vortex-array2/src/array/bool/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ use crate::buffer::Buffer;
use crate::validity::{ArrayValidity, ValidityMetadata};
use crate::validity::{LogicalValidity, Validity};
use crate::visitor::{AcceptArrayVisitor, ArrayVisitor};
use crate::ArrayMetadata;
use crate::{impl_encoding, ArrayFlatten};

impl_encoding!("vortex.bool", Bool);
Expand Down Expand Up @@ -90,19 +89,9 @@ impl FromIterator<Option<bool>> for OwnedBoolArray {
}

// Core `ArrayTrait` accessors for `BoolArray`: logical type, element count and
// a type-erased handle to the encoding metadata.
impl ArrayTrait for BoolArray<'_> {
// Returns the dtype reported by the underlying array handle (`self.array()`).
fn dtype(&self) -> &DType {
// FIXME(ngates): move this
self.array().dtype()
}

// Returns the element count, read from the `length` field of the metadata.
fn len(&self) -> usize {
self.metadata().length
}

// Returns a clone of the metadata wrapped in an `Arc<dyn ArrayMetadata>`.
// NOTE(review): the inner `self.metadata()` call presumably resolves to an
// inherent accessor on `BoolArray` rather than to this trait method (which
// would recurse) -- confirm against the code generated by `impl_encoding!`.
fn metadata(&self) -> Arc<dyn ArrayMetadata> {
// FIXME(ngates): move this
Arc::new(self.metadata().clone())
}
}

impl ArrayFlatten for BoolArray<'_> {
Expand Down
3 changes: 1 addition & 2 deletions vortex-array2/src/array/bool/stats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,10 @@ use vortex_error::VortexResult;

use crate::array::bool::BoolArray;
use crate::stats::{ArrayStatisticsCompute, Stat};
use crate::ArrayTrait;

impl ArrayStatisticsCompute for BoolArray<'_> {
fn compute_statistics(&self, _stat: Stat) -> VortexResult<HashMap<Stat, Scalar>> {
if self.len() == 0 {
if self.is_empty() {
return Ok(HashMap::from([
(Stat::TrueCount, 0.into()),
(Stat::RunCount, 0.into()),
Expand Down
45 changes: 45 additions & 0 deletions vortex-array2/src/array/chunked/compute/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
use vortex::scalar::Scalar;
use vortex_error::VortexResult;

use crate::array::chunked::ChunkedArray;
use crate::compute::as_contiguous::{as_contiguous, AsContiguousFn};
use crate::compute::scalar_at::{scalar_at, ScalarAtFn};
use crate::compute::take::TakeFn;
use crate::compute::ArrayCompute;
use crate::{Array, OwnedArray, ToStatic};

mod take;

/// Compute kernels offered by `ChunkedArray`.
///
/// Every accessor below returns `self` viewed as the corresponding kernel
/// trait object; the kernels themselves are implemented in separate `impl`
/// blocks for `ChunkedArray`.
impl ArrayCompute for ChunkedArray<'_> {
    /// `ChunkedArray` provides a `TakeFn` kernel.
    fn take(&self) -> Option<&dyn TakeFn> {
        let kernel: &dyn TakeFn = self;
        Some(kernel)
    }

    /// `ChunkedArray` provides a `ScalarAtFn` kernel.
    fn scalar_at(&self) -> Option<&dyn ScalarAtFn> {
        let kernel: &dyn ScalarAtFn = self;
        Some(kernel)
    }

    /// `ChunkedArray` provides an `AsContiguousFn` kernel.
    fn as_contiguous(&self) -> Option<&dyn AsContiguousFn> {
        let kernel: &dyn AsContiguousFn = self;
        Some(kernel)
    }
}

impl AsContiguousFn for ChunkedArray<'_> {
    /// Concatenates several chunked arrays into one contiguous array.
    ///
    /// The chunks of every input are gathered, in order, into a single flat
    /// list which is then handed to the generic `as_contiguous` function.
    ///
    /// # Errors
    ///
    /// Returns an error if any element of `arrays` cannot be viewed as a
    /// `ChunkedArray`, or if the final `as_contiguous` call fails.
    fn as_contiguous(&self, arrays: &[Array]) -> VortexResult<OwnedArray> {
        // `self.nchunks()` is only a starting capacity hint; `extend` grows the
        // vector as needed when `arrays` holds more chunks than that.
        let mut chunks = Vec::with_capacity(self.nchunks());
        for array in arrays {
            // Propagate a conversion failure to the caller instead of panicking.
            let chunked = ChunkedArray::try_from(array)?;
            chunks.extend(chunked.chunks().map(|chunk| chunk.to_static()));
        }
        as_contiguous(&chunks)
    }
}

impl ScalarAtFn for ChunkedArray<'_> {
    /// Fetches the scalar at logical position `index`.
    ///
    /// The position is first translated by `find_chunk_idx` into a chunk
    /// number and an offset inside that chunk; the lookup is then delegated to
    /// the generic `scalar_at` function on that chunk.
    fn scalar_at(&self, index: usize) -> VortexResult<Scalar> {
        let (chunk_number, offset_in_chunk) = self.find_chunk_idx(index);
        let chunk = self.chunk(chunk_number).unwrap();
        scalar_at(&chunk, offset_in_chunk)
    }
}
82 changes: 82 additions & 0 deletions vortex-array2/src/array/chunked/compute/take.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
use vortex::ptype::PType;
use vortex_error::VortexResult;

use crate::array::chunked::ChunkedArray;
use crate::compute::cast::cast;
use crate::compute::take::{take, TakeFn};
use crate::{Array, IntoArray, OwnedArray, ToArray, ToStatic};

impl TakeFn for ChunkedArray<'_> {
    /// Selects the elements at `indices`, returning a new chunked array.
    ///
    /// The indices are cast to `u64` and flattened. Consecutive indices that
    /// land in the same chunk are grouped into a run, and each run is resolved
    /// with a single `take` on that chunk. The output therefore holds one chunk
    /// per run, in the order the runs appear in `indices`.
    ///
    /// Empty `indices` yield a chunked array with no chunks (unless the array
    /// itself is empty, in which case the identity fast path returns it as is).
    ///
    /// # Errors
    ///
    /// Returns an error if the indices cannot be cast to `u64` or flattened,
    /// or if a per-chunk `take` fails.
    fn take(&self, indices: &Array) -> VortexResult<OwnedArray> {
        let indices = cast(indices, PType::U64.into())?.flatten_primitive()?;
        let index_values = indices.typed_data::<u64>();

        // Fast path: only the identity selection `0, 1, .., len - 1` may return
        // the array unchanged. Equal lengths alone are not sufficient, because
        // the indices may repeat or reorder elements.
        let is_identity = index_values.len() == self.len()
            && index_values
                .iter()
                .enumerate()
                .all(|(position, &idx)| idx == position as u64);
        if is_identity {
            return Ok(self.to_array().to_static());
        }

        let mut chunks = Vec::new();
        let mut indices_in_chunk: Vec<u64> = Vec::new();
        // Chunk that the current run of indices belongs to; `None` until the
        // first index has been seen, so empty `indices` never touch a chunk.
        let mut current_chunk_idx = None;
        for &idx in index_values {
            let (chunk_idx, idx_in_chunk) = self.find_chunk_idx(idx as usize);

            if let Some(prev_chunk_idx) = current_chunk_idx {
                if chunk_idx != prev_chunk_idx {
                    // The run for the previous chunk is complete: resolve it and
                    // start a fresh run without copying the accumulated indices.
                    let run = std::mem::take(&mut indices_in_chunk).into_array();
                    chunks.push(take(&self.chunk(prev_chunk_idx).unwrap(), &run)?);
                }
            }

            indices_in_chunk.push(idx_in_chunk as u64);
            current_chunk_idx = Some(chunk_idx);
        }

        // Resolve the trailing run, if any index was seen at all.
        if let Some(last_chunk_idx) = current_chunk_idx {
            let run = indices_in_chunk.into_array();
            chunks.push(take(&self.chunk(last_chunk_idx).unwrap(), &run)?);
        }

        Ok(ChunkedArray::new(chunks, self.dtype().clone()).into_array())
    }
}

#[cfg(test)]
mod test {
    use itertools::Itertools;

    use crate::array::chunked::ChunkedArray;
    use crate::array::primitive::PrimitiveArray;
    use crate::compute::as_contiguous::as_contiguous;
    use crate::compute::take::take;
    use crate::IntoArray;

    /// Takes four positions (including a repeated one) from a chunked array
    /// built out of three identical `[1, 2, 3]` chunks, then flattens the
    /// result to compare the selected values.
    #[test]
    fn test_take() {
        let a = vec![1i32, 2, 3].into_array();
        let arr = ChunkedArray::new(vec![a.clone(), a.clone(), a.clone()], a.dtype().clone());
        assert_eq!(arr.nchunks(), 3);
        assert_eq!(arr.len(), 9);
        let indices = vec![0, 0, 6, 4].into_array();

        // Keep the taken chunked array in a named binding so the chunks
        // collected from it stay valid while they are concatenated.
        let taken = ChunkedArray::try_from(take(arr.as_ref(), &indices).unwrap()).unwrap();
        let taken_chunks = taken.chunks().collect_vec();
        let contiguous = as_contiguous(&taken_chunks).unwrap();

        let result = PrimitiveArray::try_from(contiguous).unwrap();
        assert_eq!(result.typed_data::<i32>(), &[1, 1, 1, 2]);
    }
}
Loading

0 comments on commit b1226da

Please sign in to comment.