Skip to content

Commit

Permalink
Run end fill null (#1660)
Browse files Browse the repository at this point in the history
  • Loading branch information
gatesn authored Dec 12, 2024
1 parent ecd891b commit e81435b
Show file tree
Hide file tree
Showing 6 changed files with 145 additions and 97 deletions.
18 changes: 18 additions & 0 deletions encodings/runend/src/compute/fill_null.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
use vortex_array::compute::{fill_null, FillNullFn};
use vortex_array::{ArrayData, ArrayLen, IntoArrayData};
use vortex_error::VortexResult;
use vortex_scalar::Scalar;

use crate::{RunEndArray, RunEndEncoding};

impl FillNullFn<RunEndArray> for RunEndEncoding {
    /// Fills nulls in a run-end encoded array by filling only its run values.
    ///
    /// The run ends are reused as-is, `fill_null` is applied to `array.values()`, and the
    /// result is rebuilt from this array's `len()` and `offset()`.
    ///
    /// # Errors
    ///
    /// Propagates any error from filling the values or from rebuilding the array.
    fn fill_null(&self, array: &RunEndArray, fill_value: Scalar) -> VortexResult<ArrayData> {
        let run_ends = array.ends();
        let filled_values = fill_null(array.values(), fill_value)?;
        // NOTE(review): `len()` is passed before `offset()`; confirm this matches the
        // parameter order of `RunEndArray::with_offset_and_length`.
        let rebuilt = RunEndArray::with_offset_and_length(
            run_ends,
            filled_values,
            array.len(),
            array.offset(),
        )?;
        Ok(rebuilt.into_array())
    }
}
91 changes: 9 additions & 82 deletions encodings/runend/src/compute/mod.rs
Original file line number Diff line number Diff line change
@@ -1,18 +1,20 @@
mod compare;
mod fill_null;
mod invert;
mod take;

use std::cmp::min;
use std::ops::AddAssign;

use num_traits::AsPrimitive;
use vortex_array::array::{BooleanBuffer, PrimitiveArray};
use vortex_array::compute::{
filter, scalar_at, slice, take, CompareFn, ComputeVTable, FilterFn, FilterMask, InvertFn,
filter, scalar_at, slice, CompareFn, ComputeVTable, FillNullFn, FilterFn, FilterMask, InvertFn,
ScalarAtFn, SliceFn, TakeFn,
};
use vortex_array::variants::PrimitiveArrayTrait;
use vortex_array::{ArrayData, ArrayLen, IntoArrayData, IntoArrayVariant};
use vortex_dtype::{match_each_integer_ptype, match_each_unsigned_integer_ptype, NativePType};
use vortex_dtype::{match_each_unsigned_integer_ptype, NativePType};
use vortex_error::{VortexResult, VortexUnwrap};
use vortex_scalar::Scalar;

Expand All @@ -23,6 +25,10 @@ impl ComputeVTable for RunEndEncoding {
Some(self)
}

fn fill_null_fn(&self) -> Option<&dyn FillNullFn<ArrayData>> {
// Returns this encoding itself as the `FillNullFn` implementation.
Some(self)
}

fn filter_fn(&self) -> Option<&dyn FilterFn<ArrayData>> {
// Returns this encoding itself as the `FilterFn` implementation.
Some(self)
}
Expand Down Expand Up @@ -50,34 +56,6 @@ impl ScalarAtFn<RunEndArray> for RunEndEncoding {
}
}

impl TakeFn<RunEndArray> for RunEndEncoding {
/// Takes the elements of `array` at the given logical `indices`.
///
/// Each index is bounds-checked against `array.len()`, shifted by `array.offset()`,
/// resolved through `find_physical_indices`, and the resulting positions are taken
/// from `array.values()`.
///
/// # Errors
///
/// Returns an error if `indices` cannot be converted into a primitive array, if any
/// index is `>= array.len()` (`OutOfBounds`), or if the physical-index lookup or the
/// final `take` fails.
#[allow(deprecated)]
fn take(&self, array: &RunEndArray, indices: &ArrayData) -> VortexResult<ArrayData> {
let primitive_indices = indices.clone().into_primitive()?;
// Dispatch on the integer type of the indices and normalise them to `usize`.
let usize_indices = match_each_integer_ptype!(primitive_indices.ptype(), |$P| {
primitive_indices
.into_maybe_null_slice::<$P>()
.into_iter()
.map(|idx| {
// NOTE(review): `as usize` wraps negative values and may truncate wide ones;
// such indices are only rejected if the converted value is >= `array.len()`.
let usize_idx = idx as usize;
if usize_idx >= array.len() {
vortex_error::vortex_bail!(OutOfBounds: usize_idx, 0, array.len());
}

// Shift by the array's offset before the physical-index lookup.
Ok(usize_idx + array.offset())
})
.collect::<VortexResult<Vec<usize>>>()?
});
// Presumably maps each offset-adjusted index to its run's position in `values`
// (TODO confirm against `find_physical_indices`).
let physical_indices = array
.find_physical_indices(&usize_indices)?
.into_iter()
.map(|idx| idx as u64)
.collect::<Vec<_>>();
let physical_indices_array = PrimitiveArray::from(physical_indices).into_array();
take(array.values(), &physical_indices_array)
}
}

impl SliceFn<RunEndArray> for RunEndEncoding {
fn slice(&self, array: &RunEndArray, start: usize, stop: usize) -> VortexResult<ArrayData> {
let new_length = stop - start;
Expand Down Expand Up @@ -159,7 +137,7 @@ fn filter_run_ends<R: NativePType + AddAssign + From<bool> + AsPrimitive<u64>>(
#[cfg(test)]
mod test {
use vortex_array::array::PrimitiveArray;
use vortex_array::compute::{filter, scalar_at, slice, take, FilterMask};
use vortex_array::compute::{filter, scalar_at, slice, FilterMask};
use vortex_array::{ArrayDType, ArrayLen, IntoArrayData, IntoArrayVariant, ToArrayData};
use vortex_dtype::{DType, Nullability, PType};

Expand All @@ -172,42 +150,6 @@ mod test {
.unwrap()
}

// Takes logical indices 9, 8, 1 and 3 from the fixture and expects the values 5, 5, 1, 4.
#[test]
fn ree_take() {
let taken = take(
ree_array().as_ref(),
PrimitiveArray::from(vec![9, 8, 1, 3]).as_ref(),
)
.unwrap();
assert_eq!(
taken.into_primitive().unwrap().maybe_null_slice::<i32>(),
&[5, 5, 1, 4]
);
}

// Takes the single logical index 11 from the fixture and expects the value 5.
#[test]
fn ree_take_end() {
let taken = take(
ree_array().as_ref(),
PrimitiveArray::from(vec![11]).as_ref(),
)
.unwrap();
assert_eq!(
taken.into_primitive().unwrap().maybe_null_slice::<i32>(),
&[5]
);
}

// Taking index 12 is expected to fail; the `unwrap()` on the result triggers the panic
// that `#[should_panic]` requires.
#[test]
#[should_panic]
fn ree_take_out_of_bounds() {
take(
ree_array().as_ref(),
PrimitiveArray::from(vec![12]).as_ref(),
)
.unwrap();
}

#[test]
fn ree_scalar_at_end() {
let scalar = scalar_at(ree_array().as_ref(), 11).unwrap();
Expand Down Expand Up @@ -320,21 +262,6 @@ mod test {
assert!(re_slice.values().is_empty())
}

// Slices the fixture to the range 4..9 and then takes indices 1, 3 and 4 relative to the
// slice, expecting the scalars 4, 2 and 5.
#[test]
fn sliced_take() {
let sliced = slice(ree_array().as_ref(), 4, 9).unwrap();
let taken = take(
sliced.as_ref(),
PrimitiveArray::from(vec![1, 3, 4]).as_ref(),
)
.unwrap();

assert_eq!(taken.len(), 3);
assert_eq!(scalar_at(taken.as_ref(), 0).unwrap(), 4.into());
assert_eq!(scalar_at(taken.as_ref(), 1).unwrap(), 2.into());
assert_eq!(scalar_at(taken.as_ref(), 2).unwrap(), 5.into());
}

#[test]
fn filter_run_end() {
let arr = ree_array();
Expand Down
103 changes: 103 additions & 0 deletions encodings/runend/src/compute/take.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
use vortex_array::array::PrimitiveArray;
use vortex_array::compute::{take, TakeFn};
use vortex_array::variants::PrimitiveArrayTrait;
use vortex_array::{ArrayData, ArrayLen, IntoArrayData, IntoArrayVariant};
use vortex_dtype::match_each_integer_ptype;
use vortex_error::VortexResult;

use crate::{RunEndArray, RunEndEncoding};

impl TakeFn<RunEndArray> for RunEndEncoding {
    /// Gathers the elements of `array` found at the given logical `indices`.
    ///
    /// Every index is checked against `array.len()`, shifted by `array.offset()`, and
    /// resolved with `find_physical_indices`; the resolved positions are then taken from
    /// `array.values()`.
    ///
    /// # Errors
    ///
    /// Fails if `indices` cannot be turned into a primitive array, if an index is
    /// `>= array.len()` (`OutOfBounds`), or if the lookup or the inner `take` fails.
    #[allow(deprecated)]
    fn take(&self, array: &RunEndArray, indices: &ArrayData) -> VortexResult<ArrayData> {
        let indices = indices.clone().into_primitive()?;

        // Normalise the indices to offset-adjusted `usize` values, whatever their integer type.
        let shifted: Vec<usize> = match_each_integer_ptype!(indices.ptype(), |$P| {
            indices
                .into_maybe_null_slice::<$P>()
                .into_iter()
                .map(|raw| {
                    let logical = raw as usize;
                    if logical < array.len() {
                        Ok(logical + array.offset())
                    } else {
                        vortex_error::vortex_bail!(OutOfBounds: logical, 0, array.len());
                    }
                })
                .collect::<VortexResult<Vec<usize>>>()?
        });

        // Resolve the shifted indices to positions in `values`, widened to `u64`.
        let resolved = array.find_physical_indices(&shifted)?;
        let positions = resolved
            .into_iter()
            .map(|pos| pos as u64)
            .collect::<Vec<_>>();
        let positions_array = PrimitiveArray::from(positions).into_array();

        take(array.values(), &positions_array)
    }
}

// Unit tests for `take` on run-end encoded arrays, including a sliced array.
#[cfg(test)]
mod test {
use vortex_array::array::PrimitiveArray;
use vortex_array::compute::{scalar_at, slice, take};
use vortex_array::{IntoArrayVariant, ToArrayData};

use crate::RunEndArray;

// Shared fixture: the 12 values [1, 1, 1, 4, 4, 4, 2, 2, 5, 5, 5, 5] passed through
// `RunEndArray::encode`.
pub(crate) fn ree_array() -> RunEndArray {
RunEndArray::encode(
PrimitiveArray::from(vec![1, 1, 1, 4, 4, 4, 2, 2, 5, 5, 5, 5]).to_array(),
)
.unwrap()
}

// Indices 9, 8, 1 and 3 of the fixture hold the values 5, 5, 1 and 4.
#[test]
fn ree_take() {
let taken = take(
ree_array().as_ref(),
PrimitiveArray::from(vec![9, 8, 1, 3]).as_ref(),
)
.unwrap();
assert_eq!(
taken.into_primitive().unwrap().maybe_null_slice::<i32>(),
&[5, 5, 1, 4]
);
}

// Index 11 is the last valid index of the 12-element fixture and holds 5.
#[test]
fn ree_take_end() {
let taken = take(
ree_array().as_ref(),
PrimitiveArray::from(vec![11]).as_ref(),
)
.unwrap();
assert_eq!(
taken.into_primitive().unwrap().maybe_null_slice::<i32>(),
&[5]
);
}

// Index 12 equals the fixture length, so `take` returns an error and `unwrap()` panics.
#[test]
#[should_panic]
fn ree_take_out_of_bounds() {
take(
ree_array().as_ref(),
PrimitiveArray::from(vec![12]).as_ref(),
)
.unwrap();
}

// Slicing 4..9 leaves the logical values [4, 4, 2, 2, 5]; indices 1, 3 and 4 of the
// slice are therefore 4, 2 and 5.
#[test]
fn sliced_take() {
let sliced = slice(ree_array().as_ref(), 4, 9).unwrap();
let taken = take(
sliced.as_ref(),
PrimitiveArray::from(vec![1, 3, 4]).as_ref(),
)
.unwrap();

assert_eq!(taken.len(), 3);
assert_eq!(scalar_at(taken.as_ref(), 0).unwrap(), 4.into());
assert_eq!(scalar_at(taken.as_ref(), 1).unwrap(), 2.into());
assert_eq!(scalar_at(taken.as_ref(), 2).unwrap(), 5.into());
}
}
8 changes: 4 additions & 4 deletions vortex-array/src/array/bool/compute/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ impl ComputeVTable for BoolEncoding {
Some(self)
}

fn fill_null_fn(&self) -> Option<&dyn FillNullFn<ArrayData>> {
// Returns this encoding itself as the `FillNullFn` implementation.
Some(self)
}

fn filter_fn(&self) -> Option<&dyn FilterFn<ArrayData>> {
// Returns this encoding itself as the `FilterFn` implementation.
Some(self)
}
Expand All @@ -46,8 +50,4 @@ impl ComputeVTable for BoolEncoding {
fn take_fn(&self) -> Option<&dyn TakeFn<ArrayData>> {
// Returns this encoding itself as the `TakeFn` implementation.
Some(self)
}

fn fill_null_fn(&self) -> Option<&dyn FillNullFn<ArrayData>> {
// Returns this encoding itself as the `FillNullFn` implementation.
Some(self)
}
}
8 changes: 4 additions & 4 deletions vortex-array/src/array/chunked/compute/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ impl ComputeVTable for ChunkedEncoding {
Some(self)
}

fn fill_null_fn(&self) -> Option<&dyn FillNullFn<ArrayData>> {
// Returns this encoding itself as the `FillNullFn` implementation.
Some(self)
}

fn filter_fn(&self) -> Option<&dyn FilterFn<ArrayData>> {
// Returns this encoding itself as the `FilterFn` implementation.
Some(self)
}
Expand All @@ -54,10 +58,6 @@ impl ComputeVTable for ChunkedEncoding {
fn take_fn(&self) -> Option<&dyn TakeFn<ArrayData>> {
// Returns this encoding itself as the `TakeFn` implementation.
Some(self)
}

fn fill_null_fn(&self) -> Option<&dyn FillNullFn<ArrayData>> {
// Returns this encoding itself as the `FillNullFn` implementation.
Some(self)
}
}

impl CastFn<ChunkedArray> for ChunkedEncoding {
Expand Down
14 changes: 7 additions & 7 deletions vortex-array/src/compute/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,13 @@ pub trait ComputeVTable {
None
}

/// Fill null values with the given fill value. The resulting array is non-nullable.
///
/// See: [FillNullFn].
fn fill_null_fn(&self) -> Option<&dyn FillNullFn<ArrayData>> {
// Default: no fill-null implementation unless the encoding overrides this method.
None
}

/// Filter an array with a given mask.
///
/// See: [FilterFn].
Expand Down Expand Up @@ -132,11 +139,4 @@ pub trait ComputeVTable {
fn take_fn(&self) -> Option<&dyn TakeFn<ArrayData>> {
// Default: no take implementation unless the encoding overrides this method.
None
}

/// Fill null values with the given fill value. The resulting array is non-nullable.
///
/// See: [FillNullFn].
fn fill_null_fn(&self) -> Option<&dyn FillNullFn<ArrayData>> {
// Default: no fill-null implementation unless the encoding overrides this method.
None
}
}

0 comments on commit e81435b

Please sign in to comment.