Skip to content

Commit

Permalink
Fuzzer performs multiple operations on the underlying array instead o…
Browse files Browse the repository at this point in the history
…f just one (#766)

Includes fixes for
- ZigZagArray scalar_at returns wrong nullability
- RunEndArray slice wrongly slicing validity
- Scalar comparison is nullability invariant
  • Loading branch information
robert3005 authored Sep 10, 2024
1 parent 90e0beb commit edb014b
Show file tree
Hide file tree
Showing 19 changed files with 737 additions and 302 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

137 changes: 132 additions & 5 deletions encodings/runend/src/compute.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ impl SliceFn for RunEndArray {
Ok(Self::with_offset_and_size(
slice(&self.ends(), slice_begin, slice_end + 1)?,
slice(&self.values(), slice_begin, slice_end + 1)?,
self.validity().slice(slice_begin, slice_end + 1)?,
self.validity().slice(start, stop)?,
stop - start,
start,
)?
Expand All @@ -106,11 +106,12 @@ impl SliceFn for RunEndArray {

#[cfg(test)]
mod test {
use vortex::array::PrimitiveArray;
use vortex::compute::take;
use vortex::array::{BoolArray, PrimitiveArray};
use vortex::compute::unary::{scalar_at, try_cast};
use vortex::validity::Validity;
use vortex::{ArrayDType, IntoArrayVariant, ToArray};
use vortex::compute::{slice, take};
use vortex::validity::{ArrayValidity, Validity};
use vortex::{ArrayDType, IntoArray, IntoArrayVariant, ToArray};
use vortex_dtype::{DType, Nullability, PType};
use vortex_scalar::Scalar;

use crate::RunEndArray;
Expand Down Expand Up @@ -168,4 +169,130 @@ mod test {
let scalar = scalar_at(null_ree.array(), 11).unwrap();
assert_eq!(scalar, Scalar::null(null_ree.dtype().clone()));
}

#[test]
fn slice_with_nulls() {
let array = RunEndArray::try_new(
PrimitiveArray::from(vec![3, 6, 8, 12]).into_array(),
PrimitiveArray::from_vec(vec![1, 4, 2, 5], Validity::AllValid).into_array(),
Validity::from(vec![
false, false, false, false, true, true, false, false, false, false, true, true,
]),
)
.unwrap();
let sliced = slice(array.array(), 4, 10).unwrap();
let sliced_primitive = sliced.into_primitive().unwrap();
assert_eq!(
sliced_primitive.maybe_null_slice::<i32>(),
vec![4, 4, 2, 2, 5, 5]
);
assert_eq!(
sliced_primitive
.logical_validity()
.into_array()
.into_bool()
.unwrap()
.boolean_buffer()
.iter()
.collect::<Vec<_>>(),
vec![true, true, false, false, false, false]
)
}

#[test]
fn slice_array() {
let arr = slice(
RunEndArray::try_new(
vec![2u32, 5, 10].into_array(),
vec![1i32, 2, 3].into_array(),
Validity::NonNullable,
)
.unwrap()
.array(),
3,
8,
)
.unwrap();
assert_eq!(
arr.dtype(),
&DType::Primitive(PType::I32, Nullability::NonNullable)
);
assert_eq!(arr.len(), 5);

assert_eq!(
arr.into_primitive().unwrap().maybe_null_slice::<i32>(),
vec![2, 2, 3, 3, 3]
);
}

#[test]
fn slice_end_inclusive() {
let arr = slice(
RunEndArray::try_new(
vec![2u32, 5, 10].into_array(),
vec![1i32, 2, 3].into_array(),
Validity::NonNullable,
)
.unwrap()
.array(),
4,
10,
)
.unwrap();
assert_eq!(
arr.dtype(),
&DType::Primitive(PType::I32, Nullability::NonNullable)
);
assert_eq!(arr.len(), 6);

assert_eq!(
arr.into_primitive().unwrap().maybe_null_slice::<i32>(),
vec![2, 3, 3, 3, 3, 3]
);
}

#[test]
fn decompress() {
let arr = RunEndArray::try_new(
vec![2u32, 5, 10].into_array(),
vec![1i32, 2, 3].into_array(),
Validity::NonNullable,
)
.unwrap();

assert_eq!(
arr.into_primitive().unwrap().maybe_null_slice::<i32>(),
vec![1, 1, 2, 2, 2, 3, 3, 3, 3, 3]
);
}

#[test]
fn take_with_nulls() {
let uncompressed = PrimitiveArray::from_vec(vec![1i32, 0, 3], Validity::AllValid);
let validity = BoolArray::from_vec(
vec![
true, true, false, false, false, true, true, true, true, true,
],
Validity::NonNullable,
);
let arr = RunEndArray::try_new(
vec![2u32, 5, 10].into_array(),
uncompressed.into(),
Validity::Array(validity.into()),
)
.unwrap();

let test_indices = PrimitiveArray::from_vec(vec![0, 2, 4, 6], Validity::NonNullable);
let taken = take(arr.array(), test_indices.array()).unwrap();

assert_eq!(taken.len(), test_indices.len());

let parray = taken.into_primitive().unwrap();
assert_eq!(
(0..4)
.map(|idx| parray.is_valid(idx).then(|| parray.get_as_cast::<i32>(idx)))
.collect::<Vec<Option<i32>>>(),
vec![Some(1), None, None, Some(3),]
);
}
}
105 changes: 3 additions & 102 deletions encodings/runend/src/runend.rs
Original file line number Diff line number Diff line change
Expand Up @@ -164,12 +164,10 @@ impl AcceptArrayVisitor for RunEndArray {
impl ArrayStatisticsCompute for RunEndArray {}

#[cfg(test)]
mod test {
use vortex::array::{BoolArray, PrimitiveArray};
mod tests {
use vortex::compute::unary::scalar_at;
use vortex::compute::{slice, take};
use vortex::validity::{ArrayValidity, Validity};
use vortex::{ArrayDType, IntoArray, IntoArrayVariant};
use vortex::validity::Validity;
use vortex::{ArrayDType, IntoArray};
use vortex_dtype::{DType, Nullability, PType};

use crate::RunEndArray;
Expand All @@ -196,101 +194,4 @@ mod test {
assert_eq!(scalar_at(arr.array(), 5).unwrap(), 3.into());
assert_eq!(scalar_at(arr.array(), 9).unwrap(), 3.into());
}

#[test]
fn slice_array() {
let arr = slice(
RunEndArray::try_new(
vec![2u32, 5, 10].into_array(),
vec![1i32, 2, 3].into_array(),
Validity::NonNullable,
)
.unwrap()
.array(),
3,
8,
)
.unwrap();
assert_eq!(
arr.dtype(),
&DType::Primitive(PType::I32, Nullability::NonNullable)
);
assert_eq!(arr.len(), 5);

assert_eq!(
arr.into_primitive().unwrap().maybe_null_slice::<i32>(),
vec![2, 2, 3, 3, 3]
);
}

#[test]
fn slice_end_inclusive() {
let arr = slice(
RunEndArray::try_new(
vec![2u32, 5, 10].into_array(),
vec![1i32, 2, 3].into_array(),
Validity::NonNullable,
)
.unwrap()
.array(),
4,
10,
)
.unwrap();
assert_eq!(
arr.dtype(),
&DType::Primitive(PType::I32, Nullability::NonNullable)
);
assert_eq!(arr.len(), 6);

assert_eq!(
arr.into_primitive().unwrap().maybe_null_slice::<i32>(),
vec![2, 3, 3, 3, 3, 3]
);
}

#[test]
fn flatten() {
let arr = RunEndArray::try_new(
vec![2u32, 5, 10].into_array(),
vec![1i32, 2, 3].into_array(),
Validity::NonNullable,
)
.unwrap();

assert_eq!(
arr.into_primitive().unwrap().maybe_null_slice::<i32>(),
vec![1, 1, 2, 2, 2, 3, 3, 3, 3, 3]
);
}

#[test]
fn with_nulls() {
let uncompressed = PrimitiveArray::from_vec(vec![1i32, 0, 3], Validity::AllValid);
let validity = BoolArray::from_vec(
vec![
true, true, false, false, false, true, true, true, true, true,
],
Validity::NonNullable,
);
let arr = RunEndArray::try_new(
vec![2u32, 5, 10].into_array(),
uncompressed.into(),
Validity::Array(validity.into()),
)
.unwrap();

let test_indices = PrimitiveArray::from_vec(vec![0, 2, 4, 6], Validity::NonNullable);
let taken = take(arr.array(), test_indices.array()).unwrap();

assert_eq!(taken.len(), test_indices.len());

let parray = taken.into_primitive().unwrap();
assert_eq!(
(0..4)
.map(|idx| parray.is_valid(idx).then(|| parray.get_as_cast::<i32>(idx)))
.collect::<Vec<Option<i32>>>(),
vec![Some(1), None, None, Some(3),]
);
}
}
36 changes: 17 additions & 19 deletions encodings/zigzag/src/compress.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,49 +7,47 @@ use zigzag::ZigZag as ExternalZigZag;

use crate::ZigZagArray;

pub fn zigzag_encode(parray: &PrimitiveArray) -> VortexResult<ZigZagArray> {
pub fn zigzag_encode(parray: PrimitiveArray) -> VortexResult<ZigZagArray> {
let validity = parray.validity();
let encoded = match parray.ptype() {
PType::I8 => zigzag_encode_primitive::<i8>(parray.maybe_null_slice(), parray.validity()),
PType::I16 => zigzag_encode_primitive::<i16>(parray.maybe_null_slice(), parray.validity()),
PType::I32 => zigzag_encode_primitive::<i32>(parray.maybe_null_slice(), parray.validity()),
PType::I64 => zigzag_encode_primitive::<i64>(parray.maybe_null_slice(), parray.validity()),
PType::I8 => zigzag_encode_primitive::<i8>(parray.into_maybe_null_slice(), validity),
PType::I16 => zigzag_encode_primitive::<i16>(parray.into_maybe_null_slice(), validity),
PType::I32 => zigzag_encode_primitive::<i32>(parray.into_maybe_null_slice(), validity),
PType::I64 => zigzag_encode_primitive::<i64>(parray.into_maybe_null_slice(), validity),
_ => vortex_bail!("Unsupported ptype {}", parray.ptype()),
};
ZigZagArray::try_new(encoded.into_array())
}

fn zigzag_encode_primitive<T: ExternalZigZag + NativePType>(
values: &[T],
values: Vec<T>,
validity: Validity,
) -> PrimitiveArray
where
<T as ExternalZigZag>::UInt: NativePType,
{
let mut encoded = Vec::with_capacity(values.len());
encoded.extend(values.iter().map(|v| T::encode(*v)));
PrimitiveArray::from_vec(encoded.to_vec(), validity)
PrimitiveArray::from_vec(values.into_iter().map(|v| T::encode(v)).collect(), validity)
}

pub fn zigzag_decode(parray: &PrimitiveArray) -> PrimitiveArray {
pub fn zigzag_decode(parray: PrimitiveArray) -> PrimitiveArray {
let validity = parray.validity();
match parray.ptype() {
PType::U8 => zigzag_decode_primitive::<i8>(parray.maybe_null_slice(), parray.validity()),
PType::U16 => zigzag_decode_primitive::<i16>(parray.maybe_null_slice(), parray.validity()),
PType::U32 => zigzag_decode_primitive::<i32>(parray.maybe_null_slice(), parray.validity()),
PType::U64 => zigzag_decode_primitive::<i64>(parray.maybe_null_slice(), parray.validity()),
PType::U8 => zigzag_decode_primitive::<i8>(parray.into_maybe_null_slice(), validity),
PType::U16 => zigzag_decode_primitive::<i16>(parray.into_maybe_null_slice(), validity),
PType::U32 => zigzag_decode_primitive::<i32>(parray.into_maybe_null_slice(), validity),
PType::U64 => zigzag_decode_primitive::<i64>(parray.into_maybe_null_slice(), validity),
_ => panic!("Unsupported ptype {}", parray.ptype()),
}
}

fn zigzag_decode_primitive<T: ExternalZigZag + NativePType>(
values: &[T::UInt],
values: Vec<T::UInt>,
validity: Validity,
) -> PrimitiveArray
where
<T as ExternalZigZag>::UInt: NativePType,
{
let mut encoded = Vec::with_capacity(values.len());
encoded.extend(values.iter().map(|v| T::decode(*v)));
PrimitiveArray::from_vec(encoded, validity)
PrimitiveArray::from_vec(values.into_iter().map(|v| T::decode(v)).collect(), validity)
}

#[cfg(test)]
Expand All @@ -61,7 +59,7 @@ mod test {

#[test]
fn test_compress() {
let compressed = zigzag_encode(&PrimitiveArray::from(Vec::from_iter(
let compressed = zigzag_encode(PrimitiveArray::from(Vec::from_iter(
(-10_000..10_000).map(|i| i as i64),
)))
.unwrap();
Expand Down
Loading

0 comments on commit edb014b

Please sign in to comment.