Skip to content

Commit

Permalink
feat: Add Date32/Date64 in aggregate fuzz testing (#13041)
Browse files Browse the repository at this point in the history
* refactor PrimitiveArrayGenerator.

* support Date32/Date64 type in data generator.

* fix format.

* remove unnecessary type para in PrimitiveArrayGenerator.

* introduce FromNative trait and replace the unsafe.
  • Loading branch information
LeslieKid authored Oct 26, 2024
1 parent 7b2284c commit 73cfa6c
Show file tree
Hide file tree
Showing 3 changed files with 126 additions and 56 deletions.
2 changes: 2 additions & 0 deletions datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,8 @@ fn baseline_config() -> DatasetGeneratorConfig {
ColumnDescr::new("u16", DataType::UInt16),
ColumnDescr::new("u32", DataType::UInt32),
ColumnDescr::new("u64", DataType::UInt64),
ColumnDescr::new("date32", DataType::Date32),
ColumnDescr::new("date64", DataType::Date64),
// TODO: remaining date/time columns (e.g. time and timestamp types; Date32/Date64 are covered above)
// todo decimal columns
// begin string columns
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@

use std::sync::Arc;

use arrow::datatypes::{
Date32Type, Date64Type, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type,
Int8Type, UInt16Type, UInt32Type, UInt64Type, UInt8Type,
};
use arrow_array::{ArrayRef, RecordBatch};
use arrow_schema::{DataType, Field, Schema};
use datafusion_common::{arrow_datafusion_err, DataFusionError, Result};
Expand Down Expand Up @@ -222,7 +226,7 @@ macro_rules! generate_string_array {
}

macro_rules! generate_primitive_array {
($SELF:ident, $NUM_ROWS:ident, $BATCH_GEN_RNG:ident, $ARRAY_GEN_RNG:ident, $DATA_TYPE:ident) => {
($SELF:ident, $NUM_ROWS:ident, $BATCH_GEN_RNG:ident, $ARRAY_GEN_RNG:ident, $ARROW_TYPE:ident) => {
paste::paste! {{
let null_pct_idx = $BATCH_GEN_RNG.gen_range(0..$SELF.candidate_null_pcts.len());
let null_pct = $SELF.candidate_null_pcts[null_pct_idx];
Expand All @@ -239,7 +243,7 @@ macro_rules! generate_primitive_array {
rng: $ARRAY_GEN_RNG,
};

generator.[< gen_data_ $DATA_TYPE >]()
generator.gen_data::<$ARROW_TYPE>()
}}}
}

Expand Down Expand Up @@ -297,7 +301,7 @@ impl RecordBatchGenerator {
num_rows,
batch_gen_rng,
array_gen_rng,
i8
Int8Type
)
}
DataType::Int16 => {
Expand All @@ -306,7 +310,7 @@ impl RecordBatchGenerator {
num_rows,
batch_gen_rng,
array_gen_rng,
i16
Int16Type
)
}
DataType::Int32 => {
Expand All @@ -315,7 +319,7 @@ impl RecordBatchGenerator {
num_rows,
batch_gen_rng,
array_gen_rng,
i32
Int32Type
)
}
DataType::Int64 => {
Expand All @@ -324,7 +328,7 @@ impl RecordBatchGenerator {
num_rows,
batch_gen_rng,
array_gen_rng,
i64
Int64Type
)
}
DataType::UInt8 => {
Expand All @@ -333,7 +337,7 @@ impl RecordBatchGenerator {
num_rows,
batch_gen_rng,
array_gen_rng,
u8
UInt8Type
)
}
DataType::UInt16 => {
Expand All @@ -342,7 +346,7 @@ impl RecordBatchGenerator {
num_rows,
batch_gen_rng,
array_gen_rng,
u16
UInt16Type
)
}
DataType::UInt32 => {
Expand All @@ -351,7 +355,7 @@ impl RecordBatchGenerator {
num_rows,
batch_gen_rng,
array_gen_rng,
u32
UInt32Type
)
}
DataType::UInt64 => {
Expand All @@ -360,7 +364,7 @@ impl RecordBatchGenerator {
num_rows,
batch_gen_rng,
array_gen_rng,
u64
UInt64Type
)
}
DataType::Float32 => {
Expand All @@ -369,7 +373,7 @@ impl RecordBatchGenerator {
num_rows,
batch_gen_rng,
array_gen_rng,
f32
Float32Type
)
}
DataType::Float64 => {
Expand All @@ -378,7 +382,25 @@ impl RecordBatchGenerator {
num_rows,
batch_gen_rng,
array_gen_rng,
f64
Float64Type
)
}
DataType::Date32 => {
generate_primitive_array!(
self,
num_rows,
batch_gen_rng,
array_gen_rng,
Date32Type
)
}
DataType::Date64 => {
generate_primitive_array!(
self,
num_rows,
batch_gen_rng,
array_gen_rng,
Date64Type
)
}
DataType::Utf8 => {
Expand Down
134 changes: 90 additions & 44 deletions test-utils/src/array_gen/primitive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,45 @@
// specific language governing permissions and limitations
// under the License.

use arrow::array::{ArrayRef, PrimitiveArray, UInt32Array};
use arrow::datatypes::{
Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, UInt16Type,
UInt32Type, UInt64Type, UInt8Type,
};
use arrow::array::{ArrayRef, ArrowPrimitiveType, PrimitiveArray, UInt32Array};
use arrow::datatypes::DataType;
use rand::distributions::Standard;
use rand::prelude::Distribution;
use rand::rngs::StdRng;
use rand::Rng;

/// Conversion hook for the native (Rust) value types used by the primitive
/// array generator.
///
/// Every conversion is optional: a method returns `None` unless the
/// implementing type explicitly overrides it, so callers can request a
/// conversion without resorting to unchecked casts.
pub trait FromNative: Copy + Default + std::fmt::Debug + Send + Sync {
    /// Builds `Self` from an `i64`.
    ///
    /// The default implementation returns `None`, meaning the conversion is
    /// not supported for the implementing type.
    fn from_i64(_value: i64) -> Option<Self> {
        None
    }
}

// Implements `FromNative` for the native type `$native`.
//
// Every identifier listed after the type names a trait method to override;
// each is generated as `fn <name>(value: $native) -> Option<Self>` and simply
// returns `Some(value)`. Because the parameter type is `$native` itself, a
// listed method must be one whose input type in the trait equals `$native`
// (e.g. `from_i64` for `i64`). Methods that are not listed keep the trait's
// default implementation.
macro_rules! native_type {
    ($native:ty $(, $method:ident)*) => {
        impl FromNative for $native {
            $(
                #[inline]
                fn $method(value: $native) -> Option<Self> {
                    Some(value)
                }
            )*
        }
    };
}

// Native types that keep every `FromNative` default: no conversion is
// overridden, so `from_i64` returns `None` for them.
native_type!(i8);
native_type!(i16);
native_type!(i32);
native_type!(u8);
native_type!(u16);
native_type!(u32);
native_type!(u64);
native_type!(f32);
native_type!(f64);

// `i64` is the only native type that overrides `from_i64`; the generated
// method returns `Some(v)` for its argument.
native_type!(i64, from_i64);

/// Randomly generate primitive array
pub struct PrimitiveArrayGenerator {
/// the total number of primitive values in the output
Expand All @@ -35,46 +66,61 @@ pub struct PrimitiveArrayGenerator {
pub rng: StdRng,
}

macro_rules! impl_gen_data {
($NATIVE_TYPE:ty, $ARROW_TYPE:ident) => {
paste::paste! {
pub fn [< gen_data_ $NATIVE_TYPE >](&mut self) -> ArrayRef {
// table of strings from which to draw
let distinct_primitives: PrimitiveArray<$ARROW_TYPE> = (0..self.num_distinct_primitives)
.map(|_| Some(self.rng.gen::<$NATIVE_TYPE>()))
.collect();
// TODO: support generating more primitive arrays
impl PrimitiveArrayGenerator {
pub fn gen_data<A>(&mut self) -> ArrayRef
where
A: ArrowPrimitiveType,
A::Native: FromNative,
Standard: Distribution<<A as ArrowPrimitiveType>::Native>,
{
// table of primitives from which to draw
let distinct_primitives: PrimitiveArray<A> = (0..self.num_distinct_primitives)
.map(|_| {
Some(match A::DATA_TYPE {
DataType::Int8
| DataType::Int16
| DataType::Int32
| DataType::Int64
| DataType::UInt8
| DataType::UInt16
| DataType::UInt32
| DataType::UInt64
| DataType::Float32
| DataType::Float64
| DataType::Date32 => self.rng.gen::<A::Native>(),

// pick num_strings randomly from the distinct string table
let indicies: UInt32Array = (0..self.num_primitives)
.map(|_| {
if self.rng.gen::<f64>() < self.null_pct {
None
} else if self.num_distinct_primitives > 1 {
let range = 1..(self.num_distinct_primitives as u32);
Some(self.rng.gen_range(range))
} else {
Some(0)
}
})
.collect();
DataType::Date64 => {
// TODO: constrain this range to valid dates if necessary
let date_value = self.rng.gen_range(i64::MIN..=i64::MAX);
let millis_per_day = 86_400_000;
let adjusted_value = date_value - (date_value % millis_per_day);
A::Native::from_i64(adjusted_value).unwrap()
}

let options = None;
arrow::compute::take(&distinct_primitives, &indicies, options).unwrap()
}
}
};
}
_ => {
let arrow_type = A::DATA_TYPE;
panic!("Unsupported arrow data type: {arrow_type}")
}
})
})
.collect();

// TODO: support generating more primitive arrays
impl PrimitiveArrayGenerator {
impl_gen_data!(i8, Int8Type);
impl_gen_data!(i16, Int16Type);
impl_gen_data!(i32, Int32Type);
impl_gen_data!(i64, Int64Type);
impl_gen_data!(u8, UInt8Type);
impl_gen_data!(u16, UInt16Type);
impl_gen_data!(u32, UInt32Type);
impl_gen_data!(u64, UInt64Type);
impl_gen_data!(f32, Float32Type);
impl_gen_data!(f64, Float64Type);
// pick num_primitives values randomly from the distinct primitives table
let indicies: UInt32Array = (0..self.num_primitives)
.map(|_| {
if self.rng.gen::<f64>() < self.null_pct {
None
} else if self.num_distinct_primitives > 1 {
let range = 1..(self.num_distinct_primitives as u32);
Some(self.rng.gen_range(range))
} else {
Some(0)
}
})
.collect();

let options = None;
arrow::compute::take(&distinct_primitives, &indicies, options).unwrap()
}
}

0 comments on commit 73cfa6c

Please sign in to comment.