Skip to content

Commit

Permalink
feat: Add boolean column to aggregate queries for fuzz testing (#13331)
Browse files Browse the repository at this point in the history
* add bool col

* clippy fix

* remove change

* fmt fix

* typo fix
  • Loading branch information
jonathanc-n authored Nov 13, 2024
1 parent f894c7d commit 5467a28
Show file tree
Hide file tree
Showing 4 changed files with 106 additions and 5 deletions.
1 change: 1 addition & 0 deletions datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,7 @@ fn baseline_config() -> DatasetGeneratorConfig {
// low cardinality columns
ColumnDescr::new("u8_low", DataType::UInt8).with_max_num_distinct(10),
ColumnDescr::new("utf8_low", DataType::Utf8).with_max_num_distinct(10),
ColumnDescr::new("bool", DataType::Boolean),
ColumnDescr::new("binary", DataType::Binary),
ColumnDescr::new("large_binary", DataType::LargeBinary),
ColumnDescr::new("binaryview", DataType::BinaryView),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@
use std::sync::Arc;

use arrow::datatypes::{
BinaryType, BinaryViewType, ByteArrayType, ByteViewType, Date32Type, Date64Type,
Decimal128Type, Decimal256Type, Float32Type, Float64Type, Int16Type, Int32Type,
Int64Type, Int8Type, IntervalDayTimeType, IntervalMonthDayNanoType,
BinaryType, BinaryViewType, BooleanType, ByteArrayType, ByteViewType, Date32Type,
Date64Type, Decimal128Type, Decimal256Type, Float32Type, Float64Type, Int16Type,
Int32Type, Int64Type, Int8Type, IntervalDayTimeType, IntervalMonthDayNanoType,
IntervalYearMonthType, LargeBinaryType, LargeUtf8Type, StringViewType,
Time32MillisecondType, Time32SecondType, Time64MicrosecondType, Time64NanosecondType,
TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType,
Expand All @@ -38,8 +38,8 @@ use rand::{
};
use test_utils::{
array_gen::{
BinaryArrayGenerator, DecimalArrayGenerator, PrimitiveArrayGenerator,
StringArrayGenerator,
BinaryArrayGenerator, BooleanArrayGenerator, DecimalArrayGenerator,
PrimitiveArrayGenerator, StringArrayGenerator,
},
stagger_batch,
};
Expand Down Expand Up @@ -269,6 +269,26 @@ macro_rules! generate_decimal_array {
}};
}

// `BooleanArray` gets a dedicated macro because it is a special type in
// Arrow (its values are bit-packed).
macro_rules! generate_boolean_array {
    ($SELF:ident, $NUM_ROWS:ident, $MAX_NUM_DISTINCT:expr, $BATCH_GEN_RNG:ident, $ARRAY_GEN_RNG:ident, $ARROW_TYPE:ident) => {{
        // Randomly choose one of the candidate null percentages
        let pct_slot = $BATCH_GEN_RNG.gen_range(0..$SELF.candidate_null_pcts.len());
        let chosen_null_pct = $SELF.candidate_null_pcts[pct_slot];

        // A boolean column holds at most two distinct values, so the
        // requested maximum is collapsed to either 1 or 2
        let distinct_count = if $MAX_NUM_DISTINCT >= 2 { 2 } else { 1 };

        let mut bool_gen = BooleanArrayGenerator {
            num_booleans: $NUM_ROWS,
            num_distinct_booleans: distinct_count,
            null_pct: chosen_null_pct,
            rng: $ARRAY_GEN_RNG,
        };

        bool_gen.gen_data::<$ARROW_TYPE>()
    }};
}

macro_rules! generate_primitive_array {
($SELF:ident, $NUM_ROWS:ident, $MAX_NUM_DISTINCT:expr, $BATCH_GEN_RNG:ident, $ARRAY_GEN_RNG:ident, $ARROW_TYPE:ident) => {{
let null_pct_idx = $BATCH_GEN_RNG.gen_range(0..$SELF.candidate_null_pcts.len());
Expand Down Expand Up @@ -689,6 +709,16 @@ impl RecordBatchGenerator {
StringViewType
)
}
DataType::Boolean => {
generate_boolean_array! {
self,
num_rows,
max_num_distinct,
batch_gen_rng,
array_gen_rng,
BooleanType
}
}
_ => {
panic!("Unsupported data generator type: {}", col.column_type)
}
Expand Down
68 changes: 68 additions & 0 deletions test-utils/src/array_gen/boolean.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

use arrow::array::{ArrayRef, BooleanArray, BooleanBuilder, UInt32Array};
use arrow::compute::take;
use rand::rngs::StdRng;
use rand::Rng;

/// Randomly generate boolean arrays
///
/// All fields are public; callers fill them in directly and then call
/// `gen_data` to produce one array.
pub struct BooleanArrayGenerator {
/// Number of elements in the generated array (one index is drawn per element).
pub num_booleans: usize,
/// Number of distinct values to draw from. `gen_data` only handles `1`
/// (a single randomly chosen value) or `2` (both `true` and `false`) and
/// panics for any other value.
pub num_distinct_booleans: usize,
/// Null threshold: an element becomes null when a random `f64` drawn from
/// `rng` is below this value.
/// NOTE(review): presumably a fraction in `[0.0, 1.0]` — confirm against callers.
pub null_pct: f64,
/// Random number generator used for every random draw made by `gen_data`.
pub rng: StdRng,
}

impl BooleanArrayGenerator {
    /// Generate a `BooleanArray` (bit-packed values) with `num_booleans` elements.
    ///
    /// A small lookup table holding the distinct values is built first. Then,
    /// for every output row, either a null or an index into that table is
    /// drawn, and the final array is materialised with `take`.
    ///
    /// The type parameter `D` is not used by the body; it is kept so that
    /// existing call sites which supply a type argument keep compiling.
    ///
    /// # Panics
    ///
    /// Panics if `num_distinct_booleans` is neither 1 nor 2.
    pub fn gen_data<D>(&mut self) -> ArrayRef {
        // Table of booleans from which to draw (distinct means 1 or 2)
        let mut table = BooleanBuilder::with_capacity(2);
        match self.num_distinct_booleans {
            // A single distinct value: choose which one at random
            1 => table.append_value(self.rng.gen::<bool>()),
            2 => {
                table.append_value(true);
                table.append_value(false);
            }
            // The field is public, so this arm is reachable through misuse;
            // report the offending value instead of claiming unreachability.
            other => panic!("num_distinct_booleans must be 1 or 2, got {}", other),
        }
        let distinct_booleans: BooleanArray = table.finish();

        // Past the match above the table has exactly one or two entries
        let pick_from_two = self.num_distinct_booleans == 2;

        // Generate indices to select from the distinct booleans; `None`
        // entries become nulls in the output
        let indices: UInt32Array = (0..self.num_booleans)
            .map(|_| {
                if self.rng.gen::<f64>() < self.null_pct {
                    None
                } else if pick_from_two {
                    Some(self.rng.gen_range(0..2u32))
                } else {
                    // Only one entry in the table, no random draw needed
                    Some(0)
                }
            })
            .collect();

        take(&distinct_booleans, &indices, None)
            .expect("every generated index lies within the lookup table")
    }
}
2 changes: 2 additions & 0 deletions test-utils/src/array_gen/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,14 @@
// under the License.

mod binary;
mod boolean;
mod decimal;
mod primitive;
mod random_data;
mod string;

pub use binary::BinaryArrayGenerator;
pub use boolean::BooleanArrayGenerator;
pub use decimal::DecimalArrayGenerator;
pub use primitive::PrimitiveArrayGenerator;
pub use string::StringArrayGenerator;

0 comments on commit 5467a28

Please sign in to comment.