Skip to content

Commit

Permalink
Make `data_gen_rounds` able to be set again, and add more tests.
Browse files Browse the repository at this point in the history
  • Loading branch information
Rachelint committed Oct 8, 2024
1 parent fbf3a6e commit 79b0734
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 10 deletions.
25 changes: 21 additions & 4 deletions datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ async fn test_group_by_single_int64() {

// Define data generator config
let columns = vec![
ColumnDescr::new("a", DataType::Int64),
ColumnDescr::new("a", DataType::Int32),
ColumnDescr::new("b", DataType::Int64),
ColumnDescr::new("c", DataType::Int64),
];
Expand All @@ -79,10 +79,13 @@ async fn test_group_by_single_int64() {
// Build fuzzer
let fuzzer = builder
.data_gen_config(data_gen_config)
.data_gen_rounds(32)
.add_sql("SELECT b, sum(a) FROM fuzz_table GROUP BY b")
.add_sql("SELECT b, sum(distinct a) FROM fuzz_table GROUP BY b")
.add_sql("SELECT b, max(a) FROM fuzz_table GROUP BY b")
.add_sql("SELECT b, min(a) FROM fuzz_table GROUP BY b")
.add_sql("SELECT b, count(a) FROM fuzz_table GROUP BY b")
.add_sql("SELECT b, count(distinct a) FROM fuzz_table GROUP BY b")
.add_sql("SELECT b, avg(a) FROM fuzz_table GROUP BY b")
.table_name("fuzz_table")
.build();
Expand All @@ -97,7 +100,7 @@ async fn test_group_by_single_string() {

// Define data generator config
let columns = vec![
ColumnDescr::new("a", DataType::Int64),
ColumnDescr::new("a", DataType::Int32),
ColumnDescr::new("b", DataType::Utf8),
ColumnDescr::new("c", DataType::Int64),
];
Expand All @@ -114,7 +117,14 @@ async fn test_group_by_single_string() {
// Build fuzzer
let fuzzer = builder
.data_gen_config(data_gen_config)
.data_gen_rounds(32)
.add_sql("SELECT b, sum(a) FROM fuzz_table GROUP BY b")
.add_sql("SELECT b, sum(distinct a) FROM fuzz_table GROUP BY b")
.add_sql("SELECT b, max(a) FROM fuzz_table GROUP BY b")
.add_sql("SELECT b, min(a) FROM fuzz_table GROUP BY b")
.add_sql("SELECT b, count(a) FROM fuzz_table GROUP BY b")
.add_sql("SELECT b, count(distinct a) FROM fuzz_table GROUP BY b")
.add_sql("SELECT b, avg(a) FROM fuzz_table GROUP BY b")
.table_name("fuzz_table")
.build();

Expand All @@ -128,7 +138,7 @@ async fn test_group_by_mixed_string_int64() {

// Define data generator config
let columns = vec![
ColumnDescr::new("a", DataType::Int64),
ColumnDescr::new("a", DataType::Int32),
ColumnDescr::new("b", DataType::Utf8),
ColumnDescr::new("c", DataType::Int64),
ColumnDescr::new("d", DataType::Int32),
Expand All @@ -146,7 +156,14 @@ async fn test_group_by_mixed_string_int64() {
// Build fuzzer
let fuzzer = builder
.data_gen_config(data_gen_config)
.add_sql("SELECT b, c, sum(a) FROM fuzz_table GROUP BY b,c")
.data_gen_rounds(32)
.add_sql("SELECT b, c, sum(a) FROM fuzz_table GROUP BY b, c")
.add_sql("SELECT b, c, sum(distinct a) FROM fuzz_table GROUP BY b,c")
.add_sql("SELECT b, c, max(a) FROM fuzz_table GROUP BY b, c")
.add_sql("SELECT b, c, min(a) FROM fuzz_table GROUP BY b, c")
.add_sql("SELECT b, c, count(a) FROM fuzz_table GROUP BY b, c")
.add_sql("SELECT b, c, count(distinct a) FROM fuzz_table GROUP BY b, c")
.add_sql("SELECT b, c, avg(a) FROM fuzz_table GROUP BY b, c")
.table_name("fuzz_table")
.build();

Expand Down
25 changes: 19 additions & 6 deletions datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,6 @@ use crate::fuzz_cases::aggregation_fuzzer::{
run_sql,
};

/// Rounds to call `generate` of [`DatasetGenerator`]
/// in [`AggregationFuzzer`] `len(sort_keys_set) + 1` datasets
/// will be generated for testing.
const DATA_GEN_ROUNDS: usize = 16;

/// Rounds to call `generate` of [`SessionContextGenerator`]
/// in [`AggregationFuzzer`], `ctx_gen_rounds` random [`SessionContext`]
/// will be generated for each dataset for testing.
Expand All @@ -50,6 +45,9 @@ pub struct AggregationFuzzerBuilder {
/// Used to generate `dataset_generator` in [`AggregationFuzzer`];
/// it has no default and must be set before calling `build`
data_gen_config: Option<DatasetGeneratorConfig>,

/// See `data_gen_rounds` in [`AggregationFuzzer`], default 16
data_gen_rounds: usize,
}

impl AggregationFuzzerBuilder {
Expand All @@ -58,6 +56,7 @@ impl AggregationFuzzerBuilder {
candidate_sqls: Vec::new(),
table_name: None,
data_gen_config: None,
data_gen_rounds: 16,
}
}

Expand All @@ -76,18 +75,25 @@ impl AggregationFuzzerBuilder {
self
}

pub fn data_gen_rounds(mut self, data_gen_rounds: usize) -> Self {
self.data_gen_rounds = data_gen_rounds;
self
}

pub fn build(self) -> AggregationFuzzer {
assert!(!self.candidate_sqls.is_empty());
let candidate_sqls = self.candidate_sqls;
let table_name = self.table_name.expect("table_name is required");
let data_gen_config = self.data_gen_config.expect("data_gen_config is required");
let data_gen_rounds = self.data_gen_rounds;

let dataset_generator = DatasetGenerator::new(data_gen_config);

AggregationFuzzer {
candidate_sqls,
table_name,
dataset_generator,
data_gen_rounds,
}
}
}
Expand All @@ -110,6 +116,13 @@ pub struct AggregationFuzzer {

/// Dataset generator used to randomly generate datasets
dataset_generator: DatasetGenerator,

/// Number of rounds to call `generate` of [`DatasetGenerator`];
/// `len(sort_keys_set) + 1` datasets will be generated for testing.
///
/// It is suggested to set this to at least 2x the number of
/// `candidate_sqls` for better test coverage.
}

/// Query group including the tested dataset and its sql query
Expand All @@ -124,7 +137,7 @@ impl AggregationFuzzer {
let mut rng = thread_rng();

// Loop to generate datasets and its query
for _ in 0..DATA_GEN_ROUNDS {
for _ in 0..self.data_gen_rounds {
// Generate datasets first
let datasets = self
.dataset_generator
Expand Down

0 comments on commit 79b0734

Please sign in to comment.