From 79b0734df6c8d16f244b0c1db956e3567bca4534 Mon Sep 17 00:00:00 2001 From: kamille Date: Wed, 9 Oct 2024 01:51:11 +0800 Subject: [PATCH] make `data_gen_rounds` able to set again, and add more tests. --- .../core/tests/fuzz_cases/aggregate_fuzz.rs | 25 ++++++++++++++++--- .../fuzz_cases/aggregation_fuzzer/fuzzer.rs | 25 ++++++++++++++----- 2 files changed, 40 insertions(+), 10 deletions(-) diff --git a/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs b/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs index 0aad4b502c0e..c0fe3851c059 100644 --- a/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs @@ -62,7 +62,7 @@ async fn test_group_by_single_int64() { // Define data generator config let columns = vec![ - ColumnDescr::new("a", DataType::Int64), + ColumnDescr::new("a", DataType::Int32), ColumnDescr::new("b", DataType::Int64), ColumnDescr::new("c", DataType::Int64), ]; @@ -79,10 +79,13 @@ async fn test_group_by_single_int64() { // Build fuzzer let fuzzer = builder .data_gen_config(data_gen_config) + .data_gen_rounds(32) .add_sql("SELECT b, sum(a) FROM fuzz_table GROUP BY b") + .add_sql("SELECT b, sum(distinct a) FROM fuzz_table GROUP BY b") .add_sql("SELECT b, max(a) FROM fuzz_table GROUP BY b") .add_sql("SELECT b, min(a) FROM fuzz_table GROUP BY b") .add_sql("SELECT b, count(a) FROM fuzz_table GROUP BY b") + .add_sql("SELECT b, count(distinct a) FROM fuzz_table GROUP BY b") .add_sql("SELECT b, avg(a) FROM fuzz_table GROUP BY b") .table_name("fuzz_table") .build(); @@ -97,7 +100,7 @@ async fn test_group_by_single_string() { // Define data generator config let columns = vec![ - ColumnDescr::new("a", DataType::Int64), + ColumnDescr::new("a", DataType::Int32), ColumnDescr::new("b", DataType::Utf8), ColumnDescr::new("c", DataType::Int64), ]; @@ -114,7 +117,14 @@ async fn test_group_by_single_string() { // Build fuzzer let fuzzer = builder .data_gen_config(data_gen_config) + .data_gen_rounds(32) 
.add_sql("SELECT b, sum(a) FROM fuzz_table GROUP BY b") + .add_sql("SELECT b, sum(distinct a) FROM fuzz_table GROUP BY b") + .add_sql("SELECT b, max(a) FROM fuzz_table GROUP BY b") + .add_sql("SELECT b, min(a) FROM fuzz_table GROUP BY b") + .add_sql("SELECT b, count(a) FROM fuzz_table GROUP BY b") + .add_sql("SELECT b, count(distinct a) FROM fuzz_table GROUP BY b") + .add_sql("SELECT b, avg(a) FROM fuzz_table GROUP BY b") .table_name("fuzz_table") .build(); @@ -128,7 +138,7 @@ async fn test_group_by_mixed_string_int64() { // Define data generator config let columns = vec![ - ColumnDescr::new("a", DataType::Int64), + ColumnDescr::new("a", DataType::Int32), ColumnDescr::new("b", DataType::Utf8), ColumnDescr::new("c", DataType::Int64), ColumnDescr::new("d", DataType::Int32), @@ -146,7 +156,14 @@ async fn test_group_by_mixed_string_int64() { // Build fuzzer let fuzzer = builder .data_gen_config(data_gen_config) - .add_sql("SELECT b, c, sum(a) FROM fuzz_table GROUP BY b,c") + .data_gen_rounds(32) + .add_sql("SELECT b, c, sum(a) FROM fuzz_table GROUP BY b, c") + .add_sql("SELECT b, c, sum(distinct a) FROM fuzz_table GROUP BY b,c") + .add_sql("SELECT b, c, max(a) FROM fuzz_table GROUP BY b, c") + .add_sql("SELECT b, c, min(a) FROM fuzz_table GROUP BY b, c") + .add_sql("SELECT b, c, count(a) FROM fuzz_table GROUP BY b, c") + .add_sql("SELECT b, c, count(distinct a) FROM fuzz_table GROUP BY b, c") + .add_sql("SELECT b, c, avg(a) FROM fuzz_table GROUP BY b, c") .table_name("fuzz_table") .build(); diff --git a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs index 3f5aa3577cc7..c9a28d9f2602 100644 --- a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs +++ b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs @@ -29,11 +29,6 @@ use crate::fuzz_cases::aggregation_fuzzer::{ run_sql, }; -/// Rounds to call `generate` of [`DatasetGenerator`] -/// in [`AggregationFuzzer`] 
`len(sort_keys_set) + 1` datasets -/// will be generated for testing. -const DATA_GEN_ROUNDS: usize = 16; - /// Rounds to call `generate` of [`SessionContextGenerator`] /// in [`AggregationFuzzer`], `ctx_gen_rounds` random [`SessionContext`] /// will generated for each dataset for testing. @@ -50,6 +45,9 @@ pub struct AggregationFuzzerBuilder { /// Used to generate `dataset_generator` in [`AggregationFuzzer`], /// no default, and required to set data_gen_config: Option, + + /// See `data_gen_rounds` in [`AggregationFuzzer`], default 16 + data_gen_rounds: usize, } impl AggregationFuzzerBuilder { @@ -58,6 +56,7 @@ impl AggregationFuzzerBuilder { candidate_sqls: Vec::new(), table_name: None, data_gen_config: None, + data_gen_rounds: 16, } } @@ -76,11 +75,17 @@ impl AggregationFuzzerBuilder { self } + pub fn data_gen_rounds(mut self, data_gen_rounds: usize) -> Self { + self.data_gen_rounds = data_gen_rounds; + self + } + pub fn build(self) -> AggregationFuzzer { assert!(!self.candidate_sqls.is_empty()); let candidate_sqls = self.candidate_sqls; let table_name = self.table_name.expect("table_name is required"); let data_gen_config = self.data_gen_config.expect("data_gen_config is required"); + let data_gen_rounds = self.data_gen_rounds; let dataset_generator = DatasetGenerator::new(data_gen_config); @@ -88,6 +93,7 @@ impl AggregationFuzzerBuilder { candidate_sqls, table_name, dataset_generator, + data_gen_rounds, } } } @@ -110,6 +116,13 @@ pub struct AggregationFuzzer { /// Dataset generator used to randomly generate datasets dataset_generator: DatasetGenerator, + + /// Rounds to call `generate` of [`DatasetGenerator`], + /// `len(sort_keys_set) + 1` datasets will be generated for testing. + /// + /// It is suggested to set a value at least 2x the number of + /// `candidate_sqls` for better test coverage. 
+ data_gen_rounds: usize, } /// Query group including the tested dataset and its sql query @@ -124,7 +137,7 @@ impl AggregationFuzzer { let mut rng = thread_rng(); // Loop to generate datasets and its query - for _ in 0..DATA_GEN_ROUNDS { + for _ in 0..self.data_gen_rounds { // Generate datasets first let datasets = self .dataset_generator