From 1a4af6b1167ea76814fe597bbbdb952424cf806b Mon Sep 17 00:00:00 2001 From: Eric Fredine Date: Mon, 1 Jul 2024 08:24:48 -0700 Subject: [PATCH] Adds support for Dictionary statistics from parquet data pages. (#11195) Co-authored-by: Eric Fredine --- .../core/src/datasource/physical_plan/parquet/statistics.rs | 3 +++ datafusion/core/tests/parquet/arrow_statistics.rs | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs b/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs index f68334ec24ca1..fcecae27a52fb 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs @@ -846,6 +846,9 @@ macro_rules! get_data_page_statistics { }) }).flatten().collect::<Vec<_>>(), ))), + Some(DataType::Dictionary(_, value_type)) => { + [<$stat_type_prefix:lower _ page_statistics>](Some(value_type), $iterator) + }, Some(DataType::Timestamp(unit, timezone)) => { let iter = [<$stat_type_prefix Int64DataPageStatsIterator>]::new($iterator).flatten(); Ok(match unit { diff --git a/datafusion/core/tests/parquet/arrow_statistics.rs b/datafusion/core/tests/parquet/arrow_statistics.rs index b68ebffa10455..ea83c1fa788d2 100644 --- a/datafusion/core/tests/parquet/arrow_statistics.rs +++ b/datafusion/core/tests/parquet/arrow_statistics.rs @@ -1752,7 +1752,7 @@ async fn test_dictionary() { expected_null_counts: UInt64Array::from(vec![1, 0]), expected_row_counts: Some(UInt64Array::from(vec![5, 2])), column_name: "string_dict_i32", - check: Check::RowGroup, + check: Check::Both, } .run();