From 6c5823ec25a0fc14a49922b10a3d274e072c4bd6 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Fri, 1 Nov 2024 10:23:23 -0500 Subject: [PATCH] Support `DictionaryArray` in `OVER` clause (#13153) * implement target type selection for range queries on dictionary data types Fixes #13151 * Update type_coercion.rs * Add test * query I? --- .../optimizer/src/analyzer/type_coercion.rs | 28 +++++++++++-------- .../sqllogictest/test_files/dictionary.slt | 6 ++++ 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs index 5d33b58a0241..9793c4c5490f 100644 --- a/datafusion/optimizer/src/analyzer/type_coercion.rs +++ b/datafusion/optimizer/src/analyzer/type_coercion.rs @@ -688,6 +688,21 @@ fn coerce_frame_bound( } } +fn extract_window_frame_target_type(col_type: &DataType) -> Result<DataType> { + if col_type.is_numeric() + || is_utf8_or_large_utf8(col_type) + || matches!(col_type, DataType::Null) + { + Ok(col_type.clone()) + } else if is_datetime(col_type) { + Ok(DataType::Interval(IntervalUnit::MonthDayNano)) + } else if let DataType::Dictionary(_, value_type) = col_type { + extract_window_frame_target_type(value_type) + } else { + return internal_err!("Cannot run range queries on datatype: {col_type:?}"); + } +} + // Coerces the given `window_frame` to use appropriate natural types. // For example, ROWS and GROUPS frames use `UInt64` during calculations. 
fn coerce_window_frame( @@ -703,18 +718,7 @@ fn coerce_window_frame( .map(|s| s.expr.get_type(schema)) .transpose()?; if let Some(col_type) = current_types { - if col_type.is_numeric() - || is_utf8_or_large_utf8(&col_type) - || matches!(col_type, DataType::Null) - { - col_type - } else if is_datetime(&col_type) { - DataType::Interval(IntervalUnit::MonthDayNano) - } else { - return internal_err!( - "Cannot run range queries on datatype: {col_type:?}" - ); - } + extract_window_frame_target_type(&col_type)? } else { return internal_err!("ORDER BY column cannot be empty"); } diff --git a/datafusion/sqllogictest/test_files/dictionary.slt b/datafusion/sqllogictest/test_files/dictionary.slt index 176331f570b0..b6923fcc944d 100644 --- a/datafusion/sqllogictest/test_files/dictionary.slt +++ b/datafusion/sqllogictest/test_files/dictionary.slt @@ -444,3 +444,9 @@ physical_plan 01)CoalesceBatchesExec: target_batch_size=8192 02)--FilterExec: column2@1 = 1 03)----MemoryExec: partitions=1, partition_sizes=[1] + +# Window Functions +query I +select dense_rank() over (order by arrow_cast('abc', 'Dictionary(UInt16, Utf8)')); +---- +1