Skip to content

Commit

Permalink
Support DictionaryArray in OVER clause (apache#13153)
Browse files Browse the repository at this point in the history
* implement target type selection for range queries on dictionary data types

Fixes apache#13151

* Update type_coercion.rs

* Add test

* query I?
  • Loading branch information
adriangb authored Nov 1, 2024
1 parent d2a15b3 commit 6c5823e
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 12 deletions.
28 changes: 16 additions & 12 deletions datafusion/optimizer/src/analyzer/type_coercion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -688,6 +688,21 @@ fn coerce_frame_bound(
}
}

/// Picks the data type that window frame bounds are coerced to when the
/// ORDER BY column has type `col_type`.
///
/// The rules are tried in order:
/// * numeric, UTF-8 / large UTF-8 and `Null` columns keep their own type;
/// * datetime columns use `Interval(MonthDayNano)` bounds;
/// * dictionary columns are resolved through their value type, recursively.
///
/// # Errors
///
/// Returns an internal error when none of the rules above apply. For a
/// dictionary column the reported type is the value type that could not be
/// resolved, since the check runs on the unwrapped type.
fn extract_window_frame_target_type(col_type: &DataType) -> Result<DataType> {
    match col_type {
        // Types whose frame bounds are expressed in the column's own type.
        own_type
            if own_type.is_numeric()
                || is_utf8_or_large_utf8(own_type)
                || matches!(own_type, DataType::Null) =>
        {
            Ok(own_type.clone())
        }
        // Datetime offsets are expressed as intervals.
        temporal if is_datetime(temporal) => {
            Ok(DataType::Interval(IntervalUnit::MonthDayNano))
        }
        // Look through the dictionary encoding at the underlying values.
        DataType::Dictionary(_, value_type) => {
            extract_window_frame_target_type(value_type)
        }
        _ => internal_err!("Cannot run range queries on datatype: {col_type:?}"),
    }
}

// Coerces the given `window_frame` to use appropriate natural types.
// For example, ROWS and GROUPS frames use `UInt64` during calculations.
fn coerce_window_frame(
Expand All @@ -703,18 +718,7 @@ fn coerce_window_frame(
.map(|s| s.expr.get_type(schema))
.transpose()?;
if let Some(col_type) = current_types {
if col_type.is_numeric()
|| is_utf8_or_large_utf8(&col_type)
|| matches!(col_type, DataType::Null)
{
col_type
} else if is_datetime(&col_type) {
DataType::Interval(IntervalUnit::MonthDayNano)
} else {
return internal_err!(
"Cannot run range queries on datatype: {col_type:?}"
);
}
extract_window_frame_target_type(&col_type)?
} else {
return internal_err!("ORDER BY column cannot be empty");
}
Expand Down
6 changes: 6 additions & 0 deletions datafusion/sqllogictest/test_files/dictionary.slt
Original file line number Diff line number Diff line change
Expand Up @@ -444,3 +444,9 @@ physical_plan
01)CoalesceBatchesExec: target_batch_size=8192
02)--FilterExec: column2@1 = 1
03)----MemoryExec: partitions=1, partition_sizes=[1]

# Window Functions
query I
select dense_rank() over (order by arrow_cast('abc', 'Dictionary(UInt16, Utf8)'));
----
1

0 comments on commit 6c5823e

Please sign in to comment.