From be19afca4ca93cf7ae0305aafec10b29a8140a58 Mon Sep 17 00:00:00 2001 From: wiedld Date: Mon, 11 Nov 2024 02:23:23 -0800 Subject: [PATCH] Fix ExprSchema extraction of metadata for Cast expressions. (#13305) * test(12733): reproducers for schema bugs * fix(12733): properly extract field metadata from Cast expr * test(12733): update metadata preservation test, for new contract (a.k.a. cast preserves field metadata) --- datafusion/expr/src/expr_schema.rs | 7 +- .../sqllogictest/test_files/metadata.slt | 69 +++++++++++++++++++ 2 files changed, 72 insertions(+), 4 deletions(-) diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 07a36672f272..f0a6ed89e6e9 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -347,6 +347,7 @@ impl ExprSchemable for Expr { match self { Expr::Column(c) => Ok(schema.metadata(c)?.clone()), Expr::Alias(Alias { expr, .. }) => expr.metadata(schema), + Expr::Cast(Cast { expr, .. }) => expr.metadata(schema), _ => Ok(HashMap::new()), } } @@ -681,13 +682,11 @@ mod tests { .with_data_type(DataType::Int32) .with_metadata(meta.clone()); - // col and alias should be metadata-preserving + // col, alias, and cast should be metadata-preserving assert_eq!(meta, expr.metadata(&schema).unwrap()); assert_eq!(meta, expr.clone().alias("bar").metadata(&schema).unwrap()); - - // cast should drop input metadata since the type has changed assert_eq!( - HashMap::new(), + meta, expr.clone() .cast_to(&DataType::Int64, &schema) .unwrap() diff --git a/datafusion/sqllogictest/test_files/metadata.slt b/datafusion/sqllogictest/test_files/metadata.slt index 8f787254c096..7252c84caf14 100644 --- a/datafusion/sqllogictest/test_files/metadata.slt +++ b/datafusion/sqllogictest/test_files/metadata.slt @@ -168,5 +168,74 @@ LIMIT 1; 2020-09-08T13:42:29.190855123Z + +# distinct (aggregate) alone +query P +SELECT + DISTINCT ts as dist +FROM table_with_metadata; +---- +2020-09-08T13:42:29.190855123 + +# cast alone +query D +SELECT + ts::DATE as casted +FROM table_with_metadata; +---- +2020-09-08 +2020-09-08 +2020-09-08 + +# Regression test: distinct with cast +query D +SELECT DISTINCT (ts::DATE) AS dist + FROM table_with_metadata; +---- +2020-09-08 + + + +# count distinct with group by +query II +SELECT + id AS grp, + COUNT(DISTINCT nonnull_name) as dist +FROM table_with_metadata +GROUP BY grp +order by 1 asc nulls last; +---- +1 1 +3 1 +NULL 1 + +# count (not distinct) & cast, with group by +query TI +SELECT + CAST(id AS TEXT) AS grp, + COUNT(nonnull_name) as dist +FROM table_with_metadata +GROUP BY grp +order by 1 asc nulls last; +---- +1 1 +3 1 +NULL 1 + +# Regression test: count distinct & cast, with group by +query TI +SELECT + CAST(id AS TEXT) AS grp, + COUNT(DISTINCT nonnull_name) as dist +FROM table_with_metadata +GROUP BY grp +order by 1 asc nulls last; +---- +1 1 +3 1 +NULL 1 + + + statement ok drop table table_with_metadata;