diff --git a/datafusion/sqllogictest/test_files/monotonic_projection_test.slt b/datafusion/sqllogictest/test_files/monotonic_projection_test.slt index 4c58c56a87a1..48e79621d850 100644 --- a/datafusion/sqllogictest/test_files/monotonic_projection_test.slt +++ b/datafusion/sqllogictest/test_files/monotonic_projection_test.slt @@ -65,11 +65,11 @@ SortPreservingMergeExec: [a@0 ASC NULLS LAST,b@2 ASC NULLS LAST] ----RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 ------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b], output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST], has_header=true -// Cast to larger types as well as preserving ordering -// doesn't invalidate lexicographical ordering. -// Hence '[CAST(a AS BIGINT) AS a_big ASC, b ASC]' -// is valid for the given ordering: '[a ASC, b ASC]'. -// See discussion for rationale: https://github.com/apache/arrow-datafusion/issues/8838#issue-2077714891 +# Cast to larger types as well as preserving ordering +# doesn't invalidate lexicographical ordering. +# Hence '[CAST(a AS BIGINT) AS a_big ASC, b ASC]' +# is valid for the given ordering: '[a ASC, b ASC]'. 
+# See discussion for rationale: https://github.com/apache/arrow-datafusion/issues/8838#issue-2077714891 query TT EXPLAIN SELECT a, CAST(a AS BIGINT) AS a_big, b @@ -117,6 +117,8 @@ ProjectionExec: expr=[a@0 as a, a@0 as a_big, b@1 as b] # test for cast Utf8 +# (must actually sort as the sort order for a number cast to utf8 is different than for int) +# See discussion: https://github.com/apache/arrow-datafusion/pull/9127#discussion_r1492336709 query TT EXPLAIN SELECT @@ -135,3 +137,38 @@ SortPreservingMergeExec: [a_str@0 ASC NULLS LAST,b@1 ASC NULLS LAST] ----ProjectionExec: expr=[CAST(a@0 AS Utf8) as a_str, b@1 as b] ------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 --------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b], output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST], has_header=true + +# We cannot determine that a+b is ordered from the +# fact that the ordering [a ASC, b ASC] is satisfied. Hence +# we should see a SortExec with a+b ASC in the plan. +query TT +EXPLAIN +SELECT a, b +FROM multiple_ordered_table +ORDER BY a + b ASC; +---- +logical_plan +Sort: multiple_ordered_table.a + multiple_ordered_table.b ASC NULLS LAST +--TableScan: multiple_ordered_table projection=[a, b] +physical_plan +SortExec: expr=[a@0 + b@1 ASC NULLS LAST] +--CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b], output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST], has_header=true + +# By similar reasoning to the above, sum_expr is not guaranteed to be ordered. +# Hence we should see a SortExec with sum_expr ASC in the plan. 
+query TT +EXPLAIN +SELECT CAST(a+b AS BIGINT) sum_expr, a, b +FROM multiple_ordered_table +ORDER BY sum_expr ASC; +---- +logical_plan +Sort: sum_expr ASC NULLS LAST +--Projection: CAST(multiple_ordered_table.a + multiple_ordered_table.b AS Int64) AS sum_expr, multiple_ordered_table.a, multiple_ordered_table.b +----TableScan: multiple_ordered_table projection=[a, b] +physical_plan +SortPreservingMergeExec: [sum_expr@0 ASC NULLS LAST] +--SortExec: expr=[sum_expr@0 ASC NULLS LAST] +----ProjectionExec: expr=[CAST(a@0 + b@1 AS Int64) as sum_expr, a@0 as a, b@1 as b] +------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b], output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST], has_header=true