Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Implicitly plan UNNEST as lateral #13695

Merged
merged 8 commits into from
Dec 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions datafusion/sql/src/relation/join.rs
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ pub(crate) fn is_lateral(factor: &TableFactor) -> bool {
match factor {
TableFactor::Derived { lateral, .. } => *lateral,
TableFactor::Function { lateral, .. } => *lateral,
TableFactor::UNNEST { .. } => true,
_ => false,
}
}
Expand Down
6 changes: 6 additions & 0 deletions datafusion/sql/src/relation/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,12 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
planner_context.set_outer_query_schema(old_query_schema);
planner_context.set_outer_from_schema(Some(old_from_schema));

// We can omit the subquery wrapper if there are no columns
// referencing the outer scope.
if outer_ref_columns.is_empty() {
return Ok(plan);
}

match plan {
LogicalPlan::SubqueryAlias(SubqueryAlias { input, alias, .. }) => {
subquery_alias(
Expand Down
20 changes: 17 additions & 3 deletions datafusion/sql/tests/sql_integration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3129,9 +3129,8 @@ fn lateral_constant() {
\n Cross Join: \
\n TableScan: j1\
\n SubqueryAlias: j2\
\n Subquery:\
\n Projection: Int64(1)\
\n EmptyRelation";
\n Projection: Int64(1)\
\n EmptyRelation";
quick_test(sql, expected);
}

Expand Down Expand Up @@ -3230,6 +3229,21 @@ fn lateral_nested_left_join() {
quick_test(sql, expected);
}

#[test]
fn lateral_unnest() {
let sql = "SELECT * from unnest_table u, unnest(u.array_col)";
let expected = "Projection: *\
\n Cross Join: \
\n SubqueryAlias: u\
\n TableScan: unnest_table\
\n Subquery:\
\n Projection: __unnest_placeholder(outer_ref(u.array_col),depth=1) AS UNNEST(outer_ref(u.array_col))\
\n Unnest: lists[__unnest_placeholder(outer_ref(u.array_col))|depth=1] structs[]\
\n Projection: outer_ref(u.array_col) AS __unnest_placeholder(outer_ref(u.array_col))\
\n EmptyRelation";
quick_test(sql, expected);
}

#[test]
fn hive_aggregate_with_filter() -> Result<()> {
let dialect = &HiveDialect {};
Expand Down
20 changes: 12 additions & 8 deletions datafusion/sqllogictest/test_files/joins.slt
Original file line number Diff line number Diff line change
Expand Up @@ -4058,10 +4058,12 @@ logical_plan
03)----TableScan: join_t1 projection=[t1_id, t1_name]
04)--SubqueryAlias: series
05)----Subquery:
06)------Projection: __unnest_placeholder(generate_series(Int64(1),outer_ref(t1.t1_int)),depth=1) AS i
07)--------Unnest: lists[__unnest_placeholder(generate_series(Int64(1),outer_ref(t1.t1_int)))|depth=1] structs[]
08)----------Projection: generate_series(Int64(1), CAST(outer_ref(t1.t1_int) AS Int64)) AS __unnest_placeholder(generate_series(Int64(1),outer_ref(t1.t1_int)))
09)------------EmptyRelation
06)------Projection: UNNEST(generate_series(Int64(1),outer_ref(t1.t1_int))) AS i
07)--------Subquery:
08)----------Projection: __unnest_placeholder(generate_series(Int64(1),outer_ref(t1.t1_int)),depth=1) AS UNNEST(generate_series(Int64(1),outer_ref(t1.t1_int)))
09)------------Unnest: lists[__unnest_placeholder(generate_series(Int64(1),outer_ref(t1.t1_int)))|depth=1] structs[]
10)--------------Projection: generate_series(Int64(1), CAST(outer_ref(t1.t1_int) AS Int64)) AS __unnest_placeholder(generate_series(Int64(1),outer_ref(t1.t1_int)))
11)----------------EmptyRelation
physical_plan_error This feature is not implemented: Physical plan does not support logical expression OuterReferenceColumn(UInt32, Column { relation: Some(Bare { table: "t1" }), name: "t1_int" })


Expand All @@ -4081,10 +4083,12 @@ logical_plan
03)----TableScan: join_t1 projection=[t1_id, t1_name]
04)--SubqueryAlias: series
05)----Subquery:
06)------Projection: __unnest_placeholder(generate_series(Int64(1),outer_ref(t2.t1_int)),depth=1) AS i
07)--------Unnest: lists[__unnest_placeholder(generate_series(Int64(1),outer_ref(t2.t1_int)))|depth=1] structs[]
08)----------Projection: generate_series(Int64(1), CAST(outer_ref(t2.t1_int) AS Int64)) AS __unnest_placeholder(generate_series(Int64(1),outer_ref(t2.t1_int)))
09)------------EmptyRelation
06)------Projection: UNNEST(generate_series(Int64(1),outer_ref(t2.t1_int))) AS i
07)--------Subquery:
08)----------Projection: __unnest_placeholder(generate_series(Int64(1),outer_ref(t2.t1_int)),depth=1) AS UNNEST(generate_series(Int64(1),outer_ref(t2.t1_int)))
09)------------Unnest: lists[__unnest_placeholder(generate_series(Int64(1),outer_ref(t2.t1_int)))|depth=1] structs[]
10)--------------Projection: generate_series(Int64(1), CAST(outer_ref(t2.t1_int) AS Int64)) AS __unnest_placeholder(generate_series(Int64(1),outer_ref(t2.t1_int)))
11)----------------EmptyRelation
physical_plan_error This feature is not implemented: Physical plan does not support logical expression OuterReferenceColumn(UInt32, Column { relation: Some(Bare { table: "t2" }), name: "t1_int" })


Expand Down
41 changes: 41 additions & 0 deletions datafusion/sqllogictest/test_files/unnest.slt
Original file line number Diff line number Diff line change
Expand Up @@ -860,6 +860,47 @@ select count(*) from (select unnest(range(0, 100000)) id) t inner join (select u
----
100000

# Test implicit LATERAL support for UNNEST
# Issue: https://github.com/apache/datafusion/issues/13659
# TODO: https://github.com/apache/datafusion/issues/10048
query error DataFusion error: This feature is not implemented: Physical plan does not support logical expression OuterReferenceColumn\(List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), Column \{ relation: Some\(Bare \{ table: "u" \}\), name: "column1" \}\)
select * from unnest_table u, unnest(u.column1);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's better to add a logical plan test like

explain select * from unnest_table u, unnest(u.column1);

You can refer to how other LATERAL tests do it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added!

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's good to keep this one and another planning one in the sqllogictest (for end-to-end SQL test).

Just like the following case:

# Test CROSS JOIN LATERAL syntax (planning)
query TT
explain select t1_id, t1_name, i from join_t1 t1 cross join lateral (select * from unnest(generate_series(1, t1_int))) as series(i);

and

# Test CROSS JOIN LATERAL syntax (execution)
# TODO: https://github.com/apache/datafusion/issues/10048
query error DataFusion error: This feature is not implemented: Physical plan does not support logical expression OuterReferenceColumn\(UInt32, Column \{ relation: Some\(Bare \{ table: "t1" \}\), name: "t1_int" \}\)
select t1_id, t1_name, i from join_t1 t1 cross join lateral (select * from unnest(generate_series(1, t1_int))) as series(i);


# Test implicit LATERAL support for UNNEST (INNER JOIN)
query error DataFusion error: This feature is not implemented: Physical plan does not support logical expression OuterReferenceColumn\(List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), Column \{ relation: Some\(Bare \{ table: "u" \}\), name: "column1" \}\)
select * from unnest_table u INNER JOIN unnest(u.column1) AS t(column1) ON u.column3 = t.column1;

# Test implicit LATERAL planning for UNNEST
query TT
explain select * from unnest_table u, unnest(u.column1);
----
logical_plan
01)Cross Join:
02)--SubqueryAlias: u
03)----TableScan: unnest_table projection=[column1, column2, column3, column4, column5]
04)--Subquery:
05)----Projection: __unnest_placeholder(outer_ref(u.column1),depth=1) AS UNNEST(outer_ref(u.column1))
06)------Unnest: lists[__unnest_placeholder(outer_ref(u.column1))|depth=1] structs[]
07)--------Projection: outer_ref(u.column1) AS __unnest_placeholder(outer_ref(u.column1))
08)----------EmptyRelation
physical_plan_error This feature is not implemented: Physical plan does not support logical expression OuterReferenceColumn(List(Field { name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), Column { relation: Some(Bare { table: "u" }), name: "column1" })

# Test implicit LATERAL planning for UNNEST (INNER JOIN)
query TT
explain select * from unnest_table u INNER JOIN unnest(u.column1) AS t(column1) ON u.column3 = t.column1;
----
logical_plan
01)Inner Join: u.column3 = t.column1
02)--SubqueryAlias: u
03)----TableScan: unnest_table projection=[column1, column2, column3, column4, column5]
04)--SubqueryAlias: t
05)----Subquery:
06)------Projection: __unnest_placeholder(outer_ref(u.column1),depth=1) AS column1
07)--------Unnest: lists[__unnest_placeholder(outer_ref(u.column1))|depth=1] structs[]
08)----------Projection: outer_ref(u.column1) AS __unnest_placeholder(outer_ref(u.column1))
09)------------EmptyRelation
physical_plan_error This feature is not implemented: Physical plan does not support logical expression OuterReferenceColumn(List(Field { name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), Column { relation: Some(Bare { table: "u" }), name: "column1" })


## Unnest in subquery
query IIII
Expand Down
Loading