Skip to content

Commit

Permalink
support unnest as subexpression (#9592)
Browse files Browse the repository at this point in the history
* support unnest as subexpression and multiple unnests in projection

* update alias unnest

* remove unnest multiple columns

* rename mulitple unnest columns to multiple unnest functions in selection

* fix: move column name to if branch

---------

Co-authored-by: jefffffyang <[email protected]>
  • Loading branch information
YjyJeff and jefffffyang authored Mar 14, 2024
1 parent c2787c7 commit 8b4a8e6
Show file tree
Hide file tree
Showing 3 changed files with 123 additions and 16 deletions.
20 changes: 18 additions & 2 deletions datafusion/expr/src/expr_schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ use crate::{utils, LogicalPlan, Projection, Subquery};
use arrow::compute::can_cast_types;
use arrow::datatypes::{DataType, Field};
use datafusion_common::{
internal_err, plan_datafusion_err, plan_err, Column, DFField, ExprSchema, Result,
internal_err, not_impl_err, plan_datafusion_err, plan_err, Column, DFField,
ExprSchema, Result,
};
use std::collections::HashMap;
use std::sync::Arc;
Expand Down Expand Up @@ -113,7 +114,22 @@ impl ExprSchemable for Expr {
.iter()
.map(|e| e.get_type(schema))
.collect::<Result<Vec<_>>>()?;
Ok(arg_data_types[0].clone())
let arg_data_type = arg_data_types[0].clone();
// Unnest's output type is the inner type of the list
match arg_data_type{
DataType::List(field) | DataType::LargeList(field) | DataType::FixedSizeList(field, _) =>{
Ok(field.data_type().clone())
}
DataType::Struct(_) => {
not_impl_err!("unnest() does not support struct yet")
}
DataType::Null => {
not_impl_err!("unnest() does not support null yet")
}
_ => {
plan_err!("unnest() can only be applied to array, struct and null")
}
}
}
Expr::ScalarFunction(ScalarFunction { func_def, args }) => {
let arg_data_types = args
Expand Down
52 changes: 40 additions & 12 deletions datafusion/sql/src/select.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ use crate::utils::{
resolve_columns, resolve_positions_to_exprs,
};

use datafusion_common::tree_node::{Transformed, TreeNode};
use datafusion_common::{not_impl_err, plan_err, DataFusionError, Result};
use datafusion_common::{Column, UnnestOptions};
use datafusion_expr::expr::{Alias, Unnest};
Expand Down Expand Up @@ -276,33 +277,59 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
Ok(plan)
}

// Try converting Expr::Unnest to LogicalPlan::Unnest if possible, otherwise do the final projection
/// Try converting Expr(Unnest(Expr)) to Projection/Unnest/Projection
pub(super) fn try_process_unnest(
&self,
input: LogicalPlan,
select_exprs: Vec<Expr>,
) -> Result<LogicalPlan> {
let mut unnest_columns = vec![];
// Map unnest expressions to their argument
let projection_exprs = select_exprs
let mut inner_projection_exprs = vec![];

let outer_projection_exprs = select_exprs
.into_iter()
.map(|expr| {
if let Expr::Unnest(Unnest { ref exprs }) = expr {
let column_name = expr.display_name()?;
unnest_columns.push(column_name.clone());
// Add alias for the argument expression, to avoid naming conflicts with other expressions
// in the select list. For example: `select unnest(col1), col1 from t`.
Ok(exprs[0].clone().alias(column_name))
let Transformed {
data: transformed_expr,
transformed,
tnr: _,
} = expr.transform_up_mut(&mut |expr: Expr| {
if let Expr::Unnest(Unnest { ref exprs }) = expr {
let column_name = expr.display_name()?;
unnest_columns.push(column_name.clone());
// Add alias for the argument expression, to avoid naming conflicts with other expressions
// in the select list. For example: `select unnest(col1), col1 from t`.
inner_projection_exprs
.push(exprs[0].clone().alias(column_name.clone()));
Ok(Transformed::yes(Expr::Column(Column::from_name(
column_name,
))))
} else {
Ok(Transformed::no(expr))
}
})?;

if !transformed {
if matches!(&transformed_expr, Expr::Column(_)) {
inner_projection_exprs.push(transformed_expr.clone());
Ok(transformed_expr)
} else {
// We need to evaluate the expr in the inner projection,
// outer projection just select its name
let column_name = transformed_expr.display_name()?;
inner_projection_exprs.push(transformed_expr);
Ok(Expr::Column(Column::from_name(column_name)))
}
} else {
Ok(expr)
Ok(transformed_expr)
}
})
.collect::<Result<Vec<_>>>()?;

// Do the final projection
if unnest_columns.is_empty() {
LogicalPlanBuilder::from(input)
.project(projection_exprs)?
.project(inner_projection_exprs)?
.build()
} else {
if unnest_columns.len() > 1 {
Expand All @@ -312,8 +339,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
// Set preserve_nulls to false to ensure compatibility with DuckDB and PostgreSQL
let unnest_options = UnnestOptions::new().with_preserve_nulls(false);
LogicalPlanBuilder::from(input)
.project(projection_exprs)?
.project(inner_projection_exprs)?
.unnest_column_with_options(unnest_column, unnest_options)?
.project(outer_projection_exprs)?
.build()
}
}
Expand Down
67 changes: 65 additions & 2 deletions datafusion/sqllogictest/test_files/unnest.slt
Original file line number Diff line number Diff line change
Expand Up @@ -135,11 +135,10 @@ select array_remove(column1, 4), unnest(column2), column3 * 10 from unnest_table
query error DataFusion error: Error during planning: unnest\(\) can only be applied to array, struct and null
select unnest(column3) from unnest_table;

## Unnest multiple columns
## Multiple unnest functions in selection
query error DataFusion error: This feature is not implemented: Only support single unnest expression for now
select unnest(column1), unnest(column2) from unnest_table;


## Unnest scalar in select list
query error DataFusion error: Error during planning: unnest\(\) can only be applied to array, struct and null
select unnest(1);
Expand Down Expand Up @@ -254,5 +253,69 @@ select * from unnest([1,2,(select sum(column3) from unnest_table)]);
2
10

## Unnest is the sub-expression of other expression
query II
select unnest(column1) as a, column3 from unnest_table;
----
1 1
2 1
3 1
4 2
5 2
6 3
12 NULL

query BI
select unnest(column1) is not null, column3 from unnest_table;
----
true 1
true 1
true 1
true 2
true 2
true 3
true NULL

query II
select -unnest(column1) as a, column3 from unnest_table;
----
-1 1
-2 1
-3 1
-4 2
-5 2
-6 3
-12 NULL

query II
select unnest(array_remove(column1, 3)) as a, column3 from unnest_table;
----
1 1
2 1
4 2
5 2
6 3
12 NULL

query II
select unnest(array_remove(column1, 3)) as c1, column3 from unnest_table order by c1 desc, column3;
----
12 NULL
6 3
5 2
4 2
2 1
1 1

query II
select unnest(array_remove(column1, 3)) - 1 as c1, column3 from unnest_table;
----
0 1
1 1
3 2
4 2
5 3
11 NULL

statement ok
drop table unnest_table;

0 comments on commit 8b4a8e6

Please sign in to comment.