Skip to content

Commit

Permalink
fix: make sure JOIN ON expression is boolean type (apache#11423)
Browse files Browse the repository at this point in the history
* fix: make sure JOIN ON expression is boolean type

* Applied to DataFrame

* Update datafusion/optimizer/src/analyzer/type_coercion.rs

Co-authored-by: Andrew Lamb <[email protected]>

---------

Co-authored-by: Andrew Lamb <[email protected]>
  • Loading branch information
2 people authored and xinlifoobar committed Jul 17, 2024
1 parent d2e6ac4 commit e1c0042
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 5 deletions.
31 changes: 28 additions & 3 deletions datafusion/core/src/dataframe/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -896,9 +896,8 @@ impl DataFrame {
join_type: JoinType,
on_exprs: impl IntoIterator<Item = Expr>,
) -> Result<DataFrame> {
let expr = on_exprs.into_iter().reduce(Expr::and);
let plan = LogicalPlanBuilder::from(self.plan)
.join_on(right.plan, join_type, expr)?
.join_on(right.plan, join_type, on_exprs)?
.build()?;
Ok(DataFrame {
session_state: self.session_state,
Expand Down Expand Up @@ -1694,7 +1693,7 @@ mod tests {
use crate::test_util::{register_aggregate_csv, test_table, test_table_with_name};

use arrow::array::{self, Int32Array};
use datafusion_common::{Constraint, Constraints};
use datafusion_common::{Constraint, Constraints, ScalarValue};
use datafusion_common_runtime::SpawnedTask;
use datafusion_expr::{
array_agg, cast, create_udf, expr, lit, BuiltInWindowFunction,
Expand Down Expand Up @@ -2555,6 +2554,32 @@ mod tests {
Ok(())
}

#[tokio::test]
async fn join_on_filter_datatype() -> Result<()> {
let left = test_table_with_name("a").await?.select_columns(&["c1"])?;
let right = test_table_with_name("b").await?.select_columns(&["c1"])?;

// JOIN ON untyped NULL
let join = left.clone().join_on(
right.clone(),
JoinType::Inner,
Some(Expr::Literal(ScalarValue::Null)),
)?;
let expected_plan = "CrossJoin:\
\n TableScan: a projection=[c1], full_filters=[Boolean(NULL)]\
\n TableScan: b projection=[c1]";
assert_eq!(expected_plan, format!("{:?}", join.into_optimized_plan()?));

// JOIN ON expression must be boolean type
let join = left.join_on(right, JoinType::Inner, Some(lit("TRUE")))?;
let expected = join.into_optimized_plan().unwrap_err();
assert_eq!(
expected.strip_backtrace(),
"type_coercion\ncaused by\nError during planning: Join condition must be boolean type, but got Utf8"
);
Ok(())
}

#[tokio::test]
async fn join_ambiguous_filter() -> Result<()> {
let left = test_table_with_name("a")
Expand Down
17 changes: 16 additions & 1 deletion datafusion/optimizer/src/analyzer/type_coercion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ impl<'a> TypeCoercionRewriter<'a> {
Self { schema }
}

/// Coerce join equality expressions
/// Coerce join equality expressions and join filter
///
/// Joins must be treated specially as their equality expressions are stored
/// as a parallel list of left and right expressions, rather than a single
Expand All @@ -151,9 +151,24 @@ impl<'a> TypeCoercionRewriter<'a> {
})
.collect::<Result<Vec<_>>>()?;

// Join filter must be boolean
join.filter = join
.filter
.map(|expr| self.coerce_join_filter(expr))
.transpose()?;

Ok(LogicalPlan::Join(join))
}

fn coerce_join_filter(&self, expr: Expr) -> Result<Expr> {
let expr_type = expr.get_type(self.schema)?;
match expr_type {
DataType::Boolean => Ok(expr),
DataType::Null => expr.cast_to(&DataType::Boolean, self.schema),
other => plan_err!("Join condition must be boolean type, but got {other:?}"),
}
}

fn coerce_binary_op(
&self,
left: Expr,
Expand Down
12 changes: 11 additions & 1 deletion datafusion/sqllogictest/test_files/join.slt
Original file line number Diff line number Diff line change
Expand Up @@ -988,7 +988,6 @@ statement ok
DROP TABLE department


# Test issue: https://github.com/apache/datafusion/issues/11269
statement ok
CREATE TABLE t1 (v0 BIGINT) AS VALUES (-503661263);

Expand All @@ -998,11 +997,22 @@ CREATE TABLE t2 (v0 DOUBLE) AS VALUES (-1.663563947387);
statement ok
CREATE TABLE t3 (v0 DOUBLE) AS VALUES (0.05112015193508901);

# Test issue: https://github.com/apache/datafusion/issues/11269
query RR
SELECT t3.v0, t2.v0 FROM t1,t2,t3 WHERE t3.v0 >= t1.v0;
----
0.051120151935 -1.663563947387

# Test issue: https://github.com/apache/datafusion/issues/11414
query IRR
SELECT * FROM t1 INNER JOIN t2 ON NULL RIGHT JOIN t3 ON TRUE;
----
NULL NULL 0.051120151935

# ON expression must be boolean type
query error DataFusion error: type_coercion\ncaused by\nError during planning: Join condition must be boolean type, but got Utf8
SELECT * FROM t1 INNER JOIN t2 ON 'TRUE'

statement ok
DROP TABLE t1;

Expand Down

0 comments on commit e1c0042

Please sign in to comment.