Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix incorrect ... LIKE '%' simplification with NULLs #13259

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 51 additions & 16 deletions datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1476,13 +1476,28 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> {
negated,
escape_char: _,
case_insensitive: _,
}) if !is_null(&expr)
&& matches!(
pattern.as_ref(),
Expr::Literal(ScalarValue::Utf8(Some(pattern_str))) if pattern_str == "%"
) =>
}) if matches!(
pattern.as_ref(),
Expr::Literal(ScalarValue::Utf8(Some(pattern_str))) if pattern_str == "%"
) || matches!(
pattern.as_ref(),
Expr::Literal(ScalarValue::LargeUtf8(Some(pattern_str))) if pattern_str == "%"
) || matches!(
pattern.as_ref(),
Expr::Literal(ScalarValue::Utf8View(Some(pattern_str))) if pattern_str == "%"
) =>
{
Transformed::yes(lit(!negated))
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this was incorrect

// exp LIKE '%' is
// - when exp is not NULL, it's true
// - when exp is NULL, it's NULL
// exp NOT LIKE '%' is
// - when exp is not NULL, it's false
// - when exp is NULL, it's NULL
Transformed::yes(Expr::Case(Case {
expr: Some(Box::new(Expr::IsNotNull(expr))),
when_then_expr: vec![(Box::new(lit(true)), Box::new(lit(!negated)))],
else_expr: None,
}))
}

// a is not null/unknown --> true (if a is not nullable)
Expand Down Expand Up @@ -2777,10 +2792,22 @@ mod tests {
assert_no_change(regex_match(col("c1"), lit("f_o")));

// empty cases
assert_change(regex_match(col("c1"), lit("")), lit(true));
assert_change(regex_not_match(col("c1"), lit("")), lit(false));
assert_change(regex_imatch(col("c1"), lit("")), lit(true));
assert_change(regex_not_imatch(col("c1"), lit("")), lit(false));
assert_change(
regex_match(col("c1"), lit("")),
if_not_null(col("c1"), true),
);
assert_change(
regex_not_match(col("c1"), lit("")),
if_not_null(col("c1"), false),
);
assert_change(
regex_imatch(col("c1"), lit("")),
if_not_null(col("c1"), true),
);
assert_change(
regex_not_imatch(col("c1"), lit("")),
if_not_null(col("c1"), false),
);

// single character
assert_change(regex_match(col("c1"), lit("x")), like(col("c1"), "%x%"));
Expand Down Expand Up @@ -3606,20 +3633,20 @@ mod tests {

#[test]
fn test_like_and_ilke() {
// test non-null values
// LIKE '%'
let expr = like(col("c1"), "%");
assert_eq!(simplify(expr), lit(true));
assert_eq!(simplify(expr), if_not_null(col("c1"), true));

let expr = not_like(col("c1"), "%");
assert_eq!(simplify(expr), lit(false));
assert_eq!(simplify(expr), if_not_null(col("c1"), false));

let expr = ilike(col("c1"), "%");
assert_eq!(simplify(expr), lit(true));
assert_eq!(simplify(expr), if_not_null(col("c1"), true));

let expr = not_ilike(col("c1"), "%");
assert_eq!(simplify(expr), lit(false));
assert_eq!(simplify(expr), if_not_null(col("c1"), false));

// test null values
// null_constant LIKE '%'
let null = lit(ScalarValue::Utf8(None));
let expr = like(null.clone(), "%");
assert_eq!(simplify(expr), lit_bool_null());
Expand Down Expand Up @@ -4043,4 +4070,12 @@ mod tests {
);
}
}

fn if_not_null(expr: Expr, then: bool) -> Expr {
Expr::Case(Case {
expr: Some(expr.is_not_null().into()),
when_then_expr: vec![(lit(true).into(), lit(then).into())],
else_expr: None,
})
}
}
8 changes: 8 additions & 0 deletions datafusion/sqllogictest/test_files/string/string_literal.slt
Original file line number Diff line number Diff line change
Expand Up @@ -821,3 +821,11 @@ query TT
select ' ', '|'
----
|

query BBB
SELECT
NULL LIKE '%',
'' LIKE '%',
'a' LIKE '%'
----
NULL true true
50 changes: 50 additions & 0 deletions datafusion/sqllogictest/test_files/string/string_query.slt.part
Original file line number Diff line number Diff line change
Expand Up @@ -873,6 +873,56 @@ NULL NULL NULL NULL NULL
#Raphael datafusionДатаФусион false false false false
#NULL NULL NULL NULL NULL NULL

# TODO (https://github.com/apache/datafusion/issues/12637) uncomment additional test projections
query TTBB
SELECT ascii_1, unicode_1,
ascii_1 LIKE '%' AS ascii_1_like_percent,
unicode_1 LIKE '%' AS unicode_1_like_percent
-- ascii_1 LIKE '%%' AS ascii_1_like_percent_percent, -- TODO enable after fixing https://github.com/apache/datafusion/issues/12637
-- unicode_1 LIKE '%%' AS unicode_1_like_percent_percent -- TODO enable after fixing https://github.com/apache/datafusion/issues/12637
FROM test_basic_operator
----
Andrew datafusion📊🔥 true true
Xiangpeng datafusion数据融合 true true
Raphael datafusionДатаФусион true true
under_score un iść core true true
percent pan Tadeusz ma iść w kąt true true
(empty) (empty) true true
NULL NULL NULL NULL
NULL NULL NULL NULL

# TODO (https://github.com/apache/datafusion/issues/12637) uncomment additional test projections
query TTBB
SELECT ascii_1, unicode_1,
ascii_1 NOT LIKE '%' AS ascii_1_not_like_percent,
unicode_1 NOT LIKE '%' AS unicode_1_not_like_percent
-- ascii_1 NOT LIKE '%%' AS ascii_1_not_like_percent_percent, -- TODO enable after fixing https://github.com/apache/datafusion/issues/12637
-- unicode_1 NOT LIKE '%%' AS unicode_1_not_like_percent_percent -- TODO enable after fixing https://github.com/apache/datafusion/issues/12637
FROM test_basic_operator
----
Andrew datafusion📊🔥 false false
Xiangpeng datafusion数据融合 false false
Raphael datafusionДатаФусион false false
under_score un iść core false false
percent pan Tadeusz ma iść w kąt false false
(empty) (empty) false false
NULL NULL NULL NULL
NULL NULL NULL NULL

query T
SELECT ascii_1 FROM test_basic_operator WHERE ascii_1 LIKE '%'
----
Andrew
Xiangpeng
Raphael
under_score
percent
(empty)

query T
SELECT ascii_1 FROM test_basic_operator WHERE ascii_1 NOT LIKE '%'
----

# Test pattern without wildcard characters
query TTBBBB
select ascii_1, unicode_1,
Expand Down