-
Notifications
You must be signed in to change notification settings - Fork 1.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Simplify EXPR LIKE 'constant'
to expr = 'constant'
#13061
Changes from 3 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -34,7 +34,7 @@ use datafusion_common::{ | |
use datafusion_common::{internal_err, DFSchema, DataFusionError, Result, ScalarValue}; | ||
use datafusion_expr::simplify::ExprSimplifyResult; | ||
use datafusion_expr::{ | ||
and, lit, or, BinaryExpr, Case, ColumnarValue, Expr, Like, Operator, Volatility, | ||
and, lit, or, BinaryExpr, Case, ColumnarValue, Expr, Operator, Volatility, | ||
WindowFunctionDefinition, | ||
}; | ||
use datafusion_expr::{expr::ScalarFunction, interval_arithmetic::NullableInterval}; | ||
|
@@ -1470,19 +1470,31 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { | |
}) => Transformed::yes(simplify_regex_expr(left, op, right)?), | ||
|
||
// Rules for Like | ||
Expr::Like(Like { | ||
expr, | ||
pattern, | ||
negated, | ||
escape_char: _, | ||
case_insensitive: _, | ||
}) if !is_null(&expr) | ||
&& matches!( | ||
pattern.as_ref(), | ||
Expr::Literal(ScalarValue::Utf8(Some(pattern_str))) if pattern_str == "%" | ||
) => | ||
{ | ||
Transformed::yes(lit(!negated)) | ||
Expr::Like(ref like_expr) => { | ||
if let Expr::Literal(ScalarValue::Utf8(Some(pattern_str))) = | ||
like_expr.pattern.as_ref() | ||
{ | ||
if !is_null(&like_expr.expr) && pattern_str == "%" { | ||
Transformed::yes(lit(!like_expr.negated)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think this is correct. Did you consider #13061 (comment) ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This code was already here (see diff above). Did I change the logic in any way? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ack. Can you please confirm or disconfirm the correctness concern? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree this is incorrect but this is the logic that was there before this PR. I'm happy to remove it. I guess no one has complained because the case that breaks is There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
thanks for confirming and understood it's pre-existing. Can it be fixed though? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. My latest commit removes that special case. I'm not sure how to fix it, your comment suggests it can't be fixed right? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
actually the opposite. see #13259 |
||
} else if !pattern_str.contains(['%', '_'].as_ref()) { | ||
// If the pattern does not contain any wildcards, we can simplify the like expression to an equality expression | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add TODO to handle escaped search characters in a follow-up. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I made it not simplify if there is an escape character for now |
||
if like_expr.escape_char.is_some() { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: can this condition be checked on the same line with |
||
// TODO: handle escape characters | ||
// These currently aren't anywhere else in DataFusion | ||
Transformed::no(expr) | ||
} else { | ||
Transformed::yes(Expr::BinaryExpr(BinaryExpr { | ||
left: like_expr.expr.clone(), | ||
op: if like_expr.negated { NotEq } else { Eq }, | ||
right: like_expr.pattern.clone(), | ||
})) | ||
} | ||
} else { | ||
Transformed::no(expr) | ||
} | ||
} else { | ||
Transformed::no(expr) | ||
} | ||
} | ||
|
||
// a is not null/unknown --> true (if a is not nullable) | ||
|
@@ -3632,6 +3644,26 @@ mod tests { | |
|
||
let expr = not_ilike(null, "%"); | ||
assert_eq!(simplify(expr), lit_bool_null()); | ||
|
||
// test cases that get converted to equality | ||
let expr = like(col("c1"), "a"); | ||
assert_eq!(simplify(expr), col("c1").eq(lit("a"))); | ||
let expr = not_like(col("c1"), "a"); | ||
assert_eq!(simplify(expr), col("c1").not_eq(lit("a"))); | ||
let expr = like(col("c1"), "a_"); | ||
assert_eq!(simplify(expr), col("c1").like(lit("a_"))); | ||
let expr = not_like(col("c1"), "a_"); | ||
assert_eq!(simplify(expr), col("c1").not_like(lit("a_"))); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. add a test cases where pattern is constant NULL (it won't be simplified today, that's fine) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added in 6c847fa |
||
|
||
// null pattern | ||
let expr = Expr::Like(Like { | ||
negated: false, | ||
expr: Box::new(col("c1")), | ||
pattern: Box::new(Expr::Literal(ScalarValue::Utf8(None))), | ||
escape_char: None, | ||
case_insensitive: false, | ||
}); | ||
assert_eq!(simplify(expr.clone()), expr); | ||
} | ||
|
||
#[test] | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
remove this is correct, but requires updating some test cases.
i am doing this in #13259
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
awesome, I'll wait for your work to be merged then continue here