-
Notifications
You must be signed in to change notification settings - Fork 1.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
add expr::like and expr::notlike to pruning logic #508
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -42,6 +42,7 @@ use crate::{ | |||||
logical_plan::{Expr, Operator}, | ||||||
optimizer::utils, | ||||||
physical_plan::{planner::DefaultPhysicalPlanner, ColumnarValue, PhysicalExpr}, | ||||||
scalar::ScalarValue, | ||||||
}; | ||||||
|
||||||
/// Interface to pass statistics information to [`PruningPredicates`] | ||||||
|
@@ -548,7 +549,7 @@ fn build_predicate_expression( | |||||
// allow partial failure in predicate expression generation | ||||||
// this can still produce a useful predicate when multiple conditions are joined using AND | ||||||
Err(_) => { | ||||||
return Ok(logical_plan::lit(true)); | ||||||
return Ok(unhandled); | ||||||
} | ||||||
}; | ||||||
let corrected_op = expr_builder.correct_operator(op); | ||||||
|
@@ -586,8 +587,45 @@ fn build_predicate_expression( | |||||
.min_column_expr()? | ||||||
.lt_eq(expr_builder.scalar_expr().clone()) | ||||||
} | ||||||
Operator::Like => { | ||||||
match &**right { | ||||||
// If the literal is a 'starts_with' | ||||||
Expr::Literal(ScalarValue::Utf8(Some(string))) | ||||||
if !string.starts_with('%') => | ||||||
{ | ||||||
let scalar_expr = | ||||||
Expr::Literal(ScalarValue::Utf8(Some(string.replace('%', "")))); | ||||||
// Behaves like Eq | ||||||
let min_column_expr = expr_builder.min_column_expr()?; | ||||||
let max_column_expr = expr_builder.max_column_expr()?; | ||||||
min_column_expr | ||||||
.lt_eq(scalar_expr.clone()) | ||||||
.and(scalar_expr.lt_eq(max_column_expr)) | ||||||
} | ||||||
_ => unhandled, | ||||||
} | ||||||
} | ||||||
Operator::NotLike => { | ||||||
match &**right { | ||||||
// If the literal is a 'starts_with' | ||||||
Expr::Literal(ScalarValue::Utf8(Some(string))) | ||||||
if !string.starts_with('%') => | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What about patterns like There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should also consider escaped percent characters in the pattern. Example: There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think as we can evaluate the like expression anyway, it might be easier to support like / not like to the full extent instead of only "startswith". There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I only focused on expressions that don't start with Or how would There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you are right, that makes sense 👍 (I think escaping might be an issue in arrow-rs too) |
||||||
{ | ||||||
let scalar_expr = | ||||||
Expr::Literal(ScalarValue::Utf8(Some(string.replace('%', "")))); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am not sure if just removing For example in a pattern like If instead, for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks, fixed and changed tests |
||||||
// Behaves like Eq | ||||||
let min_column_expr = expr_builder.min_column_expr()?; | ||||||
let max_column_expr = expr_builder.max_column_expr()?; | ||||||
// Inverse of Like | ||||||
min_column_expr | ||||||
.gt_eq(scalar_expr.clone()) | ||||||
.and(scalar_expr.gt_eq(max_column_expr)) | ||||||
} | ||||||
_ => unhandled, | ||||||
} | ||||||
} | ||||||
// other expressions are not supported | ||||||
_ => logical_plan::lit(true), | ||||||
_ => unhandled, | ||||||
}; | ||||||
Ok(statistics_expr) | ||||||
} | ||||||
|
@@ -1095,6 +1133,60 @@ mod tests { | |||||
Ok(()) | ||||||
} | ||||||
|
||||||
#[test] | ||||||
fn row_group_predicate_starts_with() -> Result<()> { | ||||||
let schema = Schema::new(vec![Field::new("c1", DataType::Utf8, true)]); | ||||||
// test LIKE operator that is converted to a 'starts_with' | ||||||
let expr = col("c1").like(lit("Banana%")); | ||||||
let expected_expr = | ||||||
"#c1_min LtEq Utf8(\"Banana\") And Utf8(\"Banana\") LtEq #c1_max"; | ||||||
let predicate_expr = | ||||||
build_predicate_expression(&expr, &schema, &mut RequiredStatColumns::new())?; | ||||||
assert_eq!(format!("{:?}", predicate_expr), expected_expr); | ||||||
|
||||||
Ok(()) | ||||||
} | ||||||
|
||||||
#[test] | ||||||
fn row_group_predicate_like() -> Result<()> { | ||||||
let schema = Schema::new(vec![Field::new("c1", DataType::Utf8, true)]); | ||||||
// test LIKE operator that can't be converted to a 'starts_with' | ||||||
let expr = col("c1").like(lit("%Banana%")); | ||||||
let expected_expr = "Boolean(true)"; | ||||||
let predicate_expr = | ||||||
build_predicate_expression(&expr, &schema, &mut RequiredStatColumns::new())?; | ||||||
assert_eq!(format!("{:?}", predicate_expr), expected_expr); | ||||||
|
||||||
Ok(()) | ||||||
} | ||||||
|
||||||
#[test] | ||||||
fn row_group_predicate_not_starts_with() -> Result<()> { | ||||||
let schema = Schema::new(vec![Field::new("c1", DataType::Utf8, true)]); | ||||||
// test LIKE operator that can't be converted to a 'starts_with' | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
let expr = col("c1").not().like(lit("Banana%")); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think there is a difference between
Suggested change
https://github.com/apache/arrow-datafusion/blob/master/datafusion/src/logical_plan/expr.rs#L455-L457 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This explains why the filter was negated, thanks! |
||||||
let expected_expr = | ||||||
"NOT #c1_min LtEq Utf8(\"Banana\") And Utf8(\"Banana\") LtEq NOT #c1_max"; | ||||||
let predicate_expr = | ||||||
build_predicate_expression(&expr, &schema, &mut RequiredStatColumns::new())?; | ||||||
assert_eq!(format!("{:?}", predicate_expr), expected_expr); | ||||||
|
||||||
Ok(()) | ||||||
} | ||||||
|
||||||
#[test] | ||||||
fn row_group_predicate_not_like() -> Result<()> { | ||||||
let schema = Schema::new(vec![Field::new("c1", DataType::Utf8, true)]); | ||||||
// test LIKE operator that can't be converted to a 'starts_with' | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
let expr = col("c1").not().like(lit("%Banana%")); | ||||||
let expected_expr = "Boolean(true)"; | ||||||
let predicate_expr = | ||||||
build_predicate_expression(&expr, &schema, &mut RequiredStatColumns::new())?; | ||||||
assert_eq!(format!("{:?}", predicate_expr), expected_expr); | ||||||
|
||||||
Ok(()) | ||||||
} | ||||||
|
||||||
#[test] | ||||||
fn row_group_predicate_required_columns() -> Result<()> { | ||||||
let schema = Schema::new(vec![ | ||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
👍 thanks I forgot that