-
Notifications
You must be signed in to change notification settings - Fork 1.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
add expr::like and expr::notlike to pruning logic #508
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -42,6 +42,7 @@ use crate::{ | |
logical_plan::{Expr, Operator}, | ||
optimizer::utils, | ||
physical_plan::{planner::DefaultPhysicalPlanner, ColumnarValue, PhysicalExpr}, | ||
scalar::ScalarValue, | ||
}; | ||
|
||
/// Interface to pass statistics information to [`PruningPredicates`] | ||
|
@@ -548,7 +549,7 @@ fn build_predicate_expression( | |
// allow partial failure in predicate expression generation | ||
// this can still produce a useful predicate when multiple conditions are joined using AND | ||
Err(_) => { | ||
return Ok(logical_plan::lit(true)); | ||
return Ok(unhandled); | ||
} | ||
}; | ||
let corrected_op = expr_builder.correct_operator(op); | ||
|
@@ -586,8 +587,47 @@ fn build_predicate_expression( | |
.min_column_expr()? | ||
.lt_eq(expr_builder.scalar_expr().clone()) | ||
} | ||
Operator::Like => { | ||
match &**right { | ||
// If the literal is a 'starts_with' | ||
Expr::Literal(ScalarValue::Utf8(Some(string))) | ||
if !string.starts_with('%') => | ||
{ | ||
// Split the string to get the first part before '%' | ||
let split = string.split('%').next().unwrap().to_string(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. won't this There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Like does not require There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I initially wondered the same thing -- lol! But my conclusion was "no it won't panic" I made a quick playground that shows this working https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=049f4c1640386ff99c3b5e07085e0889 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, won't panic because String::split() will always return at least 1 result, the full string if there's nothing to spilt by |
||
let scalar_expr = Expr::Literal(ScalarValue::Utf8(Some(split))); | ||
// Behaves like Eq | ||
let min_column_expr = expr_builder.min_column_expr()?; | ||
let max_column_expr = expr_builder.max_column_expr()?; | ||
min_column_expr | ||
.lt_eq(scalar_expr.clone()) | ||
.and(scalar_expr.lt_eq(max_column_expr)) | ||
} | ||
_ => unhandled, | ||
} | ||
} | ||
Operator::NotLike => { | ||
match &**right { | ||
// If the literal is a 'starts_with' | ||
Expr::Literal(ScalarValue::Utf8(Some(string))) | ||
if !string.starts_with('%') => | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What about patterns like There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should also consider escaped percent characters in the pattern. Example: There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think as we can evaluate the like expression anyway, it might be easier to support like / not like to the full extent instead of only "startswith". There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I only focused on expressions that don't start with Or how would There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you are right, that makes sense 👍 (I think escaping might be an issue in arrow-rs too) |
||
{ | ||
// Split the string to get the first part before '%' | ||
let split = string.split('%').next().unwrap().to_string(); | ||
let scalar_expr = Expr::Literal(ScalarValue::Utf8(Some(split))); | ||
// Behaves like Eq | ||
let min_column_expr = expr_builder.min_column_expr()?; | ||
let max_column_expr = expr_builder.max_column_expr()?; | ||
// Inverse of Like | ||
min_column_expr | ||
.gt_eq(scalar_expr.clone()) | ||
.and(scalar_expr.gt_eq(max_column_expr)) | ||
} | ||
_ => unhandled, | ||
} | ||
} | ||
// other expressions are not supported | ||
_ => logical_plan::lit(true), | ||
_ => unhandled, | ||
}; | ||
Ok(statistics_expr) | ||
} | ||
|
@@ -1095,6 +1135,60 @@ mod tests { | |
Ok(()) | ||
} | ||
|
||
#[test] | ||
fn row_group_predicate_starts_with() -> Result<()> { | ||
let schema = Schema::new(vec![Field::new("c1", DataType::Utf8, true)]); | ||
// test LIKE operator that is converted to a 'starts_with' | ||
let expr = col("c1").like(lit("Banana%lemon")); | ||
let expected_expr = | ||
"#c1_min LtEq Utf8(\"Banana\") And Utf8(\"Banana\") LtEq #c1_max"; | ||
let predicate_expr = | ||
build_predicate_expression(&expr, &schema, &mut RequiredStatColumns::new())?; | ||
assert_eq!(format!("{:?}", predicate_expr), expected_expr); | ||
|
||
Ok(()) | ||
} | ||
|
||
#[test] | ||
fn row_group_predicate_like() -> Result<()> { | ||
let schema = Schema::new(vec![Field::new("c1", DataType::Utf8, true)]); | ||
// test LIKE operator that can't be converted to a 'starts_with' | ||
let expr = col("c1").like(lit("%Banana%")); | ||
let expected_expr = "Boolean(true)"; | ||
let predicate_expr = | ||
build_predicate_expression(&expr, &schema, &mut RequiredStatColumns::new())?; | ||
assert_eq!(format!("{:?}", predicate_expr), expected_expr); | ||
|
||
Ok(()) | ||
} | ||
|
||
#[test] | ||
fn row_group_predicate_not_starts_with() -> Result<()> { | ||
let schema = Schema::new(vec![Field::new("c1", DataType::Utf8, true)]); | ||
// test NOT LIKE operator that is converted to a 'starts_with' | ||
let expr = col("c1").not_like(lit("Banana%Lemon")); | ||
let expected_expr = | ||
"#c1_min GtEq Utf8(\"Banana\") And Utf8(\"Banana\") GtEq #c1_max"; | ||
let predicate_expr = | ||
build_predicate_expression(&expr, &schema, &mut RequiredStatColumns::new())?; | ||
assert_eq!(format!("{:?}", predicate_expr), expected_expr); | ||
|
||
Ok(()) | ||
} | ||
|
||
#[test] | ||
fn row_group_predicate_not_like() -> Result<()> { | ||
let schema = Schema::new(vec![Field::new("c1", DataType::Utf8, true)]); | ||
// test NOT LIKE operator that can't be converted to a 'starts_with' | ||
let expr = col("c1").not_like(lit("%Banana%")); | ||
let expected_expr = "Boolean(true)"; | ||
let predicate_expr = | ||
build_predicate_expression(&expr, &schema, &mut RequiredStatColumns::new())?; | ||
assert_eq!(format!("{:?}", predicate_expr), expected_expr); | ||
|
||
Ok(()) | ||
} | ||
|
||
#[test] | ||
fn row_group_predicate_required_columns() -> Result<()> { | ||
let schema = Schema::new(vec![ | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
👍 thanks I forgot that