Skip to content

Commit

Permalink
fmt
Browse files Browse the repository at this point in the history
  • Loading branch information
adriangb committed Oct 28, 2024
1 parent 5979426 commit 835eb9b
Showing 1 changed file with 12 additions and 11 deletions.
23 changes: 12 additions & 11 deletions datafusion/core/src/physical_optimizer/pruning.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1582,7 +1582,9 @@ fn build_statistics_expr(
))
}
Operator::LikeMatch => build_like_match(expr_builder).ok_or_else(|| {
plan_datafusion_err!("LIKE expression with wildcard at the beginning is not supported")
plan_datafusion_err!(
"LIKE expression with wildcard at the beginning is not supported"
)
})?,
Operator::Gt => {
// column > literal => (min, max) > literal => max > literal
Expand Down Expand Up @@ -1677,8 +1679,7 @@ fn build_like_match(
))));
(upper_bound_lit, lower_bound_lit)
} else {
let bound =
Arc::new(phys_expr::Literal::new(ScalarValue::Utf8(Some(s.clone()))));
let bound = Arc::new(phys_expr::Literal::new(ScalarValue::Utf8(Some(s.clone()))));
(bound.clone(), bound)
};
let upper_bound_expr = Arc::new(phys_expr::BinaryExpr::new(
Expand Down Expand Up @@ -1707,32 +1708,32 @@ fn increment_utf8(data: &str) -> Option<String> {
/// Helper that reports whether `c` is acceptable as an incremented code point.
///
/// Returns `false` for the Unicode noncharacters U+FDD0..=U+FDEF, U+FFFE and
/// U+FFFF, and `true` for every other `char`. Private-use code points are
/// accepted.
///
/// No surrogate (U+D800..=U+DFFF) or out-of-range (>= U+110000) check is
/// performed: a `char` is always a Unicode scalar value, so such code points
/// can never reach this function.
fn is_valid_unicode(c: char) -> bool {
    !matches!(c, '\u{FDD0}'..='\u{FDEF}' | '\u{FFFE}' | '\u{FFFF}')
}

// Convert string to vector of code points
let mut code_points: Vec<char> = data.chars().collect();

// Work backwards through code points
for idx in (0..code_points.len()).rev() {
let original = code_points[idx] as u32;

// Try incrementing the code point
if let Some(next_char) = char::from_u32(original + 1) {
// Check if it's a valid continuation
Expand Down Expand Up @@ -3593,10 +3594,10 @@ mod tests {

// Test 2-byte UTF-8 sequences
assert_eq!(increment_utf8("ß").unwrap(), "à"); // U+00DF -> U+00E0

// Test 3-byte UTF-8 sequences
assert_eq!(increment_utf8("℣").unwrap(), "ℤ"); // U+2123 -> U+2124

// Test at UTF-8 boundaries
assert_eq!(increment_utf8("\u{7FF}").unwrap(), "\u{800}"); // 2-byte to 3-byte boundary
assert_eq!(increment_utf8("\u{FFFF}").unwrap(), "\u{10000}"); // 3-byte to 4-byte boundary
Expand Down

0 comments on commit 835eb9b

Please sign in to comment.