diff --git a/datafusion/core/src/physical_optimizer/pruning.rs b/datafusion/core/src/physical_optimizer/pruning.rs index fdcfefb75624c..e90f0cbdd9f3a 100644 --- a/datafusion/core/src/physical_optimizer/pruning.rs +++ b/datafusion/core/src/physical_optimizer/pruning.rs @@ -3576,272 +3576,235 @@ mod tests { fn prune_utf8_eq() { let (schema, statistics) = utf8_setup(); - // Expression "s1 = 'A'" - // s1 ["A", "Z"] ==> some rows could pass (must keep) - // s1 ["A", "L"] ==> some rows could pass (must keep) - // s1 ["N", "Z"] ==> no rows can pass (not keep) - // s1 ["M", "M"] ==> no rows can pass (not keep) - // s1 [NULL, NULL] ==> unknown (must keep) - // s1 ["A", NULL] ==> unknown (must keep) - // s1 ["", "A"] ==> some rows could pass (must keep) - // s1 ["", ""] ==> no rows can pass (not keep) - // s1 ["AB", "A\u{10ffff}"] ==> no rows can pass (not keep) - // s1 ["A\u{10ffff}\u{10ffff}\u{10ffff}", "A\u{10ffff}\u{10ffff}"] ==> no rows can pass (not keep) + let expr = col("s1").eq(lit("A")); let expected_ret = &[ - true, true, false, false, true, true, true, false, false, false, + // s1 ["A", "Z"] ==> some rows could pass (must keep) + true, // s1 ["A", "L"] ==> some rows could pass (must keep) + true, // s1 ["N", "Z"] ==> no rows can pass (not keep) + false, // s1 ["M", "M"] ==> no rows can pass (not keep) + false, // s1 [NULL, NULL] ==> unknown (must keep) + true, // s1 ["A", NULL] ==> unknown (must keep) + true, // s1 ["", "A"] ==> some rows could pass (must keep) + true, // s1 ["", ""] ==> no rows can pass (not keep) + false, + // s1 ["AB", "A\u{10ffff}"] ==> no rows can pass (not keep) + false, + // s1 ["A\u{10ffff}\u{10ffff}\u{10ffff}", "A\u{10ffff}\u{10ffff}"] ==> no rows can pass (not keep) + false, ]; - prune_with_expr( - // s1 = 'A' - col("s1").eq(lit("A")), - &schema, - &statistics, - expected_ret, - ); + prune_with_expr(expr, &schema, &statistics, expected_ret); - // Expression "s1 = ''" - // s1 ["A", "Z"] ==> no rows can pass (not keep) - // s1 ["A", "L"] ==> no rows can pass (not keep) - // s1 ["N", "Z"] ==> no rows can pass (not keep) - // s1 ["M", "M"] ==> no rows can pass (not keep) - // s1 [NULL, NULL] ==> unknown (must keep) - // s1 ["A", NULL] ==> no rows can pass (not keep) - // s1 ["", "A"] ==> some rows could pass (must keep) - // s1 ["", ""] ==> all rows must pass (must keep) - // s1 ["AB", "A\u{10ffff}"] ==> no rows can pass (not keep) - // s1 ["A\u{10ffff}\u{10ffff}\u{10ffff}", "A\u{10ffff}\u{10ffff}"] ==> no rows can pass (not keep) + let expr = col("s1").eq(lit("")); let expected_ret = &[ - false, false, false, false, true, false, true, true, false, false, + // s1 ["A", "Z"] ==> no rows can pass (not keep) + false, // s1 ["A", "L"] ==> no rows can pass (not keep) + false, // s1 ["N", "Z"] ==> no rows can pass (not keep) + false, // s1 ["M", "M"] ==> no rows can pass (not keep) + false, // s1 [NULL, NULL] ==> unknown (must keep) + true, // s1 ["A", NULL] ==> no rows can pass (not keep) + false, // s1 ["", "A"] ==> some rows could pass (must keep) + true, // s1 ["", ""] ==> all rows must pass (must keep) + true, // s1 ["AB", "A\u{10ffff}"] ==> no rows can pass (not keep) + false, + // s1 ["A\u{10ffff}\u{10ffff}\u{10ffff}", "A\u{10ffff}\u{10ffff}"] ==> no rows can pass (not keep) + false, ]; - prune_with_expr( - // s1 = '' - col("s1").eq(lit("")), - &schema, - &statistics, - expected_ret, - ); + prune_with_expr(expr, &schema, &statistics, expected_ret); } #[test] fn prune_utf8_not_eq() { let (schema, statistics) = utf8_setup(); - // Expression "s1 != 'A'" - // s1 ["A", "Z"] ==> some rows could pass (must keep) - // s1 ["A", "L"] ==> some rows could pass (must keep) - // s1 ["N", "Z"] ==> all rows must pass (must keep) - // s1 ["M", "M"] ==> all rows must pass (must keep) - // s1 [NULL, NULL] ==> unknown (must keep) - // s1 ["A", NULL] ==> unknown (must keep) - // s1 ["", "A"] ==> some rows could pass (must keep) - // s1 ["", ""] ==> all rows must pass (must keep) - // s1 ["AB", "A\u{10ffff}"] ==> all rows must pass (must keep) - // s1 ["A\u{10ffff}\u{10ffff}\u{10ffff}", "A\u{10ffff}\u{10ffff}"] ==> all rows must pass (must keep) - let expected_ret = &[true, true, true, true, true, true, true, true, true, true]; - prune_with_expr( - // s1 != 'A' - col("s1").not_eq(lit("A")), - &schema, - &statistics, - expected_ret, - ); + let expr = col("s1").not_eq(lit("A")); + let expected_ret = &[ + // s1 ["A", "Z"] ==> some rows could pass (must keep) + true, // s1 ["A", "L"] ==> some rows could pass (must keep) + true, // s1 ["N", "Z"] ==> all rows must pass (must keep) + true, // s1 ["M", "M"] ==> all rows must pass (must keep) + true, // s1 [NULL, NULL] ==> unknown (must keep) + true, // s1 ["A", NULL] ==> unknown (must keep) + true, // s1 ["", "A"] ==> some rows could pass (must keep) + true, // s1 ["", ""] ==> all rows must pass (must keep) + true, + // s1 ["AB", "A\u{10ffff}"] ==> all rows must pass (must keep) + true, + // s1 ["A\u{10ffff}\u{10ffff}\u{10ffff}", "A\u{10ffff}\u{10ffff}"] ==> all rows must pass (must keep) + true, + ]; + prune_with_expr(expr, &schema, &statistics, expected_ret); - // Expression "s1 != ''" - // s1 ["A", "Z"] ==> all rows must pass (must keep) - // s1 ["A", "L"] ==> all rows must pass (must keep) - // s1 ["N", "Z"] ==> all rows must pass (must keep) - // s1 ["M", "M"] ==> all rows must pass (must keep) - // s1 [NULL, NULL] ==> unknown (must keep) - // s1 ["A", NULL] ==> unknown (must keep) - // s1 ["", "A"] ==> some rows could pass (must keep) - // s1 ["", ""] ==> no rows can pass (not keep) - // s1 ["AB", "A\u{10ffff}\u{10ffff}\u{10ffff}"] ==> all rows must pass (must keep) - // s1 ["A\u{10ffff}", "A\u{10ffff}\u{10ffff}"] ==> all rows must pass (must keep) - let expected_ret = &[true, true, true, true, true, true, true, false, true, true]; - prune_with_expr( - // s1 != '' - col("s1").not_eq(lit("")), - &schema, - &statistics, - expected_ret, - ); + let expr = col("s1").not_eq(lit("")); + let expected_ret = &[ + // s1 ["A", "Z"] ==> all rows must pass (must keep) + true, // s1 ["A", "L"] ==> all rows must pass (must keep) + true, // s1 ["N", "Z"] ==> all rows must pass (must keep) + true, // s1 ["M", "M"] ==> all rows must pass (must keep) + true, // s1 [NULL, NULL] ==> unknown (must keep) + true, // s1 ["A", NULL] ==> unknown (must keep) + true, // s1 ["", "A"] ==> some rows could pass (must keep) + true, // s1 ["", ""] ==> no rows can pass (not keep) + false, + // s1 ["AB", "A\u{10ffff}\u{10ffff}\u{10ffff}"] ==> all rows must pass (must keep) + true, + // s1 ["A\u{10ffff}", "A\u{10ffff}\u{10ffff}"] ==> all rows must pass (must keep) + true, + ]; + prune_with_expr(expr, &schema, &statistics, expected_ret); } #[test] fn prune_utf8_like_one() { let (schema, statistics) = utf8_setup(); - // Expression "s1 LIKE 'A_'" - // s1 ["A", "Z"] ==> some rows could pass (must keep) - // s1 ["A", "L"] ==> some rows could pass (must keep) - // s1 ["N", "Z"] ==> no rows can pass (not keep) - // s1 ["M", "M"] ==> no rows can pass (not keep) - // s1 [NULL, NULL] ==> unknown (must keep) - // s1 ["A", NULL] ==> unknown (must keep) - // s1 ["", "A"] ==> some rows could pass (must keep) - // s1 ["", ""] ==> no rows can pass (not keep) - // s1 ["AB", "A\u{10ffff}\u{10ffff}\u{10ffff}"] ==> some rows could pass (must keep) - // s1 ["A\u{10ffff}", "A\u{10ffff}\u{10ffff}"] ==> some rows could pass (must keep) + let expr = col("s1").like(lit("A_")); let expected_ret = &[ - true, true, false, false, true, true, true, false, true, true, + // s1 ["A", "Z"] ==> some rows could pass (must keep) + true, // s1 ["A", "L"] ==> some rows could pass (must keep) + true, // s1 ["N", "Z"] ==> no rows can pass (not keep) + false, // s1 ["M", "M"] ==> no rows can pass (not keep) + false, // s1 [NULL, NULL] ==> unknown (must keep) + true, // s1 ["A", NULL] ==> unknown (must keep) + true, // s1 ["", "A"] ==> some rows could pass (must keep) + true, // s1 ["", ""] ==> no rows can pass (not keep) + false, + // s1 ["AB", "A\u{10ffff}\u{10ffff}\u{10ffff}"] ==> some rows could pass (must keep) + true, + // s1 ["A\u{10ffff}", "A\u{10ffff}\u{10ffff}"] ==> some rows could pass (must keep) + true, ]; - prune_with_expr( - // s1 LIKE 'A_' - col("s1").like(lit("A_")), - &schema, - &statistics, - expected_ret, - ); + prune_with_expr(expr, &schema, &statistics, expected_ret); - // Expression "s1 LIKE '_A_'" - // s1 ["A", "Z"] ==> some rows could pass (must keep) - // s1 ["A", "L"] ==> some rows could pass (must keep) - // s1 ["N", "Z"] ==> some rows could pass (must keep) - // s1 ["M", "M"] ==> some rows could pass (must keep) - // s1 [NULL, NULL] ==> unknown (must keep) - // s1 ["A", NULL] ==> unknown (must keep) - // s1 ["", "A"] ==> some rows could pass (must keep) - // s1 ["", ""] ==> some rows could pass (must keep) - // s1 ["AB", "A\u{10ffff}\u{10ffff}\u{10ffff}"] ==> some rows could pass (must keep) - // s1 ["A\u{10ffff}", "A\u{10ffff}\u{10ffff}"] ==> some rows could pass (must keep) - let expected_ret = &[true, true, true, true, true, true, true, true, true, true]; - prune_with_expr( - // s1 LIKE '_A_' - col("s1").like(lit("_A_")), - &schema, - &statistics, - expected_ret, - ); + let expr = col("s1").like(lit("_A_")); + let expected_ret = &[ + // s1 ["A", "Z"] ==> some rows could pass (must keep) + true, // s1 ["A", "L"] ==> some rows could pass (must keep) + true, // s1 ["N", "Z"] ==> some rows could pass (must keep) + true, // s1 ["M", "M"] ==> some rows could pass (must keep) + true, // s1 [NULL, NULL] ==> unknown (must keep) + true, // s1 ["A", NULL] ==> unknown (must keep) + true, // s1 ["", "A"] ==> some rows could pass (must keep) + true, // s1 ["", ""] ==> some rows could pass (must keep) + true, + // s1 ["AB", "A\u{10ffff}\u{10ffff}\u{10ffff}"] ==> some rows could pass (must keep) + true, + // s1 ["A\u{10ffff}", "A\u{10ffff}\u{10ffff}"] ==> some rows could pass (must keep) + true, + ]; + prune_with_expr(expr, &schema, &statistics, expected_ret); - // Expression "s1 LIKE '_'" - // s1 ["A", "Z"] ==> all rows must pass (must keep) - // s1 ["A", "L"] ==> all rows must pass (must keep) - // s1 ["N", "Z"] ==> all rows must pass (must keep) - // s1 ["M", "M"] ==> all rows must pass (must keep) - // s1 [NULL, NULL] ==> unknown (must keep) - // s1 ["A", NULL] ==> unknown (must keep) - // s1 ["", "A"] ==> all rows must pass (must keep) - // s1 ["", ""] ==> all rows must pass (must keep) - // s1 ["AB", "A\u{10ffff}\u{10ffff}\u{10ffff}"] ==> all rows must pass (must keep) - // s1 ["A\u{10ffff}", "A\u{10ffff}\u{10ffff}"] ==> all rows must pass (must keep) - let expected_ret = &[true, true, true, true, true, true, true, true, true, true]; - prune_with_expr( - // s1 LIKE '_' - col("s1").like(lit("_")), - &schema, - &statistics, - expected_ret, - ); + let expr = col("s1").like(lit("_")); + let expected_ret = &[ + // s1 ["A", "Z"] ==> all rows must pass (must keep) + true, // s1 ["A", "L"] ==> all rows must pass (must keep) + true, // s1 ["N", "Z"] ==> all rows must pass (must keep) + true, // s1 ["M", "M"] ==> all rows must pass (must keep) + true, // s1 [NULL, NULL] ==> unknown (must keep) + true, // s1 ["A", NULL] ==> unknown (must keep) + true, // s1 ["", "A"] ==> all rows must pass (must keep) + true, // s1 ["", ""] ==> all rows must pass (must keep) + true, + // s1 ["AB", "A\u{10ffff}\u{10ffff}\u{10ffff}"] ==> all rows must pass (must keep) + true, + // s1 ["A\u{10ffff}", "A\u{10ffff}\u{10ffff}"] ==> all rows must pass (must keep) + true, + ]; + prune_with_expr(expr, &schema, &statistics, expected_ret); - // Expression "s1 LIKE ''" - // s1 ["A", "Z"] ==> no rows can pass (not keep) - // s1 ["A", "L"] ==> no rows can pass (not keep) - // s1 ["N", "Z"] ==> no rows can pass (not keep) - // s1 ["M", "M"] ==> no rows can pass (not keep) - // s1 [NULL, NULL] ==> unknown (must keep) - // s1 ["A", NULL] ==> no rows can pass (not keep) - // s1 ["", "A"] ==> some rows could pass (must keep) - // s1 ["", ""] ==> all rows must pass (must keep) - // s1 ["AB", "A\u{10ffff}\u{10ffff}\u{10ffff}"] ==> no rows can pass (not keep) - // s1 ["A\u{10ffff}", "A\u{10ffff}\u{10ffff}"] ==> no rows can pass (not keep) + let expr = col("s1").like(lit("")); let expected_ret = &[ - false, false, false, false, true, false, true, true, false, false, + // s1 ["A", "Z"] ==> no rows can pass (not keep) + false, // s1 ["A", "L"] ==> no rows can pass (not keep) + false, // s1 ["N", "Z"] ==> no rows can pass (not keep) + false, // s1 ["M", "M"] ==> no rows can pass (not keep) + false, // s1 [NULL, NULL] ==> unknown (must keep) + true, // s1 ["A", NULL] ==> no rows can pass (not keep) + false, // s1 ["", "A"] ==> some rows could pass (must keep) + true, // s1 ["", ""] ==> all rows must pass (must keep) + true, + // s1 ["AB", "A\u{10ffff}\u{10ffff}\u{10ffff}"] ==> no rows can pass (not keep) + false, + // s1 ["A\u{10ffff}", "A\u{10ffff}\u{10ffff}"] ==> no rows can pass (not keep) + false, ]; - prune_with_expr( - // s1 LIKE '' - col("s1").like(lit("")), - &schema, - &statistics, - expected_ret, - ); + prune_with_expr(expr, &schema, &statistics, expected_ret); } #[test] fn prune_utf8_like_many() { let (schema, statistics) = utf8_setup(); - // Expression "s1 LIKE 'A%'" - // s1 ["A", "Z"] ==> some rows could pass (must keep) - // s1 ["A", "L"] ==> some rows could pass (must keep) - // s1 ["N", "Z"] ==> no rows can pass (not keep) - // s1 ["M", "M"] ==> no rows can pass (not keep) - // s1 [NULL, NULL] ==> unknown (must keep) - // s1 ["A", NULL] ==> unknown (must keep) - // s1 ["", "A"] ==> some rows could pass (must keep) - // s1 ["", ""] ==> no rows can pass (not keep) - // s1 ["AB", "A\u{10ffff}\u{10ffff}\u{10ffff}"] ==> some rows could pass (must keep) - // s1 ["A\u{10ffff}", "A\u{10ffff}\u{10ffff}"] ==> some rows could pass (must keep) + let expr = col("s1").like(lit("A%")); let expected_ret = &[ - true, true, false, false, true, true, true, false, true, true, + // s1 ["A", "Z"] ==> some rows could pass (must keep) + true, // s1 ["A", "L"] ==> some rows could pass (must keep) + true, // s1 ["N", "Z"] ==> no rows can pass (not keep) + false, // s1 ["M", "M"] ==> no rows can pass (not keep) + false, // s1 [NULL, NULL] ==> unknown (must keep) + true, // s1 ["A", NULL] ==> unknown (must keep) + true, // s1 ["", "A"] ==> some rows could pass (must keep) + true, // s1 ["", ""] ==> no rows can pass (not keep) + false, + // s1 ["AB", "A\u{10ffff}\u{10ffff}\u{10ffff}"] ==> some rows could pass (must keep) + true, + // s1 ["A\u{10ffff}", "A\u{10ffff}\u{10ffff}"] ==> some rows could pass (must keep) + true, ]; - prune_with_expr( - // s1 LIKE 'A%' - col("s1").like(lit("A%")), - &schema, - &statistics, - expected_ret, - ); + prune_with_expr(expr, &schema, &statistics, expected_ret); - // Expression "s1 LIKE '%A%'" - // s1 ["A", "Z"] ==> some rows could pass (must keep) - // s1 ["A", "L"] ==> some rows could pass (must keep) - // s1 ["N", "Z"] ==> some rows could pass (must keep) - // s1 ["M", "M"] ==> some rows could pass (must keep) - // s1 [NULL, NULL] ==> unknown (must keep) - // s1 ["A", NULL] ==> unknown (must keep) - // s1 ["", "A"] ==> some rows could pass (must keep) - // s1 ["", ""] ==> some rows could pass (must keep) - // s1 ["AB", "A\u{10ffff}\u{10ffff}\u{10ffff}"] ==> some rows could pass (must keep) - // s1 ["A\u{10ffff}", "A\u{10ffff}\u{10ffff}"] ==> some rows could pass (must keep) - let expected_ret = &[true, true, true, true, true, true, true, true, true, true]; - prune_with_expr( - // s1 LIKE '%A%' - col("s1").like(lit("%A%")), - &schema, - &statistics, - expected_ret, - ); + let expr = col("s1").like(lit("%A%")); + let expected_ret = &[ + // s1 ["A", "Z"] ==> some rows could pass (must keep) + true, // s1 ["A", "L"] ==> some rows could pass (must keep) + true, // s1 ["N", "Z"] ==> some rows could pass (must keep) + true, // s1 ["M", "M"] ==> some rows could pass (must keep) + true, // s1 [NULL, NULL] ==> unknown (must keep) + true, // s1 ["A", NULL] ==> unknown (must keep) + true, // s1 ["", "A"] ==> some rows could pass (must keep) + true, // s1 ["", ""] ==> some rows could pass (must keep) + true, + // s1 ["AB", "A\u{10ffff}\u{10ffff}\u{10ffff}"] ==> some rows could pass (must keep) + true, + // s1 ["A\u{10ffff}", "A\u{10ffff}\u{10ffff}"] ==> some rows could pass (must keep) + true, + ]; + prune_with_expr(expr, &schema, &statistics, expected_ret); - // Expression "s1 LIKE '%'" - // s1 ["A", "Z"] ==> all rows must pass (must keep) - // s1 ["A", "L"] ==> all rows must pass (must keep) - // s1 ["N", "Z"] ==> all rows must pass (must keep) - // s1 ["M", "M"] ==> all rows must pass (must keep) - // s1 [NULL, NULL] ==> unknown (must keep) - // s1 ["A", NULL] ==> unknown (must keep) - // s1 ["", "A"] ==> all rows must pass (must keep) - // s1 ["", ""] ==> all rows must pass (must keep) - // s1 ["AB", "A\u{10ffff}\u{10ffff}\u{10ffff}"] ==> all rows must pass (must keep) - // s1 ["A\u{10ffff}", "A\u{10ffff}\u{10ffff}"] ==> all rows must pass (must keep) - let expected_ret = &[true, true, true, true, true, true, true, true, true, true]; - prune_with_expr( - // s1 LIKE '%' - col("s1").like(lit("%")), - &schema, - &statistics, - expected_ret, - ); + let expr = col("s1").like(lit("%")); + let expected_ret = &[ + // s1 ["A", "Z"] ==> all rows must pass (must keep) + true, // s1 ["A", "L"] ==> all rows must pass (must keep) + true, // s1 ["N", "Z"] ==> all rows must pass (must keep) + true, // s1 ["M", "M"] ==> all rows must pass (must keep) + true, // s1 [NULL, NULL] ==> unknown (must keep) + true, // s1 ["A", NULL] ==> unknown (must keep) + true, // s1 ["", "A"] ==> all rows must pass (must keep) + true, // s1 ["", ""] ==> all rows must pass (must keep) + true, + // s1 ["AB", "A\u{10ffff}\u{10ffff}\u{10ffff}"] ==> all rows must pass (must keep) + true, + // s1 ["A\u{10ffff}", "A\u{10ffff}\u{10ffff}"] ==> all rows must pass (must keep) + true, + ]; + prune_with_expr(expr, &schema, &statistics, expected_ret); - // Expression "s1 LIKE ''" - // s1 ["A", "Z"] ==> no rows can pass (not keep) - // s1 ["A", "L"] ==> no rows can pass (not keep) - // s1 ["N", "Z"] ==> no rows can pass (not keep) - // s1 ["M", "M"] ==> no rows can pass (not keep) - // s1 [NULL, NULL] ==> unknown (must keep) - // s1 ["A", NULL] ==> no rows can pass (not keep) - // s1 ["", "A"] ==> some rows could pass (must keep) - // s1 ["", ""] ==> all rows must pass (must keep) - // s1 ["AB", "A\u{10ffff}\u{10ffff}\u{10ffff}"] ==> no rows can pass (not keep) - // s1 ["A\u{10ffff}", "A\u{10ffff}\u{10ffff}"] ==> no rows can pass (not keep) + let expr = col("s1").like(lit("")); let expected_ret = &[ - false, false, false, false, true, false, true, true, false, false, + // s1 ["A", "Z"] ==> no rows can pass (not keep) + false, // s1 ["A", "L"] ==> no rows can pass (not keep) + false, // s1 ["N", "Z"] ==> no rows can pass (not keep) + false, // s1 ["M", "M"] ==> no rows can pass (not keep) + false, // s1 [NULL, NULL] ==> unknown (must keep) + true, // s1 ["A", NULL] ==> no rows can pass (not keep) + false, // s1 ["", "A"] ==> some rows could pass (must keep) + true, // s1 ["", ""] ==> all rows must pass (must keep) + true, + // s1 ["AB", "A\u{10ffff}\u{10ffff}\u{10ffff}"] ==> no rows can pass (not keep) + false, + // s1 ["A\u{10ffff}", "A\u{10ffff}\u{10ffff}"] ==> no rows can pass (not keep) + false, ]; - prune_with_expr( - // s1 LIKE '' - col("s1").like(lit("")), - &schema, - &statistics, - expected_ret, - ); + prune_with_expr(expr, &schema, &statistics, expected_ret); } #[test]