From e13d7dc3efa7c28549b80d2eacf5ce629636220c Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Sun, 19 May 2024 08:11:26 +0800 Subject: [PATCH 01/16] add ident needs quote check --- datafusion/sql/src/unparser/dialect.rs | 3 +++ datafusion/sql/src/unparser/expr.rs | 16 ++++++++++++++-- datafusion/sql/src/unparser/plan.rs | 11 +++++++---- 3 files changed, 24 insertions(+), 6 deletions(-) diff --git a/datafusion/sql/src/unparser/dialect.rs b/datafusion/sql/src/unparser/dialect.rs index ccbd388fc4b7..24b0941c97a9 100644 --- a/datafusion/sql/src/unparser/dialect.rs +++ b/datafusion/sql/src/unparser/dialect.rs @@ -21,6 +21,9 @@ /// See pub trait Dialect { fn identifier_quote_style(&self) -> Option; + fn identifier_needs_quote(&self, _: &str) -> bool { + true + } } pub struct DefaultDialect {} diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index 6209b409fa54..8db4d394e087 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -492,10 +492,14 @@ impl Unparser<'_> { let mut id = table_ref.to_vec(); id.push(col.name.to_string()); return Ok(ast::Expr::CompoundIdentifier( - id.iter().map(|i| self.new_ident(i.to_string())).collect(), + id.iter() + .map(|i| self.new_ident_quoted_if_needs(i.to_string())) + .collect(), )); } - Ok(ast::Expr::Identifier(self.new_ident(col.name.to_string()))) + Ok(ast::Expr::Identifier( + self.new_ident_quoted_if_needs(col.name.to_string()), + )) } fn convert_bound( @@ -532,6 +536,14 @@ impl Unparser<'_> { .collect::>>() } + pub(super) fn new_ident_quoted_if_needs(&self, ident: String) -> ast::Ident { + if self.dialect.identifier_needs_quote(&ident) { + self.new_ident(ident) + } else { + self.new_ident_without_quote_style(ident) + } + } + pub(super) fn new_ident(&self, str: String) -> ast::Ident { ast::Ident { value: str, diff --git a/datafusion/sql/src/unparser/plan.rs b/datafusion/sql/src/unparser/plan.rs index 3373220a8476..1173a66fbcda 100644 --- a/datafusion/sql/src/unparser/plan.rs +++ b/datafusion/sql/src/unparser/plan.rs @@ -141,9 +141,12 @@ impl Unparser<'_> { let mut builder = TableRelationBuilder::default(); let mut table_parts = vec![]; if let Some(schema_name) = scan.table_name.schema() { - table_parts.push(self.new_ident(schema_name.to_string())); + table_parts + .push(self.new_ident_quoted_if_needs(schema_name.to_string())); } - table_parts.push(self.new_ident(scan.table_name.table().to_string())); + table_parts.push( + self.new_ident_quoted_if_needs(scan.table_name.table().to_string()), + ); builder.name(ast::ObjectName(table_parts)); relation.table(builder); @@ -416,7 +419,7 @@ impl Unparser<'_> { Ok(ast::SelectItem::ExprWithAlias { expr: inner, - alias: self.new_ident(name.to_string()), + alias: self.new_ident_quoted_if_needs(name.to_string()), }) } _ => { @@ -487,7 +490,7 @@ impl Unparser<'_> { fn new_table_alias(&self, alias: String) -> ast::TableAlias { ast::TableAlias { - name: self.new_ident(alias), + name: self.new_ident_quoted_if_needs(alias), columns: Vec::new(), } } From bce7e413898485c94aa7f025d53f29960a56d557 Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Sun, 19 May 2024 09:47:07 +0800 Subject: [PATCH 02/16] implement the check for default dialect and fix tests --- datafusion/sql/Cargo.toml | 1 + datafusion/sql/src/unparser/dialect.rs | 8 +++ datafusion/sql/src/unparser/expr.rs | 74 ++++++++++++------------- datafusion/sql/src/unparser/plan.rs | 2 +- datafusion/sql/tests/sql_integration.rs | 12 ++-- 5 files changed, 51 insertions(+), 46 deletions(-) diff --git a/datafusion/sql/Cargo.toml b/datafusion/sql/Cargo.toml index ef3ed265c7ab..2fd4db5b7c62 100644 --- a/datafusion/sql/Cargo.toml +++ b/datafusion/sql/Cargo.toml @@ -49,6 +49,7 @@ datafusion-expr = { workspace = true } log = { workspace = true } sqlparser = { workspace = true } strum = { version = "0.26.1", features = ["derive"] } +regex = { version = "1.8" } [dev-dependencies] ctor = { workspace = true } diff --git a/datafusion/sql/src/unparser/dialect.rs b/datafusion/sql/src/unparser/dialect.rs index 24b0941c97a9..98677de42314 100644 --- a/datafusion/sql/src/unparser/dialect.rs +++ b/datafusion/sql/src/unparser/dialect.rs @@ -15,6 +15,9 @@ // specific language governing permissions and limitations // under the License. +use regex::Regex; +use sqlparser::keywords::ALL_KEYWORDS; + /// Dialect is used to capture dialect specific syntax. /// Note: this trait will eventually be replaced by the Dialect in the SQLparser package /// @@ -31,6 +34,11 @@ impl Dialect for DefaultDialect { fn identifier_quote_style(&self) -> Option { Some('"') } + fn identifier_needs_quote(&self, ident: &str) -> bool { + let identifier_regex = Regex::new(r"^[a-zA-Z_][a-zA-Z0-9_]*$").unwrap(); + ALL_KEYWORDS.contains(&ident.to_uppercase().as_str()) + || !identifier_regex.is_match(ident) + } } pub struct PostgreSqlDialect {} diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index 8db4d394e087..0f38da3c452d 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -74,7 +74,7 @@ impl Display for Unparsed { /// let expr = col("a").gt(lit(4)); /// let sql = expr_to_sql(&expr).unwrap(); /// -/// assert_eq!(format!("{}", sql), "(\"a\" > 4)") +/// assert_eq!(format!("{}", sql), "(a > 4)") /// ``` pub fn expr_to_sql(expr: &Expr) -> Result { let unparser = Unparser::default(); @@ -989,67 +989,67 @@ mod tests { .build()?; let tests: Vec<(Expr, &str)> = vec![ - ((col("a") + col("b")).gt(lit(4)), r#"(("a" + "b") > 4)"#), + ((col("a") + col("b")).gt(lit(4)), r#"((a + b) > 4)"#), ( Expr::Column(Column { relation: Some(TableReference::partial("a", "b")), name: "c".to_string(), }) .gt(lit(4)), - r#"("a"."b"."c" > 4)"#, + r#"(a.b.c > 4)"#, ), ( case(col("a")) .when(lit(1), lit(true)) .when(lit(0), lit(false)) .otherwise(lit(ScalarValue::Null))?, - r#"CASE "a" WHEN 1 THEN true WHEN 0 THEN false ELSE NULL END"#, + r#"CASE a WHEN 1 THEN true WHEN 0 THEN false ELSE NULL END"#, ), ( when(col("a").is_null(), lit(true)).otherwise(lit(false))?, - r#"CASE WHEN "a" IS NULL THEN true ELSE false END"#, + r#"CASE WHEN a IS NULL THEN true ELSE false END"#, ), ( when(col("a").is_not_null(), lit(true)).otherwise(lit(false))?, - r#"CASE WHEN "a" IS NOT NULL THEN true ELSE false END"#, + r#"CASE WHEN a IS NOT NULL THEN true ELSE false END"#, ), ( Expr::Cast(Cast { expr: Box::new(col("a")), data_type: DataType::Date64, }), - r#"CAST("a" AS DATETIME)"#, + r#"CAST(a AS DATETIME)"#, ), ( Expr::Cast(Cast { expr: Box::new(col("a")), data_type: DataType::UInt32, }), - r#"CAST("a" AS INTEGER UNSIGNED)"#, + r#"CAST(a AS INTEGER UNSIGNED)"#, ), ( col("a").in_list(vec![lit(1), lit(2), lit(3)], false), - r#""a" IN (1, 2, 3)"#, + r#"a IN (1, 2, 3)"#, ), ( col("a").in_list(vec![lit(1), lit(2), lit(3)], true), - r#""a" NOT IN (1, 2, 3)"#, + r#"a NOT IN (1, 2, 3)"#, ), ( ScalarUDF::new_from_impl(DummyUDF::new()).call(vec![col("a"), col("b")]), - r#"dummy_udf("a", "b")"#, + r#"dummy_udf(a, b)"#, ), ( ScalarUDF::new_from_impl(DummyUDF::new()) .call(vec![col("a"), col("b")]) .is_null(), - r#"dummy_udf("a", "b") IS NULL"#, + r#"dummy_udf(a, b) IS NULL"#, ), ( ScalarUDF::new_from_impl(DummyUDF::new()) .call(vec![col("a"), col("b")]) .is_not_null(), - r#"dummy_udf("a", "b") IS NOT NULL"#, + r#"dummy_udf(a, b) IS NOT NULL"#, ), ( Expr::Like(Like { @@ -1059,7 +1059,7 @@ mod tests { escape_char: Some('o'), case_insensitive: true, }), - r#""a" NOT LIKE 'foo' ESCAPE 'o'"#, + r#"a NOT LIKE 'foo' ESCAPE 'o'"#, ), ( Expr::SimilarTo(Like { @@ -1069,7 +1069,7 @@ mod tests { escape_char: Some('o'), case_insensitive: true, }), - r#""a" LIKE 'foo' ESCAPE 'o'"#, + r#"a LIKE 'foo' ESCAPE 'o'"#, ), ( Expr::Literal(ScalarValue::Date64(Some(0))), @@ -1106,7 +1106,7 @@ mod tests { order_by: None, null_treatment: None, }), - r#"SUM("a")"#, + r#"SUM(a)"#, ), ( Expr::AggregateFunction(AggregateFunction { @@ -1145,7 +1145,7 @@ mod tests { window_frame: WindowFrame::new(None), null_treatment: None, }), - r#"ROW_NUMBER("col") OVER (ROWS BETWEEN NULL PRECEDING AND NULL FOLLOWING)"#, + r#"ROW_NUMBER(col) OVER (ROWS BETWEEN NULL PRECEDING AND NULL FOLLOWING)"#, ), ( Expr::WindowFunction(WindowFunction { @@ -1170,55 +1170,55 @@ mod tests { ), null_treatment: None, }), - r#"COUNT(*) OVER (ORDER BY "a" DESC NULLS FIRST RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING)"#, + r#"COUNT(*) OVER (ORDER BY a DESC NULLS FIRST RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING)"#, ), - (col("a").is_not_null(), r#""a" IS NOT NULL"#), - (col("a").is_null(), r#""a" IS NULL"#), + (col("a").is_not_null(), r#"a IS NOT NULL"#), + (col("a").is_null(), r#"a IS NULL"#), ( (col("a") + col("b")).gt(lit(4)).is_true(), - r#"(("a" + "b") > 4) IS TRUE"#, + r#"((a + b) > 4) IS TRUE"#, ), ( (col("a") + col("b")).gt(lit(4)).is_not_true(), - r#"(("a" + "b") > 4) IS NOT TRUE"#, + r#"((a + b) > 4) IS NOT TRUE"#, ), ( (col("a") + col("b")).gt(lit(4)).is_false(), - r#"(("a" + "b") > 4) IS FALSE"#, + r#"((a + b) > 4) IS FALSE"#, ), ( (col("a") + col("b")).gt(lit(4)).is_not_false(), - r#"(("a" + "b") > 4) IS NOT FALSE"#, + r#"((a + b) > 4) IS NOT FALSE"#, ), ( (col("a") + col("b")).gt(lit(4)).is_unknown(), - r#"(("a" + "b") > 4) IS UNKNOWN"#, + r#"((a + b) > 4) IS UNKNOWN"#, ), ( (col("a") + col("b")).gt(lit(4)).is_not_unknown(), - r#"(("a" + "b") > 4) IS NOT UNKNOWN"#, + r#"((a + b) > 4) IS NOT UNKNOWN"#, ), - (not(col("a")), r#"NOT "a""#), + (not(col("a")), r#"NOT a"#), ( Expr::between(col("a"), lit(1), lit(7)), - r#"("a" BETWEEN 1 AND 7)"#, + r#"(a BETWEEN 1 AND 7)"#, ), - (Expr::Negative(Box::new(col("a"))), r#"-"a""#), + (Expr::Negative(Box::new(col("a"))), r#"-a"#), ( exists(Arc::new(dummy_logical_plan.clone())), - r#"EXISTS (SELECT "t"."a" FROM "t" WHERE ("t"."a" = 1))"#, + r#"EXISTS (SELECT t.a FROM t WHERE (t.a = 1))"#, ), ( not_exists(Arc::new(dummy_logical_plan.clone())), - r#"NOT EXISTS (SELECT "t"."a" FROM "t" WHERE ("t"."a" = 1))"#, + r#"NOT EXISTS (SELECT t.a FROM t WHERE (t.a = 1))"#, ), ( try_cast(col("a"), DataType::Date64), - r#"TRY_CAST("a" AS DATETIME)"#, + r#"TRY_CAST(a AS DATETIME)"#, ), ( try_cast(col("a"), DataType::UInt32), - r#"TRY_CAST("a" AS INTEGER UNSIGNED)"#, + r#"TRY_CAST(a AS INTEGER UNSIGNED)"#, ), ( Expr::ScalarVariable(Int8, vec![String::from("@a")]), @@ -1231,10 +1231,10 @@ mod tests { ), r#"@root.foo"#, ), - (col("x").eq(placeholder("$1")), r#"("x" = $1)"#), + (col("x").eq(placeholder("$1")), r#"(x = $1)"#), ( out_ref_col(DataType::Int32, "t.a").gt(lit(1)), - r#"("t"."a" > 1)"#, + r#"(t.a > 1)"#, ), ( grouping_set(vec![vec![col("a"), col("b")], vec![col("a")]]), @@ -1258,8 +1258,8 @@ mod tests { #[test] fn expr_to_unparsed_ok() -> Result<()> { let tests: Vec<(Expr, &str)> = vec![ - ((col("a") + col("b")).gt(lit(4)), r#"(("a" + "b") > 4)"#), - (col("a").sort(true, true), r#""a" ASC NULLS FIRST"#), + ((col("a") + col("b")).gt(lit(4)), r#"((a + b) > 4)"#), + (col("a").sort(true, true), r#"a ASC NULLS FIRST"#), ]; for (expr, expected) in tests { diff --git a/datafusion/sql/src/unparser/plan.rs b/datafusion/sql/src/unparser/plan.rs index 1173a66fbcda..e23ee963ee75 100644 --- a/datafusion/sql/src/unparser/plan.rs +++ b/datafusion/sql/src/unparser/plan.rs @@ -52,7 +52,7 @@ use super::{ /// .unwrap(); /// let sql = plan_to_sql(&plan).unwrap(); /// -/// assert_eq!(format!("{}", sql), "SELECT \"table\".\"id\", \"table\".\"value\" FROM \"table\"") +/// assert_eq!(format!("{}", sql), "SELECT \"table\".id, \"table\".\"value\" FROM \"table\"") /// ``` pub fn plan_to_sql(plan: &LogicalPlan) -> Result { let unparser = Unparser::default(); diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index bbedaca6a8bc..d147a6da21af 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -4555,25 +4555,21 @@ impl TableSource for EmptyTable { #[test] fn roundtrip_expr() { let tests: Vec<(TableReference, &str, &str)> = vec![ - ( - TableReference::bare("person"), - "age > 35", - r#"("age" > 35)"#, - ), + (TableReference::bare("person"), "age > 35", r#"(age > 35)"#), ( TableReference::bare("person"), "id = '10'", - r#"("id" = '10')"#, + r#"(id = '10')"#, ), ( TableReference::bare("person"), "CAST(id AS VARCHAR)", - r#"CAST("id" AS VARCHAR)"#, + r#"CAST(id AS VARCHAR)"#, ), ( TableReference::bare("person"), "SUM((age * 2))", - r#"SUM(("age" * 2))"#, + r#"SUM((age * 2))"#, ), ]; From a616719cea59fe6d5f5e4ed955a6c8ac9c6ebcd4 Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Sun, 19 May 2024 09:54:30 +0800 Subject: [PATCH 03/16] add test for need-quoted cases --- datafusion/sql/src/unparser/expr.rs | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index 0f38da3c452d..85f2823fa134 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -1238,10 +1238,16 @@ mod tests { ), ( grouping_set(vec![vec![col("a"), col("b")], vec![col("a")]]), - r#"GROUPING SETS (("a", "b"), ("a"))"#, + r#"GROUPING SETS ((a, b), (a))"#, ), - (cube(vec![col("a"), col("b")]), r#"CUBE ("a", "b")"#), - (rollup(vec![col("a"), col("b")]), r#"ROLLUP ("a", "b")"#), + (cube(vec![col("a"), col("b")]), r#"CUBE (a, b)"#), + (rollup(vec![col("a"), col("b")]), r#"ROLLUP (a, b)"#), + (col("table").eq(lit(1)), r#"("table" = 1)"#), + ( + col("123_need_quoted").eq(lit(1)), + r#"("123_need_quoted" = 1)"#, + ), + (col("need-qutoed").eq(lit(1)), r#"("need-qutoed" = 1)"#), ]; for (expr, expected) in tests { From b8e7dbe102a0c6b49b93ef7a045eac136e232a27 Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Sun, 19 May 2024 10:33:31 +0800 Subject: [PATCH 04/16] update cargo lock --- datafusion-cli/Cargo.lock | 1 + 1 file changed, 1 insertion(+) diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index db87f85e346b..99af80bf9df2 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -1412,6 +1412,7 @@ dependencies = [ "datafusion-common", "datafusion-expr", "log", + "regex", "sqlparser", "strum 0.26.2", ] From 0293ca792052d7999947459ab608a8b204347fe7 Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Sun, 19 May 2024 10:50:33 +0800 Subject: [PATCH 05/16] fomrat cargo toml --- datafusion/sql/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/sql/Cargo.toml b/datafusion/sql/Cargo.toml index 2fd4db5b7c62..02d5fd391b30 100644 --- a/datafusion/sql/Cargo.toml +++ b/datafusion/sql/Cargo.toml @@ -47,9 +47,9 @@ arrow-schema = { workspace = true } datafusion-common = { workspace = true, default-features = true } datafusion-expr = { workspace = true } log = { workspace = true } +regex = { version = "1.8" } sqlparser = { workspace = true } strum = { version = "0.26.1", features = ["derive"] } -regex = { version = "1.8" } [dev-dependencies] ctor = { workspace = true } From 4063d5d886b6cf693cb5f60e805ab64ca440b76a Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Sun, 19 May 2024 11:26:16 +0800 Subject: [PATCH 06/16] fix the example test --- datafusion-examples/examples/plan_to_sql.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion-examples/examples/plan_to_sql.rs b/datafusion-examples/examples/plan_to_sql.rs index 8ac6746a319d..cafd60af70c6 100644 --- a/datafusion-examples/examples/plan_to_sql.rs +++ b/datafusion-examples/examples/plan_to_sql.rs @@ -61,7 +61,7 @@ async fn main() -> Result<()> { fn simple_expr_to_sql_demo() -> Result<()> { let expr = col("a").lt(lit(5)).or(col("a").eq(lit(8))); let sql = expr_to_sql(&expr)?.to_string(); - assert_eq!(sql, r#"(("a" < 5) OR ("a" = 8))"#); + assert_eq!(sql, r#"((a < 5) OR (a = 8))"#); Ok(()) } From c0e03d1cd34fe68ca409b71180cd947adee12e81 Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Tue, 21 May 2024 19:32:48 +0800 Subject: [PATCH 07/16] move regex to top level --- Cargo.toml | 1 + datafusion/core/Cargo.toml | 2 +- datafusion/functions/Cargo.toml | 2 +- datafusion/physical-expr/Cargo.toml | 2 +- datafusion/sql/Cargo.toml | 2 +- 5 files changed, 5 insertions(+), 4 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index e0edf30efaec..339bbdf3d07c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -105,6 +105,7 @@ object_store = { version = "0.9.1", default-features = false } parking_lot = "0.12" parquet = { version = "51.0.0", default-features = false, features = ["arrow", "async", "object_store"] } rand = "0.8" +regex = "1.8" rstest = "0.19.0" serde_json = "1" sqlparser = { version = "0.45.0", features = ["visitor"] } diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index a3e299752189..bfbfc72acde7 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -145,7 +145,7 @@ postgres-protocol = "0.6.4" postgres-types = { version = "0.2.4", features = ["derive", "with-chrono-0_4"] } rand = { workspace = true, features = ["small_rng"] } rand_distr = "0.4.3" -regex = "1.5.4" +regex = { workspace = true } rstest = { workspace = true } rust_decimal = { version = "1.27.0", features = ["tokio-pg"] } serde_json = { workspace = true } diff --git a/datafusion/functions/Cargo.toml b/datafusion/functions/Cargo.toml index ceb851a39027..e4e99beee97f 100644 --- a/datafusion/functions/Cargo.toml +++ b/datafusion/functions/Cargo.toml @@ -80,7 +80,7 @@ itertools = { workspace = true } log = { workspace = true } md-5 = { version = "^0.10.0", optional = true } rand = { workspace = true } -regex = { version = "1.8", optional = true } +regex = { worksapce = true, optional = true } sha2 = { version = "^0.10.1", optional = true } unicode-segmentation = { version = "^1.7.1", optional = true } uuid = { version = "1.7", features = ["v4"], optional = true } diff --git a/datafusion/physical-expr/Cargo.toml b/datafusion/physical-expr/Cargo.toml index 798654206e63..364940bdee8b 100644 --- a/datafusion/physical-expr/Cargo.toml +++ b/datafusion/physical-expr/Cargo.toml @@ -66,7 +66,7 @@ itertools = { workspace = true, features = ["use_std"] } log = { workspace = true } paste = "^1.0" petgraph = "0.6.2" -regex = { version = "1.8", optional = true } +regex = { workspace = true, optional = true } [dev-dependencies] arrow = { workspace = true, features = ["test_utils"] } diff --git a/datafusion/sql/Cargo.toml b/datafusion/sql/Cargo.toml index 02d5fd391b30..06bcdc9a1bd4 100644 --- a/datafusion/sql/Cargo.toml +++ b/datafusion/sql/Cargo.toml @@ -47,7 +47,7 @@ arrow-schema = { workspace = true } datafusion-common = { workspace = true, default-features = true } datafusion-expr = { workspace = true } log = { workspace = true } -regex = { version = "1.8" } +regex = { workspace = true } sqlparser = { workspace = true } strum = { version = "0.26.1", features = ["derive"] } From 743063408ae5517c6b94cc650e8399d50e24f2e2 Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Tue, 21 May 2024 19:37:34 +0800 Subject: [PATCH 08/16] add comments for new_ident_quoted_if_needs func --- datafusion/sql/src/unparser/expr.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index 85f2823fa134..05ff2fa10d9e 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -536,6 +536,7 @@ impl Unparser<'_> { .collect::>>() } + /// This function can create an identifier with or without quotes based on the dialect rules pub(super) fn new_ident_quoted_if_needs(&self, ident: String) -> ast::Ident { if self.dialect.identifier_needs_quote(&ident) { self.new_ident(ident) From a881a659113ca2594f3fd370240826547f787121 Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Tue, 21 May 2024 19:48:38 +0800 Subject: [PATCH 09/16] fix typo and add test for space --- datafusion/sql/src/unparser/expr.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index 05ff2fa10d9e..1a854644d68d 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -1248,7 +1248,8 @@ mod tests { col("123_need_quoted").eq(lit(1)), r#"("123_need_quoted" = 1)"#, ), - (col("need-qutoed").eq(lit(1)), r#"("need-qutoed" = 1)"#), + (col("need-quoted").eq(lit(1)), r#"("need-quoted" = 1)"#), + (col("need quoted").eq(lit(1)), r#"("need quoted" = 1)"#), ]; for (expr, expected) in tests { From c9eb4a438c84544414357074b5a5a9a2f8456194 Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Tue, 21 May 2024 21:01:51 +0800 Subject: [PATCH 10/16] fix example test --- datafusion-examples/examples/plan_to_sql.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion-examples/examples/plan_to_sql.rs b/datafusion-examples/examples/plan_to_sql.rs index cafd60af70c6..918d71b5e905 100644 --- a/datafusion-examples/examples/plan_to_sql.rs +++ b/datafusion-examples/examples/plan_to_sql.rs @@ -106,7 +106,7 @@ async fn simple_plan_to_sql_demo() -> Result<()> { assert_eq!( sql, - r#"SELECT "?table?"."id", "?table?"."int_col", "?table?"."double_col", "?table?"."date_string_col" FROM "?table?""# + r#"SELECT "?table?".id, "?table?".int_col, "?table?".double_col, "?table?".date_string_col FROM "?table?""# ); Ok(()) From 2eba7173a507e23574b32a9b5754d882f6808fe1 Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Tue, 21 May 2024 22:07:41 +0800 Subject: [PATCH 11/16] fix example test --- datafusion-examples/examples/plan_to_sql.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion-examples/examples/plan_to_sql.rs b/datafusion-examples/examples/plan_to_sql.rs index 918d71b5e905..650dafc17c48 100644 --- a/datafusion-examples/examples/plan_to_sql.rs +++ b/datafusion-examples/examples/plan_to_sql.rs @@ -145,7 +145,7 @@ async fn round_trip_plan_to_sql_demo() -> Result<()> { let sql = plan_to_sql(df.logical_plan())?.to_string(); assert_eq!( sql, - r#"SELECT "alltypes_plain"."int_col", "alltypes_plain"."double_col", CAST("alltypes_plain"."date_string_col" AS VARCHAR) FROM "alltypes_plain" WHERE (("alltypes_plain"."id" > 1) AND ("alltypes_plain"."tinyint_col" < "alltypes_plain"."double_col"))"# + r#"SELECT alltypes_plain.int_col, alltypes_plain.double_col, CAST(alltypes_plain.date_string_col AS VARCHAR) FROM alltypes_plain WHERE ((alltypes_plain.id > 1) AND (alltypes_plain.tinyint_col"< alltypes_plain.double_col))"# ); Ok(()) From dc75c2ded6c5be3c74eefc308c7bbb4a640537fc Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Tue, 21 May 2024 23:16:58 +0800 Subject: [PATCH 12/16] fix the test fail --- datafusion-examples/examples/plan_to_sql.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion-examples/examples/plan_to_sql.rs b/datafusion-examples/examples/plan_to_sql.rs index 650dafc17c48..38354f1422e5 100644 --- a/datafusion-examples/examples/plan_to_sql.rs +++ b/datafusion-examples/examples/plan_to_sql.rs @@ -145,7 +145,7 @@ async fn round_trip_plan_to_sql_demo() -> Result<()> { let sql = plan_to_sql(df.logical_plan())?.to_string(); assert_eq!( sql, - r#"SELECT alltypes_plain.int_col, alltypes_plain.double_col, CAST(alltypes_plain.date_string_col AS VARCHAR) FROM alltypes_plain WHERE ((alltypes_plain.id > 1) AND (alltypes_plain.tinyint_col"< alltypes_plain.double_col))"# + r#"SELECT alltypes_plain.int_col, alltypes_plain.double_col, CAST(alltypes_plain.date_string_col AS VARCHAR) FROM alltypes_plain WHERE ((alltypes_plain.id > 1) AND (alltypes_plain.tinyint_col < alltypes_plain.double_col))"# ); Ok(()) From 603d0b43ccc346d83f7d2a5ac1037a039fc01207 Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Wed, 22 May 2024 13:14:04 +0800 Subject: [PATCH 13/16] remove unused example and modified comments --- datafusion-examples/examples/plan_to_sql.rs | 12 ------------ datafusion/sql/src/unparser/dialect.rs | 5 ++++- 2 files changed, 4 insertions(+), 13 deletions(-) diff --git a/datafusion-examples/examples/plan_to_sql.rs b/datafusion-examples/examples/plan_to_sql.rs index 38354f1422e5..bd708fe52bc1 100644 --- a/datafusion-examples/examples/plan_to_sql.rs +++ b/datafusion-examples/examples/plan_to_sql.rs @@ -49,7 +49,6 @@ use datafusion_sql::unparser::{plan_to_sql, Unparser}; async fn main() -> Result<()> { // See how to evaluate expressions simple_expr_to_sql_demo()?; - simple_expr_to_sql_demo_no_escape()?; simple_expr_to_sql_demo_escape_mysql_style()?; simple_plan_to_sql_demo().await?; round_trip_plan_to_sql_demo().await?; @@ -65,17 +64,6 @@ fn simple_expr_to_sql_demo() -> Result<()> { Ok(()) } -/// DataFusion can convert expressions to SQL without escaping column names using -/// using a custom dialect and an explicit unparser -fn simple_expr_to_sql_demo_no_escape() -> Result<()> { - let expr = col("a").lt(lit(5)).or(col("a").eq(lit(8))); - let dialect = CustomDialect::new(None); - let unparser = Unparser::new(&dialect); - let sql = unparser.expr_to_sql(&expr)?.to_string(); - assert_eq!(sql, r#"((a < 5) OR (a = 8))"#); - Ok(()) -} - /// DataFusion can convert expressions to SQL without escaping column names using /// using a custom dialect and an explicit unparser fn simple_expr_to_sql_demo_escape_mysql_style() -> Result<()> { diff --git a/datafusion/sql/src/unparser/dialect.rs b/datafusion/sql/src/unparser/dialect.rs index 98677de42314..47db5a762fc3 100644 --- a/datafusion/sql/src/unparser/dialect.rs +++ b/datafusion/sql/src/unparser/dialect.rs @@ -18,7 +18,10 @@ use regex::Regex; use sqlparser::keywords::ALL_KEYWORDS; -/// Dialect is used to capture dialect specific syntax. +/// `Dialect` to usse for Unparsing +/// +/// The default dialect tries to avoid quoting identifiers unless necessary (e.g. `a` instead of `"a"`) +/// but this behavior can be overridden as needed /// Note: this trait will eventually be replaced by the Dialect in the SQLparser package /// /// See From 3a9125cf235faab2a783ba84546edd22414797a9 Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Wed, 22 May 2024 13:28:00 +0800 Subject: [PATCH 14/16] fix typo --- datafusion/sql/src/unparser/dialect.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/sql/src/unparser/dialect.rs b/datafusion/sql/src/unparser/dialect.rs index 47db5a762fc3..7aae3ebd6d36 100644 --- a/datafusion/sql/src/unparser/dialect.rs +++ b/datafusion/sql/src/unparser/dialect.rs @@ -18,7 +18,7 @@ use regex::Regex; use sqlparser::keywords::ALL_KEYWORDS; -/// `Dialect` to usse for Unparsing +/// `Dialect` to use for Unparsing /// /// The default dialect tries to avoid quoting identifiers unless necessary (e.g. `a` instead of `"a"`) /// but this behavior can be overridden as needed From 9a1d05c694a201ed5de1f329a561a3b321f1f682 Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Wed, 22 May 2024 20:06:32 +0800 Subject: [PATCH 15/16] follow the latest Dialect trait in sqlparser --- datafusion/sql/src/unparser/dialect.rs | 27 +++++++++++++------------- datafusion/sql/src/unparser/expr.rs | 13 +++---------- 2 files changed, 16 insertions(+), 24 deletions(-) diff --git a/datafusion/sql/src/unparser/dialect.rs b/datafusion/sql/src/unparser/dialect.rs index 7aae3ebd6d36..0cec53b11554 100644 --- a/datafusion/sql/src/unparser/dialect.rs +++ b/datafusion/sql/src/unparser/dialect.rs @@ -26,28 +26,27 @@ use sqlparser::keywords::ALL_KEYWORDS; /// /// See pub trait Dialect { - fn identifier_quote_style(&self) -> Option; - fn identifier_needs_quote(&self, _: &str) -> bool { - true - } + fn identifier_quote_style(&self, _identifier: &str) -> Option; } pub struct DefaultDialect {} impl Dialect for DefaultDialect { - fn identifier_quote_style(&self) -> Option { - Some('"') - } - fn identifier_needs_quote(&self, ident: &str) -> bool { + fn identifier_quote_style(&self, _identifier: &str) -> Option { let identifier_regex = Regex::new(r"^[a-zA-Z_][a-zA-Z0-9_]*$").unwrap(); - ALL_KEYWORDS.contains(&ident.to_uppercase().as_str()) - || !identifier_regex.is_match(ident) + if ALL_KEYWORDS.contains(&_identifier.to_uppercase().as_str()) + || !identifier_regex.is_match(_identifier) + { + Some('"') + } else { + None + } } } pub struct PostgreSqlDialect {} impl Dialect for PostgreSqlDialect { - fn identifier_quote_style(&self) -> Option { + fn identifier_quote_style(&self, _: &str) -> Option { Some('"') } } @@ -55,7 +54,7 @@ impl Dialect for PostgreSqlDialect { pub struct MySqlDialect {} impl Dialect for MySqlDialect { - fn identifier_quote_style(&self) -> Option { + fn identifier_quote_style(&self, _: &str) -> Option { Some('`') } } @@ -63,7 +62,7 @@ impl Dialect for MySqlDialect { pub struct SqliteDialect {} impl Dialect for SqliteDialect { - fn identifier_quote_style(&self) -> Option { + fn identifier_quote_style(&self, _: &str) -> Option { Some('`') } } @@ -81,7 +80,7 @@ impl CustomDialect { } impl Dialect for CustomDialect { - fn identifier_quote_style(&self) -> Option { + fn identifier_quote_style(&self, _: &str) -> Option { self.identifier_quote_style } } diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index 1a854644d68d..5fe744e359a6 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -538,17 +538,10 @@ impl Unparser<'_> { /// This function can create an identifier with or without quotes based on the dialect rules pub(super) fn new_ident_quoted_if_needs(&self, ident: String) -> ast::Ident { - if self.dialect.identifier_needs_quote(&ident) { - self.new_ident(ident) - } else { - self.new_ident_without_quote_style(ident) - } - } - - pub(super) fn new_ident(&self, str: String) -> ast::Ident { + let quote_style = self.dialect.identifier_quote_style(&ident); ast::Ident { - value: str, - quote_style: self.dialect.identifier_quote_style(), + value: ident, + quote_style, } } From 8ed1525caddf87b80133968cd12f24dfd33447a2 Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Wed, 22 May 2024 20:36:14 +0800 Subject: [PATCH 16/16] fix the parameter name --- datafusion/sql/src/unparser/dialect.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/datafusion/sql/src/unparser/dialect.rs b/datafusion/sql/src/unparser/dialect.rs index 0cec53b11554..6a0df61ac182 100644 --- a/datafusion/sql/src/unparser/dialect.rs +++ b/datafusion/sql/src/unparser/dialect.rs @@ -31,10 +31,10 @@ pub trait Dialect { pub struct DefaultDialect {} impl Dialect for DefaultDialect { - fn identifier_quote_style(&self, _identifier: &str) -> Option { + fn identifier_quote_style(&self, identifier: &str) -> Option { let identifier_regex = Regex::new(r"^[a-zA-Z_][a-zA-Z0-9_]*$").unwrap(); - if ALL_KEYWORDS.contains(&_identifier.to_uppercase().as_str()) - || !identifier_regex.is_match(_identifier) + if ALL_KEYWORDS.contains(&identifier.to_uppercase().as_str()) + || !identifier_regex.is_match(identifier) { Some('"') } else {