From 2ea1c93811a7af6c3dac66980450f86f6f1b7187 Mon Sep 17 00:00:00 2001 From: Devin D'Angelo Date: Tue, 12 Mar 2024 08:54:15 -0400 Subject: [PATCH 01/13] more impls --- datafusion/sql/Cargo.toml | 1 + datafusion/sql/src/unparser/expr.rs | 287 ++++++++++++++++++++++------ 2 files changed, 225 insertions(+), 63 deletions(-) diff --git a/datafusion/sql/Cargo.toml b/datafusion/sql/Cargo.toml index 7739058a5c9d..0ad4521b8b4e 100644 --- a/datafusion/sql/Cargo.toml +++ b/datafusion/sql/Cargo.toml @@ -39,6 +39,7 @@ unicode_expressions = [] [dependencies] arrow = { workspace = true } arrow-schema = { workspace = true } +chrono = { workspace = true } datafusion-common = { workspace = true, default-features = true } datafusion-expr = { workspace = true } log = { workspace = true } diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index bb14c8a70739..3f79e67f0539 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -15,15 +15,22 @@ // specific language governing permissions and limitations // under the License. -use datafusion_common::{not_impl_err, Column, Result, ScalarValue}; +use arrow_schema::DataType; +use chrono::{NaiveDate, NaiveDateTime}; +use datafusion_common::{ + not_impl_datafusion_err, not_impl_err, Column, Result, ScalarValue, +}; use datafusion_expr::{ - expr::{Alias, InList, ScalarFunction, WindowFunction}, + expr::{AggregateFunctionDefinition, Alias, InList, ScalarFunction, WindowFunction}, Between, BinaryExpr, Case, Cast, Expr, Like, Operator, }; -use sqlparser::ast; +use sqlparser::ast::{self, Function, FunctionArg, Ident}; use super::Unparser; +/// The number of days from day 1 CE (0001-1-1) to Unix Epoch (1970-01-01) +static DAYS_FROM_CE_TO_UNIX_EPOCH: i32 = 719_163; + /// Convert a DataFusion [`Expr`] to `sqlparser::ast::Expr` /// /// This function is the opposite of `SqlToRel::sql_to_expr` and can @@ -70,7 +77,7 @@ impl Unparser<'_> { let r = self.expr_to_sql(right.as_ref())?; let op = self.op_to_sql(op)?; - Ok(self.binary_op_to_sql(l, r, op)) + Ok(ast::Expr::Nested(Box::new(self.binary_op_to_sql(l, r, op)))) } Expr::Case(Case { expr, @@ -79,10 +86,15 @@ impl Unparser<'_> { }) => { not_impl_err!("Unsupported expression: {expr:?}") } - Expr::Cast(Cast { expr, data_type: _ }) => { - not_impl_err!("Unsupported expression: {expr:?}") + Expr::Cast(Cast { expr, data_type }) => { + let inner_expr = self.expr_to_sql(expr)?; + Ok(ast::Expr::Cast { + expr: Box::new(inner_expr), + data_type: self.arrow_dtype_to_ast_dtype(data_type)?, + format: None, + }) } - Expr::Literal(value) => Ok(ast::Expr::Value(self.scalar_to_sql(value)?)), + Expr::Literal(value) => Ok(self.scalar_to_sql(value)?), Expr::Alias(Alias { expr, name: _, .. }) => self.expr_to_sql(expr), Expr::WindowFunction(WindowFunction { fun: _, @@ -103,6 +115,45 @@ impl Unparser<'_> { }) => { not_impl_err!("Unsupported expression: {expr:?}") } + Expr::AggregateFunction(agg) => { + let func_name = if let AggregateFunctionDefinition::BuiltIn(built_in) = + &agg.func_def + { + built_in.name() + } else { + return not_impl_err!( + "Only built in agg functions are supported, got {agg:?}" + ); + }; + + let args = agg + .args + .iter() + .map(|e| { + if matches!(e, Expr::Wildcard { qualifier: None }) { + Ok(FunctionArg::Unnamed(ast::FunctionArgExpr::Wildcard)) + } else { + self.expr_to_sql(e).map(|e| { + FunctionArg::Unnamed(ast::FunctionArgExpr::Expr(e)) + }) + } + }) + .collect::>>()?; + + Ok(ast::Expr::Function(Function { + name: ast::ObjectName(vec![Ident { + value: func_name.to_string(), + quote_style: None, + }]), + args, + filter: None, + null_treatment: None, + over: None, + distinct: false, + special: false, + order_by: vec![], + })) + } _ => not_impl_err!("Unsupported expression: {expr:?}"), } } @@ -174,133 +225,243 @@ impl Unparser<'_> { } } - fn scalar_to_sql(&self, v: &ScalarValue) -> Result { + /// DataFusion ScalarValues sometimes require a ast::Expr to construct. + /// For example ScalarValue::Date32(d) corresponds to the ast::Expr CAST('datestr' as DATE) + fn scalar_to_sql(&self, v: &ScalarValue) -> Result { match v { - ScalarValue::Null => Ok(ast::Value::Null), - ScalarValue::Boolean(Some(b)) => Ok(ast::Value::Boolean(b.to_owned())), - ScalarValue::Boolean(None) => Ok(ast::Value::Null), - ScalarValue::Float32(Some(f)) => Ok(ast::Value::Number(f.to_string(), false)), - ScalarValue::Float32(None) => Ok(ast::Value::Null), - ScalarValue::Float64(Some(f)) => Ok(ast::Value::Number(f.to_string(), false)), - ScalarValue::Float64(None) => Ok(ast::Value::Null), + ScalarValue::Null => Ok(ast::Expr::Value(ast::Value::Null)), + ScalarValue::Boolean(Some(b)) => { + Ok(ast::Expr::Value(ast::Value::Boolean(b.to_owned()))) + } + ScalarValue::Boolean(None) => Ok(ast::Expr::Value(ast::Value::Null)), + ScalarValue::Float32(Some(f)) => { + Ok(ast::Expr::Value(ast::Value::Number(f.to_string(), false))) + } + ScalarValue::Float32(None) => Ok(ast::Expr::Value(ast::Value::Null)), + ScalarValue::Float64(Some(f)) => { + Ok(ast::Expr::Value(ast::Value::Number(f.to_string(), false))) + } + ScalarValue::Float64(None) => Ok(ast::Expr::Value(ast::Value::Null)), ScalarValue::Decimal128(Some(_), ..) => { not_impl_err!("Unsupported scalar: {v:?}") } - ScalarValue::Decimal128(None, ..) => Ok(ast::Value::Null), + ScalarValue::Decimal128(None, ..) => Ok(ast::Expr::Value(ast::Value::Null)), ScalarValue::Decimal256(Some(_), ..) => { not_impl_err!("Unsupported scalar: {v:?}") } - ScalarValue::Decimal256(None, ..) => Ok(ast::Value::Null), - ScalarValue::Int8(Some(i)) => Ok(ast::Value::Number(i.to_string(), false)), - ScalarValue::Int8(None) => Ok(ast::Value::Null), - ScalarValue::Int16(Some(i)) => Ok(ast::Value::Number(i.to_string(), false)), - ScalarValue::Int16(None) => Ok(ast::Value::Null), - ScalarValue::Int32(Some(i)) => Ok(ast::Value::Number(i.to_string(), false)), - ScalarValue::Int32(None) => Ok(ast::Value::Null), - ScalarValue::Int64(Some(i)) => Ok(ast::Value::Number(i.to_string(), false)), - ScalarValue::Int64(None) => Ok(ast::Value::Null), - ScalarValue::UInt8(Some(ui)) => Ok(ast::Value::Number(ui.to_string(), false)), - ScalarValue::UInt8(None) => Ok(ast::Value::Null), - ScalarValue::UInt16(Some(ui)) => { - Ok(ast::Value::Number(ui.to_string(), false)) + ScalarValue::Decimal256(None, ..) => Ok(ast::Expr::Value(ast::Value::Null)), + ScalarValue::Int8(Some(i)) => { + Ok(ast::Expr::Value(ast::Value::Number(i.to_string(), false))) } - ScalarValue::UInt16(None) => Ok(ast::Value::Null), - ScalarValue::UInt32(Some(ui)) => { - Ok(ast::Value::Number(ui.to_string(), false)) + ScalarValue::Int8(None) => Ok(ast::Expr::Value(ast::Value::Null)), + ScalarValue::Int16(Some(i)) => { + Ok(ast::Expr::Value(ast::Value::Number(i.to_string(), false))) } - ScalarValue::UInt32(None) => Ok(ast::Value::Null), - ScalarValue::UInt64(Some(ui)) => { - Ok(ast::Value::Number(ui.to_string(), false)) + ScalarValue::Int16(None) => Ok(ast::Expr::Value(ast::Value::Null)), + ScalarValue::Int32(Some(i)) => { + Ok(ast::Expr::Value(ast::Value::Number(i.to_string(), false))) } - ScalarValue::UInt64(None) => Ok(ast::Value::Null), - ScalarValue::Utf8(Some(str)) => { - Ok(ast::Value::SingleQuotedString(str.to_string())) + ScalarValue::Int32(None) => Ok(ast::Expr::Value(ast::Value::Null)), + ScalarValue::Int64(Some(i)) => { + Ok(ast::Expr::Value(ast::Value::Number(i.to_string(), false))) } - ScalarValue::Utf8(None) => Ok(ast::Value::Null), - ScalarValue::LargeUtf8(Some(str)) => { - Ok(ast::Value::SingleQuotedString(str.to_string())) + ScalarValue::Int64(None) => Ok(ast::Expr::Value(ast::Value::Null)), + ScalarValue::UInt8(Some(ui)) => { + Ok(ast::Expr::Value(ast::Value::Number(ui.to_string(), false))) } - ScalarValue::LargeUtf8(None) => Ok(ast::Value::Null), + ScalarValue::UInt8(None) => Ok(ast::Expr::Value(ast::Value::Null)), + ScalarValue::UInt16(Some(ui)) => { + Ok(ast::Expr::Value(ast::Value::Number(ui.to_string(), false))) + } + ScalarValue::UInt16(None) => Ok(ast::Expr::Value(ast::Value::Null)), + ScalarValue::UInt32(Some(ui)) => { + Ok(ast::Expr::Value(ast::Value::Number(ui.to_string(), false))) + } + ScalarValue::UInt32(None) => Ok(ast::Expr::Value(ast::Value::Null)), + ScalarValue::UInt64(Some(ui)) => { + Ok(ast::Expr::Value(ast::Value::Number(ui.to_string(), false))) + } + ScalarValue::UInt64(None) => Ok(ast::Expr::Value(ast::Value::Null)), + ScalarValue::Utf8(Some(str)) => Ok(ast::Expr::Value( + ast::Value::SingleQuotedString(str.to_string()), + )), + ScalarValue::Utf8(None) => Ok(ast::Expr::Value(ast::Value::Null)), + ScalarValue::LargeUtf8(Some(str)) => Ok(ast::Expr::Value( + ast::Value::SingleQuotedString(str.to_string()), + )), + ScalarValue::LargeUtf8(None) => Ok(ast::Expr::Value(ast::Value::Null)), ScalarValue::Binary(Some(_)) => not_impl_err!("Unsupported scalar: {v:?}"), - ScalarValue::Binary(None) => Ok(ast::Value::Null), + ScalarValue::Binary(None) => Ok(ast::Expr::Value(ast::Value::Null)), ScalarValue::FixedSizeBinary(..) => { not_impl_err!("Unsupported scalar: {v:?}") } ScalarValue::LargeBinary(Some(_)) => { not_impl_err!("Unsupported scalar: {v:?}") } - ScalarValue::LargeBinary(None) => Ok(ast::Value::Null), + ScalarValue::LargeBinary(None) => Ok(ast::Expr::Value(ast::Value::Null)), ScalarValue::FixedSizeList(_a) => not_impl_err!("Unsupported scalar: {v:?}"), ScalarValue::List(_a) => not_impl_err!("Unsupported scalar: {v:?}"), ScalarValue::LargeList(_a) => not_impl_err!("Unsupported scalar: {v:?}"), - ScalarValue::Date32(Some(_d)) => not_impl_err!("Unsupported scalar: {v:?}"), - ScalarValue::Date32(None) => Ok(ast::Value::Null), - ScalarValue::Date64(Some(_d)) => not_impl_err!("Unsupported scalar: {v:?}"), - ScalarValue::Date64(None) => Ok(ast::Value::Null), + ScalarValue::Date32(Some(d)) => { + let date = + NaiveDate::from_num_days_from_ce_opt(d + DAYS_FROM_CE_TO_UNIX_EPOCH) + .ok_or(not_impl_datafusion_err!( + "Date overflow error for {d:?}" + ))?; + Ok(ast::Expr::Cast { + expr: Box::new(ast::Expr::Value(ast::Value::SingleQuotedString( + date.to_string(), + ))), + data_type: ast::DataType::Date, + format: None, + }) + } + ScalarValue::Date32(None) => Ok(ast::Expr::Value(ast::Value::Null)), + ScalarValue::Date64(Some(ms)) => { + let datetime = NaiveDateTime::from_timestamp_millis(*ms).ok_or( + not_impl_datafusion_err!("Datetime overflow error for {ms:?}"), + )?; + Ok(ast::Expr::Cast { + expr: Box::new(ast::Expr::Value(ast::Value::SingleQuotedString( + datetime.to_string(), + ))), + data_type: ast::DataType::Datetime(None), + format: None, + }) + } + ScalarValue::Date64(None) => Ok(ast::Expr::Value(ast::Value::Null)), ScalarValue::Time32Second(Some(_t)) => { not_impl_err!("Unsupported scalar: {v:?}") } - ScalarValue::Time32Second(None) => Ok(ast::Value::Null), + ScalarValue::Time32Second(None) => Ok(ast::Expr::Value(ast::Value::Null)), ScalarValue::Time32Millisecond(Some(_t)) => { not_impl_err!("Unsupported scalar: {v:?}") } - ScalarValue::Time32Millisecond(None) => Ok(ast::Value::Null), + ScalarValue::Time32Millisecond(None) => { + Ok(ast::Expr::Value(ast::Value::Null)) + } ScalarValue::Time64Microsecond(Some(_t)) => { not_impl_err!("Unsupported scalar: {v:?}") } - ScalarValue::Time64Microsecond(None) => Ok(ast::Value::Null), + ScalarValue::Time64Microsecond(None) => { + Ok(ast::Expr::Value(ast::Value::Null)) + } ScalarValue::Time64Nanosecond(Some(_t)) => { not_impl_err!("Unsupported scalar: {v:?}") } - ScalarValue::Time64Nanosecond(None) => Ok(ast::Value::Null), + ScalarValue::Time64Nanosecond(None) => Ok(ast::Expr::Value(ast::Value::Null)), ScalarValue::TimestampSecond(Some(_ts), _) => { not_impl_err!("Unsupported scalar: {v:?}") } - ScalarValue::TimestampSecond(None, _) => Ok(ast::Value::Null), + ScalarValue::TimestampSecond(None, _) => { + Ok(ast::Expr::Value(ast::Value::Null)) + } ScalarValue::TimestampMillisecond(Some(_ts), _) => { not_impl_err!("Unsupported scalar: {v:?}") } - ScalarValue::TimestampMillisecond(None, _) => Ok(ast::Value::Null), + ScalarValue::TimestampMillisecond(None, _) => { + Ok(ast::Expr::Value(ast::Value::Null)) + } ScalarValue::TimestampMicrosecond(Some(_ts), _) => { not_impl_err!("Unsupported scalar: {v:?}") } - ScalarValue::TimestampMicrosecond(None, _) => Ok(ast::Value::Null), + ScalarValue::TimestampMicrosecond(None, _) => { + Ok(ast::Expr::Value(ast::Value::Null)) + } ScalarValue::TimestampNanosecond(Some(_ts), _) => { not_impl_err!("Unsupported scalar: {v:?}") } - ScalarValue::TimestampNanosecond(None, _) => Ok(ast::Value::Null), + ScalarValue::TimestampNanosecond(None, _) => { + Ok(ast::Expr::Value(ast::Value::Null)) + } ScalarValue::IntervalYearMonth(Some(_i)) => { not_impl_err!("Unsupported scalar: {v:?}") } - ScalarValue::IntervalYearMonth(None) => Ok(ast::Value::Null), + ScalarValue::IntervalYearMonth(None) => { + Ok(ast::Expr::Value(ast::Value::Null)) + } ScalarValue::IntervalDayTime(Some(_i)) => { not_impl_err!("Unsupported scalar: {v:?}") } - ScalarValue::IntervalDayTime(None) => Ok(ast::Value::Null), + ScalarValue::IntervalDayTime(None) => Ok(ast::Expr::Value(ast::Value::Null)), ScalarValue::IntervalMonthDayNano(Some(_i)) => { not_impl_err!("Unsupported scalar: {v:?}") } - ScalarValue::IntervalMonthDayNano(None) => Ok(ast::Value::Null), + ScalarValue::IntervalMonthDayNano(None) => { + Ok(ast::Expr::Value(ast::Value::Null)) + } ScalarValue::DurationSecond(Some(_d)) => { not_impl_err!("Unsupported scalar: {v:?}") } - ScalarValue::DurationSecond(None) => Ok(ast::Value::Null), + ScalarValue::DurationSecond(None) => Ok(ast::Expr::Value(ast::Value::Null)), ScalarValue::DurationMillisecond(Some(_d)) => { not_impl_err!("Unsupported scalar: {v:?}") } - ScalarValue::DurationMillisecond(None) => Ok(ast::Value::Null), + ScalarValue::DurationMillisecond(None) => { + Ok(ast::Expr::Value(ast::Value::Null)) + } ScalarValue::DurationMicrosecond(Some(_d)) => { not_impl_err!("Unsupported scalar: {v:?}") } - ScalarValue::DurationMicrosecond(None) => Ok(ast::Value::Null), + ScalarValue::DurationMicrosecond(None) => { + Ok(ast::Expr::Value(ast::Value::Null)) + } ScalarValue::DurationNanosecond(Some(_d)) => { not_impl_err!("Unsupported scalar: {v:?}") } - ScalarValue::DurationNanosecond(None) => Ok(ast::Value::Null), + ScalarValue::DurationNanosecond(None) => { + Ok(ast::Expr::Value(ast::Value::Null)) + } ScalarValue::Struct(_) => not_impl_err!("Unsupported scalar: {v:?}"), ScalarValue::Dictionary(..) => not_impl_err!("Unsupported scalar: {v:?}"), } } + + fn arrow_dtype_to_ast_dtype(&self, data_type: &DataType) -> Result { + match data_type { + DataType::Null => { + not_impl_err!("Unsupported DataType: conversion: {data_type:?}") + } + DataType::Boolean => Ok(ast::DataType::Bool), + DataType::Int8 => Ok(ast::DataType::TinyInt(None)), + DataType::Int16 => Ok(ast::DataType::SmallInt(None)), + DataType::Int32 => Ok(ast::DataType::Integer(None)), + DataType::Int64 => Ok(ast::DataType::BigInt(None)), + DataType::UInt8 => Ok(ast::DataType::UnsignedTinyInt(None)), + DataType::UInt16 => Ok(ast::DataType::UnsignedSmallInt(None)), + DataType::UInt32 => Ok(ast::DataType::UnsignedInteger(None)), + DataType::UInt64 => Ok(ast::DataType::UnsignedBigInt(None)), + DataType::Float16 => { + not_impl_err!("Unsupported DataType: conversion: {data_type:?}") + } + DataType::Float32 => Ok(ast::DataType::Float(None)), + DataType::Float64 => Ok(ast::DataType::Double), + DataType::Timestamp(_, _) => { + not_impl_err!("Unsupported DataType: conversion: {data_type:?}") + } + DataType::Date32 => Ok(ast::DataType::Date), + DataType::Date64 => Ok(ast::DataType::Datetime(None)), + DataType::Time32(_) => todo!(), + DataType::Time64(_) => todo!(), + DataType::Duration(_) => todo!(), + DataType::Interval(_) => todo!(), + DataType::Binary => todo!(), + DataType::FixedSizeBinary(_) => todo!(), + DataType::LargeBinary => todo!(), + DataType::Utf8 => todo!(), + DataType::LargeUtf8 => todo!(), + DataType::List(_) => todo!(), + DataType::FixedSizeList(_, _) => todo!(), + DataType::LargeList(_) => todo!(), + DataType::Struct(_) => todo!(), + DataType::Union(_, _) => todo!(), + DataType::Dictionary(_, _) => todo!(), + DataType::Decimal128(_, _) => todo!(), + DataType::Decimal256(_, _) => todo!(), + DataType::Map(_, _) => todo!(), + DataType::RunEndEncoded(_, _) => todo!(), + } + } } #[cfg(test)] From 05f1a7136114013148b75941f37e1e8a5b2379d9 Mon Sep 17 00:00:00 2001 From: Devin D'Angelo Date: Tue, 12 Mar 2024 09:02:30 -0400 Subject: [PATCH 02/13] fix tests --- datafusion/sql/src/unparser/expr.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index 3f79e67f0539..99c3931eb65f 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -476,14 +476,14 @@ mod tests { #[test] fn expr_to_sql_ok() -> Result<()> { let tests: Vec<(Expr, &str)> = vec![ - (col("a").gt(lit(4)), r#"a > 4"#), + (col("a").gt(lit(4)), r#"(a > 4)"#), ( Expr::Column(Column { relation: Some(TableReference::partial("a", "b")), name: "c".to_string(), }) .gt(lit(4)), - r#"a.b.c > 4"#, + r#"(a.b.c > 4)"#, ), ]; From aedb46de6331fce52b0ff1b8f757ec680f67d8d0 Mon Sep 17 00:00:00 2001 From: Devin D'Angelo Date: Tue, 12 Mar 2024 09:34:45 -0400 Subject: [PATCH 03/13] cargo update dfcli --- datafusion-cli/Cargo.lock | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 9afd78d1cc88..984dd93088db 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -754,9 +754,9 @@ dependencies = [ [[package]] name = "blake3" -version = "1.5.0" +version = "1.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0231f06152bf547e9c2b5194f247cd97aacf6dcd8b15d8e5ec0663f64580da87" +checksum = "30cca6d3674597c30ddf2c587bf8d9d65c9a84d2326d941cc79c9842dfe0ef52" dependencies = [ "arrayref", "arrayvec", @@ -1362,6 +1362,7 @@ version = "36.0.0" dependencies = [ "arrow", "arrow-schema", + "chrono", "datafusion-common", "datafusion-expr", "log", @@ -2644,9 +2645,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.78" +version = "1.0.79" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae" +checksum = "e835ff2298f5721608eb1a980ecaee1aef2c132bf95ecc026a11b7bf3c01c02e" dependencies = [ "unicode-ident", ] @@ -3383,18 +3384,18 @@ checksum = "23d434d3f8967a09480fb04132ebe0a3e088c173e6d0ee7897abbdf4eab0f8b9" [[package]] name = "thiserror" -version = "1.0.57" +version = "1.0.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e45bcbe8ed29775f228095caf2cd67af7a4ccf756ebff23a306bf3e8b47b24b" +checksum = "03468839009160513471e86a034bb2c5c0e4baae3b43f79ffc55c4a5427b3297" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.57" +version = "1.0.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a953cb265bef375dae3de6663da4d3804eee9682ea80d8e2542529b73c531c81" +checksum = "c61f3ba182994efc43764a46c018c347bc492c79f024e705f46567b418f6d4f7" dependencies = [ "proc-macro2", "quote", From 15f9a05ce82165a702cd90bbbb8d804e21870e15 Mon Sep 17 00:00:00 2001 From: Devin D'Angelo Date: Tue, 12 Mar 2024 09:36:30 -0400 Subject: [PATCH 04/13] fix custom_dialect test --- datafusion/sql/src/unparser/expr.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index 99c3931eb65f..e893b1991fee 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -508,7 +508,7 @@ mod tests { let actual = format!("{}", ast); - let expected = r#"'a' > 4"#; + let expected = r#"('a' > 4)"#; assert_eq!(actual, expected); Ok(()) From 48c33f52dfb2c636f57a716417fd99a8a3d97b79 Mon Sep 17 00:00:00 2001 From: Devin D'Angelo Date: Tue, 12 Mar 2024 20:25:39 -0400 Subject: [PATCH 05/13] add tests and feature flag --- datafusion/sql/Cargo.toml | 5 +- datafusion/sql/src/lib.rs | 1 + datafusion/sql/src/unparser/expr.rs | 74 +++++++++++++++++++++++++++-- 3 files changed, 73 insertions(+), 7 deletions(-) diff --git a/datafusion/sql/Cargo.toml b/datafusion/sql/Cargo.toml index 0ad4521b8b4e..4bcbbca93cd9 100644 --- a/datafusion/sql/Cargo.toml +++ b/datafusion/sql/Cargo.toml @@ -33,13 +33,14 @@ name = "datafusion_sql" path = "src/lib.rs" [features] -default = ["unicode_expressions"] +default = ["unicode_expressions", "unparser"] unicode_expressions = [] +unparser = ["dep:chrono"] [dependencies] arrow = { workspace = true } arrow-schema = { workspace = true } -chrono = { workspace = true } +chrono = { workspace = true , optional=true } datafusion-common = { workspace = true, default-features = true } datafusion-expr = { workspace = true } log = { workspace = true } diff --git a/datafusion/sql/src/lib.rs b/datafusion/sql/src/lib.rs index da66ee197adb..e8e07eebe22d 100644 --- a/datafusion/sql/src/lib.rs +++ b/datafusion/sql/src/lib.rs @@ -36,6 +36,7 @@ mod relation; mod select; mod set_expr; mod statement; +#[cfg(feature = "unparser")] pub mod unparser; pub mod utils; mod values; diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index e893b1991fee..5a17de263d32 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -16,7 +16,7 @@ // under the License. use arrow_schema::DataType; -use chrono::{NaiveDate, NaiveDateTime}; +use chrono::{DateTime, NaiveDate}; use datafusion_common::{ not_impl_datafusion_err, not_impl_err, Column, Result, ScalarValue, }; @@ -43,7 +43,7 @@ static DAYS_FROM_CE_TO_UNIX_EPOCH: i32 = 719_163; /// let expr = col("a").gt(lit(4)); /// let sql = expr_to_sql(&expr).unwrap(); /// -/// assert_eq!(format!("{}", sql), "a > 4") +/// assert_eq!(format!("{}", sql), "(a > 4)") /// ``` pub fn expr_to_sql(expr: &Expr) -> Result { let unparser = Unparser::default(); @@ -318,7 +318,7 @@ impl Unparser<'_> { } ScalarValue::Date32(None) => Ok(ast::Expr::Value(ast::Value::Null)), ScalarValue::Date64(Some(ms)) => { - let datetime = NaiveDateTime::from_timestamp_millis(*ms).ok_or( + let datetime = DateTime::from_timestamp_millis(*ms).ok_or( not_impl_datafusion_err!("Datetime overflow error for {ms:?}"), )?; Ok(ast::Expr::Cast { @@ -467,7 +467,7 @@ impl Unparser<'_> { #[cfg(test)] mod tests { use datafusion_common::TableReference; - use datafusion_expr::{col, lit}; + use datafusion_expr::{col, expr::AggregateFunction, lit}; use crate::unparser::dialect::CustomDialect; @@ -476,7 +476,7 @@ mod tests { #[test] fn expr_to_sql_ok() -> Result<()> { let tests: Vec<(Expr, &str)> = vec![ - (col("a").gt(lit(4)), r#"(a > 4)"#), + ((col("a") + col("b")).gt(lit(4)), r#"((a + b) > 4)"#), ( Expr::Column(Column { relation: Some(TableReference::partial("a", "b")), @@ -485,6 +485,70 @@ mod tests { .gt(lit(4)), r#"(a.b.c > 4)"#, ), + ( + Expr::Cast(Cast { + expr: Box::new(col("a")), + data_type: DataType::Date64, + }), + r#"CAST(a AS DATETIME)"#, + ), + ( + Expr::Cast(Cast { + expr: Box::new(col("a")), + data_type: DataType::UInt32, + }), + r#"CAST(a AS INTEGER UNSIGNED)"#, + ), + ( + Expr::Literal(ScalarValue::Date64(Some(0))), + r#"CAST('1970-01-01 00:00:00 UTC' AS DATETIME)"#, + ), + ( + Expr::Literal(ScalarValue::Date64(Some(10000))), + r#"CAST('1970-01-01 00:00:10 UTC' AS DATETIME)"#, + ), + ( + Expr::Literal(ScalarValue::Date64(Some(-10000))), + r#"CAST('1969-12-31 23:59:50 UTC' AS DATETIME)"#, + ), + ( + Expr::Literal(ScalarValue::Date32(Some(0))), + r#"CAST('1970-01-01' AS DATE)"#, + ), + ( + Expr::Literal(ScalarValue::Date32(Some(10))), + r#"CAST('1970-01-11' AS DATE)"#, + ), + ( + Expr::Literal(ScalarValue::Date32(Some(-1))), + r#"CAST('1969-12-31' AS DATE)"#, + ), + ( + Expr::AggregateFunction(AggregateFunction { + func_def: AggregateFunctionDefinition::BuiltIn( + datafusion_expr::AggregateFunction::Sum, + ), + args: vec![col("a")], + distinct: false, + filter: None, + order_by: None, + null_treatment: None, + }), + "SUM(a)", + ), + ( + Expr::AggregateFunction(AggregateFunction { + func_def: AggregateFunctionDefinition::BuiltIn( + datafusion_expr::AggregateFunction::Count, + ), + args: vec![Expr::Wildcard { qualifier: None }], + distinct: true, + filter: None, + order_by: None, + null_treatment: None, + }), + "COUNT(*)", + ), ]; for (expr, expected) in tests { From b18ae76566e3b5ec341360684910d04d643f26ff Mon Sep 17 00:00:00 2001 From: Devin D'Angelo Date: Tue, 12 Mar 2024 20:37:49 -0400 Subject: [PATCH 06/13] fix comment --- datafusion/sql/src/unparser/expr.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index 5a17de263d32..65cd9ff7039b 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -28,7 +28,7 @@ use sqlparser::ast::{self, Function, FunctionArg, Ident}; use super::Unparser; -/// The number of days from day 1 CE (0001-1-1) to Unix Epoch (1970-01-01) +/// The number of days from day 1 CE (0001-01-01) to Unix Epoch (1970-01-01) static DAYS_FROM_CE_TO_UNIX_EPOCH: i32 = 719_163; /// Convert a DataFusion [`Expr`] to `sqlparser::ast::Expr` From b8bcd273de5a0d623540422ffc5130f1ed22e87c Mon Sep 17 00:00:00 2001 From: Devin D'Angelo Date: Tue, 12 Mar 2024 20:58:12 -0400 Subject: [PATCH 07/13] remove chrono use arrow-array conversions --- datafusion/sql/Cargo.toml | 4 +-- datafusion/sql/src/unparser/expr.rs | 49 ++++++++++++++++++----------- 2 files changed, 33 insertions(+), 20 deletions(-) diff --git a/datafusion/sql/Cargo.toml b/datafusion/sql/Cargo.toml index 4bcbbca93cd9..ca2c1a240c21 100644 --- a/datafusion/sql/Cargo.toml +++ b/datafusion/sql/Cargo.toml @@ -35,12 +35,12 @@ path = "src/lib.rs" [features] default = ["unicode_expressions", "unparser"] unicode_expressions = [] -unparser = ["dep:chrono"] +unparser = [] [dependencies] arrow = { workspace = true } +arrow-array = { workspace = true } arrow-schema = { workspace = true } -chrono = { workspace = true , optional=true } datafusion-common = { workspace = true, default-features = true } datafusion-expr = { workspace = true } log = { workspace = true } diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index 65cd9ff7039b..9965b1c0d199 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -15,10 +15,10 @@ // specific language governing permissions and limitations // under the License. +use arrow_array::{Date32Array, Date64Array}; use arrow_schema::DataType; -use chrono::{DateTime, NaiveDate}; use datafusion_common::{ - not_impl_datafusion_err, not_impl_err, Column, Result, ScalarValue, + internal_datafusion_err, not_impl_err, Column, Result, ScalarValue, }; use datafusion_expr::{ expr::{AggregateFunctionDefinition, Alias, InList, ScalarFunction, WindowFunction}, @@ -28,9 +28,6 @@ use sqlparser::ast::{self, Function, FunctionArg, Ident}; use super::Unparser; -/// The number of days from day 1 CE (0001-01-01) to Unix Epoch (1970-01-01) -static DAYS_FROM_CE_TO_UNIX_EPOCH: i32 = 719_163; - /// Convert a DataFusion [`Expr`] to `sqlparser::ast::Expr` /// /// This function is the opposite of `SqlToRel::sql_to_expr` and can @@ -302,12 +299,19 @@ impl Unparser<'_> { ScalarValue::FixedSizeList(_a) => not_impl_err!("Unsupported scalar: {v:?}"), ScalarValue::List(_a) => not_impl_err!("Unsupported scalar: {v:?}"), ScalarValue::LargeList(_a) => not_impl_err!("Unsupported scalar: {v:?}"), - ScalarValue::Date32(Some(d)) => { - let date = - NaiveDate::from_num_days_from_ce_opt(d + DAYS_FROM_CE_TO_UNIX_EPOCH) - .ok_or(not_impl_datafusion_err!( - "Date overflow error for {d:?}" - ))?; + ScalarValue::Date32(Some(_)) => { + let date = v + .to_array()? + .as_any() + .downcast_ref::() + .ok_or(internal_datafusion_err!( + "Unable to downcast to Date32 from Date32 scalar" + ))? + .value_as_date(0) + .ok_or(internal_datafusion_err!( + "Unable to convert Date32 to NaiveDate" + ))?; + Ok(ast::Expr::Cast { expr: Box::new(ast::Expr::Value(ast::Value::SingleQuotedString( date.to_string(), @@ -317,10 +321,19 @@ impl Unparser<'_> { }) } ScalarValue::Date32(None) => Ok(ast::Expr::Value(ast::Value::Null)), - ScalarValue::Date64(Some(ms)) => { - let datetime = DateTime::from_timestamp_millis(*ms).ok_or( - not_impl_datafusion_err!("Datetime overflow error for {ms:?}"), - )?; + ScalarValue::Date64(Some(_)) => { + let datetime = v + .to_array()? + .as_any() + .downcast_ref::() + .ok_or(internal_datafusion_err!( + "Unable to downcast to Date64 from Date64 scalar" + ))? + .value_as_datetime(0) + .ok_or(internal_datafusion_err!( + "Unable to convert Date64 to NaiveDateTime" + ))?; + Ok(ast::Expr::Cast { expr: Box::new(ast::Expr::Value(ast::Value::SingleQuotedString( datetime.to_string(), @@ -501,15 +514,15 @@ mod tests { ), ( Expr::Literal(ScalarValue::Date64(Some(0))), - r#"CAST('1970-01-01 00:00:00 UTC' AS DATETIME)"#, + r#"CAST('1970-01-01 00:00:00' AS DATETIME)"#, ), ( Expr::Literal(ScalarValue::Date64(Some(10000))), - r#"CAST('1970-01-01 00:00:10 UTC' AS DATETIME)"#, + r#"CAST('1970-01-01 00:00:10' AS DATETIME)"#, ), ( Expr::Literal(ScalarValue::Date64(Some(-10000))), - r#"CAST('1969-12-31 23:59:50 UTC' AS DATETIME)"#, + r#"CAST('1969-12-31 23:59:50' AS DATETIME)"#, ), ( Expr::Literal(ScalarValue::Date32(Some(0))), From 937e3c8c347b2b0636431ba9599b111851b760e1 Mon Sep 17 00:00:00 2001 From: Devin D'Angelo Date: Tue, 12 Mar 2024 21:07:39 -0400 Subject: [PATCH 08/13] fix cargo lock again --- datafusion-cli/Cargo.lock | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 984dd93088db..1a7f00a8e2ce 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -1361,8 +1361,8 @@ name = "datafusion-sql" version = "36.0.0" dependencies = [ "arrow", + "arrow-array", "arrow-schema", - "chrono", "datafusion-common", "datafusion-expr", "log", @@ -2774,9 +2774,9 @@ checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" [[package]] name = "reqwest" -version = "0.11.25" +version = "0.11.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0eea5a9eb898d3783f17c6407670e3592fd174cb81a10e51d4c37f49450b9946" +checksum = "78bf93c4af7a8bb7d879d51cebe797356ff10ae8516ace542b5182d9dcac10b2" dependencies = [ "base64 0.21.7", "bytes", @@ -3330,20 +3330,20 @@ checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" [[package]] name = "system-configuration" -version = "0.6.0" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "658bc6ee10a9b4fcf576e9b0819d95ec16f4d2c02d39fd83ac1c8789785c4a42" +checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7" dependencies = [ - "bitflags 2.4.2", + "bitflags 1.3.2", "core-foundation", "system-configuration-sys", ] [[package]] name = "system-configuration-sys" -version = "0.6.0" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e1d1b10ced5ca923a1fcb8d03e96b8d3268065d724548c0211415ff6ac6bac4" +checksum = "a75fb188eb626b924683e3b95e3a48e63551fcfb51949de2f06a9d91dbee93c9" dependencies = [ "core-foundation-sys", "libc", From 4b7d5f5998fb621610786ee72ffa5e3dc6d53f8a Mon Sep 17 00:00:00 2001 From: Devin D'Angelo Date: Tue, 12 Mar 2024 21:17:44 -0400 Subject: [PATCH 09/13] fix count distinct --- datafusion/sql/src/unparser/expr.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index 9965b1c0d199..69987fc55c9c 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -146,7 +146,7 @@ impl Unparser<'_> { filter: None, null_treatment: None, over: None, - distinct: false, + distinct: agg.distinct, special: false, order_by: vec![], })) @@ -560,7 +560,7 @@ mod tests { order_by: None, null_treatment: None, }), - "COUNT(*)", + "COUNT(DISTINCT *)", ), ]; From 0cda50c324a3a1700709c7f30942cf5f0b1e4e62 Mon Sep 17 00:00:00 2001 From: Devin D'Angelo Date: Tue, 12 Mar 2024 21:23:36 -0400 Subject: [PATCH 10/13] retry windows ci From a7e4be6a96b40852c31a6a5aa2028e352cb623fc Mon Sep 17 00:00:00 2001 From: Devin D'Angelo Date: Wed, 13 Mar 2024 08:14:16 -0400 Subject: [PATCH 11/13] retry windows ci again From a2e8b6189f80e723d95782b8a89fcfae797367fa Mon Sep 17 00:00:00 2001 From: Devin D'Angelo Date: Wed, 13 Mar 2024 19:20:11 -0400 Subject: [PATCH 12/13] add roundtrip tests --- README.md | 1 + datafusion/sql/src/unparser/expr.rs | 4 ++-- datafusion/sql/tests/sql_integration.rs | 17 ++++++++++------- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index e5ac9503be44..abd727672aca 100644 --- a/README.md +++ b/README.md @@ -83,6 +83,7 @@ Default features: - `parquet`: support for reading the [Apache Parquet] format - `regex_expressions`: regular expression functions, such as `regexp_match` - `unicode_expressions`: Include unicode aware functions such as `character_length` +- `unparser` : enables support to reverse LogicalPlans back into SQL Optional features: diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index 0edcf1a3e3ee..403a7c6193d0 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -461,8 +461,8 @@ impl Unparser<'_> { DataType::Binary => todo!(), DataType::FixedSizeBinary(_) => todo!(), DataType::LargeBinary => todo!(), - DataType::Utf8 => todo!(), - DataType::LargeUtf8 => todo!(), + DataType::Utf8 => Ok(ast::DataType::Varchar(None)), + DataType::LargeUtf8 => Ok(ast::DataType::Text), DataType::List(_) => todo!(), DataType::FixedSizeList(_, _) => todo!(), DataType::LargeList(_) => todo!(), diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index fdf7ab8c3d28..396eebfdd767 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -4493,8 +4493,11 @@ impl TableSource for EmptyTable { #[test] fn roundtrip_expr() { let tests: Vec<(TableReference, &str, &str)> = vec![ - (TableReference::bare("person"), "age > 35", "age > 35"), - (TableReference::bare("person"), "id = '10'", "id = '10'"), + (TableReference::bare("person"), "age > 35", "(age > 35)"), + (TableReference::bare("person"), "id = '10'", "(id = '10')"), + (TableReference::bare("person"), "CAST(id AS VARCHAR)", "CAST(id AS VARCHAR)"), + (TableReference::bare("person"), "SUM((age * 2))", "SUM((age * 2))"), + ]; let roundtrip = |table, sql: &str| -> Result { @@ -4540,15 +4543,15 @@ fn roundtrip_statement() { ), ( "select ta.j1_id from j1 ta where ta.j1_id > 1;", - r#"SELECT ta.j1_id FROM j1 AS ta WHERE ta.j1_id > 1"#, + r#"SELECT ta.j1_id FROM j1 AS ta WHERE (ta.j1_id > 1)"#, ), ( - "select ta.j1_id, tb.j2_string from j1 ta join j2 tb on ta.j1_id = tb.j2_id;", - r#"SELECT ta.j1_id, tb.j2_string FROM j1 AS ta JOIN j2 AS tb ON ta.j1_id = tb.j2_id"#, + "select ta.j1_id, tb.j2_string from j1 ta join j2 tb on (ta.j1_id = tb.j2_id);", + r#"SELECT ta.j1_id, tb.j2_string FROM j1 AS ta JOIN j2 AS tb ON (ta.j1_id = tb.j2_id)"#, ), ( - "select ta.j1_id, tb.j2_string, tc.j3_string from j1 ta join j2 tb on ta.j1_id = tb.j2_id join j3 tc on ta.j1_id = tc.j3_id;", - r#"SELECT ta.j1_id, tb.j2_string, tc.j3_string FROM j1 AS ta JOIN j2 AS tb ON ta.j1_id = tb.j2_id JOIN j3 AS tc ON ta.j1_id = tc.j3_id"#, + "select ta.j1_id, tb.j2_string, tc.j3_string from j1 ta join j2 tb on (ta.j1_id = tb.j2_id) join j3 tc on (ta.j1_id = tc.j3_id);", + r#"SELECT ta.j1_id, tb.j2_string, tc.j3_string FROM j1 AS ta JOIN j2 AS tb ON (ta.j1_id = tb.j2_id) JOIN j3 AS tc ON (ta.j1_id = tc.j3_id)"#, ), ]; From 9564e8c7229ff96947109160bf80616aed080a8f Mon Sep 17 00:00:00 2001 From: Devin D'Angelo Date: Wed, 13 Mar 2024 19:30:30 -0400 Subject: [PATCH 13/13] cargo fmt --- datafusion/sql/tests/sql_integration.rs | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index 396eebfdd767..a6ea22db9651 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -4495,9 +4495,16 @@ fn roundtrip_expr() { let tests: Vec<(TableReference, &str, &str)> = vec![ (TableReference::bare("person"), "age > 35", "(age > 35)"), (TableReference::bare("person"), "id = '10'", "(id = '10')"), - (TableReference::bare("person"), "CAST(id AS VARCHAR)", "CAST(id AS VARCHAR)"), - (TableReference::bare("person"), "SUM((age * 2))", "SUM((age * 2))"), - + ( + TableReference::bare("person"), + "CAST(id AS VARCHAR)", + "CAST(id AS VARCHAR)", + ), + ( + TableReference::bare("person"), + "SUM((age * 2))", + "SUM((age * 2))", + ), ]; let roundtrip = |table, sql: &str| -> Result {