From c68e9775a22acf00e54b33542b10ac6d1a8cf887 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Fri, 20 Oct 2023 20:33:12 +0200 Subject: [PATCH 1/6] Support bigquery `CAST AS x [STRING|DATE] FORMAT` syntax (#978) --- src/ast/mod.rs | 64 ++++++++++++++++++++++++++++++++++-- src/parser/mod.rs | 23 +++++++++++++ tests/sqlparser_bigquery.rs | 35 ++++++++++++++++++-- tests/sqlparser_common.rs | 10 ++++++ tests/sqlparser_postgres.rs | 3 +- tests/sqlparser_snowflake.rs | 1 + 6 files changed, 130 insertions(+), 6 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 87f7ebb37..fc15efbc4 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -322,6 +322,16 @@ impl fmt::Display for JsonOperator { } } +/// Options for `CAST` / `TRY_CAST` +/// BigQuery: +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum CastFormat { + Value(Value), + ValueAtTimeZone(Value, Value), +} + /// An SQL expression of any type. /// /// The parser does not distinguish between expressions of different types @@ -437,12 +447,18 @@ pub enum Expr { Cast { expr: Box, data_type: DataType, + // Optional CAST(string_expression AS type FORMAT format_string_expression) as used by BigQuery + // https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#formatting_syntax + format: Option, }, /// TRY_CAST an expression to a different data type e.g. `TRY_CAST(foo AS VARCHAR(123))` // this differs from CAST in the choice of how to implement invalid conversions TryCast { expr: Box, data_type: DataType, + // Optional CAST(string_expression AS type FORMAT format_string_expression) as used by BigQuery + // https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#formatting_syntax + format: Option, }, /// SAFE_CAST an expression to a different data type e.g. `SAFE_CAST(foo AS FLOAT64)` // only available for BigQuery: https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-and-operators#safe_casting @@ -450,6 +466,9 @@ pub enum Expr { SafeCast { expr: Box, data_type: DataType, + // Optional CAST(string_expression AS type FORMAT format_string_expression) as used by BigQuery + // https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#formatting_syntax + format: Option, }, /// AT a timestamp to a different timezone e.g. `FROM_UNIXTIME(0) AT TIME ZONE 'UTC-06:00'` AtTimeZone { @@ -597,6 +616,15 @@ pub enum Expr { }, } +impl fmt::Display for CastFormat { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + CastFormat::Value(v) => write!(f, "{v}"), + CastFormat::ValueAtTimeZone(v, tz) => write!(f, "{v} AT TIME ZONE {tz}"), + } + } +} + impl fmt::Display for Expr { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { @@ -753,9 +781,39 @@ impl fmt::Display for Expr { write!(f, "{op}{expr}") } } - Expr::Cast { expr, data_type } => write!(f, "CAST({expr} AS {data_type})"), - Expr::TryCast { expr, data_type } => write!(f, "TRY_CAST({expr} AS {data_type})"), - Expr::SafeCast { expr, data_type } => write!(f, "SAFE_CAST({expr} AS {data_type})"), + Expr::Cast { + expr, + data_type, + format, + } => { + if let Some(format) = format { + write!(f, "CAST({expr} AS {data_type} FORMAT {format})") + } else { + write!(f, "CAST({expr} AS {data_type})") + } + } + Expr::TryCast { + expr, + data_type, + format, + } => { + if let Some(format) = format { + write!(f, "TRY_CAST({expr} AS {data_type} FORMAT {format})") + } else { + write!(f, "TRY_CAST({expr} AS {data_type})") + } + } + Expr::SafeCast { + expr, + data_type, + format, + } => { + if let Some(format) = format { + write!(f, "SAFE_CAST({expr} AS {data_type} FORMAT {format})") + } else { + write!(f, "SAFE_CAST({expr} AS {data_type})") + } + } Expr::Extract { field, expr } => write!(f, "EXTRACT({field} FROM {expr})"), Expr::Ceil { expr, field } => { if field == &DateTimeField::NoDateTime { diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 95f1f8edc..829b299af 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1139,16 +1139,34 @@ impl<'a> Parser<'a> { }) } + pub fn parse_optional_cast_format(&mut self) -> Result, ParserError> { + if self.parse_keyword(Keyword::FORMAT) { + let value = self.parse_value()?; + if self.parse_keywords(&[Keyword::AT, Keyword::TIME, Keyword::ZONE]) { + Ok(Some(CastFormat::ValueAtTimeZone( + value, + self.parse_value()?, + ))) + } else { + Ok(Some(CastFormat::Value(value))) + } + } else { + Ok(None) + } + } + /// Parse a SQL CAST function e.g. `CAST(expr AS FLOAT)` pub fn parse_cast_expr(&mut self) -> Result { self.expect_token(&Token::LParen)?; let expr = self.parse_expr()?; self.expect_keyword(Keyword::AS)?; let data_type = self.parse_data_type()?; + let format = self.parse_optional_cast_format()?; self.expect_token(&Token::RParen)?; Ok(Expr::Cast { expr: Box::new(expr), data_type, + format, }) } @@ -1158,10 +1176,12 @@ impl<'a> Parser<'a> { let expr = self.parse_expr()?; self.expect_keyword(Keyword::AS)?; let data_type = self.parse_data_type()?; + let format = self.parse_optional_cast_format()?; self.expect_token(&Token::RParen)?; Ok(Expr::TryCast { expr: Box::new(expr), data_type, + format, }) } @@ -1171,10 +1191,12 @@ impl<'a> Parser<'a> { let expr = self.parse_expr()?; self.expect_keyword(Keyword::AS)?; let data_type = self.parse_data_type()?; + let format = self.parse_optional_cast_format()?; self.expect_token(&Token::RParen)?; Ok(Expr::SafeCast { expr: Box::new(expr), data_type, + format, }) } @@ -2101,6 +2123,7 @@ impl<'a> Parser<'a> { Ok(Expr::Cast { expr: Box::new(expr), data_type: self.parse_data_type()?, + format: None, }) } diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 7a9a8d1c4..b3f683b9a 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -304,8 +304,39 @@ fn parse_trailing_comma() { #[test] fn parse_cast_type() { - let sql = r#"SELECT SAFE_CAST(1 AS INT64)"#; - bigquery().verified_only_select(sql); + let sql = r"SELECT SAFE_CAST(1 AS INT64)"; + bigquery_and_generic().verified_only_select(sql); +} + +#[test] +fn parse_cast_date_format() { + let sql = + r"SELECT CAST(date_valid_from AS DATE FORMAT 'YYYY-MM-DD') AS date_valid_from FROM foo"; + bigquery_and_generic().verified_only_select(sql); +} + +#[test] +fn parse_cast_time_format() { + let sql = r"SELECT CAST(TIME '21:30:00' AS STRING FORMAT 'PM') AS date_time_to_string"; + bigquery_and_generic().verified_only_select(sql); +} + +#[test] +fn parse_cast_timestamp_format_tz() { + let sql = r"SELECT CAST(TIMESTAMP '2008-12-25 00:00:00+00:00' AS STRING FORMAT 'TZH' AT TIME ZONE 'Asia/Kolkata') AS date_time_to_string"; + bigquery_and_generic().verified_only_select(sql); +} + +#[test] +fn parse_cast_string_to_bytes_format() { + let sql = r"SELECT CAST('Hello' AS BYTES FORMAT 'ASCII') AS string_to_bytes"; + bigquery_and_generic().verified_only_select(sql); +} + +#[test] +fn parse_cast_bytes_to_string_format() { + let sql = r"SELECT CAST(B'\x48\x65\x6c\x6c\x6f' AS STRING FORMAT 'ASCII') AS bytes_to_string"; + bigquery_and_generic().verified_only_select(sql); } #[test] diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 1511aa76e..ff8bdd7a4 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -1934,6 +1934,7 @@ fn parse_cast() { &Expr::Cast { expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::BigInt(None), + format: None, }, expr_from_projection(only(&select.projection)) ); @@ -1944,6 +1945,7 @@ fn parse_cast() { &Expr::Cast { expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::TinyInt(None), + format: None, }, expr_from_projection(only(&select.projection)) ); @@ -1970,6 +1972,7 @@ fn parse_cast() { &Expr::Cast { expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::Nvarchar(Some(50)), + format: None, }, expr_from_projection(only(&select.projection)) ); @@ -1980,6 +1983,7 @@ fn parse_cast() { &Expr::Cast { expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::Clob(None), + format: None, }, expr_from_projection(only(&select.projection)) ); @@ -1990,6 +1994,7 @@ fn parse_cast() { &Expr::Cast { expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::Clob(Some(50)), + format: None, }, expr_from_projection(only(&select.projection)) ); @@ -2000,6 +2005,7 @@ fn parse_cast() { &Expr::Cast { expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::Binary(Some(50)), + format: None, }, expr_from_projection(only(&select.projection)) ); @@ -2010,6 +2016,7 @@ fn parse_cast() { &Expr::Cast { expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::Varbinary(Some(50)), + format: None, }, expr_from_projection(only(&select.projection)) ); @@ -2020,6 +2027,7 @@ fn parse_cast() { &Expr::Cast { expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::Blob(None), + format: None, }, expr_from_projection(only(&select.projection)) ); @@ -2030,6 +2038,7 @@ fn parse_cast() { &Expr::Cast { expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::Blob(Some(50)), + format: None, }, expr_from_projection(only(&select.projection)) ); @@ -2043,6 +2052,7 @@ fn parse_try_cast() { &Expr::TryCast { expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::BigInt(None), + format: None, }, expr_from_projection(only(&select.projection)) ); diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index fe336bda7..654723668 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -1782,7 +1782,8 @@ fn parse_array_index_expr() { })), data_type: DataType::Array(Some(Box::new(DataType::Array(Some(Box::new( DataType::Int(None) - )))))) + )))))), + format: None, }))), indexes: vec![num[1].clone(), num[2].clone()], }, diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index e92656d0b..bb988665d 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -167,6 +167,7 @@ fn parse_array() { &Expr::Cast { expr: Box::new(Expr::Identifier(Ident::new("a"))), data_type: DataType::Array(None), + format: None, }, expr_from_projection(only(&select.projection)) ); From 88510f662563786a6e3af6b1ed109444bcd332e7 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Fri, 20 Oct 2023 21:49:18 +0200 Subject: [PATCH 2/6] fix column `COLLATE` not displayed (#1012) --- src/ast/ddl.rs | 3 +++ tests/sqlparser_common.rs | 5 +++++ 2 files changed, 8 insertions(+) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index a4640d557..f1575d979 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -517,6 +517,9 @@ pub struct ColumnDef { impl fmt::Display for ColumnDef { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{} {}", self.name, self.data_type)?; + if let Some(collation) = &self.collation { + write!(f, " COLLATE {collation}")?; + } for option in &self.options { write!(f, " {option}")?; } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index ff8bdd7a4..3b8775e45 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -7635,3 +7635,8 @@ fn parse_create_type() { create_type ); } + +#[test] +fn parse_create_table_collate() { + pg_and_generic().verified_stmt("CREATE TABLE tbl (foo INT, bar TEXT COLLATE \"de_DE\")"); +} From c03586b727a659bb6d22d77910f4d4e9b9d9688c Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Fri, 20 Oct 2023 22:13:22 +0200 Subject: [PATCH 3/6] Support mysql `RLIKE` and `REGEXP` binary operators (#1017) --- src/ast/mod.rs | 21 +++++++++++++++++++++ src/keywords.rs | 2 ++ src/parser/mod.rs | 19 +++++++++++++++++-- src/test_utils.rs | 2 +- tests/sqlparser_mysql.rs | 12 ++++++++++++ 5 files changed, 53 insertions(+), 3 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index fc15efbc4..3b0030017 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -429,6 +429,14 @@ pub enum Expr { pattern: Box, escape_char: Option, }, + /// MySQL: RLIKE regex or REGEXP regex + RLike { + negated: bool, + expr: Box, + pattern: Box, + // true for REGEXP, false for RLIKE (no difference in semantics) + regexp: bool, + }, /// Any operation e.g. `foo > ANY(bar)`, comparison operator is one of [=, >, <, =>, =<, !=] AnyOp { left: Box, @@ -740,6 +748,19 @@ impl fmt::Display for Expr { pattern ), }, + Expr::RLike { + negated, + expr, + pattern, + regexp, + } => write!( + f, + "{} {}{} {}", + expr, + if *negated { "NOT " } else { "" }, + if *regexp { "REGEXP" } else { "RLIKE" }, + pattern + ), Expr::SimilarTo { negated, expr, diff --git a/src/keywords.rs b/src/keywords.rs index e1bbf44ae..6327ccc84 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -498,6 +498,7 @@ define_keywords!( REFERENCES, REFERENCING, REGCLASS, + REGEXP, REGR_AVGX, REGR_AVGY, REGR_COUNT, @@ -524,6 +525,7 @@ define_keywords!( RETURNS, REVOKE, RIGHT, + RLIKE, ROLE, ROLLBACK, ROLLUP, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 829b299af..0065f7987 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1932,10 +1932,21 @@ impl<'a> Parser<'a> { | Keyword::BETWEEN | Keyword::LIKE | Keyword::ILIKE - | Keyword::SIMILAR => { + | Keyword::SIMILAR + | Keyword::REGEXP + | Keyword::RLIKE => { self.prev_token(); let negated = self.parse_keyword(Keyword::NOT); - if self.parse_keyword(Keyword::IN) { + let regexp = self.parse_keyword(Keyword::REGEXP); + let rlike = self.parse_keyword(Keyword::RLIKE); + if regexp || rlike { + Ok(Expr::RLike { + negated, + expr: Box::new(expr), + pattern: Box::new(self.parse_subexpr(Self::LIKE_PREC)?), + regexp, + }) + } else if self.parse_keyword(Keyword::IN) { self.parse_in(expr, negated) } else if self.parse_keyword(Keyword::BETWEEN) { self.parse_between(expr, negated) @@ -2178,6 +2189,8 @@ impl<'a> Parser<'a> { Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(Self::BETWEEN_PREC), Token::Word(w) if w.keyword == Keyword::LIKE => Ok(Self::LIKE_PREC), Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(Self::LIKE_PREC), + Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(Self::LIKE_PREC), + Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(Self::LIKE_PREC), Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(Self::LIKE_PREC), _ => Ok(0), }, @@ -2186,6 +2199,8 @@ impl<'a> Parser<'a> { Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(Self::BETWEEN_PREC), Token::Word(w) if w.keyword == Keyword::LIKE => Ok(Self::LIKE_PREC), Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(Self::LIKE_PREC), + Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(Self::LIKE_PREC), + Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(Self::LIKE_PREC), Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(Self::LIKE_PREC), Token::Word(w) if w.keyword == Keyword::OPERATOR => Ok(Self::BETWEEN_PREC), Token::Word(w) if w.keyword == Keyword::DIV => Ok(Self::MUL_DIV_MOD_OP_PREC), diff --git a/src/test_utils.rs b/src/test_utils.rs index f0c5e425a..76a3e073b 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -111,7 +111,7 @@ impl TestedDialects { /// 2. re-serializing the result of parsing `sql` produces the same /// `canonical` sql string pub fn one_statement_parses_to(&self, sql: &str, canonical: &str) -> Statement { - let mut statements = self.parse_sql_statements(sql).unwrap(); + let mut statements = self.parse_sql_statements(sql).expect(sql); assert_eq!(statements.len(), 1); if !canonical.is_empty() && sql != canonical { diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 80b9dcfd8..6e59198d7 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -1454,6 +1454,18 @@ fn parse_show_variables() { mysql_and_generic().verified_stmt("SHOW VARIABLES WHERE value = '3306'"); } +#[test] +fn parse_rlike_and_regexp() { + for s in &[ + "SELECT 1 WHERE 'a' RLIKE '^a$'", + "SELECT 1 WHERE 'a' REGEXP '^a$'", + "SELECT 1 WHERE 'a' NOT RLIKE '^a$'", + "SELECT 1 WHERE 'a' NOT REGEXP '^a$'", + ] { + mysql_and_generic().verified_only_select(s); + } +} + #[test] fn parse_kill() { let stmt = mysql_and_generic().verified_stmt("KILL CONNECTION 5"); From 5c10668dbb60bccaf11f224013d333a48e32ec38 Mon Sep 17 00:00:00 2001 From: Alexander Beedie Date: Tue, 24 Oct 2023 01:37:31 +0400 Subject: [PATCH 4/6] Add support for UNION DISTINCT BY NAME syntax (#997) Co-authored-by: Andrew Lamb --- src/ast/query.rs | 5 +- src/parser/mod.rs | 4 +- tests/sqlparser_duckdb.rs | 232 ++++++++++++++------------------------ 3 files changed, 89 insertions(+), 152 deletions(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index 88b0931de..824fab1ba 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -120,7 +120,8 @@ impl fmt::Display for SetExpr { SetQuantifier::All | SetQuantifier::Distinct | SetQuantifier::ByName - | SetQuantifier::AllByName => write!(f, " {set_quantifier}")?, + | SetQuantifier::AllByName + | SetQuantifier::DistinctByName => write!(f, " {set_quantifier}")?, SetQuantifier::None => write!(f, "{set_quantifier}")?, } write!(f, " {right}")?; @@ -160,6 +161,7 @@ pub enum SetQuantifier { Distinct, ByName, AllByName, + DistinctByName, None, } @@ -170,6 +172,7 @@ impl fmt::Display for SetQuantifier { SetQuantifier::Distinct => write!(f, "DISTINCT"), SetQuantifier::ByName => write!(f, "BY NAME"), SetQuantifier::AllByName => write!(f, "ALL BY NAME"), + SetQuantifier::DistinctByName => write!(f, "DISTINCT BY NAME"), SetQuantifier::None => write!(f, ""), } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 0065f7987..68a8cef1f 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -5696,7 +5696,9 @@ impl<'a> Parser<'a> { pub fn parse_set_quantifier(&mut self, op: &Option) -> SetQuantifier { match op { Some(SetOperator::Union) => { - if self.parse_keywords(&[Keyword::BY, Keyword::NAME]) { + if self.parse_keywords(&[Keyword::DISTINCT, Keyword::BY, Keyword::NAME]) { + SetQuantifier::DistinctByName + } else if self.parse_keywords(&[Keyword::BY, Keyword::NAME]) { SetQuantifier::ByName } else if self.parse_keyword(Keyword::ALL) { if self.parse_keywords(&[Keyword::BY, Keyword::NAME]) { diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index b05cc0dd4..db11d1e77 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -132,155 +132,87 @@ fn test_create_table_macro() { #[test] fn test_select_union_by_name() { - let ast = duckdb().verified_query("SELECT * FROM capitals UNION BY NAME SELECT * FROM weather"); - let expected = Box::::new(SetExpr::SetOperation { - op: SetOperator::Union, - set_quantifier: SetQuantifier::ByName, - left: Box::::new(SetExpr::Select(Box::new(Select { - distinct: None, - top: None, - projection: vec![SelectItem::Wildcard(WildcardAdditionalOptions { - opt_exclude: None, - opt_except: None, - opt_rename: None, - opt_replace: None, - })], - into: None, - from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident { - value: "capitals".to_string(), - quote_style: None, - }]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - }, - joins: vec![], - }], - lateral_views: vec![], - selection: None, - group_by: GroupByExpr::Expressions(vec![]), - cluster_by: vec![], - distribute_by: vec![], - sort_by: vec![], - having: None, - named_window: vec![], - qualify: None, - }))), - right: Box::::new(SetExpr::Select(Box::new(Select { - distinct: None, - top: None, - projection: vec![SelectItem::Wildcard(WildcardAdditionalOptions { - opt_exclude: None, - opt_except: None, - opt_rename: None, - opt_replace: None, - })], - into: None, - from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident { - value: "weather".to_string(), - quote_style: None, - }]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - }, - joins: vec![], - }], - lateral_views: vec![], - selection: None, - group_by: GroupByExpr::Expressions(vec![]), - cluster_by: vec![], - distribute_by: vec![], - sort_by: vec![], - having: None, - named_window: vec![], - qualify: None, - }))), - }); - - assert_eq!(ast.body, expected); + let q1 = "SELECT * FROM capitals UNION BY NAME SELECT * FROM weather"; + let q2 = "SELECT * FROM capitals UNION ALL BY NAME SELECT * FROM weather"; + let q3 = "SELECT * FROM capitals UNION DISTINCT BY NAME SELECT * FROM weather"; - let ast = - duckdb().verified_query("SELECT * FROM capitals UNION ALL BY NAME SELECT * FROM weather"); - let expected = Box::::new(SetExpr::SetOperation { - op: SetOperator::Union, - set_quantifier: SetQuantifier::AllByName, - left: Box::::new(SetExpr::Select(Box::new(Select { - distinct: None, - top: None, - projection: vec![SelectItem::Wildcard(WildcardAdditionalOptions { - opt_exclude: None, - opt_except: None, - opt_rename: None, - opt_replace: None, - })], - into: None, - from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident { - value: "capitals".to_string(), - quote_style: None, - }]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - }, - joins: vec![], - }], - lateral_views: vec![], - selection: None, - group_by: GroupByExpr::Expressions(vec![]), - cluster_by: vec![], - distribute_by: vec![], - sort_by: vec![], - having: None, - named_window: vec![], - qualify: None, - }))), - right: Box::::new(SetExpr::Select(Box::new(Select { - distinct: None, - top: None, - projection: vec![SelectItem::Wildcard(WildcardAdditionalOptions { - opt_exclude: None, - opt_except: None, - opt_rename: None, - opt_replace: None, - })], - into: None, - from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident { - value: "weather".to_string(), - quote_style: None, - }]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - }, - joins: vec![], - }], - lateral_views: vec![], - selection: None, - group_by: GroupByExpr::Expressions(vec![]), - cluster_by: vec![], - distribute_by: vec![], - sort_by: vec![], - having: None, - named_window: vec![], - qualify: None, - }))), - }); - assert_eq!(ast.body, expected); + for (ast, expected_quantifier) in &[ + (duckdb().verified_query(q1), SetQuantifier::ByName), + (duckdb().verified_query(q2), SetQuantifier::AllByName), + (duckdb().verified_query(q3), SetQuantifier::DistinctByName), + ] { + let expected = Box::::new(SetExpr::SetOperation { + op: SetOperator::Union, + set_quantifier: *expected_quantifier, + left: Box::::new(SetExpr::Select(Box::new(Select { + distinct: None, + top: None, + projection: vec![SelectItem::Wildcard(WildcardAdditionalOptions { + opt_exclude: None, + opt_except: None, + opt_rename: None, + opt_replace: None, + })], + into: None, + from: vec![TableWithJoins { + relation: TableFactor::Table { + name: ObjectName(vec![Ident { + value: "capitals".to_string(), + quote_style: None, + }]), + alias: None, + args: None, + with_hints: vec![], + version: None, + partitions: vec![], + }, + joins: vec![], + }], + lateral_views: vec![], + selection: None, + group_by: GroupByExpr::Expressions(vec![]), + cluster_by: vec![], + distribute_by: vec![], + sort_by: vec![], + having: None, + named_window: vec![], + qualify: None, + }))), + right: Box::::new(SetExpr::Select(Box::new(Select { + distinct: None, + top: None, + projection: vec![SelectItem::Wildcard(WildcardAdditionalOptions { + opt_exclude: None, + opt_except: None, + opt_rename: None, + opt_replace: None, + })], + into: None, + from: vec![TableWithJoins { + relation: TableFactor::Table { + name: ObjectName(vec![Ident { + value: "weather".to_string(), + quote_style: None, + }]), + alias: None, + args: None, + with_hints: vec![], + version: None, + partitions: vec![], + }, + joins: vec![], + }], + lateral_views: vec![], + selection: None, + group_by: GroupByExpr::Expressions(vec![]), + cluster_by: vec![], + distribute_by: vec![], + sort_by: vec![], + having: None, + named_window: vec![], + qualify: None, + }))), + }); + assert_eq!(ast.body, expected); + } } From 56f24ce2361bb2f9ee9d7566c3b1ce256ee02d8b Mon Sep 17 00:00:00 2001 From: Joey Hain Date: Mon, 23 Oct 2023 14:50:45 -0700 Subject: [PATCH 5/6] Support subquery as function arg w/o parens in Snowflake dialect (#996) --- src/parser/mod.rs | 20 +++++++++++++++++++- tests/sqlparser_snowflake.rs | 20 ++++++++++++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 68a8cef1f..1c1d8b23e 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1507,7 +1507,7 @@ impl<'a> Parser<'a> { within_group: false, })); } - // Snowflake defines ORDERY BY in within group instead of inside the function like + // Snowflake defines ORDER BY in within group instead of inside the function like // ANSI SQL. self.expect_token(&Token::RParen)?; let within_group = if self.parse_keywords(&[Keyword::WITHIN, Keyword::GROUP]) { @@ -6914,6 +6914,24 @@ impl<'a> Parser<'a> { if self.consume_token(&Token::RParen) { Ok((vec![], vec![])) } else { + // Snowflake permits a subquery to be passed as an argument without + // an enclosing set of parens if it's the only argument. + if dialect_of!(self is SnowflakeDialect) + && self + .parse_one_of_keywords(&[Keyword::WITH, Keyword::SELECT]) + .is_some() + { + self.prev_token(); + let subquery = self.parse_query()?; + self.expect_token(&Token::RParen)?; + return Ok(( + vec![FunctionArg::Unnamed(FunctionArgExpr::from( + WildcardExpr::Expr(Expr::Subquery(Box::new(subquery))), + ))], + vec![], + )); + } + let args = self.parse_comma_separated(Parser::parse_function_args)?; let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { self.parse_comma_separated(Parser::parse_order_by_expr)? diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index bb988665d..79c9eb1ea 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -1065,3 +1065,23 @@ fn test_snowflake_trim() { snowflake().parse_sql_statements(error_sql).unwrap_err() ); } + +#[test] +fn parse_subquery_function_argument() { + // Snowflake allows passing an unparenthesized subquery as the single + // argument to a function. + snowflake().one_statement_parses_to( + "SELECT parse_json(SELECT '{}')", + "SELECT parse_json((SELECT '{}'))", + ); + + // Subqueries that begin with WITH work too. + snowflake().one_statement_parses_to( + "SELECT parse_json(WITH q AS (SELECT '{}' AS foo) SELECT foo FROM q)", + "SELECT parse_json((WITH q AS (SELECT '{}' AS foo) SELECT foo FROM q))", + ); + + // Commas are parsed as part of the subquery, not additional arguments to + // the function. + snowflake().one_statement_parses_to("SELECT func(SELECT 1, 2)", "SELECT func((SELECT 1, 2))"); +} From e857a452016d82dfc00398a5483ce9551dff9565 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Mon, 23 Oct 2023 23:55:11 +0200 Subject: [PATCH 6/6] Support `SELECT * EXCEPT/REPLACE` syntax from ClickHouse (#1013) --- src/ast/query.rs | 2 ++ src/parser/mod.rs | 37 +++++++++++++++++++++++------------ tests/sqlparser_clickhouse.rs | 18 +++++++++++++++++ 3 files changed, 44 insertions(+), 13 deletions(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index 824fab1ba..4289b0bde 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -434,11 +434,13 @@ pub struct WildcardAdditionalOptions { /// `[EXCLUDE...]`. pub opt_exclude: Option, /// `[EXCEPT...]`. + /// Clickhouse syntax: pub opt_except: Option, /// `[RENAME ...]`. pub opt_rename: Option, /// `[REPLACE]` /// BigQuery syntax: + /// Clickhouse syntax: pub opt_replace: Option, } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 1c1d8b23e..9e0d595cb 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -6993,7 +6993,8 @@ impl<'a> Parser<'a> { } else { None }; - let opt_except = if dialect_of!(self is GenericDialect | BigQueryDialect) { + let opt_except = if dialect_of!(self is GenericDialect | BigQueryDialect | ClickHouseDialect) + { self.parse_optional_select_item_except()? } else { None @@ -7004,7 +7005,8 @@ impl<'a> Parser<'a> { None }; - let opt_replace = if dialect_of!(self is GenericDialect | BigQueryDialect) { + let opt_replace = if dialect_of!(self is GenericDialect | BigQueryDialect | ClickHouseDialect) + { self.parse_optional_select_item_replace()? } else { None @@ -7047,18 +7049,27 @@ impl<'a> Parser<'a> { &mut self, ) -> Result, ParserError> { let opt_except = if self.parse_keyword(Keyword::EXCEPT) { - let idents = self.parse_parenthesized_column_list(Mandatory, false)?; - match &idents[..] { - [] => { - return self.expected( - "at least one column should be parsed by the expect clause", - self.peek_token(), - )?; + if self.peek_token().token == Token::LParen { + let idents = self.parse_parenthesized_column_list(Mandatory, false)?; + match &idents[..] { + [] => { + return self.expected( + "at least one column should be parsed by the expect clause", + self.peek_token(), + )?; + } + [first, idents @ ..] => Some(ExceptSelectItem { + first_element: first.clone(), + additional_elements: idents.to_vec(), + }), } - [first, idents @ ..] => Some(ExceptSelectItem { - first_element: first.clone(), - additional_elements: idents.to_vec(), - }), + } else { + // Clickhouse allows EXCEPT column_name + let ident = self.parse_identifier()?; + Some(ExceptSelectItem { + first_element: ident, + additional_elements: vec![], + }) } } else { None diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 9efe4a368..8cca0da0b 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -355,6 +355,24 @@ fn parse_limit_by() { ); } +#[test] +fn parse_select_star_except() { + clickhouse().verified_stmt("SELECT * EXCEPT (prev_status) FROM anomalies"); +} + +#[test] +fn parse_select_star_except_no_parens() { + clickhouse().one_statement_parses_to( + "SELECT * EXCEPT prev_status FROM anomalies", + "SELECT * EXCEPT (prev_status) FROM anomalies", + ); +} + +#[test] +fn parse_select_star_replace() { + clickhouse().verified_stmt("SELECT * REPLACE (i + 1 AS i) FROM columns_transformers"); +} + fn clickhouse() -> TestedDialects { TestedDialects { dialects: vec![Box::new(ClickHouseDialect {})],