From 5263da68cdaa052dfd4f8989760569eae253253e Mon Sep 17 00:00:00 2001 From: Gabriel Villalonga Simon Date: Thu, 5 Oct 2023 20:32:43 +0100 Subject: [PATCH 01/17] Handle CREATE [TEMPORARY|TEMP] VIEW [IF NOT EXISTS] (#993) --- src/ast/mod.rs | 12 +++++++-- src/parser/mod.rs | 12 +++++++-- tests/sqlparser_common.rs | 55 +++++++++++++++++++++++++++++++++++++++ tests/sqlparser_sqlite.rs | 31 ++++++++++++++++++++++ 4 files changed, 106 insertions(+), 4 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index d4e2f26ea..d048ccc1b 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1318,6 +1318,10 @@ pub enum Statement { cluster_by: Vec, /// if true, has RedShift [`WITH NO SCHEMA BINDING`] clause with_no_schema_binding: bool, + /// if true, has SQLite `IF NOT EXISTS` clause + if_not_exists: bool, + /// if true, has SQLite `TEMP` or `TEMPORARY` clause + temporary: bool, }, /// CREATE TABLE CreateTable { @@ -2274,13 +2278,17 @@ impl fmt::Display for Statement { with_options, cluster_by, with_no_schema_binding, + if_not_exists, + temporary, } => { write!( f, - "CREATE {or_replace}{materialized}VIEW {name}", + "CREATE {or_replace}{materialized}{temporary}VIEW {if_not_exists}{name}", or_replace = if *or_replace { "OR REPLACE " } else { "" }, materialized = if *materialized { "MATERIALIZED " } else { "" }, - name = name + name = name, + temporary = if *temporary { "TEMPORARY " } else { "" }, + if_not_exists = if *if_not_exists { "IF NOT EXISTS " } else { "" } )?; if !with_options.is_empty() { write!(f, " WITH ({})", display_comma_separated(with_options))?; diff --git a/src/parser/mod.rs b/src/parser/mod.rs index d0b3cef78..922a791f3 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -2478,7 +2478,7 @@ impl<'a> Parser<'a> { self.parse_create_table(or_replace, temporary, global, transient) } else if self.parse_keyword(Keyword::MATERIALIZED) || self.parse_keyword(Keyword::VIEW) { self.prev_token(); - self.parse_create_view(or_replace) + 
self.parse_create_view(or_replace, temporary) } else if self.parse_keyword(Keyword::EXTERNAL) { self.parse_create_external_table(or_replace) } else if self.parse_keyword(Keyword::FUNCTION) { @@ -2955,9 +2955,15 @@ impl<'a> Parser<'a> { } } - pub fn parse_create_view(&mut self, or_replace: bool) -> Result { + pub fn parse_create_view( + &mut self, + or_replace: bool, + temporary: bool, + ) -> Result { let materialized = self.parse_keyword(Keyword::MATERIALIZED); self.expect_keyword(Keyword::VIEW)?; + let if_not_exists = dialect_of!(self is SQLiteDialect|GenericDialect) + && self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); // Many dialects support `OR ALTER` right after `CREATE`, but we don't (yet). // ANSI SQL and Postgres support RECURSIVE here, but we don't support it either. let name = self.parse_object_name()?; @@ -2992,6 +2998,8 @@ impl<'a> Parser<'a> { with_options, cluster_by, with_no_schema_binding, + if_not_exists, + temporary, }) } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 027dc312f..c0ec456a9 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -5321,6 +5321,8 @@ fn parse_create_view() { with_options, cluster_by, with_no_schema_binding: late_binding, + if_not_exists, + temporary, } => { assert_eq!("myschema.myview", name.to_string()); assert_eq!(Vec::::new(), columns); @@ -5330,6 +5332,8 @@ fn parse_create_view() { assert_eq!(with_options, vec![]); assert_eq!(cluster_by, vec![]); assert!(!late_binding); + assert!(!if_not_exists); + assert!(!temporary); } _ => unreachable!(), } @@ -5371,6 +5375,8 @@ fn parse_create_view_with_columns() { materialized, cluster_by, with_no_schema_binding: late_binding, + if_not_exists, + temporary, } => { assert_eq!("v", name.to_string()); assert_eq!(columns, vec![Ident::new("has"), Ident::new("cols")]); @@ -5380,6 +5386,39 @@ fn parse_create_view_with_columns() { assert!(!or_replace); assert_eq!(cluster_by, vec![]); assert!(!late_binding); + 
assert!(!if_not_exists); + assert!(!temporary); + } + _ => unreachable!(), + } +} + +#[test] +fn parse_create_view_temporary() { + let sql = "CREATE TEMPORARY VIEW myschema.myview AS SELECT foo FROM bar"; + match verified_stmt(sql) { + Statement::CreateView { + name, + columns, + query, + or_replace, + materialized, + with_options, + cluster_by, + with_no_schema_binding: late_binding, + if_not_exists, + temporary, + } => { + assert_eq!("myschema.myview", name.to_string()); + assert_eq!(Vec::::new(), columns); + assert_eq!("SELECT foo FROM bar", query.to_string()); + assert!(!materialized); + assert!(!or_replace); + assert_eq!(with_options, vec![]); + assert_eq!(cluster_by, vec![]); + assert!(!late_binding); + assert!(!if_not_exists); + assert!(temporary); } _ => unreachable!(), } @@ -5398,6 +5437,8 @@ fn parse_create_or_replace_view() { materialized, cluster_by, with_no_schema_binding: late_binding, + if_not_exists, + temporary, } => { assert_eq!("v", name.to_string()); assert_eq!(columns, vec![]); @@ -5407,6 +5448,8 @@ fn parse_create_or_replace_view() { assert!(or_replace); assert_eq!(cluster_by, vec![]); assert!(!late_binding); + assert!(!if_not_exists); + assert!(!temporary); } _ => unreachable!(), } @@ -5429,6 +5472,8 @@ fn parse_create_or_replace_materialized_view() { materialized, cluster_by, with_no_schema_binding: late_binding, + if_not_exists, + temporary, } => { assert_eq!("v", name.to_string()); assert_eq!(columns, vec![]); @@ -5438,6 +5483,8 @@ fn parse_create_or_replace_materialized_view() { assert!(or_replace); assert_eq!(cluster_by, vec![]); assert!(!late_binding); + assert!(!if_not_exists); + assert!(!temporary); } _ => unreachable!(), } @@ -5456,6 +5503,8 @@ fn parse_create_materialized_view() { with_options, cluster_by, with_no_schema_binding: late_binding, + if_not_exists, + temporary, } => { assert_eq!("myschema.myview", name.to_string()); assert_eq!(Vec::::new(), columns); @@ -5465,6 +5514,8 @@ fn parse_create_materialized_view() { 
assert!(!or_replace); assert_eq!(cluster_by, vec![]); assert!(!late_binding); + assert!(!if_not_exists); + assert!(!temporary); } _ => unreachable!(), } @@ -5483,6 +5534,8 @@ fn parse_create_materialized_view_with_cluster_by() { with_options, cluster_by, with_no_schema_binding: late_binding, + if_not_exists, + temporary, } => { assert_eq!("myschema.myview", name.to_string()); assert_eq!(Vec::::new(), columns); @@ -5492,6 +5545,8 @@ fn parse_create_materialized_view_with_cluster_by() { assert!(!or_replace); assert_eq!(cluster_by, vec![Ident::new("foo")]); assert!(!late_binding); + assert!(!if_not_exists); + assert!(!temporary); } _ => unreachable!(), } diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs index c4e69d530..39a82cc8b 100644 --- a/tests/sqlparser_sqlite.rs +++ b/tests/sqlparser_sqlite.rs @@ -61,6 +61,37 @@ fn parse_create_virtual_table() { sqlite_and_generic().verified_stmt(sql); } +#[test] +fn parse_create_view_temporary_if_not_exists() { + let sql = "CREATE TEMPORARY VIEW IF NOT EXISTS myschema.myview AS SELECT foo FROM bar"; + match sqlite_and_generic().verified_stmt(sql) { + Statement::CreateView { + name, + columns, + query, + or_replace, + materialized, + with_options, + cluster_by, + with_no_schema_binding: late_binding, + if_not_exists, + temporary, + } => { + assert_eq!("myschema.myview", name.to_string()); + assert_eq!(Vec::::new(), columns); + assert_eq!("SELECT foo FROM bar", query.to_string()); + assert!(!materialized); + assert!(!or_replace); + assert_eq!(with_options, vec![]); + assert_eq!(cluster_by, vec![]); + assert!(!late_binding); + assert!(if_not_exists); + assert!(temporary); + } + _ => unreachable!(), + } +} + #[test] fn double_equality_operator() { // Sqlite supports this operator: https://www.sqlite.org/lang_expr.html#binaryops From 83cb734b3c206502dd73998def455da554c37eef Mon Sep 17 00:00:00 2001 From: Zdenko Nevrala Date: Fri, 6 Oct 2023 20:48:18 +0200 Subject: [PATCH 02/17] Support Snowflake/BigQuery TRIM. 
(#975) --- src/ast/mod.rs | 6 ++++++ src/parser/mod.rs | 14 ++++++++++++++ tests/sqlparser_bigquery.rs | 26 ++++++++++++++++++++++++++ tests/sqlparser_common.rs | 24 ++++++++++++++++++++++++ tests/sqlparser_snowflake.rs | 25 +++++++++++++++++++++++++ 5 files changed, 95 insertions(+) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index d048ccc1b..87f7ebb37 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -496,12 +496,14 @@ pub enum Expr { /// ```sql /// TRIM([BOTH | LEADING | TRAILING] [ FROM] ) /// TRIM() + /// TRIM(, [, characters]) -- only Snowflake or Bigquery /// ``` Trim { expr: Box, // ([BOTH | LEADING | TRAILING] trim_where: Option, trim_what: Option>, + trim_characters: Option>, }, /// ```sql /// OVERLAY( PLACING FROM [ FOR ] @@ -895,6 +897,7 @@ impl fmt::Display for Expr { expr, trim_where, trim_what, + trim_characters, } => { write!(f, "TRIM(")?; if let Some(ident) = trim_where { @@ -905,6 +908,9 @@ impl fmt::Display for Expr { } else { write!(f, "{expr}")?; } + if let Some(characters) = trim_characters { + write!(f, ", {}", display_comma_separated(characters))?; + } write!(f, ")") } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 922a791f3..95f1f8edc 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1315,6 +1315,7 @@ impl<'a> Parser<'a> { /// ```sql /// TRIM ([WHERE] ['text' FROM] 'text') /// TRIM ('text') + /// TRIM(, [, characters]) -- only Snowflake or BigQuery /// ``` pub fn parse_trim_expr(&mut self) -> Result { self.expect_token(&Token::LParen)?; @@ -1336,6 +1337,18 @@ impl<'a> Parser<'a> { expr: Box::new(expr), trim_where, trim_what: Some(trim_what), + trim_characters: None, + }) + } else if self.consume_token(&Token::Comma) + && dialect_of!(self is SnowflakeDialect | BigQueryDialect | GenericDialect) + { + let characters = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + Ok(Expr::Trim { + expr: Box::new(expr), + trim_where: None, + trim_what: None, + trim_characters: Some(characters), }) 
} else { self.expect_token(&Token::RParen)?; @@ -1343,6 +1356,7 @@ impl<'a> Parser<'a> { expr: Box::new(expr), trim_where, trim_what: None, + trim_characters: None, }) } } diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index e05581d5f..7a9a8d1c4 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -17,6 +17,7 @@ use std::ops::Deref; use sqlparser::ast::*; use sqlparser::dialect::{BigQueryDialect, GenericDialect}; +use sqlparser::parser::ParserError; use test_utils::*; #[test] @@ -549,3 +550,28 @@ fn parse_map_access_offset() { bigquery().verified_only_select(sql); } } + +#[test] +fn test_bigquery_trim() { + let real_sql = r#"SELECT customer_id, TRIM(item_price_id, '"', "a") AS item_price_id FROM models_staging.subscriptions"#; + assert_eq!(bigquery().verified_stmt(real_sql).to_string(), real_sql); + + let sql_only_select = "SELECT TRIM('xyz', 'a')"; + let select = bigquery().verified_only_select(sql_only_select); + assert_eq!( + &Expr::Trim { + expr: Box::new(Expr::Value(Value::SingleQuotedString("xyz".to_owned()))), + trim_where: None, + trim_what: None, + trim_characters: Some(vec![Expr::Value(Value::SingleQuotedString("a".to_owned()))]), + }, + expr_from_projection(only(&select.projection)) + ); + + // missing comma separation + let error_sql = "SELECT TRIM('xyz' 'a')"; + assert_eq!( + ParserError::ParserError("Expected ), found: 'a'".to_owned()), + bigquery().parse_sql_statements(error_sql).unwrap_err() + ); +} diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index c0ec456a9..1511aa76e 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -5225,6 +5225,30 @@ fn parse_trim() { ParserError::ParserError("Expected ), found: 'xyz'".to_owned()), parse_sql_statements("SELECT TRIM(FOO 'xyz' FROM 'xyzfooxyz')").unwrap_err() ); + + //keep Snowflake/BigQuery TRIM syntax failing + let all_expected_snowflake = TestedDialects { + dialects: vec![ + //Box::new(GenericDialect {}), + 
Box::new(PostgreSqlDialect {}), + Box::new(MsSqlDialect {}), + Box::new(AnsiDialect {}), + //Box::new(SnowflakeDialect {}), + Box::new(HiveDialect {}), + Box::new(RedshiftSqlDialect {}), + Box::new(MySqlDialect {}), + //Box::new(BigQueryDialect {}), + Box::new(SQLiteDialect {}), + Box::new(DuckDbDialect {}), + ], + options: None, + }; + assert_eq!( + ParserError::ParserError("Expected ), found: 'a'".to_owned()), + all_expected_snowflake + .parse_sql_statements("SELECT TRIM('xyz', 'a')") + .unwrap_err() + ); } #[test] diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index e1db7ec61..e92656d0b 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -1039,3 +1039,28 @@ fn test_snowflake_stage_object_names() { } } } + +#[test] +fn test_snowflake_trim() { + let real_sql = r#"SELECT customer_id, TRIM(sub_items.value:item_price_id, '"', "a") AS item_price_id FROM models_staging.subscriptions"#; + assert_eq!(snowflake().verified_stmt(real_sql).to_string(), real_sql); + + let sql_only_select = "SELECT TRIM('xyz', 'a')"; + let select = snowflake().verified_only_select(sql_only_select); + assert_eq!( + &Expr::Trim { + expr: Box::new(Expr::Value(Value::SingleQuotedString("xyz".to_owned()))), + trim_where: None, + trim_what: None, + trim_characters: Some(vec![Expr::Value(Value::SingleQuotedString("a".to_owned()))]), + }, + expr_from_projection(only(&select.projection)) + ); + + // missing comma separation + let error_sql = "SELECT TRIM('xyz' 'a')"; + assert_eq!( + ParserError::ParserError("Expected ), found: 'a'".to_owned()), + snowflake().parse_sql_statements(error_sql).unwrap_err() + ); +} From c68e9775a22acf00e54b33542b10ac6d1a8cf887 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Fri, 20 Oct 2023 20:33:12 +0200 Subject: [PATCH 03/17] Support bigquery `CAST AS x [STRING|DATE] FORMAT` syntax (#978) --- src/ast/mod.rs | 64 ++++++++++++++++++++++++++++++++++-- src/parser/mod.rs | 23 +++++++++++++ tests/sqlparser_bigquery.rs 
| 35 ++++++++++++++++++-- tests/sqlparser_common.rs | 10 ++++++ tests/sqlparser_postgres.rs | 3 +- tests/sqlparser_snowflake.rs | 1 + 6 files changed, 130 insertions(+), 6 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 87f7ebb37..fc15efbc4 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -322,6 +322,16 @@ impl fmt::Display for JsonOperator { } } +/// Options for `CAST` / `TRY_CAST` +/// BigQuery: +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum CastFormat { + Value(Value), + ValueAtTimeZone(Value, Value), +} + /// An SQL expression of any type. /// /// The parser does not distinguish between expressions of different types @@ -437,12 +447,18 @@ pub enum Expr { Cast { expr: Box, data_type: DataType, + // Optional CAST(string_expression AS type FORMAT format_string_expression) as used by BigQuery + // https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#formatting_syntax + format: Option, }, /// TRY_CAST an expression to a different data type e.g. `TRY_CAST(foo AS VARCHAR(123))` // this differs from CAST in the choice of how to implement invalid conversions TryCast { expr: Box, data_type: DataType, + // Optional CAST(string_expression AS type FORMAT format_string_expression) as used by BigQuery + // https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#formatting_syntax + format: Option, }, /// SAFE_CAST an expression to a different data type e.g. 
`SAFE_CAST(foo AS FLOAT64)` // only available for BigQuery: https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-and-operators#safe_casting @@ -450,6 +466,9 @@ pub enum Expr { SafeCast { expr: Box, data_type: DataType, + // Optional CAST(string_expression AS type FORMAT format_string_expression) as used by BigQuery + // https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#formatting_syntax + format: Option, }, /// AT a timestamp to a different timezone e.g. `FROM_UNIXTIME(0) AT TIME ZONE 'UTC-06:00'` AtTimeZone { @@ -597,6 +616,15 @@ pub enum Expr { }, } +impl fmt::Display for CastFormat { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + CastFormat::Value(v) => write!(f, "{v}"), + CastFormat::ValueAtTimeZone(v, tz) => write!(f, "{v} AT TIME ZONE {tz}"), + } + } +} + impl fmt::Display for Expr { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { @@ -753,9 +781,39 @@ impl fmt::Display for Expr { write!(f, "{op}{expr}") } } - Expr::Cast { expr, data_type } => write!(f, "CAST({expr} AS {data_type})"), - Expr::TryCast { expr, data_type } => write!(f, "TRY_CAST({expr} AS {data_type})"), - Expr::SafeCast { expr, data_type } => write!(f, "SAFE_CAST({expr} AS {data_type})"), + Expr::Cast { + expr, + data_type, + format, + } => { + if let Some(format) = format { + write!(f, "CAST({expr} AS {data_type} FORMAT {format})") + } else { + write!(f, "CAST({expr} AS {data_type})") + } + } + Expr::TryCast { + expr, + data_type, + format, + } => { + if let Some(format) = format { + write!(f, "TRY_CAST({expr} AS {data_type} FORMAT {format})") + } else { + write!(f, "TRY_CAST({expr} AS {data_type})") + } + } + Expr::SafeCast { + expr, + data_type, + format, + } => { + if let Some(format) = format { + write!(f, "SAFE_CAST({expr} AS {data_type} FORMAT {format})") + } else { + write!(f, "SAFE_CAST({expr} AS {data_type})") + } + } Expr::Extract { field, expr } => write!(f, "EXTRACT({field} FROM {expr})"), 
Expr::Ceil { expr, field } => { if field == &DateTimeField::NoDateTime { diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 95f1f8edc..829b299af 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1139,16 +1139,34 @@ impl<'a> Parser<'a> { }) } + pub fn parse_optional_cast_format(&mut self) -> Result, ParserError> { + if self.parse_keyword(Keyword::FORMAT) { + let value = self.parse_value()?; + if self.parse_keywords(&[Keyword::AT, Keyword::TIME, Keyword::ZONE]) { + Ok(Some(CastFormat::ValueAtTimeZone( + value, + self.parse_value()?, + ))) + } else { + Ok(Some(CastFormat::Value(value))) + } + } else { + Ok(None) + } + } + /// Parse a SQL CAST function e.g. `CAST(expr AS FLOAT)` pub fn parse_cast_expr(&mut self) -> Result { self.expect_token(&Token::LParen)?; let expr = self.parse_expr()?; self.expect_keyword(Keyword::AS)?; let data_type = self.parse_data_type()?; + let format = self.parse_optional_cast_format()?; self.expect_token(&Token::RParen)?; Ok(Expr::Cast { expr: Box::new(expr), data_type, + format, }) } @@ -1158,10 +1176,12 @@ impl<'a> Parser<'a> { let expr = self.parse_expr()?; self.expect_keyword(Keyword::AS)?; let data_type = self.parse_data_type()?; + let format = self.parse_optional_cast_format()?; self.expect_token(&Token::RParen)?; Ok(Expr::TryCast { expr: Box::new(expr), data_type, + format, }) } @@ -1171,10 +1191,12 @@ impl<'a> Parser<'a> { let expr = self.parse_expr()?; self.expect_keyword(Keyword::AS)?; let data_type = self.parse_data_type()?; + let format = self.parse_optional_cast_format()?; self.expect_token(&Token::RParen)?; Ok(Expr::SafeCast { expr: Box::new(expr), data_type, + format, }) } @@ -2101,6 +2123,7 @@ impl<'a> Parser<'a> { Ok(Expr::Cast { expr: Box::new(expr), data_type: self.parse_data_type()?, + format: None, }) } diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 7a9a8d1c4..b3f683b9a 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -304,8 +304,39 @@ fn 
parse_trailing_comma() { #[test] fn parse_cast_type() { - let sql = r#"SELECT SAFE_CAST(1 AS INT64)"#; - bigquery().verified_only_select(sql); + let sql = r"SELECT SAFE_CAST(1 AS INT64)"; + bigquery_and_generic().verified_only_select(sql); +} + +#[test] +fn parse_cast_date_format() { + let sql = + r"SELECT CAST(date_valid_from AS DATE FORMAT 'YYYY-MM-DD') AS date_valid_from FROM foo"; + bigquery_and_generic().verified_only_select(sql); +} + +#[test] +fn parse_cast_time_format() { + let sql = r"SELECT CAST(TIME '21:30:00' AS STRING FORMAT 'PM') AS date_time_to_string"; + bigquery_and_generic().verified_only_select(sql); +} + +#[test] +fn parse_cast_timestamp_format_tz() { + let sql = r"SELECT CAST(TIMESTAMP '2008-12-25 00:00:00+00:00' AS STRING FORMAT 'TZH' AT TIME ZONE 'Asia/Kolkata') AS date_time_to_string"; + bigquery_and_generic().verified_only_select(sql); +} + +#[test] +fn parse_cast_string_to_bytes_format() { + let sql = r"SELECT CAST('Hello' AS BYTES FORMAT 'ASCII') AS string_to_bytes"; + bigquery_and_generic().verified_only_select(sql); +} + +#[test] +fn parse_cast_bytes_to_string_format() { + let sql = r"SELECT CAST(B'\x48\x65\x6c\x6c\x6f' AS STRING FORMAT 'ASCII') AS bytes_to_string"; + bigquery_and_generic().verified_only_select(sql); } #[test] diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 1511aa76e..ff8bdd7a4 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -1934,6 +1934,7 @@ fn parse_cast() { &Expr::Cast { expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::BigInt(None), + format: None, }, expr_from_projection(only(&select.projection)) ); @@ -1944,6 +1945,7 @@ fn parse_cast() { &Expr::Cast { expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::TinyInt(None), + format: None, }, expr_from_projection(only(&select.projection)) ); @@ -1970,6 +1972,7 @@ fn parse_cast() { &Expr::Cast { expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: 
DataType::Nvarchar(Some(50)), + format: None, }, expr_from_projection(only(&select.projection)) ); @@ -1980,6 +1983,7 @@ fn parse_cast() { &Expr::Cast { expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::Clob(None), + format: None, }, expr_from_projection(only(&select.projection)) ); @@ -1990,6 +1994,7 @@ fn parse_cast() { &Expr::Cast { expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::Clob(Some(50)), + format: None, }, expr_from_projection(only(&select.projection)) ); @@ -2000,6 +2005,7 @@ fn parse_cast() { &Expr::Cast { expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::Binary(Some(50)), + format: None, }, expr_from_projection(only(&select.projection)) ); @@ -2010,6 +2016,7 @@ fn parse_cast() { &Expr::Cast { expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::Varbinary(Some(50)), + format: None, }, expr_from_projection(only(&select.projection)) ); @@ -2020,6 +2027,7 @@ fn parse_cast() { &Expr::Cast { expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::Blob(None), + format: None, }, expr_from_projection(only(&select.projection)) ); @@ -2030,6 +2038,7 @@ fn parse_cast() { &Expr::Cast { expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::Blob(Some(50)), + format: None, }, expr_from_projection(only(&select.projection)) ); @@ -2043,6 +2052,7 @@ fn parse_try_cast() { &Expr::TryCast { expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::BigInt(None), + format: None, }, expr_from_projection(only(&select.projection)) ); diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index fe336bda7..654723668 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -1782,7 +1782,8 @@ fn parse_array_index_expr() { })), data_type: DataType::Array(Some(Box::new(DataType::Array(Some(Box::new( DataType::Int(None) - )))))) + )))))), + format: None, }))), indexes: vec![num[1].clone(), num[2].clone()], }, 
diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index e92656d0b..bb988665d 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -167,6 +167,7 @@ fn parse_array() { &Expr::Cast { expr: Box::new(Expr::Identifier(Ident::new("a"))), data_type: DataType::Array(None), + format: None, }, expr_from_projection(only(&select.projection)) ); From 88510f662563786a6e3af6b1ed109444bcd332e7 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Fri, 20 Oct 2023 21:49:18 +0200 Subject: [PATCH 04/17] fix column `COLLATE` not displayed (#1012) --- src/ast/ddl.rs | 3 +++ tests/sqlparser_common.rs | 5 +++++ 2 files changed, 8 insertions(+) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index a4640d557..f1575d979 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -517,6 +517,9 @@ pub struct ColumnDef { impl fmt::Display for ColumnDef { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{} {}", self.name, self.data_type)?; + if let Some(collation) = &self.collation { + write!(f, " COLLATE {collation}")?; + } for option in &self.options { write!(f, " {option}")?; } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index ff8bdd7a4..3b8775e45 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -7635,3 +7635,8 @@ fn parse_create_type() { create_type ); } + +#[test] +fn parse_create_table_collate() { + pg_and_generic().verified_stmt("CREATE TABLE tbl (foo INT, bar TEXT COLLATE \"de_DE\")"); +} From c03586b727a659bb6d22d77910f4d4e9b9d9688c Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Fri, 20 Oct 2023 22:13:22 +0200 Subject: [PATCH 05/17] Support mysql `RLIKE` and `REGEXP` binary operators (#1017) --- src/ast/mod.rs | 21 +++++++++++++++++++++ src/keywords.rs | 2 ++ src/parser/mod.rs | 19 +++++++++++++++++-- src/test_utils.rs | 2 +- tests/sqlparser_mysql.rs | 12 ++++++++++++ 5 files changed, 53 insertions(+), 3 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 
fc15efbc4..3b0030017 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -429,6 +429,14 @@ pub enum Expr { pattern: Box, escape_char: Option, }, + /// MySQL: RLIKE regex or REGEXP regex + RLike { + negated: bool, + expr: Box, + pattern: Box, + // true for REGEXP, false for RLIKE (no difference in semantics) + regexp: bool, + }, /// Any operation e.g. `foo > ANY(bar)`, comparison operator is one of [=, >, <, =>, =<, !=] AnyOp { left: Box, @@ -740,6 +748,19 @@ impl fmt::Display for Expr { pattern ), }, + Expr::RLike { + negated, + expr, + pattern, + regexp, + } => write!( + f, + "{} {}{} {}", + expr, + if *negated { "NOT " } else { "" }, + if *regexp { "REGEXP" } else { "RLIKE" }, + pattern + ), Expr::SimilarTo { negated, expr, diff --git a/src/keywords.rs b/src/keywords.rs index e1bbf44ae..6327ccc84 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -498,6 +498,7 @@ define_keywords!( REFERENCES, REFERENCING, REGCLASS, + REGEXP, REGR_AVGX, REGR_AVGY, REGR_COUNT, @@ -524,6 +525,7 @@ define_keywords!( RETURNS, REVOKE, RIGHT, + RLIKE, ROLE, ROLLBACK, ROLLUP, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 829b299af..0065f7987 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1932,10 +1932,21 @@ impl<'a> Parser<'a> { | Keyword::BETWEEN | Keyword::LIKE | Keyword::ILIKE - | Keyword::SIMILAR => { + | Keyword::SIMILAR + | Keyword::REGEXP + | Keyword::RLIKE => { self.prev_token(); let negated = self.parse_keyword(Keyword::NOT); - if self.parse_keyword(Keyword::IN) { + let regexp = self.parse_keyword(Keyword::REGEXP); + let rlike = self.parse_keyword(Keyword::RLIKE); + if regexp || rlike { + Ok(Expr::RLike { + negated, + expr: Box::new(expr), + pattern: Box::new(self.parse_subexpr(Self::LIKE_PREC)?), + regexp, + }) + } else if self.parse_keyword(Keyword::IN) { self.parse_in(expr, negated) } else if self.parse_keyword(Keyword::BETWEEN) { self.parse_between(expr, negated) @@ -2178,6 +2189,8 @@ impl<'a> Parser<'a> { Token::Word(w) if w.keyword == 
Keyword::BETWEEN => Ok(Self::BETWEEN_PREC), Token::Word(w) if w.keyword == Keyword::LIKE => Ok(Self::LIKE_PREC), Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(Self::LIKE_PREC), + Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(Self::LIKE_PREC), + Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(Self::LIKE_PREC), Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(Self::LIKE_PREC), _ => Ok(0), }, @@ -2186,6 +2199,8 @@ impl<'a> Parser<'a> { Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(Self::BETWEEN_PREC), Token::Word(w) if w.keyword == Keyword::LIKE => Ok(Self::LIKE_PREC), Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(Self::LIKE_PREC), + Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(Self::LIKE_PREC), + Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(Self::LIKE_PREC), Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(Self::LIKE_PREC), Token::Word(w) if w.keyword == Keyword::OPERATOR => Ok(Self::BETWEEN_PREC), Token::Word(w) if w.keyword == Keyword::DIV => Ok(Self::MUL_DIV_MOD_OP_PREC), diff --git a/src/test_utils.rs b/src/test_utils.rs index f0c5e425a..76a3e073b 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -111,7 +111,7 @@ impl TestedDialects { /// 2. 
re-serializing the result of parsing `sql` produces the same /// `canonical` sql string pub fn one_statement_parses_to(&self, sql: &str, canonical: &str) -> Statement { - let mut statements = self.parse_sql_statements(sql).unwrap(); + let mut statements = self.parse_sql_statements(sql).expect(sql); assert_eq!(statements.len(), 1); if !canonical.is_empty() && sql != canonical { diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 80b9dcfd8..6e59198d7 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -1454,6 +1454,18 @@ fn parse_show_variables() { mysql_and_generic().verified_stmt("SHOW VARIABLES WHERE value = '3306'"); } +#[test] +fn parse_rlike_and_regexp() { + for s in &[ + "SELECT 1 WHERE 'a' RLIKE '^a$'", + "SELECT 1 WHERE 'a' REGEXP '^a$'", + "SELECT 1 WHERE 'a' NOT RLIKE '^a$'", + "SELECT 1 WHERE 'a' NOT REGEXP '^a$'", + ] { + mysql_and_generic().verified_only_select(s); + } +} + #[test] fn parse_kill() { let stmt = mysql_and_generic().verified_stmt("KILL CONNECTION 5"); From 5c10668dbb60bccaf11f224013d333a48e32ec38 Mon Sep 17 00:00:00 2001 From: Alexander Beedie Date: Tue, 24 Oct 2023 01:37:31 +0400 Subject: [PATCH 06/17] Add support for UNION DISTINCT BY NAME syntax (#997) Co-authored-by: Andrew Lamb --- src/ast/query.rs | 5 +- src/parser/mod.rs | 4 +- tests/sqlparser_duckdb.rs | 232 ++++++++++++++------------------------ 3 files changed, 89 insertions(+), 152 deletions(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index 88b0931de..824fab1ba 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -120,7 +120,8 @@ impl fmt::Display for SetExpr { SetQuantifier::All | SetQuantifier::Distinct | SetQuantifier::ByName - | SetQuantifier::AllByName => write!(f, " {set_quantifier}")?, + | SetQuantifier::AllByName + | SetQuantifier::DistinctByName => write!(f, " {set_quantifier}")?, SetQuantifier::None => write!(f, "{set_quantifier}")?, } write!(f, " {right}")?; @@ -160,6 +161,7 @@ pub enum SetQuantifier { Distinct, 
ByName, AllByName, + DistinctByName, None, } @@ -170,6 +172,7 @@ impl fmt::Display for SetQuantifier { SetQuantifier::Distinct => write!(f, "DISTINCT"), SetQuantifier::ByName => write!(f, "BY NAME"), SetQuantifier::AllByName => write!(f, "ALL BY NAME"), + SetQuantifier::DistinctByName => write!(f, "DISTINCT BY NAME"), SetQuantifier::None => write!(f, ""), } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 0065f7987..68a8cef1f 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -5696,7 +5696,9 @@ impl<'a> Parser<'a> { pub fn parse_set_quantifier(&mut self, op: &Option) -> SetQuantifier { match op { Some(SetOperator::Union) => { - if self.parse_keywords(&[Keyword::BY, Keyword::NAME]) { + if self.parse_keywords(&[Keyword::DISTINCT, Keyword::BY, Keyword::NAME]) { + SetQuantifier::DistinctByName + } else if self.parse_keywords(&[Keyword::BY, Keyword::NAME]) { SetQuantifier::ByName } else if self.parse_keyword(Keyword::ALL) { if self.parse_keywords(&[Keyword::BY, Keyword::NAME]) { diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index b05cc0dd4..db11d1e77 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -132,155 +132,87 @@ fn test_create_table_macro() { #[test] fn test_select_union_by_name() { - let ast = duckdb().verified_query("SELECT * FROM capitals UNION BY NAME SELECT * FROM weather"); - let expected = Box::::new(SetExpr::SetOperation { - op: SetOperator::Union, - set_quantifier: SetQuantifier::ByName, - left: Box::::new(SetExpr::Select(Box::new(Select { - distinct: None, - top: None, - projection: vec![SelectItem::Wildcard(WildcardAdditionalOptions { - opt_exclude: None, - opt_except: None, - opt_rename: None, - opt_replace: None, - })], - into: None, - from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident { - value: "capitals".to_string(), - quote_style: None, - }]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - }, - joins: 
vec![], - }], - lateral_views: vec![], - selection: None, - group_by: GroupByExpr::Expressions(vec![]), - cluster_by: vec![], - distribute_by: vec![], - sort_by: vec![], - having: None, - named_window: vec![], - qualify: None, - }))), - right: Box::::new(SetExpr::Select(Box::new(Select { - distinct: None, - top: None, - projection: vec![SelectItem::Wildcard(WildcardAdditionalOptions { - opt_exclude: None, - opt_except: None, - opt_rename: None, - opt_replace: None, - })], - into: None, - from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident { - value: "weather".to_string(), - quote_style: None, - }]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - }, - joins: vec![], - }], - lateral_views: vec![], - selection: None, - group_by: GroupByExpr::Expressions(vec![]), - cluster_by: vec![], - distribute_by: vec![], - sort_by: vec![], - having: None, - named_window: vec![], - qualify: None, - }))), - }); - - assert_eq!(ast.body, expected); + let q1 = "SELECT * FROM capitals UNION BY NAME SELECT * FROM weather"; + let q2 = "SELECT * FROM capitals UNION ALL BY NAME SELECT * FROM weather"; + let q3 = "SELECT * FROM capitals UNION DISTINCT BY NAME SELECT * FROM weather"; - let ast = - duckdb().verified_query("SELECT * FROM capitals UNION ALL BY NAME SELECT * FROM weather"); - let expected = Box::::new(SetExpr::SetOperation { - op: SetOperator::Union, - set_quantifier: SetQuantifier::AllByName, - left: Box::::new(SetExpr::Select(Box::new(Select { - distinct: None, - top: None, - projection: vec![SelectItem::Wildcard(WildcardAdditionalOptions { - opt_exclude: None, - opt_except: None, - opt_rename: None, - opt_replace: None, - })], - into: None, - from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident { - value: "capitals".to_string(), - quote_style: None, - }]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - }, - 
joins: vec![], - }], - lateral_views: vec![], - selection: None, - group_by: GroupByExpr::Expressions(vec![]), - cluster_by: vec![], - distribute_by: vec![], - sort_by: vec![], - having: None, - named_window: vec![], - qualify: None, - }))), - right: Box::::new(SetExpr::Select(Box::new(Select { - distinct: None, - top: None, - projection: vec![SelectItem::Wildcard(WildcardAdditionalOptions { - opt_exclude: None, - opt_except: None, - opt_rename: None, - opt_replace: None, - })], - into: None, - from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident { - value: "weather".to_string(), - quote_style: None, - }]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - }, - joins: vec![], - }], - lateral_views: vec![], - selection: None, - group_by: GroupByExpr::Expressions(vec![]), - cluster_by: vec![], - distribute_by: vec![], - sort_by: vec![], - having: None, - named_window: vec![], - qualify: None, - }))), - }); - assert_eq!(ast.body, expected); + for (ast, expected_quantifier) in &[ + (duckdb().verified_query(q1), SetQuantifier::ByName), + (duckdb().verified_query(q2), SetQuantifier::AllByName), + (duckdb().verified_query(q3), SetQuantifier::DistinctByName), + ] { + let expected = Box::::new(SetExpr::SetOperation { + op: SetOperator::Union, + set_quantifier: *expected_quantifier, + left: Box::::new(SetExpr::Select(Box::new(Select { + distinct: None, + top: None, + projection: vec![SelectItem::Wildcard(WildcardAdditionalOptions { + opt_exclude: None, + opt_except: None, + opt_rename: None, + opt_replace: None, + })], + into: None, + from: vec![TableWithJoins { + relation: TableFactor::Table { + name: ObjectName(vec![Ident { + value: "capitals".to_string(), + quote_style: None, + }]), + alias: None, + args: None, + with_hints: vec![], + version: None, + partitions: vec![], + }, + joins: vec![], + }], + lateral_views: vec![], + selection: None, + group_by: GroupByExpr::Expressions(vec![]), + 
cluster_by: vec![], + distribute_by: vec![], + sort_by: vec![], + having: None, + named_window: vec![], + qualify: None, + }))), + right: Box::::new(SetExpr::Select(Box::new(Select { + distinct: None, + top: None, + projection: vec![SelectItem::Wildcard(WildcardAdditionalOptions { + opt_exclude: None, + opt_except: None, + opt_rename: None, + opt_replace: None, + })], + into: None, + from: vec![TableWithJoins { + relation: TableFactor::Table { + name: ObjectName(vec![Ident { + value: "weather".to_string(), + quote_style: None, + }]), + alias: None, + args: None, + with_hints: vec![], + version: None, + partitions: vec![], + }, + joins: vec![], + }], + lateral_views: vec![], + selection: None, + group_by: GroupByExpr::Expressions(vec![]), + cluster_by: vec![], + distribute_by: vec![], + sort_by: vec![], + having: None, + named_window: vec![], + qualify: None, + }))), + }); + assert_eq!(ast.body, expected); + } } From 56f24ce2361bb2f9ee9d7566c3b1ce256ee02d8b Mon Sep 17 00:00:00 2001 From: Joey Hain Date: Mon, 23 Oct 2023 14:50:45 -0700 Subject: [PATCH 07/17] Support subquery as function arg w/o parens in Snowflake dialect (#996) --- src/parser/mod.rs | 20 +++++++++++++++++++- tests/sqlparser_snowflake.rs | 20 ++++++++++++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 68a8cef1f..1c1d8b23e 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1507,7 +1507,7 @@ impl<'a> Parser<'a> { within_group: false, })); } - // Snowflake defines ORDERY BY in within group instead of inside the function like + // Snowflake defines ORDER BY in within group instead of inside the function like // ANSI SQL. 
self.expect_token(&Token::RParen)?; let within_group = if self.parse_keywords(&[Keyword::WITHIN, Keyword::GROUP]) { @@ -6914,6 +6914,24 @@ impl<'a> Parser<'a> { if self.consume_token(&Token::RParen) { Ok((vec![], vec![])) } else { + // Snowflake permits a subquery to be passed as an argument without + // an enclosing set of parens if it's the only argument. + if dialect_of!(self is SnowflakeDialect) + && self + .parse_one_of_keywords(&[Keyword::WITH, Keyword::SELECT]) + .is_some() + { + self.prev_token(); + let subquery = self.parse_query()?; + self.expect_token(&Token::RParen)?; + return Ok(( + vec![FunctionArg::Unnamed(FunctionArgExpr::from( + WildcardExpr::Expr(Expr::Subquery(Box::new(subquery))), + ))], + vec![], + )); + } + let args = self.parse_comma_separated(Parser::parse_function_args)?; let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { self.parse_comma_separated(Parser::parse_order_by_expr)? diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index bb988665d..79c9eb1ea 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -1065,3 +1065,23 @@ fn test_snowflake_trim() { snowflake().parse_sql_statements(error_sql).unwrap_err() ); } + +#[test] +fn parse_subquery_function_argument() { + // Snowflake allows passing an unparenthesized subquery as the single + // argument to a function. + snowflake().one_statement_parses_to( + "SELECT parse_json(SELECT '{}')", + "SELECT parse_json((SELECT '{}'))", + ); + + // Subqueries that begin with WITH work too. + snowflake().one_statement_parses_to( + "SELECT parse_json(WITH q AS (SELECT '{}' AS foo) SELECT foo FROM q)", + "SELECT parse_json((WITH q AS (SELECT '{}' AS foo) SELECT foo FROM q))", + ); + + // Commas are parsed as part of the subquery, not additional arguments to + // the function. 
+ snowflake().one_statement_parses_to("SELECT func(SELECT 1, 2)", "SELECT func((SELECT 1, 2))"); +} From e857a452016d82dfc00398a5483ce9551dff9565 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Mon, 23 Oct 2023 23:55:11 +0200 Subject: [PATCH 08/17] Support `SELECT * EXCEPT/REPLACE` syntax from ClickHouse (#1013) --- src/ast/query.rs | 2 ++ src/parser/mod.rs | 37 +++++++++++++++++++++++------------ tests/sqlparser_clickhouse.rs | 18 +++++++++++++++++ 3 files changed, 44 insertions(+), 13 deletions(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index 824fab1ba..4289b0bde 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -434,11 +434,13 @@ pub struct WildcardAdditionalOptions { /// `[EXCLUDE...]`. pub opt_exclude: Option, /// `[EXCEPT...]`. + /// Clickhouse syntax: pub opt_except: Option, /// `[RENAME ...]`. pub opt_rename: Option, /// `[REPLACE]` /// BigQuery syntax: + /// Clickhouse syntax: pub opt_replace: Option, } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 1c1d8b23e..9e0d595cb 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -6993,7 +6993,8 @@ impl<'a> Parser<'a> { } else { None }; - let opt_except = if dialect_of!(self is GenericDialect | BigQueryDialect) { + let opt_except = if dialect_of!(self is GenericDialect | BigQueryDialect | ClickHouseDialect) + { self.parse_optional_select_item_except()? } else { None @@ -7004,7 +7005,8 @@ impl<'a> Parser<'a> { None }; - let opt_replace = if dialect_of!(self is GenericDialect | BigQueryDialect) { + let opt_replace = if dialect_of!(self is GenericDialect | BigQueryDialect | ClickHouseDialect) + { self.parse_optional_select_item_replace()? } else { None @@ -7047,18 +7049,27 @@ impl<'a> Parser<'a> { &mut self, ) -> Result, ParserError> { let opt_except = if self.parse_keyword(Keyword::EXCEPT) { - let idents = self.parse_parenthesized_column_list(Mandatory, false)?; - match &idents[..] 
{ - [] => { - return self.expected( - "at least one column should be parsed by the expect clause", - self.peek_token(), - )?; + if self.peek_token().token == Token::LParen { + let idents = self.parse_parenthesized_column_list(Mandatory, false)?; + match &idents[..] { + [] => { + return self.expected( + "at least one column should be parsed by the expect clause", + self.peek_token(), + )?; + } + [first, idents @ ..] => Some(ExceptSelectItem { + first_element: first.clone(), + additional_elements: idents.to_vec(), + }), } - [first, idents @ ..] => Some(ExceptSelectItem { - first_element: first.clone(), - additional_elements: idents.to_vec(), - }), + } else { + // Clickhouse allows EXCEPT column_name + let ident = self.parse_identifier()?; + Some(ExceptSelectItem { + first_element: ident, + additional_elements: vec![], + }) } } else { None diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 9efe4a368..8cca0da0b 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -355,6 +355,24 @@ fn parse_limit_by() { ); } +#[test] +fn parse_select_star_except() { + clickhouse().verified_stmt("SELECT * EXCEPT (prev_status) FROM anomalies"); +} + +#[test] +fn parse_select_star_except_no_parens() { + clickhouse().one_statement_parses_to( + "SELECT * EXCEPT prev_status FROM anomalies", + "SELECT * EXCEPT (prev_status) FROM anomalies", + ); +} + +#[test] +fn parse_select_star_replace() { + clickhouse().verified_stmt("SELECT * REPLACE (i + 1 AS i) FROM columns_transformers"); +} + fn clickhouse() -> TestedDialects { TestedDialects { dialects: vec![Box::new(ClickHouseDialect {})], From ce62fe6d274d354fef34fad919b58f6ba16c61a3 Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Tue, 24 Oct 2023 00:06:39 +0200 Subject: [PATCH 09/17] Support `FILTER` in over clause (#1007) Co-authored-by: Andrew Lamb --- README.md | 2 +- src/ast/mod.rs | 9 +++++++++ src/ast/visitor.rs | 2 +- src/dialect/sqlite.rs | 4 ++++ src/parser/mod.rs | 14 
++++++++++++++ tests/sqlparser_bigquery.rs | 1 + tests/sqlparser_clickhouse.rs | 4 ++++ tests/sqlparser_common.rs | 19 +++++++++++++++++++ tests/sqlparser_hive.rs | 1 + tests/sqlparser_mssql.rs | 1 + tests/sqlparser_mysql.rs | 6 ++++++ tests/sqlparser_postgres.rs | 6 ++++++ tests/sqlparser_redshift.rs | 1 + tests/sqlparser_snowflake.rs | 1 + tests/sqlparser_sqlite.rs | 33 +++++++++++++++++++++++++++++++++ 15 files changed, 102 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 454ea6c29..e987c2a21 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,7 @@ println!("AST: {:?}", ast); This outputs ```rust -AST: [Query(Query { ctes: [], body: Select(Select { distinct: false, projection: [UnnamedExpr(Identifier("a")), UnnamedExpr(Identifier("b")), UnnamedExpr(Value(Long(123))), UnnamedExpr(Function(Function { name: ObjectName(["myfunc"]), args: [Identifier("b")], over: None, distinct: false }))], from: [TableWithJoins { relation: Table { name: ObjectName(["table_1"]), alias: None, args: [], with_hints: [] }, joins: [] }], selection: Some(BinaryOp { left: BinaryOp { left: Identifier("a"), op: Gt, right: Identifier("b") }, op: And, right: BinaryOp { left: Identifier("b"), op: Lt, right: Value(Long(100)) } }), group_by: [], having: None }), order_by: [OrderByExpr { expr: Identifier("a"), asc: Some(false) }, OrderByExpr { expr: Identifier("b"), asc: None }], limit: None, offset: None, fetch: None })] +AST: [Query(Query { ctes: [], body: Select(Select { distinct: false, projection: [UnnamedExpr(Identifier("a")), UnnamedExpr(Identifier("b")), UnnamedExpr(Value(Long(123))), UnnamedExpr(Function(Function { name: ObjectName(["myfunc"]), args: [Identifier("b")], filter: None, over: None, distinct: false }))], from: [TableWithJoins { relation: Table { name: ObjectName(["table_1"]), alias: None, args: [], with_hints: [] }, joins: [] }], selection: Some(BinaryOp { left: BinaryOp { left: Identifier("a"), op: Gt, right: Identifier("b") }, op: And, right: BinaryOp { 
left: Identifier("b"), op: Lt, right: Value(Long(100)) } }), group_by: [], having: None }), order_by: [OrderByExpr { expr: Identifier("a"), asc: Some(false) }, OrderByExpr { expr: Identifier("b"), asc: None }], limit: None, offset: None, fetch: None })] ``` diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 3b0030017..11ce9b810 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1070,8 +1070,11 @@ impl Display for WindowType { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct WindowSpec { + /// `OVER (PARTITION BY ...)` pub partition_by: Vec, + /// `OVER (ORDER BY ...)` pub order_by: Vec, + /// `OVER (window frame)` pub window_frame: Option, } @@ -3729,6 +3732,8 @@ impl fmt::Display for CloseCursor { pub struct Function { pub name: ObjectName, pub args: Vec, + /// e.g. `x > 5` in `COUNT(x) FILTER (WHERE x > 5)` + pub filter: Option>, pub over: Option, // aggregate functions may specify eg `COUNT(DISTINCT x)` pub distinct: bool, @@ -3777,6 +3782,10 @@ impl fmt::Display for Function { display_comma_separated(&self.order_by), )?; + if let Some(filter_cond) = &self.filter { + write!(f, " FILTER (WHERE {filter_cond})")?; + } + if let Some(o) = &self.over { write!(f, " OVER {o}")?; } diff --git a/src/ast/visitor.rs b/src/ast/visitor.rs index 09cb20a0c..4e025f962 100644 --- a/src/ast/visitor.rs +++ b/src/ast/visitor.rs @@ -506,7 +506,7 @@ where /// *expr = Expr::Function(Function { /// name: ObjectName(vec![Ident::new("f")]), /// args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(old_expr))], -/// over: None, distinct: false, special: false, order_by: vec![], +/// filter: None, over: None, distinct: false, special: false, order_by: vec![], /// }); /// } /// ControlFlow::<()>::Continue(()) diff --git a/src/dialect/sqlite.rs b/src/dialect/sqlite.rs index fa21224f6..37c7c7fa7 100644 --- a/src/dialect/sqlite.rs +++ b/src/dialect/sqlite.rs @@ -35,6 +35,10 @@ impl Dialect for SQLiteDialect { 
|| ('\u{007f}'..='\u{ffff}').contains(&ch) } + fn supports_filter_during_aggregation(&self) -> bool { + true + } + fn is_identifier_part(&self, ch: char) -> bool { self.is_identifier_start(ch) || ch.is_ascii_digit() } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 9e0d595cb..3bf5228c4 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -772,6 +772,7 @@ impl<'a> Parser<'a> { Ok(Expr::Function(Function { name: ObjectName(vec![w.to_ident()]), args: vec![], + filter: None, over: None, distinct: false, special: true, @@ -957,6 +958,17 @@ impl<'a> Parser<'a> { self.expect_token(&Token::LParen)?; let distinct = self.parse_all_or_distinct()?.is_some(); let (args, order_by) = self.parse_optional_args_with_orderby()?; + let filter = if self.dialect.supports_filter_during_aggregation() + && self.parse_keyword(Keyword::FILTER) + && self.consume_token(&Token::LParen) + && self.parse_keyword(Keyword::WHERE) + { + let filter = Some(Box::new(self.parse_expr()?)); + self.expect_token(&Token::RParen)?; + filter + } else { + None + }; let over = if self.parse_keyword(Keyword::OVER) { if self.consume_token(&Token::LParen) { let window_spec = self.parse_window_spec()?; @@ -970,6 +982,7 @@ impl<'a> Parser<'a> { Ok(Expr::Function(Function { name, args, + filter, over, distinct, special: false, @@ -987,6 +1000,7 @@ impl<'a> Parser<'a> { Ok(Expr::Function(Function { name, args, + filter: None, over: None, distinct: false, special, diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index b3f683b9a..fe95b1873 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -564,6 +564,7 @@ fn parse_map_access_offset() { args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value( number("0") ))),], + filter: None, over: None, distinct: false, special: false, diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 8cca0da0b..7d9cb0309 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs 
@@ -50,6 +50,7 @@ fn parse_map_access_expr() { Value::SingleQuotedString("endpoint".to_string()) ))), ], + filter: None, over: None, distinct: false, special: false, @@ -89,6 +90,7 @@ fn parse_map_access_expr() { Value::SingleQuotedString("app".to_string()) ))), ], + filter: None, over: None, distinct: false, special: false, @@ -138,6 +140,7 @@ fn parse_array_fn() { FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier(Ident::new("x1")))), FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier(Ident::new("x2")))), ], + filter: None, over: None, distinct: false, special: false, @@ -196,6 +199,7 @@ fn parse_delimited_identifiers() { &Expr::Function(Function { name: ObjectName(vec![Ident::with_quote('"', "myfun")]), args: vec![], + filter: None, over: None, distinct: false, special: false, diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 3b8775e45..9eb52f6ec 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -875,6 +875,7 @@ fn parse_select_count_wildcard() { &Expr::Function(Function { name: ObjectName(vec![Ident::new("COUNT")]), args: vec![FunctionArg::Unnamed(FunctionArgExpr::Wildcard)], + filter: None, over: None, distinct: false, special: false, @@ -895,6 +896,7 @@ fn parse_select_count_distinct() { op: UnaryOperator::Plus, expr: Box::new(Expr::Identifier(Ident::new("x"))), }))], + filter: None, over: None, distinct: true, special: false, @@ -1862,6 +1864,7 @@ fn parse_select_having() { left: Box::new(Expr::Function(Function { name: ObjectName(vec![Ident::new("COUNT")]), args: vec![FunctionArg::Unnamed(FunctionArgExpr::Wildcard)], + filter: None, over: None, distinct: false, special: false, @@ -1887,6 +1890,7 @@ fn parse_select_qualify() { left: Box::new(Expr::Function(Function { name: ObjectName(vec![Ident::new("ROW_NUMBER")]), args: vec![], + filter: None, over: Some(WindowType::WindowSpec(WindowSpec { partition_by: vec![Expr::Identifier(Ident::new("p"))], order_by: vec![OrderByExpr { @@ -3342,6 +3346,7 
@@ fn parse_scalar_function_in_projection() { args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( Expr::Identifier(Ident::new("id")) ))], + filter: None, over: None, distinct: false, special: false, @@ -3461,6 +3466,7 @@ fn parse_named_argument_function() { ))), }, ], + filter: None, over: None, distinct: false, special: false, @@ -3492,6 +3498,7 @@ fn parse_window_functions() { &Expr::Function(Function { name: ObjectName(vec![Ident::new("row_number")]), args: vec![], + filter: None, over: Some(WindowType::WindowSpec(WindowSpec { partition_by: vec![], order_by: vec![OrderByExpr { @@ -3535,6 +3542,7 @@ fn test_parse_named_window() { quote_style: None, }), ))], + filter: None, over: Some(WindowType::NamedWindow(Ident { value: "window1".to_string(), quote_style: None, @@ -3560,6 +3568,7 @@ fn test_parse_named_window() { quote_style: None, }), ))], + filter: None, over: Some(WindowType::NamedWindow(Ident { value: "window2".to_string(), quote_style: None, @@ -4029,6 +4038,7 @@ fn parse_at_timezone() { quote_style: None, }]), args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(zero.clone()))], + filter: None, over: None, distinct: false, special: false, @@ -4056,6 +4066,7 @@ fn parse_at_timezone() { quote_style: None, },],), args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(zero))], + filter: None, over: None, distinct: false, special: false, @@ -4067,6 +4078,7 @@ fn parse_at_timezone() { Value::SingleQuotedString("%Y-%m-%dT%H".to_string()), ),),), ], + filter: None, over: None, distinct: false, special: false, @@ -4225,6 +4237,7 @@ fn parse_table_function() { args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value( Value::SingleQuotedString("1".to_owned()), )))], + filter: None, over: None, distinct: false, special: false, @@ -4376,6 +4389,7 @@ fn parse_unnest_in_from_clause() { FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(number("2")))), FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(number("3")))), ], + filter: None, over: None, 
distinct: false, special: false, @@ -4405,6 +4419,7 @@ fn parse_unnest_in_from_clause() { FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(number("2")))), FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(number("3")))), ], + filter: None, over: None, distinct: false, special: false, @@ -4416,6 +4431,7 @@ fn parse_unnest_in_from_clause() { FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(number("5")))), FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(number("6")))), ], + filter: None, over: None, distinct: false, special: false, @@ -6888,6 +6904,7 @@ fn parse_time_functions() { let select_localtime_func_call_ast = Function { name: ObjectName(vec![Ident::new(func_name)]), args: vec![], + filter: None, over: None, distinct: false, special: false, @@ -7374,6 +7391,7 @@ fn parse_pivot_table() { args: (vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( Expr::CompoundIdentifier(vec![Ident::new("a"), Ident::new("amount"),]) ))]), + filter: None, over: None, distinct: false, special: false, @@ -7523,6 +7541,7 @@ fn parse_pivot_unpivot_table() { args: (vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( Expr::Identifier(Ident::new("population")) ))]), + filter: None, over: None, distinct: false, special: false, diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 6ca47e12c..6f3a8f994 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -346,6 +346,7 @@ fn parse_delimited_identifiers() { &Expr::Function(Function { name: ObjectName(vec![Ident::with_quote('"', "myfun")]), args: vec![], + filter: None, over: None, distinct: false, special: false, diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index f9eb4d8fb..ebadf95f2 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -334,6 +334,7 @@ fn parse_delimited_identifiers() { &Expr::Function(Function { name: ObjectName(vec![Ident::with_quote('"', "myfun")]), args: vec![], + filter: None, over: None, distinct: false, special: false, diff --git 
a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 6e59198d7..3bcb84439 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -1071,6 +1071,7 @@ fn parse_insert_with_on_duplicate_update() { args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( Expr::Identifier(Ident::new("description")) ))], + filter: None, over: None, distinct: false, special: false, @@ -1084,6 +1085,7 @@ fn parse_insert_with_on_duplicate_update() { args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( Expr::Identifier(Ident::new("perm_create")) ))], + filter: None, over: None, distinct: false, special: false, @@ -1097,6 +1099,7 @@ fn parse_insert_with_on_duplicate_update() { args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( Expr::Identifier(Ident::new("perm_read")) ))], + filter: None, over: None, distinct: false, special: false, @@ -1110,6 +1113,7 @@ fn parse_insert_with_on_duplicate_update() { args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( Expr::Identifier(Ident::new("perm_update")) ))], + filter: None, over: None, distinct: false, special: false, @@ -1123,6 +1127,7 @@ fn parse_insert_with_on_duplicate_update() { args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( Expr::Identifier(Ident::new("perm_delete")) ))], + filter: None, over: None, distinct: false, special: false, @@ -1512,6 +1517,7 @@ fn parse_table_colum_option_on_update() { option: ColumnOption::OnUpdate(Expr::Function(Function { name: ObjectName(vec![Ident::new("CURRENT_TIMESTAMP")]), args: vec![], + filter: None, over: None, distinct: false, special: false, diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 654723668..0256579db 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -2275,6 +2275,7 @@ fn test_composite_value() { named: true } )))], + filter: None, over: None, distinct: false, special: false, @@ -2436,6 +2437,7 @@ fn parse_current_functions() { &Expr::Function(Function { name: ObjectName(vec![Ident::new("CURRENT_CATALOG")]), 
args: vec![], + filter: None, over: None, distinct: false, special: true, @@ -2447,6 +2449,7 @@ fn parse_current_functions() { &Expr::Function(Function { name: ObjectName(vec![Ident::new("CURRENT_USER")]), args: vec![], + filter: None, over: None, distinct: false, special: true, @@ -2458,6 +2461,7 @@ fn parse_current_functions() { &Expr::Function(Function { name: ObjectName(vec![Ident::new("SESSION_USER")]), args: vec![], + filter: None, over: None, distinct: false, special: true, @@ -2469,6 +2473,7 @@ fn parse_current_functions() { &Expr::Function(Function { name: ObjectName(vec![Ident::new("USER")]), args: vec![], + filter: None, over: None, distinct: false, special: true, @@ -2919,6 +2924,7 @@ fn parse_delimited_identifiers() { &Expr::Function(Function { name: ObjectName(vec![Ident::with_quote('"', "myfun")]), args: vec![], + filter: None, over: None, distinct: false, special: false, diff --git a/tests/sqlparser_redshift.rs b/tests/sqlparser_redshift.rs index 5ae539b3c..6238d1eca 100644 --- a/tests/sqlparser_redshift.rs +++ b/tests/sqlparser_redshift.rs @@ -137,6 +137,7 @@ fn parse_delimited_identifiers() { &Expr::Function(Function { name: ObjectName(vec![Ident::with_quote('"', "myfun")]), args: vec![], + filter: None, over: None, distinct: false, special: false, diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 79c9eb1ea..3319af7b9 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -248,6 +248,7 @@ fn parse_delimited_identifiers() { &Expr::Function(Function { name: ObjectName(vec![Ident::with_quote('"', "myfun")]), args: vec![], + filter: None, over: None, distinct: false, special: false, diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs index 39a82cc8b..8d7ccf315 100644 --- a/tests/sqlparser_sqlite.rs +++ b/tests/sqlparser_sqlite.rs @@ -290,6 +290,39 @@ fn parse_create_table_with_strict() { } } +#[test] +fn parse_window_function_with_filter() { + for func_name in [ + "row_number", + 
"rank", + "max", + "count", + "user_defined_function", + ] { + let sql = format!("SELECT {}(x) FILTER (WHERE y) OVER () FROM t", func_name); + let select = sqlite().verified_only_select(&sql); + assert_eq!(select.to_string(), sql); + assert_eq!( + select.projection, + vec![SelectItem::UnnamedExpr(Expr::Function(Function { + name: ObjectName(vec![Ident::new(func_name)]), + args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( + Expr::Identifier(Ident::new("x")) + ))], + over: Some(WindowType::WindowSpec(WindowSpec { + partition_by: vec![], + order_by: vec![], + window_frame: None, + })), + filter: Some(Box::new(Expr::Identifier(Ident::new("y")))), + distinct: false, + special: false, + order_by: vec![] + }))] + ); + } +} + #[test] fn parse_attach_database() { let sql = "ATTACH DATABASE 'test.db' AS test"; From 2798b65b42c529bd089742a2028e94d59d82e493 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 23 Oct 2023 18:07:00 -0400 Subject: [PATCH 10/17] snowflake/generic: `position` can be the name of a column (#1022) Co-authored-by: Lukasz Stefaniak --- src/parser/mod.rs | 4 +++- tests/sqlparser_snowflake.rs | 6 ++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 3bf5228c4..e79f31bac 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -794,7 +794,9 @@ impl<'a> Parser<'a> { Keyword::EXTRACT => self.parse_extract_expr(), Keyword::CEIL => self.parse_ceil_floor_expr(true), Keyword::FLOOR => self.parse_ceil_floor_expr(false), - Keyword::POSITION => self.parse_position_expr(), + Keyword::POSITION if self.peek_token().token == Token::LParen => { + self.parse_position_expr() + } Keyword::SUBSTRING => self.parse_substring_expr(), Keyword::OVERLAY => self.parse_overlay_expr(), Keyword::TRIM => self.parse_trim_expr(), diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 3319af7b9..7e6f18138 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -1067,6 +1067,12 
@@ fn test_snowflake_trim() { ); } +#[test] +fn parse_position_not_function_columns() { + snowflake_and_generic() + .verified_stmt("SELECT position FROM tbl1 WHERE position NOT IN ('first', 'last')"); +} + #[test] fn parse_subquery_function_argument() { // Snowflake allows passing an unparenthesized subquery as the single From 8b2a248d7b90edce93e8c443d31a790d553fc0c2 Mon Sep 17 00:00:00 2001 From: Ilya Date: Tue, 24 Oct 2023 01:07:39 +0300 Subject: [PATCH 11/17] parse SQLite pragma statement (#969) Co-authored-by: Andrew Lamb --- src/ast/mod.rs | 18 ++++++++++++++++ src/keywords.rs | 1 + src/parser/mod.rs | 28 ++++++++++++++++++++++++ tests/sqlparser_sqlite.rs | 45 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 92 insertions(+) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 11ce9b810..5aa42c96f 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1914,6 +1914,12 @@ pub enum Statement { name: ObjectName, representation: UserDefinedTypeRepresentation, }, + // PRAGMA <schema-name>.<pragma-name> = <pragma-value> + Pragma { + name: ObjectName, + value: Option, + is_eq: bool, + }, } impl fmt::Display for Statement { @@ -3276,6 +3282,18 @@ impl fmt::Display for Statement { } => { write!(f, "CREATE TYPE {name} AS {representation}") } + Statement::Pragma { name, value, is_eq } => { + write!(f, "PRAGMA {name}")?; + if value.is_some() { + let val = value.as_ref().unwrap(); + if *is_eq { + write!(f, " = {val}")?; + } else { + write!(f, "({val})")?; + } + } + Ok(()) + } } } } diff --git a/src/keywords.rs b/src/keywords.rs index 6327ccc84..405203601 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -472,6 +472,7 @@ define_keywords!( POSITION, POSITION_REGEX, POWER, + PRAGMA, PRECEDES, PRECEDING, PRECISION, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index e79f31bac..f83f019ea 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -491,6 +491,8 @@ impl<'a> Parser<'a> { Keyword::EXECUTE => Ok(self.parse_execute()?), Keyword::PREPARE => Ok(self.parse_prepare()?), Keyword::MERGE =>
Ok(self.parse_merge()?), + // `PRAGMA` is sqlite specific https://www.sqlite.org/pragma.html + Keyword::PRAGMA => Ok(self.parse_pragma()?), _ => self.expected("an SQL statement", next_token), }, Token::LParen => { @@ -7502,6 +7504,32 @@ impl<'a> Parser<'a> { }) } + // PRAGMA [schema-name '.'] pragma-name [('=' pragma-value) | '(' pragma-value ')'] + pub fn parse_pragma(&mut self) -> Result { + let name = self.parse_object_name()?; + if self.consume_token(&Token::LParen) { + let value = self.parse_number_value()?; + self.expect_token(&Token::RParen)?; + Ok(Statement::Pragma { + name, + value: Some(value), + is_eq: false, + }) + } else if self.consume_token(&Token::Eq) { + Ok(Statement::Pragma { + name, + value: Some(self.parse_number_value()?), + is_eq: true, + }) + } else { + Ok(Statement::Pragma { + name, + value: None, + is_eq: false, + }) + } + } + /// ```sql /// CREATE [ { TEMPORARY | TEMP } ] SEQUENCE [ IF NOT EXISTS ] /// ``` diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs index 8d7ccf315..2fdd4e3de 100644 --- a/tests/sqlparser_sqlite.rs +++ b/tests/sqlparser_sqlite.rs @@ -24,6 +24,51 @@ use sqlparser::ast::*; use sqlparser::dialect::{GenericDialect, SQLiteDialect}; use sqlparser::tokenizer::Token; +#[test] +fn pragma_no_value() { + let sql = "PRAGMA cache_size"; + match sqlite_and_generic().verified_stmt(sql) { + Statement::Pragma { + name, + value: None, + is_eq: false, + } => { + assert_eq!("cache_size", name.to_string()); + } + _ => unreachable!(), + } +} +#[test] +fn pragma_eq_style() { + let sql = "PRAGMA cache_size = 10"; + match sqlite_and_generic().verified_stmt(sql) { + Statement::Pragma { + name, + value: Some(val), + is_eq: true, + } => { + assert_eq!("cache_size", name.to_string()); + assert_eq!("10", val.to_string()); + } + _ => unreachable!(), + } +} +#[test] +fn pragma_funciton_style() { + let sql = "PRAGMA cache_size(10)"; + match sqlite_and_generic().verified_stmt(sql) { + Statement::Pragma { + name, + value: Some(val), + 
is_eq: false, + } => { + assert_eq!("cache_size", name.to_string()); + assert_eq!("10", val.to_string()); + } + _ => unreachable!(), + } +} + #[test] fn parse_create_table_without_rowid() { let sql = "CREATE TABLE t (a INT) WITHOUT ROWID"; From 6739d377bd2c5acfbc4d4631651ee7a857caefec Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 23 Oct 2023 18:09:02 -0400 Subject: [PATCH 12/17] Add docstrings for `Dialect`s, update README (#1016) --- README.md | 31 +++++++++++++++++++++---------- src/dialect/ansi.rs | 1 + src/dialect/bigquery.rs | 1 + src/dialect/clickhouse.rs | 1 + src/dialect/duckdb.rs | 1 + src/dialect/generic.rs | 2 ++ src/dialect/hive.rs | 1 + src/dialect/mod.rs | 3 +++ src/dialect/mssql.rs | 2 +- src/dialect/mysql.rs | 2 +- src/dialect/postgresql.rs | 1 + src/dialect/redshift.rs | 1 + src/dialect/snowflake.rs | 1 + src/dialect/sqlite.rs | 1 + 14 files changed, 37 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index e987c2a21..58f5b8d48 100644 --- a/README.md +++ b/README.md @@ -124,28 +124,36 @@ parser](docs/custom_sql_parser.md). ## Contributing Contributions are highly encouraged! However, the bandwidth we have to -maintain this crate is fairly limited. +maintain this crate is limited. Please read the following sections carefully. -Pull requests that add support for or fix a bug in a feature in the -SQL standard, or a feature in a popular RDBMS, like Microsoft SQL +### New Syntax + +PRs that add support for or fix a bug in a feature in the +SQL standard, or in a popular RDBMS, such as Microsoft SQL Server or PostgreSQL, will likely be accepted after a brief -review. +review. Any SQL feature that is dialect specific should be parsed by *both* the relevant [`Dialect`] +as well as [`GenericDialect`]. + +### Major API Changes The current maintainers do not plan for any substantial changes to -this crate's API at this time. And thus, PRs proposing major refactors +this crate's API.
PRs proposing major refactors are not likely to be accepted. -Please be aware that, while we hope to review PRs in a reasonably -timely fashion, it may take a while. In order to speed the process, +### Testing + +While we hope to review PRs in a reasonably +timely fashion, it may take a week or more. In order to speed the process, please make sure the PR passes all CI checks, and includes tests demonstrating your code works as intended (and to avoid regressions). Remember to also test error paths. PRs without tests will not be reviewed or merged. Since the CI ensures that `cargo test`, `cargo fmt`, and `cargo clippy`, pass you -will likely want to run all three commands locally before submitting +should run all three commands locally before submitting your PR. +### Filing Issues If you are unable to submit a patch, feel free to file an issue instead. Please try to include: @@ -156,8 +164,9 @@ try to include: * links to documentation for the feature for a few of the most popular databases that support it. -If you need support for a feature, you will likely need to implement -it yourself. Our goal as maintainers is to facilitate the integration +Unfortunately, if you need support for a feature, you will likely need to implement +it yourself, or file a sufficiently detailed ticket so that another member of the community can do so. +Our goal as maintainers is to facilitate the integration of various features from various contributors, but not to provide the implementations ourselves, as we simply don't have the resources. @@ -183,3 +192,5 @@ licensed as above, without any additional terms or conditions.
[Pratt Parser]: https://tdop.github.io/ [sql-2016-grammar]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html [sql-standard]: https://en.wikipedia.org/wiki/ISO/IEC_9075 +[`Dialect`]: https://docs.rs/sqlparser/latest/sqlparser/dialect/trait.Dialect.html +[`GenericDialect`]: https://docs.rs/sqlparser/latest/sqlparser/dialect/struct.GenericDialect.html diff --git a/src/dialect/ansi.rs b/src/dialect/ansi.rs index 14c83ae16..d07bc07eb 100644 --- a/src/dialect/ansi.rs +++ b/src/dialect/ansi.rs @@ -12,6 +12,7 @@ use crate::dialect::Dialect; +/// A [`Dialect`] for [ANSI SQL](https://en.wikipedia.org/wiki/SQL:2011). #[derive(Debug)] pub struct AnsiDialect {} diff --git a/src/dialect/bigquery.rs b/src/dialect/bigquery.rs index 8266a32f0..46f27fea4 100644 --- a/src/dialect/bigquery.rs +++ b/src/dialect/bigquery.rs @@ -12,6 +12,7 @@ use crate::dialect::Dialect; +/// A [`Dialect`] for [Google BigQuery](https://cloud.google.com/bigquery/) #[derive(Debug, Default)] pub struct BigQueryDialect; diff --git a/src/dialect/clickhouse.rs b/src/dialect/clickhouse.rs index 395116f9c..50fbde99e 100644 --- a/src/dialect/clickhouse.rs +++ b/src/dialect/clickhouse.rs @@ -12,6 +12,7 @@ use crate::dialect::Dialect; +/// A [`Dialect`] for [ClickHouse](https://clickhouse.com/). #[derive(Debug)] pub struct ClickHouseDialect {} diff --git a/src/dialect/duckdb.rs b/src/dialect/duckdb.rs index 4e6e9d9a4..a4f9309e6 100644 --- a/src/dialect/duckdb.rs +++ b/src/dialect/duckdb.rs @@ -12,6 +12,7 @@ use crate::dialect::Dialect; +/// A [`Dialect`] for [DuckDB](https://duckdb.org/) #[derive(Debug, Default)] pub struct DuckDbDialect; diff --git a/src/dialect/generic.rs b/src/dialect/generic.rs index 8310954cd..4be4b9e23 100644 --- a/src/dialect/generic.rs +++ b/src/dialect/generic.rs @@ -12,6 +12,8 @@ use crate::dialect::Dialect; +/// A permissive, general purpose [`Dialect`], which parses a wide variety of SQL +/// statements, from many different dialects.
#[derive(Debug, Default)] pub struct GenericDialect; diff --git a/src/dialect/hive.rs b/src/dialect/hive.rs index 96cefb1d9..20800c1d3 100644 --- a/src/dialect/hive.rs +++ b/src/dialect/hive.rs @@ -12,6 +12,7 @@ use crate::dialect::Dialect; +/// A [`Dialect`] for [Hive](https://hive.apache.org/). #[derive(Debug)] pub struct HiveDialect {} diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index e174528b0..625f9ce0a 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -64,6 +64,9 @@ macro_rules! dialect_of { /// custom extensions or various historical reasons. This trait /// encapsulates the parsing differences between dialects. /// +/// [`GenericDialect`] is the most permissive dialect, and parses the union of +/// all the other dialects, when there is no ambiguity. +/// /// # Examples /// Most users create a [`Dialect`] directly, as shown on the [module /// level documentation]: diff --git a/src/dialect/mssql.rs b/src/dialect/mssql.rs index f04398100..26ecd4782 100644 --- a/src/dialect/mssql.rs +++ b/src/dialect/mssql.rs @@ -12,7 +12,7 @@ use crate::dialect::Dialect; -// [Microsoft SQL Server](https://www.microsoft.com/en-us/sql-server/) dialect +/// A [`Dialect`] for [Microsoft SQL Server](https://www.microsoft.com/en-us/sql-server/) #[derive(Debug)] pub struct MsSqlDialect {} diff --git a/src/dialect/mysql.rs b/src/dialect/mysql.rs index 0f914ed02..8c3de74b7 100644 --- a/src/dialect/mysql.rs +++ b/src/dialect/mysql.rs @@ -19,7 +19,7 @@ use crate::{ keywords::Keyword, }; -/// [MySQL](https://www.mysql.com/) +/// A [`Dialect`] for [MySQL](https://www.mysql.com/) #[derive(Debug)] pub struct MySqlDialect {} diff --git a/src/dialect/postgresql.rs b/src/dialect/postgresql.rs index d131ff9c6..a0b192c85 100644 --- a/src/dialect/postgresql.rs +++ b/src/dialect/postgresql.rs @@ -16,6 +16,7 @@ use crate::keywords::Keyword; use crate::parser::{Parser, ParserError}; use crate::tokenizer::Token; +/// A [`Dialect`] for [PostgreSQL](https://www.postgresql.org/) 
#[derive(Debug)] pub struct PostgreSqlDialect {} diff --git a/src/dialect/redshift.rs b/src/dialect/redshift.rs index c85f3dc20..73457ab30 100644 --- a/src/dialect/redshift.rs +++ b/src/dialect/redshift.rs @@ -16,6 +16,7 @@ use core::str::Chars; use super::PostgreSqlDialect; +/// A [`Dialect`] for [RedShift](https://aws.amazon.com/redshift/) #[derive(Debug)] pub struct RedshiftSqlDialect {} diff --git a/src/dialect/snowflake.rs b/src/dialect/snowflake.rs index 713394a1e..33425e846 100644 --- a/src/dialect/snowflake.rs +++ b/src/dialect/snowflake.rs @@ -28,6 +28,7 @@ use alloc::vec::Vec; #[cfg(not(feature = "std"))] use alloc::{format, vec}; +/// A [`Dialect`] for [Snowflake](https://www.snowflake.com/) #[derive(Debug, Default)] pub struct SnowflakeDialect; diff --git a/src/dialect/sqlite.rs b/src/dialect/sqlite.rs index 37c7c7fa7..68515d24f 100644 --- a/src/dialect/sqlite.rs +++ b/src/dialect/sqlite.rs @@ -15,6 +15,7 @@ use crate::dialect::Dialect; use crate::keywords::Keyword; use crate::parser::{Parser, ParserError}; +/// A [`Dialect`] for [SQLite](https://www.sqlite.org) #[derive(Debug)] pub struct SQLiteDialect {} From 86aa1b96be1c1fbf56cbe7cb04e12370df53605c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mehmet=20Emin=20KARAKA=C5=9E?= Date: Tue, 24 Oct 2023 12:45:25 +0300 Subject: [PATCH 13/17] Support `INSERT IGNORE` in `MySql` and `GenericDialect` (#1004) Co-authored-by: Andrew Lamb --- src/ast/mod.rs | 6 +++++- src/parser/mod.rs | 4 ++++ tests/sqlparser_mysql.rs | 41 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 50 insertions(+), 1 deletion(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 5aa42c96f..17f6d3a04 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1296,6 +1296,8 @@ pub enum Statement { Insert { /// Only for Sqlite or: Option, + /// Only for mysql + ignore: bool, /// INTO - optional keyword into: bool, /// TABLE @@ -2126,6 +2128,7 @@ impl fmt::Display for Statement { } Statement::Insert { or, + ignore, into, table_name, overwrite, @@ 
-2142,8 +2145,9 @@ impl fmt::Display for Statement { } else { write!( f, - "INSERT{over}{int}{tbl} {table_name} ", + "INSERT{ignore}{over}{int}{tbl} {table_name} ", table_name = table_name, + ignore = if *ignore { " IGNORE" } else { "" }, over = if *overwrite { " OVERWRITE" } else { "" }, int = if *into { " INTO" } else { "" }, tbl = if *table { " TABLE" } else { "" } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index f83f019ea..d0b11ffea 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -6755,6 +6755,9 @@ impl<'a> Parser<'a> { None }; + let ignore = dialect_of!(self is MySqlDialect | GenericDialect) + && self.parse_keyword(Keyword::IGNORE); + let action = self.parse_one_of_keywords(&[Keyword::INTO, Keyword::OVERWRITE]); let into = action == Some(Keyword::INTO); let overwrite = action == Some(Keyword::OVERWRITE); @@ -6852,6 +6855,7 @@ impl<'a> Parser<'a> { Ok(Statement::Insert { or, table_name, + ignore, into, overwrite, partitioned, diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 3bcb84439..8391bbadb 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -972,6 +972,47 @@ fn parse_simple_insert() { } } +#[test] +fn parse_ignore_insert() { + let sql = r"INSERT IGNORE INTO tasks (title, priority) VALUES ('Test Some Inserts', 1)"; + + match mysql_and_generic().verified_stmt(sql) { + Statement::Insert { + table_name, + columns, + source, + on, + ignore, + .. 
+ } => { + assert_eq!(ObjectName(vec![Ident::new("tasks")]), table_name); + assert_eq!(vec![Ident::new("title"), Ident::new("priority")], columns); + assert!(on.is_none()); + assert!(ignore); + assert_eq!( + Box::new(Query { + with: None, + body: Box::new(SetExpr::Values(Values { + explicit_row: false, + rows: vec![vec![ + Expr::Value(Value::SingleQuotedString("Test Some Inserts".to_string())), + Expr::Value(number("1")) + ]] + })), + order_by: vec![], + limit: None, + limit_by: vec![], + offset: None, + fetch: None, + locks: vec![] + }), + source + ); + } + _ => unreachable!(), + } +} + #[test] fn parse_empty_row_insert() { let sql = "INSERT INTO tb () VALUES (), ()"; From 57090537f0b2984681ff9333c57f8a8ce7c995cb Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Tue, 24 Oct 2023 12:30:05 +0200 Subject: [PATCH 14/17] Test that `regexp` can be used as an identifier in postgres (#1018) --- tests/sqlparser_postgres.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 0256579db..64fcbd38a 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -3390,6 +3390,13 @@ fn parse_truncate() { ); } +#[test] +fn parse_select_regexp_as_column_name() { + pg_and_generic().verified_only_select( + "SELECT REGEXP.REGEXP AS REGEXP FROM REGEXP AS REGEXP WHERE REGEXP.REGEXP", + ); +} + #[test] fn parse_create_table_with_alias() { let sql = "CREATE TABLE public.datatype_aliases From 9832adb37651da83483263cd652ff6ab01a7060f Mon Sep 17 00:00:00 2001 From: Chris A Date: Tue, 24 Oct 2023 05:33:51 -0500 Subject: [PATCH 15/17] Support "with" identifiers surrounded by backticks in `GenericDialect` (#1010) --- src/dialect/mod.rs | 2 +- tests/sqlparser_hive.rs | 23 ++++++++++++++++------- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 625f9ce0a..856cfe1c9 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -95,7 +95,7 @@ pub trait 
Dialect: Debug + Any { /// MySQL, MS SQL, and sqlite). You can accept one of characters listed /// in `Word::matching_end_quote` here fn is_delimited_identifier_start(&self, ch: char) -> bool { - ch == '"' + ch == '"' || ch == '`' } /// Determine if quoted characters are proper for identifier fn is_proper_identifier_inside_quotes(&self, mut _chars: Peekable>) -> bool { diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 6f3a8f994..f63b9cef9 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -20,7 +20,7 @@ use sqlparser::ast::{ SelectItem, Statement, TableFactor, UnaryOperator, Value, }; use sqlparser::dialect::{GenericDialect, HiveDialect}; -use sqlparser::parser::ParserError; +use sqlparser::parser::{ParserError, ParserOptions}; use sqlparser::test_utils::*; #[test] @@ -32,6 +32,20 @@ fn parse_table_create() { hive().verified_stmt(iof); } +fn generic(options: Option) -> TestedDialects { + TestedDialects { + dialects: vec![Box::new(GenericDialect {})], + options, + } +} + +#[test] +fn parse_describe() { + let describe = r#"DESCRIBE namespace.`table`"#; + hive().verified_stmt(describe); + generic(None).verified_stmt(describe); +} + #[test] fn parse_insert_overwrite() { let insert_partitions = r#"INSERT OVERWRITE TABLE db.new_table PARTITION (a = '1', b) SELECT a, b, c FROM db.table"#; @@ -265,13 +279,8 @@ fn parse_create_function() { _ => unreachable!(), } - let generic = TestedDialects { - dialects: vec![Box::new(GenericDialect {})], - options: None, - }; - assert_eq!( - generic.parse_sql_statements(sql).unwrap_err(), + generic(None).parse_sql_statements(sql).unwrap_err(), ParserError::ParserError( "Expected an object type after CREATE, found: FUNCTION".to_string() ) From 004a8dc5ddbbbfc0935c09fb572cd6161af33525 Mon Sep 17 00:00:00 2001 From: Chris A Date: Tue, 24 Oct 2023 06:19:01 -0500 Subject: [PATCH 16/17] Support multiple `PARTITION` statements in `ALTER TABLE ADD` statement (#1011) Co-authored-by: Chris A Co-authored-by: 
Andrew Lamb --- src/ast/ddl.rs | 24 +++++++++++++++++++++--- src/ast/mod.rs | 5 +++-- src/dialect/generic.rs | 4 ++++ src/parser/mod.rs | 22 +++++++++++++++++----- tests/sqlparser_hive.rs | 6 ++++++ 5 files changed, 51 insertions(+), 10 deletions(-) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index f1575d979..da2c8c9e4 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -69,7 +69,7 @@ pub enum AlterTableOperation { /// Add Partitions AddPartitions { if_not_exists: bool, - new_partitions: Vec, + new_partitions: Vec, }, DropPartitions { partitions: Vec, @@ -119,8 +119,8 @@ impl fmt::Display for AlterTableOperation { new_partitions, } => write!( f, - "ADD{ine} PARTITION ({})", - display_comma_separated(new_partitions), + "ADD{ine} {}", + display_separated(new_partitions, " "), ine = if *if_not_exists { " IF NOT EXISTS" } else { "" } ), AlterTableOperation::AddConstraint(c) => write!(f, "ADD {c}"), @@ -771,3 +771,21 @@ impl fmt::Display for UserDefinedTypeCompositeAttributeDef { Ok(()) } } + +/// PARTITION statement used in ALTER TABLE et al. 
such as in Hive SQL +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct Partition { + pub partitions: Vec, +} + +impl fmt::Display for Partition { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "PARTITION ({})", + display_comma_separated(&self.partitions) + ) + } +} diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 17f6d3a04..4c69d3ed0 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -31,8 +31,9 @@ pub use self::data_type::{ pub use self::dcl::{AlterRoleOperation, ResetConfig, RoleOption, SetConfigValue}; pub use self::ddl::{ AlterColumnOperation, AlterIndexOperation, AlterTableOperation, ColumnDef, ColumnOption, - ColumnOptionDef, GeneratedAs, IndexType, KeyOrIndexDisplay, ProcedureParam, ReferentialAction, - TableConstraint, UserDefinedTypeCompositeAttributeDef, UserDefinedTypeRepresentation, + ColumnOptionDef, GeneratedAs, IndexType, KeyOrIndexDisplay, Partition, ProcedureParam, + ReferentialAction, TableConstraint, UserDefinedTypeCompositeAttributeDef, + UserDefinedTypeRepresentation, }; pub use self::operator::{BinaryOperator, UnaryOperator}; pub use self::query::{ diff --git a/src/dialect/generic.rs b/src/dialect/generic.rs index 4be4b9e23..ea5cc6c34 100644 --- a/src/dialect/generic.rs +++ b/src/dialect/generic.rs @@ -18,6 +18,10 @@ use crate::dialect::Dialect; pub struct GenericDialect; impl Dialect for GenericDialect { + fn is_delimited_identifier_start(&self, ch: char) -> bool { + ch == '"' || ch == '`' + } + fn is_identifier_start(&self, ch: char) -> bool { ch.is_alphabetic() || ch == '_' || ch == '#' || ch == '@' } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index d0b11ffea..8930b0f49 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -4195,6 +4195,13 @@ impl<'a> Parser<'a> { Ok(SqlOption { name, value }) } + pub fn parse_partition(&mut self) -> Result { + 
self.expect_token(&Token::LParen)?; + let partitions = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + Ok(Partition { partitions }) + } + pub fn parse_alter_table_operation(&mut self) -> Result { let operation = if self.parse_keyword(Keyword::ADD) { if let Some(constraint) = self.parse_optional_table_constraint()? { @@ -4202,13 +4209,18 @@ impl<'a> Parser<'a> { } else { let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); - if self.parse_keyword(Keyword::PARTITION) { - self.expect_token(&Token::LParen)?; - let partitions = self.parse_comma_separated(Parser::parse_expr)?; - self.expect_token(&Token::RParen)?; + let mut new_partitions = vec![]; + loop { + if self.parse_keyword(Keyword::PARTITION) { + new_partitions.push(self.parse_partition()?); + } else { + break; + } + } + if !new_partitions.is_empty() { AlterTableOperation::AddPartitions { if_not_exists, - new_partitions: partitions, + new_partitions, } } else { let column_keyword = self.parse_keyword(Keyword::COLUMN); diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index f63b9cef9..534a224ea 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -128,6 +128,12 @@ fn test_add_partition() { hive().verified_stmt(add); } +#[test] +fn test_add_multiple_partitions() { + let add = "ALTER TABLE db.table ADD IF NOT EXISTS PARTITION (`a` = 'asdf', `b` = 2) PARTITION (`a` = 'asdh', `b` = 3)"; + hive().verified_stmt(add); +} + #[test] fn test_drop_partition() { let drop = "ALTER TABLE db.table DROP PARTITION (a = 1)"; From c5a7d6ccb97292ace1399f24f88dbb1027c0987f Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Tue, 24 Oct 2023 13:20:12 +0200 Subject: [PATCH 17/17] Support for single-quoted identifiers (#1021) Co-authored-by: Andrew Lamb --- src/parser/mod.rs | 38 ++++++++++++++++++++++++++------------ tests/sqlparser_sqlite.rs | 5 +++++ 2 files changed, 31 insertions(+), 12 deletions(-) diff --git a/src/parser/mod.rs 
b/src/parser/mod.rs index 8930b0f49..eb7c4a008 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -620,18 +620,29 @@ impl<'a> Parser<'a> { let next_token = self.next_token(); match next_token.token { - Token::Word(w) if self.peek_token().token == Token::Period => { - let mut id_parts: Vec = vec![w.to_ident()]; - - while self.consume_token(&Token::Period) { - let next_token = self.next_token(); - match next_token.token { - Token::Word(w) => id_parts.push(w.to_ident()), - Token::Mul => { - return Ok(WildcardExpr::QualifiedWildcard(ObjectName(id_parts))); - } - _ => { - return self.expected("an identifier or a '*' after '.'", next_token); + t @ (Token::Word(_) | Token::SingleQuotedString(_)) => { + if self.peek_token().token == Token::Period { + let mut id_parts: Vec = vec![match t { + Token::Word(w) => w.to_ident(), + Token::SingleQuotedString(s) => Ident::with_quote('\'', s), + _ => unreachable!(), // We matched above + }]; + + while self.consume_token(&Token::Period) { + let next_token = self.next_token(); + match next_token.token { + Token::Word(w) => id_parts.push(w.to_ident()), + Token::SingleQuotedString(s) => { + // SQLite has single-quoted identifiers + id_parts.push(Ident::with_quote('\'', s)) + } + Token::Mul => { + return Ok(WildcardExpr::QualifiedWildcard(ObjectName(id_parts))); + } + _ => { + return self + .expected("an identifier or a '*' after '.'", next_token); + } } } } @@ -830,6 +841,9 @@ impl<'a> Parser<'a> { let next_token = self.next_token(); match next_token.token { Token::Word(w) => id_parts.push(w.to_ident()), + Token::SingleQuotedString(s) => { + id_parts.push(Ident::with_quote('\'', s)) + } _ => { return self .expected("an identifier or a '*' after '.'", next_token); diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs index 2fdd4e3de..b657acddf 100644 --- a/tests/sqlparser_sqlite.rs +++ b/tests/sqlparser_sqlite.rs @@ -335,6 +335,11 @@ fn parse_create_table_with_strict() { } } +#[test] +fn 
parse_single_quoted_identified() { + sqlite().verified_only_select("SELECT 't'.*, t.'x' FROM 't'"); + // TODO: add support for select 't'.x +} #[test] fn parse_window_function_with_filter() { for func_name in [