From 00abaf218735b6003af6eb4f482d6a6e2659a12c Mon Sep 17 00:00:00 2001 From: Yuval Shkolar <85674443+yuval-illumex@users.noreply.github.com> Date: Mon, 9 Dec 2024 22:25:10 +0200 Subject: [PATCH 01/11] Support INSERT OVERWRITE INTO syntax (#1584) --- src/parser/mod.rs | 5 ++--- tests/sqlparser_snowflake.rs | 6 ++++++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index ac76f6484..e47e71b45 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -11291,9 +11291,8 @@ impl<'a> Parser<'a> { let replace_into = false; - let action = self.parse_one_of_keywords(&[Keyword::INTO, Keyword::OVERWRITE]); - let into = action == Some(Keyword::INTO); - let overwrite = action == Some(Keyword::OVERWRITE); + let overwrite = self.parse_keyword(Keyword::OVERWRITE); + let into = self.parse_keyword(Keyword::INTO); let local = self.parse_keyword(Keyword::LOCAL); diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 3cbd87bf7..5ad861f47 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -2952,3 +2952,9 @@ fn test_sf_double_dot_notation() { #[test] fn test_parse_double_dot_notation_wrong_position() {} + +#[test] +fn parse_insert_overwrite() { + let insert_overwrite_into = r#"INSERT OVERWRITE INTO schema.table SELECT a FROM b"#; + snowflake().verified_stmt(insert_overwrite_into); +} From 04271b0e4eec304dd689bd9875b13dae15db1a3f Mon Sep 17 00:00:00 2001 From: Ifeanyi Ubah Date: Wed, 11 Dec 2024 23:31:24 +0100 Subject: [PATCH 02/11] Parse `INSERT` with subquery when lacking column names (#1586) --- src/parser/mod.rs | 25 +++++++++++++++++++------ tests/sqlparser_common.rs | 2 ++ 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index e47e71b45..04d6edcd5 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -11329,14 +11329,19 @@ impl<'a> Parser<'a> { if self.parse_keywords(&[Keyword::DEFAULT, Keyword::VALUES]) { (vec![], None, vec![], None) } else { - let columns = self.parse_parenthesized_column_list(Optional, is_mysql)?; + let (columns, partitioned, after_columns) = if !self.peek_subquery_start() { + let columns = self.parse_parenthesized_column_list(Optional, is_mysql)?; - let partitioned = self.parse_insert_partition()?; - // Hive allows you to specify columns after partitions as well if you want. - let after_columns = if dialect_of!(self is HiveDialect) { - self.parse_parenthesized_column_list(Optional, false)? + let partitioned = self.parse_insert_partition()?; + // Hive allows you to specify columns after partitions as well if you want. + let after_columns = if dialect_of!(self is HiveDialect) { + self.parse_parenthesized_column_list(Optional, false)? + } else { + vec![] + }; + (columns, partitioned, after_columns) } else { - vec![] + Default::default() }; let source = Some(self.parse_query()?); @@ -11431,6 +11436,14 @@ impl<'a> Parser<'a> { } } + /// Returns true if the immediate tokens look like the + /// beginning of a subquery. 
`(SELECT ...` + fn peek_subquery_start(&mut self) -> bool { + let [maybe_lparen, maybe_select] = self.peek_tokens(); + Token::LParen == maybe_lparen + && matches!(maybe_select, Token::Word(w) if w.keyword == Keyword::SELECT) + } + fn parse_conflict_clause(&mut self) -> Option { if self.parse_keywords(&[Keyword::OR, Keyword::REPLACE]) { Some(SqliteOnConflict::Replace) diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 42616d51e..f76516ef4 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -10964,6 +10964,8 @@ fn insert_into_with_parentheses() { Box::new(GenericDialect {}), ]); dialects.verified_stmt("INSERT INTO t1 (id, name) (SELECT t2.id, t2.name FROM t2)"); + dialects.verified_stmt("INSERT INTO t1 (SELECT t2.id, t2.name FROM t2)"); + dialects.verified_stmt(r#"INSERT INTO t1 ("select", name) (SELECT t2.name FROM t2)"#); } #[test] From a13f8c6b931ac17cd245a23abfc412c18bfb23e2 Mon Sep 17 00:00:00 2001 From: Ifeanyi Ubah Date: Wed, 11 Dec 2024 23:31:55 +0100 Subject: [PATCH 03/11] Add support for ODBC functions (#1585) --- src/ast/mod.rs | 17 +++++++++ src/ast/spans.rs | 1 + src/ast/visitor.rs | 1 + src/keywords.rs | 1 + src/parser/mod.rs | 65 +++++++++++++++++++++++++++++++---- src/test_utils.rs | 1 + tests/sqlparser_clickhouse.rs | 4 +++ tests/sqlparser_common.rs | 43 +++++++++++++++++++++++ tests/sqlparser_duckdb.rs | 1 + tests/sqlparser_hive.rs | 1 + tests/sqlparser_mssql.rs | 2 ++ tests/sqlparser_postgres.rs | 7 ++++ tests/sqlparser_redshift.rs | 1 + tests/sqlparser_snowflake.rs | 2 ++ tests/sqlparser_sqlite.rs | 1 + 15 files changed, 142 insertions(+), 6 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index bc4dda349..cfd0ac089 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -5523,6 +5523,15 @@ impl fmt::Display for CloseCursor { #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct Function { pub name: ObjectName, + /// Flags whether this function call uses the [ODBC syntax]. + /// + /// Example: + /// ```sql + /// SELECT {fn CONCAT('foo', 'bar')} + /// ``` + /// + /// [ODBC syntax]: https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/scalar-function-calls?view=sql-server-2017 + pub uses_odbc_syntax: bool, /// The parameters to the function, including any options specified within the /// delimiting parentheses. 
/// @@ -5561,6 +5570,10 @@ pub struct Function { impl fmt::Display for Function { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + if self.uses_odbc_syntax { + write!(f, "{{fn ")?; + } + write!(f, "{}{}{}", self.name, self.parameters, self.args)?; if !self.within_group.is_empty() { @@ -5583,6 +5596,10 @@ impl fmt::Display for Function { write!(f, " OVER {o}")?; } + if self.uses_odbc_syntax { + write!(f, "}}")?; + } + Ok(()) } } diff --git a/src/ast/spans.rs b/src/ast/spans.rs index de577c9b8..7e45f838a 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -1478,6 +1478,7 @@ impl Spanned for Function { fn span(&self) -> Span { let Function { name, + uses_odbc_syntax: _, parameters, args, filter, diff --git a/src/ast/visitor.rs b/src/ast/visitor.rs index eacd268a4..f7562b66c 100644 --- a/src/ast/visitor.rs +++ b/src/ast/visitor.rs @@ -530,6 +530,7 @@ where /// let old_expr = std::mem::replace(expr, Expr::Value(Value::Null)); /// *expr = Expr::Function(Function { /// name: ObjectName(vec![Ident::new("f")]), +/// uses_odbc_syntax: false, /// args: FunctionArguments::List(FunctionArgumentList { /// duplicate_treatment: None, /// args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(old_expr))], diff --git a/src/keywords.rs b/src/keywords.rs index 25a719d25..d0cfcd05b 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -333,6 +333,7 @@ define_keywords!( FLOAT8, FLOOR, FLUSH, + FN, FOLLOWING, FOR, FORCE, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 04d6edcd5..39ab2db24 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1053,6 +1053,7 @@ impl<'a> Parser<'a> { { Ok(Some(Expr::Function(Function { name: ObjectName(vec![w.to_ident(w_span)]), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::None, null_treatment: None, @@ -1111,6 +1112,7 @@ impl<'a> Parser<'a> { self.expect_token(&Token::RParen)?; Ok(Some(Expr::Function(Function { name: ObjectName(vec![w.to_ident(w_span)]), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::Subquery(query), filter: None, @@ -1408,9 +1410,9 @@ impl<'a> Parser<'a> { self.prev_token(); Ok(Expr::Value(self.parse_value()?)) } - Token::LBrace if self.dialect.supports_dictionary_syntax() => { + Token::LBrace => { self.prev_token(); - self.parse_duckdb_struct_literal() + self.parse_lbrace_expr() } _ => self.expected("an expression", next_token), }?; @@ -1509,7 +1511,29 @@ impl<'a> Parser<'a> { } } + /// Tries to parse the body of an [ODBC function] call. + /// i.e. 
without the enclosing braces + /// + /// ```sql + /// fn myfunc(1,2,3) + /// ``` + /// + /// [ODBC function]: https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/scalar-function-calls?view=sql-server-2017 + fn maybe_parse_odbc_fn_body(&mut self) -> Result, ParserError> { + self.maybe_parse(|p| { + p.expect_keyword(Keyword::FN)?; + let fn_name = p.parse_object_name(false)?; + let mut fn_call = p.parse_function_call(fn_name)?; + fn_call.uses_odbc_syntax = true; + Ok(Expr::Function(fn_call)) + }) + } + pub fn parse_function(&mut self, name: ObjectName) -> Result { + self.parse_function_call(name).map(Expr::Function) + } + + fn parse_function_call(&mut self, name: ObjectName) -> Result { self.expect_token(&Token::LParen)?; // Snowflake permits a subquery to be passed as an argument without @@ -1517,15 +1541,16 @@ impl<'a> Parser<'a> { if dialect_of!(self is SnowflakeDialect) && self.peek_sub_query() { let subquery = self.parse_query()?; self.expect_token(&Token::RParen)?; - return Ok(Expr::Function(Function { + return Ok(Function { name, + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::Subquery(subquery), filter: None, null_treatment: None, over: None, within_group: vec![], - })); + }); } let mut args = self.parse_function_argument_list()?; @@ -1584,15 +1609,16 @@ impl<'a> Parser<'a> { None }; - Ok(Expr::Function(Function { + Ok(Function { name, + uses_odbc_syntax: false, parameters, args: FunctionArguments::List(args), null_treatment, filter, over, within_group, - })) + }) } /// Optionally parses a null treatment clause. @@ -1619,6 +1645,7 @@ impl<'a> Parser<'a> { }; Ok(Expr::Function(Function { name, + uses_odbc_syntax: false, parameters: FunctionArguments::None, args, filter: None, @@ -2211,6 +2238,31 @@ impl<'a> Parser<'a> { } } + /// Parse expression types that start with a left brace '{'. + /// Examples: + /// ```sql + /// -- Dictionary expr. + /// {'key1': 'value1', 'key2': 'value2'} + /// + /// -- Function call using the ODBC syntax. + /// { fn CONCAT('foo', 'bar') } + /// ``` + fn parse_lbrace_expr(&mut self) -> Result { + let token = self.expect_token(&Token::LBrace)?; + + if let Some(fn_expr) = self.maybe_parse_odbc_fn_body()? 
{ + self.expect_token(&Token::RBrace)?; + return Ok(fn_expr); + } + + if self.dialect.supports_dictionary_syntax() { + self.prev_token(); // Put back the '{' + return self.parse_duckdb_struct_literal(); + } + + self.expected("an expression", token) + } + /// Parses fulltext expressions [`sqlparser::ast::Expr::MatchAgainst`] /// /// # Errors @@ -7578,6 +7630,7 @@ impl<'a> Parser<'a> { } else { Ok(Statement::Call(Function { name: object_name, + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::None, over: None, diff --git a/src/test_utils.rs b/src/test_utils.rs index aaee20c5f..6e60a31c1 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -376,6 +376,7 @@ pub fn join(relation: TableFactor) -> Join { pub fn call(function: &str, args: impl IntoIterator) -> Expr { Expr::Function(Function { name: ObjectName(vec![Ident::new(function)]), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index ed0c74021..9d785576f 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -199,6 +199,7 @@ fn parse_delimited_identifiers() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::with_quote('"', "myfun")]), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, @@ -821,6 +822,7 @@ fn parse_create_table_with_variant_default_expressions() { name: None, option: ColumnOption::Materialized(Expr::Function(Function { name: ObjectName(vec![Ident::new("now")]), + uses_odbc_syntax: false, args: FunctionArguments::List(FunctionArgumentList { args: vec![], duplicate_treatment: None, @@ -842,6 +844,7 @@ fn parse_create_table_with_variant_default_expressions() { name: None, option: ColumnOption::Ephemeral(Some(Expr::Function(Function { name: ObjectName(vec![Ident::new("now")]), + uses_odbc_syntax: false, args: FunctionArguments::List(FunctionArgumentList { args: vec![], duplicate_treatment: None, @@ -872,6 +875,7 @@ fn parse_create_table_with_variant_default_expressions() { name: None, option: ColumnOption::Alias(Expr::Function(Function { name: ObjectName(vec![Ident::new("toString")]), + uses_odbc_syntax: false, args: FunctionArguments::List(FunctionArgumentList { args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( Identifier(Ident::new("c")) diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index f76516ef4..7dfb98d6f 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -1108,6 +1108,7 @@ fn parse_select_count_wildcard() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("COUNT")]), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, @@ -1130,6 +1131,7 @@ fn parse_select_count_distinct() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("COUNT")]), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: Some(DuplicateTreatment::Distinct), @@ -2366,6 +2368,7 @@ fn parse_select_having() { Some(Expr::BinaryOp { left: Box::new(Expr::Function(Function { name: ObjectName(vec![Ident::new("COUNT")]), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: 
None, @@ -2396,6 +2399,7 @@ fn parse_select_qualify() { Some(Expr::BinaryOp { left: Box::new(Expr::Function(Function { name: ObjectName(vec![Ident::new("ROW_NUMBER")]), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, @@ -2802,6 +2806,7 @@ fn parse_listagg() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("LISTAGG")]), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: Some(DuplicateTreatment::Distinct), @@ -4603,6 +4608,7 @@ fn parse_named_argument_function() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("FUN")]), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, @@ -4642,6 +4648,7 @@ fn parse_named_argument_function_with_eq_operator() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("FUN")]), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, @@ -4716,6 +4723,7 @@ fn parse_window_functions() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("row_number")]), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, @@ -4846,6 +4854,7 @@ fn test_parse_named_window() { quote_style: None, span: Span::empty(), }]), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, @@ -4880,6 +4889,7 @@ fn test_parse_named_window() { quote_style: None, span: Span::empty(), }]), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, @@ -9008,6 +9018,7 @@ fn parse_time_functions() { let select = verified_only_select(&sql); let select_localtime_func_call_ast = Function { name: ObjectName(vec![Ident::new(func_name)]), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, @@ -10021,6 +10032,7 @@ fn parse_call() { assert_eq!( verified_stmt("CALL my_procedure('a')"), Statement::Call(Function { + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, @@ -10511,6 +10523,7 @@ fn test_selective_aggregation() { vec![ SelectItem::UnnamedExpr(Expr::Function(Function { name: ObjectName(vec![Ident::new("ARRAY_AGG")]), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, @@ -10529,6 +10542,7 @@ fn test_selective_aggregation() { SelectItem::ExprWithAlias { expr: Expr::Function(Function { name: ObjectName(vec![Ident::new("ARRAY_AGG")]), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, @@ -10968,6 +10982,35 @@ fn insert_into_with_parentheses() { dialects.verified_stmt(r#"INSERT INTO t1 ("select", name) (SELECT t2.name FROM t2)"#); } +#[test] +fn parse_odbc_scalar_function() { + let select = verified_only_select("SELECT {fn my_func(1, 2)}"); + let Expr::Function(Function { + name, + uses_odbc_syntax, + args, + .. 
+ }) = expr_from_projection(only(&select.projection)) + else { + unreachable!("expected function") + }; + assert_eq!(name, &ObjectName(vec![Ident::new("my_func")])); + assert!(uses_odbc_syntax); + matches!(args, FunctionArguments::List(l) if l.args.len() == 2); + + verified_stmt("SELECT {fn fna()} AS foo, fnb(1)"); + + // Testing invalid SQL with any-one dialect is intentional. + // Depending on dialect flags the error message may be different. + let pg = TestedDialects::new(vec![Box::new(PostgreSqlDialect {})]); + assert_eq!( + pg.parse_sql_statements("SELECT {fn2 my_func()}") + .unwrap_err() + .to_string(), + "sql parser error: Expected: an expression, found: {" + ); +} + #[test] fn test_dictionary_syntax() { fn check(sql: &str, expect: Expr) { diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index 01ac0649a..a0fc49b9f 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -606,6 +606,7 @@ fn test_duckdb_named_argument_function_with_assignment_operator() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("FUN")]), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 546b289ac..981218388 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -480,6 +480,7 @@ fn parse_delimited_identifiers() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::with_quote('"', "myfun")]), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index 31668c86a..66e40f46b 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -635,6 +635,7 @@ fn parse_delimited_identifiers() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::with_quote('"', "myfun")]), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, @@ -1388,6 +1389,7 @@ fn parse_create_table_with_valid_options() { }, ], ), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::List( FunctionArgumentList { diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 92368e9ee..2e204d9bc 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -2529,6 +2529,7 @@ fn parse_array_subquery_expr() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("ARRAY")]), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::Subquery(Box::new(Query { with: None, @@ -2911,6 +2912,7 @@ fn test_composite_value() { Ident::new("information_schema"), Ident::new("_pg_expandarray") ]), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, @@ -3088,6 +3090,7 @@ fn parse_current_functions() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("CURRENT_CATALOG")]), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::None, null_treatment: None, @@ -3100,6 +3103,7 @@ fn parse_current_functions() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("CURRENT_USER")]), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::None, 
null_treatment: None, @@ -3112,6 +3116,7 @@ fn parse_current_functions() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("SESSION_USER")]), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::None, null_treatment: None, @@ -3124,6 +3129,7 @@ fn parse_current_functions() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("USER")]), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::None, null_treatment: None, @@ -3599,6 +3605,7 @@ fn parse_delimited_identifiers() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::with_quote('"', "myfun")]), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, diff --git a/tests/sqlparser_redshift.rs b/tests/sqlparser_redshift.rs index f0c1f0c74..2fd855a09 100644 --- a/tests/sqlparser_redshift.rs +++ b/tests/sqlparser_redshift.rs @@ -154,6 +154,7 @@ fn parse_delimited_identifiers() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::with_quote('"', "myfun")]), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 5ad861f47..d6774c317 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -1212,6 +1212,7 @@ fn parse_delimited_identifiers() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::with_quote('"', "myfun")]), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, @@ -1423,6 +1424,7 @@ fn test_alter_table_clustering() { Expr::Identifier(Ident::with_quote('"', "c2")), Expr::Function(Function { name: ObjectName(vec![Ident::new("TO_DATE")]), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs index 4f23979c5..987b1263d 100644 --- a/tests/sqlparser_sqlite.rs +++ b/tests/sqlparser_sqlite.rs @@ -419,6 +419,7 @@ fn parse_window_function_with_filter() { select.projection, vec![SelectItem::UnnamedExpr(Expr::Function(Function { name: ObjectName(vec![Ident::new(func_name)]), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, From 5de5312406fae3f69b92b12dd63c68d7fce3ed74 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 12 Dec 2024 09:17:13 -0500 Subject: [PATCH 04/11] Update version to 0.53.0 and add release notes (#1592) --- Cargo.toml | 2 +- changelog/0.53.0.md | 95 +++++++++++++++++++++++++++++++++++++++++++ dev/release/README.md | 6 +++ 3 files changed, 102 insertions(+), 1 deletion(-) create mode 100644 changelog/0.53.0.md diff --git a/Cargo.toml b/Cargo.toml index c4d0094f4..301a59c55 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "sqlparser" description = "Extensible SQL Lexer and Parser with support for ANSI SQL:2011" -version = "0.52.0" +version = "0.53.0" authors = ["Apache DataFusion "] homepage = "https://github.com/apache/datafusion-sqlparser-rs" documentation = "https://docs.rs/sqlparser/" diff --git a/changelog/0.53.0.md b/changelog/0.53.0.md new file mode 100644 index 000000000..5b9de07d3 --- 
/dev/null +++ b/changelog/0.53.0.md @@ -0,0 +1,95 @@ + + +# sqlparser-rs 0.53.0 Changelog + +This release consists of 47 commits from 16 contributors. See credits at the end of this changelog for more information. + +**Other:** + +- hive: support for special not expression `!a` and raise error for `a!` factorial operator [#1472](https://github.com/apache/datafusion-sqlparser-rs/pull/1472) (wugeer) +- Add support for MSSQL's `OPENJSON WITH` clause [#1498](https://github.com/apache/datafusion-sqlparser-rs/pull/1498) (gaoqiangz) +- Parse true and false as identifiers in mssql [#1510](https://github.com/apache/datafusion-sqlparser-rs/pull/1510) (lovasoa) +- Fix the parsing error in MSSQL for multiple statements that include `DECLARE` statements [#1497](https://github.com/apache/datafusion-sqlparser-rs/pull/1497) (wugeer) +- Add support for Snowflake SHOW DATABASES/SCHEMAS/TABLES/VIEWS/COLUMNS statements [#1501](https://github.com/apache/datafusion-sqlparser-rs/pull/1501) (yoavcloud) +- Add support of COMMENT ON syntax for Snowflake [#1516](https://github.com/apache/datafusion-sqlparser-rs/pull/1516) (git-hulk) +- Add support for MYSQL's `CREATE TABLE SELECT` expr [#1515](https://github.com/apache/datafusion-sqlparser-rs/pull/1515) (wugeer) +- Add support for MSSQL's `XQuery` methods [#1500](https://github.com/apache/datafusion-sqlparser-rs/pull/1500) (gaoqiangz) +- Add support for Hive's `LOAD DATA` expr [#1520](https://github.com/apache/datafusion-sqlparser-rs/pull/1520) (wugeer) +- Fix ClickHouse document link from `Russian` to `English` [#1527](https://github.com/apache/datafusion-sqlparser-rs/pull/1527) (git-hulk) +- Support ANTI and SEMI joins without LEFT/RIGHT [#1528](https://github.com/apache/datafusion-sqlparser-rs/pull/1528) (delamarch3) +- support sqlite's OR clauses in update statements [#1530](https://github.com/apache/datafusion-sqlparser-rs/pull/1530) (lovasoa) +- support column type definitions in table aliases [#1526](https://github.com/apache/datafusion-sqlparser-rs/pull/1526) (lovasoa) +- Add support for MSSQL's `JSON_ARRAY`/`JSON_OBJECT` expr [#1507](https://github.com/apache/datafusion-sqlparser-rs/pull/1507) (gaoqiangz) +- Add support for PostgreSQL `UNLISTEN` syntax and Add support for Postgres `LOAD extension` expr [#1531](https://github.com/apache/datafusion-sqlparser-rs/pull/1531) (wugeer) +- Parse byte/bit string literals in MySQL and Postgres [#1532](https://github.com/apache/datafusion-sqlparser-rs/pull/1532) (mvzink) +- Allow example CLI to read from stdin [#1536](https://github.com/apache/datafusion-sqlparser-rs/pull/1536) (mvzink) +- recursive select calls are parsed with bad trailing_commas parameter [#1521](https://github.com/apache/datafusion-sqlparser-rs/pull/1521) (tomershaniii) +- PartiQL queries in Redshift [#1534](https://github.com/apache/datafusion-sqlparser-rs/pull/1534) (yoavcloud) +- Include license file in sqlparser_derive crate [#1543](https://github.com/apache/datafusion-sqlparser-rs/pull/1543) (ankane) +- Fallback to identifier parsing if expression parsing fails [#1513](https://github.com/apache/datafusion-sqlparser-rs/pull/1513) (yoavcloud) +- support `json_object('k':'v')` in postgres [#1546](https://github.com/apache/datafusion-sqlparser-rs/pull/1546) (lovasoa) +- Document micro benchmarks [#1555](https://github.com/apache/datafusion-sqlparser-rs/pull/1555) (alamb) +- Implement `Spanned` to retrieve source locations on AST nodes [#1435](https://github.com/apache/datafusion-sqlparser-rs/pull/1435) (Nyrox) +- Fix error in benchmark queries 
[#1560](https://github.com/apache/datafusion-sqlparser-rs/pull/1560) (alamb) +- Fix clippy warnings on rust 1.83 [#1570](https://github.com/apache/datafusion-sqlparser-rs/pull/1570) (iffyio) +- Support relation visitor to visit the `Option` field [#1556](https://github.com/apache/datafusion-sqlparser-rs/pull/1556) (goldmedal) +- Rename `TokenWithLocation` to `TokenWithSpan`, in backwards compatible way [#1562](https://github.com/apache/datafusion-sqlparser-rs/pull/1562) (alamb) +- Support MySQL size variants for BLOB and TEXT columns [#1564](https://github.com/apache/datafusion-sqlparser-rs/pull/1564) (mvzink) +- Increase version of sqlparser_derive from 0.2.2 to 0.3.0 [#1571](https://github.com/apache/datafusion-sqlparser-rs/pull/1571) (alamb) +- `json_object('k' VALUE 'v')` in postgres [#1547](https://github.com/apache/datafusion-sqlparser-rs/pull/1547) (lovasoa) +- Support snowflake double dot notation for object name [#1540](https://github.com/apache/datafusion-sqlparser-rs/pull/1540) (ayman-sigma) +- Update comments / docs for `Spanned` [#1549](https://github.com/apache/datafusion-sqlparser-rs/pull/1549) (alamb) +- Support Databricks struct literal [#1542](https://github.com/apache/datafusion-sqlparser-rs/pull/1542) (ayman-sigma) +- Encapsulate CreateFunction [#1573](https://github.com/apache/datafusion-sqlparser-rs/pull/1573) (philipcristiano) +- Support BIT column types [#1577](https://github.com/apache/datafusion-sqlparser-rs/pull/1577) (mvzink) +- Support parsing optional nulls handling for unique constraint [#1567](https://github.com/apache/datafusion-sqlparser-rs/pull/1567) (mvzink) +- Fix displaying WORK or TRANSACTION after BEGIN [#1565](https://github.com/apache/datafusion-sqlparser-rs/pull/1565) (mvzink) +- Add support of the ENUM8|ENUM16 for ClickHouse dialect [#1574](https://github.com/apache/datafusion-sqlparser-rs/pull/1574) (git-hulk) +- Parse Snowflake USE ROLE and USE SECONDARY ROLES [#1578](https://github.com/apache/datafusion-sqlparser-rs/pull/1578) (yoavcloud) +- Snowflake ALTER TABLE clustering options [#1579](https://github.com/apache/datafusion-sqlparser-rs/pull/1579) (yoavcloud) +- Support INSERT OVERWRITE INTO syntax [#1584](https://github.com/apache/datafusion-sqlparser-rs/pull/1584) (yuval-illumex) +- Parse `INSERT` with subquery when lacking column names [#1586](https://github.com/apache/datafusion-sqlparser-rs/pull/1586) (iffyio) +- Add support for ODBC functions [#1585](https://github.com/apache/datafusion-sqlparser-rs/pull/1585) (iffyio) + +## Credits + +Thank you to everyone who contributed to this release. Here is a breakdown of commits (PRs merged) per contributor. + +``` + 8 Andrew Lamb + 6 Michael Victor Zink + 5 Ophir LOJKINE + 5 Yoav Cohen + 5 wugeer + 3 Ifeanyi Ubah + 3 gaoqiangz + 3 hulk + 2 Ayman Elkfrawy + 1 Andrew Kane + 1 Jax Liu + 1 Mark-Oliver Junge + 1 Philip Cristiano + 1 Yuval Shkolar + 1 delamarch3 + 1 tomershaniii +``` + +Thank you also to everyone who contributed in other ways such as filing issues, reviewing PRs, and providing feedback on this release. + diff --git a/dev/release/README.md b/dev/release/README.md index c440f7387..c3018dd68 100644 --- a/dev/release/README.md +++ b/dev/release/README.md @@ -146,6 +146,12 @@ Move artifacts to the release location in SVN, using the `release-tarball.sh` sc ```shell ./dev/release/release-tarball.sh 0.52.0 1 ``` + +Promote the rc tag to the release tag +```shell +git tag v0.52.0 v0.52.0-rc3 +git push apache v0.52.0 +``` Congratulations! The release is now official! 
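A brief aside for readers of the 0.53.0 changelog above: the following is a minimal sketch (not part of the patch series) of how the ODBC function-call support from #1585 surfaces through the crate's public API. It assumes sqlparser 0.53.0 and uses only entry points known to exist in the crate: `Parser::parse_sql`, `GenericDialect`, and the `Function::uses_odbc_syntax` field introduced in patch 03/11.

```rust
use sqlparser::ast::{Expr, SelectItem, SetExpr, Statement};
use sqlparser::dialect::GenericDialect;
use sqlparser::parser::Parser;

fn main() {
    // The ODBC escape syntax `{fn ...}` is attempted for any dialect,
    // per the `parse_lbrace_expr` change in patch 03/11.
    let sql = "SELECT {fn CONCAT('foo', 'bar')}";
    let statements = Parser::parse_sql(&GenericDialect {}, sql).unwrap();

    let Statement::Query(query) = &statements[0] else {
        unreachable!("expected a query")
    };
    let SetExpr::Select(select) = query.body.as_ref() else {
        unreachable!("expected a SELECT")
    };
    if let SelectItem::UnnamedExpr(Expr::Function(func)) = &select.projection[0] {
        // The parser records that the call used the ODBC escape syntax...
        assert!(func.uses_odbc_syntax);
        // ...and Display round-trips the enclosing braces, as exercised
        // by `verified_stmt` in the patch's tests.
        assert_eq!(func.to_string(), "{fn CONCAT('foo', 'bar')}");
    }
}
```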
From 310882862147ad7ca43320da049a718c9a4538b0 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 13 Dec 2024 07:22:30 -0500 Subject: [PATCH 05/11] Run cargo fmt on `derive` crate (#1595) --- .github/workflows/rust.yml | 2 +- derive/src/lib.rs | 12 ++++++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 2502abe9d..6c8130dc4 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -27,7 +27,7 @@ jobs: - uses: actions/checkout@v4 - name: Setup Rust Toolchain uses: ./.github/actions/setup-builder - - run: cargo fmt -- --check + - run: cargo fmt --all -- --check lint: runs-on: ubuntu-latest diff --git a/derive/src/lib.rs b/derive/src/lib.rs index dd4d37b41..b81623312 100644 --- a/derive/src/lib.rs +++ b/derive/src/lib.rs @@ -18,7 +18,11 @@ use proc_macro2::TokenStream; use quote::{format_ident, quote, quote_spanned, ToTokens}; use syn::spanned::Spanned; -use syn::{parse::{Parse, ParseStream}, parse_macro_input, parse_quote, Attribute, Data, DeriveInput, Fields, GenericParam, Generics, Ident, Index, LitStr, Meta, Token, Type, TypePath}; +use syn::{ + parse::{Parse, ParseStream}, + parse_macro_input, parse_quote, Attribute, Data, DeriveInput, Fields, GenericParam, Generics, + Ident, Index, LitStr, Meta, Token, Type, TypePath, +}; use syn::{Path, PathArguments}; /// Implementation of `[#derive(Visit)]` @@ -267,7 +271,11 @@ fn visit_children( } fn is_option(ty: &Type) -> bool { - if let Type::Path(TypePath { path: Path { segments, .. }, .. }) = ty { + if let Type::Path(TypePath { + path: Path { segments, .. }, + .. + }) = ty + { if let Some(segment) = segments.last() { if segment.ident == "Option" { if let PathArguments::AngleBracketed(args) = &segment.arguments { From 885aa93465d0f984f4ff55cdff67f1be84472dc8 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 13 Dec 2024 13:01:56 -0500 Subject: [PATCH 06/11] Add Apache license header to spans.rs (#1594) --- src/ast/spans.rs | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/ast/spans.rs b/src/ast/spans.rs index 7e45f838a..88e0fbdf2 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -1,3 +1,20 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ use core::iter; use crate::tokenizer::Span; From 7bc6ddb8fb0800111179a111fa281672285ce34f Mon Sep 17 00:00:00 2001 From: Martin Abelson Sahlen Date: Sun, 15 Dec 2024 11:39:42 +0200 Subject: [PATCH 07/11] Add support for BigQuery `ANY TYPE` data type (#1602) Co-authored-by: Martin Abelson Sahlen Co-authored-by: Martin Abelson Sahlen --- src/ast/data_type.rs | 6 +++++- src/parser/mod.rs | 4 ++++ tests/sqlparser_bigquery.rs | 16 ++++++++++++++++ 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/src/ast/data_type.rs b/src/ast/data_type.rs index 5b0239e17..b53b8f0d2 100644 --- a/src/ast/data_type.rs +++ b/src/ast/data_type.rs @@ -373,6 +373,10 @@ pub enum DataType { /// /// [postgresql]: https://www.postgresql.org/docs/current/plpgsql-trigger.html Trigger, + /// Any data type, used in BigQuery UDF definitions for templated parameters + /// + /// [bigquery]: https://cloud.google.com/bigquery/docs/user-defined-functions#templated-sql-udf-parameters + AnyType, } impl fmt::Display for DataType { @@ -383,7 +387,6 @@ impl fmt::Display for DataType { DataType::CharacterVarying(size) => { format_character_string_type(f, "CHARACTER VARYING", size) } - DataType::CharVarying(size) => format_character_string_type(f, "CHAR VARYING", size), DataType::Varchar(size) => format_character_string_type(f, "VARCHAR", size), DataType::Nvarchar(size) => format_character_string_type(f, "NVARCHAR", size), @@ -626,6 +629,7 @@ impl fmt::Display for DataType { } DataType::Unspecified => Ok(()), DataType::Trigger => write!(f, "TRIGGER"), + DataType::AnyType => write!(f, "ANY TYPE"), } } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 39ab2db24..37323084d 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -8382,6 +8382,10 @@ impl<'a> Parser<'a> { Ok(DataType::Tuple(field_defs)) } Keyword::TRIGGER => Ok(DataType::Trigger), + Keyword::ANY if self.peek_keyword(Keyword::TYPE) => { + let _ = self.parse_keyword(Keyword::TYPE); + Ok(DataType::AnyType) + } _ => { self.prev_token(); let type_name = self.parse_object_name(false)?; diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 2be128a8c..34c14cc55 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -2212,3 +2212,19 @@ fn test_any_value() { bigquery_and_generic().verified_expr("ANY_VALUE(fruit HAVING MAX sold)"); bigquery_and_generic().verified_expr("ANY_VALUE(fruit HAVING MIN sold)"); } + +#[test] +fn test_any_type() { + bigquery().verified_stmt(concat!( + "CREATE OR REPLACE TEMPORARY FUNCTION ", + "my_function(param1 ANY TYPE) ", + "AS (", + "(SELECT 1)", + ")", + )); +} + +#[test] +fn test_any_type_dont_break_custom_type() { + bigquery_and_generic().verified_stmt("CREATE TABLE foo (x ANY)"); +} From 316bb14135ce21f023ac8bb8f94d6bea23d03c37 Mon Sep 17 00:00:00 2001 From: Yoav Cohen <59807311+yoavcloud@users.noreply.github.com> Date: Sun, 15 Dec 2024 10:40:25 +0100 Subject: [PATCH 08/11] Add support for TABLESAMPLE (#1580) --- src/ast/mod.rs | 7 +- src/ast/query.rs | 188 ++++++++++++++++++ src/ast/spans.rs | 1 + src/dialect/hive.rs | 5 + src/dialect/mod.rs | 11 ++ src/keywords.rs | 8 + src/parser/mod.rs | 123 ++++++++++++ src/test_utils.rs | 16 ++ tests/sqlparser_bigquery.rs | 25 +-- tests/sqlparser_clickhouse.rs | 23 +-- tests/sqlparser_common.rs | 357 ++++++++-------------------------- tests/sqlparser_databricks.rs | 11 +- tests/sqlparser_duckdb.rs | 38 +--- tests/sqlparser_hive.rs | 10 + tests/sqlparser_mssql.rs | 29 ++- tests/sqlparser_mysql.rs | 47 ++--- tests/sqlparser_postgres.rs | 4 +- 
tests/sqlparser_redshift.rs | 70 +++---- tests/sqlparser_snowflake.rs | 20 +- tests/sqlparser_sqlite.rs | 11 +- 20 files changed, 546 insertions(+), 458 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index cfd0ac089..ccb2ed1bc 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -69,8 +69,11 @@ pub use self::query::{ OrderBy, OrderByExpr, PivotValueSource, ProjectionSelect, Query, RenameSelectItem, RepetitionQuantifier, ReplaceSelectElement, ReplaceSelectItem, RowsPerMatch, Select, SelectInto, SelectItem, SetExpr, SetOperator, SetQuantifier, Setting, SymbolDefinition, Table, - TableAlias, TableAliasColumnDef, TableFactor, TableFunctionArgs, TableVersion, TableWithJoins, - Top, TopQuantity, ValueTableMode, Values, WildcardAdditionalOptions, With, WithFill, + TableAlias, TableAliasColumnDef, TableFactor, TableFunctionArgs, TableSample, + TableSampleBucket, TableSampleKind, TableSampleMethod, TableSampleModifier, + TableSampleQuantity, TableSampleSeed, TableSampleSeedModifier, TableSampleUnit, TableVersion, + TableWithJoins, Top, TopQuantity, ValueTableMode, Values, WildcardAdditionalOptions, With, + WithFill, }; pub use self::trigger::{ diff --git a/src/ast/query.rs b/src/ast/query.rs index ad7fd261e..948febd26 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -1002,6 +1002,9 @@ pub enum TableFactor { partitions: Vec, /// Optional PartiQL JsonPath: json_path: Option, + /// Optional table sample modifier + /// See: + sample: Option, }, Derived { lateral: bool, @@ -1146,6 +1149,184 @@ pub enum TableFactor { }, } +/// The table sample modifier options +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] + +pub enum TableSampleKind { + /// Table sample located before the table alias option + BeforeTableAlias(Box), + /// Table sample located after the table alias option + AfterTableAlias(Box), +} + +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct TableSample { + pub modifier: TableSampleModifier, + pub name: Option, + pub quantity: Option, + pub seed: Option, + pub bucket: Option, + pub offset: Option, +} + +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum TableSampleModifier { + Sample, + TableSample, +} + +impl fmt::Display for TableSampleModifier { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + TableSampleModifier::Sample => write!(f, "SAMPLE")?, + TableSampleModifier::TableSample => write!(f, "TABLESAMPLE")?, + } + Ok(()) + } +} + +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct TableSampleQuantity { + pub parenthesized: bool, + pub value: Expr, + pub unit: Option, +} + +impl fmt::Display for TableSampleQuantity { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if self.parenthesized { + write!(f, "(")?; + } + write!(f, "{}", self.value)?; + if let Some(unit) = &self.unit { + write!(f, " {}", unit)?; + } + if self.parenthesized { + write!(f, ")")?; + } + Ok(()) + } +} + +/// The table sample method names +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] 
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum TableSampleMethod { + Row, + Bernoulli, + System, + Block, +} + +impl fmt::Display for TableSampleMethod { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + TableSampleMethod::Bernoulli => write!(f, "BERNOULLI"), + TableSampleMethod::Row => write!(f, "ROW"), + TableSampleMethod::System => write!(f, "SYSTEM"), + TableSampleMethod::Block => write!(f, "BLOCK"), + } + } +} + +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct TableSampleSeed { + pub modifier: TableSampleSeedModifier, + pub value: Value, +} + +impl fmt::Display for TableSampleSeed { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{} ({})", self.modifier, self.value)?; + Ok(()) + } +} + +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum TableSampleSeedModifier { + Repeatable, + Seed, +} + +impl fmt::Display for TableSampleSeedModifier { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + TableSampleSeedModifier::Repeatable => write!(f, "REPEATABLE"), + TableSampleSeedModifier::Seed => write!(f, "SEED"), + } + } +} + +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum TableSampleUnit { + Rows, + Percent, +} + +impl fmt::Display for TableSampleUnit { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + TableSampleUnit::Percent => write!(f, "PERCENT"), + TableSampleUnit::Rows => write!(f, "ROWS"), + } + } +} + +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct TableSampleBucket { + pub bucket: Value, + pub total: Value, + pub on: Option, +} + +impl fmt::Display for TableSampleBucket { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "BUCKET {} OUT OF {}", self.bucket, self.total)?; + if let Some(on) = &self.on { + write!(f, " ON {}", on)?; + } + Ok(()) + } +} +impl fmt::Display for TableSample { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, " {}", self.modifier)?; + if let Some(name) = &self.name { + write!(f, " {}", name)?; + } + if let Some(quantity) = &self.quantity { + write!(f, " {}", quantity)?; + } + if let Some(seed) = &self.seed { + write!(f, " {}", seed)?; + } + if let Some(bucket) = &self.bucket { + write!(f, " ({})", bucket)?; + } + if let Some(offset) = &self.offset { + write!(f, " OFFSET {}", offset)?; + } + Ok(()) + } +} + /// The source of values in a `PIVOT` operation. 
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] @@ -1404,6 +1585,7 @@ impl fmt::Display for TableFactor { partitions, with_ordinality, json_path, + sample, } => { write!(f, "{name}")?; if let Some(json_path) = json_path { @@ -1426,6 +1608,9 @@ impl fmt::Display for TableFactor { if *with_ordinality { write!(f, " WITH ORDINALITY")?; } + if let Some(TableSampleKind::BeforeTableAlias(sample)) = sample { + write!(f, "{sample}")?; + } if let Some(alias) = alias { write!(f, " AS {alias}")?; } @@ -1435,6 +1620,9 @@ impl fmt::Display for TableFactor { if let Some(version) = version { write!(f, "{version}")?; } + if let Some(TableSampleKind::AfterTableAlias(sample)) = sample { + write!(f, "{sample}")?; + } Ok(()) } TableFactor::Derived { diff --git a/src/ast/spans.rs b/src/ast/spans.rs index 88e0fbdf2..c2c7c14f0 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -1699,6 +1699,7 @@ impl Spanned for TableFactor { with_ordinality: _, partitions: _, json_path: _, + sample: _, } => union_spans( name.0 .iter() diff --git a/src/dialect/hive.rs b/src/dialect/hive.rs index 571f9b9ba..80f44cf7c 100644 --- a/src/dialect/hive.rs +++ b/src/dialect/hive.rs @@ -61,4 +61,9 @@ impl Dialect for HiveDialect { fn supports_load_data(&self) -> bool { true } + + /// See Hive + fn supports_table_sample_before_alias(&self) -> bool { + true + } } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index f40cba719..8cce6a353 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -707,6 +707,17 @@ pub trait Dialect: Debug + Any { fn is_reserved_for_identifier(&self, kw: Keyword) -> bool { keywords::RESERVED_FOR_IDENTIFIER.contains(&kw) } + + /// Returns true if this dialect supports the `TABLESAMPLE` option + /// before the table alias option. For example: + /// + /// Table sample before alias: `SELECT * FROM tbl AS t TABLESAMPLE (10)` + /// Table sample after alias: `SELECT * FROM tbl TABLESAMPLE (10) AS t` + /// + /// + fn supports_table_sample_before_alias(&self) -> bool { + false + } } /// This represents the operators for which precedence must be defined diff --git a/src/keywords.rs b/src/keywords.rs index d0cfcd05b..7e3354078 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -120,6 +120,7 @@ define_keywords!( BEGIN, BEGIN_FRAME, BEGIN_PARTITION, + BERNOULLI, BETWEEN, BIGDECIMAL, BIGINT, @@ -128,12 +129,14 @@ define_keywords!( BINDING, BIT, BLOB, + BLOCK, BLOOMFILTER, BOOL, BOOLEAN, BOTH, BROWSE, BTREE, + BUCKET, BUCKETS, BY, BYPASSRLS, @@ -680,6 +683,7 @@ define_keywords!( RUN, SAFE, SAFE_CAST, + SAMPLE, SAVEPOINT, SCHEMA, SCHEMAS, @@ -690,6 +694,7 @@ define_keywords!( SECONDARY, SECRET, SECURITY, + SEED, SELECT, SEMI, SENSITIVE, @@ -932,6 +937,9 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[ Keyword::CONNECT, // Reserved for snowflake MATCH_RECOGNIZE Keyword::MATCH_RECOGNIZE, + // Reserved for Snowflake table sample + Keyword::SAMPLE, + Keyword::TABLESAMPLE, ]; /// Can't be used as a column alias, so that `SELECT alias` diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 37323084d..7d70460b4 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -10598,6 +10598,13 @@ impl<'a> Parser<'a> { let with_ordinality = self.parse_keywords(&[Keyword::WITH, Keyword::ORDINALITY]); + let mut sample = None; + if self.dialect.supports_table_sample_before_alias() { + if let Some(parsed_sample) = self.maybe_parse_table_sample()? 
{ + sample = Some(TableSampleKind::BeforeTableAlias(parsed_sample)); + } + } + let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; // MSSQL-specific table hints: @@ -10612,6 +10619,12 @@ impl<'a> Parser<'a> { } }; + if !self.dialect.supports_table_sample_before_alias() { + if let Some(parsed_sample) = self.maybe_parse_table_sample()? { + sample = Some(TableSampleKind::AfterTableAlias(parsed_sample)); + } + } + let mut table = TableFactor::Table { name, alias, @@ -10621,6 +10634,7 @@ impl<'a> Parser<'a> { partitions, with_ordinality, json_path, + sample, }; while let Some(kw) = self.parse_one_of_keywords(&[Keyword::PIVOT, Keyword::UNPIVOT]) { @@ -10641,6 +10655,115 @@ impl<'a> Parser<'a> { } } + fn maybe_parse_table_sample(&mut self) -> Result>, ParserError> { + let modifier = if self.parse_keyword(Keyword::TABLESAMPLE) { + TableSampleModifier::TableSample + } else if self.parse_keyword(Keyword::SAMPLE) { + TableSampleModifier::Sample + } else { + return Ok(None); + }; + + let name = match self.parse_one_of_keywords(&[ + Keyword::BERNOULLI, + Keyword::ROW, + Keyword::SYSTEM, + Keyword::BLOCK, + ]) { + Some(Keyword::BERNOULLI) => Some(TableSampleMethod::Bernoulli), + Some(Keyword::ROW) => Some(TableSampleMethod::Row), + Some(Keyword::SYSTEM) => Some(TableSampleMethod::System), + Some(Keyword::BLOCK) => Some(TableSampleMethod::Block), + _ => None, + }; + + let parenthesized = self.consume_token(&Token::LParen); + + let (quantity, bucket) = if parenthesized && self.parse_keyword(Keyword::BUCKET) { + let selected_bucket = self.parse_number_value()?; + self.expect_keywords(&[Keyword::OUT, Keyword::OF])?; + let total = self.parse_number_value()?; + let on = if self.parse_keyword(Keyword::ON) { + Some(self.parse_expr()?) + } else { + None + }; + ( + None, + Some(TableSampleBucket { + bucket: selected_bucket, + total, + on, + }), + ) + } else { + let value = match self.maybe_parse(|p| p.parse_expr())? { + Some(num) => num, + None => { + if let Token::Word(w) = self.next_token().token { + Expr::Value(Value::Placeholder(w.value)) + } else { + return parser_err!( + "Expecting number or byte length e.g. 100M", + self.peek_token().span.start + ); + } + } + }; + let unit = if self.parse_keyword(Keyword::ROWS) { + Some(TableSampleUnit::Rows) + } else if self.parse_keyword(Keyword::PERCENT) { + Some(TableSampleUnit::Percent) + } else { + None + }; + ( + Some(TableSampleQuantity { + parenthesized, + value, + unit, + }), + None, + ) + }; + if parenthesized { + self.expect_token(&Token::RParen)?; + } + + let seed = if self.parse_keyword(Keyword::REPEATABLE) { + Some(self.parse_table_sample_seed(TableSampleSeedModifier::Repeatable)?) + } else if self.parse_keyword(Keyword::SEED) { + Some(self.parse_table_sample_seed(TableSampleSeedModifier::Seed)?) + } else { + None + }; + + let offset = if self.parse_keyword(Keyword::OFFSET) { + Some(self.parse_expr()?) + } else { + None + }; + + Ok(Some(Box::new(TableSample { + modifier, + name, + quantity, + seed, + bucket, + offset, + }))) + } + + fn parse_table_sample_seed( + &mut self, + modifier: TableSampleSeedModifier, + ) -> Result { + self.expect_token(&Token::LParen)?; + let value = self.parse_number_value()?; + self.expect_token(&Token::RParen)?; + Ok(TableSampleSeed { modifier, value }) + } + /// Parses `OPENJSON( jsonExpression [ , path ] ) [ ]` clause, /// assuming the `OPENJSON` keyword was already consumed. 
fn parse_open_json_table_factor(&mut self) -> Result { diff --git a/src/test_utils.rs b/src/test_utils.rs index 6e60a31c1..e76cdb87a 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -346,6 +346,21 @@ pub fn table(name: impl Into) -> TableFactor { partitions: vec![], with_ordinality: false, json_path: None, + sample: None, + } +} + +pub fn table_from_name(name: ObjectName) -> TableFactor { + TableFactor::Table { + name, + alias: None, + args: None, + with_hints: vec![], + version: None, + partitions: vec![], + with_ordinality: false, + json_path: None, + sample: None, } } @@ -362,6 +377,7 @@ pub fn table_with_alias(name: impl Into, alias: impl Into) -> Ta partitions: vec![], with_ordinality: false, json_path: None, + sample: None, } } diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 34c14cc55..0311eba16 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -222,16 +222,7 @@ fn parse_delete_statement() { .. }) => { assert_eq!( - TableFactor::Table { - name: ObjectName(vec![Ident::with_quote('"', "table")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + table_from_name(ObjectName(vec![Ident::with_quote('"', "table")])), from[0].relation ); } @@ -1379,16 +1370,7 @@ fn parse_table_identifiers() { assert_eq!( select.from, vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(expected), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(expected)), joins: vec![] },] ); @@ -1562,6 +1544,7 @@ fn parse_table_time_travel() { partitions: vec![], with_ordinality: false, json_path: None, + sample: None, }, joins: vec![] },] @@ -1661,6 +1644,7 @@ fn parse_merge() { partitions: Default::default(), with_ordinality: false, json_path: None, + sample: None, }, table ); @@ -1677,6 +1661,7 @@ fn parse_merge() { partitions: Default::default(), with_ordinality: false, json_path: None, + sample: None, }, source ); diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 9d785576f..d60506d90 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -63,16 +63,7 @@ fn parse_map_access_expr() { })], into: None, from: vec![TableWithJoins { - relation: Table { - name: ObjectName(vec![Ident::new("foos")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec![Ident::new("foos")])), joins: vec![], }], lateral_views: vec![], @@ -175,9 +166,7 @@ fn parse_delimited_identifiers() { args, with_hints, version, - with_ordinality: _, - partitions: _, - json_path: _, + .. 
} => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); assert_eq!(Ident::with_quote('"', "alias"), alias.unwrap().name); @@ -1625,6 +1614,14 @@ fn parse_explain_table() { } } +#[test] +fn parse_table_sample() { + clickhouse().verified_stmt("SELECT * FROM tbl SAMPLE 0.1"); + clickhouse().verified_stmt("SELECT * FROM tbl SAMPLE 1000"); + clickhouse().verified_stmt("SELECT * FROM tbl SAMPLE 1 / 10"); + clickhouse().verified_stmt("SELECT * FROM tbl SAMPLE 1 / 10 OFFSET 1 / 2"); +} + fn clickhouse() -> TestedDialects { TestedDialects::new(vec![Box::new(ClickHouseDialect {})]) } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 7dfb98d6f..0f1813c2f 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -41,7 +41,7 @@ use sqlparser::tokenizer::Span; use sqlparser::tokenizer::Tokenizer; use test_utils::{ all_dialects, all_dialects_where, alter_table_op, assert_eq_vec, call, expr_from_projection, - join, number, only, table, table_alias, TestedDialects, + join, number, only, table, table_alias, table_from_name, TestedDialects, }; #[macro_use] @@ -359,16 +359,7 @@ fn parse_update_set_from() { stmt, Statement::Update { table: TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident::new("t1")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec![Ident::new("t1")])), joins: vec![], }, assignments: vec![Assignment { @@ -391,16 +382,7 @@ fn parse_update_set_from() { ], into: None, from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident::new("t1")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec![Ident::new("t1")])), joins: vec![], }], lateral_views: vec![], @@ -480,6 +462,7 @@ fn parse_update_with_table_alias() { partitions: vec![], with_ordinality: false, json_path: None, + sample: None, }, joins: vec![], }, @@ -572,6 +555,7 @@ fn parse_select_with_table_alias() { partitions: vec![], with_ordinality: false, json_path: None, + sample: None, }, joins: vec![], }] @@ -601,16 +585,7 @@ fn parse_delete_statement() { .. }) => { assert_eq!( - TableFactor::Table { - name: ObjectName(vec![Ident::with_quote('"', "table")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + table_from_name(ObjectName(vec![Ident::with_quote('"', "table")])), from[0].relation ); } @@ -649,29 +624,17 @@ fn parse_delete_statement_for_multi_tables() { tables[1] ); assert_eq!( - TableFactor::Table { - name: ObjectName(vec![Ident::new("schema1"), Ident::new("table1")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + table_from_name(ObjectName(vec![ + Ident::new("schema1"), + Ident::new("table1") + ])), from[0].relation ); assert_eq!( - TableFactor::Table { - name: ObjectName(vec![Ident::new("schema2"), Ident::new("table2")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + table_from_name(ObjectName(vec![ + Ident::new("schema2"), + Ident::new("table2") + ])), from[0].joins[0].relation ); } @@ -689,55 +652,31 @@ fn parse_delete_statement_for_multi_tables_with_using() { .. 
}) => { assert_eq!( - TableFactor::Table { - name: ObjectName(vec![Ident::new("schema1"), Ident::new("table1")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + table_from_name(ObjectName(vec![ + Ident::new("schema1"), + Ident::new("table1") + ])), from[0].relation ); assert_eq!( - TableFactor::Table { - name: ObjectName(vec![Ident::new("schema2"), Ident::new("table2")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + table_from_name(ObjectName(vec![ + Ident::new("schema2"), + Ident::new("table2") + ])), from[1].relation ); assert_eq!( - TableFactor::Table { - name: ObjectName(vec![Ident::new("schema1"), Ident::new("table1")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + table_from_name(ObjectName(vec![ + Ident::new("schema1"), + Ident::new("table1") + ])), using[0].relation ); assert_eq!( - TableFactor::Table { - name: ObjectName(vec![Ident::new("schema2"), Ident::new("table2")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + table_from_name(ObjectName(vec![ + Ident::new("schema2"), + Ident::new("table2") + ])), using[0].joins[0].relation ); } @@ -760,16 +699,7 @@ fn parse_where_delete_statement() { .. }) => { assert_eq!( - TableFactor::Table { - name: ObjectName(vec![Ident::new("foo")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + table_from_name(ObjectName(vec![Ident::new("foo")])), from[0].relation, ); @@ -815,6 +745,7 @@ fn parse_where_delete_with_alias_statement() { partitions: vec![], with_ordinality: false, json_path: None, + sample: None, }, from[0].relation, ); @@ -832,6 +763,7 @@ fn parse_where_delete_with_alias_statement() { partitions: vec![], with_ordinality: false, json_path: None, + sample: None, }, joins: vec![], }]), @@ -4920,20 +4852,11 @@ fn test_parse_named_window() { ], into: None, from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident { - value: "aggregate_test_100".to_string(), - quote_style: None, - span: Span::empty(), - }]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec![Ident { + value: "aggregate_test_100".to_string(), + quote_style: None, + span: Span::empty(), + }])), joins: vec![], }], lateral_views: vec![], @@ -5511,20 +5434,11 @@ fn parse_interval_and_or_xor() { }))], into: None, from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident { - value: "test".to_string(), - quote_style: None, - span: Span::empty(), - }]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec![Ident { + value: "test".to_string(), + quote_style: None, + span: Span::empty(), + }])), joins: vec![], }], lateral_views: vec![], @@ -6132,29 +6046,11 @@ fn parse_implicit_join() { assert_eq!( vec![ TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec!["t1".into()]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - 
with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec!["t1".into()])), joins: vec![], }, TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec!["t2".into()]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec!["t2".into()])), joins: vec![], }, ], @@ -6166,53 +6062,17 @@ fn parse_implicit_join() { assert_eq!( vec![ TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec!["t1a".into()]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec!["t1a".into()])), joins: vec![Join { - relation: TableFactor::Table { - name: ObjectName(vec!["t1b".into()]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec!["t1b".into()])), global: false, join_operator: JoinOperator::Inner(JoinConstraint::Natural), }], }, TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec!["t2a".into()]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec!["t2a".into()])), joins: vec![Join { - relation: TableFactor::Table { - name: ObjectName(vec!["t2b".into()]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec!["t2b".into()])), global: false, join_operator: JoinOperator::Inner(JoinConstraint::Natural), }], @@ -6228,16 +6088,7 @@ fn parse_cross_join() { let select = verified_only_select(sql); assert_eq!( Join { - relation: TableFactor::Table { - name: ObjectName(vec![Ident::new("t2")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec![Ident::new("t2")])), global: false, join_operator: JoinOperator::CrossJoin, }, @@ -6263,6 +6114,7 @@ fn parse_joins_on() { partitions: vec![], with_ordinality: false, json_path: None, + sample: None, }, global, join_operator: f(JoinConstraint::On(Expr::BinaryOp { @@ -6391,6 +6243,7 @@ fn parse_joins_using() { partitions: vec![], with_ordinality: false, json_path: None, + sample: None, }, global: false, join_operator: f(JoinConstraint::Using(vec!["c1".into()])), @@ -6465,6 +6318,7 @@ fn parse_natural_join() { partitions: vec![], with_ordinality: false, json_path: None, + sample: None, }, global: false, join_operator: f(JoinConstraint::Natural), @@ -6728,16 +6582,7 @@ fn parse_derived_tables() { }), }, joins: vec![Join { - relation: TableFactor::Table { - name: ObjectName(vec!["t2".into()]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec!["t2".into()])), global: false, join_operator: JoinOperator::Inner(JoinConstraint::Natural), }], @@ -7668,20 +7513,11 @@ fn lateral_function() { top_before_distinct: false, into: None, from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident { - value: "customer".to_string(), - quote_style: None, - span: Span::empty(), - }]), - 
alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec![Ident { + value: "customer".to_string(), + quote_style: None, + span: Span::empty(), + }])), joins: vec![Join { relation: TableFactor::Function { lateral: true, @@ -8499,6 +8335,7 @@ fn parse_merge() { partitions: vec![], with_ordinality: false, json_path: None, + sample: None, } ); assert_eq!(table, table_no_into); @@ -8519,16 +8356,10 @@ fn parse_merge() { )], into: None, from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident::new("s"), Ident::new("foo")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec![ + Ident::new("s"), + Ident::new("foo") + ])), joins: vec![], }], lateral_views: vec![], @@ -9611,6 +9442,7 @@ fn parse_pivot_table() { partitions: vec![], with_ordinality: false, json_path: None, + sample: None, }), aggregate_functions: vec![ expected_function("a", None), @@ -9686,6 +9518,7 @@ fn parse_unpivot_table() { partitions: vec![], with_ordinality: false, json_path: None, + sample: None, }), value: Ident { value: "quantity".to_string(), @@ -9756,6 +9589,7 @@ fn parse_pivot_unpivot_table() { partitions: vec![], with_ordinality: false, json_path: None, + sample: None, }), value: Ident { value: "population".to_string(), @@ -10165,16 +9999,7 @@ fn parse_unload() { projection: vec![UnnamedExpr(Expr::Identifier(Ident::new("cola"))),], into: None, from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident::new("tab")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec![Ident::new("tab")])), joins: vec![], }], lateral_views: vec![], @@ -10348,16 +10173,7 @@ fn parse_connect_by() { SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("title"))), ], from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident::new("employees")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec![Ident::new("employees")])), joins: vec![], }], into: None, @@ -10437,16 +10253,7 @@ fn parse_connect_by() { SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("title"))), ], from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident::new("employees")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec![Ident::new("employees")])), joins: vec![], }], into: None, @@ -10601,16 +10408,7 @@ fn test_match_recognize() { use MatchRecognizeSymbol::*; use RepetitionQuantifier::*; - let table = TableFactor::Table { - name: ObjectName(vec![Ident::new("my_table")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }; + let table = table_from_name(ObjectName(vec![Ident::new("my_table")])); fn check(options: &str, expect: TableFactor) { let select = all_dialects_where(|d| d.supports_match_recognize()).verified_only_select( @@ -12585,3 +12383,16 @@ fn parse_create_table_with_enum_types() { 
ParserError::ParserError("Expected: literal string, found: 2".to_string()) ); } + +#[test] +fn test_table_sample() { + let dialects = all_dialects_where(|d| d.supports_table_sample_before_alias()); + dialects.verified_stmt("SELECT * FROM tbl TABLESAMPLE (50) AS t"); + dialects.verified_stmt("SELECT * FROM tbl TABLESAMPLE (50 ROWS) AS t"); + dialects.verified_stmt("SELECT * FROM tbl TABLESAMPLE (50 PERCENT) AS t"); + + let dialects = all_dialects_where(|d| !d.supports_table_sample_before_alias()); + dialects.verified_stmt("SELECT * FROM tbl AS t TABLESAMPLE BERNOULLI (50)"); + dialects.verified_stmt("SELECT * FROM tbl AS t TABLESAMPLE SYSTEM (50)"); + dialects.verified_stmt("SELECT * FROM tbl AS t TABLESAMPLE SYSTEM (50) REPEATABLE (10)"); +} diff --git a/tests/sqlparser_databricks.rs b/tests/sqlparser_databricks.rs index d73c088a7..b9ca55d13 100644 --- a/tests/sqlparser_databricks.rs +++ b/tests/sqlparser_databricks.rs @@ -185,16 +185,7 @@ fn test_values_clause() { "SELECT * FROM values", )); assert_eq!( - Some(&TableFactor::Table { - name: ObjectName(vec![Ident::new("values")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }), + Some(&table_from_name(ObjectName(vec![Ident::new("values")]))), query .body .as_select() diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index a0fc49b9f..d441cd195 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -268,20 +268,11 @@ fn test_select_union_by_name() { top_before_distinct: false, into: None, from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident { - value: "capitals".to_string(), - quote_style: None, - span: Span::empty(), - }]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec![Ident { + value: "capitals".to_string(), + quote_style: None, + span: Span::empty(), + }])), joins: vec![], }], lateral_views: vec![], @@ -306,20 +297,11 @@ fn test_select_union_by_name() { top_before_distinct: false, into: None, from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident { - value: "weather".to_string(), - quote_style: None, - span: Span::empty(), - }]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec![Ident { + value: "weather".to_string(), + quote_style: None, + span: Span::empty(), + }])), joins: vec![], }], lateral_views: vec![], diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 981218388..5349f1207 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -459,6 +459,7 @@ fn parse_delimited_identifiers() { with_ordinality: _, partitions: _, json_path: _, + sample: _, } => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); assert_eq!(Ident::with_quote('"', "alias"), alias.unwrap().name); @@ -537,6 +538,15 @@ fn parse_use() { ); } +#[test] +fn test_tample_sample() { + hive().verified_stmt("SELECT * FROM source TABLESAMPLE (BUCKET 3 OUT OF 32 ON rand()) AS s"); + hive().verified_stmt("SELECT * FROM source TABLESAMPLE (BUCKET 3 OUT OF 16 ON id)"); + hive().verified_stmt("SELECT * FROM source TABLESAMPLE (100M) AS s"); + hive().verified_stmt("SELECT * FROM source TABLESAMPLE (0.1 PERCENT) AS s"); + hive().verified_stmt("SELECT * FROM source TABLESAMPLE 
(10 ROWS)"); +} + fn hive() -> TestedDialects { TestedDialects::new(vec![Box::new(HiveDialect {})]) } diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index 66e40f46b..ecc874af8 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -73,6 +73,7 @@ fn parse_table_time_travel() { partitions: vec![], with_ordinality: false, json_path: None, + sample: None, }, joins: vec![] },] @@ -221,6 +222,7 @@ fn parse_mssql_openjson() { with_ordinality: false, partitions: vec![], json_path: None, + sample: None, }, joins: vec![Join { relation: TableFactor::OpenJsonTable { @@ -279,6 +281,7 @@ fn parse_mssql_openjson() { with_ordinality: false, partitions: vec![], json_path: None, + sample: None, }, joins: vec![Join { relation: TableFactor::OpenJsonTable { @@ -338,6 +341,7 @@ fn parse_mssql_openjson() { with_ordinality: false, partitions: vec![], json_path: None, + sample: None, }, joins: vec![Join { relation: TableFactor::OpenJsonTable { @@ -396,6 +400,7 @@ fn parse_mssql_openjson() { with_ordinality: false, partitions: vec![], json_path: None, + sample: None, }, joins: vec![Join { relation: TableFactor::OpenJsonTable { @@ -434,6 +439,7 @@ fn parse_mssql_openjson() { with_ordinality: false, partitions: vec![], json_path: None, + sample: None, }, joins: vec![Join { relation: TableFactor::OpenJsonTable { @@ -611,9 +617,7 @@ fn parse_delimited_identifiers() { args, with_hints, version, - with_ordinality: _, - partitions: _, - json_path: _, + .. } => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); assert_eq!(Ident::with_quote('"', "alias"), alias.unwrap().name); @@ -1082,20 +1086,11 @@ fn parse_substring_in_select() { })], into: None, from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident { - value: "test".to_string(), - quote_style: None, - span: Span::empty(), - }]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec![Ident { + value: "test".to_string(), + quote_style: None, + span: Span::empty(), + }])), joins: vec![] }], lateral_views: vec![], diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index cac1af852..bc7bf2f88 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -1884,16 +1884,9 @@ fn parse_select_with_numeric_prefix_column_name() { )))], into: None, from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident::with_quote('"', "table")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec![Ident::with_quote( + '"', "table" + )])), joins: vec![] }], lateral_views: vec![], @@ -1943,16 +1936,9 @@ fn parse_select_with_concatenation_of_exp_number_and_numeric_prefix_column() { ], into: None, from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident::with_quote('"', "table")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec![Ident::with_quote( + '"', "table" + )])), joins: vec![] }], lateral_views: vec![], @@ -2020,6 +2006,7 @@ fn parse_update_with_joins() { partitions: vec![], with_ordinality: false, json_path: None, + sample: None, }, joins: vec![Join { relation: TableFactor::Table { @@ -2034,6 +2021,7 @@ fn 
parse_update_with_joins() { partitions: vec![], with_ordinality: false, json_path: None, + sample: None, }, global: false, join_operator: JoinOperator::Inner(JoinConstraint::On(Expr::BinaryOp { @@ -2464,20 +2452,11 @@ fn parse_substring_in_select() { })], into: None, from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident { - value: "test".to_string(), - quote_style: None, - span: Span::empty(), - }]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec![Ident { + value: "test".to_string(), + quote_style: None, + span: Span::empty(), + }])), joins: vec![] }], lateral_views: vec![], diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 2e204d9bc..aaf4e65db 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -3581,9 +3581,7 @@ fn parse_delimited_identifiers() { args, with_hints, version, - with_ordinality: _, - partitions: _, - json_path: _, + .. } => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); assert_eq!(Ident::with_quote('"', "alias"), alias.unwrap().name); diff --git a/tests/sqlparser_redshift.rs b/tests/sqlparser_redshift.rs index 2fd855a09..9492946d3 100644 --- a/tests/sqlparser_redshift.rs +++ b/tests/sqlparser_redshift.rs @@ -39,27 +39,18 @@ fn test_square_brackets_over_db_schema_table_name() { assert_eq!( select.from[0], TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![ - Ident { - value: "test_schema".to_string(), - quote_style: Some('['), - span: Span::empty(), - }, - Ident { - value: "test_table".to_string(), - quote_style: Some('['), - span: Span::empty(), - } - ]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec![ + Ident { + value: "test_schema".to_string(), + quote_style: Some('['), + span: Span::empty(), + }, + Ident { + value: "test_table".to_string(), + quote_style: Some('['), + span: Span::empty(), + } + ])), joins: vec![], } ); @@ -90,27 +81,18 @@ fn test_double_quotes_over_db_schema_table_name() { assert_eq!( select.from[0], TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![ - Ident { - value: "test_schema".to_string(), - quote_style: Some('"'), - span: Span::empty(), - }, - Ident { - value: "test_table".to_string(), - quote_style: Some('"'), - span: Span::empty(), - } - ]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec![ + Ident { + value: "test_schema".to_string(), + quote_style: Some('"'), + span: Span::empty(), + }, + Ident { + value: "test_table".to_string(), + quote_style: Some('"'), + span: Span::empty(), + } + ])), joins: vec![], } ); @@ -130,9 +112,7 @@ fn parse_delimited_identifiers() { args, with_hints, version, - with_ordinality: _, - partitions: _, - json_path: _, + .. 
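+            // As in the other dialect tests, `..` absorbs the remaining fields,
+            // including the newly added `sample`.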
} => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); assert_eq!(Ident::with_quote('"', "alias"), alias.unwrap().name); diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index d6774c317..adb8f8133 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -1188,9 +1188,7 @@ fn parse_delimited_identifiers() { args, with_hints, version, - with_ordinality: _, - partitions: _, - json_path: _, + .. } => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); assert_eq!(Ident::with_quote('"', "alias"), alias.unwrap().name); @@ -2960,3 +2958,19 @@ fn parse_insert_overwrite() { let insert_overwrite_into = r#"INSERT OVERWRITE INTO schema.table SELECT a FROM b"#; snowflake().verified_stmt(insert_overwrite_into); } + +#[test] +fn test_table_sample() { + snowflake_and_generic().verified_stmt("SELECT * FROM testtable SAMPLE (10)"); + snowflake_and_generic().verified_stmt("SELECT * FROM testtable TABLESAMPLE (10)"); + snowflake_and_generic() + .verified_stmt("SELECT * FROM testtable AS t TABLESAMPLE BERNOULLI (10)"); + snowflake_and_generic().verified_stmt("SELECT * FROM testtable AS t TABLESAMPLE ROW (10)"); + snowflake_and_generic().verified_stmt("SELECT * FROM testtable AS t TABLESAMPLE ROW (10 ROWS)"); + snowflake_and_generic() + .verified_stmt("SELECT * FROM testtable TABLESAMPLE BLOCK (3) SEED (82)"); + snowflake_and_generic() + .verified_stmt("SELECT * FROM testtable TABLESAMPLE SYSTEM (3) REPEATABLE (82)"); + snowflake_and_generic().verified_stmt("SELECT id FROM mytable TABLESAMPLE (10) REPEATABLE (1)"); + snowflake_and_generic().verified_stmt("SELECT id FROM mytable TABLESAMPLE (10) SEED (1)"); +} diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs index 987b1263d..ff0b54ef7 100644 --- a/tests/sqlparser_sqlite.rs +++ b/tests/sqlparser_sqlite.rs @@ -479,16 +479,7 @@ fn parse_update_tuple_row_values() { }], selection: None, table: TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident::new("x")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec![Ident::new("x")])), joins: vec![], }, from: None, From 7867ba3cf04c9c8324bfa26403945f0d53c2119a Mon Sep 17 00:00:00 2001 From: Aleksei Piianin Date: Sun, 15 Dec 2024 10:56:11 +0100 Subject: [PATCH 09/11] Redshift: Fix parsing for quoted numbered columns (#1576) --- src/dialect/mod.rs | 53 +++++++++++++++++++------ src/dialect/redshift.rs | 48 ++++++++++++++++++----- src/tokenizer.rs | 77 ++++++++++++++++++++++++++++++++----- tests/sqlparser_redshift.rs | 58 +++++++++++++++++++++++++--- 4 files changed, 200 insertions(+), 36 deletions(-) diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 8cce6a353..c32b763a4 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -128,14 +128,39 @@ pub trait Dialect: Debug + Any { ch == '"' || ch == '`' } - /// Return the character used to quote identifiers. - fn identifier_quote_style(&self, _identifier: &str) -> Option { + /// Determine if a character starts a potential nested quoted identifier. 
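+    /// The default implementation returns `false`; dialects with nested quoting
+    /// (such as Redshift) override it.
+    ///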
+    /// Example: Redshift supports the following quote styles, all of which mean the same thing:
+    /// ```sql
+    /// SELECT 1 AS foo;
+    /// SELECT 1 AS "foo";
+    /// SELECT 1 AS [foo];
+    /// SELECT 1 AS ["foo"];
+    /// ```
+    fn is_nested_delimited_identifier_start(&self, _ch: char) -> bool {
+        false
+    }
+
+    /// Only applicable whenever [`Self::is_nested_delimited_identifier_start`] returns `true`.
+    /// If the next sequence of tokens potentially represents a nested identifier, then this method
+    /// returns a tuple containing the outer quote style, and if present, the inner (nested) quote style.
+    ///
+    /// Example (Redshift):
+    /// ```text
+    /// `["foo"]` => Some(`[`, Some(`"`))
+    /// `[foo]` => Some(`[`, None)
+    /// `[0]` => None
+    /// `"foo"` => None
+    /// ```
+    fn peek_nested_delimited_identifier_quotes(
+        &self,
+        mut _chars: Peekable<Chars<'_>>,
+    ) -> Option<(char, Option<char>)> {
         None
     }
 
-    /// Determine if quoted characters are proper for identifier
-    fn is_proper_identifier_inside_quotes(&self, mut _chars: Peekable<Chars<'_>>) -> bool {
-        true
+    /// Return the character used to quote identifiers.
+    fn identifier_quote_style(&self, _identifier: &str) -> Option<char> {
+        None
     }
 
     /// Determine if a character is a valid start character for an unquoted identifier
@@ -869,6 +894,17 @@ mod tests {
         self.0.is_delimited_identifier_start(ch)
     }
 
+    fn is_nested_delimited_identifier_start(&self, ch: char) -> bool {
+        self.0.is_nested_delimited_identifier_start(ch)
+    }
+
+    fn peek_nested_delimited_identifier_quotes(
+        &self,
+        chars: std::iter::Peekable<std::str::Chars<'_>>,
+    ) -> Option<(char, Option<char>)> {
+        self.0.peek_nested_delimited_identifier_quotes(chars)
+    }
+
     fn identifier_quote_style(&self, identifier: &str) -> Option<char> {
         self.0.identifier_quote_style(identifier)
     }
@@ -877,13 +913,6 @@ mod tests {
     fn supports_string_literal_backslash_escape(&self) -> bool {
         self.0.supports_string_literal_backslash_escape()
     }
 
-    fn is_proper_identifier_inside_quotes(
-        &self,
-        chars: std::iter::Peekable<std::str::Chars<'_>>,
-    ) -> bool {
-        self.0.is_proper_identifier_inside_quotes(chars)
-    }
-
     fn supports_filter_during_aggregation(&self) -> bool {
         self.0.supports_filter_during_aggregation()
     }
diff --git a/src/dialect/redshift.rs b/src/dialect/redshift.rs
index 48eb00ab1..55405ba53 100644
--- a/src/dialect/redshift.rs
+++ b/src/dialect/redshift.rs
@@ -32,21 +32,51 @@ pub struct RedshiftSqlDialect {}
 // in the Postgres dialect, the query will be parsed as an array, while in the Redshift dialect it will
 // be a json path
 impl Dialect for RedshiftSqlDialect {
-    fn is_delimited_identifier_start(&self, ch: char) -> bool {
-        ch == '"' || ch == '['
+    /// Determine if a character starts a potential nested quoted identifier.
+    /// Example: Redshift supports the following quote styles, all of which mean the same thing:
+    /// ```sql
+    /// SELECT 1 AS foo;
+    /// SELECT 1 AS "foo";
+    /// SELECT 1 AS [foo];
+    /// SELECT 1 AS ["foo"];
+    /// ```
+    fn is_nested_delimited_identifier_start(&self, ch: char) -> bool {
+        ch == '['
     }
 
-    /// Determine if quoted characters are proper for identifier
-    /// It's needed to distinguish treating square brackets as quotes from
-    /// treating them as json path. If there is identifier then we assume
-    /// there is no json path.
-    fn is_proper_identifier_inside_quotes(&self, mut chars: Peekable<Chars<'_>>) -> bool {
+    /// Only applicable whenever [`Self::is_nested_delimited_identifier_start`] returns `true`.
+    /// If the next sequence of tokens potentially represents a nested identifier, then this method
+    /// returns a tuple containing the outer quote style, and if present, the inner (nested) quote style.
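+    /// Returning `None` keeps Redshift's regular behavior, under which `[`
+    /// introduces a JSON path rather than a quoted identifier.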
+    ///
+    /// Example (Redshift):
+    /// ```text
+    /// `["foo"]` => Some(`[`, Some(`"`))
+    /// `[foo]` => Some(`[`, None)
+    /// `[0]` => None
+    /// `"foo"` => None
+    /// ```
+    fn peek_nested_delimited_identifier_quotes(
+        &self,
+        mut chars: Peekable<Chars<'_>>,
+    ) -> Option<(char, Option<char>)> {
+        if chars.peek() != Some(&'[') {
+            return None;
+        }
+        chars.next();
+
+        let mut not_white_chars = chars.skip_while(|ch| ch.is_whitespace()).peekable();
+
         if let Some(&ch) = not_white_chars.peek() {
-            return self.is_identifier_start(ch);
+            if ch == '"' {
+                return Some(('[', Some('"')));
+            }
+            if self.is_identifier_start(ch) {
+                return Some(('[', None));
+            }
         }
-        false
+
+        None
     }
 
     fn is_identifier_start(&self, ch: char) -> bool {
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index aacfc16fa..9269f4fe6 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -1075,25 +1075,61 @@ impl<'a> Tokenizer<'a> {
                     Ok(Some(Token::DoubleQuotedString(s)))
                 }
                 // delimited (quoted) identifier
+                quote_start if self.dialect.is_delimited_identifier_start(ch) => {
+                    let word = self.tokenize_quoted_identifier(quote_start, chars)?;
+                    Ok(Some(Token::make_word(&word, Some(quote_start))))
+                }
+                // Potentially nested delimited (quoted) identifier
                 quote_start
-                    if self.dialect.is_delimited_identifier_start(ch)
+                    if self
+                        .dialect
+                        .is_nested_delimited_identifier_start(quote_start)
                         && self
                             .dialect
-                            .is_proper_identifier_inside_quotes(chars.peekable.clone()) =>
+                            .peek_nested_delimited_identifier_quotes(chars.peekable.clone())
+                            .is_some() =>
                 {
-                    let error_loc = chars.location();
-                    chars.next(); // consume the opening quote
+                    let Some((quote_start, nested_quote_start)) = self
+                        .dialect
+                        .peek_nested_delimited_identifier_quotes(chars.peekable.clone())
+                    else {
+                        return self.tokenizer_error(
+                            chars.location(),
+                            format!("Expected nested delimiter '{quote_start}' before EOF."),
+                        );
+                    };
+
+                    let Some(nested_quote_start) = nested_quote_start else {
+                        let word = self.tokenize_quoted_identifier(quote_start, chars)?;
+                        return Ok(Some(Token::make_word(&word, Some(quote_start))));
+                    };
+
+                    let mut word = vec![];
                     let quote_end = Word::matching_end_quote(quote_start);
-                    let (s, last_char) = self.parse_quoted_ident(chars, quote_end);
+                    let nested_quote_end = Word::matching_end_quote(nested_quote_start);
+                    let error_loc = chars.location();
 
-                    if last_char == Some(quote_end) {
-                        Ok(Some(Token::make_word(&s, Some(quote_start))))
-                    } else {
-                        self.tokenizer_error(
+                    chars.next(); // skip the first delimiter
+                    peeking_take_while(chars, |ch| ch.is_whitespace());
+                    if chars.peek() != Some(&nested_quote_start) {
+                        return self.tokenizer_error(
+                            error_loc,
+                            format!("Expected nested delimiter '{nested_quote_start}' before EOF."),
+                        );
+                    }
+                    word.push(nested_quote_start.into());
+                    word.push(self.tokenize_quoted_identifier(nested_quote_end, chars)?);
+                    word.push(nested_quote_end.into());
+                    peeking_take_while(chars, |ch| ch.is_whitespace());
+                    if chars.peek() != Some(&quote_end) {
+                        return self.tokenizer_error(
                             error_loc,
                             format!("Expected close delimiter '{quote_end}' before EOF."),
-                        )
+                        );
                     }
+                    chars.next(); // skip close delimiter
+
+                    Ok(Some(Token::make_word(&word.concat(), Some(quote_start))))
                 }
                 // numbers and period
                 '0'..='9' | '.' 
=> {
@@ -1597,6 +1633,27 @@ impl<'a> Tokenizer<'a> {
         s
     }
 
+    /// Read a quoted identifier
+    fn tokenize_quoted_identifier(
+        &self,
+        quote_start: char,
+        chars: &mut State,
+    ) -> Result<String, TokenizerError> {
+        let error_loc = chars.location();
+        chars.next(); // consume the opening quote
+        let quote_end = Word::matching_end_quote(quote_start);
+        let (s, last_char) = self.parse_quoted_ident(chars, quote_end);
+
+        if last_char == Some(quote_end) {
+            Ok(s)
+        } else {
+            self.tokenizer_error(
+                error_loc,
+                format!("Expected close delimiter '{quote_end}' before EOF."),
+            )
+        }
+    }
+
     /// Read a single quoted string, starting with the opening quote.
     fn tokenize_escaped_single_quoted_string(
         &self,
diff --git a/tests/sqlparser_redshift.rs b/tests/sqlparser_redshift.rs
index 9492946d3..857d378bc 100644
--- a/tests/sqlparser_redshift.rs
+++ b/tests/sqlparser_redshift.rs
@@ -157,6 +157,8 @@ fn parse_delimited_identifiers() {
     }
 
     redshift().verified_stmt(r#"CREATE TABLE "foo" ("bar" "int")"#);
+    // An alias starting with a number
+    redshift().verified_stmt(r#"CREATE TABLE "foo" ("1" INT)"#);
     redshift().verified_stmt(r#"ALTER TABLE foo ADD CONSTRAINT "bar" PRIMARY KEY (baz)"#);
     //TODO verified_stmt(r#"UPDATE foo SET "bar" = 5"#);
 }
@@ -203,7 +205,7 @@ fn test_redshift_json_path() {
             path: JsonPath {
                 path: vec![
                     JsonPathElem::Bracket {
-                        key: Expr::Value(Value::Number("0".parse().unwrap(), false))
+                        key: Expr::Value(number("0"))
                     },
                     JsonPathElem::Dot {
                         key: "o_orderkey".to_string(),
@@ -226,7 +228,7 @@ fn test_redshift_json_path() {
             path: JsonPath {
                 path: vec![
                     JsonPathElem::Bracket {
-                        key: Expr::Value(Value::Number("0".parse().unwrap(), false))
+                        key: Expr::Value(number("0"))
                    },
                    JsonPathElem::Bracket {
                        key: Expr::Value(Value::SingleQuotedString("id".to_owned()))
@@ -250,7 +252,7 @@ fn test_redshift_json_path() {
             path: JsonPath {
                 path: vec![
                     JsonPathElem::Bracket {
-                        key: Expr::Value(Value::Number("0".parse().unwrap(), false))
+                        key: Expr::Value(number("0"))
                    },
                    JsonPathElem::Bracket {
                        key: Expr::Value(Value::SingleQuotedString("id".to_owned()))
@@ -260,6 +262,31 @@ fn test_redshift_json_path() {
        },
        expr_from_projection(only(&select.projection))
    );
+
+    let sql = r#"SELECT db1.sc1.tbl1.col1[0]."id" FROM customer_orders_lineitem"#;
+    let select = dialects.verified_only_select(sql);
+    assert_eq!(
+        &Expr::JsonAccess {
+            value: Box::new(Expr::CompoundIdentifier(vec![
+                Ident::new("db1"),
+                Ident::new("sc1"),
+                Ident::new("tbl1"),
+                Ident::new("col1")
+            ])),
+            path: JsonPath {
+                path: vec![
+                    JsonPathElem::Bracket {
+                        key: Expr::Value(number("0"))
+                    },
+                    JsonPathElem::Dot {
+                        key: "id".to_string(),
+                        quoted: true,
+                    }
+                ]
+            }
+        },
+        expr_from_projection(only(&select.projection))
+    );
 }
 
 #[test]
@@ -276,7 +303,7 @@ fn test_parse_json_path_from() {
         &Some(JsonPath {
             path: vec![
                 JsonPathElem::Bracket {
-                    key: Expr::Value(Value::Number("0".parse().unwrap(), false))
+                    key: Expr::Value(number("0"))
                },
                JsonPathElem::Dot {
                    key: "a".to_string(),
@@ -300,7 +327,7 @@ fn test_parse_json_path_from() {
         &Some(JsonPath {
             path: vec![
                 JsonPathElem::Bracket {
-                    key: Expr::Value(Value::Number("0".parse().unwrap(), false))
+                    key: Expr::Value(number("0"))
                },
                JsonPathElem::Dot {
                    key: "a".to_string(),
@@ -334,3 +361,24 @@ fn test_parse_json_path_from() {
         _ => panic!(),
     }
 }
+
+#[test]
+fn test_parse_select_numbered_columns() {
+    // An alias starting with a number
+    redshift_and_generic().verified_stmt(r#"SELECT 1 AS "1" FROM a"#);
+    redshift_and_generic().verified_stmt(r#"SELECT 1 AS "1abc" FROM a"#);
+}
+
+#[test]
+fn test_parse_nested_quoted_identifier() {
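+    // The tokenizer folds the bracket pair and the inner quotes into one word, e.g.
+    // `["1"]` becomes an Ident with value `"1"` (inner quotes kept) and quote style `[`.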
+ redshift().verified_stmt(r#"SELECT 1 AS ["1"] FROM a"#); + redshift().verified_stmt(r#"SELECT 1 AS ["[="] FROM a"#); + redshift().verified_stmt(r#"SELECT 1 AS ["=]"] FROM a"#); + redshift().verified_stmt(r#"SELECT 1 AS ["a[b]"] FROM a"#); + // trim spaces + redshift().one_statement_parses_to(r#"SELECT 1 AS [ " 1 " ]"#, r#"SELECT 1 AS [" 1 "]"#); + // invalid query + assert!(redshift() + .parse_sql_statements(r#"SELECT 1 AS ["1]"#) + .is_err()); +} From c69839102ae4854cd5d50a799ab7fd48c8919eda Mon Sep 17 00:00:00 2001 From: Yoav Cohen <59807311+yoavcloud@users.noreply.github.com> Date: Tue, 17 Dec 2024 08:39:59 +0100 Subject: [PATCH 10/11] Add the alter table ON COMMIT option to Snowflake (#1606) --- src/dialect/snowflake.rs | 4 ++++ src/parser/mod.rs | 36 ++++++++++++++++++++---------------- tests/sqlparser_snowflake.rs | 9 +++++++++ 3 files changed, 33 insertions(+), 16 deletions(-) diff --git a/src/dialect/snowflake.rs b/src/dialect/snowflake.rs index 77d2ccff1..50e383db2 100644 --- a/src/dialect/snowflake.rs +++ b/src/dialect/snowflake.rs @@ -377,6 +377,10 @@ pub fn parse_create_table( parser.expect_token(&Token::RParen)?; builder = builder.with_tags(Some(tags)); } + Keyword::ON if parser.parse_keyword(Keyword::COMMIT) => { + let on_commit = Some(parser.parse_create_table_on_commit()?); + builder = builder.on_commit(on_commit); + } _ => { return parser.expected("end of statement", next_token); } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 7d70460b4..ca46bb604 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -6155,22 +6155,11 @@ impl<'a> Parser<'a> { None }; - let on_commit: Option = - if self.parse_keywords(&[Keyword::ON, Keyword::COMMIT, Keyword::DELETE, Keyword::ROWS]) - { - Some(OnCommit::DeleteRows) - } else if self.parse_keywords(&[ - Keyword::ON, - Keyword::COMMIT, - Keyword::PRESERVE, - Keyword::ROWS, - ]) { - Some(OnCommit::PreserveRows) - } else if self.parse_keywords(&[Keyword::ON, Keyword::COMMIT, Keyword::DROP]) { - Some(OnCommit::Drop) - } else { - None - }; + let on_commit = if self.parse_keywords(&[Keyword::ON, Keyword::COMMIT]) { + Some(self.parse_create_table_on_commit()?) + } else { + None + }; let strict = self.parse_keyword(Keyword::STRICT); @@ -6226,6 +6215,21 @@ impl<'a> Parser<'a> { .build()) } + pub(crate) fn parse_create_table_on_commit(&mut self) -> Result { + if self.parse_keywords(&[Keyword::DELETE, Keyword::ROWS]) { + Ok(OnCommit::DeleteRows) + } else if self.parse_keywords(&[Keyword::PRESERVE, Keyword::ROWS]) { + Ok(OnCommit::PreserveRows) + } else if self.parse_keywords(&[Keyword::DROP]) { + Ok(OnCommit::Drop) + } else { + parser_err!( + "Expecting DELETE ROWS, PRESERVE ROWS or DROP", + self.peek_token() + ) + } + } + /// Parse configuration like partitioning, clustering information during the table creation. 
     ///
     /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#syntax_2)
diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs
index adb8f8133..9fe14783c 100644
--- a/tests/sqlparser_snowflake.rs
+++ b/tests/sqlparser_snowflake.rs
@@ -355,6 +355,15 @@ fn test_snowflake_create_table_column_comment() {
     }
 }
 
+#[test]
+fn test_snowflake_create_table_on_commit() {
+    snowflake().verified_stmt(
+        r#"CREATE LOCAL TEMPORARY TABLE "AAA"."foo" ("bar" INTEGER) ON COMMIT PRESERVE ROWS"#,
+    );
+    snowflake().verified_stmt(r#"CREATE TABLE "AAA"."foo" ("bar" INTEGER) ON COMMIT DELETE ROWS"#);
+    snowflake().verified_stmt(r#"CREATE TABLE "AAA"."foo" ("bar" INTEGER) ON COMMIT DROP"#);
+}
+
 #[test]
 fn test_snowflake_create_local_table() {
     match snowflake().verified_stmt("CREATE TABLE my_table (a INT)") {

From 8fcdf48e5c8325e0fdea2c5b2948bda69ba9b907 Mon Sep 17 00:00:00 2001
From: cjw
Date: Tue, 17 Dec 2024 23:03:12 +0800
Subject: [PATCH 11/11] Support parsing `EXPLAIN ESTIMATE` of Clickhouse
 (#1605)

Co-authored-by: Kermit
---
 src/ast/mod.rs            |  7 +++++++
 src/keywords.rs           |  1 +
 src/parser/mod.rs         |  4 ++++
 tests/sqlparser_common.rs | 30 ++++++++++++++++++++++++++++++
 4 files changed, 42 insertions(+)

diff --git a/src/ast/mod.rs b/src/ast/mod.rs
index ccb2ed1bc..6e3f20472 100644
--- a/src/ast/mod.rs
+++ b/src/ast/mod.rs
@@ -3239,6 +3239,9 @@ pub enum Statement {
         ///
         /// [SQLite](https://sqlite.org/lang_explain.html)
         query_plan: bool,
+        /// `EXPLAIN ESTIMATE`
+        /// [Clickhouse](https://clickhouse.com/docs/en/sql-reference/statements/explain#explain-estimate)
+        estimate: bool,
         /// A SQL query that specifies what to explain
         statement: Box<Statement>,
         /// Optional output format of explain
@@ -3471,6 +3474,7 @@ impl fmt::Display for Statement {
                 verbose,
                 analyze,
                 query_plan,
+                estimate,
                 statement,
                 format,
                 options,
@@ -3483,6 +3487,9 @@ impl fmt::Display for Statement {
                 if *analyze {
                     write!(f, "ANALYZE ")?;
                 }
+                if *estimate {
+                    write!(f, "ESTIMATE ")?;
+                }
 
                 if *verbose {
                     write!(f, "VERBOSE ")?;
diff --git a/src/keywords.rs b/src/keywords.rs
index 7e3354078..bbfd00ca0 100644
--- a/src/keywords.rs
+++ b/src/keywords.rs
@@ -298,6 +298,7 @@ define_keywords!(
     ERROR,
     ESCAPE,
     ESCAPED,
+    ESTIMATE,
     EVENT,
     EVERY,
     EXCEPT,
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index ca46bb604..94d63cf80 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -9091,6 +9091,7 @@ impl<'a> Parser<'a> {
         let mut analyze = false;
         let mut verbose = false;
         let mut query_plan = false;
+        let mut estimate = false;
         let mut format = None;
         let mut options = None;
 
             options = Some(self.parse_utility_options()?)
} else if self.parse_keywords(&[Keyword::QUERY, Keyword::PLAN]) { query_plan = true; + } else if self.parse_keyword(Keyword::ESTIMATE) { + estimate = true; } else { analyze = self.parse_keyword(Keyword::ANALYZE); verbose = self.parse_keyword(Keyword::VERBOSE); @@ -9120,6 +9123,7 @@ impl<'a> Parser<'a> { analyze, verbose, query_plan, + estimate, statement: Box::new(statement), format, options, diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 0f1813c2f..1bf9383af 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -4375,6 +4375,7 @@ fn run_explain_analyze( analyze, verbose, query_plan, + estimate, statement, format, options, @@ -4384,6 +4385,7 @@ fn run_explain_analyze( assert_eq!(format, expected_format); assert_eq!(options, exepcted_options); assert!(!query_plan); + assert!(!estimate); assert_eq!("SELECT sqrt(id) FROM foo", statement.to_string()); } _ => panic!("Unexpected Statement, must be Explain"), @@ -4528,6 +4530,34 @@ fn parse_explain_query_plan() { ); } +#[test] +fn parse_explain_estimate() { + let statement = all_dialects().verified_stmt("EXPLAIN ESTIMATE SELECT sqrt(id) FROM foo"); + + match &statement { + Statement::Explain { + query_plan, + estimate, + analyze, + verbose, + statement, + .. + } => { + assert!(estimate); + assert!(!query_plan); + assert!(!analyze); + assert!(!verbose); + assert_eq!("SELECT sqrt(id) FROM foo", statement.to_string()); + } + _ => unreachable!(), + } + + assert_eq!( + "EXPLAIN ESTIMATE SELECT sqrt(id) FROM foo", + statement.to_string() + ); +} + #[test] fn parse_named_argument_function() { let dialects = all_dialects_where(|d| {