From e381bf6a640d246357608479541d010b42e2a1a7 Mon Sep 17 00:00:00 2001 From: Ifeanyi Ubah Date: Wed, 25 Oct 2023 18:57:33 +0200 Subject: [PATCH] Support for BigQuery `struct`, `array` and `bytes` , `int64`, `float64` datatypes (#1003) --- src/ast/data_type.rs | 67 ++++- src/ast/mod.rs | 61 ++++- src/keywords.rs | 5 + src/parser/mod.rs | 249 ++++++++++++++++-- tests/sqlparser_bigquery.rs | 489 ++++++++++++++++++++++++++++++++++- tests/sqlparser_common.rs | 105 +++++--- tests/sqlparser_postgres.rs | 8 +- tests/sqlparser_snowflake.rs | 2 +- 8 files changed, 910 insertions(+), 76 deletions(-) diff --git a/src/ast/data_type.rs b/src/ast/data_type.rs index f79fdead3..315d22b5a 100644 --- a/src/ast/data_type.rs +++ b/src/ast/data_type.rs @@ -20,7 +20,7 @@ use serde::{Deserialize, Serialize}; #[cfg(feature = "visitor")] use sqlparser_derive::{Visit, VisitMut}; -use crate::ast::ObjectName; +use crate::ast::{display_comma_separated, ObjectName, StructField}; use super::value::escape_single_quote_string; @@ -71,6 +71,10 @@ pub enum DataType { /// [standard]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#binary-large-object-string-type /// [Oracle]: https://docs.oracle.com/javadb/10.8.3.0/ref/rrefblob.html Blob(Option), + /// Variable-length binary data with optional length. + /// + /// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#bytes_type + Bytes(Option), /// Numeric type with optional precision and scale e.g. NUMERIC(10,2), [standard][1] /// /// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#exact-numeric-type @@ -125,6 +129,10 @@ pub enum DataType { /// /// [postgresql]: https://www.postgresql.org/docs/15/datatype.html Int4(Option), + /// Integer type in [bigquery] + /// + /// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#integer_types + Int64, /// Integer with optional display width e.g. INTEGER or INTEGER(11) Integer(Option), /// Unsigned int with optional display width e.g. INT UNSIGNED or INT(11) UNSIGNED @@ -149,6 +157,10 @@ pub enum DataType { /// /// [postgresql]: https://www.postgresql.org/docs/15/datatype.html Float4, + /// Floating point in [bigquery] + /// + /// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#floating_point_types + Float64, /// Floating point e.g. REAL Real, /// Float8 as alias for Double in [postgresql] @@ -190,18 +202,23 @@ pub enum DataType { Regclass, /// Text Text, - /// String - String, + /// String with optional length. + String(Option), /// Bytea Bytea, /// Custom type such as enums Custom(ObjectName, Vec), /// Arrays - Array(Option>), + Array(ArrayElemTypeDef), /// Enums Enum(Vec), /// Set Set(Vec), + /// Struct + /// + /// [hive]: https://docs.cloudera.com/cdw-runtime/cloud/impala-sql-reference/topics/impala-struct.html + /// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type + Struct(Vec), } impl fmt::Display for DataType { @@ -231,6 +248,7 @@ impl fmt::Display for DataType { format_type_with_optional_length(f, "VARBINARY", size, false) } DataType::Blob(size) => format_type_with_optional_length(f, "BLOB", size, false), + DataType::Bytes(size) => format_type_with_optional_length(f, "BYTES", size, false), DataType::Numeric(info) => { write!(f, "NUMERIC{info}") } @@ -274,6 +292,9 @@ impl fmt::Display for DataType { DataType::Int4(zerofill) => { format_type_with_optional_length(f, "INT4", zerofill, false) } + DataType::Int64 => { + write!(f, "INT64") + } DataType::UnsignedInt4(zerofill) => { format_type_with_optional_length(f, "INT4", zerofill, true) } @@ -297,6 +318,7 @@ impl fmt::Display for DataType { } DataType::Real => write!(f, "REAL"), DataType::Float4 => write!(f, "FLOAT4"), + DataType::Float64 => write!(f, "FLOAT64"), DataType::Double => write!(f, "DOUBLE"), DataType::Float8 => write!(f, "FLOAT8"), DataType::DoublePrecision => write!(f, "DOUBLE PRECISION"), @@ -316,15 +338,13 @@ impl fmt::Display for DataType { DataType::JSON => write!(f, "JSON"), DataType::Regclass => write!(f, "REGCLASS"), DataType::Text => write!(f, "TEXT"), - DataType::String => write!(f, "STRING"), + DataType::String(size) => format_type_with_optional_length(f, "STRING", size, false), DataType::Bytea => write!(f, "BYTEA"), - DataType::Array(ty) => { - if let Some(t) = &ty { - write!(f, "{t}[]") - } else { - write!(f, "ARRAY") - } - } + DataType::Array(ty) => match ty { + ArrayElemTypeDef::None => write!(f, "ARRAY"), + ArrayElemTypeDef::SquareBracket(t) => write!(f, "{t}[]"), + ArrayElemTypeDef::AngleBracket(t) => write!(f, "ARRAY<{t}>"), + }, DataType::Custom(ty, modifiers) => { if modifiers.is_empty() { write!(f, "{ty}") @@ -352,6 +372,13 @@ impl fmt::Display for DataType { } write!(f, ")") } + DataType::Struct(fields) => { + if !fields.is_empty() { + write!(f, "STRUCT<{}>", display_comma_separated(fields)) + } else { + write!(f, "STRUCT") + } + } } } } @@ -544,3 +571,19 @@ impl fmt::Display for CharLengthUnits { } } } + +/// Represents the data type of the elements in an array (if any) as well as +/// the syntax used to declare the array. +/// +/// For example: Bigquery/Hive use `ARRAY` whereas snowflake uses ARRAY. +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum ArrayElemTypeDef { + /// `ARRAY` + None, + /// `ARRAY` + AngleBracket(Box), + /// `[]INT` + SquareBracket(Box), +} diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 632a796a1..9122f440e 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -29,7 +29,7 @@ use serde::{Deserialize, Serialize}; use sqlparser_derive::{Visit, VisitMut}; pub use self::data_type::{ - CharLengthUnits, CharacterLength, DataType, ExactNumberInfo, TimezoneInfo, + ArrayElemTypeDef, CharLengthUnits, CharacterLength, DataType, ExactNumberInfo, TimezoneInfo, }; pub use self::dcl::{AlterRoleOperation, ResetConfig, RoleOption, SetConfigValue}; pub use self::ddl::{ @@ -360,6 +360,27 @@ impl fmt::Display for JsonOperator { } } +/// A field definition within a struct. +/// +/// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct StructField { + pub field_name: Option>, + pub field_type: DataType, +} + +impl fmt::Display for StructField { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if let Some(name) = &self.field_name { + write!(f, "{name} {}", self.field_type) + } else { + write!(f, "{}", self.field_type) + } + } +} + #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] @@ -698,6 +719,29 @@ pub enum Expr { Rollup(Vec>), /// ROW / TUPLE a single value, such as `SELECT (1, 2)` Tuple(Vec), + /// `BigQuery` specific `Struct` literal expression [1] + /// Syntax: + /// ```sql + /// STRUCT<[field_name] field_type, ...>( expr1 [, ... ]) + /// ``` + /// [1]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type + Struct { + /// Struct values. + values: Vec, + /// Struct field definitions. + fields: Vec, + }, + /// `BigQuery` specific: An named expression in a typeless struct [1] + /// + /// Syntax + /// ```sql + /// 1 AS A + /// ``` + /// [1]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type + Named { + expr: Box, + name: WithSpan, + }, /// An array index expression e.g. `(ARRAY[1, 2])[1]` or `(current_schemas(FALSE))[1]` ArrayIndex { obj: Box, indexes: Vec }, /// An array expression e.g. `ARRAY[1, 2]` @@ -1093,6 +1137,21 @@ impl fmt::Display for Expr { Expr::Tuple(exprs) => { write!(f, "({})", display_comma_separated(exprs)) } + Expr::Struct { values, fields } => { + if !fields.is_empty() { + write!( + f, + "STRUCT<{}>({})", + display_comma_separated(fields), + display_comma_separated(values) + ) + } else { + write!(f, "STRUCT({})", display_comma_separated(values)) + } + } + Expr::Named { expr, name } => { + write!(f, "{} AS {}", expr, name) + } Expr::ArrayIndex { obj, indexes } => { write!(f, "{obj}")?; for i in indexes { diff --git a/src/keywords.rs b/src/keywords.rs index a900a8b5a..e5d3f1546 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -122,6 +122,7 @@ define_keywords!( BY, BYPASSRLS, BYTEA, + BYTES, CACHE, CALL, CALLED, @@ -274,6 +275,7 @@ define_keywords!( FIRST_VALUE, FLOAT, FLOAT4, + FLOAT64, FLOAT8, FLOOR, FOLLOWING, @@ -297,6 +299,7 @@ define_keywords!( FUSION, GENERATE, GENERATED, + GEOGRAPHY, GET, GLOBAL, GRANT, @@ -333,6 +336,7 @@ define_keywords!( INT, INT2, INT4, + INT64, INT8, INTEGER, INTERSECT, @@ -591,6 +595,7 @@ define_keywords!( STORED, STRICT, STRING, + STRUCT, SUBMULTISET, SUBSTRING, SUBSTRING_REGEX, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index f68d0163d..42d4a4161 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -33,7 +33,7 @@ use IsOptional::*; use crate::ast::helpers::stmt_create_table::CreateTableBuilder; use crate::ast::*; use crate::dialect::*; -use crate::keywords::{self, Keyword}; +use crate::keywords::{self, Keyword, ALL_KEYWORDS}; use crate::tokenizer::*; mod alter; @@ -198,6 +198,26 @@ impl std::error::Error for ParserError {} // By default, allow expressions up to this deep before erroring const DEFAULT_REMAINING_DEPTH: usize = 41; +/// Composite types declarations using angle brackets syntax can be arbitrary +/// nested such that the following declaration is possible: +/// `ARRAY>` +/// But the tokenizer recognizes the `>>` as a ShiftRight token. +/// We work-around that limitation when parsing a data type by accepting +/// either a `>` or `>>` token in such cases, remembering which variant we +/// matched. +/// In the latter case having matched a `>>`, the parent type will not look to +/// match its closing `>` as a result since that will have taken place at the +/// child type. +/// +/// See [Parser::parse_data_type] for details +struct MatchedTrailingBracket(bool); + +impl From for MatchedTrailingBracket { + fn from(value: bool) -> Self { + Self(value) + } +} + /// Options that control how the [`Parser`] parses SQL text #[derive(Debug, Clone, PartialEq, Eq)] pub struct ParserOptions { @@ -821,6 +841,10 @@ impl<'a> Parser<'a> { Keyword::MATCH if dialect_of!(self is MySqlDialect | GenericDialect) => { self.parse_match_against() } + Keyword::STRUCT if dialect_of!(self is BigQueryDialect | GenericDialect) => { + self.prev_token(); + self.parse_bigquery_struct_literal() + } // Here `w` is a word, check if it's a part of a multi-part // identifier, a function call, or a simple identifier: _ => match self.peek_token().token { @@ -1671,6 +1695,175 @@ impl<'a> Parser<'a> { })) } + /// Bigquery specific: Parse a struct literal + /// Syntax + /// ```sql + /// -- typed + /// STRUCT<[field_name] field_type, ...>( expr1 [, ... ]) + /// -- typeless + /// STRUCT( expr1 [AS field_name] [, ... ]) + /// ``` + fn parse_bigquery_struct_literal(&mut self) -> Result { + let (fields, trailing_bracket) = + self.parse_struct_type_def(Self::parse_big_query_struct_field_def)?; + if trailing_bracket.0 { + return parser_err!( + "unmatched > in STRUCT literal", + self.peek_token().span.start + ); + } + + self.expect_token(&Token::LParen)?; + let values = self + .parse_comma_separated(|parser| parser.parse_struct_field_expr(!fields.is_empty()))?; + self.expect_token(&Token::RParen)?; + + Ok(Expr::Struct { values, fields }) + } + + /// Parse an expression value for a bigquery struct [1] + /// Syntax + /// ```sql + /// expr [AS name] + /// ``` + /// + /// Parameter typed_syntax is set to true if the expression + /// is to be parsed as a field expression declared using typed + /// struct syntax [2], and false if using typeless struct syntax [3]. + /// + /// [1]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#constructing_a_struct + /// [2]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#typed_struct_syntax + /// [3]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#typeless_struct_syntax + fn parse_struct_field_expr(&mut self, typed_syntax: bool) -> Result { + let expr = self.parse_expr()?; + if self.parse_keyword(Keyword::AS) { + if typed_syntax { + return parser_err!("Typed syntax does not allow AS", { + self.prev_token(); + self.peek_token().span.start + }); + } + let field_name = self.parse_identifier()?; + Ok(Expr::Named { + expr: expr.into(), + name: field_name, + }) + } else { + Ok(expr) + } + } + + /// Parse a Struct type definition as a sequence of field-value pairs. + /// The syntax of the Struct elem differs by dialect so it is customised + /// by the `elem_parser` argument. + /// + /// Syntax + /// ```sql + /// Hive: + /// STRUCT + /// + /// BigQuery: + /// STRUCT<[field_name] field_type> + /// ``` + fn parse_struct_type_def( + &mut self, + mut elem_parser: F, + ) -> Result<(Vec, MatchedTrailingBracket), ParserError> + where + F: FnMut(&mut Parser<'a>) -> Result<(StructField, MatchedTrailingBracket), ParserError>, + { + let start_token = self.peek_token(); + self.expect_keyword(Keyword::STRUCT)?; + + // Nothing to do if we have no type information. + if Token::Lt != self.peek_token() { + return Ok((Default::default(), false.into())); + } + self.next_token(); + + let mut field_defs = vec![]; + let trailing_bracket = loop { + let (def, trailing_bracket) = elem_parser(self)?; + field_defs.push(def); + if !self.consume_token(&Token::Comma) { + break trailing_bracket; + } + + // Angle brackets are balanced so we only expect the trailing `>>` after + // we've matched all field types for the current struct. + // e.g. this is invalid syntax `STRUCT>>, INT>(NULL)` + if trailing_bracket.0 { + return parser_err!("unmatched > in STRUCT definition", start_token.span.start); + } + }; + + Ok(( + field_defs, + self.expect_closing_angle_bracket(trailing_bracket)?, + )) + } + + /// Parse a field definition in a BigQuery struct. + /// Syntax: + /// + /// ```sql + /// [field_name] field_type + /// ``` + fn parse_big_query_struct_field_def( + &mut self, + ) -> Result<(StructField, MatchedTrailingBracket), ParserError> { + let is_anonymous_field = if let Token::Word(w) = self.peek_token().token { + ALL_KEYWORDS + .binary_search(&w.value.to_uppercase().as_str()) + .is_ok() + } else { + false + }; + + let field_name = if is_anonymous_field { + None + } else { + Some(self.parse_identifier()?) + }; + + let (field_type, trailing_bracket) = self.parse_data_type_helper()?; + + Ok(( + StructField { + field_name, + field_type, + }, + trailing_bracket, + )) + } + + /// For nested types that use the angle bracket syntax, this matches either + /// `>`, `>>` or nothing depending on which variant is expected (specified by the previously + /// matched `trailing_bracket` argument). It returns whether there is a trailing + /// left to be matched - (i.e. if '>>' was matched). + fn expect_closing_angle_bracket( + &mut self, + trailing_bracket: MatchedTrailingBracket, + ) -> Result { + let trailing_bracket = if !trailing_bracket.0 { + match self.peek_token().token { + Token::Gt => { + self.next_token(); + false.into() + } + Token::ShiftRight => { + self.next_token(); + true.into() + } + _ => return self.expected(">", self.peek_token()), + } + } else { + false.into() + }; + + Ok(trailing_bracket) + } + /// Parse an operator following an expression pub fn parse_infix(&mut self, expr: Expr, precedence: u8) -> Result { // allow the dialect to override infix parsing @@ -5092,17 +5285,32 @@ impl<'a> Parser<'a> { /// Parse a SQL datatype (in the context of a CREATE TABLE statement for example) pub fn parse_data_type(&mut self) -> Result { + let (ty, trailing_bracket) = self.parse_data_type_helper()?; + if trailing_bracket.0 { + return parser_err!( + format!("unmatched > after parsing data type {ty}"), + self.peek_token() + ); + } + + Ok(ty) + } + + fn parse_data_type_helper( + &mut self, + ) -> Result<(DataType, MatchedTrailingBracket), ParserError> { if dialect_of!(self is ClickHouseDialect) { // ClickHouse allows to specify data type as Nullable(DateTime64(8)) let type_name = self.parse_object_name()?; return if let Some(modifiers) = self.parse_clickhouse_type_modifiers()? { - Ok(DataType::Custom(type_name, modifiers)) + Ok((DataType::Custom(type_name, modifiers), false.into())) } else { - Ok(DataType::Custom(type_name, vec![])) + Ok((DataType::Custom(type_name, vec![]), false.into())) }; } let next_token = self.next_token(); + let mut trailing_bracket = false.into(); let mut data = match next_token.token { Token::Word(w) => match w.keyword { Keyword::BOOLEAN => Ok(DataType::Boolean), @@ -5110,6 +5318,7 @@ impl<'a> Parser<'a> { Keyword::FLOAT => Ok(DataType::Float(self.parse_optional_precision()?)), Keyword::REAL => Ok(DataType::Real), Keyword::FLOAT4 => Ok(DataType::Float4), + Keyword::FLOAT64 => Ok(DataType::Float64), Keyword::FLOAT8 => Ok(DataType::Float8), Keyword::DOUBLE => { if self.parse_keyword(Keyword::PRECISION) { @@ -5166,6 +5375,7 @@ impl<'a> Parser<'a> { Ok(DataType::Int4(optional_precision?)) } } + Keyword::INT64 => Ok(DataType::Int64), Keyword::INTEGER => { let optional_precision = self.parse_optional_precision(); if self.parse_keyword(Keyword::UNSIGNED) { @@ -5220,6 +5430,7 @@ impl<'a> Parser<'a> { Keyword::BINARY => Ok(DataType::Binary(self.parse_optional_precision()?)), Keyword::VARBINARY => Ok(DataType::Varbinary(self.parse_optional_precision()?)), Keyword::BLOB => Ok(DataType::Blob(self.parse_optional_precision()?)), + Keyword::BYTES => Ok(DataType::Bytes(self.parse_optional_precision()?)), Keyword::UUID => Ok(DataType::Uuid), Keyword::DATE => Ok(DataType::Date), Keyword::DATETIME => Ok(DataType::Datetime(self.parse_optional_precision()?)), @@ -5263,7 +5474,7 @@ impl<'a> Parser<'a> { Keyword::INTERVAL => Ok(DataType::Interval), Keyword::JSON => Ok(DataType::JSON), Keyword::REGCLASS => Ok(DataType::Regclass), - Keyword::STRING => Ok(DataType::String), + Keyword::STRING => Ok(DataType::String(self.parse_optional_precision()?)), Keyword::TEXT => Ok(DataType::Text), Keyword::BYTEA => Ok(DataType::Bytea), Keyword::NUMERIC => Ok(DataType::Numeric( @@ -5285,23 +5496,23 @@ impl<'a> Parser<'a> { Keyword::SET => Ok(DataType::Set(self.parse_string_values()?)), Keyword::ARRAY => { if dialect_of!(self is SnowflakeDialect) { - Ok(DataType::Array(None)) - } else if dialect_of!(self is ClickHouseDialect) { - // Clickhouse uses Array(T) syntax - self.expect_token(&Token::LParen)?; - let inside_type = self.parse_data_type()?; - self.expect_token(&Token::RParen)?; - Ok(DataType::Array(Some(Box::new(inside_type)))) + Ok(DataType::Array(ArrayElemTypeDef::None)) } else { - // Hive array syntax. Note that nesting arrays - or other Hive syntax - // that ends with > will fail due to "C++" problem - >> is parsed as - // Token::ShiftRight self.expect_token(&Token::Lt)?; - let inside_type = self.parse_data_type()?; - self.expect_token(&Token::Gt)?; - Ok(DataType::Array(Some(Box::new(inside_type)))) + let (inside_type, _trailing_bracket) = self.parse_data_type_helper()?; + trailing_bracket = self.expect_closing_angle_bracket(_trailing_bracket)?; + Ok(DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new( + inside_type, + )))) } } + Keyword::STRUCT if dialect_of!(self is BigQueryDialect) => { + self.prev_token(); + let (field_defs, _trailing_bracket) = + self.parse_struct_type_def(Self::parse_big_query_struct_field_def)?; + trailing_bracket = _trailing_bracket; + Ok(DataType::Struct(field_defs)) + } _ => { self.prev_token(); let type_name = self.parse_object_name()?; @@ -5319,9 +5530,9 @@ impl<'a> Parser<'a> { // Keyword::ARRAY syntax from above while self.consume_token(&Token::LBracket) { self.expect_token(&Token::RBracket)?; - data = DataType::Array(Some(Box::new(data))) + data = DataType::Array(ArrayElemTypeDef::SquareBracket(Box::new(data))) } - Ok(data) + Ok((data, trailing_bracket)) } pub fn parse_string_values(&mut self) -> Result, ParserError> { diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 35f18e582..037018932 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -13,10 +13,12 @@ #[macro_use] mod test_utils; +use sqlparser::ast; use std::ops::Deref; use sqlparser::ast::*; use sqlparser::dialect::{BigQueryDialect, GenericDialect}; +use sqlparser::parser::ParserError; use test_utils::*; #[test] @@ -84,6 +86,491 @@ fn parse_raw_literal() { panic!("invalid query") } +#[test] +fn parse_nested_data_types() { + let sql = "CREATE TABLE table (x STRUCT, b BYTES(42)>, y ARRAY>)"; + match bigquery().one_statement_parses_to(sql, sql) { + Statement::CreateTable { name, columns, .. } => { + assert_eq!(name, ObjectName(vec!["table".into()])); + assert_eq!( + columns, + vec![ + ColumnDef { + name: Ident::new("x").empty_span(), + data_type: DataType::Struct(vec![ + StructField { + field_name: Some(Ident::new("a").empty_span()), + field_type: DataType::Array(ArrayElemTypeDef::AngleBracket( + Box::new(DataType::Int64,) + )) + }, + StructField { + field_name: Some(Ident::new("b").empty_span()), + field_type: DataType::Bytes(Some(42)) + }, + ]), + collation: None, + codec: None, + options: vec![], + }, + ColumnDef { + name: Ident::new("y").empty_span(), + data_type: DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new( + DataType::Struct(vec![StructField { + field_name: None, + field_type: DataType::Int64, + }]), + ))), + collation: None, + codec: None, + options: vec![], + }, + ] + ); + } + _ => unreachable!(), + } +} + +#[test] +fn parse_invalid_brackets() { + let sql = "SELECT STRUCT>(NULL)"; + assert_eq!( + bigquery().parse_sql_statements(sql).unwrap_err(), + ParserError::ParserError("unmatched > in STRUCT literal".to_string()) + ); + + let sql = "SELECT STRUCT>>(NULL)"; + assert_eq!( + bigquery().parse_sql_statements(sql).unwrap_err(), + ParserError::ParserError( + "Expected (, found: >\nNear `SELECT STRUCT>`".to_string() + ) + ); + + let sql = "CREATE TABLE table (x STRUCT>>)"; + assert_eq!( + bigquery().parse_sql_statements(sql).unwrap_err(), + ParserError::ParserError( + "Expected ',' or ')' after column definition, found: >\nNear ` (x STRUCT>`".to_string() + ) + ); +} + +#[test] +fn parse_tuple_struct_literal() { + // tuple syntax: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#tuple_syntax + // syntax: (expr1, expr2 [, ... ]) + let sql = "SELECT (1, 2, 3), (1, 1.0, '123', true)"; + let select = bigquery().verified_only_select(sql); + assert_eq!(2, select.projection.len()); + assert_eq!( + &Expr::Tuple(vec![ + Expr::Value(number("1")), + Expr::Value(number("2")), + Expr::Value(number("3")), + ]), + expr_from_projection(&select.projection[0]) + ); + assert_eq!( + &Expr::Tuple(vec![ + Expr::Value(number("1")), + Expr::Value(number("1.0")), + Expr::Value(Value::SingleQuotedString("123".to_string())), + Expr::Value(Value::Boolean(true)) + ]), + expr_from_projection(&select.projection[1]) + ); +} + +#[test] +fn parse_typeless_struct_syntax() { + // typeless struct syntax https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#typeless_struct_syntax + // syntax: STRUCT( expr1 [AS field_name] [, ... ]) + let sql = "SELECT STRUCT(1, 2, 3), STRUCT('abc'), STRUCT(1, t.str_col), STRUCT(1 AS a, 'abc' AS b), STRUCT(str_col AS abc)"; + let select = bigquery().verified_only_select(sql); + assert_eq!(5, select.projection.len()); + assert_eq!( + &Expr::Struct { + values: vec![ + Expr::Value(number("1")), + Expr::Value(number("2")), + Expr::Value(number("3")), + ], + fields: Default::default() + }, + expr_from_projection(&select.projection[0]) + ); + + assert_eq!( + &Expr::Struct { + values: vec![Expr::Value(Value::SingleQuotedString("abc".to_string())),], + fields: Default::default() + }, + expr_from_projection(&select.projection[1]) + ); + assert_eq!( + &Expr::Struct { + values: vec![ + Expr::Value(number("1")), + Expr::CompoundIdentifier( + vec![Ident::from("t"), Ident::from("str_col")].empty_span() + ), + ], + fields: Default::default() + }, + expr_from_projection(&select.projection[2]) + ); + assert_eq!( + &Expr::Struct { + values: vec![ + Expr::Named { + expr: Expr::Value(number("1")).into(), + name: Ident::from("a").empty_span() + }, + Expr::Named { + expr: Expr::Value(Value::SingleQuotedString("abc".to_string())).into(), + name: Ident::from("b").empty_span() + }, + ], + fields: Default::default() + }, + expr_from_projection(&select.projection[3]) + ); + assert_eq!( + &Expr::Struct { + values: vec![Expr::Named { + expr: Expr::Identifier(Ident::from("str_col").empty_span()).into(), + name: Ident::from("abc").empty_span() + }], + fields: Default::default() + }, + expr_from_projection(&select.projection[4]) + ); +} + +#[test] +fn parse_typed_struct_syntax() { + // typed struct syntax https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#typed_struct_syntax + // syntax: STRUCT<[field_name] field_type, ...>( expr1 [, ... ]) + + let sql = r#"SELECT STRUCT(5), STRUCT(1, t.str_col), STRUCT, str STRUCT>(nested_col)"#; + let select = bigquery().verified_only_select(sql); + assert_eq!(3, select.projection.len()); + assert_eq!( + &Expr::Struct { + values: vec![Expr::Value(number("5")),], + fields: vec![StructField { + field_name: None, + field_type: DataType::Int64, + }] + }, + expr_from_projection(&select.projection[0]) + ); + assert_eq!( + &Expr::Struct { + values: vec![ + Expr::Value(number("1")), + Expr::CompoundIdentifier( + vec![ + Ident { + value: "t".into(), + quote_style: None, + }, + Ident { + value: "str_col".into(), + quote_style: None, + }, + ] + .empty_span() + ), + ], + fields: vec![ + StructField { + field_name: Some(Ident::new("x").empty_span()), + field_type: DataType::Int64 + }, + StructField { + field_name: Some(Ident::new("y").empty_span()), + field_type: DataType::String(None) + }, + ] + }, + expr_from_projection(&select.projection[1]) + ); + assert_eq!( + &Expr::Struct { + values: vec![Expr::Identifier(Ident::new("nested_col").empty_span()),], + fields: vec![ + StructField { + field_name: Some(Ident::new("arr").empty_span()), + field_type: DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new( + DataType::Float64 + ))) + }, + StructField { + field_name: Some(Ident::new("str").empty_span()), + field_type: DataType::Struct(vec![StructField { + field_name: None, + field_type: DataType::Bool + }]) + }, + ] + }, + expr_from_projection(&select.projection[2]) + ); + + let sql = r#"SELECT STRUCT>(nested_col)"#; + let select = bigquery().verified_only_select(sql); + assert_eq!(1, select.projection.len()); + assert_eq!( + &Expr::Struct { + values: vec![Expr::Identifier(Ident::new("nested_col").empty_span()),], + fields: vec![ + StructField { + field_name: Some(Ident::new("x").empty_span()), + field_type: DataType::Struct(Default::default()) + }, + StructField { + field_name: Some(Ident::new("y").empty_span()), + field_type: DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new( + DataType::Struct(Default::default()) + ))) + }, + ] + }, + expr_from_projection(&select.projection[0]) + ); + + let sql = r#"SELECT STRUCT(true), STRUCT(B'abc')"#; + let select = bigquery().verified_only_select(sql); + assert_eq!(2, select.projection.len()); + assert_eq!( + &Expr::Struct { + values: vec![Expr::Value(Value::Boolean(true)),], + fields: vec![StructField { + field_name: None, + field_type: DataType::Bool + }] + }, + expr_from_projection(&select.projection[0]) + ); + assert_eq!( + &Expr::Struct { + values: vec![Expr::Value(Value::SingleQuotedByteStringLiteral( + "abc".into() + )),], + fields: vec![StructField { + field_name: None, + field_type: DataType::Bytes(Some(42)) + }] + }, + expr_from_projection(&select.projection[1]) + ); + + let sql = r#"SELECT STRUCT("2011-05-05"), STRUCT(DATETIME '1999-01-01 01:23:34.45'), STRUCT(5.0), STRUCT(1)"#; + let select = bigquery().verified_only_select(sql); + assert_eq!(4, select.projection.len()); + assert_eq!( + &Expr::Struct { + values: vec![Expr::Value(Value::DoubleQuotedString( + "2011-05-05".to_string() + )),], + fields: vec![StructField { + field_name: None, + field_type: DataType::Date + }] + }, + expr_from_projection(&select.projection[0]) + ); + assert_eq!( + &Expr::Struct { + values: vec![Expr::TypedString { + data_type: DataType::Datetime(None), + value: "1999-01-01 01:23:34.45".to_string() + },], + fields: vec![StructField { + field_name: None, + field_type: DataType::Datetime(None) + }] + }, + expr_from_projection(&select.projection[1]) + ); + assert_eq!( + &Expr::Struct { + values: vec![Expr::Value(number("5.0")),], + fields: vec![StructField { + field_name: None, + field_type: DataType::Float64 + }] + }, + expr_from_projection(&select.projection[2]) + ); + assert_eq!( + &Expr::Struct { + values: vec![Expr::Value(number("1")),], + fields: vec![StructField { + field_name: None, + field_type: DataType::Int64 + }] + }, + expr_from_projection(&select.projection[3]) + ); + + let sql = r#"SELECT STRUCT(INTERVAL '1-2 3 4:5:6.789999'), STRUCT(JSON '{"class" : {"students" : [{"name" : "Jane"}]}}')"#; + let select = bigquery().verified_only_select(sql); + assert_eq!(2, select.projection.len()); + assert_eq!( + &Expr::Struct { + values: vec![Expr::Interval(ast::Interval { + value: Box::new(Expr::Value(Value::SingleQuotedString( + "1-2 3 4:5:6.789999".to_string() + ))), + leading_field: None, + leading_precision: None, + last_field: None, + fractional_seconds_precision: None + }),], + fields: vec![StructField { + field_name: None, + field_type: DataType::Interval + }] + }, + expr_from_projection(&select.projection[0]) + ); + assert_eq!( + &Expr::Struct { + values: vec![Expr::TypedString { + data_type: DataType::JSON, + value: r#"{"class" : {"students" : [{"name" : "Jane"}]}}"#.to_string() + },], + fields: vec![StructField { + field_name: None, + field_type: DataType::JSON + }] + }, + expr_from_projection(&select.projection[1]) + ); + + let sql = r#"SELECT STRUCT("foo"), STRUCT(TIMESTAMP '2008-12-25 15:30:00 America/Los_Angeles'), STRUCT