Skip to content

Commit

Permalink
Support for BigQuery struct, array and bytes, int64, `float64` datatypes (apache#1003)
Browse files Browse the repository at this point in the history
  • Loading branch information
iffyio authored and serprex committed Nov 6, 2023
1 parent a71b258 commit 47bd477
Show file tree
Hide file tree
Showing 8 changed files with 901 additions and 65 deletions.
67 changes: 55 additions & 12 deletions src/ast/data_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ use serde::{Deserialize, Serialize};
#[cfg(feature = "visitor")]
use sqlparser_derive::{Visit, VisitMut};

use crate::ast::ObjectName;
use crate::ast::{display_comma_separated, ObjectName, StructField};

use super::value::escape_single_quote_string;

Expand Down Expand Up @@ -71,6 +71,10 @@ pub enum DataType {
/// [standard]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#binary-large-object-string-type
/// [Oracle]: https://docs.oracle.com/javadb/10.8.3.0/ref/rrefblob.html
Blob(Option<u64>),
/// Variable-length binary data with optional length.
///
/// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#bytes_type
Bytes(Option<u64>),
/// Numeric type with optional precision and scale e.g. NUMERIC(10,2), [standard][1]
///
/// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#exact-numeric-type
Expand Down Expand Up @@ -125,6 +129,10 @@ pub enum DataType {
///
/// [postgresql]: https://www.postgresql.org/docs/15/datatype.html
Int4(Option<u64>),
/// Integer type in [bigquery]
///
/// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#integer_types
Int64,
/// Integer with optional display width e.g. INTEGER or INTEGER(11)
Integer(Option<u64>),
/// Unsigned int with optional display width e.g. INT UNSIGNED or INT(11) UNSIGNED
Expand All @@ -149,6 +157,10 @@ pub enum DataType {
///
/// [postgresql]: https://www.postgresql.org/docs/15/datatype.html
Float4,
/// Floating point in [bigquery]
///
/// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#floating_point_types
Float64,
/// Floating point e.g. REAL
Real,
/// Float8 as alias for Double in [postgresql]
Expand Down Expand Up @@ -192,18 +204,23 @@ pub enum DataType {
Regclass,
/// Text
Text,
/// String
String,
/// String with optional length.
String(Option<u64>),
/// Bytea
Bytea,
/// Custom type such as enums
Custom(ObjectName, Vec<String>),
/// Arrays
Array(Option<Box<DataType>>),
Array(ArrayElemTypeDef),
/// Enums
Enum(Vec<String>),
/// Set
Set(Vec<String>),
/// Struct
///
/// [hive]: https://docs.cloudera.com/cdw-runtime/cloud/impala-sql-reference/topics/impala-struct.html
/// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type
Struct(Vec<StructField>),
}

impl fmt::Display for DataType {
Expand Down Expand Up @@ -233,6 +250,7 @@ impl fmt::Display for DataType {
format_type_with_optional_length(f, "VARBINARY", size, false)
}
DataType::Blob(size) => format_type_with_optional_length(f, "BLOB", size, false),
DataType::Bytes(size) => format_type_with_optional_length(f, "BYTES", size, false),
DataType::Numeric(info) => {
write!(f, "NUMERIC{info}")
}
Expand Down Expand Up @@ -276,6 +294,9 @@ impl fmt::Display for DataType {
DataType::Int4(zerofill) => {
format_type_with_optional_length(f, "INT4", zerofill, false)
}
DataType::Int64 => {
write!(f, "INT64")
}
DataType::UnsignedInt4(zerofill) => {
format_type_with_optional_length(f, "INT4", zerofill, true)
}
Expand All @@ -299,6 +320,7 @@ impl fmt::Display for DataType {
}
DataType::Real => write!(f, "REAL"),
DataType::Float4 => write!(f, "FLOAT4"),
DataType::Float64 => write!(f, "FLOAT64"),
DataType::Double => write!(f, "DOUBLE"),
DataType::Float8 => write!(f, "FLOAT8"),
DataType::DoublePrecision => write!(f, "DOUBLE PRECISION"),
Expand All @@ -318,15 +340,13 @@ impl fmt::Display for DataType {
DataType::JSON => write!(f, "JSON"),
DataType::Regclass => write!(f, "REGCLASS"),
DataType::Text => write!(f, "TEXT"),
DataType::String => write!(f, "STRING"),
DataType::String(size) => format_type_with_optional_length(f, "STRING", size, false),
DataType::Bytea => write!(f, "BYTEA"),
DataType::Array(ty) => {
if let Some(t) = &ty {
write!(f, "{t}[]")
} else {
write!(f, "ARRAY")
}
}
DataType::Array(ty) => match ty {
ArrayElemTypeDef::None => write!(f, "ARRAY"),
ArrayElemTypeDef::SquareBracket(t) => write!(f, "{t}[]"),
ArrayElemTypeDef::AngleBracket(t) => write!(f, "ARRAY<{t}>"),
},
DataType::Custom(ty, modifiers) => {
if modifiers.is_empty() {
write!(f, "{ty}")
Expand Down Expand Up @@ -355,6 +375,13 @@ impl fmt::Display for DataType {
write!(f, ")")
}
DataType::SnowflakeTimestamp => write!(f, "TIMESTAMP_NTZ"),
DataType::Struct(fields) => {
if !fields.is_empty() {
write!(f, "STRUCT<{}>", display_comma_separated(fields))
} else {
write!(f, "STRUCT")
}
}
}
}
}
Expand Down Expand Up @@ -536,3 +563,19 @@ impl fmt::Display for CharLengthUnits {
}
}
}

/// Represents the data type of the elements in an array (if any) as well as
/// the syntax used to declare the array.
///
/// The variant records which surface syntax was used so that the type can be
/// written back out in the same form.
///
/// For example: Bigquery/Hive use `ARRAY<INT>` whereas snowflake uses ARRAY.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum ArrayElemTypeDef {
/// Bare `ARRAY` keyword with no element type.
None,
/// Angle-bracket syntax with the element type inside, e.g. `ARRAY<INT>`.
AngleBracket(Box<DataType>),
/// Square-bracket suffix syntax with the element type first, e.g. `INT[]`.
SquareBracket(Box<DataType>),
}
58 changes: 57 additions & 1 deletion src/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ use serde::{Deserialize, Serialize};
use sqlparser_derive::{Visit, VisitMut};

pub use self::data_type::{
CharLengthUnits, CharacterLength, DataType, ExactNumberInfo, TimezoneInfo,
ArrayElemTypeDef, CharLengthUnits, CharacterLength, DataType, ExactNumberInfo, TimezoneInfo,
};
pub use self::dcl::{AlterRoleOperation, ResetConfig, RoleOption, SetConfigValue};
pub use self::ddl::{
Expand Down Expand Up @@ -323,6 +323,27 @@ impl fmt::Display for JsonOperator {
}
}

/// A field definition within a struct type, as used by e.g. [bigquery].
///
/// A field always has a type and may optionally have a name.
///
/// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub struct StructField {
/// Name of the field, or `None` when the field is unnamed.
pub field_name: Option<Ident>,
/// Data type of the field.
pub field_type: DataType,
}

/// Renders a struct field as `<name> <type>` when it is named, or as just
/// `<type>` when it is not.
impl fmt::Display for StructField {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Emit the optional name (and its separating space) first; the type
        // is written unconditionally afterwards.
        if let Some(ident) = self.field_name.as_ref() {
            write!(f, "{ident} ")?;
        }
        write!(f, "{}", self.field_type)
    }
}

/// Options for `CAST` / `TRY_CAST`
/// BigQuery: <https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#formatting_syntax>
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
Expand Down Expand Up @@ -597,6 +618,26 @@ pub enum Expr {
Rollup(Vec<Vec<Expr>>),
/// ROW / TUPLE a single value, such as `SELECT (1, 2)`
Tuple(Vec<Expr>),
/// `BigQuery` specific `Struct` literal expression [1]
/// Syntax:
/// ```sql
/// STRUCT<[field_name] field_type, ...>( expr1 [, ... ])
/// ```
/// [1]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type
Struct {
/// Struct values.
values: Vec<Expr>,
/// Struct field definitions.
fields: Vec<StructField>,
},
/// `BigQuery` specific: A named expression in a typeless struct [1]
///
/// Syntax
/// ```sql
/// 1 AS A
/// ```
/// [1]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type
Named { expr: Box<Expr>, name: Ident },
/// An array index expression e.g. `(ARRAY[1, 2])[1]` or `(current_schemas(FALSE))[1]`
ArrayIndex { obj: Box<Expr>, indexes: Vec<Expr> },
/// An array expression e.g. `ARRAY[1, 2]`
Expand Down Expand Up @@ -997,6 +1038,21 @@ impl fmt::Display for Expr {
Expr::Tuple(exprs) => {
write!(f, "({})", display_comma_separated(exprs))
}
Expr::Struct { values, fields } => {
if !fields.is_empty() {
write!(
f,
"STRUCT<{}>({})",
display_comma_separated(fields),
display_comma_separated(values)
)
} else {
write!(f, "STRUCT({})", display_comma_separated(values))
}
}
Expr::Named { expr, name } => {
write!(f, "{} AS {}", expr, name)
}
Expr::ArrayIndex { obj, indexes } => {
write!(f, "{obj}")?;
for i in indexes {
Expand Down
5 changes: 5 additions & 0 deletions src/keywords.rs
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ define_keywords!(
BY,
BYPASSRLS,
BYTEA,
BYTES,
CACHE,
CALL,
CALLED,
Expand Down Expand Up @@ -273,6 +274,7 @@ define_keywords!(
FIRST_VALUE,
FLOAT,
FLOAT4,
FLOAT64,
FLOAT8,
FLOOR,
FOLLOWING,
Expand All @@ -296,6 +298,7 @@ define_keywords!(
FUSION,
GENERATE,
GENERATED,
GEOGRAPHY,
GET,
GLOBAL,
GRANT,
Expand Down Expand Up @@ -331,6 +334,7 @@ define_keywords!(
INT,
INT2,
INT4,
INT64,
INT8,
INTEGER,
INTERSECT,
Expand Down Expand Up @@ -596,6 +600,7 @@ define_keywords!(
STORED,
STRICT,
STRING,
STRUCT,
SUBMULTISET,
SUBSTRING,
SUBSTRING_REGEX,
Expand Down
Loading

0 comments on commit 47bd477

Please sign in to comment.