-
Notifications
You must be signed in to change notification settings - Fork 551
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Added ability to parse STRUCT and MAP fields as well as nested arrays #966
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -678,6 +678,25 @@ impl fmt::Display for ColumnOption { | |
} | ||
} | ||
|
||
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] | ||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] | ||
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] | ||
pub struct StructField { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you please add docstrings that explain what this struct is for (with an example SQL snippet)? |
||
pub(crate) name: Ident, | ||
pub(crate) data_type: DataType, | ||
pub(crate) options: Option<ColumnOption>, | ||
} | ||
|
||
impl fmt::Display for StructField { | ||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||
write!(f, "{}: {}", self.name, self.data_type)?; | ||
if let Some(option) = self.options.as_ref() { | ||
write!(f, "{option}")?; | ||
} | ||
Ok(()) | ||
} | ||
} | ||
|
||
/// `GeneratedAs`s are modifiers that follow a column option in a `generated`. | ||
/// 'ExpStored' is PostgreSQL specific | ||
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,6 +21,7 @@ | |
vec::Vec, | ||
}; | ||
use core::fmt; | ||
use std::ops::Rem; | ||
|
||
use log::debug; | ||
|
||
|
@@ -114,6 +115,7 @@ | |
Self { remaining_depth } | ||
} | ||
} | ||
|
||
impl Drop for DepthGuard { | ||
fn drop(&mut self) { | ||
self.remaining_depth.fetch_add(1, Ordering::SeqCst); | ||
|
@@ -257,6 +259,7 @@ | |
options: ParserOptions, | ||
/// ensure the stack does not overflow by limiting recursion depth | ||
recursion_counter: RecursionCounter, | ||
max_depth: usize, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please document what this field is for. Also, could you please explain why you didn't use the counter in |
||
} | ||
|
||
impl<'a> Parser<'a> { | ||
|
@@ -282,6 +285,7 @@ | |
dialect, | ||
recursion_counter: RecursionCounter::new(DEFAULT_REMAINING_DEPTH), | ||
options: ParserOptions::default(), | ||
max_depth: 1, | ||
} | ||
} | ||
|
||
|
@@ -2181,7 +2185,7 @@ | |
token => { | ||
return token | ||
.cloned() | ||
.unwrap_or_else(|| TokenWithLocation::wrap(Token::EOF)) | ||
.unwrap_or_else(|| TokenWithLocation::wrap(Token::EOF)); | ||
} | ||
} | ||
} | ||
|
@@ -4648,6 +4652,13 @@ | |
|
||
/// Parse a SQL datatype (in the context of a CREATE TABLE statement for example) | ||
pub fn parse_data_type(&mut self) -> Result<DataType, ParserError> { | ||
self.parse_data_type_with_depth(1) | ||
} | ||
|
||
pub fn parse_data_type_with_depth(&mut self, depth: usize) -> Result<DataType, ParserError> { | ||
if depth > self.max_depth { | ||
self.max_depth = depth - 1; | ||
} | ||
let next_token = self.next_token(); | ||
let mut data = match next_token.token { | ||
Token::Word(w) => match w.keyword { | ||
|
@@ -4837,10 +4848,57 @@ | |
// that ends with > will fail due to "C++" problem - >> is parsed as | ||
// Token::ShiftRight | ||
self.expect_token(&Token::Lt)?; | ||
let inside_type = self.parse_data_type()?; | ||
|
||
let inside_type = self.parse_data_type_with_depth(depth + 1)?; | ||
dbg!(depth, self.max_depth); | ||
|
||
if depth <= 1 { | ||
dbg!("First Level"); | ||
if (depth == 1 && self.max_depth == depth) | ||
|| (self.peek_previous_token()? == &Token::ShiftRight | ||
&& self.max_depth.rem(2) != 0) | ||
{ | ||
self.expect_token(&Token::Gt)?; | ||
} | ||
} else if depth.rem(2) == 0 && depth != self.max_depth { | ||
} else { | ||
dbg!("Else Level"); | ||
self.expect_token(&Token::ShiftRight)?; | ||
} | ||
|
||
if dialect_of!(self is PostgreSqlDialect) { | ||
Ok(DataType::BracketArray(Some(Box::new(inside_type)))) | ||
} else { | ||
Ok(DataType::Array(Some(Box::new(inside_type)))) | ||
} | ||
} | ||
} | ||
Keyword::MAP => { | ||
self.expect_token(&Token::Lt)?; | ||
let key = self.parse_data_type_with_depth(depth + 1)?; | ||
let tok = self.consume_token(&Token::Comma); | ||
debug!("Tok: {tok}"); | ||
let value = self.parse_data_type_with_depth(depth + 1)?; | ||
let tok = self.peek_token().token; | ||
debug!("Next Tok: {tok}"); | ||
if tok == Token::ShiftRight { | ||
self.expect_token(&Token::ShiftRight)?; | ||
} else if tok == Token::Gt { | ||
self.expect_token(&Token::Gt)?; | ||
Ok(DataType::Array(Some(Box::new(inside_type)))) | ||
} | ||
Ok(DataType::Map(Box::new(key), Box::new(value))) | ||
} | ||
Keyword::STRUCT => { | ||
self.expect_token(&Token::Lt)?; | ||
let fields = self.parse_comma_separated(Parser::parse_struct_fields)?; | ||
let tok = self.peek_token().token; | ||
debug!("Next Tok: {tok}"); | ||
if tok == Token::ShiftRight { | ||
self.expect_token(&Token::ShiftRight)?; | ||
} else if tok == Token::Gt { | ||
self.expect_token(&Token::Gt)?; | ||
} | ||
Ok(DataType::Struct(fields)) | ||
} | ||
_ => { | ||
self.prev_token(); | ||
|
@@ -4855,15 +4913,31 @@ | |
_ => self.expected("a data type name", next_token), | ||
}?; | ||
|
||
// Parse array data types. Note: this is postgresql-specific and different from | ||
// Keyword::ARRAY syntax from above | ||
while self.consume_token(&Token::LBracket) { | ||
self.expect_token(&Token::RBracket)?; | ||
data = DataType::Array(Some(Box::new(data))) | ||
data = DataType::BracketArray(Some(Box::new(data))) | ||
} | ||
Ok(data) | ||
} | ||
|
||
pub fn peek_previous_token(&mut self) -> Result<&TokenWithLocation, ParserError> { | ||
Ok(&self.tokens[self.index - 1]) | ||
} | ||
|
||
pub fn parse_struct_fields(&mut self) -> Result<StructField, ParserError> { | ||
let name = self.parse_identifier()?; | ||
self.expect_token(&Token::Colon)?; | ||
let data_type = self.parse_data_type()?; | ||
let options = self.parse_optional_column_option()?; | ||
Ok(StructField { | ||
name, | ||
data_type, | ||
options, | ||
}) | ||
} | ||
|
||
pub fn parse_string_values(&mut self) -> Result<Vec<String>, ParserError> { | ||
self.expect_token(&Token::LParen)?; | ||
let mut values = Vec::new(); | ||
|
@@ -5028,12 +5102,12 @@ | |
Token::EOF => { | ||
return Err(ParserError::ParserError( | ||
"Empty input when parsing identifier".to_string(), | ||
))? | ||
))?; | ||
} | ||
token => { | ||
return Err(ParserError::ParserError(format!( | ||
"Unexpected token in identifier: {token}" | ||
)))? | ||
)))?; | ||
} | ||
}; | ||
|
||
|
@@ -5046,19 +5120,19 @@ | |
Token::EOF => { | ||
return Err(ParserError::ParserError( | ||
"Trailing period in identifier".to_string(), | ||
))? | ||
))?; | ||
} | ||
token => { | ||
return Err(ParserError::ParserError(format!( | ||
"Unexpected token following period in identifier: {token}" | ||
)))? | ||
)))?; | ||
} | ||
}, | ||
Token::EOF => break, | ||
token => { | ||
return Err(ParserError::ParserError(format!( | ||
"Unexpected token in identifier: {token}" | ||
)))? | ||
)))?; | ||
} | ||
} | ||
} | ||
|
@@ -6031,7 +6105,7 @@ | |
_ => { | ||
return Err(ParserError::ParserError(format!( | ||
"expected OUTER, SEMI, ANTI or JOIN after {kw:?}" | ||
))) | ||
))); | ||
} | ||
} | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you please provide a link to what SQL dialect supports this syntax?