Skip to content

Commit

Permalink
Support trailing commas in FROM clause (#1645)
Browse files Browse the repository at this point in the history
Co-authored-by: Ifeanyi Ubah <[email protected]>
  • Loading branch information
barsela1 and iffyio authored Jan 14, 2025
1 parent 6507484 commit 36db176
Show file tree
Hide file tree
Showing 5 changed files with 88 additions and 12 deletions.
12 changes: 12 additions & 0 deletions src/dialect/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,12 @@ pub trait Dialect: Debug + Any {
self.supports_trailing_commas()
}

/// Returns true if the dialect supports trailing commas in the `FROM` clause of a `SELECT` statement.
///
/// Example: `SELECT 1 FROM T, U, LIMIT 1`
fn supports_from_trailing_commas(&self) -> bool {
// Disabled by default; a dialect opts in by overriding this method.
false
}

/// Returns true if the dialect supports double dot notation for object names
///
/// Example
Expand Down Expand Up @@ -775,6 +781,12 @@ pub trait Dialect: Debug + Any {
keywords::RESERVED_FOR_IDENTIFIER.contains(&kw)
}

/// Returns the reserved keywords consulted when looking to parse a [TableFactor].
///
/// The default implementation returns [keywords::RESERVED_FOR_TABLE_FACTOR];
/// a dialect may override this method to supply its own list.
/// See [Self::supports_from_trailing_commas]
fn get_reserved_keywords_for_table_factor(&self) -> &[Keyword] {
keywords::RESERVED_FOR_TABLE_FACTOR
}

/// Returns true if this dialect supports the `TABLESAMPLE` option
/// before the table alias option. For example:
///
Expand Down
4 changes: 4 additions & 0 deletions src/dialect/snowflake.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,10 @@ impl Dialect for SnowflakeDialect {
true
}

// Snowflake opts in to trailing commas in the `FROM` clause,
// overriding the trait default.
fn supports_from_trailing_commas(&self) -> bool {
true
}

// Snowflake supports double-dot notation when the schema name is not specified
// In this case the default PUBLIC schema is used
//
Expand Down
10 changes: 10 additions & 0 deletions src/keywords.rs
Original file line number Diff line number Diff line change
Expand Up @@ -999,6 +999,16 @@ pub const RESERVED_FOR_COLUMN_ALIAS: &[Keyword] = &[
Keyword::END,
];

/// Global list of reserved keywords that may directly follow a trailing comma
/// in the `FROM` clause. Parser should call `Dialect::get_reserved_keywords_for_table_factor`
/// to allow for each dialect to customize the list.
pub const RESERVED_FOR_TABLE_FACTOR: &[Keyword] = &[
Keyword::INTO,
Keyword::LIMIT,
Keyword::HAVING,
Keyword::WHERE,
];

/// Global list of reserved keywords that cannot be parsed as identifiers
/// without special handling like quoting. Parser should call `Dialect::is_reserved_for_identifier`
/// to allow for each dialect to customize the list.
Expand Down
42 changes: 31 additions & 11 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3940,7 +3940,11 @@ impl<'a> Parser<'a> {
let trailing_commas =
self.options.trailing_commas | self.dialect.supports_projection_trailing_commas();

self.parse_comma_separated_with_trailing_commas(|p| p.parse_select_item(), trailing_commas)
self.parse_comma_separated_with_trailing_commas(
|p| p.parse_select_item(),
trailing_commas,
None,
)
}

pub fn parse_actions_list(&mut self) -> Result<Vec<ParsedAction>, ParserError> {
Expand All @@ -3966,20 +3970,32 @@ impl<'a> Parser<'a> {
Ok(values)
}

/// Parse a list of [TableWithJoins]
fn parse_table_with_joins(&mut self) -> Result<Vec<TableWithJoins>, ParserError> {
let trailing_commas = self.dialect.supports_from_trailing_commas();

self.parse_comma_separated_with_trailing_commas(
Parser::parse_table_and_joins,
trailing_commas,
Some(self.dialect.get_reserved_keywords_for_table_factor()),
)
}

/// Parse the comma of a comma-separated syntax element.
/// Allows for control over trailing commas
/// Returns true if the end of the list has been reached (i.e. there is no next element)
fn is_parse_comma_separated_end_with_trailing_commas(&mut self, trailing_commas: bool) -> bool {
fn is_parse_comma_separated_end_with_trailing_commas(
&mut self,
trailing_commas: bool,
reserved_keywords: Option<&[Keyword]>,
) -> bool {
let reserved_keywords = reserved_keywords.unwrap_or(keywords::RESERVED_FOR_COLUMN_ALIAS);
if !self.consume_token(&Token::Comma) {
true
} else if trailing_commas {
let token = self.peek_token().token;
match token {
Token::Word(ref kw)
if keywords::RESERVED_FOR_COLUMN_ALIAS.contains(&kw.keyword) =>
{
true
}
Token::Word(ref kw) if reserved_keywords.contains(&kw.keyword) => true,
Token::RParen | Token::SemiColon | Token::EOF | Token::RBracket | Token::RBrace => {
true
}
Expand All @@ -3993,15 +4009,15 @@ impl<'a> Parser<'a> {
/// Parse the comma of a comma-separated syntax element.
/// Returns true if the end of the list has been reached (i.e. there is no next element)
fn is_parse_comma_separated_end(&mut self) -> bool {
self.is_parse_comma_separated_end_with_trailing_commas(self.options.trailing_commas)
self.is_parse_comma_separated_end_with_trailing_commas(self.options.trailing_commas, None)
}

/// Parse a comma-separated list of 1+ items accepted by `F`
pub fn parse_comma_separated<T, F>(&mut self, f: F) -> Result<Vec<T>, ParserError>
where
F: FnMut(&mut Parser<'a>) -> Result<T, ParserError>,
{
self.parse_comma_separated_with_trailing_commas(f, self.options.trailing_commas)
self.parse_comma_separated_with_trailing_commas(f, self.options.trailing_commas, None)
}

/// Parse a comma-separated list of 1+ items accepted by `F`
Expand All @@ -4010,14 +4026,18 @@ impl<'a> Parser<'a> {
&mut self,
mut f: F,
trailing_commas: bool,
reserved_keywords: Option<&[Keyword]>,
) -> Result<Vec<T>, ParserError>
where
F: FnMut(&mut Parser<'a>) -> Result<T, ParserError>,
{
let mut values = vec![];
loop {
values.push(f(self)?);
if self.is_parse_comma_separated_end_with_trailing_commas(trailing_commas) {
if self.is_parse_comma_separated_end_with_trailing_commas(
trailing_commas,
reserved_keywords,
) {
break;
}
}
Expand Down Expand Up @@ -10073,7 +10093,7 @@ impl<'a> Parser<'a> {
// or `from`.

let from = if self.parse_keyword(Keyword::FROM) {
self.parse_comma_separated(Parser::parse_table_and_joins)?
self.parse_table_with_joins()?
} else {
vec![]
};
Expand Down
32 changes: 31 additions & 1 deletion tests/sqlparser_common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12957,8 +12957,38 @@ fn parse_update_from_before_select() {
parse_sql_statements(query).unwrap_err()
);
}

// Runs a SELECT that applies the `OVERLAPS` predicate to two pairs of DATE
// literals through `verified_stmt`.
#[test]
fn parse_overlaps() {
verified_stmt("SELECT (DATE '2016-01-10', DATE '2016-02-01') OVERLAPS (DATE '2016-01-20', DATE '2016-02-10')");
}

// Exercises trailing commas in the `FROM` clause against every dialect that
// reports `supports_from_trailing_commas()`.
#[test]
fn test_trailing_commas_in_from() {
    let supported = all_dialects_where(|dialect| dialect.supports_from_trailing_commas());

    // Trailing comma after a single table, then after several tables.
    supported.verified_only_select_with_canonical("SELECT 1, 2 FROM t,", "SELECT 1, 2 FROM t");
    supported
        .verified_only_select_with_canonical("SELECT 1, 2 FROM t1, t2,", "SELECT 1, 2 FROM t1, t2");

    // Statements that are only required to parse without error.
    for sql in [
        "SELECT a, FROM b, LIMIT 1",
        "INSERT INTO a SELECT b FROM c,",
        "SELECT a FROM b, HAVING COUNT(*) > 1",
        "SELECT a FROM b, WHERE c = 1",
        // nested
        "SELECT 1, 2 FROM (SELECT * FROM t,),",
    ] {
        let _ = supported.parse_sql_statements(sql).unwrap();
    }

    // multiple subqueries
    supported.verified_only_select_with_canonical(
        "SELECT 1, 2 FROM (SELECT * FROM t1), (SELECT * FROM t2),",
        "SELECT 1, 2 FROM (SELECT * FROM t1), (SELECT * FROM t2)",
    );
}

0 comments on commit 36db176

Please sign in to comment.