From 464fca39f3909b25584467e38787b67c4f396756 Mon Sep 17 00:00:00 2001 From: Yoav Cohen Date: Mon, 9 Dec 2024 15:27:38 +0100 Subject: [PATCH] Keep source syntax --- src/ast/mod.rs | 3 +- src/ast/query.rs | 99 +++++++++++++++++++++----- src/parser/mod.rs | 133 +++++++++++++++++++---------------- tests/sqlparser_snowflake.rs | 32 +++------ 4 files changed, 164 insertions(+), 103 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index c4603d462..440dfe1a0 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -70,7 +70,8 @@ pub use self::query::{ RepetitionQuantifier, ReplaceSelectElement, ReplaceSelectItem, RowsPerMatch, Select, SelectInto, SelectItem, SetExpr, SetOperator, SetQuantifier, Setting, SymbolDefinition, Table, TableAlias, TableAliasColumnDef, TableFactor, TableFunctionArgs, TableSampleBernoulli, - TableSampleBucket, TableSampleImplicit, TableSampleKind, TableSampleMethod, TableSampleSystem, + TableSampleBucket, TableSampleImplicit, TableSampleKind, TableSampleMethod, + TableSampleMethodName, TableSampleSeed, TableSampleSeedModifier, TableSampleSystem, TableSampleUnit, TableVersion, TableWithJoins, Top, TopQuantity, ValueTableMode, Values, WildcardAdditionalOptions, With, WithFill, }; diff --git a/src/ast/query.rs b/src/ast/query.rs index 76c834e7c..f5b52a4e9 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -1172,21 +1172,96 @@ pub enum TableSampleMethod { Implicit(TableSampleImplicit), } +/// The table sample method names +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum TableSampleMethodName { + Row, + Bernoulli, + System, + Block, +} + +impl fmt::Display for TableSampleMethodName { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + TableSampleMethodName::Bernoulli => write!(f, "BERNOULLI"), + TableSampleMethodName::Row => write!(f, "ROW"), + TableSampleMethodName::System => write!(f, "SYSTEM"), + TableSampleMethodName::Block => write!(f, "BLOCK"), + } + } +} + #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct TableSampleBernoulli { - pub probability: Option, - pub value: Option, + pub name: TableSampleMethodName, + pub probability: Option, + pub value: Option, pub unit: Option, } +impl fmt::Display for TableSampleBernoulli { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, " {} (", self.name)?; + if let Some(probability) = &self.probability { + write!(f, "{})", probability)?; + } else if let Some(value) = &self.value { + write!(f, "{}", value)?; + if let Some(unit) = &self.unit { + write!(f, " {}", unit)?; + } + write!(f, ")")?; + } + Ok(()) + } +} + #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct TableSampleSystem { - pub probability: Expr, - pub repeatable: Option, + pub name: TableSampleMethodName, + pub probability: Value, + pub seed: Option, +} + +impl fmt::Display for TableSampleSystem { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, " {} ({})", self.name, self.probability)?; + if let Some(seed) = &self.seed { + write!(f, " {} ({})", seed.modifier, seed.value)?; + } + Ok(()) + } +} + +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct TableSampleSeed { + pub modifier: TableSampleSeedModifier, + pub value: Value, +} + +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum TableSampleSeedModifier { + Repeatable, + Seed, +} + +impl fmt::Display for TableSampleSeedModifier { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + TableSampleSeedModifier::Repeatable => write!(f, "REPEATABLE"), + TableSampleSeedModifier::Seed => write!(f, "SEED"), + } + } } #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] @@ -1248,22 +1323,10 @@ impl fmt::Display for TableSampleMethod { write!(f, " TABLESAMPLE")?; match self { TableSampleMethod::Bernoulli(sample) => { - write!(f, " BERNOULLI (")?; - if let Some(probability) = &sample.probability { - write!(f, "{})", probability)?; - } else if let Some(value) = &sample.value { - write!(f, "{}", value)?; - if let Some(unit) = &sample.unit { - write!(f, " {}", unit)?; - } - write!(f, ")")?; - } + write!(f, "{}", sample)?; } TableSampleMethod::System(sample) => { - write!(f, " SYSTEM ({})", sample.probability)?; - if let Some(repeatable) = &sample.repeatable { - write!(f, " REPEATABLE ({})", repeatable)?; - } + write!(f, "{}", sample)?; } TableSampleMethod::Bucket(sample) => { write!(f, " ({})", sample)?; diff --git a/src/parser/mod.rs b/src/parser/mod.rs index f41dcc96b..adb099937 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -10660,48 +10660,14 @@ impl<'a> Parser<'a> { } // Try to parse based on an explicit table sample method keyword - let sample = if self - .parse_one_of_keywords(&[Keyword::BERNOULLI, Keyword::ROW]) - .is_some() - { - self.expect_token(&Token::LParen)?; - let expr = self.parse_expr()?; - - let (probability, value, unit) = if self.parse_keyword(Keyword::ROWS) { - (None, Some(expr), Some(TableSampleUnit::Rows)) - } else if self.parse_keyword(Keyword::PERCENT) { - (None, Some(expr), Some(TableSampleUnit::Percent)) - } else { - (Some(expr), None, None) - }; - self.expect_token(&Token::RParen)?; - TableSampleMethod::Bernoulli(TableSampleBernoulli { - probability, - value, - unit, - }) - } else if self - .parse_one_of_keywords(&[Keyword::SYSTEM, Keyword::BLOCK]) - .is_some() - { - self.expect_token(&Token::LParen)?; - let probability = self.parse_expr()?; - self.expect_token(&Token::RParen)?; - let seed = if self - .parse_one_of_keywords(&[Keyword::REPEATABLE, Keyword::SEED]) - .is_some() - { - self.expect_token(&Token::LParen)?; - let seed = self.parse_expr()?; - self.expect_token(&Token::RParen)?; - Some(seed) - } else { - None - }; - TableSampleMethod::System(TableSampleSystem { - probability, - repeatable: seed, - }) + let sample = if self.parse_keyword(Keyword::BERNOULLI) { + self.parse_table_sample_bernoulli(TableSampleMethodName::Bernoulli)? + } else if self.parse_keyword(Keyword::ROW) { + self.parse_table_sample_bernoulli(TableSampleMethodName::Row)? + } else if self.parse_keyword(Keyword::SYSTEM) { + self.parse_table_sample_system(TableSampleMethodName::System)? + } else if self.parse_keyword(Keyword::BLOCK) { + self.parse_table_sample_system(TableSampleMethodName::Block)? // Try to parse without an explicit table sample method keyword } else if self.consume_token(&Token::LParen) { if self.parse_keyword(Keyword::BUCKET) { @@ -10729,29 +10695,19 @@ impl<'a> Parser<'a> { } } }; - if !self.dialect.supports_implicit_table_sample_method() - && self.consume_token(&Token::RParen) - { - TableSampleMethod::Bernoulli(TableSampleBernoulli { - probability: Some(Expr::Value(value)), - unit: None, - value: None, - }) + let unit = if self.parse_keyword(Keyword::ROWS) { + Some(TableSampleUnit::Rows) + } else if self.parse_keyword(Keyword::PERCENT) { + Some(TableSampleUnit::Percent) } else { - let unit = if self.parse_keyword(Keyword::ROWS) { - Some(TableSampleUnit::Rows) - } else if self.parse_keyword(Keyword::PERCENT) { - Some(TableSampleUnit::Percent) - } else { - None - }; - self.expect_token(&Token::RParen)?; - TableSampleMethod::Implicit(TableSampleImplicit { value, unit }) - } + None + }; + self.expect_token(&Token::RParen)?; + TableSampleMethod::Implicit(TableSampleImplicit { value, unit }) } } else { return parser_err!( - "Expecting BERNOULLI, ROW, SYSTEM or BLOCK", + "Expecting BERNOULLI, ROW, SYSTEM, BLOCK or a valid TABLESAMPLE expression in parenthesis", self.peek_token().span.start ); }; @@ -10759,6 +10715,61 @@ impl<'a> Parser<'a> { Ok(Some(Box::new(sample))) } + fn parse_table_sample_bernoulli( + &mut self, + name: TableSampleMethodName, + ) -> Result { + self.expect_token(&Token::LParen)?; + let value = self.parse_number_value()?; + let (probability, value, unit) = if self.parse_keyword(Keyword::ROWS) { + (None, Some(value), Some(TableSampleUnit::Rows)) + } else if self.parse_keyword(Keyword::PERCENT) { + (None, Some(value), Some(TableSampleUnit::Percent)) + } else { + (Some(value), None, None) + }; + self.expect_token(&Token::RParen)?; + Ok(TableSampleMethod::Bernoulli(TableSampleBernoulli { + name, + probability, + value, + unit, + })) + } + + fn parse_table_sample_system( + &mut self, + name: TableSampleMethodName, + ) -> Result { + self.expect_token(&Token::LParen)?; + let probability = self.parse_number_value()?; + self.expect_token(&Token::RParen)?; + + let seed = if self.parse_keyword(Keyword::REPEATABLE) { + Some(self.parse_table_sample_seed(TableSampleSeedModifier::Repeatable)?) + } else if self.parse_keyword(Keyword::SEED) { + Some(self.parse_table_sample_seed(TableSampleSeedModifier::Seed)?) + } else { + None + }; + + Ok(TableSampleMethod::System(TableSampleSystem { + name, + probability, + seed, + })) + } + + fn parse_table_sample_seed( + &mut self, + modifier: TableSampleSeedModifier, + ) -> Result { + self.expect_token(&Token::LParen)?; + let value = self.parse_number_value()?; + self.expect_token(&Token::RParen)?; + Ok(TableSampleSeed { modifier, value }) + } + /// Parses `OPENJSON( jsonExpression [ , path ] ) [ ]` clause, /// assuming the `OPENJSON` keyword was already consumed. fn parse_open_json_table_factor(&mut self) -> Result { diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 7c0ba9a90..dd22cd290 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -2961,30 +2961,16 @@ fn parse_insert_overwrite() { #[test] fn test_table_sample() { - snowflake_and_generic() - .verified_stmt("SELECT * FROM testtable AS t TABLESAMPLE BERNOULLI (10)"); - - // In Snowflake we translate implicit table sample method to bernoulli - snowflake().one_statement_parses_to( - "SELECT * FROM testtable SAMPLE (10)", - "SELECT * FROM testtable TABLESAMPLE BERNOULLI (10)", - ); - - snowflake_and_generic().one_statement_parses_to( - "SELECT * FROM testtable TABLESAMPLE ROW (20.3)", - "SELECT * FROM testtable TABLESAMPLE BERNOULLI (20.3)", - ); - snowflake_and_generic().one_statement_parses_to( - "SELECT * FROM testtable SAMPLE BLOCK (3) SEED (82)", - "SELECT * FROM testtable TABLESAMPLE SYSTEM (3) REPEATABLE (82)", - ); - - snowflake_and_generic().one_statement_parses_to( - "SELECT * FROM testtable SAMPLE BLOCK (0.012) SEED (99992)", - "SELECT * FROM testtable TABLESAMPLE SYSTEM (0.012) REPEATABLE (99992)", + "SELECT * FROM testtable SAMPLE (10)", + "SELECT * FROM testtable TABLESAMPLE (10)", ); - snowflake_and_generic() - .verified_stmt("SELECT * FROM testtable TABLESAMPLE BERNOULLI (10 ROWS)"); + .verified_stmt("SELECT * FROM testtable AS t TABLESAMPLE BERNOULLI (10)"); + snowflake_and_generic().verified_stmt("SELECT * FROM testtable AS t TABLESAMPLE ROW (10)"); + snowflake_and_generic().verified_stmt("SELECT * FROM testtable AS t TABLESAMPLE ROW (10 ROWS)"); + snowflake_and_generic() + .verified_stmt("SELECT * FROM testtable TABLESAMPLE BLOCK (3) SEED (82)"); + snowflake_and_generic() + .verified_stmt("SELECT * FROM testtable TABLESAMPLE SYSTEM (3) REPEATABLE (82)"); }