Skip to content

Commit

Permalink
Keep source syntax
Browse files Browse the repository at this point in the history
  • Loading branch information
yoavcloud committed Dec 13, 2024
1 parent 353ce58 commit 464fca3
Show file tree
Hide file tree
Showing 4 changed files with 164 additions and 103 deletions.
3 changes: 2 additions & 1 deletion src/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,8 @@ pub use self::query::{
RepetitionQuantifier, ReplaceSelectElement, ReplaceSelectItem, RowsPerMatch, Select,
SelectInto, SelectItem, SetExpr, SetOperator, SetQuantifier, Setting, SymbolDefinition, Table,
TableAlias, TableAliasColumnDef, TableFactor, TableFunctionArgs, TableSampleBernoulli,
TableSampleBucket, TableSampleImplicit, TableSampleKind, TableSampleMethod, TableSampleSystem,
TableSampleBucket, TableSampleImplicit, TableSampleKind, TableSampleMethod,
TableSampleMethodName, TableSampleSeed, TableSampleSeedModifier, TableSampleSystem,
TableSampleUnit, TableVersion, TableWithJoins, Top, TopQuantity, ValueTableMode, Values,
WildcardAdditionalOptions, With, WithFill,
};
Expand Down
99 changes: 81 additions & 18 deletions src/ast/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1172,21 +1172,96 @@ pub enum TableSampleMethod {
Implicit(TableSampleImplicit),
}

/// The keyword that introduced an explicit table sample method.
///
/// The parser records which spelling was used so that the `Display`
/// implementation can write the same keyword back out.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum TableSampleMethodName {
/// Displayed as `ROW`; parsed by the same routine as `BERNOULLI`.
Row,
/// Displayed as `BERNOULLI`.
Bernoulli,
/// Displayed as `SYSTEM`.
System,
/// Displayed as `BLOCK`; parsed by the same routine as `SYSTEM`.
Block,
}

impl fmt::Display for TableSampleMethodName {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
TableSampleMethodName::Bernoulli => write!(f, "BERNOULLI"),
TableSampleMethodName::Row => write!(f, "ROW"),
TableSampleMethodName::System => write!(f, "SYSTEM"),
TableSampleMethodName::Block => write!(f, "BLOCK"),
}
}
}

/// A table sample clause introduced by the `BERNOULLI` or `ROW` keyword.
///
/// `name` records which of the two keywords was written. The parser stores a
/// bare parenthesized number in `probability`; when the number is followed by
/// `ROWS` or `PERCENT` it is stored in `value` together with the `unit`.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub struct TableSampleBernoulli {
pub probability: Option<Expr>,
pub value: Option<Expr>,
pub name: TableSampleMethodName,
pub probability: Option<Value>,
pub value: Option<Value>,
pub unit: Option<TableSampleUnit>,
}

/// Writes the clause as ` <NAME> (<probability>)` or
/// ` <NAME> (<value>[ <unit>])`, with a leading space.
///
/// `probability` takes precedence over `value` when both are set. The closing
/// parenthesis is always emitted, so the output stays balanced even when
/// neither `probability` nor `value` is present (previously that case produced
/// a dangling ` <NAME> (`).
impl fmt::Display for TableSampleBernoulli {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, " {} (", self.name)?;
        if let Some(probability) = &self.probability {
            write!(f, "{}", probability)?;
        } else if let Some(value) = &self.value {
            write!(f, "{}", value)?;
            // The unit is only meaningful alongside an explicit value.
            if let Some(unit) = &self.unit {
                write!(f, " {}", unit)?;
            }
        }
        // Close the parenthesis on every path, not just the populated ones.
        write!(f, ")")
    }
}

/// A table sample clause introduced by the `SYSTEM` or `BLOCK` keyword.
///
/// `name` records which of the two keywords was written, `probability` is the
/// parenthesized number that follows it, and `seed` holds the optional
/// `REPEATABLE (..)` / `SEED (..)` suffix.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub struct TableSampleSystem {
pub probability: Expr,
pub repeatable: Option<Expr>,
pub name: TableSampleMethodName,
pub probability: Value,
pub seed: Option<TableSampleSeed>,
}

/// Writes the clause as ` <NAME> (<probability>)`, followed by
/// ` <MODIFIER> (<value>)` when a seed is present. Output starts with a space.
impl fmt::Display for TableSampleSystem {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, " {} ({})", self.name, self.probability)?;
        match &self.seed {
            Some(seed) => write!(f, " {} ({})", seed.modifier, seed.value),
            None => Ok(()),
        }
    }
}

/// The optional seed suffix of a `SYSTEM` / `BLOCK` table sample clause.
///
/// Displayed by `TableSampleSystem` as `<modifier> (<value>)`.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub struct TableSampleSeed {
/// Which keyword introduced the seed (`REPEATABLE` or `SEED`).
pub modifier: TableSampleSeedModifier,
/// The parenthesized value; the parser reads it with `parse_number_value`.
pub value: Value,
}

/// The keyword used to introduce a table sample seed.
///
/// Kept separately so the original spelling is preserved when displayed.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum TableSampleSeedModifier {
/// Displayed as `REPEATABLE`.
Repeatable,
/// Displayed as `SEED`.
Seed,
}

impl fmt::Display for TableSampleSeedModifier {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
TableSampleSeedModifier::Repeatable => write!(f, "REPEATABLE"),
TableSampleSeedModifier::Seed => write!(f, "SEED"),
}
}
}

#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
Expand Down Expand Up @@ -1248,22 +1323,10 @@ impl fmt::Display for TableSampleMethod {
write!(f, " TABLESAMPLE")?;
match self {
TableSampleMethod::Bernoulli(sample) => {
write!(f, " BERNOULLI (")?;
if let Some(probability) = &sample.probability {
write!(f, "{})", probability)?;
} else if let Some(value) = &sample.value {
write!(f, "{}", value)?;
if let Some(unit) = &sample.unit {
write!(f, " {}", unit)?;
}
write!(f, ")")?;
}
write!(f, "{}", sample)?;
}
TableSampleMethod::System(sample) => {
write!(f, " SYSTEM ({})", sample.probability)?;
if let Some(repeatable) = &sample.repeatable {
write!(f, " REPEATABLE ({})", repeatable)?;
}
write!(f, "{}", sample)?;
}
TableSampleMethod::Bucket(sample) => {
write!(f, " ({})", sample)?;
Expand Down
133 changes: 72 additions & 61 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10660,48 +10660,14 @@ impl<'a> Parser<'a> {
}

// Try to parse based on an explicit table sample method keyword
let sample = if self
.parse_one_of_keywords(&[Keyword::BERNOULLI, Keyword::ROW])
.is_some()
{
self.expect_token(&Token::LParen)?;
let expr = self.parse_expr()?;

let (probability, value, unit) = if self.parse_keyword(Keyword::ROWS) {
(None, Some(expr), Some(TableSampleUnit::Rows))
} else if self.parse_keyword(Keyword::PERCENT) {
(None, Some(expr), Some(TableSampleUnit::Percent))
} else {
(Some(expr), None, None)
};
self.expect_token(&Token::RParen)?;
TableSampleMethod::Bernoulli(TableSampleBernoulli {
probability,
value,
unit,
})
} else if self
.parse_one_of_keywords(&[Keyword::SYSTEM, Keyword::BLOCK])
.is_some()
{
self.expect_token(&Token::LParen)?;
let probability = self.parse_expr()?;
self.expect_token(&Token::RParen)?;
let seed = if self
.parse_one_of_keywords(&[Keyword::REPEATABLE, Keyword::SEED])
.is_some()
{
self.expect_token(&Token::LParen)?;
let seed = self.parse_expr()?;
self.expect_token(&Token::RParen)?;
Some(seed)
} else {
None
};
TableSampleMethod::System(TableSampleSystem {
probability,
repeatable: seed,
})
let sample = if self.parse_keyword(Keyword::BERNOULLI) {
self.parse_table_sample_bernoulli(TableSampleMethodName::Bernoulli)?
} else if self.parse_keyword(Keyword::ROW) {
self.parse_table_sample_bernoulli(TableSampleMethodName::Row)?
} else if self.parse_keyword(Keyword::SYSTEM) {
self.parse_table_sample_system(TableSampleMethodName::System)?
} else if self.parse_keyword(Keyword::BLOCK) {
self.parse_table_sample_system(TableSampleMethodName::Block)?
// Try to parse without an explicit table sample method keyword
} else if self.consume_token(&Token::LParen) {
if self.parse_keyword(Keyword::BUCKET) {
Expand Down Expand Up @@ -10729,36 +10695,81 @@ impl<'a> Parser<'a> {
}
}
};
if !self.dialect.supports_implicit_table_sample_method()
&& self.consume_token(&Token::RParen)
{
TableSampleMethod::Bernoulli(TableSampleBernoulli {
probability: Some(Expr::Value(value)),
unit: None,
value: None,
})
let unit = if self.parse_keyword(Keyword::ROWS) {
Some(TableSampleUnit::Rows)
} else if self.parse_keyword(Keyword::PERCENT) {
Some(TableSampleUnit::Percent)
} else {
let unit = if self.parse_keyword(Keyword::ROWS) {
Some(TableSampleUnit::Rows)
} else if self.parse_keyword(Keyword::PERCENT) {
Some(TableSampleUnit::Percent)
} else {
None
};
self.expect_token(&Token::RParen)?;
TableSampleMethod::Implicit(TableSampleImplicit { value, unit })
}
None
};
self.expect_token(&Token::RParen)?;
TableSampleMethod::Implicit(TableSampleImplicit { value, unit })
}
} else {
return parser_err!(
"Expecting BERNOULLI, ROW, SYSTEM or BLOCK",
"Expecting BERNOULLI, ROW, SYSTEM, BLOCK or a valid TABLESAMPLE expression in parenthesis",
self.peek_token().span.start
);
};

Ok(Some(Box::new(sample)))
}

/// Parses the parenthesized body of a `BERNOULLI` / `ROW` sample clause:
/// `( <number> [ROWS | PERCENT] )`.
///
/// The caller has already consumed the method keyword and passes it as `name`.
/// A bare number is stored as `probability`; a number followed by `ROWS` or
/// `PERCENT` is stored as `value` together with its `unit`.
///
/// # Errors
///
/// Propagates any error from `expect_token` or `parse_number_value`.
fn parse_table_sample_bernoulli(
    &mut self,
    name: TableSampleMethodName,
) -> Result<TableSampleMethod, ParserError> {
    self.expect_token(&Token::LParen)?;
    let number = self.parse_number_value()?;

    // Decide on the unit first; it determines which field receives the number.
    let unit = if self.parse_keyword(Keyword::ROWS) {
        Some(TableSampleUnit::Rows)
    } else if self.parse_keyword(Keyword::PERCENT) {
        Some(TableSampleUnit::Percent)
    } else {
        None
    };
    self.expect_token(&Token::RParen)?;

    let (probability, value) = match unit {
        Some(_) => (None, Some(number)),
        None => (Some(number), None),
    };

    let sample = TableSampleBernoulli {
        name,
        probability,
        value,
        unit,
    };
    Ok(TableSampleMethod::Bernoulli(sample))
}

/// Parses the remainder of a `SYSTEM` / `BLOCK` sample clause:
/// `( <number> ) [ REPEATABLE ( <number> ) | SEED ( <number> ) ]`.
///
/// The caller has already consumed the method keyword and passes it as `name`.
///
/// # Errors
///
/// Propagates any error from `expect_token`, `parse_number_value` or
/// `parse_table_sample_seed`.
fn parse_table_sample_system(
    &mut self,
    name: TableSampleMethodName,
) -> Result<TableSampleMethod, ParserError> {
    self.expect_token(&Token::LParen)?;
    let probability = self.parse_number_value()?;
    self.expect_token(&Token::RParen)?;

    // Work out which seed keyword (if any) follows, then parse its argument.
    let modifier = if self.parse_keyword(Keyword::REPEATABLE) {
        Some(TableSampleSeedModifier::Repeatable)
    } else if self.parse_keyword(Keyword::SEED) {
        Some(TableSampleSeedModifier::Seed)
    } else {
        None
    };
    let seed = match modifier {
        Some(modifier) => Some(self.parse_table_sample_seed(modifier)?),
        None => None,
    };

    let sample = TableSampleSystem {
        name,
        probability,
        seed,
    };
    Ok(TableSampleMethod::System(sample))
}

/// Parses the parenthesized argument of a seed clause: `( <number> )`.
///
/// The caller has already consumed the `REPEATABLE` / `SEED` keyword and
/// passes the matching `modifier`.
///
/// # Errors
///
/// Propagates any error from `expect_token` or `parse_number_value`.
fn parse_table_sample_seed(
    &mut self,
    modifier: TableSampleSeedModifier,
) -> Result<TableSampleSeed, ParserError> {
    self.expect_token(&Token::LParen)?;
    let seed_value = self.parse_number_value()?;
    self.expect_token(&Token::RParen)?;

    let seed = TableSampleSeed {
        modifier,
        value: seed_value,
    };
    Ok(seed)
}

/// Parses `OPENJSON( jsonExpression [ , path ] ) [ <with_clause> ]` clause,
/// assuming the `OPENJSON` keyword was already consumed.
fn parse_open_json_table_factor(&mut self) -> Result<TableFactor, ParserError> {
Expand Down
32 changes: 9 additions & 23 deletions tests/sqlparser_snowflake.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2961,30 +2961,16 @@ fn parse_insert_overwrite() {

#[test]
fn test_table_sample() {
snowflake_and_generic()
.verified_stmt("SELECT * FROM testtable AS t TABLESAMPLE BERNOULLI (10)");

// In Snowflake we translate implicit table sample method to bernoulli
snowflake().one_statement_parses_to(
"SELECT * FROM testtable SAMPLE (10)",
"SELECT * FROM testtable TABLESAMPLE BERNOULLI (10)",
);

snowflake_and_generic().one_statement_parses_to(
"SELECT * FROM testtable TABLESAMPLE ROW (20.3)",
"SELECT * FROM testtable TABLESAMPLE BERNOULLI (20.3)",
);

snowflake_and_generic().one_statement_parses_to(
"SELECT * FROM testtable SAMPLE BLOCK (3) SEED (82)",
"SELECT * FROM testtable TABLESAMPLE SYSTEM (3) REPEATABLE (82)",
);

snowflake_and_generic().one_statement_parses_to(
"SELECT * FROM testtable SAMPLE BLOCK (0.012) SEED (99992)",
"SELECT * FROM testtable TABLESAMPLE SYSTEM (0.012) REPEATABLE (99992)",
"SELECT * FROM testtable SAMPLE (10)",
"SELECT * FROM testtable TABLESAMPLE (10)",
);

snowflake_and_generic()
.verified_stmt("SELECT * FROM testtable TABLESAMPLE BERNOULLI (10 ROWS)");
.verified_stmt("SELECT * FROM testtable AS t TABLESAMPLE BERNOULLI (10)");
snowflake_and_generic().verified_stmt("SELECT * FROM testtable AS t TABLESAMPLE ROW (10)");
snowflake_and_generic().verified_stmt("SELECT * FROM testtable AS t TABLESAMPLE ROW (10 ROWS)");
snowflake_and_generic()
.verified_stmt("SELECT * FROM testtable TABLESAMPLE BLOCK (3) SEED (82)");
snowflake_and_generic()
.verified_stmt("SELECT * FROM testtable TABLESAMPLE SYSTEM (3) REPEATABLE (82)");
}

0 comments on commit 464fca3

Please sign in to comment.