From df7b1ba96f6127ed32218ce44b8e43f628db4e45 Mon Sep 17 00:00:00 2001 From: Yoav Cohen Date: Sat, 7 Dec 2024 06:58:48 +0100 Subject: [PATCH 01/10] Add support for table sample, initial commit --- src/ast/mod.rs | 6 +- src/ast/query.rs | 129 ++++++++++++ src/ast/spans.rs | 2 + src/dialect/hive.rs | 5 + src/dialect/mod.rs | 7 + src/keywords.rs | 8 + src/parser/mod.rs | 128 ++++++++++++ src/test_utils.rs | 19 ++ tests/sqlparser_bigquery.rs | 28 +-- tests/sqlparser_clickhouse.rs | 15 +- tests/sqlparser_common.rs | 355 ++++++++-------------------------- tests/sqlparser_databricks.rs | 11 +- tests/sqlparser_duckdb.rs | 38 +--- tests/sqlparser_hive.rs | 11 ++ tests/sqlparser_mssql.rs | 35 ++-- tests/sqlparser_mysql.rs | 49 ++--- tests/sqlparser_postgres.rs | 4 +- tests/sqlparser_redshift.rs | 70 +++---- tests/sqlparser_snowflake.rs | 33 +++- tests/sqlparser_sqlite.rs | 11 +- 20 files changed, 506 insertions(+), 458 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index cfd0ac089..32379270e 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -69,8 +69,10 @@ pub use self::query::{ OrderBy, OrderByExpr, PivotValueSource, ProjectionSelect, Query, RenameSelectItem, RepetitionQuantifier, ReplaceSelectElement, ReplaceSelectItem, RowsPerMatch, Select, SelectInto, SelectItem, SetExpr, SetOperator, SetQuantifier, Setting, SymbolDefinition, Table, - TableAlias, TableAliasColumnDef, TableFactor, TableFunctionArgs, TableVersion, TableWithJoins, - Top, TopQuantity, ValueTableMode, Values, WildcardAdditionalOptions, With, WithFill, + TableAlias, TableAliasColumnDef, TableFactor, TableFunctionArgs, TableSample, + TableSampleBernoulli, TableSampleBucket, TableSampleImplicit, TableSampleSystem, + TableSampleUnit, TableVersion, TableWithJoins, Top, TopQuantity, ValueTableMode, Values, + WildcardAdditionalOptions, With, WithFill, }; pub use self::trigger::{ diff --git a/src/ast/query.rs b/src/ast/query.rs index ad7fd261e..00c4f6f5f 100644 --- a/src/ast/query.rs +++ 
b/src/ast/query.rs @@ -1002,6 +1002,12 @@ pub enum TableFactor { partitions: Vec, /// Optional PartiQL JsonPath: json_path: Option, + /// Optional table sample modifier + /// See: + sample: Option, + /// Position of the table sample modifier in the table factor. Default is after the table alias + /// e.g. `SELECT * FROM tbl t TABLESAMPLE (10 ROWS)`. See `Dialect::supports_table_sample_before_alias`. + sample_before_alias: bool, }, Derived { lateral: bool, @@ -1146,6 +1152,121 @@ pub enum TableFactor { }, } +/// The table sample modifier options +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum TableSample { + Bernoulli(TableSampleBernoulli), + System(TableSampleSystem), + Bucket(TableSampleBucket), + Implicit(TableSampleImplicit), +} + +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct TableSampleBernoulli { + pub probability: Option, + pub value: Option, + pub unit: Option, +} + +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct TableSampleSystem { + pub probability: Expr, + pub seed: Option, +} + +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum TableSampleUnit { + Rows, + Percent, +} + +impl fmt::Display for TableSampleUnit { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + TableSampleUnit::Percent => write!(f, "PERCENT"), + TableSampleUnit::Rows => write!(f, "ROWS"), + } + } +} + +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] 
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct TableSampleBucket { + pub bucket: Value, + pub total: Value, + pub on: Option, +} + +impl fmt::Display for TableSampleBucket { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "BUCKET {} OUT OF {}", self.bucket, self.total)?; + if let Some(on) = &self.on { + write!(f, " ON {}", on)?; + } + Ok(()) + } +} + +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct TableSampleImplicit { + pub value: Value, + pub unit: Option, +} + +impl fmt::Display for TableSampleImplicit { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.value)?; + if let Some(unit) = &self.unit { + write!(f, " {}", unit)?; + } + Ok(()) + } +} + +impl fmt::Display for TableSample { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, " TABLESAMPLE")?; + match self { + TableSample::Bernoulli(sample) => { + write!(f, " BERNOULLI (")?; + if let Some(probability) = &sample.probability { + write!(f, "{})", probability)?; + } else if let Some(value) = &sample.value { + write!(f, "{}", value)?; + if let Some(unit) = &sample.unit { + write!(f, " {}", unit)?; + } + write!(f, ")")?; + } + } + TableSample::System(sample) => { + write!(f, " SYSTEM ({})", sample.probability)?; + if let Some(seed) = &sample.seed { + write!(f, " SEED ({})", seed)?; + } + } + TableSample::Bucket(sample) => { + write!(f, " ({})", sample)?; + } + TableSample::Implicit(sample) => { + write!(f, " ({})", sample)?; + } + } + Ok(()) + } +} + /// The source of values in a `PIVOT` operation. 
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] @@ -1404,6 +1525,8 @@ impl fmt::Display for TableFactor { partitions, with_ordinality, json_path, + sample, + sample_before_alias, } => { write!(f, "{name}")?; if let Some(json_path) = json_path { @@ -1426,6 +1549,9 @@ impl fmt::Display for TableFactor { if *with_ordinality { write!(f, " WITH ORDINALITY")?; } + if let (Some(sample), true) = (sample, sample_before_alias) { + write!(f, "{sample}")?; + } if let Some(alias) = alias { write!(f, " AS {alias}")?; } @@ -1435,6 +1561,9 @@ impl fmt::Display for TableFactor { if let Some(version) = version { write!(f, "{version}")?; } + if let (Some(sample), false) = (sample, sample_before_alias) { + write!(f, "{sample}")?; + } Ok(()) } TableFactor::Derived { diff --git a/src/ast/spans.rs b/src/ast/spans.rs index 88e0fbdf2..e369bdc90 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -1699,6 +1699,8 @@ impl Spanned for TableFactor { with_ordinality: _, partitions: _, json_path: _, + sample: _, + sample_before_alias: _, } => union_spans( name.0 .iter() diff --git a/src/dialect/hive.rs b/src/dialect/hive.rs index 571f9b9ba..80f44cf7c 100644 --- a/src/dialect/hive.rs +++ b/src/dialect/hive.rs @@ -61,4 +61,9 @@ impl Dialect for HiveDialect { fn supports_load_data(&self) -> bool { true } + + /// See Hive + fn supports_table_sample_before_alias(&self) -> bool { + true + } } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index f40cba719..0c1a83f4f 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -707,6 +707,13 @@ pub trait Dialect: Debug + Any { fn is_reserved_for_identifier(&self, kw: Keyword) -> bool { keywords::RESERVED_FOR_IDENTIFIER.contains(&kw) } + + /// Returns true if the dialect supports the `TABLESAMPLE` option + /// before the table alias option. 
+ /// + fn supports_table_sample_before_alias(&self) -> bool { + false + } } /// This represents the operators for which precedence must be defined diff --git a/src/keywords.rs b/src/keywords.rs index d0cfcd05b..7e3354078 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -120,6 +120,7 @@ define_keywords!( BEGIN, BEGIN_FRAME, BEGIN_PARTITION, + BERNOULLI, BETWEEN, BIGDECIMAL, BIGINT, @@ -128,12 +129,14 @@ define_keywords!( BINDING, BIT, BLOB, + BLOCK, BLOOMFILTER, BOOL, BOOLEAN, BOTH, BROWSE, BTREE, + BUCKET, BUCKETS, BY, BYPASSRLS, @@ -680,6 +683,7 @@ define_keywords!( RUN, SAFE, SAFE_CAST, + SAMPLE, SAVEPOINT, SCHEMA, SCHEMAS, @@ -690,6 +694,7 @@ define_keywords!( SECONDARY, SECRET, SECURITY, + SEED, SELECT, SEMI, SENSITIVE, @@ -932,6 +937,9 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[ Keyword::CONNECT, // Reserved for snowflake MATCH_RECOGNIZE Keyword::MATCH_RECOGNIZE, + // Reserved for Snowflake table sample + Keyword::SAMPLE, + Keyword::TABLESAMPLE, ]; /// Can't be used as a column alias, so that `SELECT alias` diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 39ab2db24..e0cd33810 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -10594,6 +10594,15 @@ impl<'a> Parser<'a> { let with_ordinality = self.parse_keywords(&[Keyword::WITH, Keyword::ORDINALITY]); + let mut sample = None; + let mut sample_before_alias = false; + if self.dialect.supports_table_sample_before_alias() { + sample = self.parse_optional_table_sample()?; + if sample.is_some() { + sample_before_alias = true; + } + } + let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; // MSSQL-specific table hints: @@ -10608,6 +10617,11 @@ impl<'a> Parser<'a> { } }; + if !self.dialect.supports_table_sample_before_alias() { + sample = self.parse_optional_table_sample()?; + sample_before_alias = false; + } + let mut table = TableFactor::Table { name, alias, @@ -10617,6 +10631,8 @@ impl<'a> Parser<'a> { partitions, with_ordinality, json_path, + sample, + 
sample_before_alias, }; while let Some(kw) = self.parse_one_of_keywords(&[Keyword::PIVOT, Keyword::UNPIVOT]) { @@ -10637,6 +10653,118 @@ impl<'a> Parser<'a> { } } + fn parse_optional_table_sample(&mut self) -> Result, ParserError> { + if self + .parse_one_of_keywords(&[Keyword::SAMPLE, Keyword::TABLESAMPLE]) + .is_none() + { + return Ok(None); + } + + if self + .parse_one_of_keywords(&[Keyword::BERNOULLI, Keyword::ROW]) + .is_some() + { + self.expect_token(&Token::LParen)?; + let expr = self.parse_expr()?; + + let (probability, value, unit) = if self.parse_keyword(Keyword::ROWS) { + (None, Some(expr), Some(TableSampleUnit::Rows)) + } else if self.parse_keyword(Keyword::PERCENT) { + (None, Some(expr), Some(TableSampleUnit::Percent)) + } else { + (Some(expr), None, None) + }; + self.expect_token(&Token::RParen)?; + Ok(Some(TableSample::Bernoulli(TableSampleBernoulli { + probability, + value, + unit, + }))) + } else if self + .parse_one_of_keywords(&[Keyword::SYSTEM, Keyword::BLOCK]) + .is_some() + { + self.expect_token(&Token::LParen)?; + let probability = self.parse_expr()?; + self.expect_token(&Token::RParen)?; + let seed = if self + .parse_one_of_keywords(&[Keyword::REPEATABLE, Keyword::SEED]) + .is_some() + { + self.expect_token(&Token::LParen)?; + let seed = self.parse_expr()?; + self.expect_token(&Token::RParen)?; + Some(seed) + } else { + None + }; + Ok(Some(TableSample::System(TableSampleSystem { + probability, + seed, + }))) + } else if self.peek_token().token == Token::LParen { + self.expect_token(&Token::LParen)?; + if self.parse_keyword(Keyword::BUCKET) { + let bucket = self.parse_number_value()?; + self.expect_keywords(&[Keyword::OUT, Keyword::OF])?; + let total = self.parse_number_value()?; + let on = if self.parse_keyword(Keyword::ON) { + Some(self.parse_expr()?) 
+ } else { + None + }; + self.expect_token(&Token::RParen)?; + Ok(Some(TableSample::Bucket(TableSampleBucket { + bucket, + total, + on, + }))) + } else { + let value = match self.try_parse(|p| p.parse_number_value()) { + Ok(num) => num, + _ => { + if let Token::Word(w) = self.next_token().token { + Value::Placeholder(w.value) + } else { + return parser_err!( + "Expecting number or byte length e.g. 100M", + self.peek_token().span.start + ); + } + } + }; + if self.peek_token().token == Token::RParen && dialect_of!(self is SnowflakeDialect) + { + self.expect_token(&Token::RParen)?; + Ok(Some(TableSample::Bernoulli(TableSampleBernoulli { + probability: Some(Expr::Value(value)), + unit: None, + value: None, + }))) + } else { + let unit = if self.parse_keyword(Keyword::ROWS) { + Some(TableSampleUnit::Rows) + } else if self.parse_keyword(Keyword::PERCENT) { + Some(TableSampleUnit::Percent) + } else { + None + }; + self.expect_token(&Token::RParen)?; + Ok(Some(TableSample::Implicit(TableSampleImplicit { + value, + unit, + }))) + } + } + } else { + return parser_err!( + "Expecting BERNOULLI, ROW, SYSTEM or BLOCK", + self.peek_token().span.start + ); + } + } + /// Parses `OPENJSON( jsonExpression [ , path ] ) [ ]` clause, /// assuming the `OPENJSON` keyword was already consumed. 
fn parse_open_json_table_factor(&mut self) -> Result { diff --git a/src/test_utils.rs b/src/test_utils.rs index 6e60a31c1..97a16b873 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -346,6 +346,23 @@ pub fn table(name: impl Into) -> TableFactor { partitions: vec![], with_ordinality: false, json_path: None, + sample: None, + sample_before_alias: false, + } +} + +pub fn table_from_name(name: ObjectName) -> TableFactor { + TableFactor::Table { + name, + alias: None, + args: None, + with_hints: vec![], + version: None, + partitions: vec![], + with_ordinality: false, + json_path: None, + sample: None, + sample_before_alias: false, } } @@ -362,6 +379,8 @@ pub fn table_with_alias(name: impl Into, alias: impl Into) -> Ta partitions: vec![], with_ordinality: false, json_path: None, + sample: None, + sample_before_alias: false, } } diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 2be128a8c..11373d2e1 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -222,16 +222,7 @@ fn parse_delete_statement() { .. 
}) => { assert_eq!( - TableFactor::Table { - name: ObjectName(vec![Ident::with_quote('"', "table")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + table_from_name(ObjectName(vec![Ident::with_quote('"', "table")])), from[0].relation ); } @@ -1379,16 +1370,7 @@ fn parse_table_identifiers() { assert_eq!( select.from, vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(expected), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(expected)), joins: vec![] },] ); @@ -1562,6 +1544,8 @@ fn parse_table_time_travel() { partitions: vec![], with_ordinality: false, json_path: None, + sample: None, + sample_before_alias: false, }, joins: vec![] },] @@ -1661,6 +1645,8 @@ fn parse_merge() { partitions: Default::default(), with_ordinality: false, json_path: None, + sample: None, + sample_before_alias: false, }, table ); @@ -1677,6 +1663,8 @@ fn parse_merge() { partitions: Default::default(), with_ordinality: false, json_path: None, + sample: None, + sample_before_alias: false, }, source ); diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 9d785576f..98f4c0f27 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -63,16 +63,7 @@ fn parse_map_access_expr() { })], into: None, from: vec![TableWithJoins { - relation: Table { - name: ObjectName(vec![Ident::new("foos")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec![Ident::new("foos")])), joins: vec![], }], lateral_views: vec![], @@ -175,9 +166,7 @@ fn parse_delimited_identifiers() { args, with_hints, version, - with_ordinality: _, - partitions: _, - json_path: _, + .. 
} => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); assert_eq!(Ident::with_quote('"', "alias"), alias.unwrap().name); diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 7dfb98d6f..9a7caff34 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -41,7 +41,7 @@ use sqlparser::tokenizer::Span; use sqlparser::tokenizer::Tokenizer; use test_utils::{ all_dialects, all_dialects_where, alter_table_op, assert_eq_vec, call, expr_from_projection, - join, number, only, table, table_alias, TestedDialects, + join, number, only, table, table_alias, table_from_name, TestedDialects, }; #[macro_use] @@ -359,16 +359,7 @@ fn parse_update_set_from() { stmt, Statement::Update { table: TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident::new("t1")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec![Ident::new("t1")])), joins: vec![], }, assignments: vec![Assignment { @@ -391,16 +382,7 @@ fn parse_update_set_from() { ], into: None, from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident::new("t1")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec![Ident::new("t1")])), joins: vec![], }], lateral_views: vec![], @@ -480,6 +462,8 @@ fn parse_update_with_table_alias() { partitions: vec![], with_ordinality: false, json_path: None, + sample: None, + sample_before_alias: false, }, joins: vec![], }, @@ -572,6 +556,8 @@ fn parse_select_with_table_alias() { partitions: vec![], with_ordinality: false, json_path: None, + sample: None, + sample_before_alias: false, }, joins: vec![], }] @@ -601,16 +587,7 @@ fn parse_delete_statement() { .. 
}) => { assert_eq!( - TableFactor::Table { - name: ObjectName(vec![Ident::with_quote('"', "table")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + table_from_name(ObjectName(vec![Ident::with_quote('"', "table")])), from[0].relation ); } @@ -649,29 +626,17 @@ fn parse_delete_statement_for_multi_tables() { tables[1] ); assert_eq!( - TableFactor::Table { - name: ObjectName(vec![Ident::new("schema1"), Ident::new("table1")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + table_from_name(ObjectName(vec![ + Ident::new("schema1"), + Ident::new("table1") + ])), from[0].relation ); assert_eq!( - TableFactor::Table { - name: ObjectName(vec![Ident::new("schema2"), Ident::new("table2")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + table_from_name(ObjectName(vec![ + Ident::new("schema2"), + Ident::new("table2") + ])), from[0].joins[0].relation ); } @@ -689,55 +654,31 @@ fn parse_delete_statement_for_multi_tables_with_using() { .. 
}) => { assert_eq!( - TableFactor::Table { - name: ObjectName(vec![Ident::new("schema1"), Ident::new("table1")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + table_from_name(ObjectName(vec![ + Ident::new("schema1"), + Ident::new("table1") + ])), from[0].relation ); assert_eq!( - TableFactor::Table { - name: ObjectName(vec![Ident::new("schema2"), Ident::new("table2")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + table_from_name(ObjectName(vec![ + Ident::new("schema2"), + Ident::new("table2") + ])), from[1].relation ); assert_eq!( - TableFactor::Table { - name: ObjectName(vec![Ident::new("schema1"), Ident::new("table1")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + table_from_name(ObjectName(vec![ + Ident::new("schema1"), + Ident::new("table1") + ])), using[0].relation ); assert_eq!( - TableFactor::Table { - name: ObjectName(vec![Ident::new("schema2"), Ident::new("table2")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + table_from_name(ObjectName(vec![ + Ident::new("schema2"), + Ident::new("table2") + ])), using[0].joins[0].relation ); } @@ -760,16 +701,7 @@ fn parse_where_delete_statement() { .. 
}) => { assert_eq!( - TableFactor::Table { - name: ObjectName(vec![Ident::new("foo")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + table_from_name(ObjectName(vec![Ident::new("foo")])), from[0].relation, ); @@ -815,6 +747,8 @@ fn parse_where_delete_with_alias_statement() { partitions: vec![], with_ordinality: false, json_path: None, + sample: None, + sample_before_alias: false, }, from[0].relation, ); @@ -832,6 +766,8 @@ fn parse_where_delete_with_alias_statement() { partitions: vec![], with_ordinality: false, json_path: None, + sample: None, + sample_before_alias: false, }, joins: vec![], }]), @@ -4920,20 +4856,11 @@ fn test_parse_named_window() { ], into: None, from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident { - value: "aggregate_test_100".to_string(), - quote_style: None, - span: Span::empty(), - }]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec![Ident { + value: "aggregate_test_100".to_string(), + quote_style: None, + span: Span::empty(), + }])), joins: vec![], }], lateral_views: vec![], @@ -5511,20 +5438,11 @@ fn parse_interval_and_or_xor() { }))], into: None, from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident { - value: "test".to_string(), - quote_style: None, - span: Span::empty(), - }]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec![Ident { + value: "test".to_string(), + quote_style: None, + span: Span::empty(), + }])), joins: vec![], }], lateral_views: vec![], @@ -6132,29 +6050,11 @@ fn parse_implicit_join() { assert_eq!( vec![ TableWithJoins { - relation: TableFactor::Table { - name: 
ObjectName(vec!["t1".into()]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec!["t1".into()])), joins: vec![], }, TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec!["t2".into()]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec!["t2".into()])), joins: vec![], }, ], @@ -6166,53 +6066,17 @@ fn parse_implicit_join() { assert_eq!( vec![ TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec!["t1a".into()]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec!["t1a".into()])), joins: vec![Join { - relation: TableFactor::Table { - name: ObjectName(vec!["t1b".into()]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec!["t1b".into()])), global: false, join_operator: JoinOperator::Inner(JoinConstraint::Natural), }], }, TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec!["t2a".into()]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec!["t2a".into()])), joins: vec![Join { - relation: TableFactor::Table { - name: ObjectName(vec!["t2b".into()]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec!["t2b".into()])), global: false, join_operator: JoinOperator::Inner(JoinConstraint::Natural), }], @@ -6228,16 +6092,7 @@ fn 
parse_cross_join() { let select = verified_only_select(sql); assert_eq!( Join { - relation: TableFactor::Table { - name: ObjectName(vec![Ident::new("t2")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec![Ident::new("t2")])), global: false, join_operator: JoinOperator::CrossJoin, }, @@ -6263,6 +6118,8 @@ fn parse_joins_on() { partitions: vec![], with_ordinality: false, json_path: None, + sample: None, + sample_before_alias: false, }, global, join_operator: f(JoinConstraint::On(Expr::BinaryOp { @@ -6391,6 +6248,8 @@ fn parse_joins_using() { partitions: vec![], with_ordinality: false, json_path: None, + sample: None, + sample_before_alias: false, }, global: false, join_operator: f(JoinConstraint::Using(vec!["c1".into()])), @@ -6465,6 +6324,8 @@ fn parse_natural_join() { partitions: vec![], with_ordinality: false, json_path: None, + sample: None, + sample_before_alias: false, }, global: false, join_operator: f(JoinConstraint::Natural), @@ -6728,16 +6589,7 @@ fn parse_derived_tables() { }), }, joins: vec![Join { - relation: TableFactor::Table { - name: ObjectName(vec!["t2".into()]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec!["t2".into()])), global: false, join_operator: JoinOperator::Inner(JoinConstraint::Natural), }], @@ -7668,20 +7520,11 @@ fn lateral_function() { top_before_distinct: false, into: None, from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident { - value: "customer".to_string(), - quote_style: None, - span: Span::empty(), - }]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec![Ident { + value: 
"customer".to_string(), + quote_style: None, + span: Span::empty(), + }])), joins: vec![Join { relation: TableFactor::Function { lateral: true, @@ -8499,6 +8342,8 @@ fn parse_merge() { partitions: vec![], with_ordinality: false, json_path: None, + sample: None, + sample_before_alias: false, } ); assert_eq!(table, table_no_into); @@ -8519,16 +8364,10 @@ fn parse_merge() { )], into: None, from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident::new("s"), Ident::new("foo")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec![ + Ident::new("s"), + Ident::new("foo") + ])), joins: vec![], }], lateral_views: vec![], @@ -9611,6 +9450,8 @@ fn parse_pivot_table() { partitions: vec![], with_ordinality: false, json_path: None, + sample: None, + sample_before_alias: false, }), aggregate_functions: vec![ expected_function("a", None), @@ -9686,6 +9527,8 @@ fn parse_unpivot_table() { partitions: vec![], with_ordinality: false, json_path: None, + sample: None, + sample_before_alias: false, }), value: Ident { value: "quantity".to_string(), @@ -9756,6 +9599,8 @@ fn parse_pivot_unpivot_table() { partitions: vec![], with_ordinality: false, json_path: None, + sample: None, + sample_before_alias: false, }), value: Ident { value: "population".to_string(), @@ -10165,16 +10010,7 @@ fn parse_unload() { projection: vec![UnnamedExpr(Expr::Identifier(Ident::new("cola"))),], into: None, from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident::new("tab")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec![Ident::new("tab")])), joins: vec![], }], lateral_views: vec![], @@ -10348,16 +10184,7 @@ fn parse_connect_by() { 
SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("title"))), ], from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident::new("employees")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec![Ident::new("employees")])), joins: vec![], }], into: None, @@ -10437,16 +10264,7 @@ fn parse_connect_by() { SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("title"))), ], from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident::new("employees")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec![Ident::new("employees")])), joins: vec![], }], into: None, @@ -10601,16 +10419,7 @@ fn test_match_recognize() { use MatchRecognizeSymbol::*; use RepetitionQuantifier::*; - let table = TableFactor::Table { - name: ObjectName(vec![Ident::new("my_table")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }; + let table = table_from_name(ObjectName(vec![Ident::new("my_table")])); fn check(options: &str, expect: TableFactor) { let select = all_dialects_where(|d| d.supports_match_recognize()).verified_only_select( diff --git a/tests/sqlparser_databricks.rs b/tests/sqlparser_databricks.rs index d73c088a7..b9ca55d13 100644 --- a/tests/sqlparser_databricks.rs +++ b/tests/sqlparser_databricks.rs @@ -185,16 +185,7 @@ fn test_values_clause() { "SELECT * FROM values", )); assert_eq!( - Some(&TableFactor::Table { - name: ObjectName(vec![Ident::new("values")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }), + Some(&table_from_name(ObjectName(vec![Ident::new("values")]))), query 
.body .as_select() .unwrap() .from[0] .relation ); } diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index a0fc49b9f..d441cd195 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -268,20 +268,11 @@ fn test_select_union_by_name() { top_before_distinct: false, into: None, from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident { - value: "capitals".to_string(), - quote_style: None, - span: Span::empty(), - }]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec![Ident { + value: "capitals".to_string(), + quote_style: None, + span: Span::empty(), + }])), joins: vec![], }], lateral_views: vec![], @@ -306,20 +297,11 @@ fn test_select_union_by_name() { top_before_distinct: false, into: None, from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident { - value: "weather".to_string(), - quote_style: None, - span: Span::empty(), - }]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec![Ident { + value: "weather".to_string(), + quote_style: None, + span: Span::empty(), + }])), joins: vec![], }], lateral_views: vec![], diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 981218388..5cff7a10a 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -459,6 +459,8 @@ fn parse_delimited_identifiers() { with_ordinality: _, partitions: _, json_path: _, + sample: _, + sample_before_alias: _, } => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); assert_eq!(Ident::with_quote('"', "alias"), alias.unwrap().name); @@ -537,6 +539,15 @@ fn parse_use() { ); } +#[test] +fn test_table_sample() { + hive().verified_stmt("SELECT * FROM source TABLESAMPLE (BUCKET 3 OUT OF 32 ON rand()) AS s"); + 
hive().verified_stmt("SELECT * FROM source TABLESAMPLE (BUCKET 3 OUT OF 16 ON id)"); + hive().verified_stmt("SELECT * FROM source TABLESAMPLE (100M) AS s"); + hive().verified_stmt("SELECT * FROM source TABLESAMPLE (0.1 PERCENT) AS s"); + hive().verified_stmt("SELECT * FROM source TABLESAMPLE (10 ROWS)"); +} + fn hive() -> TestedDialects { TestedDialects::new(vec![Box::new(HiveDialect {})]) } diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index 66e40f46b..673df1c64 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -73,6 +73,8 @@ fn parse_table_time_travel() { partitions: vec![], with_ordinality: false, json_path: None, + sample: None, + sample_before_alias: false, }, joins: vec![] },] @@ -221,6 +223,8 @@ fn parse_mssql_openjson() { with_ordinality: false, partitions: vec![], json_path: None, + sample: None, + sample_before_alias: false, }, joins: vec![Join { relation: TableFactor::OpenJsonTable { @@ -279,6 +283,8 @@ fn parse_mssql_openjson() { with_ordinality: false, partitions: vec![], json_path: None, + sample: None, + sample_before_alias: false, }, joins: vec![Join { relation: TableFactor::OpenJsonTable { @@ -338,6 +344,8 @@ fn parse_mssql_openjson() { with_ordinality: false, partitions: vec![], json_path: None, + sample: None, + sample_before_alias: false, }, joins: vec![Join { relation: TableFactor::OpenJsonTable { @@ -396,6 +404,8 @@ fn parse_mssql_openjson() { with_ordinality: false, partitions: vec![], json_path: None, + sample: None, + sample_before_alias: false, }, joins: vec![Join { relation: TableFactor::OpenJsonTable { @@ -434,6 +444,8 @@ fn parse_mssql_openjson() { with_ordinality: false, partitions: vec![], json_path: None, + sample: None, + sample_before_alias: false, }, joins: vec![Join { relation: TableFactor::OpenJsonTable { @@ -611,9 +623,7 @@ fn parse_delimited_identifiers() { args, with_hints, version, - with_ordinality: _, - partitions: _, - json_path: _, + .. 
} => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); assert_eq!(Ident::with_quote('"', "alias"), alias.unwrap().name); @@ -1082,20 +1092,11 @@ fn parse_substring_in_select() { })], into: None, from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident { - value: "test".to_string(), - quote_style: None, - span: Span::empty(), - }]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec![Ident { + value: "test".to_string(), + quote_style: None, + span: Span::empty(), + }])), joins: vec![] }], lateral_views: vec![], diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index cac1af852..45362e07a 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -1884,16 +1884,9 @@ fn parse_select_with_numeric_prefix_column_name() { )))], into: None, from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident::with_quote('"', "table")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec![Ident::with_quote( + '"', "table" + )])), joins: vec![] }], lateral_views: vec![], @@ -1943,16 +1936,9 @@ fn parse_select_with_concatenation_of_exp_number_and_numeric_prefix_column() { ], into: None, from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident::with_quote('"', "table")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec![Ident::with_quote( + '"', "table" + )])), joins: vec![] }], lateral_views: vec![], @@ -2020,6 +2006,8 @@ fn parse_update_with_joins() { partitions: vec![], with_ordinality: false, json_path: None, + sample: None, + sample_before_alias: false, 
}, joins: vec![Join { relation: TableFactor::Table { @@ -2034,6 +2022,8 @@ fn parse_update_with_joins() { partitions: vec![], with_ordinality: false, json_path: None, + sample: None, + sample_before_alias: false, }, global: false, join_operator: JoinOperator::Inner(JoinConstraint::On(Expr::BinaryOp { @@ -2464,20 +2454,11 @@ fn parse_substring_in_select() { })], into: None, from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident { - value: "test".to_string(), - quote_style: None, - span: Span::empty(), - }]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec![Ident { + value: "test".to_string(), + quote_style: None, + span: Span::empty(), + }])), joins: vec![] }], lateral_views: vec![], diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 2e204d9bc..aaf4e65db 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -3581,9 +3581,7 @@ fn parse_delimited_identifiers() { args, with_hints, version, - with_ordinality: _, - partitions: _, - json_path: _, + .. 
} => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); assert_eq!(Ident::with_quote('"', "alias"), alias.unwrap().name); diff --git a/tests/sqlparser_redshift.rs b/tests/sqlparser_redshift.rs index 2fd855a09..9492946d3 100644 --- a/tests/sqlparser_redshift.rs +++ b/tests/sqlparser_redshift.rs @@ -39,27 +39,18 @@ fn test_square_brackets_over_db_schema_table_name() { assert_eq!( select.from[0], TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![ - Ident { - value: "test_schema".to_string(), - quote_style: Some('['), - span: Span::empty(), - }, - Ident { - value: "test_table".to_string(), - quote_style: Some('['), - span: Span::empty(), - } - ]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec![ + Ident { + value: "test_schema".to_string(), + quote_style: Some('['), + span: Span::empty(), + }, + Ident { + value: "test_table".to_string(), + quote_style: Some('['), + span: Span::empty(), + } + ])), joins: vec![], } ); @@ -90,27 +81,18 @@ fn test_double_quotes_over_db_schema_table_name() { assert_eq!( select.from[0], TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![ - Ident { - value: "test_schema".to_string(), - quote_style: Some('"'), - span: Span::empty(), - }, - Ident { - value: "test_table".to_string(), - quote_style: Some('"'), - span: Span::empty(), - } - ]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec![ + Ident { + value: "test_schema".to_string(), + quote_style: Some('"'), + span: Span::empty(), + }, + Ident { + value: "test_table".to_string(), + quote_style: Some('"'), + span: Span::empty(), + } + ])), joins: vec![], } ); @@ -130,9 +112,7 @@ fn parse_delimited_identifiers() { args, with_hints, version, - with_ordinality: _, - 
partitions: _, - json_path: _, + .. } => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); assert_eq!(Ident::with_quote('"', "alias"), alias.unwrap().name); diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index d6774c317..9e41e6099 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -1188,9 +1188,7 @@ fn parse_delimited_identifiers() { args, with_hints, version, - with_ordinality: _, - partitions: _, - json_path: _, + .. } => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); assert_eq!(Ident::with_quote('"', "alias"), alias.unwrap().name); @@ -2960,3 +2958,32 @@ fn parse_insert_overwrite() { let insert_overwrite_into = r#"INSERT OVERWRITE INTO schema.table SELECT a FROM b"#; snowflake().verified_stmt(insert_overwrite_into); } + +#[test] +fn test_table_sample() { + snowflake_and_generic() + .verified_stmt("SELECT * FROM testtable AS t TABLESAMPLE BERNOULLI (10)"); + + snowflake().one_statement_parses_to( + "SELECT * FROM testtable SAMPLE (10)", + "SELECT * FROM testtable TABLESAMPLE BERNOULLI (10)", + ); + + snowflake_and_generic().one_statement_parses_to( + "SELECT * FROM testtable TABLESAMPLE ROW (20.3)", + "SELECT * FROM testtable TABLESAMPLE BERNOULLI (20.3)", + ); + + snowflake_and_generic().one_statement_parses_to( + "SELECT * FROM testtable SAMPLE BLOCK (3) SEED (82)", + "SELECT * FROM testtable TABLESAMPLE SYSTEM (3) SEED (82)", + ); + + snowflake_and_generic().one_statement_parses_to( + "SELECT * FROM testtable SAMPLE BLOCK (0.012) REPEATABLE (99992)", + "SELECT * FROM testtable TABLESAMPLE SYSTEM (0.012) SEED (99992)", + ); + + snowflake_and_generic() + .verified_stmt("SELECT * FROM testtable TABLESAMPLE BERNOULLI (10 ROWS)"); +} diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs index 987b1263d..ff0b54ef7 100644 --- a/tests/sqlparser_sqlite.rs +++ b/tests/sqlparser_sqlite.rs @@ -479,16 +479,7 @@ fn parse_update_tuple_row_values() { }], selection: None, table: 
TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident::new("x")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec![Ident::new("x")])), joins: vec![], }, from: None, From 3f6822f88ffce513b7f72890a2e16f65008c81a2 Mon Sep 17 00:00:00 2001 From: Yoav Cohen Date: Sat, 7 Dec 2024 07:37:36 +0100 Subject: [PATCH 02/10] Model implicit table sample logic in dialect --- src/dialect/hive.rs | 5 +++++ src/dialect/mod.rs | 8 +++++++- src/parser/mod.rs | 11 +++++++---- 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/src/dialect/hive.rs b/src/dialect/hive.rs index 80f44cf7c..d967ec991 100644 --- a/src/dialect/hive.rs +++ b/src/dialect/hive.rs @@ -66,4 +66,9 @@ impl Dialect for HiveDialect { fn supports_table_sample_before_alias(&self) -> bool { true } + + /// See Hive + fn supports_implicit_table_sample(&self) -> bool { + true + } } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 0c1a83f4f..37dcf7005 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -708,12 +708,18 @@ pub trait Dialect: Debug + Any { keywords::RESERVED_FOR_IDENTIFIER.contains(&kw) } - /// Returns true if the dialect supports the `TABLESAMPLE` option + /// Returns true if this dialect supports the `TABLESAMPLE` option /// before the table alias option. /// fn supports_table_sample_before_alias(&self) -> bool { false } + + /// Returns true if this dialect support not specifying a table sample method. 
+ /// + fn supports_implicit_table_sample(&self) -> bool { + false + } } /// This represents the operators for which precedence must be defined diff --git a/src/parser/mod.rs b/src/parser/mod.rs index e0cd33810..d9856cb17 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -10597,8 +10597,10 @@ impl<'a> Parser<'a> { let mut sample = None; let mut sample_before_alias = false; if self.dialect.supports_table_sample_before_alias() { - sample = self.parse_optional_table_sample()?; + sample = self.maybe_parse_table_sample()?; if sample.is_some() { + // No need to modify the default is no sample option + // exists on the statement sample_before_alias = true; } } @@ -10618,7 +10620,7 @@ impl<'a> Parser<'a> { }; if !self.dialect.supports_table_sample_before_alias() { - sample = self.parse_optional_table_sample()?; + sample = self.maybe_parse_table_sample()?; sample_before_alias = false; } @@ -10653,7 +10655,7 @@ impl<'a> Parser<'a> { } } - fn parse_optional_table_sample(&mut self) -> Result, ParserError> { + fn maybe_parse_table_sample(&mut self) -> Result, ParserError> { if self .parse_one_of_keywords(&[Keyword::SAMPLE, Keyword::TABLESAMPLE]) .is_none() @@ -10734,7 +10736,8 @@ impl<'a> Parser<'a> { } } }; - if self.peek_token().token == Token::RParen && dialect_of!(self is SnowflakeDialect) + if self.peek_token().token == Token::RParen + && !self.dialect.supports_implicit_table_sample() { self.expect_token(&Token::RParen)?; Ok(Some(TableSample::Bernoulli(TableSampleBernoulli { From 3a70360bf56de63300f7188c46f836d6e38d9188 Mon Sep 17 00:00:00 2001 From: Yoav Cohen Date: Sat, 7 Dec 2024 07:52:54 +0100 Subject: [PATCH 03/10] Add unit tests for common --- src/ast/query.rs | 6 +++--- src/parser/mod.rs | 2 +- tests/sqlparser_common.rs | 16 ++++++++++++++++ tests/sqlparser_snowflake.rs | 7 ++++--- 4 files changed, 24 insertions(+), 7 deletions(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index 00c4f6f5f..2db554b35 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs 
@@ -1177,7 +1177,7 @@ pub struct TableSampleBernoulli { #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct TableSampleSystem { pub probability: Expr, - pub seed: Option, + pub repeatable: Option, } #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] @@ -1252,8 +1252,8 @@ impl fmt::Display for TableSample { } TableSample::System(sample) => { write!(f, " SYSTEM ({})", sample.probability)?; - if let Some(seed) = &sample.seed { - write!(f, " SEED ({})", seed)?; + if let Some(repeatable) = &sample.repeatable { + write!(f, " REPEATABLE ({})", repeatable)?; } } TableSample::Bucket(sample) => { diff --git a/src/parser/mod.rs b/src/parser/mod.rs index d9856cb17..8cb452f7d 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -10703,7 +10703,7 @@ impl<'a> Parser<'a> { }; Ok(Some(TableSample::System(TableSampleSystem { probability, - seed, + repeatable: seed, }))) } else if self.peek_token().token == Token::LParen { self.expect_token(&Token::LParen)?; diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 9a7caff34..c574d72c5 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -12394,3 +12394,19 @@ fn parse_create_table_with_enum_types() { ParserError::ParserError("Expected: literal string, found: 2".to_string()) ); } + +#[test] +fn test_table_sample() { + let dialects = all_dialects_where(|d| !d.supports_implicit_table_sample()); + dialects.verified_stmt("SELECT * FROM tbl AS t TABLESAMPLE BERNOULLI (50)"); + dialects.verified_stmt("SELECT * FROM tbl AS t TABLESAMPLE SYSTEM (50)"); + dialects.verified_stmt("SELECT * FROM tbl AS t TABLESAMPLE SYSTEM (50) REPEATABLE (10)"); + + // The only dialect that supports implicit tablesample is Hive and it requires aliase after the table sample + let dialects = all_dialects_where(|d| { + d.supports_implicit_table_sample() && d.supports_table_sample_before_alias() + }); + dialects.verified_stmt("SELECT * FROM tbl TABLESAMPLE (50) AS t"); + 
dialects.verified_stmt("SELECT * FROM tbl TABLESAMPLE (50 ROWS) AS t"); + dialects.verified_stmt("SELECT * FROM tbl TABLESAMPLE (50 PERCENT) AS t"); +} diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 9e41e6099..7c0ba9a90 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -2964,6 +2964,7 @@ fn test_table_sample() { snowflake_and_generic() .verified_stmt("SELECT * FROM testtable AS t TABLESAMPLE BERNOULLI (10)"); + // In Snowflake we translate implicit table sample method to bernoulli snowflake().one_statement_parses_to( "SELECT * FROM testtable SAMPLE (10)", "SELECT * FROM testtable TABLESAMPLE BERNOULLI (10)", @@ -2976,12 +2977,12 @@ fn test_table_sample() { snowflake_and_generic().one_statement_parses_to( "SELECT * FROM testtable SAMPLE BLOCK (3) SEED (82)", - "SELECT * FROM testtable TABLESAMPLE SYSTEM (3) SEED (82)", + "SELECT * FROM testtable TABLESAMPLE SYSTEM (3) REPEATABLE (82)", ); snowflake_and_generic().one_statement_parses_to( - "SELECT * FROM testtable SAMPLE BLOCK (0.012) REPEATABLE (99992)", - "SELECT * FROM testtable TABLESAMPLE SYSTEM (0.012) SEED (99992)", + "SELECT * FROM testtable SAMPLE BLOCK (0.012) SEED (99992)", + "SELECT * FROM testtable TABLESAMPLE SYSTEM (0.012) REPEATABLE (99992)", ); snowflake_and_generic() From aa907f1d9e0dc96273ed565496fbe4f17fbae397 Mon Sep 17 00:00:00 2001 From: Yoav Cohen Date: Sat, 7 Dec 2024 08:34:59 +0100 Subject: [PATCH 04/10] Box TableSample to avoid increasing the size of Table --- src/ast/query.rs | 2 +- src/parser/mod.rs | 28 +++++++++++++++------------- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index 2db554b35..8d35202bb 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -1004,7 +1004,7 @@ pub enum TableFactor { json_path: Option, /// Optional table sample modifier /// See: - sample: Option, + sample: Option>, /// Position of the table sample modifier in the table factor. 
Default is after the table alias /// e.g. `SELECT * FROM tbl t TABLESAMPLE (10 ROWS)`. See `Dialect::supports_table_sample_before_alias`. sample_before_alias: bool, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 8cb452f7d..68ab6e123 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -10655,7 +10655,7 @@ impl<'a> Parser<'a> { } } - fn maybe_parse_table_sample(&mut self) -> Result, ParserError> { + fn maybe_parse_table_sample(&mut self) -> Result>, ParserError> { if self .parse_one_of_keywords(&[Keyword::SAMPLE, Keyword::TABLESAMPLE]) .is_none() @@ -10663,7 +10663,7 @@ impl<'a> Parser<'a> { return Ok(None); } - if self + let sample = if self .parse_one_of_keywords(&[Keyword::BERNOULLI, Keyword::ROW]) .is_some() { @@ -10678,11 +10678,11 @@ impl<'a> Parser<'a> { (Some(expr), None, None) }; self.expect_token(&Token::RParen)?; - Ok(Some(TableSample::Bernoulli(TableSampleBernoulli { + TableSample::Bernoulli(TableSampleBernoulli { probability, value, unit, - }))) + }) } else if self .parse_one_of_keywords(&[Keyword::SYSTEM, Keyword::BLOCK]) .is_some() @@ -10701,10 +10701,10 @@ impl<'a> Parser<'a> { } else { None }; - Ok(Some(TableSample::System(TableSampleSystem { + TableSample::System(TableSampleSystem { probability, repeatable: seed, - }))) + }) } else if self.peek_token().token == Token::LParen { self.expect_token(&Token::LParen)?; if self.parse_keyword(Keyword::BUCKET) { @@ -10717,11 +10717,11 @@ impl<'a> Parser<'a> { None }; self.expect_token(&Token::RParen)?; - Ok(Some(TableSample::Bucket(TableSampleBucket { + TableSample::Bucket(TableSampleBucket { bucket, total, on, - }))) + }) } else { let value = match self.try_parse(|p| p.parse_number_value()) { Ok(num) => num, @@ -10740,11 +10740,11 @@ impl<'a> Parser<'a> { && !self.dialect.supports_implicit_table_sample() { self.expect_token(&Token::RParen)?; - Ok(Some(TableSample::Bernoulli(TableSampleBernoulli { + TableSample::Bernoulli(TableSampleBernoulli { probability: Some(Expr::Value(value)), unit: 
None, value: None, - }))) + }) } else { let unit = if self.parse_keyword(Keyword::ROWS) { Some(TableSampleUnit::Rows) @@ -10754,10 +10754,10 @@ impl<'a> Parser<'a> { None }; self.expect_token(&Token::RParen)?; - Ok(Some(TableSample::Implicit(TableSampleImplicit { + TableSample::Implicit(TableSampleImplicit { value, unit, - }))) + }) } } } else { @@ -10765,7 +10765,9 @@ impl<'a> Parser<'a> { "Expecting BERNOULLI, ROW, SYSTEM or BLOCK", self.peek_token().span.start ); - } + }; + + Ok(Some(Box::new(sample))) } /// Parses `OPENJSON( jsonExpression [ , path ] ) [ ]` clause, From 92091a9a03fdfb925c12d6ecc6310e39b81a84e9 Mon Sep 17 00:00:00 2001 From: Yoav Cohen Date: Sat, 7 Dec 2024 08:36:05 +0100 Subject: [PATCH 05/10] Box TableSample to avoid increasing the size of Table --- src/parser/mod.rs | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 68ab6e123..ed6f685aa 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -10717,11 +10717,7 @@ impl<'a> Parser<'a> { None }; self.expect_token(&Token::RParen)?; - TableSample::Bucket(TableSampleBucket { - bucket, - total, - on, - }) + TableSample::Bucket(TableSampleBucket { bucket, total, on }) } else { let value = match self.try_parse(|p| p.parse_number_value()) { Ok(num) => num, @@ -10754,10 +10750,7 @@ impl<'a> Parser<'a> { None }; self.expect_token(&Token::RParen)?; - TableSample::Implicit(TableSampleImplicit { - value, - unit, - }) + TableSample::Implicit(TableSampleImplicit { value, unit }) } } } else { From 8113dd6eb92f262c17fa7c9f656d6f37bb638948 Mon Sep 17 00:00:00 2001 From: Yoav Cohen Date: Sun, 8 Dec 2024 17:49:18 +0100 Subject: [PATCH 06/10] Code review comments --- src/ast/mod.rs | 4 ++-- src/ast/query.rs | 34 +++++++++++++++++++++------------- src/ast/spans.rs | 1 - src/dialect/hive.rs | 2 +- src/dialect/mod.rs | 14 +++++++++++--- src/parser/mod.rs | 34 ++++++++++++++++------------------ src/test_utils.rs | 3 --- tests/sqlparser_bigquery.rs 
| 3 --- tests/sqlparser_common.rs | 15 ++------------- tests/sqlparser_hive.rs | 1 - tests/sqlparser_mssql.rs | 6 ------ tests/sqlparser_mysql.rs | 2 -- 12 files changed, 53 insertions(+), 66 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 32379270e..c4603d462 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -69,8 +69,8 @@ pub use self::query::{ OrderBy, OrderByExpr, PivotValueSource, ProjectionSelect, Query, RenameSelectItem, RepetitionQuantifier, ReplaceSelectElement, ReplaceSelectItem, RowsPerMatch, Select, SelectInto, SelectItem, SetExpr, SetOperator, SetQuantifier, Setting, SymbolDefinition, Table, - TableAlias, TableAliasColumnDef, TableFactor, TableFunctionArgs, TableSample, - TableSampleBernoulli, TableSampleBucket, TableSampleImplicit, TableSampleSystem, + TableAlias, TableAliasColumnDef, TableFactor, TableFunctionArgs, TableSampleBernoulli, + TableSampleBucket, TableSampleImplicit, TableSampleKind, TableSampleMethod, TableSampleSystem, TableSampleUnit, TableVersion, TableWithJoins, Top, TopQuantity, ValueTableMode, Values, WildcardAdditionalOptions, With, WithFill, }; diff --git a/src/ast/query.rs b/src/ast/query.rs index 8d35202bb..76c834e7c 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -1004,10 +1004,7 @@ pub enum TableFactor { json_path: Option, /// Optional table sample modifier /// See: - sample: Option>, - /// Position of the table sample modifier in the table factor. Default is after the table alias - /// e.g. `SELECT * FROM tbl t TABLESAMPLE (10 ROWS)`. See `Dialect::supports_table_sample_before_alias`. 
- sample_before_alias: bool, + sample: Option, }, Derived { lateral: bool, @@ -1156,7 +1153,19 @@ pub enum TableFactor { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] -pub enum TableSample { + +pub enum TableSampleKind { + /// Table sample located before the table alias option + BeforeTableAlias(Box), + /// Table sample located after the table alias option + AfterTableAlias(Box), +} + +/// The table sample method options +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum TableSampleMethod { Bernoulli(TableSampleBernoulli), System(TableSampleSystem), Bucket(TableSampleBucket), @@ -1234,11 +1243,11 @@ impl fmt::Display for TableSampleImplicit { } } -impl fmt::Display for TableSample { +impl fmt::Display for TableSampleMethod { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, " TABLESAMPLE")?; match self { - TableSample::Bernoulli(sample) => { + TableSampleMethod::Bernoulli(sample) => { write!(f, " BERNOULLI (")?; if let Some(probability) = &sample.probability { write!(f, "{})", probability)?; @@ -1250,16 +1259,16 @@ impl fmt::Display for TableSample { write!(f, ")")?; } } - TableSample::System(sample) => { + TableSampleMethod::System(sample) => { write!(f, " SYSTEM ({})", sample.probability)?; if let Some(repeatable) = &sample.repeatable { write!(f, " REPEATABLE ({})", repeatable)?; } } - TableSample::Bucket(sample) => { + TableSampleMethod::Bucket(sample) => { write!(f, " ({})", sample)?; } - TableSample::Implicit(sample) => { + TableSampleMethod::Implicit(sample) => { write!(f, " ({})", sample)?; } } @@ -1526,7 +1535,6 @@ impl fmt::Display for TableFactor { with_ordinality, json_path, sample, - sample_before_alias, } => { write!(f, "{name}")?; if let Some(json_path) = 
json_path { @@ -1549,7 +1557,7 @@ impl fmt::Display for TableFactor { if *with_ordinality { write!(f, " WITH ORDINALITY")?; } - if let (Some(sample), true) = (sample, sample_before_alias) { + if let Some(TableSampleKind::BeforeTableAlias(sample)) = sample { write!(f, "{sample}")?; } if let Some(alias) = alias { @@ -1561,7 +1569,7 @@ impl fmt::Display for TableFactor { if let Some(version) = version { write!(f, "{version}")?; } - if let (Some(sample), false) = (sample, sample_before_alias) { + if let Some(TableSampleKind::AfterTableAlias(sample)) = sample { write!(f, "{sample}")?; } Ok(()) diff --git a/src/ast/spans.rs b/src/ast/spans.rs index e369bdc90..c2c7c14f0 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -1700,7 +1700,6 @@ impl Spanned for TableFactor { partitions: _, json_path: _, sample: _, - sample_before_alias: _, } => union_spans( name.0 .iter() diff --git a/src/dialect/hive.rs b/src/dialect/hive.rs index d967ec991..303843984 100644 --- a/src/dialect/hive.rs +++ b/src/dialect/hive.rs @@ -68,7 +68,7 @@ impl Dialect for HiveDialect { } /// See Hive - fn supports_implicit_table_sample(&self) -> bool { + fn supports_implicit_table_sample_method(&self) -> bool { true } } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 37dcf7005..18417ca41 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -709,15 +709,23 @@ pub trait Dialect: Debug + Any { } /// Returns true if this dialect supports the `TABLESAMPLE` option - /// before the table alias option. + /// before the table alias option. For example: + /// + /// Table sample before alias: `SELECT * FROM tbl AS t TABLESAMPLE (10)` + /// Table sample after alias: `SELECT * FROM tbl TABLESAMPLE (10) AS t` + /// /// fn supports_table_sample_before_alias(&self) -> bool { false } - /// Returns true if this dialect support not specifying a table sample method. + /// Returns true if this dialect support not specifying a table sample method. 
For example: + /// + /// Implicit table sample method: `SELECT * FROM tbl TABLESAMPLE (10)` + /// Explicit table sample method: `SELECT * FROM tbl TABLESAMPLE BERNOULLI (10)` + /// /// - fn supports_implicit_table_sample(&self) -> bool { + fn supports_implicit_table_sample_method(&self) -> bool { false } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index ed6f685aa..e65802830 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -10595,13 +10595,9 @@ impl<'a> Parser<'a> { let with_ordinality = self.parse_keywords(&[Keyword::WITH, Keyword::ORDINALITY]); let mut sample = None; - let mut sample_before_alias = false; if self.dialect.supports_table_sample_before_alias() { - sample = self.maybe_parse_table_sample()?; - if sample.is_some() { - // No need to modify the default is no sample option - // exists on the statement - sample_before_alias = true; + if let Some(parsed_sample) = self.maybe_parse_table_sample()? { + sample = Some(TableSampleKind::BeforeTableAlias(parsed_sample)); } } @@ -10620,8 +10616,9 @@ impl<'a> Parser<'a> { }; if !self.dialect.supports_table_sample_before_alias() { - sample = self.maybe_parse_table_sample()?; - sample_before_alias = false; + if let Some(parsed_sample) = self.maybe_parse_table_sample()? 
{ + sample = Some(TableSampleKind::AfterTableAlias(parsed_sample)); + } } let mut table = TableFactor::Table { @@ -10634,7 +10631,6 @@ impl<'a> Parser<'a> { with_ordinality, json_path, sample, - sample_before_alias, }; while let Some(kw) = self.parse_one_of_keywords(&[Keyword::PIVOT, Keyword::UNPIVOT]) { @@ -10655,7 +10651,7 @@ impl<'a> Parser<'a> { } } - fn maybe_parse_table_sample(&mut self) -> Result>, ParserError> { + fn maybe_parse_table_sample(&mut self) -> Result>, ParserError> { if self .parse_one_of_keywords(&[Keyword::SAMPLE, Keyword::TABLESAMPLE]) .is_none() @@ -10663,6 +10659,7 @@ impl<'a> Parser<'a> { return Ok(None); } + // Try to parse based on an explicit table sample method keyword let sample = if self .parse_one_of_keywords(&[Keyword::BERNOULLI, Keyword::ROW]) .is_some() @@ -10678,7 +10675,7 @@ impl<'a> Parser<'a> { (Some(expr), None, None) }; self.expect_token(&Token::RParen)?; - TableSample::Bernoulli(TableSampleBernoulli { + TableSampleMethod::Bernoulli(TableSampleBernoulli { probability, value, unit, @@ -10701,10 +10698,11 @@ impl<'a> Parser<'a> { } else { None }; - TableSample::System(TableSampleSystem { + TableSampleMethod::System(TableSampleSystem { probability, repeatable: seed, }) + // Try to parse without an explicit table sample method keyword } else if self.peek_token().token == Token::LParen { self.expect_token(&Token::LParen)?; if self.parse_keyword(Keyword::BUCKET) { @@ -10717,10 +10715,10 @@ impl<'a> Parser<'a> { None }; self.expect_token(&Token::RParen)?; - TableSample::Bucket(TableSampleBucket { bucket, total, on }) + TableSampleMethod::Bucket(TableSampleBucket { bucket, total, on }) } else { - let value = match self.try_parse(|p| p.parse_number_value()) { - Ok(num) => num, + let value = match self.maybe_parse(|p| p.parse_number_value()) { + Ok(Some(num)) => num, _ => { if let Token::Word(w) = self.next_token().token { Value::Placeholder(w.value) @@ -10733,10 +10731,10 @@ impl<'a> Parser<'a> { } }; if self.peek_token().token == 
Token::RParen - && !self.dialect.supports_implicit_table_sample() + && !self.dialect.supports_implicit_table_sample_method() { self.expect_token(&Token::RParen)?; - TableSample::Bernoulli(TableSampleBernoulli { + TableSampleMethod::Bernoulli(TableSampleBernoulli { probability: Some(Expr::Value(value)), unit: None, value: None, @@ -10750,7 +10748,7 @@ impl<'a> Parser<'a> { None }; self.expect_token(&Token::RParen)?; - TableSample::Implicit(TableSampleImplicit { value, unit }) + TableSampleMethod::Implicit(TableSampleImplicit { value, unit }) } } } else { diff --git a/src/test_utils.rs b/src/test_utils.rs index 97a16b873..e76cdb87a 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -347,7 +347,6 @@ pub fn table(name: impl Into) -> TableFactor { with_ordinality: false, json_path: None, sample: None, - sample_before_alias: false, } } @@ -362,7 +361,6 @@ pub fn table_from_name(name: ObjectName) -> TableFactor { with_ordinality: false, json_path: None, sample: None, - sample_before_alias: false, } } @@ -380,7 +378,6 @@ pub fn table_with_alias(name: impl Into, alias: impl Into) -> Ta with_ordinality: false, json_path: None, sample: None, - sample_before_alias: false, } } diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 11373d2e1..9bfe901f2 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -1545,7 +1545,6 @@ fn parse_table_time_travel() { with_ordinality: false, json_path: None, sample: None, - sample_before_alias: false, }, joins: vec![] },] @@ -1646,7 +1645,6 @@ fn parse_merge() { with_ordinality: false, json_path: None, sample: None, - sample_before_alias: false, }, table ); @@ -1664,7 +1662,6 @@ fn parse_merge() { with_ordinality: false, json_path: None, sample: None, - sample_before_alias: false, }, source ); diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index c574d72c5..90e36e8de 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -463,7 +463,6 @@ fn 
parse_update_with_table_alias() { with_ordinality: false, json_path: None, sample: None, - sample_before_alias: false, }, joins: vec![], }, @@ -557,7 +556,6 @@ fn parse_select_with_table_alias() { with_ordinality: false, json_path: None, sample: None, - sample_before_alias: false, }, joins: vec![], }] @@ -748,7 +746,6 @@ fn parse_where_delete_with_alias_statement() { with_ordinality: false, json_path: None, sample: None, - sample_before_alias: false, }, from[0].relation, ); @@ -767,7 +764,6 @@ fn parse_where_delete_with_alias_statement() { with_ordinality: false, json_path: None, sample: None, - sample_before_alias: false, }, joins: vec![], }]), @@ -6119,7 +6115,6 @@ fn parse_joins_on() { with_ordinality: false, json_path: None, sample: None, - sample_before_alias: false, }, global, join_operator: f(JoinConstraint::On(Expr::BinaryOp { @@ -6249,7 +6244,6 @@ fn parse_joins_using() { with_ordinality: false, json_path: None, sample: None, - sample_before_alias: false, }, global: false, join_operator: f(JoinConstraint::Using(vec!["c1".into()])), @@ -6325,7 +6319,6 @@ fn parse_natural_join() { with_ordinality: false, json_path: None, sample: None, - sample_before_alias: false, }, global: false, join_operator: f(JoinConstraint::Natural), @@ -8343,7 +8336,6 @@ fn parse_merge() { with_ordinality: false, json_path: None, sample: None, - sample_before_alias: false, } ); assert_eq!(table, table_no_into); @@ -9451,7 +9443,6 @@ fn parse_pivot_table() { with_ordinality: false, json_path: None, sample: None, - sample_before_alias: false, }), aggregate_functions: vec![ expected_function("a", None), @@ -9528,7 +9519,6 @@ fn parse_unpivot_table() { with_ordinality: false, json_path: None, sample: None, - sample_before_alias: false, }), value: Ident { value: "quantity".to_string(), @@ -9600,7 +9590,6 @@ fn parse_pivot_unpivot_table() { with_ordinality: false, json_path: None, sample: None, - sample_before_alias: false, }), value: Ident { value: "population".to_string(), @@ -12397,14 
+12386,14 @@ fn parse_create_table_with_enum_types() { #[test] fn test_table_sample() { - let dialects = all_dialects_where(|d| !d.supports_implicit_table_sample()); + let dialects = all_dialects_where(|d| !d.supports_implicit_table_sample_method()); dialects.verified_stmt("SELECT * FROM tbl AS t TABLESAMPLE BERNOULLI (50)"); dialects.verified_stmt("SELECT * FROM tbl AS t TABLESAMPLE SYSTEM (50)"); dialects.verified_stmt("SELECT * FROM tbl AS t TABLESAMPLE SYSTEM (50) REPEATABLE (10)"); // The only dialect that supports implicit tablesample is Hive and it requires aliase after the table sample let dialects = all_dialects_where(|d| { - d.supports_implicit_table_sample() && d.supports_table_sample_before_alias() + d.supports_implicit_table_sample_method() && d.supports_table_sample_before_alias() }); dialects.verified_stmt("SELECT * FROM tbl TABLESAMPLE (50) AS t"); dialects.verified_stmt("SELECT * FROM tbl TABLESAMPLE (50 ROWS) AS t"); diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 5cff7a10a..5349f1207 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -460,7 +460,6 @@ fn parse_delimited_identifiers() { partitions: _, json_path: _, sample: _, - sample_before_alias: _, } => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); assert_eq!(Ident::with_quote('"', "alias"), alias.unwrap().name); diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index 673df1c64..ecc874af8 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -74,7 +74,6 @@ fn parse_table_time_travel() { with_ordinality: false, json_path: None, sample: None, - sample_before_alias: false, }, joins: vec![] },] @@ -224,7 +223,6 @@ fn parse_mssql_openjson() { partitions: vec![], json_path: None, sample: None, - sample_before_alias: false, }, joins: vec![Join { relation: TableFactor::OpenJsonTable { @@ -284,7 +282,6 @@ fn parse_mssql_openjson() { partitions: vec![], json_path: None, sample: None, - sample_before_alias: false, }, 
joins: vec![Join { relation: TableFactor::OpenJsonTable { @@ -345,7 +342,6 @@ fn parse_mssql_openjson() { partitions: vec![], json_path: None, sample: None, - sample_before_alias: false, }, joins: vec![Join { relation: TableFactor::OpenJsonTable { @@ -405,7 +401,6 @@ fn parse_mssql_openjson() { partitions: vec![], json_path: None, sample: None, - sample_before_alias: false, }, joins: vec![Join { relation: TableFactor::OpenJsonTable { @@ -445,7 +440,6 @@ fn parse_mssql_openjson() { partitions: vec![], json_path: None, sample: None, - sample_before_alias: false, }, joins: vec![Join { relation: TableFactor::OpenJsonTable { diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 45362e07a..bc7bf2f88 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -2007,7 +2007,6 @@ fn parse_update_with_joins() { with_ordinality: false, json_path: None, sample: None, - sample_before_alias: false, }, joins: vec![Join { relation: TableFactor::Table { @@ -2023,7 +2022,6 @@ fn parse_update_with_joins() { with_ordinality: false, json_path: None, sample: None, - sample_before_alias: false, }, global: false, join_operator: JoinOperator::Inner(JoinConstraint::On(Expr::BinaryOp { From 353ce582f2773b3c1ea80a49d166cf826a5200de Mon Sep 17 00:00:00 2001 From: Yoav Cohen Date: Sun, 8 Dec 2024 20:07:05 +0100 Subject: [PATCH 07/10] Code review comments --- src/parser/mod.rs | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index e65802830..f41dcc96b 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -10703,8 +10703,7 @@ impl<'a> Parser<'a> { repeatable: seed, }) // Try to parse without an explicit table sample method keyword - } else if self.peek_token().token == Token::LParen { - self.expect_token(&Token::LParen)?; + } else if self.consume_token(&Token::LParen) { if self.parse_keyword(Keyword::BUCKET) { let bucket = self.parse_number_value()?; self.expect_keywords(&[Keyword::OUT, 
Keyword::OF])?; @@ -10717,9 +10716,9 @@ impl<'a> Parser<'a> { self.expect_token(&Token::RParen)?; TableSampleMethod::Bucket(TableSampleBucket { bucket, total, on }) } else { - let value = match self.maybe_parse(|p| p.parse_number_value()) { - Ok(Some(num)) => num, - _ => { + let value = match self.maybe_parse(|p| p.parse_number_value())? { + Some(num) => num, + None => { if let Token::Word(w) = self.next_token().token { Value::Placeholder(w.value) } else { @@ -10730,10 +10729,9 @@ impl<'a> Parser<'a> { } } }; - if self.peek_token().token == Token::RParen - && !self.dialect.supports_implicit_table_sample_method() + if !self.dialect.supports_implicit_table_sample_method() + && self.consume_token(&Token::RParen) { - self.expect_token(&Token::RParen)?; TableSampleMethod::Bernoulli(TableSampleBernoulli { probability: Some(Expr::Value(value)), unit: None, From 464fca39f3909b25584467e38787b67c4f396756 Mon Sep 17 00:00:00 2001 From: Yoav Cohen Date: Mon, 9 Dec 2024 15:27:38 +0100 Subject: [PATCH 08/10] Keep source syntax --- src/ast/mod.rs | 3 +- src/ast/query.rs | 99 +++++++++++++++++++++----- src/parser/mod.rs | 133 +++++++++++++++++++---------------- tests/sqlparser_snowflake.rs | 32 +++------ 4 files changed, 164 insertions(+), 103 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index c4603d462..440dfe1a0 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -70,7 +70,8 @@ pub use self::query::{ RepetitionQuantifier, ReplaceSelectElement, ReplaceSelectItem, RowsPerMatch, Select, SelectInto, SelectItem, SetExpr, SetOperator, SetQuantifier, Setting, SymbolDefinition, Table, TableAlias, TableAliasColumnDef, TableFactor, TableFunctionArgs, TableSampleBernoulli, - TableSampleBucket, TableSampleImplicit, TableSampleKind, TableSampleMethod, TableSampleSystem, + TableSampleBucket, TableSampleImplicit, TableSampleKind, TableSampleMethod, + TableSampleMethodName, TableSampleSeed, TableSampleSeedModifier, TableSampleSystem, TableSampleUnit, TableVersion, TableWithJoins, 
Top, TopQuantity, ValueTableMode, Values, WildcardAdditionalOptions, With, WithFill, }; diff --git a/src/ast/query.rs b/src/ast/query.rs index 76c834e7c..f5b52a4e9 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -1172,21 +1172,96 @@ pub enum TableSampleMethod { Implicit(TableSampleImplicit), } +/// The table sample method names +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum TableSampleMethodName { + Row, + Bernoulli, + System, + Block, +} + +impl fmt::Display for TableSampleMethodName { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + TableSampleMethodName::Bernoulli => write!(f, "BERNOULLI"), + TableSampleMethodName::Row => write!(f, "ROW"), + TableSampleMethodName::System => write!(f, "SYSTEM"), + TableSampleMethodName::Block => write!(f, "BLOCK"), + } + } +} + #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct TableSampleBernoulli { - pub probability: Option, - pub value: Option, + pub name: TableSampleMethodName, + pub probability: Option, + pub value: Option, pub unit: Option, } +impl fmt::Display for TableSampleBernoulli { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, " {} (", self.name)?; + if let Some(probability) = &self.probability { + write!(f, "{})", probability)?; + } else if let Some(value) = &self.value { + write!(f, "{}", value)?; + if let Some(unit) = &self.unit { + write!(f, " {}", unit)?; + } + write!(f, ")")?; + } + Ok(()) + } +} + #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct TableSampleSystem { - pub probability: Expr, - pub repeatable: Option, + 
pub name: TableSampleMethodName, + pub probability: Value, + pub seed: Option, +} + +impl fmt::Display for TableSampleSystem { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, " {} ({})", self.name, self.probability)?; + if let Some(seed) = &self.seed { + write!(f, " {} ({})", seed.modifier, seed.value)?; + } + Ok(()) + } +} + +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct TableSampleSeed { + pub modifier: TableSampleSeedModifier, + pub value: Value, +} + +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum TableSampleSeedModifier { + Repeatable, + Seed, +} + +impl fmt::Display for TableSampleSeedModifier { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + TableSampleSeedModifier::Repeatable => write!(f, "REPEATABLE"), + TableSampleSeedModifier::Seed => write!(f, "SEED"), + } + } } #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] @@ -1248,22 +1323,10 @@ impl fmt::Display for TableSampleMethod { write!(f, " TABLESAMPLE")?; match self { TableSampleMethod::Bernoulli(sample) => { - write!(f, " BERNOULLI (")?; - if let Some(probability) = &sample.probability { - write!(f, "{})", probability)?; - } else if let Some(value) = &sample.value { - write!(f, "{}", value)?; - if let Some(unit) = &sample.unit { - write!(f, " {}", unit)?; - } - write!(f, ")")?; - } + write!(f, "{}", sample)?; } TableSampleMethod::System(sample) => { - write!(f, " SYSTEM ({})", sample.probability)?; - if let Some(repeatable) = &sample.repeatable { - write!(f, " REPEATABLE ({})", repeatable)?; - } + write!(f, "{}", sample)?; } TableSampleMethod::Bucket(sample) => { write!(f, " ({})", sample)?; diff --git a/src/parser/mod.rs b/src/parser/mod.rs 
index f41dcc96b..adb099937 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -10660,48 +10660,14 @@ impl<'a> Parser<'a> { } // Try to parse based on an explicit table sample method keyword - let sample = if self - .parse_one_of_keywords(&[Keyword::BERNOULLI, Keyword::ROW]) - .is_some() - { - self.expect_token(&Token::LParen)?; - let expr = self.parse_expr()?; - - let (probability, value, unit) = if self.parse_keyword(Keyword::ROWS) { - (None, Some(expr), Some(TableSampleUnit::Rows)) - } else if self.parse_keyword(Keyword::PERCENT) { - (None, Some(expr), Some(TableSampleUnit::Percent)) - } else { - (Some(expr), None, None) - }; - self.expect_token(&Token::RParen)?; - TableSampleMethod::Bernoulli(TableSampleBernoulli { - probability, - value, - unit, - }) - } else if self - .parse_one_of_keywords(&[Keyword::SYSTEM, Keyword::BLOCK]) - .is_some() - { - self.expect_token(&Token::LParen)?; - let probability = self.parse_expr()?; - self.expect_token(&Token::RParen)?; - let seed = if self - .parse_one_of_keywords(&[Keyword::REPEATABLE, Keyword::SEED]) - .is_some() - { - self.expect_token(&Token::LParen)?; - let seed = self.parse_expr()?; - self.expect_token(&Token::RParen)?; - Some(seed) - } else { - None - }; - TableSampleMethod::System(TableSampleSystem { - probability, - repeatable: seed, - }) + let sample = if self.parse_keyword(Keyword::BERNOULLI) { + self.parse_table_sample_bernoulli(TableSampleMethodName::Bernoulli)? + } else if self.parse_keyword(Keyword::ROW) { + self.parse_table_sample_bernoulli(TableSampleMethodName::Row)? + } else if self.parse_keyword(Keyword::SYSTEM) { + self.parse_table_sample_system(TableSampleMethodName::System)? + } else if self.parse_keyword(Keyword::BLOCK) { + self.parse_table_sample_system(TableSampleMethodName::Block)? 
// Try to parse without an explicit table sample method keyword } else if self.consume_token(&Token::LParen) { if self.parse_keyword(Keyword::BUCKET) { @@ -10729,29 +10695,19 @@ impl<'a> Parser<'a> { } } }; - if !self.dialect.supports_implicit_table_sample_method() - && self.consume_token(&Token::RParen) - { - TableSampleMethod::Bernoulli(TableSampleBernoulli { - probability: Some(Expr::Value(value)), - unit: None, - value: None, - }) + let unit = if self.parse_keyword(Keyword::ROWS) { + Some(TableSampleUnit::Rows) + } else if self.parse_keyword(Keyword::PERCENT) { + Some(TableSampleUnit::Percent) } else { - let unit = if self.parse_keyword(Keyword::ROWS) { - Some(TableSampleUnit::Rows) - } else if self.parse_keyword(Keyword::PERCENT) { - Some(TableSampleUnit::Percent) - } else { - None - }; - self.expect_token(&Token::RParen)?; - TableSampleMethod::Implicit(TableSampleImplicit { value, unit }) - } + None + }; + self.expect_token(&Token::RParen)?; + TableSampleMethod::Implicit(TableSampleImplicit { value, unit }) } } else { return parser_err!( - "Expecting BERNOULLI, ROW, SYSTEM or BLOCK", + "Expecting BERNOULLI, ROW, SYSTEM, BLOCK or a valid TABLESAMPLE expression in parenthesis", self.peek_token().span.start ); }; @@ -10759,6 +10715,61 @@ impl<'a> Parser<'a> { Ok(Some(Box::new(sample))) } + fn parse_table_sample_bernoulli( + &mut self, + name: TableSampleMethodName, + ) -> Result { + self.expect_token(&Token::LParen)?; + let value = self.parse_number_value()?; + let (probability, value, unit) = if self.parse_keyword(Keyword::ROWS) { + (None, Some(value), Some(TableSampleUnit::Rows)) + } else if self.parse_keyword(Keyword::PERCENT) { + (None, Some(value), Some(TableSampleUnit::Percent)) + } else { + (Some(value), None, None) + }; + self.expect_token(&Token::RParen)?; + Ok(TableSampleMethod::Bernoulli(TableSampleBernoulli { + name, + probability, + value, + unit, + })) + } + + fn parse_table_sample_system( + &mut self, + name: TableSampleMethodName, + ) -> Result { 
+ self.expect_token(&Token::LParen)?; + let probability = self.parse_number_value()?; + self.expect_token(&Token::RParen)?; + + let seed = if self.parse_keyword(Keyword::REPEATABLE) { + Some(self.parse_table_sample_seed(TableSampleSeedModifier::Repeatable)?) + } else if self.parse_keyword(Keyword::SEED) { + Some(self.parse_table_sample_seed(TableSampleSeedModifier::Seed)?) + } else { + None + }; + + Ok(TableSampleMethod::System(TableSampleSystem { + name, + probability, + seed, + })) + } + + fn parse_table_sample_seed( + &mut self, + modifier: TableSampleSeedModifier, + ) -> Result { + self.expect_token(&Token::LParen)?; + let value = self.parse_number_value()?; + self.expect_token(&Token::RParen)?; + Ok(TableSampleSeed { modifier, value }) + } + /// Parses `OPENJSON( jsonExpression [ , path ] ) [ ]` clause, /// assuming the `OPENJSON` keyword was already consumed. fn parse_open_json_table_factor(&mut self) -> Result { diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 7c0ba9a90..dd22cd290 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -2961,30 +2961,16 @@ fn parse_insert_overwrite() { #[test] fn test_table_sample() { - snowflake_and_generic() - .verified_stmt("SELECT * FROM testtable AS t TABLESAMPLE BERNOULLI (10)"); - - // In Snowflake we translate implicit table sample method to bernoulli - snowflake().one_statement_parses_to( - "SELECT * FROM testtable SAMPLE (10)", - "SELECT * FROM testtable TABLESAMPLE BERNOULLI (10)", - ); - - snowflake_and_generic().one_statement_parses_to( - "SELECT * FROM testtable TABLESAMPLE ROW (20.3)", - "SELECT * FROM testtable TABLESAMPLE BERNOULLI (20.3)", - ); - snowflake_and_generic().one_statement_parses_to( - "SELECT * FROM testtable SAMPLE BLOCK (3) SEED (82)", - "SELECT * FROM testtable TABLESAMPLE SYSTEM (3) REPEATABLE (82)", - ); - - snowflake_and_generic().one_statement_parses_to( - "SELECT * FROM testtable SAMPLE BLOCK (0.012) SEED (99992)", - "SELECT * FROM 
testtable TABLESAMPLE SYSTEM (0.012) REPEATABLE (99992)", + "SELECT * FROM testtable SAMPLE (10)", + "SELECT * FROM testtable TABLESAMPLE (10)", ); - snowflake_and_generic() - .verified_stmt("SELECT * FROM testtable TABLESAMPLE BERNOULLI (10 ROWS)"); + .verified_stmt("SELECT * FROM testtable AS t TABLESAMPLE BERNOULLI (10)"); + snowflake_and_generic().verified_stmt("SELECT * FROM testtable AS t TABLESAMPLE ROW (10)"); + snowflake_and_generic().verified_stmt("SELECT * FROM testtable AS t TABLESAMPLE ROW (10 ROWS)"); + snowflake_and_generic() + .verified_stmt("SELECT * FROM testtable TABLESAMPLE BLOCK (3) SEED (82)"); + snowflake_and_generic() + .verified_stmt("SELECT * FROM testtable TABLESAMPLE SYSTEM (3) REPEATABLE (82)"); } From f58f9bb85b38a04076333fecc530c53239fdd341 Mon Sep 17 00:00:00 2001 From: Yoav Cohen Date: Mon, 9 Dec 2024 16:21:36 +0100 Subject: [PATCH 09/10] Remove redundant code now that syntax roundtrip is more accurate --- src/dialect/hive.rs | 5 ----- src/dialect/mod.rs | 10 ---------- tests/sqlparser_common.rs | 15 ++++++--------- 3 files changed, 6 insertions(+), 24 deletions(-) diff --git a/src/dialect/hive.rs b/src/dialect/hive.rs index 303843984..80f44cf7c 100644 --- a/src/dialect/hive.rs +++ b/src/dialect/hive.rs @@ -66,9 +66,4 @@ impl Dialect for HiveDialect { fn supports_table_sample_before_alias(&self) -> bool { true } - - /// See Hive - fn supports_implicit_table_sample_method(&self) -> bool { - true - } } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 18417ca41..8cce6a353 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -718,16 +718,6 @@ pub trait Dialect: Debug + Any { fn supports_table_sample_before_alias(&self) -> bool { false } - - /// Returns true if this dialect support not specifying a table sample method. 
For example: - /// - /// Implicit table sample method: `SELECT * FROM tbl TABLESAMPLE (10)` - /// Explicit table sample method: `SELECT * FROM tbl TABLESAMPLE BERNOULLI (10)` - /// - /// - fn supports_implicit_table_sample_method(&self) -> bool { - false - } } /// This represents the operators for which precedence must be defined diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 90e36e8de..0f1813c2f 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -12386,16 +12386,13 @@ fn parse_create_table_with_enum_types() { #[test] fn test_table_sample() { - let dialects = all_dialects_where(|d| !d.supports_implicit_table_sample_method()); - dialects.verified_stmt("SELECT * FROM tbl AS t TABLESAMPLE BERNOULLI (50)"); - dialects.verified_stmt("SELECT * FROM tbl AS t TABLESAMPLE SYSTEM (50)"); - dialects.verified_stmt("SELECT * FROM tbl AS t TABLESAMPLE SYSTEM (50) REPEATABLE (10)"); - - // The only dialect that supports implicit tablesample is Hive and it requires aliase after the table sample - let dialects = all_dialects_where(|d| { - d.supports_implicit_table_sample_method() && d.supports_table_sample_before_alias() - }); + let dialects = all_dialects_where(|d| d.supports_table_sample_before_alias()); dialects.verified_stmt("SELECT * FROM tbl TABLESAMPLE (50) AS t"); dialects.verified_stmt("SELECT * FROM tbl TABLESAMPLE (50 ROWS) AS t"); dialects.verified_stmt("SELECT * FROM tbl TABLESAMPLE (50 PERCENT) AS t"); + + let dialects = all_dialects_where(|d| !d.supports_table_sample_before_alias()); + dialects.verified_stmt("SELECT * FROM tbl AS t TABLESAMPLE BERNOULLI (50)"); + dialects.verified_stmt("SELECT * FROM tbl AS t TABLESAMPLE SYSTEM (50)"); + dialects.verified_stmt("SELECT * FROM tbl AS t TABLESAMPLE SYSTEM (50) REPEATABLE (10)"); } From ef04565bd79de26b3a2e2422c342b6048e824f9b Mon Sep 17 00:00:00 2001 From: Yoav Cohen Date: Fri, 13 Dec 2024 21:14:44 +0100 Subject: [PATCH 10/10] Add support for table sample in Clickhouse 
--- src/ast/mod.rs | 10 +- src/ast/query.rs | 136 +++++++++++++-------------- src/parser/mod.rs | 168 ++++++++++++++++------------------ tests/sqlparser_clickhouse.rs | 8 ++ tests/sqlparser_snowflake.rs | 8 +- 5 files changed, 158 insertions(+), 172 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 440dfe1a0..ccb2ed1bc 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -69,11 +69,11 @@ pub use self::query::{ OrderBy, OrderByExpr, PivotValueSource, ProjectionSelect, Query, RenameSelectItem, RepetitionQuantifier, ReplaceSelectElement, ReplaceSelectItem, RowsPerMatch, Select, SelectInto, SelectItem, SetExpr, SetOperator, SetQuantifier, Setting, SymbolDefinition, Table, - TableAlias, TableAliasColumnDef, TableFactor, TableFunctionArgs, TableSampleBernoulli, - TableSampleBucket, TableSampleImplicit, TableSampleKind, TableSampleMethod, - TableSampleMethodName, TableSampleSeed, TableSampleSeedModifier, TableSampleSystem, - TableSampleUnit, TableVersion, TableWithJoins, Top, TopQuantity, ValueTableMode, Values, - WildcardAdditionalOptions, With, WithFill, + TableAlias, TableAliasColumnDef, TableFactor, TableFunctionArgs, TableSample, + TableSampleBucket, TableSampleKind, TableSampleMethod, TableSampleModifier, + TableSampleQuantity, TableSampleSeed, TableSampleSeedModifier, TableSampleUnit, TableVersion, + TableWithJoins, Top, TopQuantity, ValueTableMode, Values, WildcardAdditionalOptions, With, + WithFill, }; pub use self::trigger::{ diff --git a/src/ast/query.rs b/src/ast/query.rs index f5b52a4e9..948febd26 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -1156,86 +1156,85 @@ pub enum TableFactor { pub enum TableSampleKind { /// Table sample located before the table alias option - BeforeTableAlias(Box), + BeforeTableAlias(Box), /// Table sample located after the table alias option - AfterTableAlias(Box), + AfterTableAlias(Box), } -/// The table sample method options #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = 
"serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] -pub enum TableSampleMethod { - Bernoulli(TableSampleBernoulli), - System(TableSampleSystem), - Bucket(TableSampleBucket), - Implicit(TableSampleImplicit), +pub struct TableSample { + pub modifier: TableSampleModifier, + pub name: Option, + pub quantity: Option, + pub seed: Option, + pub bucket: Option, + pub offset: Option, } -/// The table sample method names #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] -pub enum TableSampleMethodName { - Row, - Bernoulli, - System, - Block, +pub enum TableSampleModifier { + Sample, + TableSample, } -impl fmt::Display for TableSampleMethodName { +impl fmt::Display for TableSampleModifier { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { - TableSampleMethodName::Bernoulli => write!(f, "BERNOULLI"), - TableSampleMethodName::Row => write!(f, "ROW"), - TableSampleMethodName::System => write!(f, "SYSTEM"), - TableSampleMethodName::Block => write!(f, "BLOCK"), + TableSampleModifier::Sample => write!(f, "SAMPLE")?, + TableSampleModifier::TableSample => write!(f, "TABLESAMPLE")?, } + Ok(()) } } #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] -pub struct TableSampleBernoulli { - pub name: TableSampleMethodName, - pub probability: Option, - pub value: Option, +pub struct TableSampleQuantity { + pub parenthesized: bool, + pub value: Expr, pub unit: Option, } -impl fmt::Display for TableSampleBernoulli { +impl fmt::Display for TableSampleQuantity { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, " {} (", self.name)?; - if let Some(probability) = &self.probability { - write!(f, "{})", probability)?; - } else if let Some(value) = 
&self.value { - write!(f, "{}", value)?; - if let Some(unit) = &self.unit { - write!(f, " {}", unit)?; - } + if self.parenthesized { + write!(f, "(")?; + } + write!(f, "{}", self.value)?; + if let Some(unit) = &self.unit { + write!(f, " {}", unit)?; + } + if self.parenthesized { write!(f, ")")?; } Ok(()) } } +/// The table sample method names #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] -pub struct TableSampleSystem { - pub name: TableSampleMethodName, - pub probability: Value, - pub seed: Option, +pub enum TableSampleMethod { + Row, + Bernoulli, + System, + Block, } -impl fmt::Display for TableSampleSystem { +impl fmt::Display for TableSampleMethod { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, " {} ({})", self.name, self.probability)?; - if let Some(seed) = &self.seed { - write!(f, " {} ({})", seed.modifier, seed.value)?; + match self { + TableSampleMethod::Bernoulli => write!(f, "BERNOULLI"), + TableSampleMethod::Row => write!(f, "ROW"), + TableSampleMethod::System => write!(f, "SYSTEM"), + TableSampleMethod::Block => write!(f, "BLOCK"), } - Ok(()) } } @@ -1247,6 +1246,13 @@ pub struct TableSampleSeed { pub value: Value, } +impl fmt::Display for TableSampleSeed { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{} ({})", self.modifier, self.value)?; + Ok(()) + } +} + #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] @@ -1299,41 +1305,23 @@ impl fmt::Display for TableSampleBucket { Ok(()) } } - -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] -pub struct TableSampleImplicit { - pub value: Value, - pub unit: 
Option, -} - -impl fmt::Display for TableSampleImplicit { +impl fmt::Display for TableSample { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}", self.value)?; - if let Some(unit) = &self.unit { - write!(f, " {}", unit)?; + write!(f, " {}", self.modifier)?; + if let Some(name) = &self.name { + write!(f, " {}", name)?; } - Ok(()) - } -} - -impl fmt::Display for TableSampleMethod { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, " TABLESAMPLE")?; - match self { - TableSampleMethod::Bernoulli(sample) => { - write!(f, "{}", sample)?; - } - TableSampleMethod::System(sample) => { - write!(f, "{}", sample)?; - } - TableSampleMethod::Bucket(sample) => { - write!(f, " ({})", sample)?; - } - TableSampleMethod::Implicit(sample) => { - write!(f, " ({})", sample)?; - } + if let Some(quantity) = &self.quantity { + write!(f, " {}", quantity)?; + } + if let Some(seed) = &self.seed { + write!(f, " {}", seed)?; + } + if let Some(bucket) = &self.bucket { + write!(f, " ({})", bucket)?; + } + if let Some(offset) = &self.offset { + write!(f, " OFFSET {}", offset)?; } Ok(()) } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index adb099937..09eab9be5 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -10651,99 +10651,80 @@ impl<'a> Parser<'a> { } } - fn maybe_parse_table_sample(&mut self) -> Result>, ParserError> { - if self - .parse_one_of_keywords(&[Keyword::SAMPLE, Keyword::TABLESAMPLE]) - .is_none() - { + fn maybe_parse_table_sample(&mut self) -> Result>, ParserError> { + let modifier = if self.parse_keyword(Keyword::TABLESAMPLE) { + TableSampleModifier::TableSample + } else if self.parse_keyword(Keyword::SAMPLE) { + TableSampleModifier::Sample + } else { return Ok(None); - } + }; - // Try to parse based on an explicit table sample method keyword - let sample = if self.parse_keyword(Keyword::BERNOULLI) { - self.parse_table_sample_bernoulli(TableSampleMethodName::Bernoulli)? 
- } else if self.parse_keyword(Keyword::ROW) { - self.parse_table_sample_bernoulli(TableSampleMethodName::Row)? - } else if self.parse_keyword(Keyword::SYSTEM) { - self.parse_table_sample_system(TableSampleMethodName::System)? - } else if self.parse_keyword(Keyword::BLOCK) { - self.parse_table_sample_system(TableSampleMethodName::Block)? - // Try to parse without an explicit table sample method keyword - } else if self.consume_token(&Token::LParen) { - if self.parse_keyword(Keyword::BUCKET) { - let bucket = self.parse_number_value()?; - self.expect_keywords(&[Keyword::OUT, Keyword::OF])?; - let total = self.parse_number_value()?; - let on = if self.parse_keyword(Keyword::ON) { - Some(self.parse_expr()?) - } else { - None - }; - self.expect_token(&Token::RParen)?; - TableSampleMethod::Bucket(TableSampleBucket { bucket, total, on }) - } else { - let value = match self.maybe_parse(|p| p.parse_number_value())? { - Some(num) => num, - None => { - if let Token::Word(w) = self.next_token().token { - Value::Placeholder(w.value) - } else { - return parser_err!( - "Expecting number or byte length e.g. 
100M", - self.peek_token().span.start - ); - } - } - }; - let unit = if self.parse_keyword(Keyword::ROWS) { - Some(TableSampleUnit::Rows) - } else if self.parse_keyword(Keyword::PERCENT) { - Some(TableSampleUnit::Percent) - } else { - None - }; - self.expect_token(&Token::RParen)?; - TableSampleMethod::Implicit(TableSampleImplicit { value, unit }) - } - } else { - return parser_err!( - "Expecting BERNOULLI, ROW, SYSTEM, BLOCK or a valid TABLESAMPLE expression in parenthesis", - self.peek_token().span.start - ); + let name = match self.parse_one_of_keywords(&[ + Keyword::BERNOULLI, + Keyword::ROW, + Keyword::SYSTEM, + Keyword::BLOCK, + ]) { + Some(Keyword::BERNOULLI) => Some(TableSampleMethod::Bernoulli), + Some(Keyword::ROW) => Some(TableSampleMethod::Row), + Some(Keyword::SYSTEM) => Some(TableSampleMethod::System), + Some(Keyword::BLOCK) => Some(TableSampleMethod::Block), + _ => None, }; - Ok(Some(Box::new(sample))) - } + let parenthesized = self.consume_token(&Token::LParen); - fn parse_table_sample_bernoulli( - &mut self, - name: TableSampleMethodName, - ) -> Result { - self.expect_token(&Token::LParen)?; - let value = self.parse_number_value()?; - let (probability, value, unit) = if self.parse_keyword(Keyword::ROWS) { - (None, Some(value), Some(TableSampleUnit::Rows)) - } else if self.parse_keyword(Keyword::PERCENT) { - (None, Some(value), Some(TableSampleUnit::Percent)) + let (quantity, bucket) = if parenthesized && self.parse_keyword(Keyword::BUCKET) { + let selected_bucket = self.parse_number_value()?; + self.expect_keywords(&[Keyword::OUT, Keyword::OF])?; + let total = self.parse_number_value()?; + let on = if self.parse_keyword(Keyword::ON) { + Some(self.parse_expr()?) + } else { + None + }; + ( + None, + Some(TableSampleBucket { + bucket: selected_bucket, + total, + on, + }), + ) } else { - (Some(value), None, None) + let value = match self.maybe_parse(|p| p.parse_expr())? 
{ + Some(num) => num, + None => { + if let Token::Word(w) = self.next_token().token { + Expr::Value(Value::Placeholder(w.value)) + } else { + return parser_err!( + "Expecting number or byte length e.g. 100M", + self.peek_token().span.start + ); + } + } + }; + let unit = if self.parse_keyword(Keyword::ROWS) { + Some(TableSampleUnit::Rows) + } else if self.parse_keyword(Keyword::PERCENT) { + Some(TableSampleUnit::Percent) + } else { + None + }; + ( + Some(TableSampleQuantity { + parenthesized, + value, + unit, + }), + None, + ) }; - self.expect_token(&Token::RParen)?; - Ok(TableSampleMethod::Bernoulli(TableSampleBernoulli { - name, - probability, - value, - unit, - })) - } - - fn parse_table_sample_system( - &mut self, - name: TableSampleMethodName, - ) -> Result { - self.expect_token(&Token::LParen)?; - let probability = self.parse_number_value()?; - self.expect_token(&Token::RParen)?; + if parenthesized { + self.expect_token(&Token::RParen)?; + } let seed = if self.parse_keyword(Keyword::REPEATABLE) { Some(self.parse_table_sample_seed(TableSampleSeedModifier::Repeatable)?) @@ -10753,11 +10734,20 @@ impl<'a> Parser<'a> { None }; - Ok(TableSampleMethod::System(TableSampleSystem { + let offset = if self.parse_keyword(Keyword::OFFSET) { + Some(self.parse_expr()?) 
+ } else { + None + }; + + Ok(Some(Box::new(TableSample { + modifier, name, - probability, + quantity, seed, - })) + bucket, + offset, + }))) } fn parse_table_sample_seed( diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 98f4c0f27..d60506d90 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -1614,6 +1614,14 @@ fn parse_explain_table() { } } +#[test] +fn parse_table_sample() { + clickhouse().verified_stmt("SELECT * FROM tbl SAMPLE 0.1"); + clickhouse().verified_stmt("SELECT * FROM tbl SAMPLE 1000"); + clickhouse().verified_stmt("SELECT * FROM tbl SAMPLE 1 / 10"); + clickhouse().verified_stmt("SELECT * FROM tbl SAMPLE 1 / 10 OFFSET 1 / 2"); +} + fn clickhouse() -> TestedDialects { TestedDialects::new(vec![Box::new(ClickHouseDialect {})]) } diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index dd22cd290..adb8f8133 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -2961,10 +2961,8 @@ fn parse_insert_overwrite() { #[test] fn test_table_sample() { - snowflake_and_generic().one_statement_parses_to( - "SELECT * FROM testtable SAMPLE (10)", - "SELECT * FROM testtable TABLESAMPLE (10)", - ); + snowflake_and_generic().verified_stmt("SELECT * FROM testtable SAMPLE (10)"); + snowflake_and_generic().verified_stmt("SELECT * FROM testtable TABLESAMPLE (10)"); snowflake_and_generic() .verified_stmt("SELECT * FROM testtable AS t TABLESAMPLE BERNOULLI (10)"); snowflake_and_generic().verified_stmt("SELECT * FROM testtable AS t TABLESAMPLE ROW (10)"); @@ -2973,4 +2971,6 @@ fn test_table_sample() { .verified_stmt("SELECT * FROM testtable TABLESAMPLE BLOCK (3) SEED (82)"); snowflake_and_generic() .verified_stmt("SELECT * FROM testtable TABLESAMPLE SYSTEM (3) REPEATABLE (82)"); + snowflake_and_generic().verified_stmt("SELECT id FROM mytable TABLESAMPLE (10) REPEATABLE (1)"); + snowflake_and_generic().verified_stmt("SELECT id FROM mytable TABLESAMPLE (10) SEED (1)"); }