From 63245f6c3365d0c272ab6c41a25c208df1d401e7 Mon Sep 17 00:00:00 2001 From: Ayman Elkfrawy Date: Thu, 12 Dec 2024 10:59:55 -0800 Subject: [PATCH 1/3] fix tokenize number as part of ObjectName --- src/tokenizer.rs | 45 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 38 insertions(+), 7 deletions(-) diff --git a/src/tokenizer.rs b/src/tokenizer.rs index aacfc16fa..33f5c91ee 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -1108,15 +1108,29 @@ impl<'a> Tokenizer<'a> { // match one period if let Some('.') = chars.peek() { - s.push('.'); - chars.next(); + // Check if this actually is a float point number + let mut char_clone = chars.peekable.clone(); + char_clone.next(); + // Next char should be a digit, otherwise, it is not a float point number + if char_clone + .peek() + .map(|c| c.is_ascii_digit()) + .unwrap_or(false) + { + s.push('.'); + chars.next(); + } else if s.is_empty() { + // Number might be part of period separated construct. Keep the period for next token + // e.g. a-12.b + return Ok(Some(Token::Number(s, false))); + } else { + // No number -> Token::Period + chars.next(); + return Ok(Some(Token::Period)); + } } - s += &peeking_take_while(chars, |ch| ch.is_ascii_digit()); - // No number -> Token::Period - if s == "." 
{ - return Ok(Some(Token::Period)); - } + s += &peeking_take_while(chars, |ch| ch.is_ascii_digit()); let mut exponent_part = String::new(); // Parse exponent as number @@ -2128,6 +2142,23 @@ mod tests { compare(expected, tokens); } + #[test] + fn tokenize_select_float_hyphenated_identifier() { + let sql = String::from("SELECT a-12.b"); + let dialect = GenericDialect {}; + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); + let expected = vec![ + Token::make_keyword("SELECT"), + Token::Whitespace(Whitespace::Space), + Token::make_word("a", None), + Token::Minus, + Token::Number(String::from("12"), false), + Token::Period, + Token::make_word("b", None), + ]; + compare(expected, tokens); + } + #[test] fn tokenize_clickhouse_double_equal() { let sql = String::from("SELECT foo=='1'"); From 4f92baa8a1ec1df13cc2d6da204ba1c7f333ec79 Mon Sep 17 00:00:00 2001 From: Ayman Elkfrawy Date: Thu, 12 Dec 2024 11:00:51 -0800 Subject: [PATCH 2/3] Fix BigQuery hyphenated ObjectName with numbers --- src/parser/mod.rs | 4 +++- tests/sqlparser_bigquery.rs | 20 ++++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 39ab2db24..1f9687526 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -8747,7 +8747,9 @@ impl<'a> Parser<'a> { } Token::Number(s, false) if s.chars().all(|c| c.is_ascii_digit()) => { ident.value.push_str(&s); - true + // If next token is period, then it is part of an ObjectName and we don't expect whitespace + // after the number. 
+ !matches!(self.peek_token().token, Token::Period) } _ => { return self diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 2be128a8c..387c77fea 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -1522,6 +1522,26 @@ fn parse_hyphenated_table_identifiers() { "SELECT * FROM foo-bar AS f JOIN baz-qux AS b ON f.id = b.id", ); + assert_eq!( + bigquery() + .verified_only_select_with_canonical( + "select * from foo-123.bar", + "SELECT * FROM foo-123.bar" + ) + .from[0] + .relation, + TableFactor::Table { + name: ObjectName(vec![Ident::new("foo-123"), Ident::new("bar")]), + alias: None, + args: None, + with_hints: vec![], + version: None, + partitions: vec![], + with_ordinality: false, + json_path: None, + } + ); + assert_eq!( bigquery() .verified_only_select_with_canonical( From 0c01b3355daa7647b4a61366e8703fb3d10c6bad Mon Sep 17 00:00:00 2001 From: Ayman Elkfrawy Date: Thu, 12 Dec 2024 12:35:16 -0800 Subject: [PATCH 3/3] fix after clippy --- src/tokenizer.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 33f5c91ee..f692656af 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -1119,7 +1119,7 @@ impl<'a> Tokenizer<'a> { { s.push('.'); chars.next(); - } else if s.is_empty() { + } else if !s.is_empty() { // Number might be part of period separated construct. Keep the period for next token // e.g. a-12.b return Ok(Some(Token::Number(s, false)));