From a452a80fbd6166c4f9b1938f67d32c8ae7f1d577 Mon Sep 17 00:00:00 2001 From: Martin Nowak Date: Sun, 27 Aug 2023 22:48:33 +0200 Subject: [PATCH 1/2] feat: show location info in parse errors - add consistent Display for Location - show (optional) token location in parse errors - keep Location line = 0 as noop --- src/parser/mod.rs | 93 ++++++++++++++++++++++++++++++++--------------- src/tokenizer.rs | 48 +++++++++++++----------- 2 files changed, 89 insertions(+), 52 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 846215249..8f7c4b0cf 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -44,8 +44,8 @@ pub enum ParserError { // Use `Parser::expected` instead, if possible macro_rules! parser_err { - ($MSG:expr) => { - Err(ParserError::ParserError($MSG.to_string())) + ($MSG:expr, $loc:expr) => { + Err(ParserError::ParserError(format!("{}{}", $MSG, $loc))) }; } @@ -724,6 +724,7 @@ impl<'a> Parser<'a> { // Note also that naively `SELECT date` looks like a syntax error because the `date` type // name is not followed by a string literal, but in fact in PostgreSQL it is a valid // expression that should parse as the column name "date". + let loc = self.peek_token().location; return_ok_if_some!(self.maybe_parse(|parser| { match parser.parse_data_type()? { DataType::Interval => parser.parse_interval(), @@ -734,7 +735,7 @@ impl<'a> Parser<'a> { // name, resulting in `NOT 'a'` being recognized as a `TypedString` instead of // an unary negation `NOT ('a' LIKE 'b')`. To solve this, we don't accept the // `type 'string'` syntax for the custom data types at all. - DataType::Custom(..) => parser_err!("dummy"), + DataType::Custom(..) => parser_err!("dummy", loc), data_type => Ok(Expr::TypedString { data_type, value: parser.parse_literal_string()?, @@ -909,7 +910,12 @@ impl<'a> Parser<'a> { let tok = self.next_token(); let key = match tok.token { Token::Word(word) => word.to_ident(), - _ => return parser_err!(format!("Expected identifier, found: {tok}")), + _ => { + return parser_err!( + format!("Expected identifier, found: {tok}"), + tok.location + ) + } }; Ok(Expr::CompositeAccess { expr: Box::new(expr), @@ -1220,7 +1226,10 @@ impl<'a> Parser<'a> { r#in: Box::new(from), }) } else { - parser_err!("Position function must include IN keyword".to_string()) + parser_err!( + "Position function must include IN keyword".to_string(), + self.peek_token().location + ) } } @@ -1884,7 +1893,10 @@ impl<'a> Parser<'a> { } } // Can only happen if `get_next_precedence` got out of sync with this function - _ => parser_err!(format!("No infix parser for token {:?}", tok.token)), + _ => parser_err!( + format!("No infix parser for token {:?}", tok.token), + tok.location + ), } } else if Token::DoubleColon == tok { self.parse_pg_cast(expr) @@ -1935,7 +1947,10 @@ impl<'a> Parser<'a> { }) } else { // Can only happen if `get_next_precedence` got out of sync with this function - parser_err!(format!("No infix parser for token {:?}", tok.token)) + parser_err!( + format!("No infix parser for token {:?}", tok.token), + tok.location + ) } } @@ -2213,7 +2228,10 @@ impl<'a> Parser<'a> { /// Report unexpected token pub fn expected(&self, expected: &str, found: TokenWithLocation) -> Result { - parser_err!(format!("Expected {expected}, found: {found}")) + parser_err!( + format!("Expected {expected}, found: {found}"), + found.location + ) } /// Look for an expected keyword and consume it if it exists @@ -2378,13 +2396,14 @@ impl<'a> Parser<'a> { /// Parse either `ALL`, `DISTINCT` or `DISTINCT ON (...)`. 
Returns `None` if `ALL` is parsed /// and results in a `ParserError` if both `ALL` and `DISTINCT` are found. pub fn parse_all_or_distinct(&mut self) -> Result, ParserError> { + let loc = self.peek_token().location; let all = self.parse_keyword(Keyword::ALL); let distinct = self.parse_keyword(Keyword::DISTINCT); if !distinct { return Ok(None); } if all { - return parser_err!("Cannot specify both ALL and DISTINCT".to_string()); + return parser_err!("Cannot specify both ALL and DISTINCT".to_string(), loc); } let on = self.parse_keyword(Keyword::ON); if !on { @@ -2986,10 +3005,14 @@ impl<'a> Parser<'a> { let mut admin = vec![]; while let Some(keyword) = self.parse_one_of_keywords(&optional_keywords) { + let loc = self + .tokens + .get(self.index - 1) + .map_or(Location { line: 0, column: 0 }, |t| t.location); match keyword { Keyword::AUTHORIZATION => { if authorization_owner.is_some() { - parser_err!("Found multiple AUTHORIZATION") + parser_err!("Found multiple AUTHORIZATION", loc) } else { authorization_owner = Some(self.parse_object_name()?); Ok(()) @@ -2997,7 +3020,7 @@ impl<'a> Parser<'a> { } Keyword::LOGIN | Keyword::NOLOGIN => { if login.is_some() { - parser_err!("Found multiple LOGIN or NOLOGIN") + parser_err!("Found multiple LOGIN or NOLOGIN", loc) } else { login = Some(keyword == Keyword::LOGIN); Ok(()) @@ -3005,7 +3028,7 @@ impl<'a> Parser<'a> { } Keyword::INHERIT | Keyword::NOINHERIT => { if inherit.is_some() { - parser_err!("Found multiple INHERIT or NOINHERIT") + parser_err!("Found multiple INHERIT or NOINHERIT", loc) } else { inherit = Some(keyword == Keyword::INHERIT); Ok(()) @@ -3013,7 +3036,7 @@ impl<'a> Parser<'a> { } Keyword::BYPASSRLS | Keyword::NOBYPASSRLS => { if bypassrls.is_some() { - parser_err!("Found multiple BYPASSRLS or NOBYPASSRLS") + parser_err!("Found multiple BYPASSRLS or NOBYPASSRLS", loc) } else { bypassrls = Some(keyword == Keyword::BYPASSRLS); Ok(()) @@ -3021,7 +3044,7 @@ impl<'a> Parser<'a> { } Keyword::CREATEDB | Keyword::NOCREATEDB => { if create_db.is_some() { - parser_err!("Found multiple CREATEDB or NOCREATEDB") + parser_err!("Found multiple CREATEDB or NOCREATEDB", loc) } else { create_db = Some(keyword == Keyword::CREATEDB); Ok(()) @@ -3029,7 +3052,7 @@ impl<'a> Parser<'a> { } Keyword::CREATEROLE | Keyword::NOCREATEROLE => { if create_role.is_some() { - parser_err!("Found multiple CREATEROLE or NOCREATEROLE") + parser_err!("Found multiple CREATEROLE or NOCREATEROLE", loc) } else { create_role = Some(keyword == Keyword::CREATEROLE); Ok(()) @@ -3037,7 +3060,7 @@ impl<'a> Parser<'a> { } Keyword::SUPERUSER | Keyword::NOSUPERUSER => { if superuser.is_some() { - parser_err!("Found multiple SUPERUSER or NOSUPERUSER") + parser_err!("Found multiple SUPERUSER or NOSUPERUSER", loc) } else { superuser = Some(keyword == Keyword::SUPERUSER); Ok(()) @@ -3045,7 +3068,7 @@ impl<'a> Parser<'a> { } Keyword::REPLICATION | Keyword::NOREPLICATION => { if replication.is_some() { - parser_err!("Found multiple REPLICATION or NOREPLICATION") + parser_err!("Found multiple REPLICATION or NOREPLICATION", loc) } else { replication = Some(keyword == Keyword::REPLICATION); Ok(()) @@ -3053,7 +3076,7 @@ impl<'a> Parser<'a> { } Keyword::PASSWORD => { if password.is_some() { - parser_err!("Found multiple PASSWORD") + parser_err!("Found multiple PASSWORD", loc) } else { password = if self.parse_keyword(Keyword::NULL) { Some(Password::NullPassword) @@ -3066,7 +3089,7 @@ impl<'a> Parser<'a> { Keyword::CONNECTION => { self.expect_keyword(Keyword::LIMIT)?; if connection_limit.is_some() { - 
parser_err!("Found multiple CONNECTION LIMIT") + parser_err!("Found multiple CONNECTION LIMIT", loc) } else { connection_limit = Some(Expr::Value(self.parse_number_value()?)); Ok(()) @@ -3075,7 +3098,7 @@ impl<'a> Parser<'a> { Keyword::VALID => { self.expect_keyword(Keyword::UNTIL)?; if valid_until.is_some() { - parser_err!("Found multiple VALID UNTIL") + parser_err!("Found multiple VALID UNTIL", loc) } else { valid_until = Some(Expr::Value(self.parse_value()?)); Ok(()) @@ -3084,14 +3107,14 @@ impl<'a> Parser<'a> { Keyword::IN => { if self.parse_keyword(Keyword::ROLE) { if !in_role.is_empty() { - parser_err!("Found multiple IN ROLE") + parser_err!("Found multiple IN ROLE", loc) } else { in_role = self.parse_comma_separated(Parser::parse_identifier)?; Ok(()) } } else if self.parse_keyword(Keyword::GROUP) { if !in_group.is_empty() { - parser_err!("Found multiple IN GROUP") + parser_err!("Found multiple IN GROUP", loc) } else { in_group = self.parse_comma_separated(Parser::parse_identifier)?; Ok(()) @@ -3102,7 +3125,7 @@ impl<'a> Parser<'a> { } Keyword::ROLE => { if !role.is_empty() { - parser_err!("Found multiple ROLE") + parser_err!("Found multiple ROLE", loc) } else { role = self.parse_comma_separated(Parser::parse_identifier)?; Ok(()) @@ -3110,7 +3133,7 @@ impl<'a> Parser<'a> { } Keyword::USER => { if !user.is_empty() { - parser_err!("Found multiple USER") + parser_err!("Found multiple USER", loc) } else { user = self.parse_comma_separated(Parser::parse_identifier)?; Ok(()) @@ -3118,7 +3141,7 @@ impl<'a> Parser<'a> { } Keyword::ADMIN => { if !admin.is_empty() { - parser_err!("Found multiple ADMIN") + parser_err!("Found multiple ADMIN", loc) } else { admin = self.parse_comma_separated(Parser::parse_identifier)?; Ok(()) @@ -3181,14 +3204,19 @@ impl<'a> Parser<'a> { // specifying multiple objects to delete in a single statement let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); let names = self.parse_comma_separated(Parser::parse_object_name)?; + + let loc = self.peek_token().location; let cascade = self.parse_keyword(Keyword::CASCADE); let restrict = self.parse_keyword(Keyword::RESTRICT); let purge = self.parse_keyword(Keyword::PURGE); if cascade && restrict { - return parser_err!("Cannot specify both CASCADE and RESTRICT in DROP"); + return parser_err!("Cannot specify both CASCADE and RESTRICT in DROP", loc); } if object_type == ObjectType::Role && (cascade || restrict || purge) { - return parser_err!("Cannot specify CASCADE, RESTRICT, or PURGE in DROP ROLE"); + return parser_err!( + "Cannot specify CASCADE, RESTRICT, or PURGE in DROP ROLE", + loc + ); } Ok(Statement::Drop { object_type, @@ -4446,7 +4474,11 @@ impl<'a> Parser<'a> { fn parse_literal_char(&mut self) -> Result { let s = self.parse_literal_string()?; if s.len() != 1 { - return parser_err!(format!("Expect a char, found {s:?}")); + let loc = self + .tokens + .get(self.index - 1) + .map_or(Location { line: 0, column: 0 }, |t| t.location); + return parser_err!(format!("Expect a char, found {s:?}"), loc); } Ok(s.chars().next().unwrap()) } @@ -4525,7 +4557,7 @@ impl<'a> Parser<'a> { // (i.e., it returns the input string). 
Token::Number(ref n, l) => match n.parse() { Ok(n) => Ok(Value::Number(n, l)), - Err(e) => parser_err!(format!("Could not parse '{n}' as number: {e}")), + Err(e) => parser_err!(format!("Could not parse '{n}' as number: {e}"), location), }, Token::SingleQuotedString(ref s) => Ok(Value::SingleQuotedString(s.to_string())), Token::DoubleQuotedString(ref s) => Ok(Value::DoubleQuotedString(s.to_string())), @@ -6465,10 +6497,11 @@ impl<'a> Parser<'a> { .parse_keywords(&[Keyword::GRANTED, Keyword::BY]) .then(|| self.parse_identifier().unwrap()); + let loc = self.peek_token().location; let cascade = self.parse_keyword(Keyword::CASCADE); let restrict = self.parse_keyword(Keyword::RESTRICT); if cascade && restrict { - return parser_err!("Cannot specify both CASCADE and RESTRICT in REVOKE"); + return parser_err!("Cannot specify both CASCADE and RESTRICT in REVOKE", loc); } Ok(Statement::Revoke { diff --git a/src/tokenizer.rs b/src/tokenizer.rs index f20e01b71..1b1b1e96c 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -350,7 +350,7 @@ impl fmt::Display for Whitespace { } /// Location in input string -#[derive(Debug, Eq, PartialEq, Clone)] +#[derive(Debug, Eq, PartialEq, Clone, Copy)] pub struct Location { /// Line number, starting from 1 pub line: u64, @@ -358,6 +358,20 @@ pub struct Location { pub column: u64, } +impl fmt::Display for Location { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if self.line == 0 { + return Ok(()); + } + write!( + f, + // TODO: use standard compiler location syntax (::) + " at Line: {}, Column {}", + self.line, self.column, + ) + } +} + /// A [Token] with [Location] attached to it #[derive(Debug, Eq, PartialEq, Clone)] pub struct TokenWithLocation { @@ -400,17 +414,12 @@ impl fmt::Display for TokenWithLocation { #[derive(Debug, PartialEq, Eq)] pub struct TokenizerError { pub message: String, - pub line: u64, - pub col: u64, + pub location: Location, } impl fmt::Display for TokenizerError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!( - f, - "{} at Line: {}, Column {}", - self.message, self.line, self.col - ) + write!(f, "{}{}", self.message, self.location,) } } @@ -546,10 +555,7 @@ impl<'a> Tokenizer<'a> { let mut location = state.location(); while let Some(token) = self.next_token(&mut state)? 
{ - tokens.push(TokenWithLocation { - token, - location: location.clone(), - }); + tokens.push(TokenWithLocation { token, location }); location = state.location(); } @@ -1122,8 +1128,7 @@ impl<'a> Tokenizer<'a> { ) -> Result { Err(TokenizerError { message: message.into(), - col: loc.column, - line: loc.line, + location: loc, }) } @@ -1368,8 +1373,7 @@ mod tests { fn tokenizer_error_impl() { let err = TokenizerError { message: "test".into(), - line: 1, - col: 1, + location: Location { line: 1, column: 1 }, }; #[cfg(feature = "std")] { @@ -1694,8 +1698,7 @@ mod tests { tokenizer.tokenize(), Err(TokenizerError { message: "Unterminated string literal".to_string(), - line: 1, - col: 8 + location: Location { line: 1, column: 8 }, }) ); } @@ -1710,8 +1713,10 @@ mod tests { tokenizer.tokenize(), Err(TokenizerError { message: "Unterminated string literal".to_string(), - line: 1, - col: 35 + location: Location { + line: 1, + column: 35 + } }) ); } @@ -1873,8 +1878,7 @@ mod tests { tokenizer.tokenize(), Err(TokenizerError { message: "Expected close delimiter '\"' before EOF.".to_string(), - line: 1, - col: 1 + location: Location { line: 1, column: 1 }, }) ); } From cad04dac2b03bd859eda3d28a3d6a68ff4c36be9 Mon Sep 17 00:00:00 2001 From: Martin Nowak Date: Sun, 27 Aug 2023 23:18:12 +0200 Subject: [PATCH 2/2] feat!: tokenize with locations by default - in parse_sql and try_with_sql - update test_parser_error_loc - change test_utils to parse without locations to avoid maintenance overhead BREAKING CHANGE: changes the default ParserError messages --- src/parser/mod.rs | 8 +++----- src/test_utils.rs | 8 +++++++- tests/sqlparser_common.rs | 30 +++++++++++++++++------------- 3 files changed, 27 insertions(+), 19 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 8f7c4b0cf..6902fc565 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -368,8 +368,8 @@ impl<'a> Parser<'a> { debug!("Parsing sql '{}'...", sql); let tokens = Tokenizer::new(self.dialect, sql) .with_unescape(self.options.unescape) - .tokenize()?; - Ok(self.with_tokens(tokens)) + .tokenize_with_location()?; + Ok(self.with_tokens_with_locations(tokens)) } /// Parse potentially multiple statements @@ -7962,14 +7962,12 @@ mod tests { #[test] fn test_parser_error_loc() { - // TODO: Once we thread token locations through the parser, we should update this - // test to assert the locations of the referenced token let sql = "SELECT this is a syntax error"; let ast = Parser::parse_sql(&GenericDialect, sql); assert_eq!( ast, Err(ParserError::ParserError( - "Expected [NOT] NULL or TRUE|FALSE or [NOT] DISTINCT FROM after IS, found: a" + "Expected [NOT] NULL or TRUE|FALSE or [NOT] DISTINCT FROM after IS, found: a at Line: 1, Column 16" .to_string() )) ); diff --git a/src/test_utils.rs b/src/test_utils.rs index 91130fb51..a5e9e739d 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -28,6 +28,7 @@ use core::fmt::Debug; use crate::dialect::*; use crate::parser::{Parser, ParserError}; +use crate::tokenizer::Tokenizer; use crate::{ast::*, parser::ParserOptions}; /// Tests use the methods on this struct to invoke the parser on one or @@ -82,8 +83,13 @@ impl TestedDialects { /// the result is the same for all tested dialects. 
pub fn parse_sql_statements(&self, sql: &str) -> Result, ParserError> { self.one_of_identical_results(|dialect| { + let mut tokenizer = Tokenizer::new(dialect, sql); + if let Some(options) = &self.options { + tokenizer = tokenizer.with_unescape(options.unescape); + } + let tokens = tokenizer.tokenize()?; self.new_parser(dialect) - .try_with_sql(sql)? + .with_tokens(tokens) .parse_statements() }) // To fail the `ensure_multiple_dialects_are_tested` test: diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 3fdf3d211..450743360 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -836,7 +836,12 @@ fn test_eof_after_as() { #[test] fn test_no_infix_error() { - let res = Parser::parse_sql(&ClickHouseDialect {}, "ASSERT-URA<<"); + let dialects = TestedDialects { + dialects: vec![Box::new(ClickHouseDialect {})], + options: None, + }; + + let res = dialects.parse_sql_statements("ASSERT-URA<<"); assert_eq!( ParserError::ParserError("No infix parser for token ShiftLeft".to_string()), res.unwrap_err() @@ -3238,19 +3243,21 @@ fn parse_alter_table_alter_column_type() { _ => unreachable!(), } - let res = Parser::parse_sql( - &GenericDialect {}, - &format!("{alter_stmt} ALTER COLUMN is_active TYPE TEXT"), - ); + let dialect = TestedDialects { + dialects: vec![Box::new(GenericDialect {})], + options: None, + }; + + let res = + dialect.parse_sql_statements(&format!("{alter_stmt} ALTER COLUMN is_active TYPE TEXT")); assert_eq!( ParserError::ParserError("Expected SET/DROP NOT NULL, SET DEFAULT, SET DATA TYPE after ALTER COLUMN, found: TYPE".to_string()), res.unwrap_err() ); - let res = Parser::parse_sql( - &GenericDialect {}, - &format!("{alter_stmt} ALTER COLUMN is_active SET DATA TYPE TEXT USING 'text'"), - ); + let res = dialect.parse_sql_statements(&format!( + "{alter_stmt} ALTER COLUMN is_active SET DATA TYPE TEXT USING 'text'" + )); assert_eq!( ParserError::ParserError("Expected end of statement, found: USING".to_string()), res.unwrap_err() @@ -3295,10 +3302,7 @@ fn parse_alter_table_drop_constraint() { _ => unreachable!(), } - let res = Parser::parse_sql( - &GenericDialect {}, - &format!("{alter_stmt} DROP CONSTRAINT is_active TEXT"), - ); + let res = parse_sql_statements(&format!("{alter_stmt} DROP CONSTRAINT is_active TEXT")); assert_eq!( ParserError::ParserError("Expected end of statement, found: TEXT".to_string()), res.unwrap_err()
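
---

For reference, a minimal sketch of the resulting error text (mirroring the updated
`test_parser_error_loc` test above; the trailing " at Line: .., Column .." comes from
the new `Display` impl for `Location`):

    use sqlparser::dialect::GenericDialect;
    use sqlparser::parser::Parser;

    fn main() {
        // `a` is the offending token after `IS`; `Location` lines and columns are 1-based.
        let err = Parser::parse_sql(&GenericDialect, "SELECT this is a syntax error")
            .unwrap_err();
        // Prints: ParserError("Expected [NOT] NULL or TRUE|FALSE or [NOT] DISTINCT FROM
        // after IS, found: a at Line: 1, Column 16")
        println!("{err:?}");
    }

Note that `Tokenizer::tokenize` (without locations) remains available; the second patch
switches `test_utils` to it so the bulk of the test suite stays location-free, while
`Parser::parse_sql` and `try_with_sql` now tokenize with locations by default.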