Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Correctly look for end delimiter dollar quoted string #1650

Merged
merged 7 commits
Jan 12, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
176 changes: 129 additions & 47 deletions src/tokenizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1554,46 +1554,33 @@ impl<'a> Tokenizer<'a> {
if matches!(chars.peek(), Some('$')) && !self.dialect.supports_dollar_placeholder() {
chars.next();

'searching_for_end: loop {
s.push_str(&peeking_take_while(chars, |ch| ch != '$'));
match chars.peek() {
Some('$') => {
chars.next();
let mut maybe_s = String::from("$");
for c in value.chars() {
if let Some(next_char) = chars.next() {
maybe_s.push(next_char);
if next_char != c {
// This doesn't match the dollar quote delimiter so this
// is not the end of the string.
s.push_str(&maybe_s);
continue 'searching_for_end;
}
} else {
return self.tokenizer_error(
chars.location(),
"Unterminated dollar-quoted, expected $",
);
let mut temp = String::new();
let end_delimiter = format!("${}$", value);

loop {
match chars.next() {
Some(ch) => {
temp.push(ch);

if temp.ends_with(&end_delimiter) {
if let Some(temp) = temp.strip_suffix(&end_delimiter) {
s.push_str(temp);
}
}
if chars.peek() == Some(&'$') {
chars.next();
maybe_s.push('$');
// maybe_s matches the end delimiter
break 'searching_for_end;
} else {
// This also doesn't match the dollar quote delimiter as there are
// more characters before the second dollar so this is not the end
// of the string.
s.push_str(&maybe_s);
continue 'searching_for_end;
break;
}
}
_ => {
None => {
if temp.ends_with(&end_delimiter) {
if let Some(temp) = temp.strip_suffix(&end_delimiter) {
s.push_str(temp);
}
break;
}

return self.tokenizer_error(
chars.location(),
"Unterminated dollar-quoted, expected $",
)
);
}
}
}
Expand Down Expand Up @@ -2551,20 +2538,67 @@ mod tests {

#[test]
fn tokenize_dollar_quoted_string_tagged() {
let sql = String::from(
"SELECT $tag$dollar '$' quoted strings have $tags like this$ or like this $$$tag$",
);
let dialect = GenericDialect {};
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
let expected = vec![
Token::make_keyword("SELECT"),
Token::Whitespace(Whitespace::Space),
Token::DollarQuotedString(DollarQuotedString {
value: "dollar '$' quoted strings have $tags like this$ or like this $$".into(),
tag: Some("tag".into()),
}),
let test_cases = vec![
(
String::from("SELECT $tag$dollar '$' quoted strings have $tags like this$ or like this $$$tag$"),
vec![
Token::make_keyword("SELECT"),
Token::Whitespace(Whitespace::Space),
Token::DollarQuotedString(DollarQuotedString {
value: "dollar '$' quoted strings have $tags like this$ or like this $$".into(),
tag: Some("tag".into()),
})
]
),
(
String::from("SELECT $abc$x$ab$abc$"),
vec![
Token::make_keyword("SELECT"),
Token::Whitespace(Whitespace::Space),
Token::DollarQuotedString(DollarQuotedString {
value: "x$ab".into(),
tag: Some("abc".into()),
})
]
),
(
String::from("SELECT $abc$$abc$"),
vec![
Token::make_keyword("SELECT"),
Token::Whitespace(Whitespace::Space),
Token::DollarQuotedString(DollarQuotedString {
value: "".into(),
tag: Some("abc".into()),
})
]
),
(
String::from("0$abc$$abc$1"),
vec![
Token::Number("0".into(), false),
Token::DollarQuotedString(DollarQuotedString {
value: "".into(),
tag: Some("abc".into()),
}),
Token::Number("1".into(), false),
]
),
(
String::from("$function$abc$q$data$q$$function$"),
vec![
Token::DollarQuotedString(DollarQuotedString {
value: "abc$q$data$q$".into(),
tag: Some("function".into()),
}),
]
),
];
compare(expected, tokens);

let dialect = GenericDialect {};
for (sql, expected) in test_cases {
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
compare(expected, tokens);
}
}

#[test]
Expand All @@ -2583,6 +2617,22 @@ mod tests {
);
}

#[test]
fn tokenize_dollar_quoted_string_tagged_unterminated_mirror() {
    // Input whose body mirrors the opening tag but is missing the trailing
    // `$` of the closing delimiter: the tokenizer must report it as
    // unterminated rather than accept the partial mirror as an end marker.
    let dialect = GenericDialect {};
    let result = Tokenizer::new(&dialect, "SELECT $abc$abc$").tokenize();
    let expected_err = TokenizerError {
        message: "Unterminated dollar-quoted, expected $".into(),
        location: Location {
            line: 1,
            column: 17,
        },
    };
    assert_eq!(result, Err(expected_err));
}

#[test]
fn tokenize_dollar_placeholder() {
let sql = String::from("SELECT $$, $$ABC$$, $ABC$, $ABC");
Expand All @@ -2607,6 +2657,38 @@ mod tests {
);
}

#[test]
fn tokenize_nested_dollar_quoted_strings() {
    // A differently-tagged `$nested$` marker inside the body must be kept
    // verbatim; only the exact `$tag$` delimiter ends the string.
    let dialect = GenericDialect {};
    let sql = String::from("SELECT $tag$dollar $nested$ string$tag$");
    let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
    compare(
        vec![
            Token::make_keyword("SELECT"),
            Token::Whitespace(Whitespace::Space),
            Token::DollarQuotedString(DollarQuotedString {
                value: "dollar $nested$ string".into(),
                tag: Some("tag".into()),
            }),
        ],
        tokens,
    );
}

#[test]
fn tokenize_dollar_quoted_string_untagged_empty() {
    // `$$$$` is an empty, untagged dollar-quoted string: the second `$$`
    // immediately closes the string opened by the first.
    let dialect = GenericDialect {};
    let sql = String::from("SELECT $$$$");
    let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
    compare(
        vec![
            Token::make_keyword("SELECT"),
            Token::Whitespace(Whitespace::Space),
            Token::DollarQuotedString(DollarQuotedString {
                value: "".into(),
                tag: None,
            }),
        ],
        tokens,
    );
}

#[test]
fn tokenize_dollar_quoted_string_untagged() {
let sql =
Expand Down
Loading