Skip to content

Commit

Permalink
chore: Upgrade to Winnow 0.7.0 (#79)
Browse files (browse the repository at this point in the history)
* chore: Upgrade to Winnow 0.6.26

* refactor: Resolve deprecations

* refactor: Switch from Parser to ModalParser

* chore: Upgrade to Winnow 0.7.0

* refactor: Remove use of ErrMode
  • Loading branch information
epage authored Feb 1, 2025
1 parent 7142a12 commit dcde8d1
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 50 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number | Diff line number | Diff line change
Expand Up @@ -14,7 +14,7 @@ categories = ["development-tools"]

[dependencies]
unicode_categories = "0.1.1"
winnow = { version = "0.6.23", features = ["simd"] }
winnow = { version = "0.7.0", features = ["simd"] }

[dev-dependencies]
criterion = "0.4"
Expand Down
89 changes: 40 additions & 49 deletions src/tokenizer.rs
Original file line number | Diff line number | Diff line change
Expand Up @@ -3,13 +3,10 @@ use unicode_categories::UnicodeCategories;
use winnow::ascii::{digit0, digit1, till_line_ending, Caseless};
use winnow::combinator::{alt, dispatch, eof, fail, opt, peek, terminated};
use winnow::error::ContextError;
use winnow::error::ErrMode;
use winnow::error::ErrorKind;
use winnow::error::ParserError as _;
use winnow::error::ParserError;
use winnow::prelude::*;
use winnow::stream::{ContainsToken as _, Stream as _};
use winnow::token::{any, one_of, rest, take, take_until, take_while};
use winnow::PResult;
use winnow::Result;

pub(crate) fn tokenize(mut input: &str, named_placeholders: bool) -> Vec<Token<'_>> {
let mut tokens: Vec<Token> = Vec::new();
Expand Down Expand Up @@ -101,7 +98,7 @@ fn get_next_token<'a>(
last_reserved_token: Option<Token<'a>>,
last_reserved_top_level_token: Option<Token<'a>>,
named_placeholders: bool,
) -> PResult<Token<'a>> {
) -> Result<Token<'a>> {
alt((
get_comment_token,
get_string_token,
Expand All @@ -124,14 +121,14 @@ fn get_next_token<'a>(
))
.parse_next(input)
}
fn get_double_colon_token<'i>(input: &mut &'i str) -> PResult<Token<'i>> {
fn get_double_colon_token<'i>(input: &mut &'i str) -> Result<Token<'i>> {
"::".parse_next(input).map(|token| Token {
kind: TokenKind::DoubleColon,
value: token,
key: None,
})
}
fn get_whitespace_token<'i>(input: &mut &'i str) -> PResult<Token<'i>> {
fn get_whitespace_token<'i>(input: &mut &'i str) -> Result<Token<'i>> {
take_while(1.., char::is_whitespace)
.parse_next(input)
.map(|token| Token {
Expand All @@ -141,7 +138,7 @@ fn get_whitespace_token<'i>(input: &mut &'i str) -> PResult<Token<'i>> {
})
}

fn get_comment_token<'i>(input: &mut &'i str) -> PResult<Token<'i>> {
fn get_comment_token<'i>(input: &mut &'i str) -> Result<Token<'i>> {
dispatch! {any;
'#' => till_line_ending.value(TokenKind::LineComment),
'-' => ('-', till_line_ending).value(TokenKind::LineComment),
Expand Down Expand Up @@ -193,7 +190,7 @@ pub fn take_till_escaping<'a>(
// 3. double quoted string using "" or \" to escape
// 4. single quoted string using '' or \' to escape
// 5. national character quoted string using N'' or N\' to escape
fn get_string_token<'i>(input: &mut &'i str) -> PResult<Token<'i>> {
fn get_string_token<'i>(input: &mut &'i str) -> Result<Token<'i>> {
dispatch! {any;
'`' => (take_till_escaping('`', &['`']), any).void(),
'[' => (take_till_escaping(']', &[']']), any).void(),
Expand All @@ -213,7 +210,7 @@ fn get_string_token<'i>(input: &mut &'i str) -> PResult<Token<'i>> {
}

// Like above but it doesn't replace double quotes
fn get_placeholder_string_token<'i>(input: &mut &'i str) -> PResult<Token<'i>> {
fn get_placeholder_string_token<'i>(input: &mut &'i str) -> Result<Token<'i>> {
dispatch! {any;
'`'=>( take_till_escaping('`', &['`']), any).void(),
'['=>( take_till_escaping(']', &[']']), any).void(),
Expand All @@ -231,7 +228,7 @@ fn get_placeholder_string_token<'i>(input: &mut &'i str) -> PResult<Token<'i>> {
})
}

fn get_open_paren_token<'i>(input: &mut &'i str) -> PResult<Token<'i>> {
fn get_open_paren_token<'i>(input: &mut &'i str) -> Result<Token<'i>> {
alt(("(", terminated(Caseless("CASE"), end_of_word)))
.parse_next(input)
.map(|token| Token {
Expand All @@ -241,7 +238,7 @@ fn get_open_paren_token<'i>(input: &mut &'i str) -> PResult<Token<'i>> {
})
}

fn get_close_paren_token<'i>(input: &mut &'i str) -> PResult<Token<'i>> {
fn get_close_paren_token<'i>(input: &mut &'i str) -> Result<Token<'i>> {
alt((")", terminated(Caseless("END"), end_of_word)))
.parse_next(input)
.map(|token| Token {
Expand All @@ -251,7 +248,7 @@ fn get_close_paren_token<'i>(input: &mut &'i str) -> PResult<Token<'i>> {
})
}

fn get_placeholder_token<'i>(input: &mut &'i str, named_placeholders: bool) -> PResult<Token<'i>> {
fn get_placeholder_token<'i>(input: &mut &'i str, named_placeholders: bool) -> Result<Token<'i>> {
// The precedence changes based on 'named_placeholders' but not the exhaustiveness.
// This is to ensure the formatting is the same even if parameters aren't used.

Expand All @@ -272,7 +269,7 @@ fn get_placeholder_token<'i>(input: &mut &'i str, named_placeholders: bool) -> P
}
}

fn get_indexed_placeholder_token<'i>(input: &mut &'i str) -> PResult<Token<'i>> {
fn get_indexed_placeholder_token<'i>(input: &mut &'i str) -> Result<Token<'i>> {
alt(((one_of(('?', '$')), digit1).take(), "?"))
.parse_next(input)
.map(|token| Token {
Expand All @@ -294,7 +291,7 @@ fn get_indexed_placeholder_token<'i>(input: &mut &'i str) -> PResult<Token<'i>>
})
}

fn get_ident_named_placeholder_token<'i>(input: &mut &'i str) -> PResult<Token<'i>> {
fn get_ident_named_placeholder_token<'i>(input: &mut &'i str) -> Result<Token<'i>> {
(
one_of(('@', ':', '$')),
take_while(1.., |item: char| {
Expand All @@ -313,7 +310,7 @@ fn get_ident_named_placeholder_token<'i>(input: &mut &'i str) -> PResult<Token<'
})
}

fn get_string_named_placeholder_token<'i>(input: &mut &'i str) -> PResult<Token<'i>> {
fn get_string_named_placeholder_token<'i>(input: &mut &'i str) -> Result<Token<'i>> {
(one_of(('@', ':')), get_placeholder_string_token)
.take()
.parse_next(input)
Expand All @@ -332,7 +329,7 @@ fn get_escaped_placeholder_key<'a>(key: &'a str, quote_char: &str) -> Cow<'a, st
Cow::Owned(key.replace(&format!("\\{}", quote_char), quote_char))
}

fn get_number_token<'i>(input: &mut &'i str) -> PResult<Token<'i>> {
fn get_number_token<'i>(input: &mut &'i str) -> Result<Token<'i>> {
(opt("-"), alt((scientific_notation, decimal_number, digit1)))
.take()
.parse_next(input)
Expand All @@ -343,11 +340,11 @@ fn get_number_token<'i>(input: &mut &'i str) -> PResult<Token<'i>> {
})
}

fn decimal_number<'i>(input: &mut &'i str) -> PResult<&'i str> {
fn decimal_number<'i>(input: &mut &'i str) -> Result<&'i str> {
(digit1, ".", digit0).take().parse_next(input)
}

fn scientific_notation<'i>(input: &mut &'i str) -> PResult<&'i str> {
fn scientific_notation<'i>(input: &mut &'i str) -> Result<&'i str> {
(
alt((decimal_number, digit1)),
"e",
Expand All @@ -363,17 +360,17 @@ fn get_reserved_word_token<'a>(
previous_token: Option<Token<'a>>,
last_reserved_token: Option<Token<'a>>,
last_reserved_top_level_token: Option<Token<'a>>,
) -> PResult<Token<'a>> {
) -> Result<Token<'a>> {
// A reserved word cannot be preceded by a "."
// this makes it so in "my_table.from", "from" is not considered a reserved word
if let Some(token) = previous_token {
if token.value == "." {
return Err(ErrMode::from_error_kind(input, ErrorKind::Slice));
return Err(ParserError::from_input(input));
}
}

if !('a'..='z', 'A'..='Z', '$').contains_token(input.chars().next().unwrap_or('\0')) {
return Err(ErrMode::from_error_kind(input, ErrorKind::Slice));
return Err(ParserError::from_input(input));
}

alt((
Expand Down Expand Up @@ -406,7 +403,7 @@ fn get_top_level_reserved_token<'a>(
let first_char = peek(any).parse_next(input)?.to_ascii_uppercase();

// Match keywords based on their first letter
let result: PResult<&str> = match first_char {
let result: Result<&str> = match first_char {
'A' => alt((
terminated("ADD", end_of_word),
terminated("AFTER", end_of_word),
Expand Down Expand Up @@ -462,10 +459,7 @@ fn get_top_level_reserved_token<'a>(
'W' => terminated("WHERE", end_of_word).parse_next(&mut uc_input),

// If the first character doesn't match any of our keywords, fail early
_ => Err(ErrMode::from_error_kind(
&uc_input,
winnow::error::ErrorKind::Tag,
)),
_ => Err(ParserError::from_input(&uc_input)),
};

if let Ok(token) = result {
Expand All @@ -490,7 +484,7 @@ fn get_top_level_reserved_token<'a>(
key: None,
})
} else {
Err(ErrMode::from_error_kind(input, ErrorKind::Tag))
Err(ParserError::from_input(input))
}
}
}
Expand Down Expand Up @@ -554,7 +548,7 @@ fn get_newline_reserved_token<'a>(
));

// Combine all parsers
let result: PResult<&str> = alt((standard_joins, specific_joins, special_joins, operators))
let result: Result<&str> = alt((standard_joins, specific_joins, special_joins, operators))
.parse_next(&mut uc_input);

if let Ok(token) = result {
Expand All @@ -577,16 +571,16 @@ fn get_newline_reserved_token<'a>(
key: None,
})
} else {
Err(ErrMode::from_error_kind(input, ErrorKind::Alt))
Err(ParserError::from_input(input))
}
}
}

fn get_top_level_reserved_token_no_indent<'i>(input: &mut &'i str) -> PResult<Token<'i>> {
fn get_top_level_reserved_token_no_indent<'i>(input: &mut &'i str) -> Result<Token<'i>> {
let uc_input = get_uc_words(input, 2);
let mut uc_input = uc_input.as_str();

let result: PResult<&str> = alt((
let result: Result<&str> = alt((
terminated("BEGIN", end_of_word),
terminated("DECLARE", end_of_word),
terminated("INTERSECT", end_of_word),
Expand All @@ -608,19 +602,19 @@ fn get_top_level_reserved_token_no_indent<'i>(input: &mut &'i str) -> PResult<To
key: None,
})
} else {
Err(ErrMode::from_error_kind(input, ErrorKind::Alt))
Err(ParserError::from_input(input))
}
}
fn get_plain_reserved_token<'i>(input: &mut &'i str) -> PResult<Token<'i>> {
fn get_plain_reserved_token<'i>(input: &mut &'i str) -> Result<Token<'i>> {
alt((get_plain_reserved_two_token, get_plain_reserved_one_token)).parse_next(input)
}
fn get_plain_reserved_one_token<'i>(input: &mut &'i str) -> PResult<Token<'i>> {
fn get_plain_reserved_one_token<'i>(input: &mut &'i str) -> Result<Token<'i>> {
let uc_input = get_uc_words(input, 1);
let mut uc_input = uc_input.as_str();

let first_char = peek(any).parse_next(input)?.to_ascii_uppercase();

let result: PResult<&str> = match first_char {
let result: Result<&str> = match first_char {
'A' => alt((
terminated("ACCESSIBLE", end_of_word),
terminated("ACTION", end_of_word),
Expand Down Expand Up @@ -995,10 +989,7 @@ fn get_plain_reserved_one_token<'i>(input: &mut &'i str) -> PResult<Token<'i>> {

'Y' => alt((terminated("YEAR_MONTH", end_of_word),)).parse_next(&mut uc_input),
// If the first character doesn't match any of our keywords, fail early
_ => Err(ErrMode::from_error_kind(
&uc_input,
winnow::error::ErrorKind::Tag,
)),
_ => Err(ParserError::from_input(&uc_input)),
};
if let Ok(token) = result {
let input_end_pos = token.len();
Expand All @@ -1009,14 +1000,14 @@ fn get_plain_reserved_one_token<'i>(input: &mut &'i str) -> PResult<Token<'i>> {
key: None,
})
} else {
Err(ErrMode::from_error_kind(input, ErrorKind::Alt))
Err(ParserError::from_input(input))
}
}

fn get_plain_reserved_two_token<'i>(input: &mut &'i str) -> PResult<Token<'i>> {
fn get_plain_reserved_two_token<'i>(input: &mut &'i str) -> Result<Token<'i>> {
let uc_input = get_uc_words(input, 2);
let mut uc_input = uc_input.as_str();
let result: PResult<&str> = alt((
let result: Result<&str> = alt((
terminated("CHARACTER SET", end_of_word),
terminated("ON DELETE", end_of_word),
terminated("ON UPDATE", end_of_word),
Expand All @@ -1032,11 +1023,11 @@ fn get_plain_reserved_two_token<'i>(input: &mut &'i str) -> PResult<Token<'i>> {
key: None,
})
} else {
Err(ErrMode::from_error_kind(input, ErrorKind::Alt))
Err(ParserError::from_input(input))
}
}

fn get_word_token<'i>(input: &mut &'i str) -> PResult<Token<'i>> {
fn get_word_token<'i>(input: &mut &'i str) -> Result<Token<'i>> {
take_while(1.., is_word_character)
.parse_next(input)
.map(|token| Token {
Expand All @@ -1046,7 +1037,7 @@ fn get_word_token<'i>(input: &mut &'i str) -> PResult<Token<'i>> {
})
}

fn get_operator_token<'i>(input: &mut &'i str) -> PResult<Token<'i>> {
fn get_operator_token<'i>(input: &mut &'i str) -> Result<Token<'i>> {
// Define the allowed operator characters
let allowed_operators = (
'!', '<', '>', '=', '|', ':', '-', '~', '*', '&', '@', '^', '?', '#', '/', '%',
Expand All @@ -1060,7 +1051,7 @@ fn get_operator_token<'i>(input: &mut &'i str) -> PResult<Token<'i>> {
})
.parse_next(input)
}
fn get_any_other_char<'i>(input: &mut &'i str) -> PResult<Token<'i>> {
fn get_any_other_char<'i>(input: &mut &'i str) -> Result<Token<'i>> {
one_of(|token| token != '\n' && token != '\r')
.take()
.parse_next(input)
Expand All @@ -1071,7 +1062,7 @@ fn get_any_other_char<'i>(input: &mut &'i str) -> PResult<Token<'i>> {
})
}

fn end_of_word<'i>(input: &mut &'i str) -> PResult<&'i str> {
fn end_of_word<'i>(input: &mut &'i str) -> Result<&'i str> {
peek(alt((
eof,
one_of(|val: char| !is_word_character(val)).take(),
Expand Down

0 comments on commit dcde8d1

Please sign in to comment.