From 29073c0344051830f68061a1c80a68884de9a0a8 Mon Sep 17 00:00:00 2001
From: yuanbohan
Date: Mon, 19 Dec 2022 21:40:12 +0800
Subject: [PATCH] misc(lex): use const instead of lazy_static

---
 src/parser/lex.rs   | 39 ++++++++++-----------
 src/parser/token.rs | 84 ---------------------------------------------
 2 files changed, 18 insertions(+), 105 deletions(-)

diff --git a/src/parser/lex.rs b/src/parser/lex.rs
index bf8f5ff..9fc3bce 100644
--- a/src/parser/lex.rs
+++ b/src/parser/lex.rs
@@ -13,18 +13,12 @@
 // limitations under the License.
 
 use crate::parser::token::*;
-use lazy_static::lazy_static;
 use lrlex::{DefaultLexeme, LRNonStreamingLexer};
 use lrpar::Lexeme;
-use std::{collections::HashSet, fmt::Debug};
-
-lazy_static! {
-    static ref ALL_DURATION_UNITS: HashSet<char> = HashSet::from(['s', 'm', 'h', 'd', 'w', 'y']);
-    static ref ALL_DURATION_BUT_YEAR_UNITS: HashSet<char> =
-        HashSet::from(['s', 'm', 'h', 'd', 'w']);
-    static ref NORMAL_ESCAPE_SYMBOL_SET: HashSet<char> = "abfnrtv\\".chars().into_iter().collect();
-    static ref STRING_SYMBOL_SET: HashSet<char> = HashSet::from(['"', '`', '\'']);
-}
+use std::fmt::Debug;
+
+const ESCAPE_SYMBOLS: &str = r#"abfnrtv\"#;
+const STRING_SYMBOLS: &str = r#"'"`"#;
 
 pub type LexemeType = DefaultLexeme<TokenType>;
 
@@ -235,7 +229,7 @@ impl Lexer {
         // If different orders result in different states, then it has to be fixed.
         self.state = match self.state {
             State::Start => self.start(),
-            State::End => panic!("End state can not shift forward."),
+            State::End => State::Err("End state can not shift forward.".into()),
             State::Lexeme(_) => State::Start,
             State::String(ch) => self.accept_string(ch),
             State::KeywordOrIdentifier => self.accept_keyword_or_identifier(),
@@ -319,7 +313,7 @@ impl Lexer {
                 self.backup();
                 State::KeywordOrIdentifier
             }
-            ch if STRING_SYMBOL_SET.contains(&ch) => State::String(ch),
+            ch if STRING_SYMBOLS.contains(ch) => State::String(ch),
             '(' => {
                 self.inc_paren_depth();
                 State::Lexeme(T_LEFT_PAREN)
@@ -480,7 +474,7 @@ impl Lexer {
     /// true only if the char after duration is not alphanumeric.
     fn accept_remaining_duration(&mut self) -> bool {
         // Next two char must be a valid duration.
-        if !self.accept(|ch| ALL_DURATION_UNITS.contains(&ch)) {
+        if !self.accept(|ch| "smhdwy".contains(ch)) {
             return false;
         }
         // Support for ms. Bad units like hs, ys will be caught when we actually
@@ -491,7 +485,7 @@ impl Lexer {
         while self.accept(|ch| ch.is_ascii_digit()) {
             self.accept_run(|ch| ch.is_ascii_digit());
             // y is no longer in the list as it should always come first in durations.
-            if !self.accept(|ch| ALL_DURATION_BUT_YEAR_UNITS.contains(&ch)) {
+            if !self.accept(|ch| "smhdw".contains(ch)) {
                 return false;
             }
             // Support for ms. Bad units like hs, ys will be caught when we actually
@@ -508,9 +502,7 @@ impl Lexer {
     // https://github.com/prometheus/prometheus/blob/0372e259baf014bbade3134fd79bcdfd8cbdef2c/promql/parser/lex.go#L552
     fn accept_escape(&mut self, symbol: char) -> State {
         match self.pop() {
-            Some(ch) if ch == symbol || NORMAL_ESCAPE_SYMBOL_SET.contains(&ch) => {
-                State::String(symbol)
-            }
+            Some(ch) if ch == symbol || ESCAPE_SYMBOLS.contains(ch) => State::String(symbol),
             Some(_) => State::String(symbol),
             None => State::Err("escape sequence not terminated".into()),
         }
@@ -539,7 +531,7 @@ impl Lexer {
             Some(',') => State::Lexeme(T_COMMA),
             Some(ch) if ch.is_ascii_whitespace() => State::Space,
             Some(ch) if is_alpha(ch) => State::Identifier,
-            Some(ch) if STRING_SYMBOL_SET.contains(&ch) => State::String(ch),
+            Some(ch) if STRING_SYMBOLS.contains(ch) => State::String(ch),
             Some('=') => match self.peek() {
                 Some('~') => {
                     self.pop();
@@ -657,6 +649,7 @@ mod tests {
 
     fn assert_matches(v: Vec<(&str, Vec<Result<LexemeType, String>>)>) {
         let cases: Vec<(
+            &str,
             Vec<Result<LexemeType, String>>,
             Vec<Result<LexemeType, String>>,
         )> = v
@@ -673,12 +666,16 @@ mod tests {
             let actual: Vec<Result<LexemeType, String>> =
                 Lexer::new(input).into_iter().collect();
 
-            (expected, actual)
+            (input, expected, actual)
         })
         .collect();
 
-        for (expected, actual) in cases.iter() {
-            assert_eq!(expected, actual);
+        for (input, expected, actual) in cases.iter() {
+            assert_eq!(
+                expected, actual,
+                "input: {}, expected: {:?}, actual: {:?}",
+                input, expected, actual
+            );
         }
     }

diff --git a/src/parser/token.rs b/src/parser/token.rs
index 9331c8b..697f9e7 100644
--- a/src/parser/token.rs
+++ b/src/parser/token.rs
@@ -12,8 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-use lazy_static::lazy_static;
-use std::collections::HashMap;
 use std::fmt::{self, Display};
 
 lrlex::lrlex_mod!("token_map");
@@ -21,90 +19,8 @@ pub use token_map::*;
 
 pub type TokenType = u8;
 
-lazy_static! {
-    static ref TOKEN_DISPLAY: HashMap<TokenType, &'static str> =
-        [
-            // Token.
-            (T_EQL, "="),
-            (T_BLANK, "_"),
-            (T_COLON, ":"),
-            (T_COMMA, ","),
-            (T_COMMENT, "#"),
-            (T_DURATION, "[du]"),
-            (T_EOF, ""),
-            (T_ERROR, "{Err}"),
-            (T_IDENTIFIER, "{ID}"),
-            (T_LEFT_BRACE, "{"),
-            (T_LEFT_BRACKET, "["),
-            (T_LEFT_PAREN, "("),
-            (T_METRIC_IDENTIFIER, "{Metric_ID}"),
-            (T_NUMBER, "{Num}"),
-            (T_RIGHT_BRACE, "}"),
-            (T_RIGHT_BRACKET, "]"),
-            (T_RIGHT_PAREN, ")"),
-            (T_SEMICOLON, ","),
-            (T_SPACE, ""),
-            (T_STRING, "{Str}"),
-            (T_TIMES, "x"),
-
-            // Operators.
-            (T_ADD, "+"),
-            (T_DIV, "/"),
-            (T_EQLC, "=="),
-            (T_EQL_REGEX, "=~"),
-            (T_GTE, ">="),
-            (T_GTR, ">"),
-            (T_LAND, "and"),
-            (T_LOR, "or"),
-            (T_LSS, "<"),
-            (T_LTE, "<="),
-            (T_LUNLESS, "unless"),
-            (T_MOD, "%"),
-            (T_MUL, "*"),
-            (T_NEQ, "!="),
-            (T_NEQ_REGEX, "!~"),
-            (T_POW, "^"),
-            (T_SUB, "-"),
-            (T_AT, "@"),
-            (T_ATAN2, "atan2"),
-
-            // Aggregators.
-            (T_AVG, "avg"),
-            (T_BOTTOMK, "bottomk"),
-            (T_COUNT, "count"),
-            (T_COUNT_VALUES, "count_values"),
-            (T_GROUP, "group"),
-            (T_MAX, "max"),
-            (T_MIN, "min"),
-            (T_QUANTILE, "quantile"),
-            (T_STDDEV, "stddev"),
-            (T_STDVAR, "stdvar"),
-            (T_SUM, "sum"),
-            (T_TOPK, "topk"),
-
-            // Keywords.
-            (T_BOOL, "bool"),
-            (T_BY, "by"),
-            (T_GROUP_LEFT, "group_left"),
-            (T_GROUP_RIGHT, "group_right"),
-            (T_IGNORING, "ignoring"),
-            (T_OFFSET, "offset"),
-            (T_ON, "on"),
-            (T_WITHOUT, "without"),
-
-            // Preprocessors.
-            (T_START, "start"),
-            (T_END, "end")
-        ].into_iter().collect();
-}
-
 /// this is for debug so far, maybe pretty feature in the future.
 pub fn token_display(id: TokenType) -> &'static str {
-    // match TOKEN_DISPLAY.get(&id) {
-    //     Some(&display) => display.into(),
-    //     None => format!("unknown token id <{id}>"),
-    // }
-
     match id {
         // Token.
         T_EQL => "=",
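
Note: str::contains accepts a char pattern by value, so each HashSet<char>
lookup of the form SET.contains(&ch) translates one-for-one into
CONST.contains(ch). For sets this small, a linear scan of a short &str is
comparable to (and often faster than) hashing, and it also drops the
lazy_static runtime initialization along with the dependency. A minimal
standalone sketch of the equivalence; the is_duration_unit helper is
illustrative only, not part of this patch:

    const ESCAPE_SYMBOLS: &str = r#"abfnrtv\"#;
    const STRING_SYMBOLS: &str = r#"'"`"#;

    // Illustrative helper mirroring the patch's "smhdwy".contains(ch) guard.
    fn is_duration_unit(ch: char) -> bool {
        "smhdwy".contains(ch)
    }

    fn main() {
        assert!(is_duration_unit('m'));
        assert!(!is_duration_unit('x'));
        // '\\' is a valid escape symbol; quotes and backticks open strings.
        assert!(ESCAPE_SYMBOLS.contains('\\'));
        assert!(STRING_SYMBOLS.contains('"'));
        assert!(STRING_SYMBOLS.contains('`'));
    }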