Skip to content

Commit

Permalink
misc(lex): use const instead of lazy_static
Browse files Browse the repository at this point in the history
  • Loading branch information
yuanbohan committed Dec 19, 2022
1 parent 0a6cbe0 commit 29073c0
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 105 deletions.
39 changes: 18 additions & 21 deletions src/parser/lex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,12 @@
// limitations under the License.

use crate::parser::token::*;
use lazy_static::lazy_static;
use lrlex::{DefaultLexeme, LRNonStreamingLexer};
use lrpar::Lexeme;
use std::{collections::HashSet, fmt::Debug};

lazy_static! {
static ref ALL_DURATION_UNITS: HashSet<char> = HashSet::from(['s', 'm', 'h', 'd', 'w', 'y']);
static ref ALL_DURATION_BUT_YEAR_UNITS: HashSet<char> =
HashSet::from(['s', 'm', 'h', 'd', 'w']);
static ref NORMAL_ESCAPE_SYMBOL_SET: HashSet<char> = "abfnrtv\\".chars().into_iter().collect();
static ref STRING_SYMBOL_SET: HashSet<char> = HashSet::from(['"', '`', '\'']);
}
use std::fmt::Debug;

// Characters that may legally follow a backslash in an escape sequence,
// in addition to the surrounding quote symbol itself (checked in `accept_escape`:
// `ch == symbol || ESCAPE_SYMBOLS.contains(ch)`). Raw string so `\` needs no doubling.
const ESCAPE_SYMBOLS: &str = r#"abfnrtv\"#;
// Quote characters that can open a string literal; a matching char shifts the
// lexer into `State::String(ch)`.
const STRING_SYMBOLS: &str = r#"'"`"#;

pub type LexemeType = DefaultLexeme<TokenType>;

Expand Down Expand Up @@ -235,7 +229,7 @@ impl Lexer {
// If different orders result in different states, then it has to be fixed.
self.state = match self.state {
State::Start => self.start(),
State::End => panic!("End state can not shift forward."),
State::End => State::Err("End state can not shift forward.".into()),
State::Lexeme(_) => State::Start,
State::String(ch) => self.accept_string(ch),
State::KeywordOrIdentifier => self.accept_keyword_or_identifier(),
Expand Down Expand Up @@ -319,7 +313,7 @@ impl Lexer {
self.backup();
State::KeywordOrIdentifier
}
ch if STRING_SYMBOL_SET.contains(&ch) => State::String(ch),
ch if STRING_SYMBOLS.contains(ch) => State::String(ch),
'(' => {
self.inc_paren_depth();
State::Lexeme(T_LEFT_PAREN)
Expand Down Expand Up @@ -480,7 +474,7 @@ impl Lexer {
/// true only if the char after duration is not alphanumeric.
fn accept_remaining_duration(&mut self) -> bool {
// Next two char must be a valid duration.
if !self.accept(|ch| ALL_DURATION_UNITS.contains(&ch)) {
if !self.accept(|ch| "smhdwy".contains(ch)) {
return false;
}
// Support for ms. Bad units like hs, ys will be caught when we actually
Expand All @@ -491,7 +485,7 @@ impl Lexer {
while self.accept(|ch| ch.is_ascii_digit()) {
self.accept_run(|ch| ch.is_ascii_digit());
// y is no longer in the list as it should always come first in durations.
if !self.accept(|ch| ALL_DURATION_BUT_YEAR_UNITS.contains(&ch)) {
if !self.accept(|ch| "smhdw".contains(ch)) {
return false;
}
// Support for ms. Bad units like hs, ys will be caught when we actually
Expand All @@ -508,9 +502,7 @@ impl Lexer {
// https://github.com/prometheus/prometheus/blob/0372e259baf014bbade3134fd79bcdfd8cbdef2c/promql/parser/lex.go#L552
fn accept_escape(&mut self, symbol: char) -> State {
match self.pop() {
Some(ch) if ch == symbol || NORMAL_ESCAPE_SYMBOL_SET.contains(&ch) => {
State::String(symbol)
}
Some(ch) if ch == symbol || ESCAPE_SYMBOLS.contains(ch) => State::String(symbol),
Some(_) => State::String(symbol),
None => State::Err("escape sequence not terminated".into()),
}
Expand Down Expand Up @@ -539,7 +531,7 @@ impl Lexer {
Some(',') => State::Lexeme(T_COMMA),
Some(ch) if ch.is_ascii_whitespace() => State::Space,
Some(ch) if is_alpha(ch) => State::Identifier,
Some(ch) if STRING_SYMBOL_SET.contains(&ch) => State::String(ch),
Some(ch) if STRING_SYMBOLS.contains(ch) => State::String(ch),
Some('=') => match self.peek() {
Some('~') => {
self.pop();
Expand Down Expand Up @@ -657,6 +649,7 @@ mod tests {

fn assert_matches(v: Vec<MatchTuple>) {
let cases: Vec<(
&str,
Vec<Result<LexemeType, String>>,
Vec<Result<LexemeType, String>>,
)> = v
Expand All @@ -673,12 +666,16 @@ mod tests {

let actual: Vec<Result<LexemeType, String>> =
Lexer::new(input).into_iter().collect();
(expected, actual)
(input, expected, actual)
})
.collect();

for (expected, actual) in cases.iter() {
assert_eq!(expected, actual);
for (input, expected, actual) in cases.iter() {
assert_eq!(
expected, actual,
"input: {}, expected: {:?}, actual: {:?}",
input, expected, actual
);
}
}

Expand Down
84 changes: 0 additions & 84 deletions src/parser/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,99 +12,15 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use lazy_static::lazy_static;
use std::collections::HashMap;
use std::fmt::{self, Display};

lrlex::lrlex_mod!("token_map");
pub use token_map::*;

pub type TokenType = u8;

lazy_static! {
static ref TOKEN_DISPLAY: HashMap<TokenType, &'static str> =
[
// Token.
(T_EQL, "="),
(T_BLANK, "_"),
(T_COLON, ":"),
(T_COMMA, ","),
(T_COMMENT, "#"),
(T_DURATION, "[du]"),
(T_EOF, "<eof>"),
(T_ERROR, "{Err}"),
(T_IDENTIFIER, "{ID}"),
(T_LEFT_BRACE, "{"),
(T_LEFT_BRACKET, "["),
(T_LEFT_PAREN, "("),
(T_METRIC_IDENTIFIER, "{Metric_ID}"),
(T_NUMBER, "{Num}"),
(T_RIGHT_BRACE, "}"),
(T_RIGHT_BRACKET, "]"),
(T_RIGHT_PAREN, ")"),
(T_SEMICOLON, ","),
(T_SPACE, "<space>"),
(T_STRING, "{Str}"),
(T_TIMES, "x"),

// Operators.
(T_ADD, "+"),
(T_DIV, "/"),
(T_EQLC, "=="),
(T_EQL_REGEX, "=~"),
(T_GTE, ">="),
(T_GTR, ">"),
(T_LAND, "and"),
(T_LOR, "or"),
(T_LSS, "<"),
(T_LTE, "<="),
(T_LUNLESS, "unless"),
(T_MOD, "%"),
(T_MUL, "*"),
(T_NEQ, "!="),
(T_NEQ_REGEX, "!~"),
(T_POW, "^"),
(T_SUB, "-"),
(T_AT, "@"),
(T_ATAN2, "atan2"),

// Aggregators.
(T_AVG, "avg"),
(T_BOTTOMK, "bottomk"),
(T_COUNT, "count"),
(T_COUNT_VALUES, "count_values"),
(T_GROUP, "group"),
(T_MAX, "max"),
(T_MIN, "min"),
(T_QUANTILE, "quantile"),
(T_STDDEV, "stddev"),
(T_STDVAR, "stdvar"),
(T_SUM, "sum"),
(T_TOPK, "topk"),

// Keywords.
(T_BOOL, "bool"),
(T_BY, "by"),
(T_GROUP_LEFT, "group_left"),
(T_GROUP_RIGHT, "group_right"),
(T_IGNORING, "ignoring"),
(T_OFFSET, "offset"),
(T_ON, "on"),
(T_WITHOUT, "without"),

// Preprocessors.
(T_START, "start"),
(T_END, "end")
].into_iter().collect();
}

/// this is for debug so far, maybe pretty feature in the future.
pub fn token_display(id: TokenType) -> &'static str {
// match TOKEN_DISPLAY.get(&id) {
// Some(&display) => display.into(),
// None => format!("unknown token id <{id}>"),
// }

match id {
// Token.
T_EQL => "=",
Expand Down

0 comments on commit 29073c0

Please sign in to comment.