From ba3acda81238f9ab0212962a590eb83b0ad1d2d7 Mon Sep 17 00:00:00 2001 From: Mark Date: Sun, 17 Jun 2018 22:05:35 +0200 Subject: [PATCH] Lexer has an infinite loop somewhere #52 --- src/mango/io/util.rs | 3 +- src/mango/lexing/code_lexer.rs | 67 ++++++++++++++++------------- src/mango/util/parsetxt/real.rs | 10 ++++- src/mango/util/strslice/char_ops.rs | 3 +- 4 files changed, 47 insertions(+), 36 deletions(-) diff --git a/src/mango/io/util.rs b/src/mango/io/util.rs index b096b333..a8cbb96d 100644 --- a/src/mango/io/util.rs +++ b/src/mango/io/util.rs @@ -19,8 +19,7 @@ impl RegexCache { match Regex::new(&format!("^ *{}", subpattern)) { Err(err) => panic!(format!( "Invalid regular expression '{}' while adding to library; this is a bug:\n{:?}", - subpattern, - err + subpattern, err )), Ok(regex) => { self.cache.insert(subpattern.to_owned(), regex); diff --git a/src/mango/lexing/code_lexer.rs b/src/mango/lexing/code_lexer.rs index 58c149ab..05fd61ea 100644 --- a/src/mango/lexing/code_lexer.rs +++ b/src/mango/lexing/code_lexer.rs @@ -147,40 +147,45 @@ impl SubLexer for CodeLexer { debug_assert!(token.chars().last().unwrap() == '='); if token.chars().count() > 1 { panic!(); // TODO - return SubLexerResult::single((Tokens::Association(AssociationToken::from_unprefixed()))); + return SubLexerResult::single( + (Tokens::Association(AssociationToken::from_unprefixed())), + ); } else { - return SubLexerResult::single((Tokens::Association(AssociationToken::from_unprefixed()))); + return SubLexerResult::single( + (Tokens::Association(AssociationToken::from_unprefixed())), + ); } } - // // Operator - // let operator_match_res = self - // .reader - // .borrow_mut() - // .matches(OperatorToken::subpattern()); - // if let Match(token) = operator_match_res { - // return Token(Tokens::Operator(OperatorToken::from_str(&token).unwrap())); - // } - // // Grouping symbols - // if let Match(_) = reader.matches(r"\(") { - // return Token(Tokens::ParenthesisOpen(ParenthesisOpenToken::new())); - // } - // if let Match(_) = reader.matches(r"\)") { - // return Token(Tokens::ParenthesisClose(ParenthesisCloseToken::new())); - // } - // - // let unknown_word = reader.matches("[^\\s]+"); - // match unknown_word { - // Match(word) => return Token(Tokens::Unlexable(UnlexableToken::new(word))), - // NoMatch() => { - // println!("END {:?}", self.reader.borrow()); // TODO - // panic!("Do not know how to proceed with parsing") - // } - // EOF() => { - // // TODO: also dedent and end statement here - // End - // } - // } + // Operator + if let Match(token) = reader.matches(OperatorToken::subpattern()) { + return SubLexerResult::single(Tokens::Operator( + OperatorToken::from_str(&token).unwrap(), + )); + } + // Grouping symbols + if let Match(_) = reader.matches(r"\(") { + return SubLexerResult::single(Tokens::ParenthesisOpen(ParenthesisOpenToken::new())); + } + if let Match(_) = reader.matches(r"\)") { + return SubLexerResult::single(Tokens::ParenthesisClose(ParenthesisCloseToken::new())); + } - panic!() // TODO TMP + // If the code gets here, it did not recognize the text as any token + return match reader.matches(r"[^\s]+") { + Match(word) => SubLexerResult::single(Tokens::Unlexable(UnlexableToken::new(word))), + NoMatch() => { + println!("END {:?}", reader); // todo: tmp + panic!("Do not know how to proceed with parsing") + } + EOF() => { + // TODO: also dedent and end statement here + let mut tokens = vec![Tokens::EndStatement(EndStatementToken::new_end_line())]; + for _ in 0..self.indent { + // This line is dedented, make end tokens. + tokens.push(Tokens::EndBlock(EndBlockToken::new(true, false))); + } + SubLexerResult::Result(tokens) + } + }; } } diff --git a/src/mango/util/parsetxt/real.rs b/src/mango/util/parsetxt/real.rs index 50c8fc8d..b99925ff 100644 --- a/src/mango/util/parsetxt/real.rs +++ b/src/mango/util/parsetxt/real.rs @@ -29,7 +29,11 @@ pub fn parse_real>(text: S) -> Result { None => return Err(RealParseFailReason::Invalid), Some(captures) => { - let multiplier = captures.name("multiplier").unwrap().as_str().without_char(&'_') + let multiplier = captures + .name("multiplier") + .unwrap() + .as_str() + .without_char(&'_') .parse::() .unwrap(); match captures.name("exponent") { @@ -39,7 +43,9 @@ pub fn parse_real>(text: S) -> Result } Some(exponent_match) => { // This real is in exponential notation - let exponent = exponent_match.as_str().without_char(&'_') + let exponent = exponent_match + .as_str() + .without_char(&'_') .parse::() .unwrap(); // TODO: is there a numerically smarter way to do this? diff --git a/src/mango/util/strslice/char_ops.rs b/src/mango/util/strslice/char_ops.rs index 73026377..054ba7f3 100644 --- a/src/mango/util/strslice/char_ops.rs +++ b/src/mango/util/strslice/char_ops.rs @@ -1,4 +1,3 @@ - pub trait CharOps { /// Remove all matching characters from the string. // Signature may be changed to support a set of characters, if the need arises. @@ -19,10 +18,12 @@ impl<'a> CharOps for &'a str { impl CharOps for String { fn without_char(&self, strip: &char) -> String { + println!("String.without_char"); (&self).without_char(strip) } fn char_len(&self) -> usize { + println!("String.char_len"); (&self).char_len() } }