From 946f417fb23ecc9390e722ae1af55aac48d9e264 Mon Sep 17 00:00:00 2001 From: Mark Date: Sun, 17 Jun 2018 21:52:29 +0200 Subject: [PATCH] Expand lexing and improvements to string utils #52 --- src/mango/io/util.rs | 3 ++- src/mango/lexing/code_lexer.rs | 23 +++++++++------------ src/mango/lexing/string_lexer.rs | 2 -- src/mango/token/tokens/literal.rs | 2 +- src/mango/util/parsetxt/int.rs | 6 +++--- src/mango/util/parsetxt/real.rs | 6 +++--- src/mango/util/strslice/char_ops.rs | 32 ++++++++++++++++++++++++----- 7 files changed, 46 insertions(+), 28 deletions(-) diff --git a/src/mango/io/util.rs b/src/mango/io/util.rs index 9d8710ef..b096b333 100644 --- a/src/mango/io/util.rs +++ b/src/mango/io/util.rs @@ -18,7 +18,8 @@ impl RegexCache { if !self.cache.contains_key(subpattern) { match Regex::new(&format!("^ *{}", subpattern)) { Err(err) => panic!(format!( - "Invalid regular expression while adding to library; this is a bug:\n{:?}", + "Invalid regular expression '{}' while adding to library; this is a bug:\n{:?}", + subpattern, err )), Ok(regex) => { diff --git a/src/mango/lexing/code_lexer.rs b/src/mango/lexing/code_lexer.rs index 64cbf815..58c149ab 100644 --- a/src/mango/lexing/code_lexer.rs +++ b/src/mango/lexing/code_lexer.rs @@ -142,19 +142,16 @@ impl SubLexer for CodeLexer { return SubLexerResult::single(Tokens::Literal(LiteralToken::Real(value))); } - // // Association (before operator) - // let association_match_res = self - // .reader - // .borrow_mut() - // .matches(&AssociationToken::subpattern()); - // if let Match(token) = association_match_res { - // if token.chars().last().unwrap() == '=' { - // // return Token(Tokens::Association(AssociationToken::from_str(token[..1]).unwrap())); - // return Token(Tokens::Association(AssociationToken::from_unprefixed())); // TODO - // } else { - // return Token(Tokens::Association(AssociationToken::from_unprefixed())); - // } - // } + // Association (before operator) + if let Match(token) = 
reader.matches(&AssociationToken::subpattern()) { + debug_assert!(token.chars().last().unwrap() == '='); + if token.chars().count() > 1 { + panic!(); // TODO + return SubLexerResult::single((Tokens::Association(AssociationToken::from_unprefixed()))); + } else { + return SubLexerResult::single((Tokens::Association(AssociationToken::from_unprefixed()))); + } + } // // Operator // let operator_match_res = self // .reader diff --git a/src/mango/lexing/string_lexer.rs b/src/mango/lexing/string_lexer.rs index ab303c7b..dbc68034 100644 --- a/src/mango/lexing/string_lexer.rs +++ b/src/mango/lexing/string_lexer.rs @@ -1,7 +1,5 @@ use mango::io::typ::Reader; use mango::io::typ::ReaderResult::*; -use mango::lexing::typ::Lexer; -use mango::lexing::typ::MaybeToken; use mango::lexing::typ::SubLexer; use mango::lexing::typ::SubLexerResult; use mango::token::tokens::LiteralToken; diff --git a/src/mango/token/tokens/literal.rs b/src/mango/token/tokens/literal.rs index 814f9d9c..ba1259b3 100644 --- a/src/mango/token/tokens/literal.rs +++ b/src/mango/token/tokens/literal.rs @@ -43,7 +43,7 @@ impl LiteralToken { // TODO: do I want to allow numbers to start with a period? // TODO: for now, only base10 for reals (would 8b11e2 be 9*8^2 or 9*10^2?) // TODO: does not deal with NaN of infinity - r"(?:\+|-*)(?:\d(?:_?\d)*\.\d(?:_?\d)*|\d(?:_?\d)*\.|\.\d(?:_?\d)*)(?:e(?:\+|-|)\d(?:_?\d)*)?" + r"(?:\+|-*)(?:\d(?:_?\d)*\.\d(?:_?\d)*|\d(?:_?\d)*\.|\.\d(?:_?\d)*)(?:e(?:\+|-?)\d(?:_?\d)*)?" } /// Parse a string matching [subpattern_int] to an i64 integer. Overflow is possible. 
diff --git a/src/mango/util/parsetxt/int.rs b/src/mango/util/parsetxt/int.rs index 744aaca8..38bdb248 100644 --- a/src/mango/util/parsetxt/int.rs +++ b/src/mango/util/parsetxt/int.rs @@ -1,4 +1,4 @@ -use mango::util::strslice::char_ops::char_drop; +use mango::util::strslice::char_ops::CharOps; use regex::Regex; #[derive(Debug)] @@ -40,7 +40,7 @@ pub fn parse_int>(text: S) -> Result { // TODO: implement panic!(format!( "Do not yet know how to deal with {} in base {}", - char_drop(value.as_str(), &'_'), + value.as_str().without_char(&'_'), base.as_str() )) } else { @@ -53,7 +53,7 @@ pub fn parse_int>(text: S) -> Result { Some(value) => { // This is a 'normal' (base10) value. // TODO: check for over/underflow - return Ok(char_drop(value.as_str(), &'_').parse::().unwrap()); + return Ok(value.as_str().without_char(&'_').parse::().unwrap()); } } } diff --git a/src/mango/util/parsetxt/real.rs b/src/mango/util/parsetxt/real.rs index a5de9948..50c8fc8d 100644 --- a/src/mango/util/parsetxt/real.rs +++ b/src/mango/util/parsetxt/real.rs @@ -1,4 +1,4 @@ -use mango::util::strslice::char_ops::char_drop; +use mango::util::strslice::char_ops::CharOps; use regex::Regex; #[derive(Debug)] @@ -29,7 +29,7 @@ pub fn parse_real>(text: S) -> Result { None => return Err(RealParseFailReason::Invalid), Some(captures) => { - let multiplier = char_drop(captures.name("multiplier").unwrap().as_str(), &'_') + let multiplier = captures.name("multiplier").unwrap().as_str().without_char(&'_') .parse::() .unwrap(); match captures.name("exponent") { @@ -39,7 +39,7 @@ pub fn parse_real>(text: S) -> Result } Some(exponent_match) => { // This real is in exponential notation - let exponent = char_drop(exponent_match.as_str(), &'_') + let exponent = exponent_match.as_str().without_char(&'_') .parse::() .unwrap(); // TODO: is there a numerically smarter way to do this? 
diff --git a/src/mango/util/strslice/char_ops.rs b/src/mango/util/strslice/char_ops.rs
index a2e351f4..73026377 100644
--- a/src/mango/util/strslice/char_ops.rs
+++ b/src/mango/util/strslice/char_ops.rs
@@ -1,6 +1,28 @@
-/// Remove all matching characters from the string.
-// Signature may be changed to support a set of characters, if the need arises.
-pub fn char_drop<S: Into<String>>(text: S, strip: &char) -> String {
-    let text = text.into();
-    text.chars().filter(|chr| chr != strip).collect()
+/// Character-level helper operations for string types (`&str` and `String`).
+pub trait CharOps {
+    /// Remove all matching characters from the string.
+    // Signature may be changed to support a set of characters, if the need arises.
+    fn without_char(&self, strip: &char) -> String;
+    /// Count the `char`s in the string (as yielded by `chars()`), not its bytes.
+    fn char_len(&self) -> usize;
+}
+
+impl<'a> CharOps for &'a str {
+    fn without_char(&self, strip: &char) -> String {
+        self.chars().filter(|chr| chr != strip).collect()
+    }
+
+    fn char_len(&self) -> usize {
+        self.chars().count()
+    }
+}
+
+impl CharOps for String {
+    fn without_char(&self, strip: &char) -> String {
+        self.as_str().without_char(strip) // use the &str impl; `(&self)` resolves back here
+    }
+
+    fn char_len(&self) -> usize {
+        self.as_str().char_len() // use the &str impl; `(&self)` resolves back here
+    }
 }