From 946f417fb23ecc9390e722ae1af55aac48d9e264 Mon Sep 17 00:00:00 2001
From: Mark <mverleg.noreply@gmail.com>
Date: Sun, 17 Jun 2018 21:52:29 +0200
Subject: [PATCH] Expand lexing and improve string utils #52

---
 src/mango/io/util.rs                |  3 ++-
 src/mango/lexing/code_lexer.rs      | 23 +++++++++------------
 src/mango/lexing/string_lexer.rs    |  2 --
 src/mango/token/tokens/literal.rs   |  2 +-
 src/mango/util/parsetxt/int.rs      |  6 +++---
 src/mango/util/parsetxt/real.rs     |  6 +++---
 src/mango/util/strslice/char_ops.rs | 32 ++++++++++++++++++++++++-----
 7 files changed, 46 insertions(+), 28 deletions(-)

diff --git a/src/mango/io/util.rs b/src/mango/io/util.rs
index 9d8710ef..b096b333 100644
--- a/src/mango/io/util.rs
+++ b/src/mango/io/util.rs
@@ -18,7 +18,8 @@ impl RegexCache {
         if !self.cache.contains_key(subpattern) {
             match Regex::new(&format!("^ *{}", subpattern)) {
                 Err(err) => panic!(format!(
-                    "Invalid regular expression while adding to library; this is a bug:\n{:?}",
+                    "Invalid regular expression '{}' while adding to library; this is a bug:\n{:?}",
+                    subpattern,
                     err
                 )),
                 Ok(regex) => {
diff --git a/src/mango/lexing/code_lexer.rs b/src/mango/lexing/code_lexer.rs
index 64cbf815..58c149ab 100644
--- a/src/mango/lexing/code_lexer.rs
+++ b/src/mango/lexing/code_lexer.rs
@@ -142,19 +142,16 @@ impl SubLexer for CodeLexer {
             return SubLexerResult::single(Tokens::Literal(LiteralToken::Real(value)));
         }
 
-        //        // Association (before operator)
-        //        let association_match_res = self
-        //            .reader
-        //            .borrow_mut()
-        //            .matches(&AssociationToken::subpattern());
-        //        if let Match(token) = association_match_res {
-        //            if token.chars().last().unwrap() == '=' {
-        //                //                        return Token(Tokens::Association(AssociationToken::from_str(token[..1]).unwrap()));
-        //                return Token(Tokens::Association(AssociationToken::from_unprefixed())); // TODO
-        //            } else {
-        //                return Token(Tokens::Association(AssociationToken::from_unprefixed()));
-        //            }
-        //        }
+        // Association (before operator)
+        if let Match(token) = reader.matches(&AssociationToken::subpattern()) {
+            debug_assert!(token.chars().last().unwrap() == '=');
+            if token.chars().count() > 1 {
+                panic!(); // TODO
+                return SubLexerResult::single((Tokens::Association(AssociationToken::from_unprefixed())));
+            } else {
+                return SubLexerResult::single((Tokens::Association(AssociationToken::from_unprefixed())));
+            }
+        }
         //        // Operator
         //        let operator_match_res = self
         //            .reader
diff --git a/src/mango/lexing/string_lexer.rs b/src/mango/lexing/string_lexer.rs
index ab303c7b..dbc68034 100644
--- a/src/mango/lexing/string_lexer.rs
+++ b/src/mango/lexing/string_lexer.rs
@@ -1,7 +1,5 @@
 use mango::io::typ::Reader;
 use mango::io::typ::ReaderResult::*;
-use mango::lexing::typ::Lexer;
-use mango::lexing::typ::MaybeToken;
 use mango::lexing::typ::SubLexer;
 use mango::lexing::typ::SubLexerResult;
 use mango::token::tokens::LiteralToken;
diff --git a/src/mango/token/tokens/literal.rs b/src/mango/token/tokens/literal.rs
index 814f9d9c..ba1259b3 100644
--- a/src/mango/token/tokens/literal.rs
+++ b/src/mango/token/tokens/literal.rs
@@ -43,7 +43,7 @@ impl LiteralToken {
         // TODO: do I want to allow numbers to start with a period?
         // TODO: for now, only base10 for reals (would 8b11e2 be 9*8^2 or 9*10^2?)
         // TODO: does not deal with NaN of infinity
-        r"(?:\+|-*)(?:\d(?:_?\d)*\.\d(?:_?\d)*|\d(?:_?\d)*\.|\.\d(?:_?\d)*)(?:e(?:\+|-|)\d(?:_?\d)*)?"
+        r"(?:\+|-*)(?:\d(?:_?\d)*\.\d(?:_?\d)*|\d(?:_?\d)*\.|\.\d(?:_?\d)*)(?:e(?:\+|-?)\d(?:_?\d)*)?"
     }
 
     /// Parse a string matching [subpattern_int] to an i64 integer. Overflow is possible.
diff --git a/src/mango/util/parsetxt/int.rs b/src/mango/util/parsetxt/int.rs
index 744aaca8..38bdb248 100644
--- a/src/mango/util/parsetxt/int.rs
+++ b/src/mango/util/parsetxt/int.rs
@@ -1,4 +1,4 @@
-use mango::util::strslice::char_ops::char_drop;
+use mango::util::strslice::char_ops::CharOps;
 use regex::Regex;
 
 #[derive(Debug)]
@@ -40,7 +40,7 @@ pub fn parse_int<S: Into<String>>(text: S) -> Result<i64, IntParseFailReason> {
                             // TODO: implement
                             panic!(format!(
                                 "Do not yet know how to deal with {} in base {}",
-                                char_drop(value.as_str(), &'_'),
+                                value.as_str().without_char(&'_'),
                                 base.as_str()
                             ))
                         } else {
@@ -53,7 +53,7 @@ pub fn parse_int<S: Into<String>>(text: S) -> Result<i64, IntParseFailReason> {
                 Some(value) => {
                     // This is a 'normal' (base10) value.
                     // TODO: check for over/underflow
-                    return Ok(char_drop(value.as_str(), &'_').parse::<i64>().unwrap());
+                    return Ok(value.as_str().without_char(&'_').parse::<i64>().unwrap());
                 }
             }
         }
diff --git a/src/mango/util/parsetxt/real.rs b/src/mango/util/parsetxt/real.rs
index a5de9948..50c8fc8d 100644
--- a/src/mango/util/parsetxt/real.rs
+++ b/src/mango/util/parsetxt/real.rs
@@ -1,4 +1,4 @@
-use mango::util::strslice::char_ops::char_drop;
+use mango::util::strslice::char_ops::CharOps;
 use regex::Regex;
 
 #[derive(Debug)]
@@ -29,7 +29,7 @@ pub fn parse_real<S: Into<String>>(text: S) -> Result<f64, RealParseFailReason>
     {
         None => return Err(RealParseFailReason::Invalid),
         Some(captures) => {
-            let multiplier = char_drop(captures.name("multiplier").unwrap().as_str(), &'_')
+            let multiplier = captures.name("multiplier").unwrap().as_str().without_char(&'_')
                 .parse::<f64>()
                 .unwrap();
             match captures.name("exponent") {
@@ -39,7 +39,7 @@ pub fn parse_real<S: Into<String>>(text: S) -> Result<f64, RealParseFailReason>
                 }
                 Some(exponent_match) => {
                     // This real is in exponential notation
-                    let exponent = char_drop(exponent_match.as_str(), &'_')
+                    let exponent = exponent_match.as_str().without_char(&'_')
                         .parse::<f64>()
                         .unwrap();
                     // TODO: is there a numerically smarter way to do this?
diff --git a/src/mango/util/strslice/char_ops.rs b/src/mango/util/strslice/char_ops.rs
index a2e351f4..73026377 100644
--- a/src/mango/util/strslice/char_ops.rs
+++ b/src/mango/util/strslice/char_ops.rs
@@ -1,6 +1,28 @@
-/// Remove all matching characters from the string.
-// Signature may be changed to support a set of characters, if the need arises.
-pub fn char_drop<S: Into<String>>(text: S, strip: &char) -> String {
-    let text = text.into();
-    text.chars().filter(|chr| chr != strip).collect()
+/// Character-level helper operations, implemented in this file for `&str` and `String`.
+pub trait CharOps {
+    /// Remove all matching characters from the string.
+    // Signature may be changed to support a set of characters, if the need arises.
+    fn without_char(&self, strip: &char) -> String;
+    /// Number of `char`s in the string (the impls in this file count `chars()`, not bytes).
+    fn char_len(&self) -> usize;
+}
+
+impl<'a> CharOps for &'a str {
+    fn without_char(&self, strip: &char) -> String {
+        self.replace(*strip, "") // replacing every `strip` with "" drops it from the copy
+    }
+    // Length measured in `char`s rather than bytes.
+    fn char_len(&self) -> usize {
+        self.chars().count()
+    }
+}
+
+impl CharOps for String {
+    fn without_char(&self, strip: &char) -> String {
+        self.as_str().without_char(strip) // `&str` impl; `(&self)` would pick this impl again
+    }
+    // Delegate via `as_str()`: `(&self).char_len()` resolves back to `String` and never returns.
+    fn char_len(&self) -> usize {
+        self.as_str().char_len()
+    }
 }