From 28bc91ab220d0b9bb7600528cd8f8a5f419809c7 Mon Sep 17 00:00:00 2001 From: Mark Date: Sun, 17 Jun 2018 19:16:44 +0200 Subject: [PATCH] Some progress on lexing literals #52 --- src/mango/lexing/code_lexer.rs | 24 ++++++++++++++---------- src/mango/lexing/combi_lexer.rs | 14 -------------- src/mango/lexing/string_lexer.rs | 26 ++++++++++++-------------- src/mango/token/tokens/literal.rs | 13 +++++++++++++ 4 files changed, 39 insertions(+), 38 deletions(-) diff --git a/src/mango/lexing/code_lexer.rs b/src/mango/lexing/code_lexer.rs index c92c2026..64cbf815 100644 --- a/src/mango/lexing/code_lexer.rs +++ b/src/mango/lexing/code_lexer.rs @@ -6,6 +6,7 @@ use mango::lexing::typ::MaybeToken; use mango::lexing::typ::SubLexer; use mango::lexing::typ::SubLexerResult; use mango::token::special::UnlexableToken; +use mango::token::tokens::literal::LiteralToken; use mango::token::tokens::AssociationToken; use mango::token::tokens::EndBlockToken; use mango::token::tokens::EndStatementToken; @@ -17,8 +18,6 @@ use mango::token::tokens::ParenthesisOpenToken; use mango::token::tokens::StartBlockToken; use mango::token::Tokens; use mango::util::collection::Queue; -use std::cell::RefCell; -use std::rc::Rc; pub struct CodeLexer { indent: i32, @@ -130,14 +129,19 @@ impl SubLexer for CodeLexer { IdentifierToken::from_str(word).unwrap(), )); } - // // Literal - // let string_match_res = reader.matches("[a-z]?\""); - // if let Match(_) = string_match_res { - // let sublexer: Box = - // Box::new(StringLexer::new_double_quoted(self.reader.clone())); - // self.reader_or_delegate = ReaderOrDelegate::Delegate(sublexer); - // return self.lex(); - // } + // Literal + if let Match(_) = reader.matches("[a-z]?\"") { + return Delegate(Box::new(StringLexer::new_double_quoted())); + } + if let Match(nr) = reader.matches(LiteralToken::subpattern_int()) { + let value = LiteralToken::parse_int(nr); + return SubLexerResult::single(Tokens::Literal(LiteralToken::Int(value))); + } + if let Match(nr) = 
reader.matches(LiteralToken::subpattern_real()) { + let value = LiteralToken::parse_real(nr); + return SubLexerResult::single(Tokens::Literal(LiteralToken::Real(value))); + } + // // Association (before operator) // let association_match_res = self // .reader diff --git a/src/mango/lexing/combi_lexer.rs b/src/mango/lexing/combi_lexer.rs index 7c353ffb..9a3965ec 100644 --- a/src/mango/lexing/combi_lexer.rs +++ b/src/mango/lexing/combi_lexer.rs @@ -1,26 +1,12 @@ use mango::io::typ::Reader; -use mango::io::typ::ReaderResult::*; use mango::lexing::code_lexer::CodeLexer; -use mango::lexing::string_lexer::StringLexer; use mango::lexing::typ::Lexer; use mango::lexing::typ::MaybeToken; use mango::lexing::typ::SubLexer; use mango::lexing::typ::SubLexerResult; -use mango::token::special::UnlexableToken; -use mango::token::tokens::AssociationToken; -use mango::token::tokens::EndBlockToken; -use mango::token::tokens::EndStatementToken; -use mango::token::tokens::IdentifierToken; -use mango::token::tokens::KeywordToken; -use mango::token::tokens::OperatorToken; -use mango::token::tokens::ParenthesisCloseToken; -use mango::token::tokens::ParenthesisOpenToken; -use mango::token::tokens::StartBlockToken; use mango::token::Tokens; use mango::util::collection::Queue; use mango::util::collection::Stack; -use std::cell::RefCell; -use std::rc::Rc; pub struct CombiLexer { reader: Box, diff --git a/src/mango/lexing/string_lexer.rs b/src/mango/lexing/string_lexer.rs index 4a433a00..ab303c7b 100644 --- a/src/mango/lexing/string_lexer.rs +++ b/src/mango/lexing/string_lexer.rs @@ -2,10 +2,10 @@ use mango::io::typ::Reader; use mango::io::typ::ReaderResult::*; use mango::lexing::typ::Lexer; use mango::lexing::typ::MaybeToken; +use mango::lexing::typ::SubLexer; +use mango::lexing::typ::SubLexerResult; use mango::token::tokens::LiteralToken; use mango::token::Tokens; -use std::cell::RefCell; -use std::rc::Rc; pub enum StringType { SingleQuotedInline, @@ -16,33 +16,31 @@ pub enum StringType { /// 
Lexes a string literal token. // Starts after the opening quote and expected to consume until closing quote. pub struct StringLexer { - reader: Rc>, typ: StringType, } impl StringLexer { // TODO: support other types of strings - pub fn new_double_quoted(reader: Rc>) -> Self { + pub fn new_double_quoted() -> Self { StringLexer { - reader, typ: StringType::DoubleQuotedInline, } } } -impl Lexer for StringLexer { - fn lex(&mut self) -> MaybeToken { +impl SubLexer for StringLexer { + fn lex_pass(&mut self, reader: &mut Box) -> SubLexerResult { // TODO: perhaps there's a library that does parsing a string with escape characters // TODO: doesn't handle escaping etc at all now // TODO: this is going to have a problem if `matches` automatically eats whitespace - match self.reader.borrow_mut().matches("[^\"\\n]*") { - Match(value) => return MaybeToken::Token(Tokens::Literal(LiteralToken::string(value))), + match reader.matches("[^\"\\n]*") { + Match(value) => { + return SubLexerResult::single(Tokens::Literal(LiteralToken::string(value))) + } NoMatch() => panic!("failed to parse string"), // This can't really go wrong since empty pattern matches - EOF() => return MaybeToken::Token(Tokens::Literal(LiteralToken::string("".to_owned()))), // Unclosed string literal, let code parser deal with it + EOF() => { + return SubLexerResult::single(Tokens::Literal(LiteralToken::string("".to_owned()))) + } // Unclosed string literal, let code parser deal with it } } - - // fn get_reader(&self) -> Rc> { - // self.reader.clone() - // } } diff --git a/src/mango/token/tokens/literal.rs b/src/mango/token/tokens/literal.rs index 4836de63..814f9d9c 100644 --- a/src/mango/token/tokens/literal.rs +++ b/src/mango/token/tokens/literal.rs @@ -1,6 +1,8 @@ use mango::token::Token; use mango::util::encdec::ToText; use mango::util::numtype::f64eq; +use mango::util::parsetxt::int::parse_int; +use mango::util::parsetxt::real::parse_real; // LATER: it is likely that this will be refactored when the type 
system is in place. @@ -40,8 +42,19 @@ impl LiteralToken { pub fn subpattern_real() -> &'static str { // TODO: do I want to allow numbers to start with a period? // TODO: for now, only base10 for reals (would 8b11e2 be 9*8^2 or 9*10^2?) + // TODO: does not deal with NaN or infinity r"(?:\+|-*)(?:\d(?:_?\d)*\.\d(?:_?\d)*|\d(?:_?\d)*\.|\.\d(?:_?\d)*)(?:e(?:\+|-|)\d(?:_?\d)*)?" } + + /// Parse a string matching [subpattern_int] to an i64 integer. Overflow is possible. + pub fn parse_int(text: String) -> i64 { + parse_int(text).unwrap() + } + + /// Parse a string matching [subpattern_real] to a f64 real. Loss of precision or overflow are possible. + pub fn parse_real(text: String) -> f64eq { + f64eq::new(parse_real(text).unwrap()) + } } impl ToText for LiteralToken {