From ed266860a4632e809aeffb94ffcdaf82343d353e Mon Sep 17 00:00:00 2001 From: Mark Date: Tue, 29 May 2018 07:20:36 +0200 Subject: [PATCH] Partially rewrite the lexer delegation to use Rc #52 --- src/mango/lexing/code_lexer.rs | 61 ++++++++++++++++---------------- src/mango/lexing/string_lexer.rs | 12 ++++--- src/mango/lexing/typ.rs | 6 +++- 3 files changed, 42 insertions(+), 37 deletions(-) diff --git a/src/mango/lexing/code_lexer.rs b/src/mango/lexing/code_lexer.rs index 26fccf7c..8a2e3b1b 100644 --- a/src/mango/lexing/code_lexer.rs +++ b/src/mango/lexing/code_lexer.rs @@ -13,35 +13,29 @@ use mango::token::tokens::ParenthesisOpenToken; use mango::token::tokens::StartBlockToken; use mango::token::Tokens; use mango::util::collection::Queue; +use std::cell::RefCell; +use std::rc::Rc; + +// TODO: Preferably there'd be only one Lexer at a time which has a Reader, but I did not get that to work, +// TODO: see this SO question: https://stackoverflow.com/questions/50535022/borrow-checker-problems-for-parser-that-can-delegate enum ReaderOrDelegate { - Reader(Box), + Reader(Rc>), Delegate(Box), } -impl ReaderOrDelegate { - fn end_delegation(self) -> Self { - use self::ReaderOrDelegate::*; - match self { - Delegate(delegate) => Reader(delegate.consume()), - read => read, - } - } -} - pub struct CodeLexer { - // reader: Option<&'r mut Reader>, + // reader: Rc>, indent: i32, - // TODO: both of the next two would be unnecessary with generators... + // This delegate deals with nested structures, like string literals and comments. - // delegate: Option<&'r mut Lexer<'r>>, reader_or_delegate: ReaderOrDelegate, // This is unfortunate, would not be needed with 'yield' but is now for indents. buffer: Queue, } impl CodeLexer { - fn new(reader: Box) -> Self { + fn new(reader: Rc>) -> Self { CodeLexer { reader_or_delegate: ReaderOrDelegate::Reader(reader), indent: 0, @@ -49,7 +43,7 @@ impl CodeLexer { } } - fn lex_indents(&mut self, reader: &mut Box) -> MaybeToken { + fn lex_indents(&mut self, reader: &mut Reader) -> MaybeToken { let mut line_indent = 0; while let Match(_) = reader.matches("\\t") { line_indent += 1; @@ -82,13 +76,20 @@ impl Lexer for CodeLexer { // If currently delegating to a sub-lexer, return from that. match self.reader_or_delegate { ReaderOrDelegate::Delegate(ref mut delegate) => { - match delegate.lex() { + let delegated_token = delegate.lex(); + match delegated_token { + End => { + // Swap back from delegation to direct mode. + let reader = delegate.get_reader().clone(); + self.reader_or_delegate = ReaderOrDelegate::Reader(reader); + self.lex() + } Token(token) => Token(token), - End => self.lex(), } // Code to stop delegation cannot be here, because `self` is still mutably borrowed through `delegate` } - ReaderOrDelegate::Reader(ref mut reader) => { + ReaderOrDelegate::Reader(ref mut reader_refcell) => { + let mut reader = reader_refcell.borrow_mut(); // todo: maybe this branch could be a separate function? // If there is a buffer due to indentation or continuations, return from that. @@ -103,11 +104,11 @@ impl Lexer for CodeLexer { } else if let Match(word) = reader.matches("[^\\n]*\\n\\r?") { return Token(Tokens::Unlexable(UnlexableToken::new(word))); } else { - // TODO: I don't know yet how to deal with ... followed by end-of-file + // TODO: I don't know yet how to deal with '...' followed by end-of-file panic!() } // This is a new line, so there may be indents. - return self.lex_indents(&mut reader); + return self.lex_indents(reader); } if let Match(_) = reader.matches("\\n\\r?") { // Newline WITHOUT line continuation. @@ -121,7 +122,7 @@ impl Lexer for CodeLexer { if let Match(_) = reader.matches("\\n\\r?") { // If semicolon is followed by a newline (redundant), then we need to deal with indents (but ignore the newline itself). // This will return the queue of tokens, including the semicolon. - return self.lex_indents(&mut reader); + return self.lex_indents(reader); } // No newline, can just return the semicolon (which is certainly on the queue, and should be the only thing, but it is fine here if not). return Token(self.buffer.pop().unwrap()); @@ -138,11 +139,10 @@ impl Lexer for CodeLexer { return Token(Tokens::Identifier(IdentifierToken::from_str(word).unwrap())); } // Literal - if let Match(word) = reader.matches("[a-z]?\"") { - // TODO: need to keep delegating to this until it exhausts, how to do that? - self.reader_or_delegate = ReaderOrDelegate::Delegate(Box::new( - StringLexer::new_double_quoted(reader), - )); + if let Match(_) = reader.matches("[a-z]?\"") { + let sublexer: Box = + Box::new(StringLexer::new_double_quoted(reader_refcell.clone())); + self.reader_or_delegate = ReaderOrDelegate::Delegate(sublexer); return self.lex(); } // Operator @@ -163,11 +163,10 @@ impl Lexer for CodeLexer { } } - fn consume(self) -> Box { - assert!(false, "I do not think this is ever called, is it?"); + fn get_reader(&self) -> &Rc> { match self.reader_or_delegate { - ReaderOrDelegate::Reader(reader) => reader, - ReaderOrDelegate::Delegate(delegate) => delegate.consume(), + ReaderOrDelegate::Reader(reader) => &reader, + ReaderOrDelegate::Delegate(delegate) => delegate.get_reader(), } } } diff --git a/src/mango/lexing/string_lexer.rs b/src/mango/lexing/string_lexer.rs index de5c72da..4218b871 100644 --- a/src/mango/lexing/string_lexer.rs +++ b/src/mango/lexing/string_lexer.rs @@ -4,6 +4,8 @@ use mango::lexing::typ::Lexer; use mango::lexing::typ::MaybeToken; use mango::token::tokens::LiteralToken; use mango::token::Tokens; +use std::cell::RefCell; +use std::rc::Rc; pub enum StringType { SingleQuotedInline, @@ -14,13 +16,13 @@ pub enum StringType { /// Lexes a string literal token. // Starts after the opening quote and expected to consume until closing quote. pub struct StringLexer { - reader: Box, + reader: Rc>, typ: StringType, } impl StringLexer { // TODO: support other types of strings - pub fn new_double_quoted(reader: Box) -> Self { + pub fn new_double_quoted(reader: Rc>) -> Self { StringLexer { reader, typ: StringType::DoubleQuotedInline, @@ -33,14 +35,14 @@ impl Lexer for StringLexer { // TODO: perhaps there's a library that does parsing a string with escape characters // TODO: doesn't handle escaping etc at all now // TODO: this is going to have a problem if `matches` automatically eats whitespace - match self.reader.matches("[^\"\\n]*") { + match self.reader.borrow_mut().matches("[^\"\\n]*") { Match(value) => return MaybeToken::Token(Tokens::Literal(LiteralToken::string(value))), NoMatch() => panic!("failed to parse string"), // This can't really go wrong since empty pattern matches EOF() => return MaybeToken::Token(Tokens::Literal(LiteralToken::string("".to_owned()))), // Unclosed string literal, let code parser deal with it } } - fn consume(self) -> Box { - self.reader + fn get_reader(&self) -> &Rc> { + &self.reader } } diff --git a/src/mango/lexing/typ.rs b/src/mango/lexing/typ.rs index 063389e4..d82b19e1 100644 --- a/src/mango/lexing/typ.rs +++ b/src/mango/lexing/typ.rs @@ -1,5 +1,7 @@ use mango::io::typ::Reader; use mango::token::Tokens; +use std::cell::RefCell; +use std::rc::Rc; pub enum MaybeToken { Token(Tokens), @@ -10,8 +12,10 @@ pub trait Lexer { // /// Create a new lexer from a reader instance. // fn new(reader: &'r mut Reader) -> Self; + // fn new(reader: Rc>); + /// Every call to lex returns a token until the end of the input. fn lex(&mut self) -> MaybeToken; - fn consume(self) -> Box; + fn get_reader(&self) -> &Rc>; }