Skip to content

Commit

Permalink
Partially rewrite the lexer delegation to use Rc #52
Browse files Browse the repository at this point in the history
  • Loading branch information
mverleg committed May 29, 2018
1 parent 219463b commit ed26686
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 37 deletions.
61 changes: 30 additions & 31 deletions src/mango/lexing/code_lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,43 +13,37 @@ use mango::token::tokens::ParenthesisOpenToken;
use mango::token::tokens::StartBlockToken;
use mango::token::Tokens;
use mango::util::collection::Queue;
use std::cell::RefCell;
use std::rc::Rc;

// TODO: Preferably there'd be only one Lexer at a time which has a Reader, but I did not get that to work,
// TODO: see this SO question: https://stackoverflow.com/questions/50535022/borrow-checker-problems-for-parser-that-can-delegate

// Either direct access to the reader, or a boxed sub-lexer that lexing is currently delegated to.
// NOTE(review): this page shows a diff without +/- markers; the two `Reader(...)` lines are
// presumably the before (`Box<Reader>`) and after (`Rc<RefCell<Reader>>`) versions of the same
// variant, not two variants — confirm against the commit.
enum ReaderOrDelegate {
Reader(Box<Reader>),
Reader(Rc<RefCell<Reader>>),
Delegate(Box<Lexer>),
}

// Helper for switching a `ReaderOrDelegate` back from delegation to direct-reader mode.
impl ReaderOrDelegate {
// Consumes `self`: a `Delegate` becomes `Reader(delegate.consume())`, while any other
// value (i.e. one that is already a `Reader`) is returned unchanged.
// NOTE(review): relies on `Lexer::consume`; since this page is a diff rendered without
// +/- markers, this impl is presumably on the removed side — confirm against the commit.
fn end_delegation(self) -> Self {
use self::ReaderOrDelegate::*;
match self {
Delegate(delegate) => Reader(delegate.consume()),
read => read,
}
}
}

// Lexer for code; nested structures (e.g. string literals) are handed off to a delegate lexer.
pub struct CodeLexer {
// reader: Option<&'r mut Reader>,
// reader: Rc<RefCell<Reader>>,
// Starts at 0 (see `new`).
// NOTE(review): presumably the current indentation depth — `lex_indents` is only partly visible here.
indent: i32,
// TODO: both of the next two would be unnecessary with generators...

// This delegate deals with nested structures, like string literals and comments.
// delegate: Option<&'r mut Lexer<'r>>,
// Holds either the reader itself or the sub-lexer currently being delegated to.
reader_or_delegate: ReaderOrDelegate,
// This is unfortunate, would not be needed with 'yield' but is now for indents.
// Queue of already-produced tokens that `lex` returns from before reading further input.
buffer: Queue<Tokens>,
}

impl CodeLexer {
// Creates a lexer in direct-reader mode (no delegate) for `reader`, with `indent` 0 and an
// empty token buffer.
// NOTE(review): the two signature lines below are presumably the before/after sides of the
// diff (this page has no +/- markers) — only one of them belongs in the real file.
fn new(reader: Box<Reader>) -> Self {
fn new(reader: Rc<RefCell<Reader>>) -> Self {
CodeLexer {
reader_or_delegate: ReaderOrDelegate::Reader(reader),
indent: 0,
buffer: Queue::new(),
}
}

fn lex_indents(&mut self, reader: &mut Box<Reader>) -> MaybeToken {
fn lex_indents(&mut self, reader: &mut Reader) -> MaybeToken {
let mut line_indent = 0;
while let Match(_) = reader.matches("\\t") {
line_indent += 1;
Expand Down Expand Up @@ -82,13 +76,20 @@ impl Lexer for CodeLexer {
// If currently delegating to a sub-lexer, return from that.
match self.reader_or_delegate {
ReaderOrDelegate::Delegate(ref mut delegate) => {
match delegate.lex() {
let delegated_token = delegate.lex();
match delegated_token {
End => {
// Swap back from delegation to direct mode.
let reader = delegate.get_reader().clone();
self.reader_or_delegate = ReaderOrDelegate::Reader(reader);
self.lex()
}
Token(token) => Token(token),
End => self.lex(),
}
// Code to stop delegation cannot be here, because `self` is still mutably borrowed through `delegate`
}
ReaderOrDelegate::Reader(ref mut reader) => {
ReaderOrDelegate::Reader(ref mut reader_refcell) => {
let mut reader = reader_refcell.borrow_mut();
// todo: maybe this branch could be a separate function?

// If there is a buffer due to indentation or continuations, return from that.
Expand All @@ -103,11 +104,11 @@ impl Lexer for CodeLexer {
} else if let Match(word) = reader.matches("[^\\n]*\\n\\r?") {
return Token(Tokens::Unlexable(UnlexableToken::new(word)));
} else {
// TODO: I don't know yet how to deal with ... followed by end-of-file
// TODO: I don't know yet how to deal with '...' followed by end-of-file
panic!()
}
// This is a new line, so there may be indents.
return self.lex_indents(&mut reader);
return self.lex_indents(reader);
}
if let Match(_) = reader.matches("\\n\\r?") {
// Newline WITHOUT line continuation.
Expand All @@ -121,7 +122,7 @@ impl Lexer for CodeLexer {
if let Match(_) = reader.matches("\\n\\r?") {
// If semicolon is followed by a newline (redundant), then we need to deal with indents (but ignore the newline itself).
// This will return the queue of tokens, including the semicolon.
return self.lex_indents(&mut reader);
return self.lex_indents(reader);
}
// No newline, can just return the semicolon (which is certainly on the queue, and should be the only thing, but it is fine here if not).
return Token(self.buffer.pop().unwrap());
Expand All @@ -138,11 +139,10 @@ impl Lexer for CodeLexer {
return Token(Tokens::Identifier(IdentifierToken::from_str(word).unwrap()));
}
// Literal
if let Match(word) = reader.matches("[a-z]?\"") {
// TODO: need to keep delegating to this until it exhausts, how to do that?
self.reader_or_delegate = ReaderOrDelegate::Delegate(Box::new(
StringLexer::new_double_quoted(reader),
));
if let Match(_) = reader.matches("[a-z]?\"") {
let sublexer: Box<Lexer> =
Box::new(StringLexer::new_double_quoted(reader_refcell.clone()));
self.reader_or_delegate = ReaderOrDelegate::Delegate(sublexer);
return self.lex();
}
// Operator
Expand All @@ -163,11 +163,10 @@ impl Lexer for CodeLexer {
}
}

// Accessor for the underlying reader, whether this lexer holds it directly or through a delegate.
// NOTE(review): old (`consume`, by value, returns `Box<Reader>`) and new (`get_reader`, by
// reference, returns `&Rc<RefCell<Reader>>`) lines are interleaved below because this page
// renders the diff without +/- markers — confirm which lines survive against the commit.
fn consume(self) -> Box<Reader> {
assert!(false, "I do not think this is ever called, is it?");
fn get_reader(&self) -> &Rc<RefCell<Reader>> {
// NOTE(review): in the `get_reader` version, matching `self.reader_or_delegate` by value
// behind `&self` and returning `&reader` looks like it moves out of borrowed content and
// references a local binding; `ref` bindings (or matching on `&self.reader_or_delegate`)
// are probably needed — verify with the compiler.
match self.reader_or_delegate {
ReaderOrDelegate::Reader(reader) => reader,
ReaderOrDelegate::Delegate(delegate) => delegate.consume(),
ReaderOrDelegate::Reader(reader) => &reader,
ReaderOrDelegate::Delegate(delegate) => delegate.get_reader(),
}
}
}
12 changes: 7 additions & 5 deletions src/mango/lexing/string_lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ use mango::lexing::typ::Lexer;
use mango::lexing::typ::MaybeToken;
use mango::token::tokens::LiteralToken;
use mango::token::Tokens;
use std::cell::RefCell;
use std::rc::Rc;

pub enum StringType {
SingleQuotedInline,
Expand All @@ -14,13 +16,13 @@ pub enum StringType {
/// Lexes a string literal token.
// Starts after the opening quote and expected to consume until closing quote.
pub struct StringLexer {
// Input source that `lex` matches string content against.
// NOTE(review): the two `reader` lines are presumably the before (`Box<Reader>`) and after
// (`Rc<RefCell<Reader>>`) sides of the diff, not two fields — confirm against the commit.
reader: Box<Reader>,
reader: Rc<RefCell<Reader>>,
// Which kind of string literal is being lexed (see `StringType`).
typ: StringType,
}

impl StringLexer {
// TODO: support other types of strings
pub fn new_double_quoted(reader: Box<Reader>) -> Self {
pub fn new_double_quoted(reader: Rc<RefCell<Reader>>) -> Self {
StringLexer {
reader,
typ: StringType::DoubleQuotedInline,
Expand All @@ -33,14 +35,14 @@ impl Lexer for StringLexer {
// TODO: perhaps there's a library that does parsing a string with escape characters
// TODO: doesn't handle escaping etc at all now
// TODO: this is going to have a problem if `matches` automatically eats whitespace
match self.reader.matches("[^\"\\n]*") {
match self.reader.borrow_mut().matches("[^\"\\n]*") {
Match(value) => return MaybeToken::Token(Tokens::Literal(LiteralToken::string(value))),
NoMatch() => panic!("failed to parse string"), // This can't really go wrong since empty pattern matches
EOF() => return MaybeToken::Token(Tokens::Literal(LiteralToken::string("".to_owned()))), // Unclosed string literal, let code parser deal with it
}
}

// Gives the caller access to this lexer's reader.
// NOTE(review): old (`consume`: moves the reader out of `self`) and new (`get_reader`: borrows
// the `Rc<RefCell<Reader>>`) lines are interleaved because this page renders the diff without
// +/- markers — confirm which pair survives against the commit.
fn consume(self) -> Box<Reader> {
self.reader
fn get_reader(&self) -> &Rc<RefCell<Reader>> {
&self.reader
}
}
6 changes: 5 additions & 1 deletion src/mango/lexing/typ.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
use mango::io::typ::Reader;
use mango::token::Tokens;
use std::cell::RefCell;
use std::rc::Rc;

pub enum MaybeToken {
Token(Tokens),
Expand All @@ -10,8 +12,10 @@ pub trait Lexer {
// /// Create a new lexer from a reader instance.
// fn new(reader: &'r mut Reader) -> Self;

// fn new(reader: Rc<RefCell<Reader>>);

/// Every call to lex returns a token until the end of the input.
fn lex(&mut self) -> MaybeToken;

// NOTE(review): presumably the pre-commit method (diff shown without +/- markers): takes the
// lexer by value and returns its boxed reader.
fn consume(self) -> Box<Reader>;
/// Returns a reference to the shared reader handle used by this lexer.
fn get_reader(&self) -> &Rc<RefCell<Reader>>;
}

0 comments on commit ed26686

Please sign in to comment.