Skip to content

Commit

Permalink
Implement the combi-lexer #52
Browse files Browse the repository at this point in the history
  • Loading branch information
mverleg committed Jun 17, 2018
1 parent 71e6518 commit e661070
Show file tree
Hide file tree
Showing 6 changed files with 187 additions and 35 deletions.
36 changes: 12 additions & 24 deletions src/mango/lexing/code_lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ use mango::token::Tokens;
use mango::util::collection::Queue;
use std::cell::RefCell;
use std::rc::Rc;
use mango::lexing::typ::SubLexer;
use mango::lexing::typ::SubLexerResult;

// TODO: Preferably there'd be only one Lexer at a time which has a Reader, but I did not get that to work,
// TODO: see this SO question: https://stackoverflow.com/questions/50535022/borrow-checker-problems-for-parser-that-can-delegate
Expand All @@ -30,24 +32,22 @@ pub struct CodeLexer {
// reader: Rc<RefCell<Reader>>,
indent: i32,

reader: Rc<RefCell<Reader>>,
// This delegate deals with nested structures, like string literals and comments.
reader_or_delegate: ReaderOrDelegate,
// This is unfortunate, would not be needed with 'yield' but is now for indents.
buffer: Queue<Tokens>,
}

impl CodeLexer {
pub fn new(reader: Rc<RefCell<Reader>>) -> Self {
pub fn new() -> Self {
CodeLexer {
reader: reader,
reader_or_delegate: ReaderOrDelegate::Reader(),
indent: 0,
buffer: Queue::new(),
}
}

fn lex_indents(&mut self) -> MaybeToken {
fn lex_indents(&mut self, reader: Box<Reader>) -> MaybeToken {
let mut line_indent = 0;
while let Match(_) = self.reader.borrow_mut().matches("\\t") {
line_indent += 1;
Expand All @@ -73,10 +73,10 @@ impl CodeLexer {
}
}

impl Lexer for CodeLexer {
impl SubLexer for CodeLexer {
// TODO: TURN THIS AROUND: MAKE A FUNCTION THAT RETURNS FROM A QUEUE, AND CALLS ANOTHER TO FILL THE QUEUE IF NO RETURN

fn lex(&mut self) -> MaybeToken {
fn lex_pass(&mut self, reader: Box<Reader>) -> SubLexerResult {
use self::MaybeToken::*;

// If currently delegating to a sub-lexer, return from that.
Expand Down Expand Up @@ -214,12 +214,12 @@ impl Lexer for CodeLexer {
}
}

fn get_reader(&self) -> Rc<RefCell<Reader>> {
match self.reader_or_delegate {
ReaderOrDelegate::Reader() => self.reader.clone(),
ReaderOrDelegate::Delegate(ref delegate) => delegate.get_reader(),
}
}
// fn get_reader(&self) -> Rc<RefCell<Reader>> {
// match self.reader_or_delegate {
// ReaderOrDelegate::Reader() => self.reader.clone(),
// ReaderOrDelegate::Delegate(ref delegate) => delegate.get_reader(),
// }
// }
}

#[cfg(test)]
Expand All @@ -239,7 +239,6 @@ mod tests {
use mango::token::tokens::StartBlockToken;
use mango::token::Tokens;
use std::cell::RefCell;
use std::ops::Generator;
use std::rc::Rc;

fn assert_text_to_tokens(text: &str, tokens: Vec<Tokens>) {
Expand Down Expand Up @@ -287,15 +286,4 @@ mod tests {

#[test]
fn test_lexing_delegation() {}

#[test]
fn generators() {
let mut gen = || {
yield Tokens::Keyword(KeywordToken::from_str("let".to_owned()).unwrap());
yield Tokens::Identifier(IdentifierToken::from_str("x".to_owned()).unwrap());
yield Tokens::Association(AssociationToken::from_unprefixed());
return;
};
let first = unsafe { gen.resume() };
}
}
149 changes: 149 additions & 0 deletions src/mango/lexing/combi_lexer.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
use mango::io::typ::Reader;
use mango::io::typ::ReaderResult::*;
use mango::lexing::code_lexer::CodeLexer;
use mango::lexing::string_lexer::StringLexer;
use mango::lexing::typ::Lexer;
use mango::lexing::typ::SubLexer;
use mango::lexing::typ::MaybeToken;
use mango::lexing::typ::SubLexerResult;
use mango::token::special::UnlexableToken;
use mango::token::Tokens;
use mango::token::tokens::AssociationToken;
use mango::token::tokens::EndBlockToken;
use mango::token::tokens::EndStatementToken;
use mango::token::tokens::IdentifierToken;
use mango::token::tokens::KeywordToken;
use mango::token::tokens::OperatorToken;
use mango::token::tokens::ParenthesisCloseToken;
use mango::token::tokens::ParenthesisOpenToken;
use mango::token::tokens::StartBlockToken;
use mango::util::collection::Queue;
use mango::util::collection::Stack;
use std::cell::RefCell;
use std::rc::Rc;


/// Lexer that produces tokens by delegating to a stack of sub-lexers,
/// buffering the tokens they produce.
pub struct CombiLexer {
// The input source; handed to the active sub-lexer on every `lex_pass` call.
reader: Box<Reader>,
// Stack of sub-lexers; the top one is the one currently being run.
// `new` seeds it with a `CodeLexer`; `lex` pushes delegates and pops finished sub-lexers.
lexers: Stack<Box<SubLexer>>,
// Tokens produced by a sub-lexer pass that have not been returned from `lex` yet.
buffer: Queue<Tokens>,
}

impl CombiLexer {
    /// Creates a combi-lexer over `reader`.
    ///
    /// The sub-lexer stack starts out holding a single `CodeLexer`, and the
    /// token buffer starts out empty.
    pub fn new(reader: Box<Reader>) -> Self {
        let mut stack: Stack<Box<SubLexer>> = Stack::new();
        stack.push(Box::new(CodeLexer::new()));
        let buffer = Queue::new();
        CombiLexer {
            reader,
            lexers: stack,
            buffer,
        }
    }
}

impl Lexer for CombiLexer {
    /// Returns the next token, or `MaybeToken::End` when no sub-lexers remain.
    ///
    /// Buffered tokens are returned first. When the buffer is empty, the
    /// sub-lexer on top of the stack runs one pass, which either yields tokens
    /// (queued in the buffer), asks to delegate to another sub-lexer (pushed
    /// on the stack), or signals that it is done (popped from the stack).
    fn lex(&mut self) -> MaybeToken {
        // Iterate rather than recurse: every branch used to end in `self.lex()`,
        // which grows the call stack with each empty pass, delegate or pop.
        loop {
            // If there are tokens in the buffer, return from there.
            if let Option::Some(token) = self.buffer.pop() {
                return MaybeToken::Token(token);
            }

            // Run one pass of the top-most sub-lexer. Binding the result to a local
            // ends the borrow of `self.lexers` before the stack is pushed/popped
            // below, so this does not depend on non-lexical lifetimes.
            // NOTE(review): `lex_pass` takes the reader by value, which moves
            // `self.reader` out of a borrowed `self`; the trait signature probably
            // needs to take a borrowed reader instead -- confirm and fix in `SubLexer`.
            let pass = match self.lexers.borrow_mut() {
                // No more lexers to delegate to; lexing is finished.
                Option::None => return MaybeToken::End,
                Option::Some(ref mut lexer) => lexer.lex_pass(self.reader),
            };

            match pass {
                SubLexerResult::Tokens(tokens) => {
                    if tokens.len() > 0 {
                        // The sublexer produced tokens, queue them.
                        self.buffer.append(tokens);
                    }
                    // No tokens were produced otherwise; the reader should have
                    // advanced to prevent infinite loops.
                    // TODO: check reader state
                }
                SubLexerResult::Delegate(lexer) => {
                    // Switch to a different delegate lexer.
                    self.lexers.push(lexer);
                }
                SubLexerResult::End => {
                    // The sublexer is done, remove it from the stack and continue with the next.
                    self.lexers.pop();
                }
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use mango::io::fortest::StringReader;
    use mango::lexing::util::lex_all::{lex_all, LexList};
    use mango::token::Tokens;
    use mango::token::tokens::AssociationToken;
    use mango::token::tokens::EndBlockToken;
    use mango::token::tokens::EndStatementToken;
    use mango::token::tokens::IdentifierToken;
    use mango::token::tokens::KeywordToken;
    use mango::token::tokens::LiteralToken;
    use mango::token::tokens::OperatorToken;
    use mango::token::tokens::ParenthesisCloseToken;
    use mango::token::tokens::ParenthesisOpenToken;
    use mango::token::tokens::StartBlockToken;
    use super::CombiLexer;

    /// Lexes `text` with a fresh `CombiLexer` and asserts that the result
    /// equals the expected `tokens`.
    fn assert_text_to_tokens(text: &str, tokens: Vec<Tokens>) {
        assert_eq!(
            LexList::from_tokens(tokens),
            // `CombiLexer::new` takes a `Box<Reader>`, not an `Rc<RefCell<Reader>>`.
            lex_all(&mut CombiLexer::new(Box::new(StringReader::new(
                text.to_owned()
            ))))
        )
    }

    #[test]
    fn test_lexing_individual() {
        assert_text_to_tokens(
            "if",
            vec![Tokens::Keyword(
                KeywordToken::from_str("if".to_owned()).unwrap(),
            )],
        );
        // todo: more
    }

    #[test]
    fn test_lexing_combined() {
        assert_text_to_tokens(
            "let x = 0\nfor x < 128\n\tx += 1",
            vec![
                Tokens::Keyword(KeywordToken::from_str("let".to_owned()).unwrap()),
                Tokens::Identifier(IdentifierToken::from_str("x".to_owned()).unwrap()),
                Tokens::Association(AssociationToken::from_unprefixed()),
                Tokens::Literal(LiteralToken::Int(0)),
                Tokens::EndStatement(EndStatementToken::new_end_line()),
                Tokens::Keyword(KeywordToken::from_str("for".to_owned()).unwrap()),
                // The `x` in `for x < 128` was missing from the expected tokens.
                Tokens::Identifier(IdentifierToken::from_str("x".to_owned()).unwrap()),
                Tokens::Operator(OperatorToken::from_str("<").unwrap()),
                Tokens::Literal(LiteralToken::Int(128)),
                Tokens::EndStatement(EndStatementToken::new_end_line()),
                Tokens::StartBlock(StartBlockToken::new()),
                Tokens::Identifier(IdentifierToken::from_str("x".to_owned()).unwrap()),
                Tokens::Association(AssociationToken::from_str("+".to_owned()).unwrap()),
                Tokens::Literal(LiteralToken::Int(1)),
                Tokens::EndBlock(EndBlockToken::new(true, false)),
            ],
        );
    }

    #[test]
    fn test_lexing_delegation() {}
}
1 change: 1 addition & 0 deletions src/mango/lexing/gen_code_lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ use mango::util::strslice::slice::glyphat;
// TODO: this is problematic because the generator wants references to the container,
// TODO: and the container obviously stores the generator

// TODO: use generator: Box<Generator<Yield = Tokens, Return = ()>> directly
struct CodeLexer<G: Generator<Yield = Tokens, Return = ()>> {
indent: i32,
delegate: Option<Box<Lexer>>,
Expand Down
12 changes: 7 additions & 5 deletions src/mango/lexing/mod.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
pub mod typ;
mod typ;

pub mod code_lexer;
mod combi_lexer;

pub mod comment_lexer;
mod code_lexer;

pub mod string_lexer;
mod comment_lexer;

pub mod util;
mod string_lexer;

mod util;
6 changes: 3 additions & 3 deletions src/mango/lexing/string_lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ impl Lexer for StringLexer {
}
}

fn get_reader(&self) -> Rc<RefCell<Reader>> {
self.reader.clone()
}
// fn get_reader(&self) -> Rc<RefCell<Reader>> {
// self.reader.clone()
// }
}
18 changes: 15 additions & 3 deletions src/mango/lexing/typ.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,19 @@
use mango::io::typ::Reader;
use mango::token::Tokens;
use std::cell::RefCell;
use std::rc::Rc;

// TODO: I don't want this to be public outside the crate
/// Outcome of a single `SubLexer::lex_pass` call.
pub enum SubLexerResult {
/// Tokens produced by this pass; `CombiLexer` queues them in its buffer.
/// May be empty, in which case the reader should have advanced.
Tokens(Vec<Tokens>),
/// Request to continue lexing with another sub-lexer; `CombiLexer` pushes it on its lexer stack.
Delegate(Box<SubLexer>),
/// This sub-lexer is done; `CombiLexer` pops it from its lexer stack.
End,
}

// TODO: I don't want this to be public outside the crate
/// A lexer that is driven one pass at a time and reports the outcome of each
/// pass as a `SubLexerResult`. `CombiLexer` keeps a stack of these.
pub trait SubLexer {
/// Does one iteration of a sublexer, which should either delegate or return tokens.
/// If an empty vector of tokens is returned, the reader should have advanced (to prevent infinite loops).
fn lex_pass(&mut self, reader: Box<Reader>) -> SubLexerResult;
}

pub enum MaybeToken {
Token(Tokens),
Expand All @@ -17,5 +29,5 @@ pub trait Lexer {
/// Every call to lex returns a token until the end of the input.
fn lex(&mut self) -> MaybeToken;

fn get_reader(&self) -> Rc<RefCell<Reader>>;
// fn get_reader(&self) -> Rc<RefCell<Reader>>;
}

0 comments on commit e661070

Please sign in to comment.