Skip to content

Commit

Permalink
Deprecate two lexer implementations #52
Browse files Browse the repository at this point in the history
  • Loading branch information
mverleg committed Jun 17, 2018
1 parent ce4dc62 commit e66e934
Show file tree
Hide file tree
Showing 4 changed files with 309 additions and 3 deletions.
4 changes: 2 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
//#![feature(nll)]
#![feature(generators, generator_trait)]
#![feature(nll)]
//#![feature(generators, generator_trait)]

extern crate core;
#[macro_use]
Expand Down
304 changes: 304 additions & 0 deletions src/mango/lexing/code_lexer_prev.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,304 @@

// TODO: dead code, no longer used

use mango::io::typ::Reader;
use mango::io::typ::ReaderResult::*;
use mango::lexing::string_lexer::StringLexer;
use mango::lexing::typ::Lexer;
use mango::lexing::typ::MaybeToken;
use mango::token::special::UnlexableToken;
use mango::token::tokens::AssociationToken;
use mango::token::tokens::EndBlockToken;
use mango::token::tokens::EndStatementToken;
use mango::token::tokens::IdentifierToken;
use mango::token::tokens::KeywordToken;
use mango::token::tokens::OperatorToken;
use mango::token::tokens::ParenthesisCloseToken;
use mango::token::tokens::ParenthesisOpenToken;
use mango::token::tokens::StartBlockToken;
use mango::token::Tokens;
use mango::util::collection::Queue;
use std::cell::RefCell;
use std::rc::Rc;

// TODO: Preferably there'd be only one Lexer at a time which has a Reader, but I did not get that to work,
// TODO: see this SO question: https://stackoverflow.com/questions/50535022/borrow-checker-problems-for-parser-that-can-delegate

/// Records whether `CodeLexer::lex` reads from the lexer's own reader or forwards to a sub-lexer.
enum ReaderOrDelegate {
// Direct mode: `CodeLexer::lex` matches against the `reader` field of `CodeLexer`.
Reader(),
// Delegation mode: `CodeLexer::lex` forwards to this boxed sub-lexer until it returns `End`.
Delegate(Box<Lexer>),
}

/// Lexer for regular code. It handles indentation itself and hands nested
/// structures (currently double-quoted strings) to a delegate lexer.
pub struct CodeLexer {
// Indentation level of the most recently processed line: initialised to 0 and
// updated by `lex_indents` to the number of leading tabs it counted.
indent: i32,

// Shared handle to the input; a clone of it is given to the sub-lexer when delegating.
reader: Rc<RefCell<Reader>>,
// This delegate deals with nested structures, like string literals and comments.
reader_or_delegate: ReaderOrDelegate,
// This is unfortunate, would not be needed with 'yield' but is now for indents.
// Tokens pushed here are popped by `lex` before anything new is read.
buffer: Queue<Tokens>,
}

impl CodeLexer {
    /// Create a lexer that reads directly from `reader`, starting at
    /// indentation level zero with nothing buffered.
    pub fn new(reader: Rc<RefCell<Reader>>) -> Self {
        CodeLexer {
            reader,
            reader_or_delegate: ReaderOrDelegate::Reader(),
            indent: 0,
            buffer: Queue::new(),
        }
    }

    /// Handle the indentation at the start of a line.
    ///
    /// Counts the leading tabs, queues one `EndBlock` per level of dedent or
    /// one `StartBlock` per level of indent relative to the previous line,
    /// stores the new level, and then returns whatever `lex` yields next.
    fn lex_indents(&mut self) -> MaybeToken {
        // Every leading tab counts as one indentation level.
        let mut tab_count = 0;
        loop {
            match self.reader.borrow_mut().matches("\\t") {
                Match(_) => tab_count += 1,
                _ => break,
            }
        }

        let previous_indent = self.indent;
        if tab_count < previous_indent {
            // Dedented line: close one block per level that was dropped.
            // TODO: turn this "new" into a constant
            for _ in tab_count..previous_indent {
                // An explicit 'end' keyword right after the dedent is redundant;
                // it is consumed here and recorded on the token.
                let end_keyword_follows = match self.reader.borrow_mut().matches("end") {
                    Match(_) => true,
                    _ => false,
                };
                self.buffer
                    .push(Tokens::EndBlock(EndBlockToken::new(true, end_keyword_follows)));
            }
        } else {
            // Indented line (or same level, in which case the range is empty):
            // open one block per level that was added.
            for _ in previous_indent..tab_count {
                self.buffer.push(Tokens::StartBlock(StartBlockToken::new()));
            }
        }

        self.indent = tab_count;
        self.lex()
    }
}

impl Lexer for CodeLexer {
    // TODO: turn this around: make a function that returns from a queue, and calls another to fill the queue if no return

    /// Produce the next token, or `End` when the reader reports end-of-file.
    ///
    /// While a sub-lexer is active, its tokens are forwarded until it returns
    /// `End`, after which lexing continues in direct mode. In direct mode,
    /// buffered tokens are returned first; otherwise the reader is tried, in
    /// order, for: line continuation (`...`), newline, semicolon,
    /// identifier/keyword, string-literal opener, association, operator,
    /// parentheses, and finally any run of non-whitespace (returned as an
    /// unlexable token).
    ///
    /// # Panics
    ///
    /// Panics when `...` is not followed by a newline anywhere before the end
    /// of input, and when the reader reports no match for any of the patterns
    /// without being at end-of-file.
    fn lex(&mut self) -> MaybeToken {
        use self::MaybeToken::*;

        // If currently delegating to a sub-lexer, return from that.
        match self.reader_or_delegate {
            ReaderOrDelegate::Delegate(ref mut delegate) => {
                let delegated_token = delegate.lex();
                match delegated_token {
                    End => {
                        // Swap back from delegation to direct mode.
                        self.reader_or_delegate = ReaderOrDelegate::Reader();
                        self.lex()
                    }
                    Token(token) => Token(token),
                }
                // Code to stop delegation cannot be here, because `self` is still mutably borrowed through `delegate`
            }
            ReaderOrDelegate::Reader() => {
                // todo: maybe this branch could be a separate function?

                // If there is a buffer due to indentation or continuations, return from that.
                if let Some(token) = self.buffer.pop() {
                    return Token(token);
                }
                // Past this point, we assume that the buffer is empty. When adding stuff, pop it or re-enter lex() soon.
                let continue_match_res = self.reader.borrow_mut().matches("\\.\\.\\.");
                if let Match(_) = continue_match_res {
                    // Line continuation has no token, it just continues on the next line.
                    let newline_match_res = self.reader.borrow_mut().matches("\\n\\r?");
                    if let Match(_) = newline_match_res {
                        // There should always be a newline after continuations, so that they can be ignored together.
                    } else {
                        // Anything between '...' and the newline cannot be lexed; emit it as one unlexable token.
                        let newline_match_res =
                            self.reader.borrow_mut().matches("[^\\n]*\\n\\r?");
                        if let Match(word) = newline_match_res {
                            self.buffer
                                .push(Tokens::Unlexable(UnlexableToken::new(word)));
                            // This is a new line, so there may be indents.
                            // `lex_indents` already returns the next token taken from the
                            // buffer; its result must be returned, not discarded, or the
                            // token it popped is lost.
                            return self.lex_indents();
                        } else {
                            // TODO: I don't know yet how to deal with '...' followed by end-of-file
                            panic!("line continuation '...' must be followed by a newline")
                        }
                    }
                }
                let newline_match_res = self.reader.borrow_mut().matches("\\n\\r?");
                if let Match(_) = newline_match_res {
                    // Newline WITHOUT line continuation.
                    // This is a new line, so there may be indents.
                    self.buffer
                        .push(Tokens::EndStatement(EndStatementToken::new_end_line()));
                    // Return what `lex_indents` yields (it pops from the buffer just
                    // filled); calling it and then `lex()` again would drop a token.
                    return self.lex_indents();
                }
                let end_statement_match_res = self.reader.borrow_mut().matches(";");
                if let Match(_) = end_statement_match_res {
                    // Semicolon, which ends a statement.
                    // Need to do some extra work with buffer, because there may be a newline followed by indentation, which ; should precede.
                    self.buffer
                        .push(Tokens::EndStatement(EndStatementToken::new_semicolon()));
                    let end_line_match_res = self.reader.borrow_mut().matches("\\n\\r?");
                    if let Match(_) = end_line_match_res {
                        // If semicolon is followed by a newline (redundant), then we need to deal with indents (but ignore the newline itself).
                        // This will return the queue of tokens, including the semicolon.
                        return self.lex_indents();
                    }
                    // No newline, can just return the semicolon (which is certainly on the queue, and should be the only thing, but it is fine here if not).
                    return Token(self.buffer.pop().unwrap());
                }
                //
                // Indentation done; do the rest of lexing.
                //
                // Parse identifiers and keywords. This assumes that keywords are a subset of identifiers.
                if let Match(word) = self
                    .reader
                    .borrow_mut()
                    .matches(IdentifierToken::subpattern())
                {
                    // later: maybe turn identifier into keyword to avoid a string copy? kind of elaborate...
                    if let Ok(keyword) = KeywordToken::from_str(word.clone()) {
                        return Token(Tokens::Keyword(keyword));
                    }
                    return Token(Tokens::Identifier(IdentifierToken::from_str(word).unwrap()));
                }
                // Literal: an opening double quote (optionally prefixed by one lowercase
                // letter) switches to a string sub-lexer that shares the same reader.
                let string_match_res = self.reader.borrow_mut().matches("[a-z]?\"");
                if let Match(_) = string_match_res {
                    let sublexer: Box<Lexer> =
                        Box::new(StringLexer::new_double_quoted(self.reader.clone()));
                    self.reader_or_delegate = ReaderOrDelegate::Delegate(sublexer);
                    return self.lex();
                }
                // Association (before operator)
                let association_match_res = self
                    .reader
                    .borrow_mut()
                    .matches(&AssociationToken::subpattern());
                if let Match(_) = association_match_res {
                    // TODO: the matched text (e.g. an operator prefix before '=') is
                    // currently ignored; every association is emitted as unprefixed.
                    return Token(Tokens::Association(AssociationToken::from_unprefixed()));
                }
                // Operator
                let operator_match_res = self
                    .reader
                    .borrow_mut()
                    .matches(OperatorToken::subpattern());
                if let Match(token) = operator_match_res {
                    return Token(Tokens::Operator(OperatorToken::from_str(&token).unwrap()));
                }
                // Grouping symbols
                if let Match(_) = self.reader.borrow_mut().matches(r"\(") {
                    return Token(Tokens::ParenthesisOpen(ParenthesisOpenToken::new()));
                }
                if let Match(_) = self.reader.borrow_mut().matches(r"\)") {
                    return Token(Tokens::ParenthesisClose(ParenthesisCloseToken::new()));
                }

                // Nothing recognised: take the next run of non-whitespace as unlexable.
                let unknown_word = self.reader.borrow_mut().matches("[^\\s]+");
                match unknown_word {
                    Match(word) => return Token(Tokens::Unlexable(UnlexableToken::new(word))),
                    NoMatch() => {
                        println!("END {:?}", self.reader.borrow()); // TODO
                        panic!("Do not know how to proceed with parsing")
                    }
                    EOF() => {
                        // TODO: also dedent and end statement here
                        End
                    }
                }
            }
        }
    }

    /// Return the reader currently being lexed from: a clone of this lexer's
    /// own handle in direct mode, or whatever the active sub-lexer reports.
    fn get_reader(&self) -> Rc<RefCell<Reader>> {
        match self.reader_or_delegate {
            ReaderOrDelegate::Reader() => self.reader.clone(),
            ReaderOrDelegate::Delegate(ref delegate) => delegate.get_reader(),
        }
    }
}

#[cfg(test)]
mod tests {
    use super::CodeLexer;
    use mango::io::fortest::StringReader;
    use mango::lexing::util::lex_all::{lex_all, LexList};
    use mango::token::tokens::AssociationToken;
    use mango::token::tokens::EndBlockToken;
    use mango::token::tokens::EndStatementToken;
    use mango::token::tokens::IdentifierToken;
    use mango::token::tokens::KeywordToken;
    use mango::token::tokens::LiteralToken;
    use mango::token::tokens::OperatorToken;
    use mango::token::tokens::ParenthesisCloseToken;
    use mango::token::tokens::ParenthesisOpenToken;
    use mango::token::tokens::StartBlockToken;
    use mango::token::Tokens;
    use std::cell::RefCell;
    use std::ops::Generator;
    use std::rc::Rc;

    /// Lex `text` with a fresh `CodeLexer` over a `StringReader` and assert
    /// that the result equals the expected `tokens`.
    fn assert_text_to_tokens(text: &str, tokens: Vec<Tokens>) {
        assert_eq!(
            LexList::from_tokens(tokens),
            lex_all(&mut CodeLexer::new(Rc::new(RefCell::new(
                StringReader::new(text.to_owned())
            ))))
        )
    }

    #[test]
    fn test_lexing_individual() {
        assert_text_to_tokens(
            "if",
            vec![Tokens::Keyword(
                KeywordToken::from_str("if".to_owned()).unwrap(),
            )],
        );
        // todo: more
    }

    // NOTE(review): the expectation below includes integer literals, a prefixed
    // association for `+=`, and an `EndBlock` at end of input. `CodeLexer::lex`
    // in this file has no integer-literal branch, always emits unprefixed
    // associations, and returns `End` at EOF without dedenting (see its TODOs),
    // so this test describes intended rather than current behaviour.
    #[test]
    fn test_lexing_combined() {
        assert_text_to_tokens(
            "let x = 0\nfor x < 128\n\tx += 1",
            vec![
                Tokens::Keyword(KeywordToken::from_str("let".to_owned()).unwrap()),
                Tokens::Identifier(IdentifierToken::from_str("x".to_owned()).unwrap()),
                Tokens::Association(AssociationToken::from_unprefixed()),
                Tokens::Literal(LiteralToken::Int(0)),
                Tokens::EndStatement(EndStatementToken::new_end_line()),
                Tokens::Keyword(KeywordToken::from_str("for".to_owned()).unwrap()),
                // The loop variable in `for x < 128`.
                Tokens::Identifier(IdentifierToken::from_str("x".to_owned()).unwrap()),
                Tokens::Operator(OperatorToken::from_str("<").unwrap()),
                Tokens::Literal(LiteralToken::Int(128)),
                Tokens::EndStatement(EndStatementToken::new_end_line()),
                Tokens::StartBlock(StartBlockToken::new()),
                Tokens::Identifier(IdentifierToken::from_str("x".to_owned()).unwrap()),
                Tokens::Association(AssociationToken::from_str("+".to_owned()).unwrap()),
                Tokens::Literal(LiteralToken::Int(1)),
                Tokens::EndBlock(EndBlockToken::new(true, false)),
            ],
        );
    }

    #[test]
    fn test_lexing_delegation() {}

    // NOTE(review): `yield` and `std::ops::Generator` need the nightly
    // `generators` / `generator_trait` features enabled at the crate root;
    // confirm they are still enabled before compiling this module.
    #[test]
    fn generators() {
        let mut gen = || {
            yield Tokens::Keyword(KeywordToken::from_str("let".to_owned()).unwrap());
            yield Tokens::Identifier(IdentifierToken::from_str("x".to_owned()).unwrap());
            yield Tokens::Association(AssociationToken::from_unprefixed());
            return;
        };
        // SAFETY: presumably `resume` is an `unsafe fn` on the targeted nightly
        // (TODO confirm); the generator is not moved or used after this call.
        let _first = unsafe { gen.resume() };
    }
}
3 changes: 3 additions & 0 deletions src/mango/lexing/gen_code_lexer.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@

// TODO: dead code, no longer used

use mango::io::typ::Reader;
use mango::io::typ::ReaderResult::*;
use mango::lexing::string_lexer::StringLexer;
Expand Down
1 change: 0 additions & 1 deletion src/mango/lexing/mod.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
pub mod typ;

pub mod code_lexer;
mod gen_code_lexer;

pub mod comment_lexer;

Expand Down

0 comments on commit e66e934

Please sign in to comment.