Add block start/end token and lexing #56
mverleg committed May 22, 2018
1 parent f33692b commit 3728f4e
Showing 5 changed files with 111 additions and 8 deletions.
63 changes: 56 additions & 7 deletions src/mango/lexing/code_lexer.rs
@@ -3,47 +3,96 @@ use mango::io::typ::ReaderResult::*;
use mango::lexing::typ::Lexer;
use mango::lexing::typ::MaybeToken;
use mango::token::special::UnlexableToken;
use mango::token::tokens::EndBlockToken;
use mango::token::tokens::EndStatementToken;
use mango::token::tokens::ParenthesisCloseToken;
use mango::token::tokens::ParenthesisOpenToken;
use mango::token::tokens::StartBlockToken;
use mango::token::Tokens;
use mango::util::codeparts::Keyword;
- use std::collections::VecDeque;
+ use mango::util::collection::Queue;

pub struct CodeLexer<'r> {
reader: &'r mut Reader,
indent: i32,
- // This is unfortunate, would not be needed with 'yield' but is now for indents
- buffer: VecDeque<Tokens>,
+ // This is unfortunate, would not be needed with 'yield' but is now for indents.
+ buffer: Queue<Tokens>,
}

impl<'r> CodeLexer<'r> {
fn new(reader: &'r mut Reader) -> Self {
CodeLexer {
reader,
indent: 0,
- buffer: VecDeque::with_capacity(16),
+ buffer: Queue::new(),
}
}

fn lex_indents(&mut self) -> MaybeToken {
let mut line_indent = 0;
while let Match(_) = self.reader.matches("\\t") {
line_indent += 1;
}
for _ in line_indent..self.indent {
// This line is dedented, make end tokens.
if let Match(_) = self.reader.matches("end") {
// If this is followed by an 'end' keyword, then that 'end' is redundant.
self.buffer
.push(Tokens::EndBlock(EndBlockToken::new(true, true)));
} else {
self.buffer
.push(Tokens::EndBlock(EndBlockToken::new(true, false)));
}
}
for _ in self.indent..line_indent {
// This line is indented, make start tokens.
self.buffer.push(Tokens::StartBlock(StartBlockToken::new()));
}
self.indent = line_indent;
self.lex()
}
}

impl<'r> Lexer<'r> for CodeLexer<'r> {
fn lex(&mut self) -> MaybeToken {
// If there is a buffer due to indentation or continuations, return from that.
- if !self.buffer.is_empty() {
- return MaybeToken::Token(self.buffer.pop_front().unwrap());
+ if let Some(token) = self.buffer.pop() {
+ return MaybeToken::Token(token);
}
// Past this point, we assume that the buffer is empty. When adding stuff, pop it or re-enter lex() soon.
if let Match(word) = self.reader.matches("\\.\\.\\.") {
// Line continuation has no token, it just continues on the next line.
if let Match(word) = self.reader.matches("\\n\\r?") {
// There should always be a newline after continuations, so that they can be ignored together.
} else if let Match(word) = self.reader.matches("[^\\n]*\\n\\r?") {
return MaybeToken::Token(Tokens::Unlexable(UnlexableToken::new(word)));
} else {
- // TODO: I don't know yet how to deal with continuation followed by end of file
+ // TODO: I don't know yet how to deal with ... followed by end-of-file
panic!()
}
// This is a new line, so there may be indents.
return self.lex_indents();
}
if let Match(word) = self.reader.matches("\\n\\r?") {
// Newline WITHOUT line continuation.
return MaybeToken::Token(Tokens::EndStatement(EndStatementToken::new_end_line()));
}
if let Match(word) = self.reader.matches(";") {
// Semicolon, which ends a statement.
// Some extra work with the buffer is needed, because a newline with indentation may follow, and the ';' token should precede any tokens that produces.
self.buffer
.push(Tokens::EndStatement(EndStatementToken::new_semicolon()));
if let Match(word) = self.reader.matches("\\n\\r?") {
// If semicolon is followed by a newline (redundant), then we need to deal with indents (but ignore the newline itself).
// This will return the queue of tokens, including the semicolon.
return self.lex_indents();
}
// No newline, so we can just return the semicolon (it is certainly on the queue, and should be the only thing there, but it is fine here even if it is not).
return MaybeToken::Token(self.buffer.pop().unwrap());
}
//
// Indentation done; do the rest of lexing.
//
if let Match(word) = self.reader.matches("(") {
return MaybeToken::Token(Tokens::ParenthesisOpen(ParenthesisOpenToken::new()));
}
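The core of the new lexing logic is lex_indents: it counts the leading tabs on a new line, compares that count to the previous indent level, and queues an EndBlockToken per dedented level (checking for a redundant 'end' keyword along the way) or a StartBlockToken per indented level. Below is a minimal standalone sketch of that bookkeeping, using plain stand-in types instead of the crate's Tokens and Queue; the name queue_indent_tokens and the single saw_end flag are illustrative assumptions, not part of the commit (the real lexer re-checks the reader for 'end' at every dedent level).

use std::collections::VecDeque;

// Simplified stand-ins for the crate's StartBlockToken / EndBlockToken.
#[derive(Debug, PartialEq)]
enum Token {
    StartBlock,
    // Mirrors EndBlockToken::new(is_dedent, is_end_keyword).
    EndBlock { is_dedent: bool, is_end_keyword: bool },
}

// Compare the new line's indent to the previous level and queue the block
// tokens implied by the difference. A single flag records whether a redundant
// 'end' keyword was seen; it is folded into the first end-block token only.
fn queue_indent_tokens(prev: i32, line: i32, mut saw_end: bool, out: &mut VecDeque<Token>) {
    for _ in line..prev {
        // Dedented by one level: close a block.
        out.push_back(Token::EndBlock { is_dedent: true, is_end_keyword: saw_end });
        saw_end = false;
    }
    for _ in prev..line {
        // Indented by one level: open a block.
        out.push_back(Token::StartBlock);
    }
}

fn main() {
    let mut buffer = VecDeque::new();
    // Dedenting from level 2 to level 0 with an explicit (redundant) 'end':
    queue_indent_tokens(2, 0, true, &mut buffer);
    assert_eq!(buffer.pop_front(), Some(Token::EndBlock { is_dedent: true, is_end_keyword: true }));
    assert_eq!(buffer.pop_front(), Some(Token::EndBlock { is_dedent: true, is_end_keyword: false }));
    // Indenting from level 0 to level 1 opens one block:
    queue_indent_tokens(0, 1, false, &mut buffer);
    assert_eq!(buffer.pop_front(), Some(Token::StartBlock));
}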
1 change: 0 additions & 1 deletion src/mango/lexing/typ.rs
@@ -1,4 +1,3 @@
use mango::io::typ::Reader;
use mango::token::Tokens;

pub enum MaybeToken {
6 changes: 6 additions & 0 deletions src/mango/token/collect/all.rs
@@ -1,12 +1,14 @@
use mango::token::special::UnlexableToken;
use mango::token::tokens::AssociationToken;
use mango::token::tokens::EndBlockToken;
use mango::token::tokens::EndStatementToken;
use mango::token::tokens::IdentifierToken;
use mango::token::tokens::KeywordToken;
use mango::token::tokens::LiteralToken;
use mango::token::tokens::OperatorToken;
use mango::token::tokens::ParenthesisCloseToken;
use mango::token::tokens::ParenthesisOpenToken;
use mango::token::tokens::StartBlockToken;
use mango::util::encdec::ToText;

/// Collection of all possible tokens.
@@ -21,6 +23,8 @@ pub enum Tokens {
ParenthesisClose(ParenthesisCloseToken),
EndStatement(EndStatementToken),
Unlexable(UnlexableToken),
StartBlock(StartBlockToken),
EndBlock(EndBlockToken),
}

impl ToText for Tokens {
@@ -38,6 +42,8 @@ impl ToText for Tokens {
ParenthesisClose(token) => token.to_text(),
EndStatement(token) => token.to_text(),
Unlexable(token) => token.to_text(),
StartBlock(token) => token.to_text(),
EndBlock(token) => token.to_text(),
}
}
}
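With the two new variants wired into Tokens and its ToText impl, downstream code can match on block boundaries like any other token. The sketch below shows what a consumer of the stream might do, rendering blocks as braces in the spirit of the block tokens' ToText impls later in this commit; the enum is a local, stripped-down stand-in rather than the crate's Tokens, and the ';' rendering for EndStatement is a guess.

// Local, stripped-down stand-in for the crate's Tokens enum.
enum Tok {
    StartBlock,
    EndBlock,
    EndStatement,
}

// Render block boundaries as braces, like the block tokens' to_text below.
fn to_text(token: &Tok) -> String {
    match token {
        Tok::StartBlock => " { ".to_owned(),
        Tok::EndBlock => " } ".to_owned(),
        Tok::EndStatement => ";".to_owned(),
    }
}

fn main() {
    let stream = [Tok::StartBlock, Tok::EndStatement, Tok::EndBlock];
    let text: String = stream.iter().map(to_text).collect();
    println!("{}", text); // prints " { ; } "
}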
46 changes: 46 additions & 0 deletions src/mango/token/tokens/block.rs
@@ -0,0 +1,46 @@
use mango::token::Token;
use mango::util::encdec::ToText;

/// Start and end of blocks, signalled e.g. by indentation.
#[derive(Debug, PartialEq, Eq, Hash, Clone)]
pub struct StartBlockToken {}

#[derive(Debug, PartialEq, Eq, Hash, Clone)]
pub struct EndBlockToken {
is_dedent: bool,
is_end_keyword: bool,
}

impl StartBlockToken {
pub fn new() -> Self {
StartBlockToken {}
}
}

impl EndBlockToken {
pub fn new(is_dedent: bool, is_end_keyword: bool) -> Self {
assert!(is_dedent || is_end_keyword);
EndBlockToken {
is_dedent,
is_end_keyword,
}
}
}

impl ToText for StartBlockToken {
// TODO: needs context information to render indents
fn to_text(&self) -> String {
" { ".to_owned()
}
}

impl ToText for EndBlockToken {
// TODO: needs context information to render indents
fn to_text(&self) -> String {
" } ".to_owned()
}
}

impl Token for StartBlockToken {}

impl Token for EndBlockToken {}
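The assert in EndBlockToken::new encodes the invariant that a block can only end because of a dedent, an explicit 'end' keyword, or both, never neither. A small standalone illustration of that constructor contract, using a local stand-in type rather than the crate's EndBlockToken:

// Stand-in mirroring EndBlockToken's two flags.
#[allow(dead_code)]
#[derive(Debug)]
struct EndBlock {
    is_dedent: bool,
    is_end_keyword: bool,
}

impl EndBlock {
    // Same contract as EndBlockToken::new: at least one reason must be given.
    fn new(is_dedent: bool, is_end_keyword: bool) -> Self {
        assert!(is_dedent || is_end_keyword, "an end-block needs a dedent and/or an 'end' keyword");
        EndBlock { is_dedent, is_end_keyword }
    }
}

fn main() {
    let _ = EndBlock::new(true, false); // dedent only
    let _ = EndBlock::new(false, true); // 'end' keyword only
    let _ = EndBlock::new(true, true);  // dedent plus a redundant 'end'
    // EndBlock::new(false, false) would panic on the assertion.
}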
3 changes: 3 additions & 0 deletions src/mango/token/tokens/mod.rs
@@ -22,3 +22,6 @@ pub use self::keyword::KeywordToken;

pub mod end_statement;
pub use self::end_statement::EndStatementToken;

pub mod block;
pub use self::block::{EndBlockToken, StartBlockToken};
