From 3728f4e7d27c88be05f7bf41e50617ae31666be8 Mon Sep 17 00:00:00 2001 From: Mark Date: Tue, 22 May 2018 14:23:23 +0200 Subject: [PATCH] Add block start/end token and lexing #56 --- src/mango/lexing/code_lexer.rs | 63 +++++++++++++++++++++++++++++---- src/mango/lexing/typ.rs | 1 - src/mango/token/collect/all.rs | 6 ++++ src/mango/token/tokens/block.rs | 46 ++++++++++++++++++++++++ src/mango/token/tokens/mod.rs | 3 ++ 5 files changed, 111 insertions(+), 8 deletions(-) create mode 100644 src/mango/token/tokens/block.rs diff --git a/src/mango/lexing/code_lexer.rs b/src/mango/lexing/code_lexer.rs index 1fc3521f..97a88b1c 100644 --- a/src/mango/lexing/code_lexer.rs +++ b/src/mango/lexing/code_lexer.rs @@ -3,17 +3,20 @@ use mango::io::typ::ReaderResult::*; use mango::lexing::typ::Lexer; use mango::lexing::typ::MaybeToken; use mango::token::special::UnlexableToken; +use mango::token::tokens::EndBlockToken; +use mango::token::tokens::EndStatementToken; use mango::token::tokens::ParenthesisCloseToken; use mango::token::tokens::ParenthesisOpenToken; +use mango::token::tokens::StartBlockToken; use mango::token::Tokens; use mango::util::codeparts::Keyword; -use std::collections::VecDeque; +use mango::util::collection::Queue; pub struct CodeLexer<'r> { reader: &'r mut Reader, indent: i32, - // This is unfortunate, would not be needed with 'yield' but is now for indents - buffer: VecDeque, + // This is unfortunate, would not be needed with 'yield' but is now for indents. + buffer: Queue, } impl<'r> CodeLexer<'r> { @@ -21,17 +24,42 @@ impl<'r> CodeLexer<'r> { CodeLexer { reader, indent: 0, - buffer: VecDeque::with_capacity(16), + buffer: Queue::new(), } } + + fn lex_indents(&mut self) -> MaybeToken { + let mut line_indent = 0; + while let Match(_) = self.reader.matches("\\t") { + line_indent += 1; + } + for _ in line_indent..self.indent { + // This line is dedented, make end tokens. 
+ if let Match(_) = self.reader.matches("end") { + // If this is followed by an 'end' keyword, then that 'end' is redundant. + self.buffer + .push(Tokens::EndBlock(EndBlockToken::new(true, true))); + } else { + self.buffer + .push(Tokens::EndBlock(EndBlockToken::new(true, false))); + } + } + for _ in self.indent..line_indent { + // This line is indented, make start tokens. + self.buffer.push(Tokens::StartBlock(StartBlockToken::new())); + } + self.indent = line_indent; + self.lex() + } } impl<'r> Lexer<'r> for CodeLexer<'r> { fn lex(&mut self) -> MaybeToken { // If there is a buffer due to indentation or continuations, return from that. - if !self.buffer.is_empty() { - return MaybeToken::Token(self.buffer.pop_front().unwrap()); + if let Some(token) = self.buffer.pop() { + return MaybeToken::Token(token); } + // Past this point, we assume that the buffer is empty. When adding stuff, pop it or re-enter lex() soon. if let Match(word) = self.reader.matches("\\.\\.\\.") { // Line continuation has no token, it just continues on the next line. if let Match(word) = self.reader.matches("\\n\\r?") { @@ -39,11 +67,32 @@ impl<'r> Lexer<'r> for CodeLexer<'r> { } else if let Match(word) = self.reader.matches("[^\\n]*\\n\\r?") { return MaybeToken::Token(Tokens::Unlexable(UnlexableToken::new(word))); } else { - // TODO: I don't know yet how to deal with continuation followed by end of file + // TODO: I don't know yet how to deal with ... followed by end-of-file panic!() } + // This is a new line, so there may be indents. + return self.lex_indents(); + } + if let Match(word) = self.reader.matches("\\n\\r?") { + // Newline WITHOUT line continuation. + return MaybeToken::Token(Tokens::EndStatement(EndStatementToken::new_end_line())); + } + if let Match(word) = self.reader.matches(";") { + // Semicolon, which ends a statement. + // Need to do some extra work with buffer, because there may be a newline followed by indentation, which ; should precede. 
+ self.buffer + .push(Tokens::EndStatement(EndStatementToken::new_semicolon())); + if let Match(word) = self.reader.matches("\\n\\r?") { + // If semicolon is followed by a newline (redundant), then we need to deal with indents (but ignore the newline itself). + // This will return the queue of tokens, including the semicolon. + return self.lex_indents(); + } + // No newline, can just return the semicolon (which is certainly on the queue, and should be the only thing, but it is fine here if not). + return MaybeToken::Token(self.buffer.pop().unwrap()); } + // // Indentation done; do the rest of lexing. + // if let Match(word) = self.reader.matches("(") { return MaybeToken::Token(Tokens::ParenthesisOpen(ParenthesisOpenToken::new())); } diff --git a/src/mango/lexing/typ.rs b/src/mango/lexing/typ.rs index 864db178..39295f76 100644 --- a/src/mango/lexing/typ.rs +++ b/src/mango/lexing/typ.rs @@ -1,4 +1,3 @@ -use mango::io::typ::Reader; use mango::token::Tokens; pub enum MaybeToken { diff --git a/src/mango/token/collect/all.rs b/src/mango/token/collect/all.rs index 37cd7853..17571b09 100644 --- a/src/mango/token/collect/all.rs +++ b/src/mango/token/collect/all.rs @@ -1,5 +1,6 @@ use mango::token::special::UnlexableToken; use mango::token::tokens::AssociationToken; +use mango::token::tokens::EndBlockToken; use mango::token::tokens::EndStatementToken; use mango::token::tokens::IdentifierToken; use mango::token::tokens::KeywordToken; @@ -7,6 +8,7 @@ use mango::token::tokens::LiteralToken; use mango::token::tokens::OperatorToken; use mango::token::tokens::ParenthesisCloseToken; use mango::token::tokens::ParenthesisOpenToken; +use mango::token::tokens::StartBlockToken; use mango::util::encdec::ToText; /// Collection of all possible tokens. 
@@ -21,6 +23,8 @@ pub enum Tokens { ParenthesisClose(ParenthesisCloseToken), EndStatement(EndStatementToken), Unlexable(UnlexableToken), + StartBlock(StartBlockToken), + EndBlock(EndBlockToken), } impl ToText for Tokens { @@ -38,6 +42,8 @@ impl ToText for Tokens { ParenthesisClose(token) => token.to_text(), EndStatement(token) => token.to_text(), Unlexable(token) => token.to_text(), + StartBlock(token) => token.to_text(), + EndBlock(token) => token.to_text(), } } } diff --git a/src/mango/token/tokens/block.rs b/src/mango/token/tokens/block.rs new file mode 100644 index 00000000..64a3041f --- /dev/null +++ b/src/mango/token/tokens/block.rs @@ -0,0 +1,46 @@ +use mango::token::Token; +use mango::util::encdec::ToText; + +/// Start and end of blocks, signalled e.g. by indentation. +#[derive(Debug, PartialEq, Eq, Hash, Clone)] +pub struct StartBlockToken {} + +#[derive(Debug, PartialEq, Eq, Hash, Clone)] +pub struct EndBlockToken { + is_dedent: bool, + is_end_keyword: bool, +} + +impl StartBlockToken { + pub fn new() -> Self { + StartBlockToken {} + } +} + +impl EndBlockToken { + pub fn new(is_dedent: bool, is_end_keyword: bool) -> Self { + assert!(is_dedent || is_end_keyword); + EndBlockToken { + is_dedent, + is_end_keyword, + } + } +} + +impl ToText for StartBlockToken { + // TODO: needs context information to render indents + fn to_text(&self) -> String { + " { ".to_owned() + } +} + +impl ToText for EndBlockToken { + // TODO: needs context information to render indents + fn to_text(&self) -> String { + " } ".to_owned() + } +} + +impl Token for StartBlockToken {} + +impl Token for EndBlockToken {} diff --git a/src/mango/token/tokens/mod.rs b/src/mango/token/tokens/mod.rs index 4508d768..3dfa133a 100644 --- a/src/mango/token/tokens/mod.rs +++ b/src/mango/token/tokens/mod.rs @@ -22,3 +22,6 @@ pub use self::keyword::KeywordToken; pub mod end_statement; pub use self::end_statement::EndStatementToken; + +pub mod block; +pub use self::block::{EndBlockToken, StartBlockToken};