More code lexing functionality reactivated #52
mverleg committed Jun 17, 2018
1 parent 491e214 commit 10650d5
Showing 6 changed files with 121 additions and 118 deletions.
199 changes: 100 additions & 99 deletions src/mango/lexing/code_lexer.rs
@@ -6,7 +6,6 @@ use mango::lexing::typ::MaybeToken;
use mango::lexing::typ::SubLexer;
use mango::lexing::typ::SubLexerResult;
use mango::token::special::UnlexableToken;
use mango::token::Tokens;
use mango::token::tokens::AssociationToken;
use mango::token::tokens::EndBlockToken;
use mango::token::tokens::EndStatementToken;
@@ -16,6 +15,7 @@ use mango::token::tokens::OperatorToken;
use mango::token::tokens::ParenthesisCloseToken;
use mango::token::tokens::ParenthesisOpenToken;
use mango::token::tokens::StartBlockToken;
use mango::token::Tokens;
use mango::util::collection::Queue;
use std::cell::RefCell;
use std::rc::Rc;
@@ -61,6 +61,13 @@ impl CodeLexer {
self.indent = line_indent;
tokens
}

fn token_and_indents(&mut self, reader: &mut Box<Reader>, token: Tokens) -> SubLexerResult {
let mut tokens: Vec<Tokens> = vec![token];
// This is a new line, so there may be indents.
tokens.append(&mut self.lex_indents(reader));
return SubLexerResult::Result(tokens);
}
}

impl SubLexer for CodeLexer {
@@ -70,115 +77,109 @@ impl SubLexer for CodeLexer {
// TODO: put all these match results inline

// End of line continuation
let continue_match_res = reader.matches(r"\.\.\.");
if let Match(_) = continue_match_res {
if let Match(_) = reader.matches(r"\.\.\.") {
// Line continuation has no token, it just continues on the next line, ignoring indents (for now).
let newline_match_res = reader.matches(r"\n\r?\t*");
if let Match(_) = newline_match_res {
if let Match(_) = reader.matches(r"\n\r?\t*") {
// There should always be a newline after continuations, so that they can be ignored together.
} else {
// The rest of this line is unparsable.
let newline_match_res = reader.matches("[^\\n]*\\n\\r?");
if let Match(word) = newline_match_res {
let mut res: Vec<Tokens> = vec![Tokens::Unlexable(UnlexableToken::new(word))];
if let Match(word) = reader.matches("[^\\n]*\\n\\r?") {
// This is a new line, so there may be indents.
res.append(&mut self.lex_indents(reader));
return Result(res);
return self
.token_and_indents(reader, Tokens::Unlexable(UnlexableToken::new(word)));
} else {
// TODO: I don't know yet how to deal with '...' followed by end-of-file
panic!()
}
}
}
// Newlines
if let Match(_) = reader.matches("\\n\\r?") {
// Newline WITHOUT line continuation.
// This is a new line, so there may be indents.
return self.token_and_indents(
reader,
Tokens::EndStatement(EndStatementToken::new_end_line()),
);
}
// End of statement
if let Match(_) = reader.matches(";") {
// Semicolon, which ends a statement.
if let Match(_) = reader.matches("\\n\\r?") {
// If semicolon is followed by a newline, it is redundant. Deal with indents (but ignore the newline itself).
return self.token_and_indents(
reader,
Tokens::EndStatement(EndStatementToken::new_semicolon()),
);
} else {
return SubLexerResult::single(Tokens::EndStatement(
EndStatementToken::new_semicolon(),
));
}
}
//
// Indentation done; do the rest of lexing.
//
// Parse identifiers and keywords. This assumes that keywords are a subset of identifiers.
if let Match(word) = reader.matches(IdentifierToken::subpattern()) {
// TODO: maybe turn identifier into keyword to avoid a string copy? kind of elaborate...
if let Ok(keyword) = KeywordToken::from_str(word.clone()) {
return SubLexerResult::single(Tokens::Keyword(keyword));
}
return SubLexerResult::single(Tokens::Identifier(
IdentifierToken::from_str(word).unwrap(),
));
}
// // Literal
// let string_match_res = reader.matches("[a-z]?\"");
// if let Match(_) = string_match_res {
// let sublexer: Box<Lexer> =
// Box::new(StringLexer::new_double_quoted(self.reader.clone()));
// self.reader_or_delegate = ReaderOrDelegate::Delegate(sublexer);
// return self.lex();
// }
// // Association (before operator)
// let association_match_res = self
// .reader
// .borrow_mut()
// .matches(&AssociationToken::subpattern());
// if let Match(token) = association_match_res {
// if token.chars().last().unwrap() == '=' {
// // return Token(Tokens::Association(AssociationToken::from_str(token[..1]).unwrap()));
// return Token(Tokens::Association(AssociationToken::from_unprefixed())); // TODO
// } else {
// return Token(Tokens::Association(AssociationToken::from_unprefixed()));
// }
// }
// // Operator
// let operator_match_res = self
// .reader
// .borrow_mut()
// .matches(OperatorToken::subpattern());
// if let Match(token) = operator_match_res {
// return Token(Tokens::Operator(OperatorToken::from_str(&token).unwrap()));
// }
// // Grouping symbols
// if let Match(_) = reader.matches(r"\(") {
// return Token(Tokens::ParenthesisOpen(ParenthesisOpenToken::new()));
// }
// if let Match(_) = reader.matches(r"\)") {
// return Token(Tokens::ParenthesisClose(ParenthesisCloseToken::new()));
// }
//
// let unknown_word = reader.matches("[^\\s]+");
// match unknown_word {
// Match(word) => return Token(Tokens::Unlexable(UnlexableToken::new(word))),
// NoMatch() => {
// println!("END {:?}", self.reader.borrow()); // TODO
// panic!("Do not know how to proceed with parsing")
// }
// EOF() => {
// // TODO: also dedent and end statement here
// End
// }
// }

panic!();
// let newline_match_res = reader.matches("\\n\\r?");
// if let Match(_) = newline_match_res {
// // Newline WITHOUT line continuation.
// // This is a new line, so there may be indents.
// self.buffer
// .push(Tokens::EndStatement(EndStatementToken::new_end_line()));
// self.lex_indents();
// return self.lex();
// }
// let end_statement_match_res = reader.matches(";");
// if let Match(_) = end_statement_match_res {
// // Semicolon, which ends a statement.
// // Need to do some extra work with buffer, because there may be a newline followed by indentation, which ; should precede.
// self.buffer
// .push(Tokens::EndStatement(EndStatementToken::new_semicolon()));
// let end_line_match_res = reader.matches("\\n\\r?");
// if let Match(_) = end_line_match_res {
// // If semicolon is followed by a newline (redundant), then we need to deal with indents (but ignore the newline itself).
// // This will return the queue of tokens, including the semicolon.
// return self.lex_indents();
// }
// // No newline, can just return the semicolon (which is certainly on the queue, and should be the only thing, but it is fine here if not).
// return Token(self.buffer.pop().unwrap());
// }
// //
// // Indentation done; do the rest of lexing.
// //
// // Parse identifiers and keywords. This assumes that keywords are a subset of identifiers.
// if let Match(word) = self
// .reader
// .borrow_mut()
// .matches(IdentifierToken::subpattern())
// {
// // later: maybe turn identifier into keyword to avoid a string copy? kind of elaborate...
// if let Ok(keyword) = KeywordToken::from_str(word.clone()) {
// return Token(Tokens::Keyword(keyword));
// }
// return Token(Tokens::Identifier(IdentifierToken::from_str(word).unwrap()));
// }
// // Literal
// let string_match_res = reader.matches("[a-z]?\"");
// if let Match(_) = string_match_res {
// let sublexer: Box<Lexer> =
// Box::new(StringLexer::new_double_quoted(self.reader.clone()));
// self.reader_or_delegate = ReaderOrDelegate::Delegate(sublexer);
// return self.lex();
// }
// // Association (before operator)
// let association_match_res = self
// .reader
// .borrow_mut()
// .matches(&AssociationToken::subpattern());
// if let Match(token) = association_match_res {
// if token.chars().last().unwrap() == '=' {
// // return Token(Tokens::Association(AssociationToken::from_str(token[..1]).unwrap()));
// return Token(Tokens::Association(AssociationToken::from_unprefixed())); // TODO
// } else {
// return Token(Tokens::Association(AssociationToken::from_unprefixed()));
// }
// }
// // Operator
// let operator_match_res = self
// .reader
// .borrow_mut()
// .matches(OperatorToken::subpattern());
// if let Match(token) = operator_match_res {
// return Token(Tokens::Operator(OperatorToken::from_str(&token).unwrap()));
// }
// // Grouping symbols
// if let Match(_) = reader.matches(r"\(") {
// return Token(Tokens::ParenthesisOpen(ParenthesisOpenToken::new()));
// }
// if let Match(_) = reader.matches(r"\)") {
// return Token(Tokens::ParenthesisClose(ParenthesisCloseToken::new()));
// }
//
// let unknown_word = reader.matches("[^\\s]+");
// match unknown_word {
// Match(word) => return Token(Tokens::Unlexable(UnlexableToken::new(word))),
// NoMatch() => {
// println!("END {:?}", self.reader.borrow()); // TODO
// panic!("Do not know how to proceed with parsing")
// }
// EOF() => {
// // TODO: also dedent and end statement here
// End
// }
// }
panic!() // TODO TMP
}
}
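Note (not part of the commit): a minimal sketch of the SubLexerResult values the reactivated end-of-statement branches return, using only constructors and module paths that appear in the imports above. The full token stream for real input also depends on lex_indents, which lies outside this hunk, and "x" is assumed to be accepted by IdentifierToken::from_str.

use mango::lexing::typ::SubLexerResult;
use mango::token::tokens::EndStatementToken;
use mango::token::tokens::IdentifierToken;
use mango::token::Tokens;

fn sketch_results() -> Vec<SubLexerResult> {
    vec![
        // A bare newline ends the statement; in the real code, token_and_indents
        // then appends whatever indent tokens lex_indents produces.
        SubLexerResult::Result(vec![Tokens::EndStatement(EndStatementToken::new_end_line())]),
        // A semicolon not followed by a newline needs no indent handling,
        // so the single-token helper added in typ.rs suffices.
        SubLexerResult::single(Tokens::EndStatement(EndStatementToken::new_semicolon())),
        // A word that is not a keyword becomes an identifier token.
        SubLexerResult::single(Tokens::Identifier(
            IdentifierToken::from_str("x".to_owned()).unwrap(),
        )),
    ]
}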
25 changes: 11 additions & 14 deletions src/mango/lexing/combi_lexer.rs
@@ -3,11 +3,10 @@ use mango::io::typ::ReaderResult::*;
use mango::lexing::code_lexer::CodeLexer;
use mango::lexing::string_lexer::StringLexer;
use mango::lexing::typ::Lexer;
use mango::lexing::typ::SubLexer;
use mango::lexing::typ::MaybeToken;
use mango::lexing::typ::SubLexer;
use mango::lexing::typ::SubLexerResult;
use mango::token::special::UnlexableToken;
use mango::token::Tokens;
use mango::token::tokens::AssociationToken;
use mango::token::tokens::EndBlockToken;
use mango::token::tokens::EndStatementToken;
@@ -17,12 +16,12 @@ use mango::token::tokens::OperatorToken;
use mango::token::tokens::ParenthesisCloseToken;
use mango::token::tokens::ParenthesisOpenToken;
use mango::token::tokens::StartBlockToken;
use mango::token::Tokens;
use mango::util::collection::Queue;
use mango::util::collection::Stack;
use std::cell::RefCell;
use std::rc::Rc;


pub struct CombiLexer {
reader: Box<Reader>,
lexers: Stack<Box<SubLexer>>,
Expand All @@ -43,7 +42,6 @@ impl CombiLexer {

impl Lexer for CombiLexer {
fn lex(&mut self) -> MaybeToken {

// If there are tokens in the buffer, return from there;
if let Option::Some(token) = self.buffer.pop() {
return MaybeToken::Token(token);
@@ -64,29 +62,28 @@ impl Lexer for CombiLexer {
// TODO: check reader state
self.lex()
}
},
}
SubLexerResult::Delegate(lexer) => {
// Switch to a different delegate lexer.
self.lexers.push(lexer);
self.lex()
},
}
SubLexerResult::End => {
// The sublexer is done, remove it from the stack and continue with the next.
self.lexers.pop(); // This needs non-lexical lifetimes
self.lexers.pop(); // This needs non-lexical lifetimes
self.lex()
},
}
}
}
}
}

}

#[cfg(test)]
mod tests {
use super::CombiLexer;
use mango::io::fortest::StringReader;
use mango::lexing::util::lex_all::{lex_all, LexList};
use mango::token::Tokens;
use mango::token::tokens::AssociationToken;
use mango::token::tokens::EndBlockToken;
use mango::token::tokens::EndStatementToken;
@@ -97,16 +94,16 @@ mod tests {
use mango::token::tokens::ParenthesisCloseToken;
use mango::token::tokens::ParenthesisOpenToken;
use mango::token::tokens::StartBlockToken;
use mango::token::Tokens;
use std::cell::RefCell;
use std::rc::Rc;
use super::CombiLexer;

fn assert_text_to_tokens(text: &str, tokens: Vec<Tokens>) {
assert_eq!(
LexList::from_tokens(tokens),
lex_all(&mut CombiLexer::new(Box::new(
StringReader::new(text.to_owned())
)))
lex_all(&mut CombiLexer::new(Box::new(StringReader::new(
text.to_owned()
))))
)
}

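For orientation (not part of the commit): the test helper above suggests this way of driving the lexer end to end. The module path for CombiLexer is assumed from the file location, and, as the panic!() placeholders in code_lexer.rs show, end-of-input handling is still a TODO in this commit, so running this to completion may currently panic.

use mango::io::fortest::StringReader;
use mango::lexing::combi_lexer::CombiLexer;
use mango::lexing::util::lex_all::lex_all;

fn main() {
    // Wrap the source text in a StringReader, feed it to a CombiLexer, and
    // collect everything it produces into a LexList for inspection.
    let mut lexer = CombiLexer::new(Box::new(StringReader::new("x;".to_owned())));
    let tokens = lex_all(&mut lexer);
    println!("{:?}", tokens);
}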
6 changes: 3 additions & 3 deletions src/mango/lexing/string_lexer.rs
@@ -42,7 +42,7 @@ impl Lexer for StringLexer {
}
}

// fn get_reader(&self) -> Rc<RefCell<Reader>> {
// self.reader.clone()
// }
// fn get_reader(&self) -> Rc<RefCell<Reader>> {
// self.reader.clone()
// }
}
6 changes: 6 additions & 0 deletions src/mango/lexing/typ.rs
@@ -8,6 +8,12 @@ pub enum SubLexerResult {
End,
}

impl SubLexerResult {
pub fn single(token: Tokens) -> Self {
SubLexerResult::Result(vec![token])
}
}

// TODO: I don't want this to be public outside the crate
pub trait SubLexer {
/// Does one iteration of a sublexer, which should either delegate or return tokens.
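For context (the full definition is collapsed in this hunk): judging from how SubLexerResult is constructed in code_lexer.rs and matched in combi_lexer.rs, the enum this impl extends presumably has roughly the shape below; treat it as a reconstruction under the surrounding typ.rs imports, not the actual definition.

pub enum SubLexerResult {
    // One or more tokens were produced in this iteration.
    Result(Vec<Tokens>),
    // Hand control to another sublexer (e.g. a string lexer) until it signals End.
    Delegate(Box<SubLexer>),
    // This sublexer is finished and should be popped off the stack.
    End,
}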
1 change: 0 additions & 1 deletion src/mango/util/collection/queue.rs
@@ -1,4 +1,3 @@

/// A one-ended queue. See also [Stack].
/// This is just a wrapper around vec so nobody pushes or pops the wrong end.
pub struct Queue<T> {
2 changes: 1 addition & 1 deletion src/mango/util/strslice/slice.rs
@@ -49,7 +49,7 @@ pub fn charsliceto<S: Into<String>>(text: S, end: isize) -> String {
}

pub fn glyphat<S: Into<String>>(text: S, pos: isize) -> String {
charslice(text, pos, pos+1)
charslice(text, pos, pos + 1)
}

#[cfg(test)]
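A small usage note (not part of the commit): glyphat slices a single character out of a string via charslice. Assuming the module is exported along its file path and character positions are zero-based, it can be used like this.

use mango::util::strslice::slice::glyphat;

fn main() {
    // Take the glyph at character position 1 of "mango" (expected: "a").
    println!("{}", glyphat("mango", 1));
}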
