From 04ab16ab12db6a0869b146595254de8c911221a9 Mon Sep 17 00:00:00 2001 From: Mark Date: Tue, 5 Jun 2018 19:24:47 +0200 Subject: [PATCH] Lexer infrastructure for testing #52 --- src/mango/io/fortest/fromstr.rs | 1 + src/mango/io/typ.rs | 2 ++ src/mango/lexing/code_lexer.rs | 34 ++++++++++++++++++++++++++++++-- src/mango/lexing/mod.rs | 2 ++ src/mango/lexing/util/lex_all.rs | 34 ++++++++++++++++++++++++++++++++ src/mango/lexing/util/mod.rs | 1 + src/mango/util/strslice/slice.rs | 4 ---- 7 files changed, 72 insertions(+), 6 deletions(-) create mode 100644 src/mango/lexing/util/lex_all.rs create mode 100644 src/mango/lexing/util/mod.rs diff --git a/src/mango/io/fortest/fromstr.rs b/src/mango/io/fortest/fromstr.rs index 2ca183b9..c4ee12b7 100644 --- a/src/mango/io/fortest/fromstr.rs +++ b/src/mango/io/fortest/fromstr.rs @@ -30,6 +30,7 @@ impl Reader for StringReader { REXCACHE.with(|rl| { let mut rexlib = rl.borrow_mut(); let rex = rexlib.make_or_get(subpattern); + println!("{:?}", rex); }); ReaderResult::NoMatch() // TODO } diff --git a/src/mango/io/typ.rs b/src/mango/io/typ.rs index ebe523a5..2c83c5ca 100644 --- a/src/mango/io/typ.rs +++ b/src/mango/io/typ.rs @@ -12,6 +12,8 @@ pub trait Reader { // fn equals(&mut self, texts: Vec<&str>) -> ReaderResult; /// Checks whether the code from the current position matches a regex pattern. + /// + /// This has to eventually return EOF, after which it should not be called again. 
fn matches(&mut self, subpattern: &str) -> ReaderResult; } diff --git a/src/mango/lexing/code_lexer.rs b/src/mango/lexing/code_lexer.rs index a371a7dc..bbf2bf86 100644 --- a/src/mango/lexing/code_lexer.rs +++ b/src/mango/lexing/code_lexer.rs @@ -38,7 +38,7 @@ pub struct CodeLexer { } impl CodeLexer { - fn new(reader: Rc<RefCell<Reader>>) -> Self { + pub fn new(reader: Rc<RefCell<Reader>>) -> Self { CodeLexer { reader: reader, reader_or_delegate: ReaderOrDelegate::Reader(), @@ -186,7 +186,13 @@ impl Lexer for CodeLexer { } // TODO: specify the unlexable word - return Token(Tokens::Unlexable(UnlexableToken::new("TODO".to_owned()))); + let unknown_word = self.reader.borrow_mut().matches(" *[^\\s]+"); + if let Match(word) = unknown_word { + return Token(Tokens::Unlexable(UnlexableToken::new(word))); + } else { + // todo: handle better someday + panic!("Do not know how to proceed with parsing"); + } } } } @@ -198,3 +204,27 @@ impl Lexer for CodeLexer { } } } + +#[cfg(test)] +mod tests { + use super::CodeLexer; + use mango::io::fortest::StringReader; + use mango::io::typ::Reader; + use mango::lexing::util::lex_all::{lex_all, LexList}; + use std::cell::RefCell; + use std::rc::Rc; + + #[test] + fn test_lexing() { + assert_eq!( + LexList::from_tokens(vec![]), + lex_all(Rc::new(RefCell::new(StringReader::new( + "let x = 0\nfor x < 128\n\tx += 1\n".to_owned(), + )))) + ) + // assert_eq!(1, cnt, "No item in ProblemCollector"); + } + + #[test] + fn test_lexing_delegation() {} +} diff --git a/src/mango/lexing/mod.rs b/src/mango/lexing/mod.rs index bb88a815..254d9a1c 100644 --- a/src/mango/lexing/mod.rs +++ b/src/mango/lexing/mod.rs @@ -5,3 +5,5 @@ pub mod code_lexer; pub mod comment_lexer; pub mod string_lexer; + +pub mod util; diff --git a/src/mango/lexing/util/lex_all.rs b/src/mango/lexing/util/lex_all.rs new file mode 100644 index 00000000..99481231 --- /dev/null +++ b/src/mango/lexing/util/lex_all.rs @@ -0,0 +1,34 @@ +use mango::io::typ::Reader; +use mango::lexing::code_lexer::CodeLexer; +use 
mango::lexing::typ::Lexer; +use mango::lexing::typ::MaybeToken; +use mango::token::Token; +use mango::token::Tokens; +use std::cell::RefCell; +use std::rc::Rc; + +/// Represents all the lex tokens in a source. +#[derive(PartialEq, Eq, Debug)] +pub struct LexList { + tokens: Vec<Tokens>, +} + +impl LexList { + pub fn from_tokens(tokens: Vec<Tokens>) -> Self { + LexList { tokens } + } + + pub fn from_reader(reader: Rc<RefCell<Reader>>) -> Self { + lex_all(reader) + } +} + +pub fn lex_all(reader: Rc<RefCell<Reader>>) -> LexList { + let mut list = Vec::with_capacity(512); + let mut lexer = CodeLexer::new(reader); + while let MaybeToken::Token(token) = lexer.lex() { + list.push(token) + } + list.shrink_to_fit(); + LexList { tokens: list } +} diff --git a/src/mango/lexing/util/mod.rs b/src/mango/lexing/util/mod.rs new file mode 100644 index 00000000..52be7fa1 --- /dev/null +++ b/src/mango/lexing/util/mod.rs @@ -0,0 +1 @@ +pub mod lex_all; diff --git a/src/mango/util/strslice/slice.rs b/src/mango/util/strslice/slice.rs index 3260c401..58022f26 100644 --- a/src/mango/util/strslice/slice.rs +++ b/src/mango/util/strslice/slice.rs @@ -12,10 +12,6 @@ pub fn charslice<S: Into<String>>(text: S, start: isize, end: isize) -> String { -start as usize <= charcount, "charslice: if 'start' is negative, the magnitude may not exceed the length" ); - println!( - ">> charcount as isize + start = {} + {}", - charcount as isize, start - ); from = (charcount as isize + start) as usize; } else { from = start as usize;