From 9952c9f1f6082252918411111102ae0ac82c7c4d Mon Sep 17 00:00:00 2001
From: James Cox
Date: Tue, 2 Jan 2024 20:36:25 +0000
Subject: [PATCH] Handle identifiers and keywords

---
 Cargo.lock               | 72 +++++++++++++++++++++++++++++
 crust_grammar/Cargo.toml |  1 +
 crust_grammar/src/lib.rs | 35 ++++++++++++++-
 src/scanner.rs           | 97 +++++++++++++++++++++++++++++++++++++++-
 4 files changed, 202 insertions(+), 3 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 9201dff..e6b3373 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -12,3 +12,75 @@ dependencies = [
 [[package]]
 name = "crust_grammar"
 version = "0.1.0"
+dependencies = [
+ "strum",
+]
+
+[[package]]
+name = "heck"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
+
+[[package]]
+name = "proc-macro2"
+version = "1.0.71"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "75cb1540fadbd5b8fbccc4dddad2734eba435053f725621c070711a14bb5f4b8"
+dependencies = [
+ "unicode-ident",
+]
+
+[[package]]
+name = "quote"
+version = "1.0.33"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae"
+dependencies = [
+ "proc-macro2",
+]
+
+[[package]]
+name = "rustversion"
+version = "1.0.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4"
+
+[[package]]
+name = "strum"
+version = "0.25.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125"
+dependencies = [
+ "strum_macros",
+]
+
+[[package]]
+name = "strum_macros"
+version = "0.25.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "23dc1fa9ac9c169a78ba62f0b841814b7abae11bdd047b9c58f893439e309ea0"
+dependencies = [
+ "heck",
+ "proc-macro2",
+ "quote",
+ "rustversion",
+ "syn",
+]
+
+[[package]]
+name = "syn"
+version = "2.0.42"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5b7d0a2c048d661a1a59fcd7355baa232f7ed34e0ee4df2eef3c1c1c0d3852d8"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
+[[package]]
+name = "unicode-ident"
+version = "1.0.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
diff --git a/crust_grammar/Cargo.toml b/crust_grammar/Cargo.toml
index 2f05313..ee38406 100644
--- a/crust_grammar/Cargo.toml
+++ b/crust_grammar/Cargo.toml
@@ -6,3 +6,4 @@ edition = "2021"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
 [dependencies]
+strum = { version = "0.25.0", features = ["derive"] }
diff --git a/crust_grammar/src/lib.rs b/crust_grammar/src/lib.rs
index 62111e7..f5d8402 100644
--- a/crust_grammar/src/lib.rs
+++ b/crust_grammar/src/lib.rs
@@ -1,6 +1,12 @@
 pub mod token {
+    use std::str::FromStr;
 
-    #[derive(Debug, PartialEq)]
+    use strum::{EnumDiscriminants, EnumString};
+
+    #[derive(Debug, PartialEq, EnumDiscriminants)]
+    #[strum_discriminants(derive(EnumString))]
+    #[strum_discriminants(name(TokenType))]
+    #[strum_discriminants(strum(ascii_case_insensitive))]
     pub enum Token {
         // Symbols
         LeftParen {
@@ -97,11 +103,12 @@ pub mod token {
             line: usize,
         },
 
-        // Keywords
         Eof {
             offset: usize,
             line: usize,
         },
+
+        // Keywords
         Class {
             offset: usize,
             line: usize,
@@ -189,4 +196,28 @@ pub mod token {
             value: i32,
         },
     }
+
+    pub fn try_as_keyword(text: &str, offset: usize, line: usize) -> Option<Token> {
+        match TokenType::from_str(text) {
+            Ok(token_type) => match token_type {
+                TokenType::Class => Some(Token::Class { offset, line }),
+                TokenType::If => Some(Token::If { offset, line }),
+                TokenType::Else => Some(Token::Else { offset, line }),
+                TokenType::True => Some(Token::True { offset, line }),
+                TokenType::False => Some(Token::False { offset, line }),
+                TokenType::Fn => Some(Token::Fn { offset, line }),
+                TokenType::For => Some(Token::For { offset, line }),
+                TokenType::Mut => Some(Token::Mut { offset, line }),
+                TokenType::While => Some(Token::While { offset, line }),
+                TokenType::Loop => Some(Token::Loop { offset, line }),
+                TokenType::Break => Some(Token::Break { offset, line }),
+                TokenType::Return => Some(Token::Return { offset, line }),
+                TokenType::This => Some(Token::This { offset, line }),
+                TokenType::Super => Some(Token::Super { offset, line }),
+                TokenType::Let => Some(Token::Let { offset, line }),
+                _ => None,
+            },
+            Err(_) => None,
+        }
+    }
 }
diff --git a/src/scanner.rs b/src/scanner.rs
index f3b06d4..7ba2ef0 100644
--- a/src/scanner.rs
+++ b/src/scanner.rs
@@ -1,4 +1,4 @@
-use crust_grammar::token::Token;
+use crust_grammar::token::{try_as_keyword, Token};
 use std::str::FromStr;
 
 use crate::util::{CrustCoreErr, CrustCoreResult};
@@ -151,6 +151,11 @@ impl<'a> Scanner<'a> {
                     errors.push(e);
                 }
             }
+            'A'..='z' => {
+                if let Err(e) = self.take_identifier() {
+                    errors.push(e);
+                }
+            }
             ' ' | '\t' | '\r' => {}
             '\n' => self.line += 1,
             '\"' => {
@@ -273,6 +278,25 @@ impl<'a> Scanner<'a> {
             self.char_at(self.current + 1)
         }
     }
+
+    fn take_identifier(&mut self) -> CrustCoreResult<()> {
+        while self.peek().is_alphanumeric() || self.peek() == '_' {
+            self.advance();
+        }
+        let text = &self.source[self.start..self.current];
+
+        if let Some(keyword) = try_as_keyword(text, self.start, self.line) {
+            self.tokens.push(keyword);
+        } else {
+            self.tokens.push(Token::Identifier {
+                offset: self.start,
+                length: self.current - self.start,
+                line: self.line,
+                value: text.to_string(),
+            })
+        }
+        Ok(())
+    }
 }
 
 #[cfg(test)]
@@ -466,4 +490,75 @@ mod tests {
             .zip(symbols)
             .for_each(|(token, symbol)| assert_eq!(*token, symbol));
     }
+
+    #[test]
+    fn scan_identifiers() {
+        let symbols = vec![
+            Token::If { offset: 0, line: 1 },
+            Token::Else { offset: 3, line: 1 },
+            Token::For { offset: 8, line: 1 },
+            Token::Class {
+                offset: 12,
+                line: 1,
+            },
+            Token::Super {
+                offset: 18,
+                line: 1,
+            },
+            Token::Fn {
+                offset: 24,
+                line: 1,
+            },
+            Token::Identifier {
+                offset: 27,
+                line: 1,
+                length: 11,
+                value: "some_name_1".to_string(),
+            },
+            Token::True {
+                offset: 39,
+                line: 1,
+            },
+            Token::False {
+                offset: 44,
+                line: 1,
+            },
+            Token::Mut {
+                offset: 50,
+                line: 1,
+            },
+            Token::While {
+                offset: 54,
+                line: 1,
+            },
+            Token::Loop {
+                offset: 60,
+                line: 1,
+            },
+            Token::Break {
+                offset: 65,
+                line: 1,
+            },
+            Token::Return {
+                offset: 71,
+                line: 1,
+            },
+            Token::This {
+                offset: 78,
+                line: 1,
+            },
+            Token::Let {
+                offset: 83,
+                line: 1,
+            },
+        ];
+        let scanner = Scanner::new("if else for class super fn some_name_1 true false mut while loop break return this let");
+        let tokens = scanner.scan_tokens();
+
+        tokens
+            .unwrap()
+            .iter()
+            .zip(symbols)
+            .for_each(|(token, symbol)| assert_eq!(*token, symbol));
+    }
 }