Skip to content

Commit

Permalink
Handle identifiers and keywords
Browse files Browse the repository at this point in the history
  • Loading branch information
JamesAC committed Jan 2, 2024
1 parent fc7c3e9 commit 9952c9f
Show file tree
Hide file tree
Showing 4 changed files with 202 additions and 3 deletions.
72 changes: 72 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions crust_grammar/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
strum = { version = "0.25.0", features = ["derive"] }
35 changes: 33 additions & 2 deletions crust_grammar/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
pub mod token {
use std::str::FromStr;

#[derive(Debug, PartialEq)]
use strum::{EnumDiscriminants, EnumString};

#[derive(Debug, PartialEq, EnumDiscriminants)]
#[strum_discriminants(derive(EnumString))]
#[strum_discriminants(name(TokenType))]
#[strum_discriminants(strum(ascii_case_insensitive))]
pub enum Token {
// Symbols
LeftParen {
Expand Down Expand Up @@ -97,11 +103,12 @@ pub mod token {
line: usize,
},

// Keywords
Eof {
offset: usize,
line: usize,
},

// Keywords
Class {
offset: usize,
line: usize,
Expand Down Expand Up @@ -189,4 +196,28 @@ pub mod token {
value: i32,
},
}

/// Attempt to interpret `text` as a reserved keyword.
///
/// Recognition goes through the `TokenType` discriminant enum, whose
/// derived `FromStr` impl is marked `ascii_case_insensitive`, so keyword
/// spellings are matched without regard to ASCII case. On a hit, the
/// corresponding `Token` variant is built carrying the source `offset`
/// and `line`; anything that is not a keyword (including token types
/// that exist but are not keywords) yields `None`.
pub fn try_as_keyword(text: &str, offset: usize, line: usize) -> Option<Token> {
    // Bail out immediately when the text is not any known token spelling.
    let kind = TokenType::from_str(text).ok()?;

    let token = match kind {
        TokenType::Class => Token::Class { offset, line },
        TokenType::If => Token::If { offset, line },
        TokenType::Else => Token::Else { offset, line },
        TokenType::True => Token::True { offset, line },
        TokenType::False => Token::False { offset, line },
        TokenType::Fn => Token::Fn { offset, line },
        TokenType::For => Token::For { offset, line },
        TokenType::Mut => Token::Mut { offset, line },
        TokenType::While => Token::While { offset, line },
        TokenType::Loop => Token::Loop { offset, line },
        TokenType::Break => Token::Break { offset, line },
        TokenType::Return => Token::Return { offset, line },
        TokenType::This => Token::This { offset, line },
        TokenType::Super => Token::Super { offset, line },
        TokenType::Let => Token::Let { offset, line },
        // Valid token type, but not a keyword.
        _ => return None,
    };
    Some(token)
}
}
97 changes: 96 additions & 1 deletion src/scanner.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crust_grammar::token::Token;
use crust_grammar::token::{try_as_keyword, Token};
use std::str::FromStr;

use crate::util::{CrustCoreErr, CrustCoreResult};
Expand Down Expand Up @@ -151,6 +151,11 @@ impl<'a> Scanner<'a> {
errors.push(e);
}
}
'A'..='z' => {
if let Err(e) = self.take_identifier() {
errors.push(e);
}
}
' ' | '\t' | '\r' => {}
'\n' => self.line += 1,
'\"' => {
Expand Down Expand Up @@ -273,6 +278,25 @@ impl<'a> Scanner<'a> {
self.char_at(self.current + 1)
}
}

/// Consume an identifier-like lexeme beginning at `self.start`.
///
/// Advances the cursor while the next character is alphanumeric or `_`,
/// then pushes either the matching keyword token (via `try_as_keyword`)
/// or a generic `Token::Identifier` holding the lexeme text. Always
/// succeeds; the `Result` return matches the other `take_*` scanners.
fn take_identifier(&mut self) -> CrustCoreResult<()> {
    // Scan forward over the identifier body.
    loop {
        let next = self.peek();
        if !(next.is_alphanumeric() || next == '_') {
            break;
        }
        self.advance();
    }

    let text = &self.source[self.start..self.current];

    // Keywords win over plain identifiers.
    let token = match try_as_keyword(text, self.start, self.line) {
        Some(keyword) => keyword,
        None => Token::Identifier {
            offset: self.start,
            length: self.current - self.start,
            line: self.line,
            value: text.to_string(),
        },
    };
    self.tokens.push(token);
    Ok(())
}
}

#[cfg(test)]
Expand Down Expand Up @@ -466,4 +490,75 @@ mod tests {
.zip(symbols)
.for_each(|(token, symbol)| assert_eq!(*token, symbol));
}

/// Scanning a line containing every keyword plus one plain identifier
/// produces the expected token sequence with correct byte offsets.
#[test]
fn scan_identifiers() {
    let expected = vec![
        Token::If { offset: 0, line: 1 },
        Token::Else { offset: 3, line: 1 },
        Token::For { offset: 8, line: 1 },
        Token::Class {
            offset: 12,
            line: 1,
        },
        Token::Super {
            offset: 18,
            line: 1,
        },
        Token::Fn {
            offset: 24,
            line: 1,
        },
        Token::Identifier {
            offset: 27,
            line: 1,
            length: 11,
            value: "some_name_1".to_string(),
        },
        Token::True {
            offset: 39,
            line: 1,
        },
        Token::False {
            offset: 44,
            line: 1,
        },
        Token::Mut {
            offset: 50,
            line: 1,
        },
        Token::While {
            offset: 54,
            line: 1,
        },
        Token::Loop {
            offset: 60,
            line: 1,
        },
        Token::Break {
            offset: 65,
            line: 1,
        },
        Token::Return {
            offset: 71,
            line: 1,
        },
        Token::This {
            offset: 78,
            line: 1,
        },
        Token::Let {
            offset: 83,
            line: 1,
        },
    ];

    let scanner = Scanner::new("if else for class super fn some_name_1 true false mut while loop break return this let");
    let tokens = scanner.scan_tokens().unwrap();

    // NOTE(review): zip() stops at the shorter side, so any trailing
    // tokens the scanner appends (presumably an EOF marker — confirm)
    // are ignored, matching the tolerance of the original assertions.
    for (token, symbol) in tokens.iter().zip(expected) {
        assert_eq!(*token, symbol);
    }
}
}

0 comments on commit 9952c9f

Please sign in to comment.