From 0857862981e9eb34fcd720655d9564f95d50f064 Mon Sep 17 00:00:00 2001 From: Lucas Date: Sat, 29 Jun 2024 02:58:48 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=9A=A7=20(parser):=20Work=20on=20parser?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit semver: chore --- src/files.rs | 73 +++++++++++++++++++---------------------- src/parser/macros.rs | 12 ++++--- src/parser/statement.rs | 17 ++++++++-- src/scanner/lexeme.rs | 14 ++++++++ test.som | 2 +- 5 files changed, 69 insertions(+), 49 deletions(-) diff --git a/src/files.rs b/src/files.rs index d3d47d3..883e85b 100644 --- a/src/files.rs +++ b/src/files.rs @@ -1,21 +1,34 @@ use std::collections::HashMap; -#[derive(Default)] +use codespan_reporting::files::SimpleFiles; + pub struct Files<'a> { - pub files: HashMap<&'a str, &'a str>, + files: SimpleFiles<&'a str, &'a str>, + file_handles: HashMap<&'a str, usize>, +} + +impl<'a> Default for Files<'a> { + fn default() -> Self { + Self { + files: SimpleFiles::new(), + file_handles: HashMap::new(), + } + } } impl<'a> Files<'a> { pub fn insert(&mut self, file_id: &'a str, source: &'a str) { - self.files.insert(file_id, source); + let handle = self.files.add(source, source); + self.file_handles.insert(file_id, handle); } - pub fn file_ids(&self) -> Vec<&'a str> { - self.files.keys().copied().collect() + pub fn get(&self, file_id: impl Into<&'a str>) -> Option<&'a str> { + let handle = self.file_handles.get(file_id.into())?; + Some(self.files.get(*handle).unwrap().source()) } - pub fn get(&self, file_id: impl Into<&'a str>) -> Option<&'a str> { - self.files.get(file_id.into()).copied() + pub fn file_ids<'b>(&'b self) -> impl Iterator<Item = &'a str> + 'b { + self.file_handles.keys().copied() } } @@ -25,21 +38,15 @@ impl<'a> codespan_reporting::files::Files<'a> for Files<'a> { type Source = &'a str; fn name(&'a self, id: Self::FileId) -> Result<Self::Name, codespan_reporting::files::Error> { - self.files - .keys() - .find(|key| **key == id) - .copied() - 
.ok_or(codespan_reporting::files::Error::FileMissing) + Ok(id) } fn source( &'a self, id: Self::FileId, ) -> Result<Self::Source, codespan_reporting::files::Error> { - self.files - .get(id) + self.get(id) .ok_or(codespan_reporting::files::Error::FileMissing) - .copied() } fn line_index( &'a self, id: Self::FileId, byte_index: usize, ) -> Result<usize, codespan_reporting::files::Error> { - self.get(id) - .ok_or(codespan_reporting::files::Error::FileMissing) - .map(|source| { - source - .char_indices() - .take_while(|(index, _)| *index < byte_index) - .filter(|(_, character)| *character == '\n') - .count() - }) + let handle = self + .file_handles + .get(id) + .ok_or(codespan_reporting::files::Error::FileMissing)?; + + self.files.line_index(*handle, byte_index) } fn line_range( &'a self, id: Self::FileId, line_index: usize, ) -> Result<std::ops::Range<usize>, codespan_reporting::files::Error> { - self.get(id) - .ok_or(codespan_reporting::files::Error::FileMissing) - .map(|source| { - let start = source - .lines() - .take(line_index) - .map(|line| line.len() + 1) - .sum::<usize>(); - - let end = source - .lines() - .take(line_index + 1) - .map(|line| line.len() + 1) - .sum::<usize>(); + let handle = self + .file_handles + .get(id) + .ok_or(codespan_reporting::files::Error::FileMissing)?; - start..end - }) + self.files.line_range(*handle, line_index) } } diff --git a/src/parser/macros.rs b/src/parser/macros.rs index 3c38a5f..df3abd1 100644 --- a/src/parser/macros.rs +++ b/src/parser/macros.rs @@ -49,7 +49,7 @@ macro_rules! expect_any_token { Err(vec![crate::diagnostic::Error::primary( token.range.file_id, $cursor, - 1, + 0, format!("Expected {}", expected_token_types.join(" or ")), )]) } @@ -102,7 +102,9 @@ macro_rules! expect_tokens { invalid_indecies.push((i, $token_type)); } } - _ => {} + _ => { + + } }; i += 1; @@ -119,11 +121,11 @@ macro_rules! 
expect_tokens { errors.push(crate::diagnostic::Error::primary( $parser.tokens.get(0).unwrap().range.file_id, - $cursor + invalid_index, - 1, + $cursor, + 0, format!("Expected {}", expected_token_type) ).with_note( - format!("Expected {}, got {}", expected_token_type, actual_token.token_type) + format!("Expected {}, found {} ({})", expected_token_type, actual_token.token_type, actual_token.value) )); } diff --git a/src/parser/statement.rs b/src/parser/statement.rs index 4425ef0..ac1591b 100644 --- a/src/parser/statement.rs +++ b/src/parser/statement.rs @@ -136,7 +136,8 @@ pub fn parse_enum<'a>( let identifier = expect_token_value!(tokens[1], TokenValue::Identifier); let mut new_cursor = cursor; - let mut members: HashSet<String> = HashSet::new(); + let mut members: HashMap<String, Token> = HashMap::new(); + let mut errors = Vec::new(); while let Some(token) = parser.tokens.get(new_cursor) { let (member_name, cursor) = match token.token_type { @@ -153,10 +154,20 @@ pub fn parse_enum<'a>( new_cursor = cursor; // TODO: Handle warning for overwritten members - members.insert(member_name); + if let Some(overwritten_key) = members.insert(member_name, token.clone()) { + errors.push( + Error::primary( + overwritten_key.range.file_id, + overwritten_key.range.position, + 1, + "This member was overwritten", + ) + .with_note("This member was overwritten since it was already declared"), + ); + } } let (_, cursor) = expect_tokens!(parser, new_cursor, TokenType::Semicolon)?; - Ok((Statement::Enum(identifier, members), cursor)) + Ok(((Statement::Enum(identifier, members), cursor), errors)) } diff --git a/src/scanner/lexeme.rs b/src/scanner/lexeme.rs index be4a54f..1fb2c4e 100644 --- a/src/scanner/lexeme.rs +++ b/src/scanner/lexeme.rs @@ -53,6 +53,20 @@ pub enum TokenValue { Identifier(String), } +impl Display for TokenValue { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + TokenValue::None => write!(f, "none"), + TokenValue::Boolean(value) => write!(f, "{}", 
value), + TokenValue::Integer(value) => write!(f, "{}", value), + TokenValue::Decimal(value) => write!(f, "{}", value), + TokenValue::String(value) => write!(f, "{}", value), + TokenValue::Character(value) => write!(f, "{}", value), + TokenValue::Identifier(value) => write!(f, "{}", value), + } + } +} + #[derive(Debug, PartialEq, Eq, Hash, Clone)] pub enum TokenType { /// A token that should be ignored. This is used for whitespace, comments, etc. diff --git a/test.som b/test.som index 61c41bf..c5e987d 100644 --- a/test.som +++ b/test.som @@ -1 +1 @@ -enum color: green blue red yellow 12 +enum color: green blue red yellow 12 green greeen green