diff --git a/src/diagnostic.rs b/src/diagnostic.rs new file mode 100644 index 0000000..1b1616b --- /dev/null +++ b/src/diagnostic.rs @@ -0,0 +1,161 @@ +use crate::scanner::lexeme::Lexeme; + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct Diagnostic<'a> { + pub severity: Severity, + pub title: String, + pub errors: Vec>, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct Error<'a> { + pub message: String, + pub label: Label, + pub range: Range<'a>, +} + +impl<'a> Error<'a> { + pub fn primary( + file_id: impl Into<&'a str>, + position: usize, + length: usize, + message: impl Into, + ) -> Error<'a> { + Error::new(file_id, Label::Primary, position, length, message) + } + + pub fn secondary( + file_id: impl Into<&'a str>, + position: usize, + length: usize, + message: impl Into, + ) -> Error<'a> { + Error::new(file_id, Label::Secondary, position, length, message) + } + + pub fn new( + file_id: impl Into<&'a str>, + label: Label, + position: usize, + length: usize, + message: impl Into, + ) -> Error<'a> { + Error { + message: message.into(), + label, + range: Range { + file_id: file_id.into(), + position, + length, + }, + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +enum Severity { + Error, + Warning, + Note, + Help, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +enum Label { + Primary, + Secondary, +} + +impl Into for Severity { + fn into(self) -> codespan_reporting::diagnostic::Severity { + match self { + Severity::Error => codespan_reporting::diagnostic::Severity::Error, + Severity::Warning => codespan_reporting::diagnostic::Severity::Warning, + Severity::Note => codespan_reporting::diagnostic::Severity::Note, + Severity::Help => codespan_reporting::diagnostic::Severity::Help, + } + } +} + +impl<'a> Diagnostic<'a> { + pub fn error(message: impl Into) -> Diagnostic<'a> { + Diagnostic::new(Severity::Error, message) + } + + pub fn warning(message: impl Into) -> Diagnostic<'a> { + Diagnostic::new(Severity::Warning, message) + } + + pub fn new(severity: Severity, message: impl Into) -> Diagnostic<'a> { + Diagnostic { + severity, + title: message.into(), + errors: vec![], + } + } + + pub fn with_error(mut self, error: Error<'a>) -> Self { + self.errors.push(error); + self + } +} + +impl<'a> Into> for Diagnostic<'a> { + fn into(self) -> codespan_reporting::diagnostic::Diagnostic<&'a str> { + codespan_reporting::diagnostic::Diagnostic::<&'a str>::new(self.severity.into()) + .with_labels(self.errors.into_iter().map(|error| error.into()).collect()) + } +} + +impl<'a> Into> for Error<'a> { + fn into(self) -> codespan_reporting::diagnostic::Label<&'a str> { + codespan_reporting::diagnostic::Label::new( + self.label.into(), + self.range.file_id, + self.range.position..self.range.position + self.range.length, + ) + .with_message(self.message) + } +} + +impl Into for Label { + fn into(self) -> codespan_reporting::diagnostic::LabelStyle { + match self { + Label::Primary => codespan_reporting::diagnostic::LabelStyle::Primary, + Label::Secondary => codespan_reporting::diagnostic::LabelStyle::Secondary, + } + } +} + +#[derive(Debug, Clone, Hash, PartialEq, Eq)] +pub struct Range<'a> { + pub file_id: &'a str, + pub position: usize, + pub length: usize, +} + +impl<'a> Range<'a> { + pub fn to_source_code_range(self, lexemes: &[Lexeme]) -> Self { + let start = if self.position >= lexemes.len() { + let last_lexeme = lexemes[lexemes.len() - 1].range(); + last_lexeme.position + 1 + } else { + let start_lexeme = lexemes[self.position].range(); + start_lexeme.position + }; + + let end = if self.position + self.length >= lexemes.len() { + let last_lexeme = lexemes[lexemes.len() - 1].range(); + last_lexeme.position + last_lexeme.length + } else { + let end_lexeme = lexemes[self.position + self.length].range(); + end_lexeme.position + end_lexeme.length + }; + + Range { + file_id: self.file_id, + position: start, + length: end - start, + } + } +} diff --git a/src/files.rs b/src/files.rs new file mode 100644 index 0000000..ac0c0ef --- /dev/null +++ b/src/files.rs @@ -0,0 +1,114 @@ +use std::collections::HashMap; + +pub struct Files<'a> { + pub files: HashMap<&'a str, &'a str>, +} + +impl<'a> Files<'a> { + pub fn new() -> Self { + Self { + files: HashMap::new(), + } + } + + pub fn insert(&mut self, file_id: impl Into<&'a str>, source: impl Into<&'a str>) { + self.files.insert(file_id.into(), source.into()); + } + + pub fn file_ids(&self) -> Vec<&'a str> { + self.files.keys().copied().collect() + } + + pub fn get(&self, file_id: impl Into<&'a str>) -> Option<&'a str> { + self.files.get(file_id.into()).copied() + } +} + +impl<'a> codespan_reporting::files::Files<'a> for Files<'a> { + type FileId = &'a str; + type Name = &'a str; + type Source = &'a str; + + fn name(&'a self, id: Self::FileId) -> Result { + self.files + .get(id) + .ok_or(codespan_reporting::files::Error::FileMissing) + .copied() + } + + fn source( + &'a self, + id: Self::FileId, + ) -> Result { + self.files + .get(id) + .ok_or(codespan_reporting::files::Error::FileMissing) + .copied() + } + + fn line_index( + &'a self, + id: Self::FileId, + byte_index: usize, + ) -> Result { + let source = self.source(id)?; + let mut line_index = 0; + let mut byte_count = 0; + + for (index, character) in source.char_indices() { + if index == byte_index { + return Ok(line_index); + } + + if character == '\n' { + line_index += 1; + } + + byte_count = index; + } + + if byte_index == byte_count { + Ok(line_index) + } else { + Err(codespan_reporting::files::Error::IndexTooLarge { + given: byte_index, + max: byte_count, + }) + } + } + + fn line_range( + &'a self, + id: Self::FileId, + line_index: usize, + ) -> Result, codespan_reporting::files::Error> { + let source = self.source(id)?; + let mut start = 0; + let mut end = 0; + let mut current_line_index = 0; + + for (index, character) in source.char_indices() { + if current_line_index == line_index { + start = index; + } + + if character == '\n' { + if current_line_index == line_index { + end = index; + break; + } + + current_line_index += 1; + } + } + + if current_line_index == line_index { + Ok(start..end) + } else { + Err(codespan_reporting::files::Error::IndexTooLarge { + given: line_index, + max: current_line_index, + }) + } + } +} diff --git a/src/main.rs b/src/main.rs index 466c6aa..8a9ac39 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,45 +1,26 @@ use anyhow::Result; use codespan_reporting::{ - diagnostic::{Diagnostic, Label}, - files::SimpleFile, + diagnostic::Diagnostic, term::{ self, termcolor::{ColorChoice, StandardStream}, }, }; use core::result::Result::Ok; -use scanner::lexeme::{Lexeme, Range}; +use files::Files; use transpiler::{bend::BendTranspiler, Transpiler}; +pub mod diagnostic; +pub mod files; pub mod parser; pub mod scanner; pub mod transpiler; -fn lexeme_range_to_source_range(lexemes: &[Lexeme], diagnostic: &parser::Diagnostic) -> Range { - let start = if diagnostic.range.position >= lexemes.len() { - let last_lexeme = lexemes[lexemes.len() - 1].range(); - last_lexeme.position + 1 - } else { - let start_lexeme = lexemes[diagnostic.range.position].range(); - start_lexeme.position - }; - - let end = if diagnostic.range.position + diagnostic.range.length >= lexemes.len() { - let last_lexeme = lexemes[lexemes.len() - 1].range(); - last_lexeme.position + last_lexeme.length - } else { - let end_lexeme = lexemes[diagnostic.range.position + diagnostic.range.length].range(); - end_lexeme.position + end_lexeme.length - }; - - Range { - position: start, - length: end - start, - } -} - fn main() -> Result<()> { - let code = " + let mut files = Files::new(); + files.insert( + "main", + " enum color: red green blue; struct person: @@ -47,35 +28,30 @@ fn main() -> Result<()> { .age: number ; + let 12 = 12; + // let lucas = person::new('Lucas', 22); // lucas.age_in_days(); - "; - let file: SimpleFile<&str, &str> = SimpleFile::new("main", code); + ", + ); - let lexemes = scanner::Scanner::new(code.to_owned()).collect::>(); + let scanner = scanner::Scanner::new(&files); + let lexemes = scanner.parse(); let mut parser = parser::Parser::new(&lexemes); let parsed = parser.parse(); match &parsed { Ok(_) => {} Err(diagnostics) => { - let diagnostic: Diagnostic<()> = Diagnostic::error() - .with_message("Syntax error") - .with_labels( - diagnostics - .iter() - .map(|diagnostic| { - let range = lexeme_range_to_source_range(&lexemes, diagnostic); - - Label::primary((), range.position..range.position + range.length) - .with_message(diagnostic.message.to_string()) - }) - .collect(), - ); + let diagnostics: Vec> = + diagnostics.into_iter().map(|d| d.clone().into()).collect(); let writer = StandardStream::stderr(ColorChoice::Auto); let config = codespan_reporting::term::Config::default(); - term::emit(&mut writer.lock(), &config, &file, &diagnostic)?; + + for diagnostic in diagnostics { + term::emit(&mut writer.lock(), &config, &files, &diagnostic)?; + } } } diff --git a/src/parser/ast.rs b/src/parser/ast.rs index 7d82b8a..1fb94c3 100644 --- a/src/parser/ast.rs +++ b/src/parser/ast.rs @@ -3,11 +3,11 @@ use std::collections::{HashMap, HashSet}; use crate::scanner::lexeme::Lexeme; #[derive(Debug, Clone, PartialEq)] -pub enum Symbol { +pub enum Symbol<'a> { Expression(Expression), Statement(Statement), Type(Type), - Unknown(Lexeme), + Unknown(Lexeme<'a>), } #[derive(Debug, Clone, PartialEq)] diff --git a/src/parser/expression.rs b/src/parser/expression.rs index 61decee..0ffb974 100644 --- a/src/parser/expression.rs +++ b/src/parser/expression.rs @@ -1,29 +1,29 @@ use super::{ ast::{Expression, UnaryOperation}, lookup::BindingPower, - macros::{ - expect_expression, expect_token, expect_token_value, expect_tokens, expect_valid_token, - }, + macros::{expect_expression, expect_token_value, expect_tokens, expect_valid_token}, Diagnostic, Parser, }; use crate::{ + diagnostic::Error, parser::macros::expect_any_token, scanner::lexeme::{Lexeme, TokenType, TokenValue}, }; use std::collections::HashMap; -pub fn parse( - parser: &Parser, +pub fn parse<'a>( + parser: &Parser<'a>, cursor: usize, binding_power: &BindingPower, -) -> Result<(Expression, usize), Diagnostic> { +) -> Result<(Expression, usize), Error<'a>> { let mut cursor = cursor; - let (token, range) = expect_valid_token!(parser, cursor); + let (token, range) = expect_valid_token!(parser, cursor)?; let expression_handler = parser .lookup .expression_lookup .get(&token.token_type) - .ok_or(Diagnostic::error( + .ok_or(Error::primary( + range.file_id, cursor, range.length, "Expected a new expression", @@ -60,19 +60,22 @@ pub fn parse( Ok((left_hand_side, cursor)) } -pub fn parse_assignment( - parser: &Parser, +pub fn parse_assignment<'a>( + parser: &Parser<'a>, cursor: usize, lhs: Expression, binding_power: &BindingPower, -) -> Result<(Expression, usize), Diagnostic> { +) -> Result<(Expression, usize), Error<'a>> { let (_, cursor) = expect_tokens!(parser, cursor, TokenType::Equal)?; let (rhs, cursor) = expect_expression!(parser, cursor, binding_power)?; Ok((Expression::Assignment(Box::new(lhs), Box::new(rhs)), cursor)) } -pub fn parse_unary(parser: &Parser, cursor: usize) -> Result<(Expression, usize), Diagnostic> { +pub fn parse_unary<'a, 'b, 'c>( + parser: &'a Parser, + cursor: usize, +) -> Result<(Expression, usize), Error<'a>> { let (token, cursor) = expect_any_token!(parser, cursor, TokenType::Minus, TokenType::Not)?; match token.token_type { TokenType::Minus => { @@ -93,24 +96,20 @@ pub fn parse_unary(parser: &Parser, cursor: usize) -> Result<(Expression, usize) } } -pub fn parse_struct_initializer( - parser: &Parser, +pub fn parse_struct_initializer<'a>( + parser: &Parser<'a>, cursor: usize, lhs: Expression, binding_power: &BindingPower, -) -> Result<(Expression, usize), Diagnostic> { +) -> Result<(Expression, usize), Error<'a>> { let identifier = match lhs { Expression::Identifier(identifier) => identifier.clone(), _ => { - return Err(Diagnostic::error( - cursor, - 1, - format!("Expected {}, found {:?}", TokenType::Identifier, lhs), - )) + unreachable!() } }; - let (_, cursor) = expect_token!(parser, cursor, TokenType::CurlyOpen)?; + let (_, cursor) = expect_tokens!(parser, cursor, TokenType::CurlyOpen)?; let mut members = HashMap::new(); let mut new_cursor = cursor; @@ -121,7 +120,7 @@ pub fn parse_struct_initializer( } if !members.is_empty() { - let (_, cursor) = expect_token!(parser, new_cursor, TokenType::Comma)?; + let (_, cursor) = expect_tokens!(parser, new_cursor, TokenType::Comma)?; new_cursor = cursor; } @@ -137,7 +136,7 @@ pub fn parse_struct_initializer( new_cursor = cursor; } - let (_, cursor) = expect_token!(parser, new_cursor, TokenType::CurlyClose)?; + let (_, cursor) = expect_tokens!(parser, new_cursor, TokenType::CurlyClose)?; Ok((Expression::StructInitializer(identifier, members), cursor)) } diff --git a/src/parser/lookup.rs b/src/parser/lookup.rs index 365c1fb..c567a91 100644 --- a/src/parser/lookup.rs +++ b/src/parser/lookup.rs @@ -1,10 +1,14 @@ use super::{ ast::{BinaryOperation, Expression, Statement, Type}, expression, - macros::{expect_expression, expect_token}, - statement, typing, Diagnostic, Parser, + macros::{expect_expression, expect_tokens}, + statement, typing, Parser, +}; +use crate::{ + diagnostic::Error, + parser::macros::expect_token_value, + scanner::lexeme::{TokenType, TokenValue}, }; -use crate::scanner::lexeme::{TokenType, TokenValue}; use core::panic; use std::collections::HashMap; @@ -23,25 +27,29 @@ pub enum BindingPower { Primary = 10, } -pub type TypeHandler = fn(&Parser, usize) -> Result<(Type, usize), Diagnostic>; -pub type LeftTypeHandler = - fn(&Parser, usize, Type, &BindingPower) -> Result<(Type, usize), Diagnostic>; -pub type StatementHandler = fn(&Parser, usize) -> Result<(Statement, usize), Diagnostic>; -pub type ExpressionHandler = fn(&Parser, usize) -> Result<(Expression, usize), Diagnostic>; -pub type LeftExpressionHandler = - fn(&Parser, usize, Expression, &BindingPower) -> Result<(Expression, usize), Diagnostic>; - -pub struct Lookup { - pub statement_lookup: HashMap, - pub expression_lookup: HashMap, - pub left_expression_lookup: HashMap, - pub type_lookup: HashMap, - pub left_type_lookup: HashMap, +pub type TypeHandler<'a> = fn(&Parser, usize) -> Result<(Type, usize), Error<'a>>; +pub type LeftTypeHandler<'a> = + fn(&Parser, usize, Type, &BindingPower) -> Result<(Type, usize), Error<'a>>; +pub type StatementHandler<'a> = fn(&Parser, usize) -> Result<(Statement, usize), Error<'a>>; +pub type ExpressionHandler<'a> = fn(&Parser, usize) -> Result<(Expression, usize), Error<'a>>; +pub type LeftExpressionHandler<'a> = + fn(&Parser, usize, Expression, &BindingPower) -> Result<(Expression, usize), Error<'a>>; + +pub struct Lookup<'a> { + pub statement_lookup: HashMap>, + pub expression_lookup: HashMap>, + pub left_expression_lookup: HashMap>, + pub type_lookup: HashMap>, + pub left_type_lookup: HashMap>, pub binding_power_lookup: HashMap, } -impl Lookup { - pub(crate) fn add_statement_handler(&mut self, token: TokenType, handler: StatementHandler) { +impl<'a> Lookup<'a> { + pub(crate) fn add_statement_handler( + &mut self, + token: TokenType, + handler: StatementHandler<'a>, + ) { if self.statement_lookup.contains_key(&token) { panic!("Token already has a statement handler"); } @@ -49,7 +57,11 @@ impl Lookup { self.statement_lookup.insert(token, handler); } - pub(crate) fn add_expression_handler(&mut self, token: TokenType, handler: ExpressionHandler) { + pub(crate) fn add_expression_handler( + &mut self, + token: TokenType, + handler: ExpressionHandler<'a>, + ) { if self.expression_lookup.contains_key(&token) { panic!("Token already has an expression handler"); } @@ -61,7 +73,7 @@ impl Lookup { &mut self, token: TokenType, binding_power: BindingPower, - handler: LeftExpressionHandler, + handler: LeftExpressionHandler<'a>, ) { if self.binding_power_lookup.contains_key(&token) { panic!("Token already has a binding power"); @@ -71,7 +83,7 @@ impl Lookup { self.binding_power_lookup.insert(token, binding_power); } - pub(crate) fn add_type_handler(&mut self, token: TokenType, handler: TypeHandler) { + pub(crate) fn add_type_handler(&mut self, token: TokenType, handler: TypeHandler<'a>) { if self.type_lookup.contains_key(&token) { panic!("Token already has a type handler"); } @@ -80,7 +92,7 @@ impl Lookup { } #[allow(dead_code)] - pub(crate) fn add_left_type_handler(&mut self, token: TokenType, handler: LeftTypeHandler) { + pub(crate) fn add_left_type_handler(&mut self, token: TokenType, handler: LeftTypeHandler<'a>) { if self.left_type_lookup.contains_key(&token) { panic!("Token already has a left type handler"); } @@ -89,7 +101,7 @@ impl Lookup { } } -impl Default for Lookup { +impl<'a> Default for Lookup<'a> { fn default() -> Self { let mut lookup = Lookup { statement_lookup: HashMap::new(), @@ -105,7 +117,7 @@ impl Default for Lookup { TokenType::Plus, BindingPower::Additive, |parser, cursor, lhs, _binding| { - let (_, cursor) = expect_token!(parser, cursor, TokenType::Plus)?; + let (_, cursor) = expect_tokens!(parser, cursor, TokenType::Plus)?; let (rhs, cursor) = expect_expression!(parser, cursor, &BindingPower::Additive)?; Ok(( Expression::Binary(Box::new(lhs), BinaryOperation::Plus, Box::new(rhs)), @@ -118,7 +130,7 @@ impl Default for Lookup { TokenType::Minus, BindingPower::Additive, |parser, cursor, lhs, _binding| { - let (_, cursor) = expect_token!(parser, cursor, TokenType::Minus)?; + let (_, cursor) = expect_tokens!(parser, cursor, TokenType::Minus)?; let (rhs, cursor) = expect_expression!(parser, cursor, &BindingPower::Additive)?; Ok(( Expression::Binary(Box::new(lhs), BinaryOperation::Minus, Box::new(rhs)), @@ -132,7 +144,7 @@ impl Default for Lookup { TokenType::Star, BindingPower::Multiplicative, |parser, cursor, lhs, _binding| { - let (_, cursor) = expect_token!(parser, cursor, TokenType::Star)?; + let (_, cursor) = expect_tokens!(parser, cursor, TokenType::Star)?; let (rhs, cursor) = expect_expression!(parser, cursor, &BindingPower::Multiplicative)?; Ok(( @@ -146,7 +158,7 @@ impl Default for Lookup { TokenType::Slash, BindingPower::Multiplicative, |parser, cursor, lhs, _binding| { - let (_, cursor) = expect_token!(parser, cursor, TokenType::Slash)?; + let (_, cursor) = expect_tokens!(parser, cursor, TokenType::Slash)?; let (rhs, cursor) = expect_expression!(parser, cursor, &BindingPower::Multiplicative)?; Ok(( @@ -158,50 +170,33 @@ impl Default for Lookup { // Literals and symbols lookup.add_expression_handler(TokenType::Decimal, |parser, cursor| { - let (decimal, cursor) = expect_token!(parser, cursor, TokenType::Decimal)?; - - if let TokenValue::Decimal(value) = decimal.value { - return Ok((Expression::Number(value), cursor)); - } - - panic!("expect_token! should return a valid token and handle the error case"); + let (decimal, cursor) = expect_tokens!(parser, cursor, TokenType::Decimal)?; + let decimal = expect_token_value!(decimal[0], TokenValue::Decimal); + Ok((Expression::Number(decimal), cursor)) }); lookup.add_expression_handler(TokenType::Integer, |parser, cursor| { - let (integer, cursor) = expect_token!(parser, cursor, TokenType::Integer)?; - - if let TokenValue::Integer(value) = integer.value { - return Ok((Expression::Number(value as f64), cursor)); - } - - panic!("expect_token! should return a valid token and handle the error case"); + let (integer, cursor) = expect_tokens!(parser, cursor, TokenType::Integer)?; + let integer = expect_token_value!(integer[0], TokenValue::Integer); + Ok((Expression::Number(integer as f64), cursor)) }); lookup.add_expression_handler(TokenType::String, |parser, cursor| { - let (string, cursor) = expect_token!(parser, cursor, TokenType::String)?; - - if let TokenValue::String(string) = string.value.clone() { - return Ok((Expression::String(string), cursor)); - } - - panic!("expect_token! should return a valid token and handle the error case"); + let (string, cursor) = expect_tokens!(parser, cursor, TokenType::String)?; + let string = expect_token_value!(string[0], TokenValue::String); + Ok((Expression::String(string), cursor)) }); lookup.add_expression_handler(TokenType::Identifier, |parser, cursor| { - let (identifier, cursor) = expect_token!(parser, cursor, TokenType::Identifier)?; - - if let TokenValue::Identifier(identifier) = identifier.value.clone() { - return Ok((Expression::Identifier(identifier), cursor)); - } - - panic!("expect_token! should return a valid token and handle the error case"); + let (identifier, cursor) = expect_tokens!(parser, cursor, TokenType::Identifier)?; + let identifier = expect_token_value!(identifier[0], TokenValue::Identifier); + Ok((Expression::Identifier(identifier), cursor)) }); lookup.add_expression_handler(TokenType::ParenOpen, |parser, cursor| { - let (_, cursor) = expect_token!(parser, cursor, TokenType::ParenOpen)?; + let (_, cursor) = expect_tokens!(parser, cursor, TokenType::ParenOpen)?; let (expression, cursor) = expect_expression!(parser, cursor, &BindingPower::None)?; - let (_, cursor) = expect_token!(parser, cursor, TokenType::ParenClose)?; - + let (_, cursor) = expect_tokens!(parser, cursor, TokenType::ParenClose)?; Ok((Expression::Grouping(Box::new(expression)), cursor)) }); diff --git a/src/parser/macros.rs b/src/parser/macros.rs index 126bd0e..f82742c 100644 --- a/src/parser/macros.rs +++ b/src/parser/macros.rs @@ -21,15 +21,22 @@ macro_rules! expect_valid_token { ($parser:expr, $cursor:expr) => {{ let lexeme = $parser.lexemes.get($cursor); - if lexeme.is_none() { - return Err(Diagnostic::error($cursor, 0, "Unexpected end of file")); - } - - let lexeme = lexeme.unwrap(); - match lexeme { - Lexeme::Valid(token) => (token, lexeme.range()), - Lexeme::Invalid(_) => return Err(Diagnostic::error($cursor, 1, "Invalid token")), + Some(lexeme) => match lexeme { + Lexeme::Valid(token) => Ok((token, lexeme.range())), + Lexeme::Invalid(_) => Err(crate::diagnostic::Error::primary( + lexeme.range().file_id, + $cursor, + 1, + "Invalid token", + )), + }, + None => Err(crate::diagnostic::Error::primary( + "", + $cursor, + 0, + "Unexpected end of file", + )), } }}; } @@ -40,12 +47,35 @@ macro_rules! expect_any_token { ($parser:expr, $cursor:expr, $($token_type:expr),*) => {{ let expected_token_types = vec![$($token_type.to_string()),*]; - let expected_tokens = vec![$(expect_token!($parser, $cursor, $token_type)),*]; + let lexeme = $parser.lexemes.get($cursor); - // If any of the expected tokens are valid, return the first valid token - match expected_tokens.into_iter().find(|token| token.is_ok()) { - Some(token) => token, - None => Err(Diagnostic::error($cursor, 1, format!("Expected {}", expected_token_types.join(" or ")))) + match lexeme { + Some(lexeme) => match lexeme { + Lexeme::Valid(token) => { + if expected_token_types.contains(&token.token_type.to_string()) { + Ok((token, $cursor + 1)) + } else { + Err(crate::diagnostic::Error::primary( + lexeme.range().file_id, + $cursor, + 1, + format!("Expected one of {}", expected_token_types.join(" or ")), + )) + } + } + Lexeme::Invalid(_) => Err(crate::diagnostic::Error::primary( + lexeme.range().file_id, + $cursor, + 1, + "Invalid token", + )), + }, + None => Err(crate::diagnostic::Error::primary( + "", + $cursor, + 0, + "Unexpected end of file", + )), } }}; } @@ -53,7 +83,7 @@ macro_rules! expect_any_token { #[allow(unused_macros)] macro_rules! expect_optional_token { ($parser:expr, $cursor:expr, $token_type:expr) => {{ - let result = expect_token!($parser, $cursor, $token_type); + let result = expect_tokens!($parser, $cursor, $token_type); match result { Ok((token, cursor)) => Ok((Some(token), cursor)), @@ -61,25 +91,6 @@ macro_rules! expect_optional_token { } }}; } - -macro_rules! expect_token { - ($parser:expr, $cursor:expr, $token_type:expr) => {{ - let result = crate::parser::macros::expect_tokens!($parser, $cursor, $token_type); - - match result { - Ok((tokens, cursor)) => { - let token = tokens.first().unwrap().clone(); - if token.token_type == $token_type { - Ok((token, cursor)) - } else { - Err(Diagnostic::error($cursor, 1, "Invalid token")) - } - } - Err(err) => Err(err), - } - }}; -} - macro_rules! expect_token_value { ($token:expr, $value:path) => {{ match &$token.value { @@ -94,16 +105,24 @@ macro_rules! expect_tokens { let mut i = $cursor; let mut tokens = Vec::new(); let mut valid = 0; + let mut file_id = ""; $( - let lexeme = $parser.lexemes.get(i); + let lexeme: Option<&crate::scanner::lexeme::Lexeme> = $parser.lexemes.get(i); match lexeme { - Some(crate::scanner::lexeme::Lexeme::Valid(token)) => { - if token.token_type == $token_type { - tokens.push(token.clone()); - valid += 1; - } + Some(lexeme) => { + file_id = lexeme.range().file_id.clone(); + + match lexeme { + crate::scanner::lexeme::Lexeme::Valid(token) => { + if token.token_type == $token_type { + tokens.push(token.clone()); + valid += 1; + } + } + _ => {} + }; } _ => {} }; @@ -117,7 +136,7 @@ macro_rules! expect_tokens { Ok((tokens, i)) } else { let unexpected_tokens = all_tokens.iter().skip(valid).map(|t| t.to_string()).collect::>(); - Err(Diagnostic::error($cursor + valid, 1, format!("Expected {}", unexpected_tokens.join(" and ")))) + Err(crate::diagnostic::Error::primary(file_id, $cursor + valid, 1, format!("Expected {}", unexpected_tokens.join(" and ")))) } }}; } @@ -128,7 +147,6 @@ pub(crate) use expect_expression; pub(crate) use expect_optional_token; #[allow(unused_imports)] pub(crate) use expect_statement; -pub(crate) use expect_token; pub(crate) use expect_token_value; pub(crate) use expect_tokens; pub(crate) use expect_type; diff --git a/src/parser/mod.rs b/src/parser/mod.rs index bbdc69e..f57fc50 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1,4 +1,7 @@ -use crate::scanner::lexeme::{Lexeme, Range}; +use crate::{ + diagnostic::{Diagnostic, Error}, + scanner::lexeme::Lexeme, +}; use ast::{Statement, Symbol}; use lookup::Lookup; use std::collections::HashSet; @@ -12,12 +15,12 @@ pub mod typing; pub struct Parser<'a> { lookup: Lookup, - lexemes: &'a Vec, + lexemes: &'a Vec>, cursor: usize, } impl<'a> Parser<'a> { - pub fn new(lexemes: &'a Vec) -> Self { + pub fn new(lexemes: &'a Vec>) -> Self { Self { lookup: Lookup::default(), lexemes, @@ -28,31 +31,34 @@ impl<'a> Parser<'a> { pub fn parse(&mut self) -> Result> { let mut statements = vec![]; let mut diagnostics = HashSet::new(); - let mut current_error: Option<(Diagnostic, usize)> = None; + let mut current_error: Option<(Error, usize)> = None; let mut last_safe_cursor = 0; while self.cursor < self.lexemes.len() { if current_error.is_some() { - let (error_diagnostic, _error_start_cursor) = current_error.take().unwrap(); + let (error_diagnostic, error_start_cursor) = current_error.take().unwrap(); // Skip to the next semicolon // TODO: This is a naive approach, we should skip to the next statement - while let Some(Lexeme::Valid(_)) = self.lexemes.get(self.cursor) { + while let Some(lexeme) = self.lexemes.get(self.cursor) { self.cursor += 1; // Try to parse the next statement - if statement::parse(self, self.cursor).is_ok() { + if lexeme.is_valid() && statement::parse(self, self.cursor).is_ok() { + diagnostics.insert( + Diagnostic::error("Syntax error") + .with_error(error_diagnostic) + .with_error(Error::secondary( + lexeme.range().file_id, + error_start_cursor, + self.cursor - error_start_cursor, + "Could not parse this code", + )), + ); + break; }; } - - // diagnostics.insert(Diagnostic::error( - // error_start_cursor, - // self.cursor - error_start_cursor, - // "Syntax error", - // )); - - diagnostics.insert(error_diagnostic); } match statement::parse(self, self.cursor) { @@ -61,8 +67,8 @@ impl<'a> Parser<'a> { last_safe_cursor = new_cursor; statements.push(statement); } - Err(diagnostic) => { - current_error = Some((diagnostic, last_safe_cursor)); + Err(error) => { + current_error = Some((error, last_safe_cursor)); } }; } @@ -75,52 +81,6 @@ impl<'a> Parser<'a> { } } -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct Diagnostic { - pub range: Range, - pub message: String, -} - -impl Diagnostic { - fn error(cursor: usize, length: usize, message: impl Into) -> Diagnostic { - Diagnostic { - range: Range { - position: cursor, - length, - }, - message: message.into(), - } - } - - #[allow(dead_code)] - fn combine(diagnostics: &[Diagnostic]) -> Diagnostic { - let min_range = diagnostics - .iter() - .map(|diagnostic| diagnostic.range.clone()) - .min_by_key(|range| range.position) - .unwrap(); - - let max_range = diagnostics - .iter() - .map(|diagnostic| diagnostic.range.clone()) - .max_by_key(|range| range.position + range.length) - .unwrap(); - - Diagnostic::error( - min_range.position, - max_range.position + max_range.length - min_range.position, - diagnostics - .iter() - .map(|diagnostic| diagnostic.message.clone()) - .collect::>() - .iter() - .cloned() - .collect::>() - .join("\n"), - ) - } -} - #[cfg(test)] mod tests { use ast::{BinaryOperation, Expression}; diff --git a/src/parser/statement.rs b/src/parser/statement.rs index eae115e..39bb8ce 100644 --- a/src/parser/statement.rs +++ b/src/parser/statement.rs @@ -1,20 +1,22 @@ -use crate::scanner::lexeme::{Lexeme, TokenType, TokenValue}; -use core::panic; -use std::collections::{HashMap, HashSet}; - use super::{ ast::Type, expression, lookup::BindingPower, macros::{ - expect_any_token, expect_expression, expect_token, expect_token_value, expect_tokens, - expect_type, expect_valid_token, + expect_any_token, expect_expression, expect_token_value, expect_tokens, expect_type, + expect_valid_token, }, - Diagnostic, Parser, Statement, + Parser, Statement, }; +use crate::{ + diagnostic::Error, + scanner::lexeme::{Lexeme, TokenType, TokenValue}, +}; +use core::panic; +use std::collections::{HashMap, HashSet}; -pub fn parse(parser: &Parser, cursor: usize) -> Result<(Statement, usize), Diagnostic> { - let (token, _) = expect_valid_token!(parser, cursor); +pub fn parse(parser: &Parser, cursor: usize) -> Result<(Statement, usize), Error> { + let (token, _) = expect_valid_token!(parser, cursor)?; let statement_handler = parser.lookup.statement_lookup.get(&token.token_type); match statement_handler { @@ -23,17 +25,17 @@ pub fn parse(parser: &Parser, cursor: usize) -> Result<(Statement, usize), Diagn } } -pub fn parse_expression(parser: &Parser, cursor: usize) -> Result<(Statement, usize), Diagnostic> { +pub fn parse_expression(parser: &Parser, cursor: usize) -> Result<(Statement, usize), Error> { let (expression, cursor) = expression::parse(parser, cursor, &BindingPower::None)?; - let (_, cursor) = expect_token!(parser, cursor, TokenType::Semicolon)?; + let (_, cursor) = expect_tokens!(parser, cursor, TokenType::Semicolon)?; Ok((Statement::Expression(expression), cursor)) } -pub fn parse_declaration(parser: &Parser, cursor: usize) -> Result<(Statement, usize), Diagnostic> { - let (_, cursor) = expect_token!(parser, cursor, TokenType::Let)?; - let (identifier, cursor) = expect_token!(parser, cursor, TokenType::Identifier)?; - let identifier = match &identifier.value { +pub fn parse_declaration(parser: &Parser, cursor: usize) -> Result<(Statement, usize), Error> { + let (_, cursor) = expect_tokens!(parser, cursor, TokenType::Let)?; + let (identifier, cursor) = expect_tokens!(parser, cursor, TokenType::Identifier)?; + let identifier = match &identifier[0].value { TokenValue::Identifier(identifier) => identifier, _ => panic!("expect_token! should return a valid token and handle the error case"), }; @@ -41,16 +43,16 @@ pub fn parse_declaration(parser: &Parser, cursor: usize) -> Result<(Statement, u let (token, _) = expect_any_token!(parser, cursor, TokenType::Colon, TokenType::Equal)?; let (typing, cursor) = match token.token_type { TokenType::Colon => { - let (_, cursor) = expect_token!(parser, cursor, TokenType::Colon)?; + let (_, cursor) = expect_tokens!(parser, cursor, TokenType::Colon)?; let (typing, cursor) = expect_type!(parser, cursor, BindingPower::None)?; (Some(typing), cursor) } _ => (None, cursor), }; - let (_, cursor) = expect_token!(parser, cursor, TokenType::Equal)?; + let (_, cursor) = expect_tokens!(parser, cursor, TokenType::Equal)?; let (expression, cursor) = expect_expression!(parser, cursor, &BindingPower::None)?; - let (_, cursor) = expect_token!(parser, cursor, TokenType::Semicolon)?; + let (_, cursor) = expect_tokens!(parser, cursor, TokenType::Semicolon)?; Ok(( Statement::Declaration(identifier.clone(), typing, expression), @@ -58,7 +60,7 @@ pub fn parse_declaration(parser: &Parser, cursor: usize) -> Result<(Statement, u )) } -pub fn parse_struct(parser: &Parser, cursor: usize) -> Result<(Statement, usize), Diagnostic> { +pub fn parse_struct(parser: &Parser, cursor: usize) -> Result<(Statement, usize), Error> { let (tokens, cursor) = expect_tokens!( parser, cursor, @@ -98,12 +100,12 @@ pub fn parse_struct(parser: &Parser, cursor: usize) -> Result<(Statement, usize) new_cursor = cursor; } - let (_, cursor) = expect_token!(parser, new_cursor, TokenType::Semicolon)?; + let (_, cursor) = expect_tokens!(parser, new_cursor, TokenType::Semicolon)?; Ok((Statement::Struct(identifier, members), cursor)) } -pub fn parse_enum(parser: &Parser, cursor: usize) -> Result<(Statement, usize), Diagnostic> { +pub fn parse_enum(parser: &Parser, cursor: usize) -> Result<(Statement, usize), Error> { let (tokens, cursor) = expect_tokens!( parser, cursor, @@ -122,9 +124,9 @@ pub fn parse_enum(parser: &Parser, cursor: usize) -> Result<(Statement, usize), TokenType::Semicolon => break, _ => { let (field_name, cursor) = - expect_token!(parser, new_cursor, TokenType::Identifier)?; + expect_tokens!(parser, new_cursor, TokenType::Identifier)?; - let field_name = expect_token_value!(field_name, TokenValue::Identifier); + let field_name = expect_token_value!(field_name[0], TokenValue::Identifier); (field_name, cursor) } @@ -135,7 +137,7 @@ pub fn parse_enum(parser: &Parser, cursor: usize) -> Result<(Statement, usize), members.insert(member_name); } - let (_, cursor) = expect_token!(parser, new_cursor, TokenType::Semicolon)?; + let (_, cursor) = expect_tokens!(parser, new_cursor, TokenType::Semicolon)?; Ok((Statement::Enum(identifier, members), cursor)) } diff --git a/src/parser/typing.rs b/src/parser/typing.rs index f42219d..8038570 100644 --- a/src/parser/typing.rs +++ b/src/parser/typing.rs @@ -3,23 +3,31 @@ use std::collections::HashMap; use super::{ ast::Type, lookup::BindingPower, - macros::{expect_optional_token, expect_token, expect_type, expect_valid_token}, - Diagnostic, Parser, + macros::{expect_optional_token, expect_tokens, expect_type, expect_valid_token}, + Parser, +}; +use crate::{ + diagnostic::{Diagnostic, Error}, + scanner::lexeme::{Lexeme, TokenType, TokenValue}, }; -use crate::scanner::lexeme::{Lexeme, TokenType, TokenValue}; pub fn parse( parser: &Parser, cursor: usize, binding_power: &BindingPower, -) -> Result<(Type, usize), Diagnostic> { +) -> Result<(Type, usize), Error> { let mut cursor = cursor; - let (token, range) = expect_valid_token!(parser, cursor); + let (token, range) = expect_valid_token!(parser, cursor)?; let type_handler = parser .lookup .type_lookup .get(&token.token_type) - .ok_or(Diagnostic::error(cursor, range.length, "Expected a type"))?; + .ok_or(Error::primary( + parser.lexemes.get(cursor).unwrap().range().file_id, + cursor, + range.length, + "Expected a type", + ))?; let (mut left_hand_side, new_cursor) = type_handler(parser, cursor)?; @@ -51,24 +59,24 @@ pub fn parse( Ok((left_hand_side, cursor)) } -pub fn parse_symbol(parser: &Parser, cursor: usize) -> Result<(Type, usize), Diagnostic> { - let (identifier, cursor) = expect_token!(parser, cursor, TokenType::Identifier)?; - let identifier = match &identifier.value { +pub fn parse_symbol(parser: &Parser, cursor: usize) -> Result<(Type, usize), Error> { + let (identifier, cursor) = expect_tokens!(parser, cursor, TokenType::Identifier)?; + let identifier = match &identifier[0].value { TokenValue::Identifier(identifier) => identifier, _ => panic!("expect_token! should only return identifiers"), }; Ok((Type::Symbol(identifier.clone()), cursor)) } -pub fn parse_array(parser: &Parser, cursor: usize) -> Result<(Type, usize), Diagnostic> { - let (_, cursor) = expect_token!(parser, cursor, TokenType::SquareOpen)?; +pub fn parse_array(parser: &Parser, cursor: usize) -> Result<(Type, usize), Error> { + let (_, cursor) = expect_tokens!(parser, cursor, TokenType::SquareOpen)?; let (element_type, cursor) = expect_type!(parser, cursor, &BindingPower::None)?; - let (_, cursor) = expect_token!(parser, cursor, TokenType::SquareClose)?; + let (_, cursor) = expect_tokens!(parser, cursor, TokenType::SquareClose)?; Ok((Type::Array(Box::new(element_type)), cursor)) } -pub fn parse_tuple(parser: &Parser, cursor: usize) -> Result<(Type, usize), Diagnostic> { - let (_, cursor) = expect_token!(parser, cursor, TokenType::CurlyOpen)?; +pub fn parse_tuple(parser: &Parser, cursor: usize) -> Result<(Type, usize), Error> { + let (_, cursor) = expect_tokens!(parser, cursor, TokenType::CurlyOpen)?; let mut new_cursor = cursor; let mut members: HashMap = HashMap::new(); @@ -77,7 +85,7 @@ pub fn parse_tuple(parser: &Parser, cursor: usize) -> Result<(Type, usize), Diag TokenType::CurlyClose => break, _ => { if !members.is_empty() { - let (_, cursor) = expect_token!(parser, new_cursor, TokenType::Comma)?; + let (_, cursor) = expect_tokens!(parser, new_cursor, TokenType::Comma)?; new_cursor = cursor; } @@ -86,13 +94,13 @@ pub fn parse_tuple(parser: &Parser, cursor: usize) -> Result<(Type, usize), Diag match colon { Some(_) => { let (field_name, cursor) = - expect_token!(parser, new_cursor, TokenType::Identifier)?; - let field_name = match &field_name.value { + expect_tokens!(parser, new_cursor, TokenType::Identifier)?; + let field_name = match &field_name[0].value { TokenValue::Identifier(field_name) => field_name.clone(), _ => panic!("expect_token! should only return identifiers"), }; - let (_, cursor) = expect_token!(parser, cursor, TokenType::Colon)?; + let (_, cursor) = expect_tokens!(parser, cursor, TokenType::Colon)?; let (field_type, cursor) = expect_type!(parser, cursor, BindingPower::None)?; @@ -114,7 +122,7 @@ pub fn parse_tuple(parser: &Parser, cursor: usize) -> Result<(Type, usize), Diag new_cursor = cursor; } - let (_, cursor) = expect_token!(parser, new_cursor, TokenType::CurlyClose)?; + let (_, cursor) = expect_tokens!(parser, new_cursor, TokenType::CurlyClose)?; Ok((Type::Tuple(members), cursor)) } diff --git a/src/scanner/lexeme.rs b/src/scanner/lexeme.rs index 8bb1fdc..b11c3ac 100644 --- a/src/scanner/lexeme.rs +++ b/src/scanner/lexeme.rs @@ -1,12 +1,14 @@ use std::{fmt::Display, hash::Hash}; +use crate::diagnostic::Range; + #[derive(Debug, Clone, PartialEq)] -pub enum Lexeme { - Valid(Token), - Invalid(Range), +pub enum Lexeme<'a> { + Valid(Token<'a>), + Invalid(Range<'a>), } -impl Display for Lexeme { +impl<'a> Display for Lexeme<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Lexeme::Valid(token) => write!(f, "`{}`", token.token_type), @@ -15,20 +17,28 @@ impl Display for Lexeme { } } -impl Lexeme { - pub fn valid(token_type: TokenType, value: TokenValue, start: usize, length: usize) -> Lexeme { +impl<'a> Lexeme<'a> { + pub fn valid( + file_id: impl Into<&'a str>, + token_type: TokenType, + value: TokenValue, + start: usize, + length: usize, + ) -> Lexeme<'a> { Lexeme::Valid(Token::new( token_type, value, Range { + file_id: file_id.into(), position: start, length, }, )) } - pub fn invalid(start: usize, length: usize) -> Lexeme { + pub fn invalid(file_id: impl Into<&'a str>, start: usize, length: usize) -> Lexeme<'a> { Lexeme::Invalid(Range { + file_id: file_id.into(), position: start, length, }) @@ -40,22 +50,21 @@ impl Lexeme { Lexeme::Invalid(range) => range, } } -} -#[derive(Debug, Clone, Hash, PartialEq, Eq)] -pub struct Range { - pub position: usize, - pub length: usize, + pub fn is_valid(&self) -> bool { + matches!(self, Lexeme::Valid(_)) + } } #[derive(Debug, Clone, PartialEq)] -pub struct Token { +pub struct Token<'a> { pub token_type: TokenType, pub value: TokenValue, - pub range: Range, + /// The range of the token in the source code. + pub range: Range<'a>, } -impl Token { +impl<'a> Token<'a> { pub fn new(token_type: TokenType, value: TokenValue, range: Range) -> Token { Token { token_type, diff --git a/src/scanner/mod.rs b/src/scanner/mod.rs index 28f1c32..c8ba908 100644 --- a/src/scanner/mod.rs +++ b/src/scanner/mod.rs @@ -1,3 +1,4 @@ +use crate::files::Files; use lexeme::Lexeme; use lexeme::TokenType; use lexeme::TokenValue; @@ -13,16 +14,16 @@ macro_rules! r { }; } -pub struct Scanner { - input: String, +pub struct Scanner<'a> { + files: &'a Files<'a>, cursor: usize, spec: Vec, } -impl Scanner { - pub fn new(input: String) -> Scanner { +impl<'a> Scanner<'a> { + pub fn new(files: &'a Files) -> Scanner<'a> { Scanner { - input, + files, cursor: 0, spec: vec![ (r!(r"(\s+)"), |_| (TokenType::Ignore, TokenValue::None)), @@ -99,64 +100,46 @@ impl Scanner { } } - fn find_lexeme(&self, cursor: usize) -> Option<(Lexeme, usize)> { - let haystack = &self.input.chars().skip(cursor).collect::(); - - for (regex, handler) in &self.spec { - let capture = regex.captures(haystack); - - if let Some((capture, matched)) = capture.and_then(|c| Some((c.get(0)?, c.get(1)?))) { - let value = matched.as_str(); - let (token_type, token_value) = handler(value); - let length = capture.as_str().chars().count(); // TODO: Check if we shouldn't use as_str().len() instead - let new_cursor = cursor + capture.end(); - return Some(( - Lexeme::valid(token_type, token_value, cursor, length), - new_cursor, - )); - } - } - - None - } -} - -impl Iterator for Scanner { - type Item = Lexeme; - - fn next(&mut self) -> Option { - if self.cursor >= self.input.chars().count() { - return None; - } - - // Search for the next lexeme. If we get a None value, keep increasing the cursor until the next lexeme would be found. Return an Invalid Lexeme, and have the next call to this function handle the next valid lexeme. - let lexeme = self.find_lexeme(self.cursor); - if lexeme.is_none() { - let cursor_start = self.cursor; - let mut cursor = self.cursor; - while self.find_lexeme(cursor).is_none() { - cursor += 1; - - if cursor >= self.input.chars().count() { - break; + pub fn parse(&self) -> Vec { + let mut lexemes = Vec::new(); + + for file in self.files.file_ids() { + let content = self.files.get(file).unwrap(); + let mut cursor = 0; + + let mut panic_start_at = None; + while cursor < content.chars().count() { + for (regex, handler) in &self.spec { + let capture = regex.captures(&content[cursor..]); + + match capture.and_then(|c| Some((c.get(0)?, c.get(1)?))) { + Some((capture, matched)) => { + if let Some(start) = panic_start_at.take() { + lexemes.push(Lexeme::invalid(file, start, cursor - start - 1)); + } + + let value = matched.as_str(); + let (token_type, token_value) = handler(value); + let length = capture.as_str().chars().count(); + lexemes.push(Lexeme::valid( + file, + token_type, + token_value, + cursor, + length, + )); + cursor += length; + } + None => { + panic_start_at = Some(cursor); + cursor += 1; + } + } } } - - let length = cursor - self.cursor; - self.cursor = cursor; - return Some(Lexeme::invalid(cursor_start, length)); - } - - let (lexeme, new_cursor) = lexeme.unwrap(); - self.cursor = new_cursor; - - if let Lexeme::Valid(token) = &lexeme { - if token.token_type == TokenType::Ignore { - return self.next(); - } } - Some(lexeme) + lexemes } }