From 3543d635b21924c3b34b32224f414ba3bb97c198 Mon Sep 17 00:00:00 2001 From: Lucas Date: Tue, 9 Jul 2024 14:33:41 +0200 Subject: [PATCH] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20(parser):=20Refactor=20par?= =?UTF-8?q?ser?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit semver: chore --- src/diagnostic.rs | 18 +++++++ src/main.rs | 8 ++- src/parser/ast.rs | 2 +- src/parser/expression/binary.rs | 83 +++++++++++++++++++++++++++++++ src/parser/expression/literals.rs | 16 +++--- src/parser/expression/mod.rs | 38 ++++++++------ src/parser/lookup.rs | 11 ++-- src/parser/macros.rs | 44 ++++++++++++++-- src/parser/mod.rs | 12 ++++- src/parser/statement/enums.rs | 14 ++++++ src/parser/statement/mod.rs | 19 +++++-- test.som | 5 +- 12 files changed, 228 insertions(+), 42 deletions(-) create mode 100644 src/parser/expression/binary.rs create mode 100644 src/parser/statement/enums.rs diff --git a/src/diagnostic.rs b/src/diagnostic.rs index e7bd03d..0906309 100644 --- a/src/diagnostic.rs +++ b/src/diagnostic.rs @@ -113,6 +113,15 @@ impl<'a> Snippet<'a> { Snippet::new(file_id, Label::Primary, position, length, message) } + pub fn primary_from_token(token: &Token<'a>, message: impl Into) -> Snippet<'a> { + Snippet::primary( + token.range.file_id, + token.range.position, + token.range.length, + message, + ) + } + pub fn secondary( file_id: impl Into<&'a str>, position: usize, @@ -122,6 +131,15 @@ impl<'a> Snippet<'a> { Snippet::new(file_id, Label::Secondary, position, length, message) } + pub fn secondary_from_token(token: &Token<'a>, message: impl Into) -> Snippet<'a> { + Snippet::secondary( + token.range.file_id, + token.range.position, + token.range.length, + message, + ) + } + pub fn new( file_id: impl Into<&'a str>, label: Label, diff --git a/src/main.rs b/src/main.rs index ff39243..582f999 100644 --- a/src/main.rs +++ b/src/main.rs @@ -27,12 +27,16 @@ fn main() -> Result<()> { let scanner = scanner::Scanner::new(&files); let scanner_pass = scanner.parse(); - //sscanner_pass.print_diagnostics(&files); + // scanner_pass.print_diagnostics(&files); let mut parser = parser::Parser::new(&scanner_pass.result); - let parser_pass = parser.parse(); + let parser_pass = parser.parse().unwrap(); parser.print_diagnostics(&files); + let transpiler = BendTranspiler::transpile(&parser_pass); + + println!("{}", transpiler); + Ok(()) } diff --git a/src/parser/ast.rs b/src/parser/ast.rs index 56abcdd..3bbbb9f 100644 --- a/src/parser/ast.rs +++ b/src/parser/ast.rs @@ -1,6 +1,6 @@ use std::collections::{HashMap, HashSet}; -use crate::{diagnostic::Range, scanner::lexeme::Token}; +use crate::scanner::lexeme::Token; #[derive(Debug, Clone, PartialEq)] pub enum Symbol<'a> { diff --git a/src/parser/expression/binary.rs b/src/parser/expression/binary.rs new file mode 100644 index 0000000..c54230d --- /dev/null +++ b/src/parser/expression/binary.rs @@ -0,0 +1,83 @@ +use crate::parser::{ + ast::{BinaryOperation, Expression}, + lookup::{BindingPower, Lookup}, + macros::expect_token, + ParseResult, Parser, +}; + +pub fn register(lookup: &mut Lookup) { + use crate::scanner::lexeme::TokenType; + + lookup.add_left_expression_handler(TokenType::Plus, BindingPower::Additive, parse_addative); + lookup.add_left_expression_handler(TokenType::Minus, BindingPower::Additive, parse_subtractive); + lookup.add_left_expression_handler( + TokenType::Star, + BindingPower::Multiplicative, + parse_multiplicative, + ); + lookup.add_left_expression_handler( + TokenType::Slash, + BindingPower::Multiplicative, + parse_dividing, + ); +} + +fn parse_addative<'a>( + parser: &mut Parser<'a>, + left: Expression, + binding_power: BindingPower, +) -> ParseResult<'a, Expression> { + expect_token!(parser, Plus)?; + let right = super::parse(parser, binding_power)?; + + Ok(Expression::Binary( + Box::new(left), + BinaryOperation::Plus, + Box::new(right), + )) +} + +fn parse_subtractive<'a>( + parser: &mut Parser<'a>, + left: Expression, + binding_power: BindingPower, +) -> ParseResult<'a, Expression> { + expect_token!(parser, Minus)?; + let right = super::parse(parser, binding_power)?; + + Ok(Expression::Binary( + Box::new(left), + BinaryOperation::Minus, + Box::new(right), + )) +} + +fn parse_multiplicative<'a>( + parser: &mut Parser<'a>, + left: Expression, + binding_power: BindingPower, +) -> ParseResult<'a, Expression> { + expect_token!(parser, Star)?; + let right = super::parse(parser, binding_power)?; + + Ok(Expression::Binary( + Box::new(left), + BinaryOperation::Times, + Box::new(right), + )) +} + +fn parse_dividing<'a>( + parser: &mut Parser<'a>, + left: Expression, + binding_power: BindingPower, +) -> ParseResult<'a, Expression> { + expect_token!(parser, Slash)?; + let right = super::parse(parser, binding_power)?; + + Ok(Expression::Binary( + Box::new(left), + BinaryOperation::Divide, + Box::new(right), + )) +} diff --git a/src/parser/expression/literals.rs b/src/parser/expression/literals.rs index 353fb8a..0bae4ac 100644 --- a/src/parser/expression/literals.rs +++ b/src/parser/expression/literals.rs @@ -16,37 +16,37 @@ pub(crate) fn register(lookup: &mut Lookup) { .add_expression_handler(TokenType::Boolean, parse_boolean); } -fn parse_decimal<'a>(parser: &mut Parser) -> ParseResult<'a, Expression> { +fn parse_decimal<'a>(parser: &mut Parser<'a>) -> ParseResult<'a, Expression> { let decimal = expect_token!(parser, Decimal)?; - let decimal = expect_value!(decimal, Decimal).clone(); + let decimal = *expect_value!(decimal, Decimal); Ok(Expression::Number(decimal)) } -fn parse_integer<'a>(parser: &mut Parser) -> ParseResult<'a, Expression> { +fn parse_integer<'a>(parser: &mut Parser<'a>) -> ParseResult<'a, Expression> { let integer = expect_token!(parser, Integer)?; - let integer = expect_value!(integer, Integer).clone(); + let integer = *expect_value!(integer, Integer); Ok(Expression::Number(integer as f64)) } -fn parse_string<'a>(parser: &mut Parser) -> ParseResult<'a, Expression> { +fn parse_string<'a>(parser: &mut Parser<'a>) -> ParseResult<'a, Expression> { let string = expect_token!(parser, String)?; let string = expect_value!(string, String).clone(); Ok(Expression::String(string)) } -fn parse_identifier<'a>(parser: &mut Parser) -> ParseResult<'a, Expression> { +fn parse_identifier<'a>(parser: &mut Parser<'a>) -> ParseResult<'a, Expression> { let identifier = expect_token!(parser, Identifier)?; let identifier = expect_value!(identifier, Identifier).clone(); Ok(Expression::Identifier(identifier)) } -fn parse_boolean<'a>(parser: &mut Parser) -> ParseResult<'a, Expression> { +fn parse_boolean<'a>(parser: &mut Parser<'a>) -> ParseResult<'a, Expression> { let boolean = expect_token!(parser, Boolean)?; - let boolean = expect_value!(boolean, Boolean).clone(); + let boolean = *expect_value!(boolean, Boolean); Ok(Expression::Boolean(boolean)) } diff --git a/src/parser/expression/mod.rs b/src/parser/expression/mod.rs index b1a261e..26eff90 100644 --- a/src/parser/expression/mod.rs +++ b/src/parser/expression/mod.rs @@ -1,19 +1,24 @@ -use crate::diagnostic::Diagnostic; - use super::{ast::Expression, lookup::BindingPower, macros::expect_token, ParseResult, Parser}; -use std::collections::HashSet; +use crate::diagnostic::{Diagnostic, Snippet}; +pub mod binary; pub mod literals; pub fn parse<'a>( parser: &mut Parser<'a>, - binding_power: &BindingPower, + binding_power: BindingPower, ) -> ParseResult<'a, Expression> { + let token = expect_token!(parser)?; + let expression_handler = parser .lookup .expression_lookup - .get(&parser.peek().unwrap().token_type) - .ok_or(Diagnostic::error("P0001", "Expected a new expression"))?; + .get(&token.token_type) + .ok_or( + Diagnostic::error("P0001", "Expected a new expression").with_snippet( + Snippet::primary_from_token(token, "Expected an expression to start here"), + ), + )?; let mut left_hand_side = expression_handler(parser)?; @@ -23,8 +28,9 @@ pub fn parse<'a>( let token_binding_power = parser .lookup .binding_power_lookup - .get(&token.token_type) - .unwrap_or(&BindingPower::None); + .get(&parser.peek().unwrap().token_type) + .copied() + .unwrap_or_default(); if binding_power > token_binding_power { break; @@ -42,13 +48,17 @@ pub fn parse<'a>( Ok(left_hand_side) } -pub fn parse_addative<'a>(parser: &mut Parser<'a>) -> ParseResult<'a, Expression> { - let plus = expect_token!(parser, Plus)?; - let left = parse(parser, &BindingPower::Additive)?; +pub(crate) fn register(lookup: &mut super::lookup::Lookup) { + literals::register(lookup); + binary::register(lookup); - todo!() + lookup.add_expression_handler(crate::scanner::lexeme::TokenType::ParenOpen, parse_grouping); } -pub(crate) fn register(lookup: &mut super::lookup::Lookup) { - literals::register(lookup); +fn parse_grouping<'a>(parser: &mut Parser<'a>) -> ParseResult<'a, Expression> { + expect_token!(parser, ParenOpen)?; + let expression = parse(parser, BindingPower::None)?; + expect_token!(parser, ParenClose)?; + + Ok(Expression::Grouping(Box::new(expression))) } diff --git a/src/parser/lookup.rs b/src/parser/lookup.rs index 584ceca..dc0653b 100644 --- a/src/parser/lookup.rs +++ b/src/parser/lookup.rs @@ -1,13 +1,14 @@ use super::{ - ast::{BinaryOperation, Expression, Statement, Type}, + ast::{Expression, Statement, Type}, ParseResult, Parser, }; -use crate::scanner::lexeme::{TokenType, TokenValue}; +use crate::scanner::lexeme::TokenType; use core::panic; use std::collections::HashMap; -#[derive(Debug, PartialEq, PartialOrd)] +#[derive(Default, Debug, Clone, Copy, PartialEq, PartialOrd)] pub enum BindingPower { + #[default] None = 0, Comma = 1, Assignment = 2, @@ -22,11 +23,11 @@ pub enum BindingPower { } pub type TypeHandler<'a> = fn(&mut Parser<'a>) -> ParseResult<'a, Type>; -pub type LeftTypeHandler<'a> = fn(&mut Parser<'a>, Type, &BindingPower) -> ParseResult<'a, Type>; +pub type LeftTypeHandler<'a> = fn(&mut Parser<'a>, Type, BindingPower) -> ParseResult<'a, Type>; pub type StatementHandler<'a> = fn(&mut Parser<'a>) -> ParseResult<'a, Statement>; pub type ExpressionHandler<'a> = fn(&mut Parser<'a>) -> ParseResult<'a, Expression>; pub type LeftExpressionHandler<'a> = - fn(&mut Parser<'a>, Expression, &BindingPower) -> ParseResult<'a, Expression>; + fn(&mut Parser<'a>, Expression, BindingPower) -> ParseResult<'a, Expression>; pub struct Lookup<'a> { pub statement_lookup: HashMap>, diff --git a/src/parser/macros.rs b/src/parser/macros.rs index 137c0a5..aedd033 100644 --- a/src/parser/macros.rs +++ b/src/parser/macros.rs @@ -6,12 +6,48 @@ macro_rules! expect_token { if let Some(TokenType::$token) = $parser.peek().map(|t| &t.token_type) { Ok($parser.consume().unwrap()) } else { - Err(Diagnostic::error( - "expected_token", - format!("Expected token {:?}", TokenType::$token), - )) + let token = $parser.peek().unwrap_or($parser.tokens.last().unwrap()); + let position = if $parser.peek().is_none() { + token.range.position + token.range.length + } else { + token.range.position + }; + + Err( + Diagnostic::error("expected_token", format!("Expected {}", TokenType::$token)) + .with_snippet(crate::diagnostic::Snippet::primary( + token.range.file_id, + position, + 1, + format!("Expected {} here", TokenType::$token), + )) + .with_note(format!( + "Expected {}, but got {} instead", + TokenType::$token, + token.token_type + )), + ) } }}; + + ($parser:ident) => { + if let Some(token) = $parser.peek() { + Ok(token) + } else { + let token = $parser.tokens.last().unwrap(); + + Err( + Diagnostic::error("expected_token", "Unexpected end of file") + .with_snippet(crate::diagnostic::Snippet::primary( + token.range.file_id, + token.range.position + token.range.length, + 1, + "Unexpected end of file", + )) + .with_note("Expected more code, but reached the end of the file"), + ) + } + }; } macro_rules! expect_value { diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 30bb14d..5ebab73 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -35,14 +35,24 @@ impl<'a> Parser<'a> { pub fn parse(&mut self) -> ParseResult<'a, Symbol<'a>> { let mut statements = Vec::new(); + let mut panic_mode = false; while self.has_tokens() { match statement::parse(self) { Ok(statement) => { + if panic_mode { + panic_mode = false; + } + statements.push(statement); } Err(diagnostic) => { - self.diagnostics.insert(diagnostic); + if !panic_mode { + self.diagnostics.insert(diagnostic); + } + + self.consume(); + panic_mode = true; } } } diff --git a/src/parser/statement/enums.rs b/src/parser/statement/enums.rs new file mode 100644 index 0000000..f289f81 --- /dev/null +++ b/src/parser/statement/enums.rs @@ -0,0 +1,14 @@ +use crate::{ + parser::{ast::Statement, lookup::Lookup, macros::expect_token, ParseResult, Parser}, + scanner::lexeme::TokenType, +}; + +pub fn register(lookup: &mut Lookup) { + lookup.add_statement_handler(TokenType::Enum, parse_enum); +} + +fn parse_enum<'a>(parser: &mut Parser<'a>) -> ParseResult<'a, Statement> { + expect_token!(parser, Enum)?; + + todo!() +} diff --git a/src/parser/statement/mod.rs b/src/parser/statement/mod.rs index 6f28c36..c1354f6 100644 --- a/src/parser/statement/mod.rs +++ b/src/parser/statement/mod.rs @@ -1,4 +1,8 @@ -use super::{ast::Statement, expression, macros::expect_token, ParseResult, Parser}; +use super::{ + ast::Statement, expression, lookup::BindingPower, macros::expect_token, ParseResult, Parser, +}; + +pub mod enums; pub fn parse<'a>(parser: &mut Parser<'a>) -> ParseResult<'a, Statement> { let statement_handler = parser @@ -7,13 +11,22 @@ pub fn parse<'a>(parser: &mut Parser<'a>) -> ParseResult<'a, Statement> { .get(&parser.peek().unwrap().token_type); match statement_handler { - Some(handler) => handler(parser), + Some(handler) => { + println!("Using statement handler"); + handler(parser) + } None => parse_expression(parser), } } +pub fn register(lookup: &mut super::lookup::Lookup) { + enums::register(lookup); +} + fn parse_expression<'a>(parser: &mut Parser<'a>) -> ParseResult<'a, Statement> { - let expression = expression::parse(parser)?; + println!("Parsing expression"); + + let expression = expression::parse(parser, BindingPower::None)?; expect_token!(parser, Semicolon)?; Ok(Statement::Expression(expression)) diff --git a/test.som b/test.som index 40eb8ca..ad4e08e 100644 --- a/test.som +++ b/test.som @@ -1,4 +1 @@ - -struct test2 - cheese ~ [int] - cheese ~ string +enum;