From c9beb7f5d1a0d86407b291d284c7f5119dccaa26 Mon Sep 17 00:00:00 2001 From: Lucas Date: Tue, 9 Jul 2024 12:12:31 +0200 Subject: [PATCH] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20(parser):=20Refactor=20par?= =?UTF-8?q?ser?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit semver: chore --- src/diagnostic.rs | 160 ++++++++++++++++--------- src/main.rs | 26 +---- src/parser/ast.rs | 3 +- src/parser/expression.rs | 142 ----------------------- src/parser/expression/literals.rs | 52 +++++++++ src/parser/expression/mod.rs | 54 +++++++++ src/parser/lookup.rs | 187 +++++++----------------------- src/parser/macros.rs | 149 +++--------------------- src/parser/mod.rs | 125 ++++++++++---------- src/parser/statement.rs | 162 -------------------------- src/parser/statement/mod.rs | 20 ++++ src/parser/typing.rs | 137 ---------------------- src/scanner/lexeme.rs | 12 ++ src/scanner/mod.rs | 130 ++++++++++++++------- src/transpiler/bend.rs | 1 + test.som | 5 +- 16 files changed, 467 insertions(+), 898 deletions(-) delete mode 100644 src/parser/expression.rs create mode 100644 src/parser/expression/literals.rs create mode 100644 src/parser/expression/mod.rs delete mode 100644 src/parser/statement.rs create mode 100644 src/parser/statement/mod.rs delete mode 100644 src/parser/typing.rs diff --git a/src/diagnostic.rs b/src/diagnostic.rs index 207faaa..e7bd03d 100644 --- a/src/diagnostic.rs +++ b/src/diagnostic.rs @@ -1,28 +1,116 @@ -use crate::scanner::lexeme::Token; +use std::collections::HashSet; + +use codespan_reporting::term::termcolor::{ColorChoice, StandardStream}; + +use crate::{files::Files, scanner::lexeme::Token}; + +pub struct PassResult<'a, T> { + pub result: T, + pub diagnostics: HashSet>, +} + +impl<'a, T> PassResult<'a, T> { + pub fn new(result: T, diagnostics: HashSet>) -> Self { + Self { + result, + diagnostics, + } + } +} #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct Diagnostic<'a> { pub severity: Severity, + pub code: String, 
pub title: String, - pub errors: Vec>, + pub snippets: Vec>, + pub notes: Vec, +} + +impl<'a> Diagnostic<'a> { + pub fn error(code: impl Into, message: impl Into) -> Diagnostic<'a> { + Diagnostic::new(Severity::Error, code, message) + } + + pub fn warning(code: impl Into, message: impl Into) -> Diagnostic<'a> { + Diagnostic::new(Severity::Warning, code, message) + } + + pub fn note(code: impl Into, message: impl Into) -> Diagnostic<'a> { + Diagnostic::new(Severity::Note, code, message) + } + + pub fn help(code: impl Into, message: impl Into) -> Diagnostic<'a> { + Diagnostic::new(Severity::Help, code, message) + } + + pub fn new( + severity: Severity, + code: impl Into, + message: impl Into, + ) -> Diagnostic<'a> { + Diagnostic { + severity, + title: message.into(), + snippets: vec![], + code: code.into(), + notes: vec![], + } + } + + pub fn with_snippet(mut self, snippet: Snippet<'a>) -> Self { + self.snippets.push(snippet); + self + } + + pub fn with_snippets(mut self, snippets: impl IntoIterator>) -> Self { + self.snippets.extend(snippets); + self + } + + pub fn with_note(mut self, note: impl Into) -> Self { + self.notes.push(note.into()); + self + } + + pub fn with_notes(mut self, notes: impl IntoIterator>) -> Self { + self.notes.extend(notes.into_iter().map(|note| note.into())); + self + } + + pub(crate) fn transform_range(mut self, tokens: &'a [Token<'a>]) -> Diagnostic<'a> { + self.snippets = self + .snippets + .into_iter() + .map(|snippet| { + let range = snippet.range.to_source_code_range(tokens); + Snippet { + message: snippet.message, + label: snippet.label, + range, + } + }) + .collect(); + + self + } } #[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct Error<'a> { +pub struct Snippet<'a> { pub message: String, pub label: Label, pub range: Range<'a>, - pub notes: Vec, } -impl<'a> Error<'a> { +impl<'a> Snippet<'a> { pub fn primary( file_id: impl Into<&'a str>, position: usize, length: usize, message: impl Into, - ) -> Error<'a> { - 
Error::new(file_id, Label::Primary, position, length, message) + ) -> Snippet<'a> { + Snippet::new(file_id, Label::Primary, position, length, message) } pub fn secondary( @@ -30,8 +118,8 @@ impl<'a> Error<'a> { position: usize, length: usize, message: impl Into, - ) -> Error<'a> { - Error::new(file_id, Label::Secondary, position, length, message) + ) -> Snippet<'a> { + Snippet::new(file_id, Label::Secondary, position, length, message) } pub fn new( @@ -40,8 +128,8 @@ impl<'a> Error<'a> { position: usize, length: usize, message: impl Into, - ) -> Error<'a> { - Error { + ) -> Snippet<'a> { + Snippet { message: message.into(), label, range: Range { @@ -49,19 +137,8 @@ impl<'a> Error<'a> { position, length, }, - notes: vec![], } } - - pub fn with_note(mut self, note: impl Into) -> Self { - self.notes.push(note.into()); - self - } - - pub(crate) fn transform_range(mut self, lexemes: &'a [Token<'a>]) -> Error { - self.range = self.range.to_source_code_range(lexemes); - self - } } #[derive(Debug, Clone, PartialEq, Eq, Hash)] @@ -89,51 +166,24 @@ impl From for codespan_reporting::diagnostic::Severity { } } -impl<'a> Diagnostic<'a> { - pub fn error(message: impl Into) -> Diagnostic<'a> { - Diagnostic::new(Severity::Error, message) - } - - pub fn warning(message: impl Into) -> Diagnostic<'a> { - Diagnostic::new(Severity::Warning, message) - } - - pub fn new(severity: Severity, message: impl Into) -> Diagnostic<'a> { - Diagnostic { - severity, - title: message.into(), - errors: vec![], - } - } - - pub fn with_error(mut self, error: Error<'a>) -> Self { - self.errors.push(error); - self - } -} - impl<'a> From> for codespan_reporting::diagnostic::Diagnostic<&'a str> { fn from(val: Diagnostic<'a>) -> Self { codespan_reporting::diagnostic::Diagnostic::<&'a str>::new(val.severity.into()) .with_message(val.title) .with_labels( - val.errors + val.snippets .clone() .into_iter() - .map(|error| error.into()) + .map(|snippet| snippet.into()) .collect(), ) - .with_notes( - val.errors - 
.iter() - .flat_map(|e| e.notes.clone()) - .collect::>(), - ) + .with_notes(val.notes) + .with_code(val.code) } } -impl<'a> From> for codespan_reporting::diagnostic::Label<&'a str> { - fn from(val: Error<'a>) -> Self { +impl<'a> From> for codespan_reporting::diagnostic::Label<&'a str> { + fn from(val: Snippet<'a>) -> Self { codespan_reporting::diagnostic::Label::new( val.label.into(), val.range.file_id, diff --git a/src/main.rs b/src/main.rs index a166870..ff39243 100644 --- a/src/main.rs +++ b/src/main.rs @@ -25,30 +25,14 @@ fn main() -> Result<()> { files.insert(file, source); let scanner = scanner::Scanner::new(&files); - let (tokens, scanner_diagnostics) = scanner.parse(); + let scanner_pass = scanner.parse(); - print_diagnostics(scanner_diagnostics, &files); + //sscanner_pass.print_diagnostics(&files); - let mut parser = parser::Parser::new(&tokens); - let (ast, parser_diagnostics) = parser.parse(); + let mut parser = parser::Parser::new(&scanner_pass.result); + let parser_pass = parser.parse(); - print_diagnostics(parser_diagnostics, &files); + parser.print_diagnostics(&files); Ok(()) } - -fn print_diagnostics(diagnostics: HashSet, files: &Files) { - for diagnostic in diagnostics.iter() { - println!("{:?}", diagnostic); - } - - let diagnostics: Vec> = - diagnostics.iter().map(|d| d.clone().into()).collect(); - - let writer = StandardStream::stderr(ColorChoice::Auto); - let config = codespan_reporting::term::Config::default(); - - for diagnostic in diagnostics { - term::emit(&mut writer.lock(), &config, files, &diagnostic).unwrap(); - } -} diff --git a/src/parser/ast.rs b/src/parser/ast.rs index 1e63451..56abcdd 100644 --- a/src/parser/ast.rs +++ b/src/parser/ast.rs @@ -1,6 +1,6 @@ use std::collections::{HashMap, HashSet}; -use crate::scanner::lexeme::Token; +use crate::{diagnostic::Range, scanner::lexeme::Token}; #[derive(Debug, Clone, PartialEq)] pub enum Symbol<'a> { @@ -15,6 +15,7 @@ pub enum Expression { Number(f64), String(String), Identifier(String), + 
Boolean(bool), Unary(UnaryOperation, Box), Binary(Box, BinaryOperation, Box), Grouping(Box), diff --git a/src/parser/expression.rs b/src/parser/expression.rs deleted file mode 100644 index 1c1ee6a..0000000 --- a/src/parser/expression.rs +++ /dev/null @@ -1,142 +0,0 @@ -use super::{ - ast::{Expression, UnaryOperation}, - lookup::BindingPower, - macros::{expect_expression, expect_token_value, expect_tokens, expect_valid_token}, - Parser, -}; -use crate::{ - diagnostic::Error, - parser::macros::expect_any_token, - scanner::lexeme::{Token, TokenType, TokenValue}, -}; -use std::collections::HashMap; - -pub fn parse<'a>( - parser: &'a Parser<'a>, - cursor: usize, - binding_power: &BindingPower, -) -> Result<(Expression, usize), Vec>> { - let mut cursor = cursor; - let (token, range) = expect_valid_token!(parser, cursor)?; - let expression_handler = parser - .lookup - .expression_lookup - .get(&token.token_type) - .ok_or(vec![Error::primary( - range.file_id, - cursor, - range.length, - "Expected a new expression", - )])?; - - let (mut left_hand_side, new_cursor) = expression_handler(parser, cursor)?; - - cursor = new_cursor; - - while let Some(token) = parser.tokens.get(cursor) { - let token_binding_power = parser - .lookup - .binding_power_lookup - .get(&token.token_type) - .unwrap_or(&BindingPower::None); - - if binding_power > token_binding_power { - break; - } - - let left_expression_handler = - match parser.lookup.left_expression_lookup.get(&token.token_type) { - Some(handler) => handler, - None => break, - }; - - let (right_hand_side, new_cursor) = - left_expression_handler(parser, cursor, left_hand_side, token_binding_power)?; - - cursor = new_cursor; - left_hand_side = right_hand_side; - } - - Ok((left_hand_side, cursor)) -} - -pub fn parse_assignment<'a>( - parser: &'a Parser<'a>, - cursor: usize, - lhs: Expression, - binding_power: &BindingPower, -) -> Result<(Expression, usize), Vec>> { - let (_, cursor) = expect_tokens!(parser, cursor, TokenType::Equal)?; - 
let (rhs, cursor) = expect_expression!(parser, cursor, binding_power)?; - - Ok((Expression::Assignment(Box::new(lhs), Box::new(rhs)), cursor)) -} - -pub fn parse_unary<'a>( - parser: &'a Parser<'a>, - cursor: usize, -) -> Result<(Expression, usize), Vec>> { - let (token, cursor) = expect_any_token!(parser, cursor, TokenType::Minus, TokenType::Not)?; - match token.token_type { - TokenType::Minus => { - let (expression, cursor) = expect_expression!(parser, cursor, &BindingPower::Unary)?; - Ok(( - Expression::Unary(UnaryOperation::Negate, Box::new(expression)), - cursor, - )) - } - TokenType::Not => { - let (expression, cursor) = expect_expression!(parser, cursor, &BindingPower::Unary)?; - Ok(( - Expression::Unary(UnaryOperation::Inverse, Box::new(expression)), - cursor, - )) - } - _ => unreachable!(), - } -} - -pub fn parse_struct_initializer<'a>( - parser: &'a Parser<'a>, - cursor: usize, - lhs: Expression, - binding_power: &BindingPower, -) -> Result<(Expression, usize), Vec>> { - let identifier = match lhs { - Expression::Identifier(identifier) => identifier.clone(), - _ => { - unreachable!() - } - }; - - let (_, cursor) = expect_tokens!(parser, cursor, TokenType::CurlyOpen)?; - - let mut members = HashMap::new(); - let mut new_cursor = cursor; - - while let Some(token) = parser.tokens.get(new_cursor) { - if token.token_type == TokenType::CurlyClose { - break; - } - - if !members.is_empty() { - let (_, cursor) = expect_tokens!(parser, new_cursor, TokenType::Comma)?; - new_cursor = cursor; - } - - let (tokens, cursor) = - expect_tokens!(parser, new_cursor, TokenType::Identifier, TokenType::Colon)?; - - let identifier = expect_token_value!(tokens[0], TokenValue::Identifier); - - let (expression, cursor) = expect_expression!(parser, cursor, binding_power)?; - - members.insert(identifier, expression); - - new_cursor = cursor; - } - - let (_, cursor) = expect_tokens!(parser, new_cursor, TokenType::CurlyClose)?; - - Ok((Expression::StructInitializer(identifier, 
members), cursor)) -} diff --git a/src/parser/expression/literals.rs b/src/parser/expression/literals.rs new file mode 100644 index 0000000..353fb8a --- /dev/null +++ b/src/parser/expression/literals.rs @@ -0,0 +1,52 @@ +use crate::parser::{ + ast::Expression, + lookup::Lookup, + macros::{expect_token, expect_value}, + ParseResult, Parser, +}; + +pub(crate) fn register(lookup: &mut Lookup) { + use crate::scanner::lexeme::TokenType; + + lookup + .add_expression_handler(TokenType::Decimal, parse_decimal) + .add_expression_handler(TokenType::Integer, parse_integer) + .add_expression_handler(TokenType::String, parse_string) + .add_expression_handler(TokenType::Identifier, parse_identifier) + .add_expression_handler(TokenType::Boolean, parse_boolean); +} + +fn parse_decimal<'a>(parser: &mut Parser) -> ParseResult<'a, Expression> { + let decimal = expect_token!(parser, Decimal)?; + let decimal = expect_value!(decimal, Decimal).clone(); + + Ok(Expression::Number(decimal)) +} + +fn parse_integer<'a>(parser: &mut Parser) -> ParseResult<'a, Expression> { + let integer = expect_token!(parser, Integer)?; + let integer = expect_value!(integer, Integer).clone(); + + Ok(Expression::Number(integer as f64)) +} + +fn parse_string<'a>(parser: &mut Parser) -> ParseResult<'a, Expression> { + let string = expect_token!(parser, String)?; + let string = expect_value!(string, String).clone(); + + Ok(Expression::String(string)) +} + +fn parse_identifier<'a>(parser: &mut Parser) -> ParseResult<'a, Expression> { + let identifier = expect_token!(parser, Identifier)?; + let identifier = expect_value!(identifier, Identifier).clone(); + + Ok(Expression::Identifier(identifier)) +} + +fn parse_boolean<'a>(parser: &mut Parser) -> ParseResult<'a, Expression> { + let boolean = expect_token!(parser, Boolean)?; + let boolean = expect_value!(boolean, Boolean).clone(); + + Ok(Expression::Boolean(boolean)) +} diff --git a/src/parser/expression/mod.rs b/src/parser/expression/mod.rs new file mode 100644 
index 0000000..b1a261e --- /dev/null +++ b/src/parser/expression/mod.rs @@ -0,0 +1,54 @@ +use crate::diagnostic::Diagnostic; + +use super::{ast::Expression, lookup::BindingPower, macros::expect_token, ParseResult, Parser}; +use std::collections::HashSet; + +pub mod literals; + +pub fn parse<'a>( + parser: &mut Parser<'a>, + binding_power: &BindingPower, +) -> ParseResult<'a, Expression> { + let expression_handler = parser + .lookup + .expression_lookup + .get(&parser.peek().unwrap().token_type) + .ok_or(Diagnostic::error("P0001", "Expected a new expression"))?; + + let mut left_hand_side = expression_handler(parser)?; + + while parser.has_tokens() { + let token = parser.peek().unwrap(); + + let token_binding_power = parser + .lookup + .binding_power_lookup + .get(&token.token_type) + .unwrap_or(&BindingPower::None); + + if binding_power > token_binding_power { + break; + } + + let left_expression_handler = + match parser.lookup.left_expression_lookup.get(&token.token_type) { + Some(handler) => handler, + None => break, + }; + + left_hand_side = left_expression_handler(parser, left_hand_side, token_binding_power)?; + } + + Ok(left_hand_side) +} + +pub fn parse_addative<'a>(parser: &mut Parser<'a>) -> ParseResult<'a, Expression> { + let plus = expect_token!(parser, Plus)?; + let left = parse(parser, &BindingPower::Additive)?; + + todo!() +} + +pub(crate) fn register(lookup: &mut super::lookup::Lookup) { + literals::register(lookup); +} diff --git a/src/parser/lookup.rs b/src/parser/lookup.rs index 6d2611c..584ceca 100644 --- a/src/parser/lookup.rs +++ b/src/parser/lookup.rs @@ -1,14 +1,8 @@ use super::{ ast::{BinaryOperation, Expression, Statement, Type}, - expression, - macros::{expect_expression, expect_tokens}, - statement, typing, Parser, -}; -use crate::{ - diagnostic::Error, - parser::macros::expect_token_value, - scanner::lexeme::{TokenType, TokenValue}, + ParseResult, Parser, }; +use crate::scanner::lexeme::{TokenType, TokenValue}; use core::panic; use 
std::collections::HashMap; @@ -27,19 +21,12 @@ pub enum BindingPower { Primary = 10, } -pub type TypeHandler<'a> = fn(&'a Parser<'a>, usize) -> Result<(Type, usize), Vec>>; -pub type LeftTypeHandler<'a> = - fn(&'a Parser<'a>, usize, Type, &BindingPower) -> Result<(Type, usize), Vec>>; -pub type StatementHandler<'a> = - fn(&'a Parser<'a>, usize) -> Result<(Statement, usize), Vec>>; -pub type ExpressionHandler<'a> = - fn(&'a Parser<'a>, usize) -> Result<(Expression, usize), Vec>>; -pub type LeftExpressionHandler<'a> = fn( - &'a Parser<'a>, - usize, - Expression, - &BindingPower, -) -> Result<(Expression, usize), Vec>>; +pub type TypeHandler<'a> = fn(&mut Parser<'a>) -> ParseResult<'a, Type>; +pub type LeftTypeHandler<'a> = fn(&mut Parser<'a>, Type, &BindingPower) -> ParseResult<'a, Type>; +pub type StatementHandler<'a> = fn(&mut Parser<'a>) -> ParseResult<'a, Statement>; +pub type ExpressionHandler<'a> = fn(&mut Parser<'a>) -> ParseResult<'a, Expression>; +pub type LeftExpressionHandler<'a> = + fn(&mut Parser<'a>, Expression, &BindingPower) -> ParseResult<'a, Expression>; pub struct Lookup<'a> { pub statement_lookup: HashMap>, @@ -55,55 +42,72 @@ impl<'a> Lookup<'a> { &mut self, token: TokenType, handler: StatementHandler<'a>, - ) { + ) -> &mut Self { if self.statement_lookup.contains_key(&token) { panic!("Token already has a statement handler"); } self.statement_lookup.insert(token, handler); + + self } pub(crate) fn add_expression_handler( &mut self, - token: TokenType, + token_type: TokenType, handler: ExpressionHandler<'a>, - ) { - if self.expression_lookup.contains_key(&token) { + ) -> &mut Self { + if self.expression_lookup.contains_key(&token_type) { panic!("Token already has an expression handler"); } - self.expression_lookup.insert(token, handler); + self.expression_lookup.insert(token_type, handler); + + self } pub(crate) fn add_left_expression_handler( &mut self, - token: TokenType, + token_type: TokenType, binding_power: BindingPower, handler: 
LeftExpressionHandler<'a>, - ) { - if self.binding_power_lookup.contains_key(&token) { + ) -> &mut Self { + if self.binding_power_lookup.contains_key(&token_type) { panic!("Token already has a binding power"); } - self.left_expression_lookup.insert(token.clone(), handler); - self.binding_power_lookup.insert(token, binding_power); + self.left_expression_lookup + .insert(token_type.clone(), handler); + self.binding_power_lookup.insert(token_type, binding_power); + + self } - pub(crate) fn add_type_handler(&mut self, token: TokenType, handler: TypeHandler<'a>) { - if self.type_lookup.contains_key(&token) { + pub(crate) fn add_type_handler( + &mut self, + token_type: TokenType, + handler: TypeHandler<'a>, + ) -> &mut Self { + if self.type_lookup.contains_key(&token_type) { panic!("Token already has a type handler"); } - self.type_lookup.insert(token, handler); + self.type_lookup.insert(token_type, handler); + + self } #[allow(dead_code)] - pub(crate) fn add_left_type_handler(&mut self, token: TokenType, handler: LeftTypeHandler<'a>) { - if self.left_type_lookup.contains_key(&token) { + pub(crate) fn add_left_type_handler( + &mut self, + token_type: TokenType, + handler: LeftTypeHandler<'a>, + ) { + if self.left_type_lookup.contains_key(&token_type) { panic!("Token already has a left type handler"); } - self.left_type_lookup.insert(token, handler); + self.left_type_lookup.insert(token_type, handler); } } @@ -118,114 +122,7 @@ impl<'a> Default for Lookup<'a> { left_type_lookup: HashMap::new(), }; - // Addative - lookup.add_left_expression_handler( - TokenType::Plus, - BindingPower::Additive, - |parser, cursor, lhs, _binding| { - let (_, cursor) = expect_tokens!(parser, cursor, TokenType::Plus)?; - let (rhs, cursor) = expect_expression!(parser, cursor, &BindingPower::Additive)?; - Ok(( - Expression::Binary(Box::new(lhs), BinaryOperation::Plus, Box::new(rhs)), - cursor, - )) - }, - ); - - lookup.add_left_expression_handler( - TokenType::Minus, - BindingPower::Additive, - 
|parser, cursor, lhs, _binding| { - let (_, cursor) = expect_tokens!(parser, cursor, TokenType::Minus)?; - let (rhs, cursor) = expect_expression!(parser, cursor, &BindingPower::Additive)?; - Ok(( - Expression::Binary(Box::new(lhs), BinaryOperation::Minus, Box::new(rhs)), - cursor, - )) - }, - ); - - // Multiplicative - lookup.add_left_expression_handler( - TokenType::Star, - BindingPower::Multiplicative, - |parser, cursor, lhs, _binding| { - let (_, cursor) = expect_tokens!(parser, cursor, TokenType::Star)?; - let (rhs, cursor) = - expect_expression!(parser, cursor, &BindingPower::Multiplicative)?; - Ok(( - Expression::Binary(Box::new(lhs), BinaryOperation::Times, Box::new(rhs)), - cursor, - )) - }, - ); - - lookup.add_left_expression_handler( - TokenType::Slash, - BindingPower::Multiplicative, - |parser, cursor, lhs, _binding| { - let (_, cursor) = expect_tokens!(parser, cursor, TokenType::Slash)?; - let (rhs, cursor) = - expect_expression!(parser, cursor, &BindingPower::Multiplicative)?; - Ok(( - Expression::Binary(Box::new(lhs), BinaryOperation::Divide, Box::new(rhs)), - cursor, - )) - }, - ); - - // Literals and symbols - lookup.add_expression_handler(TokenType::Decimal, |parser, cursor| { - let (decimal, cursor) = expect_tokens!(parser, cursor, TokenType::Decimal)?; - let decimal = expect_token_value!(decimal[0], TokenValue::Decimal); - Ok((Expression::Number(decimal), cursor)) - }); - - lookup.add_expression_handler(TokenType::Integer, |parser, cursor| { - let (integer, cursor) = expect_tokens!(parser, cursor, TokenType::Integer)?; - let integer = expect_token_value!(integer[0], TokenValue::Integer); - Ok((Expression::Number(integer as f64), cursor)) - }); - - lookup.add_expression_handler(TokenType::String, |parser, cursor| { - let (string, cursor) = expect_tokens!(parser, cursor, TokenType::String)?; - let string = expect_token_value!(string[0], TokenValue::String); - Ok((Expression::String(string), cursor)) - }); - - 
lookup.add_expression_handler(TokenType::Identifier, |parser, cursor| { - let (identifier, cursor) = expect_tokens!(parser, cursor, TokenType::Identifier)?; - let identifier = expect_token_value!(identifier[0], TokenValue::Identifier); - Ok((Expression::Identifier(identifier), cursor)) - }); - - lookup.add_expression_handler(TokenType::ParenOpen, |parser, cursor| { - let (_, cursor) = expect_tokens!(parser, cursor, TokenType::ParenOpen)?; - let (expression, cursor) = expect_expression!(parser, cursor, &BindingPower::None)?; - let (_, cursor) = expect_tokens!(parser, cursor, TokenType::ParenClose)?; - Ok((Expression::Grouping(Box::new(expression)), cursor)) - }); - - lookup.add_expression_handler(TokenType::Minus, expression::parse_unary); - lookup.add_expression_handler(TokenType::Not, expression::parse_unary); - lookup.add_statement_handler(TokenType::Let, statement::parse_declaration); - lookup.add_left_expression_handler( - TokenType::Equal, - BindingPower::Assignment, - expression::parse_assignment, - ); - - lookup.add_left_expression_handler( - TokenType::CurlyOpen, - BindingPower::Primary, - expression::parse_struct_initializer, - ); - lookup.add_type_handler(TokenType::Identifier, typing::parse_symbol); - lookup.add_type_handler(TokenType::SquareOpen, typing::parse_array); - lookup.add_type_handler(TokenType::CurlyOpen, typing::parse_tuple); - - lookup.add_statement_handler(TokenType::Struct, statement::parse_struct); - lookup.add_statement_handler(TokenType::Enum, statement::parse_enum); + super::expression::register(&mut lookup); lookup } diff --git a/src/parser/macros.rs b/src/parser/macros.rs index 3c38a5f..137c0a5 100644 --- a/src/parser/macros.rs +++ b/src/parser/macros.rs @@ -1,144 +1,29 @@ -#[allow(unused_macros)] -macro_rules! expect_statement { - ($parser:expr, $cursor:expr) => {{ - crate::parser::statement::parse($parser, $cursor) - }}; -} - -macro_rules! 
expect_expression { - ($parser:expr, $cursor:expr, $binding_power:expr) => {{ - crate::parser::expression::parse($parser, $cursor, &$binding_power) - }}; -} - -macro_rules! expect_type { - ($parser:expr, $cursor:expr, $binding_power:expr) => {{ - crate::parser::typing::parse($parser, $cursor, &$binding_power) - }}; -} - -macro_rules! expect_valid_token { - ($parser:expr, $cursor:expr) => {{ - let token = $parser.tokens.get($cursor); +macro_rules! expect_token { + ($parser:ident, $token:ident) => {{ + use crate::diagnostic::Diagnostic; + use crate::scanner::lexeme::TokenType; - match token { - Some(token) => Ok((token, &token.range)), - None => Err(vec![crate::diagnostic::Error::primary( - $parser.tokens.get(0).unwrap().range.file_id, - $cursor + 1, - 0, - "Unexpected end of file", - )]), - } - }}; -} - -// allows for multiple token types to be expected -// peek_any_token!(parser, cursor, TokenType::Plus, TokenType::Minus); -macro_rules! expect_any_token { - ($parser:expr, $cursor:expr, $($token_type:expr),*) => {{ - let expected_token_types = vec![$($token_type.to_string()),*]; - - let token = $parser.tokens.get($cursor); - - match token { - Some(token) => { - if expected_token_types.contains(&token.token_type.to_string()) { - Ok((token, $cursor + 1)) - } else { - Err(vec![crate::diagnostic::Error::primary( - token.range.file_id, - $cursor, - 1, - format!("Expected {}", expected_token_types.join(" or ")), - )]) - } - } - None => Err(vec![crate::diagnostic::Error::primary( - $parser.tokens.get(0).unwrap().range.file_id, - $cursor + 1, - 0, - "Unexpected end of file", - )]), + if let Some(TokenType::$token) = $parser.peek().map(|t| &t.token_type) { + Ok($parser.consume().unwrap()) + } else { + Err(Diagnostic::error( + "expected_token", + format!("Expected token {:?}", TokenType::$token), + )) } }}; } -#[allow(unused_macros)] -macro_rules! 
expect_optional_token { - ($parser:expr, $cursor:expr, $token_type:expr) => {{ - let result = expect_tokens!($parser, $cursor, $token_type); +macro_rules! expect_value { + ($token:expr, $value:ident) => {{ + use crate::scanner::lexeme::TokenValue; - match result { - Ok((token, cursor)) => (Some(token[0].clone()), cursor), - Err(_) => (None, $cursor), - } - }}; -} -macro_rules! expect_token_value { - ($token:expr, $value:path) => {{ match &$token.value { - $value(value) => value.clone(), + TokenValue::$value(value) => value, _ => panic!("expect_token_value! should only return identifiers"), } }}; } -macro_rules! expect_tokens { - ($parser:expr, $cursor:expr, $($token_type:expr),*) => {{ - let mut i = $cursor; - let mut tokens = Vec::new(); - - let mut invalid_indecies = Vec::new(); - - $( - let token: Option<&crate::scanner::lexeme::Token> = $parser.tokens.get(i); - - match token { - Some(token) => { - if token.token_type == $token_type { - tokens.push(token.clone()); - } else { - invalid_indecies.push((i, $token_type)); - } - } - _ => {} - }; - - i += 1; - )* - - if invalid_indecies.is_empty() { - Ok((tokens, i)) - } else { - - let mut errors = Vec::new(); - - for (invalid_index, expected_token_type) in invalid_indecies { - let actual_token = $parser.tokens.get(invalid_index).unwrap(); - - errors.push(crate::diagnostic::Error::primary( - $parser.tokens.get(0).unwrap().range.file_id, - $cursor + invalid_index, - 1, - format!("Expected {}", expected_token_type) - ).with_note( - format!("Expected {}, got {}", expected_token_type, actual_token.token_type) - )); - } - - Err(errors) - } - }}; -} - -pub(crate) use expect_any_token; -pub(crate) use expect_expression; -#[allow(unused_imports)] -pub(crate) use expect_optional_token; -#[allow(unused_imports)] -pub(crate) use expect_statement; -pub(crate) use expect_token_value; -pub(crate) use expect_tokens; -pub(crate) use expect_type; -pub(crate) use expect_valid_token; +pub(crate) use expect_token; +pub(crate) use 
expect_value; diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 19eb69d..30bb14d 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1,8 +1,10 @@ use crate::{ - diagnostic::{Diagnostic, Error}, - scanner::lexeme::Token, + diagnostic::{Diagnostic, PassResult}, + files::Files, + scanner::lexeme::{Token, TokenType, TokenValue}, }; use ast::{Statement, Symbol}; +use codespan_reporting::term::termcolor::{ColorChoice, StandardStream}; use lookup::Lookup; use std::collections::HashSet; @@ -11,81 +13,77 @@ pub mod expression; pub mod lookup; pub mod macros; pub mod statement; -pub mod typing; + +pub type ParseResult<'a, T> = Result>; pub struct Parser<'a> { - lookup: Lookup<'a>, tokens: &'a [Token<'a>], + cursor: usize, + lookup: lookup::Lookup<'a>, + diagnostics: HashSet>, } impl<'a> Parser<'a> { - pub fn new(tokens: &'a Vec>) -> Self { + pub fn new(tokens: &'a [Token<'a>]) -> Self { Self { - lookup: Lookup::default(), tokens, + cursor: 0, + lookup: Lookup::default(), + diagnostics: HashSet::new(), } } - pub fn parse(&'a mut self) -> (Symbol, HashSet) { - let mut statements = vec![]; - let mut diagnostics = HashSet::new(); - let mut last_safe_cursor = 0; - let mut cursor = 0; - let mut panic_mode = false; - - while cursor < self.tokens.len() { - if panic_mode { - // Skip to the next valid statement - while let Some(token) = self.tokens.get(cursor) { - // Try to parse the next statement - if statement::parse(self, cursor).is_ok() { - // diagnostics.insert( - // Diagnostic::warning("Unparsed code").with_error( - // Error::primary( - // token.range.file_id, - // cursor, - // cursor - last_safe_cursor, - // "This code was not parsed", - // ) - // .with_note("This code was not parsed since it ") - // .transform_range(self.tokens), - // ), - // ); - - break; - }; - - cursor += 1; - } - } + pub fn parse(&mut self) -> ParseResult<'a, Symbol<'a>> { + let mut statements = Vec::new(); - match statement::parse(self, cursor) { - Ok((statement, new_cursor)) => { - cursor 
= new_cursor; - last_safe_cursor = new_cursor; + while self.has_tokens() { + match statement::parse(self) { + Ok(statement) => { statements.push(statement); } - Err(error) => { - let mut diagnostic = Diagnostic::error("Syntax error"); - - for error in error { - diagnostic = - diagnostic.with_error(error.clone().transform_range(self.tokens)); - } - - diagnostics.insert(diagnostic); - panic_mode = true; + Err(diagnostic) => { + self.diagnostics.insert(diagnostic); } - }; + } } - (Symbol::Statement(Statement::Block(statements)), diagnostics) + Ok(Symbol::Statement(Statement::Block(statements))) + } + + pub(crate) fn peek(&self) -> Option<&Token<'a>> { + self.tokens.get(self.cursor) + } + + pub(crate) fn consume(&mut self) -> Option<&Token<'a>> { + self.cursor += 1; + self.tokens.get(self.cursor - 1) + } + + pub(crate) fn peek_next(&self) -> Option<&Token<'a>> { + self.tokens.get(self.cursor + 1) + } + + pub(crate) fn has_tokens(&self) -> bool { + self.cursor < self.tokens.len() + } + + pub fn print_diagnostics(&self, files: &Files) { + let diagnostics: Vec> = + self.diagnostics.iter().map(|d| d.clone().into()).collect(); + + let writer = StandardStream::stderr(ColorChoice::Auto); + let config = codespan_reporting::term::Config::default(); + + for diagnostic in diagnostics { + codespan_reporting::term::emit(&mut writer.lock(), &config, files, &diagnostic) + .unwrap(); + } } } #[cfg(test)] mod tests { - use ast::{BinaryOperation, Expression}; + use ast::{BinaryOperation, Expression, Symbol}; use crate::{files::Files, scanner::Scanner}; @@ -210,11 +208,20 @@ mod tests { files.insert("test", code); let scanner = Scanner::new(&files); - let tokens = scanner.parse().0; + let scanner_pass = scanner.parse(); + + let mut parser = Parser::new(&scanner_pass.result); + let parse_pass = parser.parse(); - let mut parser = Parser::new(&tokens); - let parsed = parser.parse(); + parser.print_diagnostics(&files); - assert_eq!(parsed.0, expected); + match &parse_pass { + Ok(parsed) => { 
+ assert_eq!(parsed, &expected); + } + Err(_) => { + panic!("Parser failed to parse the code"); + } + } } } diff --git a/src/parser/statement.rs b/src/parser/statement.rs deleted file mode 100644 index 4425ef0..0000000 --- a/src/parser/statement.rs +++ /dev/null @@ -1,162 +0,0 @@ -use super::{ - ast::Type, - expression, - lookup::BindingPower, - macros::{ - expect_any_token, expect_expression, expect_token_value, expect_tokens, expect_type, - expect_valid_token, - }, - Parser, Statement, -}; -use crate::{ - diagnostic::Error, - parser::macros::expect_optional_token, - scanner::lexeme::{Token, TokenType, TokenValue}, -}; -use core::panic; -use std::collections::{HashMap, HashSet}; - -pub fn parse<'a>( - parser: &'a Parser<'a>, - cursor: usize, -) -> Result<(Statement, usize), Vec>> { - let (token, _) = expect_valid_token!(parser, cursor)?; - let statement_handler = parser.lookup.statement_lookup.get(&token.token_type); - - match statement_handler { - Some(statement_handler) => statement_handler(parser, cursor), - None => parse_expression(parser, cursor), - } -} - -pub fn parse_expression<'a>( - parser: &'a Parser<'a>, - cursor: usize, -) -> Result<(Statement, usize), Vec>> { - let (expression, cursor) = expression::parse(parser, cursor, &BindingPower::None)?; - let (_, cursor) = expect_tokens!(parser, cursor, TokenType::Semicolon)?; - - Ok((Statement::Expression(expression), cursor)) -} - -pub fn parse_declaration<'a>( - parser: &'a Parser<'a>, - cursor: usize, -) -> Result<(Statement, usize), Vec>> { - let (_, cursor) = expect_tokens!(parser, cursor, TokenType::Let)?; - let (identifier, cursor) = expect_tokens!(parser, cursor, TokenType::Identifier)?; - let identifier = match &identifier[0].value { - TokenValue::Identifier(identifier) => identifier, - _ => panic!("expect_token! 
should return a valid token and handle the error case"), - }; - - let (token, _) = expect_any_token!(parser, cursor, TokenType::Colon, TokenType::Equal)?; - let (typing, cursor) = match token.token_type { - TokenType::Colon => { - let (_, cursor) = expect_tokens!(parser, cursor, TokenType::Colon)?; - let (typing, cursor) = expect_type!(parser, cursor, BindingPower::None)?; - (Some(typing), cursor) - } - _ => (None, cursor), - }; - - let (_, cursor) = expect_tokens!(parser, cursor, TokenType::Equal)?; - let (expression, cursor) = expect_expression!(parser, cursor, &BindingPower::None)?; - let (_, cursor) = expect_tokens!(parser, cursor, TokenType::Semicolon)?; - - Ok(( - Statement::Declaration(identifier.clone(), typing, expression), - cursor, - )) -} - -pub fn parse_struct<'a>( - parser: &'a Parser<'a>, - cursor: usize, -) -> Result<(Statement, usize), Vec>> { - let (tokens, cursor) = expect_tokens!( - parser, - cursor, - TokenType::Struct, - TokenType::Identifier, - TokenType::Colon - )?; - - let identifier = expect_token_value!(tokens[1], TokenValue::Identifier); - - let (indentation_open, cursor) = - expect_optional_token!(parser, cursor, TokenType::IndentationOpen); - - let mut new_cursor = cursor; - let mut members: HashMap = HashMap::new(); - - while let Some(token) = parser.tokens.get(new_cursor) { - let (member_name, member_type, cursor) = match token.token_type { - TokenType::Semicolon => break, - _ => { - let (tokens, cursor) = expect_tokens!( - parser, - new_cursor, - TokenType::Dot, - TokenType::Identifier, - TokenType::Colon - )?; - - let identifier = expect_token_value!(tokens[1], TokenValue::Identifier); - - let (field_type, cursor) = expect_type!(parser, cursor, BindingPower::None)?; - - (identifier, field_type, cursor) - } - }; - - // TODO: Handle warning for overwritten member - members.insert(member_name, member_type); - - new_cursor = cursor; - } - - let (_, cursor) = expect_tokens!(parser, new_cursor, TokenType::Semicolon)?; - - 
Ok((Statement::Struct(identifier, members), cursor)) -} - -pub fn parse_enum<'a>( - parser: &'a Parser<'a>, - cursor: usize, -) -> Result<(Statement, usize), Vec>> { - let (tokens, cursor) = expect_tokens!( - parser, - cursor, - TokenType::Enum, - TokenType::Identifier, - TokenType::Colon - )?; - - let identifier = expect_token_value!(tokens[1], TokenValue::Identifier); - - let mut new_cursor = cursor; - let mut members: HashSet = HashSet::new(); - - while let Some(token) = parser.tokens.get(new_cursor) { - let (member_name, cursor) = match token.token_type { - TokenType::Semicolon => break, - _ => { - let (field_name, cursor) = - expect_tokens!(parser, new_cursor, TokenType::Identifier)?; - - let field_name = expect_token_value!(field_name[0], TokenValue::Identifier); - - (field_name, cursor) - } - }; - - new_cursor = cursor; - // TODO: Handle warning for overwritten members - members.insert(member_name); - } - - let (_, cursor) = expect_tokens!(parser, new_cursor, TokenType::Semicolon)?; - - Ok((Statement::Enum(identifier, members), cursor)) -} diff --git a/src/parser/statement/mod.rs b/src/parser/statement/mod.rs new file mode 100644 index 0000000..6f28c36 --- /dev/null +++ b/src/parser/statement/mod.rs @@ -0,0 +1,20 @@ +use super::{ast::Statement, expression, macros::expect_token, ParseResult, Parser}; + +pub fn parse<'a>(parser: &mut Parser<'a>) -> ParseResult<'a, Statement> { + let statement_handler = parser + .lookup + .statement_lookup + .get(&parser.peek().unwrap().token_type); + + match statement_handler { + Some(handler) => handler(parser), + None => parse_expression(parser), + } +} + +fn parse_expression<'a>(parser: &mut Parser<'a>) -> ParseResult<'a, Statement> { + let expression = expression::parse(parser)?; + expect_token!(parser, Semicolon)?; + + Ok(Statement::Expression(expression)) +} diff --git a/src/parser/typing.rs b/src/parser/typing.rs deleted file mode 100644 index 160b786..0000000 --- a/src/parser/typing.rs +++ /dev/null @@ -1,137 +0,0 @@ 
-use std::collections::HashMap; - -use super::{ - ast::Type, - lookup::BindingPower, - macros::{expect_optional_token, expect_tokens, expect_type, expect_valid_token}, - Parser, -}; -use crate::{ - diagnostic::Error, - scanner::lexeme::{Token, TokenType, TokenValue}, -}; - -pub fn parse<'a>( - parser: &'a Parser<'a>, - cursor: usize, - binding_power: &BindingPower, -) -> Result<(Type, usize), Vec>> { - let mut cursor = cursor; - let (token, range) = expect_valid_token!(parser, cursor)?; - let type_handler = parser - .lookup - .type_lookup - .get(&token.token_type) - .ok_or(vec![Error::primary( - parser.tokens.get(cursor).unwrap().range.file_id, - cursor, - range.length, - "Expected a type", - )])?; - - let (mut left_hand_side, new_cursor) = type_handler(parser, cursor)?; - - cursor = new_cursor; - - while let Some(token) = parser.tokens.get(cursor) { - let token_binding_power = parser - .lookup - .binding_power_lookup - .get(&token.token_type) - .unwrap_or(&BindingPower::None); - - if binding_power > token_binding_power { - break; - } - - let left_type_handler = match parser.lookup.left_type_lookup.get(&token.token_type) { - Some(handler) => handler, - None => break, - }; - - let (right_hand_side, new_cursor) = - left_type_handler(parser, cursor, left_hand_side, token_binding_power)?; - - cursor = new_cursor; - left_hand_side = right_hand_side; - } - - Ok((left_hand_side, cursor)) -} - -pub fn parse_symbol<'a>( - parser: &'a Parser, - cursor: usize, -) -> Result<(Type, usize), Vec>> { - let (identifier, cursor) = expect_tokens!(parser, cursor, TokenType::Identifier)?; - let identifier = match &identifier[0].value { - TokenValue::Identifier(identifier) => identifier, - _ => panic!("expect_token! 
should only return identifiers"), - }; - Ok((Type::Symbol(identifier.clone()), cursor)) -} - -pub fn parse_array<'a>( - parser: &'a Parser<'a>, - cursor: usize, -) -> Result<(Type, usize), Vec>> { - let (_, cursor) = expect_tokens!(parser, cursor, TokenType::SquareOpen)?; - let (element_type, cursor) = expect_type!(parser, cursor, &BindingPower::None)?; - let (_, cursor) = expect_tokens!(parser, cursor, TokenType::SquareClose)?; - Ok((Type::Array(Box::new(element_type)), cursor)) -} - -pub fn parse_tuple<'a>( - parser: &'a Parser<'a>, - cursor: usize, -) -> Result<(Type, usize), Vec>> { - let (_, cursor) = expect_tokens!(parser, cursor, TokenType::CurlyOpen)?; - let mut new_cursor = cursor; - let mut members: HashMap = HashMap::new(); - - while let Some(token) = parser.tokens.get(new_cursor) { - let (member_name, member_type, cursor) = match token.token_type { - TokenType::CurlyClose => break, - _ => { - if !members.is_empty() { - let (_, cursor) = expect_tokens!(parser, new_cursor, TokenType::Comma)?; - new_cursor = cursor; - } - - let (colon, _) = expect_optional_token!(parser, new_cursor + 1, TokenType::Colon); - - match colon { - Some(_) => { - let (field_name, cursor) = - expect_tokens!(parser, new_cursor, TokenType::Identifier)?; - let field_name = match &field_name[0].value { - TokenValue::Identifier(field_name) => field_name.clone(), - _ => panic!("expect_token! 
should only return identifiers"), - }; - - let (_, cursor) = expect_tokens!(parser, cursor, TokenType::Colon)?; - let (field_type, cursor) = - expect_type!(parser, cursor, BindingPower::None)?; - - (field_name, field_type, cursor) - } - None => { - let field_name = members.len().to_string(); - let (field_type, cursor) = - expect_type!(parser, new_cursor, BindingPower::None)?; - (field_name, field_type, cursor) - } - } - } - }; - - // TODO: Check for duplicate member names - members.insert(member_name, member_type); - - new_cursor = cursor; - } - - let (_, cursor) = expect_tokens!(parser, new_cursor, TokenType::CurlyClose)?; - - Ok((Type::Tuple(members), cursor)) -} diff --git a/src/scanner/lexeme.rs b/src/scanner/lexeme.rs index be4a54f..137100d 100644 --- a/src/scanner/lexeme.rs +++ b/src/scanner/lexeme.rs @@ -84,6 +84,14 @@ pub enum TokenType { Colon, /// A semicolon; `;`. Semicolon, + /// A tilde; `~`. + Tilde, + /// A hash; `#`. + Hash, + /// A dollar sign; `$`. + Dollar, + /// A pipe; `|`. 
+ Pipe, /// A plus sign; `+`; Plus, @@ -168,6 +176,10 @@ impl Display for TokenType { TokenType::Dot => write!(f, "`.`"), TokenType::Colon => write!(f, "`:`"), TokenType::Semicolon => write!(f, "`;`"), + TokenType::Tilde => write!(f, "`~`"), + TokenType::Hash => write!(f, "`#`"), + TokenType::Dollar => write!(f, "`$`"), + TokenType::Pipe => write!(f, "`|`"), TokenType::Plus => write!(f, "`+`"), TokenType::Minus => write!(f, "`-`"), TokenType::Slash => write!(f, "`/`"), diff --git a/src/scanner/mod.rs b/src/scanner/mod.rs index c088428..c05491f 100644 --- a/src/scanner/mod.rs +++ b/src/scanner/mod.rs @@ -1,13 +1,12 @@ -use std::cmp::Ordering; -use std::collections::HashSet; - use crate::diagnostic::Diagnostic; -use crate::diagnostic::Error; +use crate::diagnostic::PassResult; +use crate::diagnostic::Snippet; use crate::files::Files; use lexeme::Token; use lexeme::TokenType; use lexeme::TokenValue; use regex::Regex; +use std::collections::HashSet; pub mod lexeme; @@ -41,6 +40,10 @@ impl<'a> Scanner<'a> { (r!(r"(\.)"), |_| (TokenType::Dot, TokenValue::None)), (r!(r"(\:)"), |_| (TokenType::Colon, TokenValue::None)), (r!(r"(;)"), |_| (TokenType::Semicolon, TokenValue::None)), + (r!(r"(~)"), |_| (TokenType::Tilde, TokenValue::None)), + (r!(r"(#)"), |_| (TokenType::Hash, TokenValue::None)), + (r!(r"(\$)"), |_| (TokenType::Dollar, TokenValue::None)), + (r!(r"(\|)"), |_| (TokenType::Pipe, TokenValue::None)), (r!(r"(\+)"), |_| (TokenType::Plus, TokenValue::None)), (r!(r"(-)"), |_| (TokenType::Minus, TokenValue::None)), (r!(r"(/)"), |_| (TokenType::Slash, TokenValue::None)), @@ -103,7 +106,7 @@ impl<'a> Scanner<'a> { } } - pub fn parse(&self) -> (Vec, HashSet) { + pub fn parse(&self) -> PassResult> { let mut tokens = Vec::new(); let mut diagnostics = HashSet::new(); @@ -125,9 +128,16 @@ impl<'a> Scanner<'a> { capture.and_then(|c| Some((c.get(0)?, c.get(1)?))) { if let Some(start) = panic_start_at.take() { - diagnostics.insert(Diagnostic::error("Invalid character").with_error(
- Error::primary(file, start, start - cursor, "Invalid character"), - )); + diagnostics.insert( + Diagnostic::error("L0001", "Invalid character").with_snippet( + Snippet::primary( + file, + start, + cursor - start, + "Invalid character", + ), + ), + ); } let value = matched.as_str(); @@ -139,49 +149,55 @@ impl<'a> Scanner<'a> { // Walk all the characters in the token. If the character is an \n, set indentation level to 0. // If its a \t increase indentation level by one + let mut indentation_changed = false; let mut indentation = 0; for c in capture.as_str().chars() { if c == '\n' { indentation = 0; + indentation_changed = true; } if c == '\t' { indentation += 1; + indentation_changed = true; } } - match indentation.cmp(&indentation_level) { - Ordering::Greater => { - tokens.push(Token::new( - file, - TokenType::IndentationOpen, - TokenValue::None, - cursor, - 1, - )); + if indentation_changed { + while indentation != indentation_level { + if indentation < indentation_level { + tokens.push(Token::new( + file, + TokenType::IndentationClose, + TokenValue::None, + cursor, + 1, + )); + indentation_level -= 1; + } else { + tokens.push(Token::new( + file, + TokenType::IndentationOpen, + TokenValue::None, + cursor, + 1, + )); + indentation_level += 1; + } } - Ordering::Less => { - tokens.push(Token::new( - file, - TokenType::IndentationClose, - TokenValue::None, - cursor, - 1, - )); - } - _ => {} - } - indentation_level = indentation; - cursor += capture.as_str().chars().count(); - break; + indentation_level = indentation; + cursor += capture.as_str().chars().count(); + break; + } } let length = capture.as_str().chars().count(); - let lexeme = Token::new(file, token_type, token_value, cursor, length); - tokens.push(lexeme); + if token_type != TokenType::Ignore { + tokens.push(Token::new(file, token_type, token_value, cursor, length)); + } cursor += length; was_matched = true; @@ -199,9 +215,11 @@ impl<'a> Scanner<'a> { } if let Some(start) = panic_start_at.take() { - 
diagnostics.insert(Diagnostic::error("Invalid characters").with_error( - Error::primary(file, cursor, cursor - start, "Invalid characters"), - )); + diagnostics.insert( + Diagnostic::error("L0002", "Invalid characters").with_snippet( + Snippet::primary(file, start, cursor - start, "Invalid characters"), + ), + ); } for _ in 0..indentation_level { @@ -223,7 +241,7 @@ impl<'a> Scanner<'a> { // )); } - (tokens, diagnostics) + PassResult::new(tokens, diagnostics) } } @@ -260,6 +278,32 @@ mod tests { test_scanner(" \t\n", vec![]); } + #[test] + fn parses_indentation2() { + test_scanner( + "struct test:\n\thello ~ string", + vec![ + (TokenType::Struct, TokenValue::None, 0, 6), + (TokenType::Enum, TokenValue::None, 7, 4), + (TokenType::Colon, TokenValue::None, 11, 1), + (TokenType::IndentationOpen, TokenValue::None, 12, 1), + ( + TokenType::Identifier, + TokenValue::Identifier("hello".to_string()), + 13, + 5, + ), + (TokenType::Tilde, TokenValue::None, 19, 1), + ( + TokenType::Identifier, + TokenValue::Identifier("string".to_string()), + 21, + 6, + ), + ], + ); + } + #[test] fn ignores_comments() { test_scanner("// this is a comment", vec![]); @@ -414,7 +458,7 @@ mod tests { files.insert("test file", input); let scanner = Scanner::new(&files); - let tokens = scanner.parse(); + let scan_pass = scanner.parse(); let expected = expected .into_iter() @@ -424,9 +468,9 @@ mod tests { .collect::>(); println!( - "Got: {}", - tokens - .0 + "Got:\n{}", + scan_pass + .result .iter() .map(|lexeme| format!( "{}@{}..{}", @@ -438,7 +482,7 @@ mod tests { .join("\n") ); println!( - "Expected: {}", + "Expected:\n{}", expected .iter() .map(|lexeme| format!( @@ -451,6 +495,6 @@ mod tests { .join("\n") ); - assert_eq!(tokens.0, expected); + assert_eq!(scan_pass.result, expected); } } diff --git a/src/transpiler/bend.rs b/src/transpiler/bend.rs index 5b8de2f..6f04849 100644 --- a/src/transpiler/bend.rs +++ b/src/transpiler/bend.rs @@ -30,6 +30,7 @@ fn transpile_expression(expression:
&Expression) -> String { match expression { Expression::Number(number) => number.to_string(), Expression::String(string) => format!("\"{}\"", string), + Expression::Boolean(boolean) => boolean.to_string(), Expression::Identifier(symbol) => symbol.clone(), Expression::Binary(left, operation, right) => { let left = transpile_expression(left); diff --git a/test.som b/test.som index 61c41bf..40eb8ca 100644 --- a/test.som +++ b/test.som @@ -1 +1,4 @@ -enum color: green blue red yellow 12 + +struct test2 + cheese ~ [int] + cheese ~ string