From 05a05c0bd404a5ff0dfac0455e0dab977767fe86 Mon Sep 17 00:00:00 2001 From: Lucas de Jong Date: Wed, 30 Oct 2024 16:17:20 +0100 Subject: [PATCH] =?UTF-8?q?=F0=9F=9A=A7=20WIP?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit semver: chore --- src/lexer/mod.rs | 3 + src/lexer/token.rs | 7 +- src/main.rs | 40 ++++--- src/parser/ast.rs | 17 +++ src/parser/lookup.rs | 4 + src/parser/mod.rs | 2 +- src/parser/statement.rs | 260 ++++++++++++++++++++++++++++++++++++++-- 7 files changed, 308 insertions(+), 25 deletions(-) diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs index 5e54767..464f5c1 100644 --- a/src/lexer/mod.rs +++ b/src/lexer/mod.rs @@ -176,6 +176,9 @@ impl<'de> Iterator for Lexer<'de> { "true" => Ok((TokenKind::Boolean, TokenValue::Boolean(true))), "false" => Ok((TokenKind::Boolean, TokenValue::Boolean(false))), "let" => Ok((TokenKind::Let, TokenValue::None)), + "struct" => Ok((TokenKind::Struct, TokenValue::None)), + "enum" => Ok((TokenKind::Enum, TokenValue::None)), + "trait" => Ok((TokenKind::Trait, TokenValue::None)), ident => Ok(( TokenKind::Identifier, TokenValue::Identifier(ident.to_string().into()), diff --git a/src/lexer/token.rs b/src/lexer/token.rs index 0140e66..138ad70 100644 --- a/src/lexer/token.rs +++ b/src/lexer/token.rs @@ -163,6 +163,8 @@ pub enum TokenKind { Struct, /// A enum keyword; `enum`. Enum, + /// A trait keyword; `trait`. + Trait, } impl Display for TokenKind { @@ -216,8 +218,9 @@ impl Display for TokenKind { TokenKind::Question => write!(f, "`?`"), TokenKind::Pipe => write!(f, "`|`"), TokenKind::Caret => write!(f, "`^`"), - TokenKind::And => todo!("`&&`"), - TokenKind::Or => todo!("`||`"), + TokenKind::And => write!(f, "`&&`"), + TokenKind::Or => write!(f, "`||`"), + TokenKind::Trait => write!(f, "`trait`"), } } } diff --git a/src/main.rs b/src/main.rs index 467748d..08796de 100644 --- a/src/main.rs +++ b/src/main.rs @@ -9,7 +9,23 @@ pub mod lexer; pub mod parser; fn main() { - let input = "let value = a == { let b = 1 if true else 2 };"; + let input = " + let x = {1}; + let a = \"hello\"; + let b = 123; + let b = b if b <= 100 else 100; + + struct Human: name, age; + enum Color: red, green, blue, orange; + + fn add(a, b) { a + b } + fn sub(a, b) { a - b } + fn mul(a, b) { a * b } + fn div(a, b) { a / b } + + trait Add: + fn add(a, b); + "; miette::set_hook(Box::new(|_| { Box::new( @@ -56,32 +72,26 @@ impl miette::highlighters::HighlighterState for SomHighlighterState { let style: Style = match &token { Ok(token) => match &token.kind { // Comment / quote -> 92, 99, 112 + italic - TokenKind::If | TokenKind::Else | TokenKind::Let => { - Style::new().fg_rgb::<197, 120, 221>() - } + TokenKind::If + | TokenKind::Else + | TokenKind::Let + | TokenKind::Struct + | TokenKind::Enum + | TokenKind::Function | TokenKind::Trait => Style::new().fg_rgb::<197, 120, 221>(), TokenKind::Identifier => Style::new().fg_rgb::<224, 108, 117>(), TokenKind::String => Style::new().fg_rgb::<152, 195, 121>().italic(), TokenKind::Integer | TokenKind::Decimal => { Style::new().fg_rgb::<209, 154, 102>() } TokenKind::Boolean => Style::new().fg_rgb::<86, 156, 214>(), - TokenKind::CurlyOpen - | TokenKind::CurlyClose - | TokenKind::ParenOpen - | TokenKind::ParenClose - | TokenKind::SquareOpen - | TokenKind::SquareClose - | TokenKind::Equal + + TokenKind::Equal | TokenKind::LessThan | TokenKind::GreaterThan | TokenKind::LessThanOrEqual | TokenKind::GreaterThanOrEqual | TokenKind::Equality | TokenKind::Inequality - | TokenKind::Plus - | TokenKind::Minus - | TokenKind::Star - | TokenKind::Slash | TokenKind::Percent | TokenKind::Not | TokenKind::And diff --git a/src/parser/ast.rs b/src/parser/ast.rs index c7b85f4..13cd0ac 100644 --- a/src/parser/ast.rs +++ b/src/parser/ast.rs @@ -11,6 +11,23 @@ pub enum Statement<'de> { Block(Vec>), Expression(Expression<'de>), Assignment(Cow<'de, str>, Expression<'de>), + Struct { + name: Cow<'de, str>, + fields: Vec>, + }, + Enum { + name: Cow<'de, str>, + variants: Vec>, + }, + Function { + name: Cow<'de, str>, + parameters: Vec>, + body: Expression<'de>, + }, + Trait { + name: Cow<'de, str>, + functions: Vec<(Cow<'de, str>, Vec>)>, + }, } #[derive(Debug)] diff --git a/src/parser/lookup.rs b/src/parser/lookup.rs index b7a196a..608696f 100644 --- a/src/parser/lookup.rs +++ b/src/parser/lookup.rs @@ -186,6 +186,10 @@ impl<'de> Default for Lookup<'de> { expression::binary::or, ) .add_statement_handler(TokenKind::Let, statement::let_) + .add_statement_handler(TokenKind::Struct, statement::struct_) + .add_statement_handler(TokenKind::Enum, statement::enum_) + .add_statement_handler(TokenKind::Function, statement::function_) + .add_statement_handler(TokenKind::Trait, statement::trait_) } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index fd93bb8..f3bff49 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -28,7 +28,7 @@ impl<'de> Parser<'de> { let mut statements = vec![]; while self.lexer.peek().is_some() { - statements.push(statement::parse(self, false).wrap_err("while parsing main block")?); + statements.push(statement::parse(self, false)?); } Ok(Symbol::Statement(Statement::Block(statements))) diff --git a/src/parser/statement.rs b/src/parser/statement.rs index 86d64b4..b362e1a 100644 --- a/src/parser/statement.rs +++ b/src/parser/statement.rs @@ -17,16 +17,26 @@ pub fn parse<'de>(parser: &mut Parser<'de>, optional_semicolon: bool) -> Result< let statement_handler = parser.lookup.statement_lookup.get(&token.kind); + let token_kind = &token.clone(); + let statement = match statement_handler { Some(handler) => handler(parser)?, - None => Statement::Expression(expression::parse(parser, BindingPower::None)?), - }; + None => { + let expression = expression::parse(parser, BindingPower::None) + .wrap_err("while parsing a statement")?; + + if !optional_semicolon { + parser + .lexer + .expect( + TokenKind::Semicolon, + "expected a semicolon at the end of an expression", + ) + .wrap_err(format!("while parsing for {}", token_kind))?; + } - if !optional_semicolon { - parser.lexer.expect( - TokenKind::Semicolon, - "expected a semicolon at the end of an expression", - )?; + Statement::Expression(expression) + } }; Ok(statement) @@ -48,5 +58,241 @@ pub fn let_<'de>(parser: &mut Parser<'de>) -> Result> { .expect(TokenKind::Equal, "expected an equal sign")?; let expression = expression::parse(parser, BindingPower::None)?; + parser + .lexer + .expect(TokenKind::Semicolon, "expected a semicolon")?; + Ok(Statement::Assignment(identifier, expression)) } + +pub fn struct_<'de>(parser: &mut Parser<'de>) -> Result> { + parser + .lexer + .expect(TokenKind::Struct, "expected a struct keyword")?; + + let identifier = parser + .lexer + .expect(TokenKind::Identifier, "expected an identifier")?; + + let identifier = match identifier.value { + TokenValue::Identifier(identifier) => identifier, + _ => unreachable!(), + }; + + parser.lexer.expect(TokenKind::Colon, "expected a colon")?; + + let mut fields = vec![]; + + while parser.lexer.peek().map_or(false, |token| { + token + .as_ref() + .map_or(false, |token| token.kind != TokenKind::Semicolon) + }) { + if !fields.is_empty() { + parser.lexer.expect(TokenKind::Comma, "expected a comma")?; + } + + let field = parser + .lexer + .expect(TokenKind::Identifier, "expected an identifier")?; + + let field = match field.value { + TokenValue::Identifier(field) => field, + _ => unreachable!(), + }; + + fields.push(field); + } + + parser + .lexer + .expect(TokenKind::Semicolon, "expected a semicolon")?; + + Ok(Statement::Struct { + name: identifier, + fields, + }) +} + +pub fn enum_<'de>(parser: &mut Parser<'de>) -> Result> { + parser + .lexer + .expect(TokenKind::Enum, "expected an enum keyword")?; + + let identifier = parser + .lexer + .expect(TokenKind::Identifier, "expected an identifier")?; + + let identifier = match identifier.value { + TokenValue::Identifier(identifier) => identifier, + _ => unreachable!(), + }; + + parser.lexer.expect(TokenKind::Colon, "expected a colon")?; + + let mut variants = vec![]; + + while parser.lexer.peek().map_or(false, |token| { + token + .as_ref() + .map_or(false, |token| token.kind != TokenKind::Semicolon) + }) { + if !variants.is_empty() { + parser.lexer.expect(TokenKind::Comma, "expected a comma")?; + } + + let variant = parser + .lexer + .expect(TokenKind::Identifier, "expected an identifier")?; + + let variant = match variant.value { + TokenValue::Identifier(variant) => variant, + _ => unreachable!(), + }; + + variants.push(variant); + } + + parser + .lexer + .expect(TokenKind::Semicolon, "expected a semicolon")?; + + Ok(Statement::Enum { + name: identifier, + variants, + }) +} + +pub fn function_<'de>(parser: &mut Parser<'de>) -> Result> { + parser + .lexer + .expect(TokenKind::Function, "expected a fn keyword")?; + + let identifier = parser + .lexer + .expect(TokenKind::Identifier, "expected an identifier")?; + + let identifier = match identifier.value { + TokenValue::Identifier(identifier) => identifier, + _ => unreachable!(), + }; + + parser + .lexer + .expect(TokenKind::ParenOpen, "expected an open parenthesis")?; + + let mut parameters = vec![]; + + while parser.lexer.peek().map_or(false, |token| { + token + .as_ref() + .map_or(false, |token| token.kind != TokenKind::ParenClose) + }) { + if !parameters.is_empty() { + parser.lexer.expect(TokenKind::Comma, "expected a comma")?; + } + + let parameter = parser + .lexer + .expect(TokenKind::Identifier, "expected an identifier")?; + + let parameter = match parameter.value { + TokenValue::Identifier(parameter) => parameter, + _ => unreachable!(), + }; + + parameters.push(parameter); + } + + parser + .lexer + .expect(TokenKind::ParenClose, "expected a close parenthesis")?; + + let body = expression::parse(parser, BindingPower::None)?; + + Ok(Statement::Function { + name: identifier, + parameters, + body, + }) +} + +pub fn trait_<'de>(parser: &mut Parser<'de>) -> Result> { + parser + .lexer + .expect(TokenKind::Trait, "expected a trait keyword")?; + + let identifier = parser + .lexer + .expect(TokenKind::Identifier, "expected an identifier")?; + + let identifier = match identifier.value { + TokenValue::Identifier(identifier) => identifier, + _ => unreachable!(), + }; + + parser.lexer.expect(TokenKind::Colon, "expected a colon")?; + + let mut functions = vec![]; + + while parser.lexer.peek().map_or(false, |token| { + token + .as_ref() + .map_or(false, |token| token.kind != TokenKind::Semicolon) + }) { + if !functions.is_empty() { + parser.lexer.expect(TokenKind::Comma, "expected a comma")?; + } + + parser + .lexer + .expect(TokenKind::Function, "expected a fn keyword")?; + + let function = parser + .lexer + .expect(TokenKind::Identifier, "expected an identifier")?; + + let function = match function.value { + TokenValue::Identifier(function) => function, + _ => unreachable!(), + }; + + parser + .lexer + .expect(TokenKind::ParenOpen, "expected an open parenthesis")?; + + let mut parameters = vec![]; + + while parser.lexer.peek().map_or(false, |token| { + token + .as_ref() + .map_or(false, |token| token.kind != TokenKind::ParenClose) + }) { + if !parameters.is_empty() { + parser.lexer.expect(TokenKind::Comma, "expected a comma")?; + } + + let parameter = parser + .lexer + .expect(TokenKind::Identifier, "expected an identifier")?; + + let parameter = match parameter.value { + TokenValue::Identifier(parameter) => parameter, + _ => unreachable!(), + }; + + parameters.push(parameter); + } + + functions.push((function, parameters)); + } + + parser + .lexer + .expect(TokenKind::Semicolon, "expected a semicolon")?; + + Ok(Statement::Trait { + name: identifier, + functions, + }) +}