Skip to content

Commit

Permalink
🚧 (parser): Work on earley parser
Browse files Browse the repository at this point in the history
semver: chore
  • Loading branch information
Somfic committed Jun 25, 2024
1 parent e34f27a commit 5bf1525
Show file tree
Hide file tree
Showing 4 changed files with 92 additions and 126 deletions.
8 changes: 4 additions & 4 deletions src/abstract_syntax_tree/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,23 @@ use crate::diagnostic::Range;

pub mod builder;

#[derive(Debug)]
#[derive(PartialEq, Clone, Debug)]
pub enum AstractSyntax<'a> {
Statement(Statement<'a>),
}

#[derive(Debug)]
#[derive(PartialEq, Clone, Debug)]
pub enum Statement<'a> {
EnumDeclaration(Spanned<'a, EnumDeclaration<'a>>),
}

#[derive(Debug)]
#[derive(PartialEq, Clone, Debug)]
pub struct EnumDeclaration<'a> {
pub identifier: Spanned<'a, &'a str>,
pub items: Vec<Spanned<'a, &'a str>>,
}

#[derive(Debug)]
#[derive(PartialEq, Clone, Debug)]
pub struct Spanned<'a, T> {
pub value: &'a T,
pub range: &'a Range<'a>,
Expand Down
68 changes: 26 additions & 42 deletions src/concrete_syntax_tree/grammar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,27 +3,31 @@ use std::collections::HashMap;
use crate::scanner::token::TokenType;

#[derive(Debug, Clone, PartialEq)]
pub enum Term {
pub enum Symbol {
Terminal(TokenType),
NonTerminal(NonTerminal),
OneOrMore(NonTerminal),
ZeroOrMore(NonTerminal),
Optional(NonTerminal),
}

impl std::fmt::Display for Term {
impl std::fmt::Display for Symbol {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Term::Terminal(token_type) => write!(f, "{}", token_type),
Term::NonTerminal(non_terminal) => write!(f, "{}", non_terminal),
Symbol::Terminal(token_type) => write!(f, "{}", token_type),
Symbol::NonTerminal(non_terminal) => write!(f, "{}", non_terminal),
Symbol::OneOrMore(non_terminal) => write!(f, "{}+", non_terminal),
Symbol::ZeroOrMore(non_terminal) => write!(f, "{}*", non_terminal),
Symbol::Optional(non_terminal) => write!(f, "{}?", non_terminal),
}
}
}

#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum NonTerminal {
Start,
RootItems,
RootItem,
EnumDeclaration,
EnumItems,
EnumItem,
}

Expand All @@ -35,7 +39,7 @@ impl std::fmt::Display for NonTerminal {

#[derive(Debug)]
pub struct Grammar {
pub rules: HashMap<NonTerminal, Vec<Vec<Term>>>,
pub rules: HashMap<NonTerminal, Vec<Vec<Symbol>>>,
}

impl Default for Grammar {
Expand All @@ -44,69 +48,49 @@ impl Default for Grammar {
rules: HashMap::new(),
};

// start -> root_items
// start -> root_item*
grammar.add_rule(
NonTerminal::Start,
vec![Term::NonTerminal(NonTerminal::RootItems)],
);
// root_items -> root_item root_items | root_item
grammar.add_rules(
NonTerminal::RootItems,
vec![
vec![
Term::NonTerminal(NonTerminal::RootItems),
Term::NonTerminal(NonTerminal::RootItem),
],
vec![Term::NonTerminal(NonTerminal::RootItem)],
],
vec![Symbol::ZeroOrMore(NonTerminal::RootItem)],
);

// root_item -> enum_declaration
grammar.add_rule(
NonTerminal::RootItem,
vec![Term::NonTerminal(NonTerminal::EnumDeclaration)],
vec![Symbol::NonTerminal(NonTerminal::EnumDeclaration)],
);
// enum_declaration -> <enum> <identifier> <colon> <identifier>? <semicolon>
// enum_declaration -> <enum> <identifier> <colon> enum_item+ <semicolon>
grammar.add_rule(
NonTerminal::EnumDeclaration,
vec![
Term::Terminal(TokenType::Enum),
Term::Terminal(TokenType::Identifier),
Term::Terminal(TokenType::Colon),
Term::NonTerminal(NonTerminal::EnumItems),
Term::Terminal(TokenType::Semicolon),
],
);
// enum_items -> enum_item enum_items | enum_item
grammar.add_rules(
NonTerminal::EnumItems,
vec![
vec![
Term::NonTerminal(NonTerminal::EnumItem),
Term::NonTerminal(NonTerminal::EnumItems),
],
vec![Term::NonTerminal(NonTerminal::EnumItem)],
Symbol::Terminal(TokenType::Enum),
Symbol::Terminal(TokenType::Identifier),
Symbol::Terminal(TokenType::Colon),
Symbol::OneOrMore(NonTerminal::EnumItem),
Symbol::Terminal(TokenType::Semicolon),
],
);

// enum_item -> <identifier>
grammar.add_rule(
NonTerminal::EnumItem,
vec![Term::Terminal(TokenType::Identifier)],
vec![Symbol::Terminal(TokenType::Identifier)],
);

grammar
}
}

impl Grammar {
pub fn add_rule(&mut self, non_terminal: NonTerminal, rule: Vec<Term>) {
pub fn add_rule(&mut self, non_terminal: NonTerminal, rule: Vec<Symbol>) {
self.rules.entry(non_terminal).or_default().push(rule);
}

pub fn add_rules(&mut self, non_terminal: NonTerminal, rules: Vec<Vec<Term>>) {
pub fn add_rules(&mut self, non_terminal: NonTerminal, rules: Vec<Vec<Symbol>>) {
self.rules.entry(non_terminal).or_default().extend(rules);
}

pub fn get(&self, start: &NonTerminal) -> Option<&Vec<Vec<Term>>> {
pub fn get(&self, start: &NonTerminal) -> Option<&Vec<Vec<Symbol>>> {
self.rules.get(start)
}
}
140 changes: 60 additions & 80 deletions src/concrete_syntax_tree/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use grammar::{Grammar, NonTerminal, Term};
use grammar::{Grammar, NonTerminal, Symbol};

use crate::{
abstract_syntax_tree::AstractSyntax,
diagnostic::{Diagnostic, Error, Range},
scanner::token::Token,
};
Expand Down Expand Up @@ -30,10 +31,11 @@ impl<'a> Chart<'a> {
#[derive(Debug, Clone, PartialEq)]
pub struct EarleyItem<'a> {
pub head: NonTerminal,
pub body: Vec<Term>,
pub body: Vec<Symbol>,
pub dot: usize,
pub start: usize,
pub tree: Vec<ConcreteSyntax<'a>>,
ast_node: Option<AstractSyntax<'a>>,
}

#[derive(Debug, Clone, PartialEq)]
Expand Down Expand Up @@ -93,21 +95,22 @@ impl<'a> std::fmt::Display for EarleyItem<'a> {
}

impl<'a> EarleyItem<'a> {
pub fn new(head: NonTerminal, body: Vec<Term>, dot: usize, start: usize) -> Self {
pub fn new(head: NonTerminal, body: Vec<Symbol>, dot: usize, start: usize) -> Self {
Self {
head,
body,
dot,
start,
tree: Vec::new(),
ast_node: None,
}
}

/// Returns `true` once `dot` has reached or passed the end of `body`,
/// i.e. when `next` has no symbol left to return.
pub fn is_finished(&self) -> bool {
    self.body.len() <= self.dot
}

pub fn next(&self) -> Option<&Term> {
pub fn next(&self) -> Option<&Symbol> {
self.body.get(self.dot)
}
}
Expand All @@ -116,14 +119,14 @@ impl<'a> EarleyItem<'a> {
pub struct EarleyParser<'a> {
grammar: Grammar,
chart: Chart<'a>,
diagnostics: Vec<Diagnostic<'a>>,
}

impl<'a> EarleyParser<'a> {
/// Parses the given input tokens according to the grammar.
/// Returns the concrete syntax tree if the input is accepted by the grammar, otherwise the collected diagnostics.
pub fn parse(mut self, tokens: &'a [Token]) -> Result<ConcreteSyntax<'a>, Vec<Diagnostic<'a>>> {
let mut diagnostics = Vec::new();

self.diagnostics = Vec::new();
self.chart = Chart::new(tokens.len());

// Initial state
Expand All @@ -138,55 +141,31 @@ impl<'a> EarleyParser<'a> {
let mut j = 0;

if self.chart.states[i].is_empty() {
let expected_symbols: Vec<String> = self
.chart
.states
.get(i - 1)
.map(|state| {
state
.iter()
.filter_map(|item| item.next())
.filter_map(|term| match term {
Term::Terminal(token_type) => Some(token_type.to_string()),
_ => None,
})
.collect::<HashSet<String>>()
})
.iter()
.flat_map(|set| set.clone())
.collect::<Vec<_>>();

let token = tokens.get(i).unwrap_or(tokens.last().unwrap());

if !expected_symbols.is_empty() {
diagnostics.push(
Diagnostic::error("Syntax error").with_error(
Error::primary(
token.range.file_id,
i - 1,
0,
format!("Expected {}", expected_symbols.join(" or ")),
)
.transform_range(tokens),
),
);
}

self.add_diagnostic(tokens, i - 1);
// TODO: enter panic mode
}

while j < self.chart.states[i].len() {
let item = self.chart.states[i][j].clone();
if let Some(next_symbol) = item.next() {
match next_symbol {
Term::NonTerminal(non_terminal) => {
Symbol::NonTerminal(non_terminal) => {
self.predict(i, non_terminal);
}
Term::Terminal(token_type) => {
Symbol::Terminal(token_type) => {
if i < tokens.len() && token_type == &token.unwrap().token_type {
self.scan(i, &item, token.unwrap());
}
}
Symbol::OneOrMore(non_terminal) => {
self.handle_one_or_more(i, &item, non_terminal);
}
Symbol::ZeroOrMore(non_terminal) => {
self.handle_zero_or_more(i, &item, non_terminal);
}
Symbol::Optional(non_terminal) => {
self.handle_optional(i, &item, non_terminal);
}
}
} else {
self.complete(i, &item);
Expand All @@ -205,42 +184,8 @@ impl<'a> EarleyParser<'a> {
item.tree.clone(),
))
} else {
// Expected more input ...
let expected_symbols: Vec<String> = self
.chart
.states
.get(tokens.len())
.map(|state| {
state
.iter()
.filter_map(|item| item.next())
.filter_map(|term| match term {
Term::Terminal(token_type) => Some(token_type.to_string()),
_ => None,
})
.collect::<HashSet<String>>()
})
.iter()
.flat_map(|set| set.clone())
.collect::<Vec<_>>();

let token = tokens.last().unwrap();

if !expected_symbols.is_empty() {
diagnostics.push(
Diagnostic::error("Syntax error").with_error(
Error::primary(
token.range.file_id,
tokens.len(),
0,
format!("Expected {}", expected_symbols.join(" or ")),
)
.transform_range(tokens),
),
);
}

Err(diagnostics)
self.add_diagnostic(tokens, tokens.len());
Err(self.diagnostics)
}
}

Expand Down Expand Up @@ -281,7 +226,7 @@ impl<'a> EarleyParser<'a> {
fn complete(&mut self, position: usize, item: &EarleyItem<'a>) {
let start_state_set = self.chart.states[item.start].clone();
for state in start_state_set {
if let Some(Term::NonTerminal(non_terminal)) = state.next() {
if let Some(Symbol::NonTerminal(non_terminal)) = state.next() {
if non_terminal == &item.head {
let mut next_item = EarleyItem::new(
state.head.clone(),
Expand All @@ -301,5 +246,40 @@ impl<'a> EarleyParser<'a> {
}
}
}

/// Records a "Syntax error" diagnostic listing the terminals expected at `index`.
///
/// The expected terminals are taken from the chart state set at `index`: every
/// item whose next symbol is a `Symbol::Terminal` contributes that terminal's
/// display string. The list is sorted and de-duplicated so the resulting
/// "Expected a or b" message is stable from run to run.
///
/// The diagnostic is anchored at the token at `index`, falling back to the last
/// token when `index` is past the end of `tokens`.
///
/// Nothing is recorded when no terminal is expected at `index`, or when
/// `tokens` is empty (there is then no token to take a file id from).
fn add_diagnostic(&mut self, tokens: &'a [Token], index: usize) {
    // A missing state set simply yields no expected terminals.
    let mut expected_symbols: Vec<String> = self
        .chart
        .states
        .get(index)
        .into_iter()
        .flat_map(|state| state.iter())
        .filter_map(|item| match item.next() {
            Some(Symbol::Terminal(token_type)) => Some(token_type.to_string()),
            _ => None,
        })
        .collect();

    // Sort + dedup instead of going through a hash set: same uniqueness,
    // but the order of the alternatives in the message is deterministic.
    expected_symbols.sort_unstable();
    expected_symbols.dedup();

    if expected_symbols.is_empty() {
        return;
    }

    // Only look up the fallback when `index` is out of range, and do not
    // panic on an empty token slice.
    let token = match tokens.get(index).or_else(|| tokens.last()) {
        Some(token) => token,
        None => return,
    };

    self.diagnostics.push(
        Diagnostic::error("Syntax error").with_error(
            Error::primary(
                token.range.file_id,
                index,
                0,
                format!("Expected {}", expected_symbols.join(" or ")),
            )
            .transform_range(tokens),
        ),
    );
}
}
q
2 changes: 2 additions & 0 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ fn main() -> Result<()> {
"main",
"
enum colors: green blue red
enum colors: green blue red;
",
);

Expand Down

0 comments on commit 5bf1525

Please sign in to comment.