From f5956f7adf76712d553dc4f9f60a6abeba3a5a14 Mon Sep 17 00:00:00 2001 From: Colin Rofls Date: Fri, 15 Nov 2024 12:13:09 -0500 Subject: [PATCH] [fea-rs] Make parse_string return ParseTree This gives us a simple way to generate an AST, which is potentially useful for things like testing. --- fea-lsp/src/document.rs | 10 ++++++--- fea-rs/benches/parsing.rs | 2 +- fea-rs/src/bin/highlight.rs | 4 ++-- fea-rs/src/diagnostic.rs | 5 +++++ fea-rs/src/parse.rs | 38 ++++++++++++++++++++++++++------- fea-rs/src/token_tree.rs | 8 +++++-- fea-rs/src/token_tree/cursor.rs | 12 +++++------ 7 files changed, 57 insertions(+), 22 deletions(-) diff --git a/fea-lsp/src/document.rs b/fea-lsp/src/document.rs index 2e18252c2..57aeda97b 100644 --- a/fea-lsp/src/document.rs +++ b/fea-lsp/src/document.rs @@ -164,9 +164,13 @@ fn compute_offsets(text: &str) -> Vec<usize> { } fn parse(text: &str) -> (Vec<(Kind, Range<usize>)>, Vec<Diagnostic>) { - let (root, errors) = fea_rs::parse::parse_string(text); - let result = root.iter_tokens().map(|t| (t.kind, t.range())).collect(); - (result, errors) + let (ast, errors) = fea_rs::parse::parse_string(text); + let result = ast + .root() + .iter_tokens() + .map(|t| (t.kind, t.range())) + .collect(); + (result, errors.diagnostics().to_vec()) } pub static STYLES: &[SemanticTokenType] = &[ diff --git a/fea-rs/benches/parsing.rs b/fea-rs/benches/parsing.rs index b03815828..72d60dc97 100644 --- a/fea-rs/benches/parsing.rs +++ b/fea-rs/benches/parsing.rs @@ -8,7 +8,7 @@ const DEVA: &str = include_str!("../test-data/real-files/plex_devanagari.fea"); const LATN: &str = include_str!("../test-data/real-files/roboto-regular.fea"); const ARAB: &str = include_str!("../test-data/real-files/tajawal-regular.fea"); -fn parse_source(source: Arc<str>) -> fea_rs::Node { +fn parse_source(source: Arc<str>) -> fea_rs::ParseTree { fea_rs::parse::parse_string(source).0 } diff --git a/fea-rs/src/bin/highlight.rs b/fea-rs/src/bin/highlight.rs index ad5d45b5b..876a82a99 100644 --- a/fea-rs/src/bin/highlight.rs +++
b/fea-rs/src/bin/highlight.rs @@ -5,11 +5,11 @@ use std::{env, ffi::OsStr, path::PathBuf}; fn main() { let args = Args::get_from_env_or_exit(); let raw_fea = std::fs::read_to_string(args.path).unwrap(); - let (node, _errors) = fea_rs::parse::parse_string(raw_fea); + let (ast, _errors) = fea_rs::parse::parse_string(raw_fea); let mut current_style = Style::new().fg(Colour::White); let mut needs_paint = String::new(); - for token in node.iter_tokens() { + for token in ast.root().iter_tokens() { let style = fea_rs::util::style_for_kind(token.kind); // if the style has changed, draw the previous range. if style != current_style { diff --git a/fea-rs/src/diagnostic.rs b/fea-rs/src/diagnostic.rs index 235f282b9..30335a444 100644 --- a/fea-rs/src/diagnostic.rs +++ b/fea-rs/src/diagnostic.rs @@ -156,6 +156,11 @@ impl DiagnosticSet { }) } + /// Return the underlying diagnostics, as a slice + pub fn diagnostics(&self) -> &[Diagnostic] { + &self.messages + } + /// Returns an opaque type that can pretty-print the diagnostics pub fn display(&self) -> impl std::fmt::Display + '_ { DiagnosticDisplayer(self) diff --git a/fea-rs/src/parse.rs b/fea-rs/src/parse.rs index ddc8a5dab..12ba3eeab 100644 --- a/fea-rs/src/parse.rs +++ b/fea-rs/src/parse.rs @@ -10,7 +10,11 @@ mod parser; mod source; mod tree; -use std::{ffi::OsString, path::PathBuf, sync::Arc}; +use std::{ + ffi::{OsStr, OsString}, + path::PathBuf, + sync::Arc, +}; pub use lexer::TokenSet; pub use source::{FileSystemResolver, SourceLoadError, SourceResolver}; @@ -20,7 +24,7 @@ pub(crate) use context::{IncludeStatement, ParseContext}; pub(crate) use parser::Parser; pub(crate) use source::{FileId, Source, SourceList, SourceMap}; -use crate::{Diagnostic, DiagnosticSet, GlyphMap, Node}; +use crate::{DiagnosticSet, GlyphMap}; /// Attempt to parse a feature file from disk, including its imports. /// @@ -76,13 +80,31 @@ pub fn parse_root( /// Convenience method to parse a block of FEA from memory. 
/// /// This is useful for things like testing or syntax highlighting of a single file, -/// but it cannot handle imports, or handle ambiguous glyph names. +/// but it cannot handle includes, or handle ambiguous glyph names. /// /// The input text can be any of `&str`, `String`, or `Arc<str>`. -pub fn parse_string(text: impl Into<Arc<str>>) -> (Node, Vec<Diagnostic>) { - let source = source::Source::new("", text.into()); - let (node, errs, _) = context::parse_src(&source, None); - (node, errs) +/// +/// # Panics +/// +/// Panics if the input contains any include statements. +pub fn parse_string(text: impl Into<Arc<str>>) -> (ParseTree, DiagnosticSet) { + const SRC_NAME: &str = "parse::parse_string"; + let text = text.into(); + parse_root( + SRC_NAME.into(), + None, + Box::new(move |s: &OsStr| { + if s == SRC_NAME { + Ok(text.clone()) + } else { + Err(SourceLoadError::new( + s.to_os_string(), + "parse_string cannot handle imports", + )) + } + }), + ) + .unwrap() } /// Parse an arbitrary block of FEA text with a specific parsing function. @@ -90,7 +112,7 @@ pub fn parse_string(text: impl Into<Arc<str>>) -> (Node, Vec<Diagnostic>) { /// This can be used to parse any part of the grammar, including elements that /// are not valid at the top level.
#[cfg(test)] -pub(crate) fn parse_node(text: &str, parser_fn: impl FnOnce(&mut Parser)) -> Node { +pub(crate) fn parse_node(text: &str, parser_fn: impl FnOnce(&mut Parser)) -> crate::Node { let mut sink = crate::token_tree::AstSink::new(text, FileId::CURRENT_FILE, None); let mut parser = Parser::new(text, &mut sink); parser_fn(&mut parser); diff --git a/fea-rs/src/token_tree.rs b/fea-rs/src/token_tree.rs index 2dbd4662a..d96b3a3d3 100644 --- a/fea-rs/src/token_tree.rs +++ b/fea-rs/src/token_tree.rs @@ -631,8 +631,12 @@ mod tests { #[test] fn token_iter() { - let (root, _errs) = crate::parse::parse_string(SAMPLE_FEA); - let reconstruct = root.iter_tokens().map(Token::as_str).collect::<String>(); + let (ast, _errs) = crate::parse::parse_string(SAMPLE_FEA); + let reconstruct = ast + .root() + .iter_tokens() + .map(Token::as_str) + .collect::<String>(); crate::assert_eq_str!(SAMPLE_FEA, reconstruct); } } diff --git a/fea-rs/src/token_tree/cursor.rs b/fea-rs/src/token_tree/cursor.rs index f3253d867..c502c372b 100644 --- a/fea-rs/src/token_tree/cursor.rs +++ b/fea-rs/src/token_tree/cursor.rs @@ -197,10 +197,10 @@ mod tests { #[test] fn abs_positions() { - let (root, errs) = crate::parse::parse_string(SAMPLE_FEA); + let (ast, errs) = crate::parse::parse_string(SAMPLE_FEA); assert!(errs.is_empty()); let mut last_end = 0; - for token in root.iter_tokens() { + for token in ast.root().iter_tokens() { assert_eq!( token.range().start, last_end, @@ -214,8 +214,8 @@ mod tests { #[test] fn ascend_jump() { - let (root, _errs) = crate::parse::parse_string(SAMPLE_FEA); - let mut cursor = root.cursor(); + let (ast, _errs) = crate::parse::parse_string(SAMPLE_FEA); + let mut cursor = ast.root().cursor(); cursor.advance(); cursor.advance(); cursor.advance(); @@ -250,9 +250,9 @@ mod tests { #[test] fn advance() { - let (root, errs) = crate::parse::parse_string("feature kern { pos a b -20; }kern;"); + let (ast, errs) = crate::parse::parse_string("feature kern { pos a b -20; }kern;");
assert!(errs.is_empty()); - let mut cursor = root.cursor(); + let mut cursor = ast.root().cursor(); assert!( at_node(&cursor, Kind::FeatureNode), "{:?}",