From 6300754a1c5b8b2b617499f5bcda99114489550c Mon Sep 17 00:00:00 2001 From: Sung-Shik Jongmans Date: Fri, 4 Oct 2024 09:54:47 +0200 Subject: [PATCH 01/14] Simplify `isRecursive` to check if a symbol/production is recursive --- .../main/rascal/lang/rascal/grammar/Util.rsc | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/Util.rsc b/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/Util.rsc index 42476c2..a98880d 100644 --- a/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/Util.rsc +++ b/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/Util.rsc @@ -37,18 +37,21 @@ bool tryParse(Grammar g, Symbol s, str input, bool allowAmbiguity = false) { Checks if symbol `s` is recursive in grammar `g` } -bool isRecursive(Grammar g, Symbol s) { - set[Symbol] getChildren(Symbol s) - = {s | p <- lookup(g, s), /Symbol s := p.symbols}; +// TODO: Compute a map and memoize the results +bool isRecursive(Grammar g, Symbol s, set[Symbol] checking = {}) + = s in checking + ? true + : any(p <- lookup(g, delabel(s)), + /Symbol child := p.symbols, + isRecursive(g, child, checking = checking + s)); - bool check(set[Symbol] checking, Symbol s) - = s in checking - ? true - : any(child <- getChildren(s), check(checking + s, child)); - - return check({}, s); +@synopsis{ + Checks if production `p` is recursive in grammar `g` } +bool isRecursive(Grammar g, Production p) + = any(/Symbol s := p.symbols, isRecursive(g, s)); + @synopsis{ Representation of a pointer to a symbol in (the list of symbols of) a production. This is useful to distinguish between different occurrences of From ad9a60d7e2c330c3332acf3979870d6527edc51d Mon Sep 17 00:00:00 2001 From: Sung-Shik Jongmans Date: Fri, 4 Oct 2024 09:57:09 +0200 Subject: [PATCH 02/14] Simplify interface (type `Predicate`) to query dependency graphs --- .../lang/rascal/grammar/analyze/Dependencies.rsc | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Dependencies.rsc b/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Dependencies.rsc index ba1076d..eceeb29 100644 --- a/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Dependencies.rsc +++ b/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Dependencies.rsc @@ -81,14 +81,10 @@ alias Graph[&Node] = tuple[ rel[&Node, &Node] edges]; @synopsis { - Representation of predicates to select nodes in a graph based on their own - properties, their ancestors, and their descendants + Representation of predicates to select nodes in a graph } -alias Predicate[&Node] = bool( - &Node n, - set[&Node] ancestors /* of `n` in the graph */, - set[&Node] descendants /* of `n` in the graph */); +alias Predicate[&Node] = bool(&Node n); @synopsis{ Gets the nodes of graph `g` that satisfy predicate `p`, optionally including @@ -103,7 +99,7 @@ set[&Node] getNodes(Graph[&Node] g, Predicate[&Node] p, rel[&Node, &Node] ancestors = invert(descendants); // Select nodes - nodes = {n | n <- g.nodes, p(n, ancestors[n] ? {}, descendants[n] ? {})}; + nodes = {n | n <- g.nodes, p(n)}; nodes += ({} | it + (ancestors[n] ? {}) | getAncestors, n <- nodes); nodes += ({} | it + (descendants[n] ? {}) | getDescendants, n <- nodes); return nodes; From 350a0153b6917cecb2bfa277b3a360c162ab0e7c Mon Sep 17 00:00:00 2001 From: Sung-Shik Jongmans Date: Fri, 4 Oct 2024 09:58:05 +0200 Subject: [PATCH 03/14] Add function to query ancestors of a node in a dependency graph --- .../rascal/grammar/analyze/Dependencies.rsc | 30 +++++++++++++++++-- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Dependencies.rsc b/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Dependencies.rsc index eceeb29..4d01999 100644 --- a/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Dependencies.rsc +++ b/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Dependencies.rsc @@ -16,6 +16,7 @@ import Grammar; import ParseTree; import Relation; import Set; +import util::Maybe; import lang::rascal::grammar::Util; @@ -49,7 +50,7 @@ Dependencies deps(Graph[Production] g) { = deps(removeNodes(g, getNodes(g, p, getAncestors = removeAncestors))); list[Production] getProds() = toList(g.nodes); - + return deps(retainProds, removeProds, getProds); } @@ -93,7 +94,7 @@ alias Predicate[&Node] = bool(&Node n); set[&Node] getNodes(Graph[&Node] g, Predicate[&Node] p, bool getAncestors = false, bool getDescendants = false) { - + // Compute ancestors/descendants of nodes rel[&Node, &Node] descendants = g.edges+; rel[&Node, &Node] ancestors = invert(descendants); @@ -117,4 +118,27 @@ Graph[&Node] retainNodes(Graph[&Node] g, set[&Node] nodes) } Graph[&Node] removeNodes(Graph[&Node] g, set[&Node] nodes) - = ; \ No newline at end of file + = ; + +@synopsis{ + Gets the closest ancestors that satisfy predicate `p` in each branch upward + from node `n` in graph `g`, optionally including `\default` when none of the + ancestors in a branch satisfy `p` +} + +set[&Node] getClosestAncestors( + Graph[&Node] g, Predicate[&Node] p, &Node n, + set[&Node] getting = {}, Maybe[&Node] \default = nothing()) { + + if (n in getting) { + return {}; + } else { + set[&Node] parents = invert(g.edges)[n]; + if ({} == parents && just(_) := \default) { + return {\default.val}; + } else { + set[&Node] recur(&Node parent) = getClosestAncestors(g, p, parent, getting = getting + n, \default = \default); + return {*(p(parent) ? {parent} : recur(parent)) | parent <- parents}; + } + } +} \ No newline at end of file From a6d8baed9ec3b5a94de4794eb117d53f14000990 Mon Sep 17 00:00:00 2001 From: Sung-Shik Jongmans Date: Fri, 4 Oct 2024 09:58:53 +0200 Subject: [PATCH 04/14] Fix issue to compute the length of labeled literals --- .../src/main/rascal/lang/rascal/grammar/analyze/Symbols.rsc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Symbols.rsc b/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Symbols.rsc index 164d401..fb94466 100644 --- a/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Symbols.rsc +++ b/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Symbols.rsc @@ -142,6 +142,8 @@ private default Maybe[int] max(Maybe[int] _, Maybe[int] _) = nothing(); Computes the length of a terminal symbol as a range } +Range length(label(_, symbol)) = length(symbol); + Range length(\lit(string)) = ; Range length(\cilit(string)) = ; Range length(\char-class(_)) = <1, just(1)>; From d93eb34e840bffe5dc351170be3c240a447d961b Mon Sep 17 00:00:00 2001 From: Sung-Shik Jongmans Date: Fri, 4 Oct 2024 10:01:26 +0200 Subject: [PATCH 05/14] Add analysis to convert only productions with an *active* category --- .../main/rascal/lang/textmate/Conversion.rsc | 41 +++++++++++-------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/rascal-textmate-core/src/main/rascal/lang/textmate/Conversion.rsc b/rascal-textmate-core/src/main/rascal/lang/textmate/Conversion.rsc index 5e959b9..a81efe0 100644 --- a/rascal-textmate-core/src/main/rascal/lang/textmate/Conversion.rsc +++ b/rascal-textmate-core/src/main/rascal/lang/textmate/Conversion.rsc @@ -4,6 +4,8 @@ module lang::textmate::Conversion +import IO; + import Grammar; import ParseTree; import String; @@ -107,20 +109,26 @@ list[ConversionUnit] analyze(RscGrammar rsc, str name) { str jobLabel = "Analyzing)">"; jobStart(jobLabel, work = 4); - // Define auxiliary predicates - bool isCyclic(Production p, set[Production] ancestors, _) - = p in ancestors; - bool isNonEmpty(prod(def, _, _), _, _) - = !tryParse(rsc, delabel(def), ""); - bool hasCategory(prod(_, _, attributes), _, _) - = /\tag("category"(_)) := attributes; - // Analyze dependencies among productions jobStep(jobLabel, "Analyzing productions"); Graph[Production] graph = toGraph(rsc); - list[Production] prods = deps(graph).retainProds(isNonEmpty).retainProds(hasCategory).getProds(); - list[Production] prodsNonRecursive = prods & deps(graph).removeProds(isCyclic, true).getProds(); - list[Production] prodsRecursive = prods - prodsNonRecursive; + Production marker = prod(\empty(), [], {}); + + bool hasCategory(Production p) + = /\tag("category"(_)) := p; + bool hasActiveCategory(Production p) + = hasCategory(p) && marker in getClosestAncestors(graph, hasCategory, p, \default = just(marker)); + bool isNonEmpty(prod(def, _, _)) + = !tryParse(rsc, delabel(def), ""); + + list[Production] prods = deps(graph) + .retainProds(hasActiveCategory) + .retainProds(isNonEmpty) + .getProds(); + + // for (p <- prods) { + // println(""); + // } // Analyze delimiters jobStep(jobLabel, "Analyzing delimiters"); @@ -137,17 +145,18 @@ list[ConversionUnit] analyze(RscGrammar rsc, str name) { // Prepare units jobStep(jobLabel, "Preparing units"); - - bool isRecursive(Production p) - = p in prodsRecursive; bool isEmptyProd(prod(_, [\alt(alternatives)], _)) = alternatives == {}; - + set[ConversionUnit] units = {}; - units += {unit(rsc, p, isRecursive(p), hasNewline(rsc, p), getOuterDelimiterPair(rsc, p), getInnerDelimiterPair(rsc, p, getOnlyFirst = true)) | p <- prods}; + units += {unit(rsc, p, isRecursive(rsc, p), hasNewline(rsc, p), getOuterDelimiterPair(rsc, p), getInnerDelimiterPair(rsc, p, getOnlyFirst = true)) | p <- prods}; units += {unit(rsc, p, false, false, , ) | p <- prodsDelimiters + prodsKeywords, !isEmptyProd(p)}; list[ConversionUnit] ret = sort([*removeStrictPrefixes(units)]); + // for (u <- units) { + // println(": "); + // } + // Return jobEnd(jobLabel); return ret; From 4a209dd4692663d8be3e2066aeb04c18e547403b Mon Sep 17 00:00:00 2001 From: Sung-Shik Jongmans Date: Tue, 8 Oct 2024 10:50:33 +0200 Subject: [PATCH 06/14] Add a new module to analyse categories --- .../rascal/grammar/analyze/Categories.rsc | 47 +++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Categories.rsc diff --git a/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Categories.rsc b/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Categories.rsc new file mode 100644 index 0000000..b21edf3 --- /dev/null +++ b/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Categories.rsc @@ -0,0 +1,47 @@ +module lang::rascal::grammar::analyze::Categories + +import Grammar; +import ParseTree; + +import lang::rascal::grammar::Util; + +@synopsis{ + Special value to indicate that a production has no category +} + +public str NO_CATEGORY = ""; + +@synopsis{ + Gets a set of categories such that, for each category, there exists a string + with that category produced by production `p`, as part of a string produced + by a start production of grammar `g` +} + +set[str] getCategories(Grammar g, Production p) + = getCategoriesByProduction(g)[p]; + +@memo +private map[Production, set[str]] getCategoriesByProduction(Grammar g) { + map[Production, set[str]] ret = (p: {} | /p: prod(_, _, _) := g); + + void doGet(Production p, set[str] parentCategories) { + set[str] categories = {c | /\tag("category"(str c)) := p}; + + set[str] old = ret[p]; + set[str] new = _ <- categories ? categories : old + parentCategories; + ret[p] = new; + + // If the new categories of `p` are different from the old ones, then + // propagate these changes to the children of `p` + for (old != new, /Symbol s := p.symbols, child <- lookup(g, delabel(s))) { + doGet(child, new); + } + } + + // Propagate categories from the roots of the grammar + for (root: prod(\start(_), _, _) <- ret) { + doGet(root, {NO_CATEGORY}); + } + + return ret; +} \ No newline at end of file From 52297f115ebd7dce1e2ec54cf8e6ceceac3f04a8 Mon Sep 17 00:00:00 2001 From: Sung-Shik Jongmans Date: Tue, 8 Oct 2024 10:51:22 +0200 Subject: [PATCH 07/14] Add a new function to get the parents of a symbol in a grammar --- .../src/main/rascal/lang/rascal/grammar/Util.rsc | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/Util.rsc b/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/Util.rsc index a98880d..8a6c611 100644 --- a/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/Util.rsc +++ b/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/Util.rsc @@ -108,6 +108,17 @@ list[Pointer] find(Grammar g, Production p, Symbol s, Direction dir = forward()) return doFind({}, p, s); } +@synopsis{ + Lookdowns a list of productions for symbol `s` in grammar `g` +} + +// TODO: Rename this function because the current name makes little sense in +// isolation (it's supposed to be the opposite of `lookup`, but in that sense, +// the directions are illogical) + +set[Production] lookdown(Grammar g, Symbol s) + = {parent | /parent: prod(_, /Symbol _: s, _) := g}; + @synopsis{ Lookups a list of productions for symbol `s` in grammar `g`, replacing formal parameters with actual parameters when needed From eb9b78afd283e841de68c4b5ac5f2947a3c44a38 Mon Sep 17 00:00:00 2001 From: Sung-Shik Jongmans Date: Tue, 8 Oct 2024 10:52:02 +0200 Subject: [PATCH 08/14] Add the new analysis of categories to the converter --- .../main/rascal/lang/textmate/Conversion.rsc | 53 ++++++++++--------- 1 file changed, 28 insertions(+), 25 deletions(-) diff --git a/rascal-textmate-core/src/main/rascal/lang/textmate/Conversion.rsc b/rascal-textmate-core/src/main/rascal/lang/textmate/Conversion.rsc index 54edf2a..1e0d5c9 100644 --- a/rascal-textmate-core/src/main/rascal/lang/textmate/Conversion.rsc +++ b/rascal-textmate-core/src/main/rascal/lang/textmate/Conversion.rsc @@ -4,8 +4,6 @@ module lang::textmate::Conversion -import IO; - import Grammar; import ParseTree; import String; @@ -15,6 +13,7 @@ import util::Monitor; import lang::oniguruma::Conversion; import lang::oniguruma::RegExp; import lang::rascal::grammar::Util; +import lang::rascal::grammar::analyze::Categories; import lang::rascal::grammar::analyze::Delimiters; import lang::rascal::grammar::analyze::Dependencies; import lang::rascal::grammar::analyze::Newlines; @@ -169,28 +168,36 @@ private RscGrammar replaceLegacySemanticTokenTypes(RscGrammar rsc) list[ConversionUnit] analyze(RscGrammar rsc, str name) { str jobLabel = "Analyzing)">"; - jobStart(jobLabel, work = 4); + jobStart(jobLabel, work = 6); - // Analyze dependencies among productions + // Analyze productions jobStep(jobLabel, "Analyzing productions"); - Graph[Production] graph = toGraph(rsc); - Production marker = prod(\empty(), [], {}); - - bool hasCategory(Production p) - = /\tag("category"(_)) := p; - bool hasActiveCategory(Production p) - = hasCategory(p) && marker in getClosestAncestors(graph, hasCategory, p, \default = just(marker)); - bool isNonEmpty(prod(def, _, _)) - = !tryParse(rsc, delabel(def), ""); - - list[Production] prods = deps(graph) - .retainProds(hasActiveCategory) - .retainProds(isNonEmpty) - .getProds(); + list[Production] prods = [p | /p: prod(_, _, _) <- rsc]; + + // Analyze categories + jobStep(jobLabel, "Analyzing categories"); + prods = for (p <- prods) { + + // If `p` has 0 or >=2 categories, then ignore `p` (unclear which + // category should be used for highlighting) + set[str] categories = getCategories(rsc, p); + if ({_} !:= categories || {NO_CATEGORY} == categories) { + continue; + } - // for (p <- prods) { - // println(""); - // } + // If each parent of `p` has a category, then ignore `p` (the parents of + // `p` will be used for highlighting instead) + set[Production] parents = lookdown(rsc, delabel(p.def)); + if (!any(parent <- parents, NO_CATEGORY in getCategories(rsc, parent))) { + continue; + } + + append p; + } + + // Analyze emptiness + jobStep(jobLabel, "Analyzing emptiness"); + prods = [p | p <- prods, !tryParse(rsc, delabel(p.def), "")]; // Analyze delimiters jobStep(jobLabel, "Analyzing delimiters"); @@ -215,10 +222,6 @@ list[ConversionUnit] analyze(RscGrammar rsc, str name) { units += {unit(rsc, p, false, false, , ) | p <- prodsDelimiters + prodsKeywords, !isEmptyProd(p)}; list[ConversionUnit] ret = sort([*removeStrictPrefixes(units)]); - // for (u <- units) { - // println(": "); - // } - // Return jobEnd(jobLabel); return ret; From 67dff035e55a0c4c23af3cfb973388189c4271a1 Mon Sep 17 00:00:00 2001 From: Sung-Shik Jongmans Date: Wed, 9 Oct 2024 09:53:17 +0200 Subject: [PATCH 09/14] Update tests --- .../main/rascal/lang/textmate/conversiontests/Emoji.rsc | 7 ++++++- .../rascal/lang/textmate/conversiontests/RascalClass.rsc | 4 +++- .../rascal/lang/textmate/conversiontests/RascalComment.rsc | 4 +++- .../lang/textmate/conversiontests/RascalConcrete.rsc | 4 +++- .../lang/textmate/conversiontests/RascalStringConstant.rsc | 4 +++- .../lang/textmate/conversiontests/RascalStringLiteral.rsc | 4 +++- .../rascal/lang/textmate/conversiontests/RascalTag.rsc | 4 +++- 7 files changed, 24 insertions(+), 7 deletions(-) diff --git a/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/Emoji.rsc b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/Emoji.rsc index cb8fc28..176cb97 100644 --- a/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/Emoji.rsc +++ b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/Emoji.rsc @@ -8,6 +8,11 @@ import lang::textmate::Conversion; import lang::textmate::ConversionTests; import lang::textmate::ConversionUnit; +start syntax Start + = Unit + | Boolean + ; + lexical Unit = @category="constant.language" [🌊]; @@ -16,7 +21,7 @@ lexical Boolean | @category="constant.language" [🙁] ; -Grammar rsc = preprocess(grammar(#Boolean)); +Grammar rsc = preprocess(grammar(#Start)); list[ConversionUnit] units = [ unit(rsc, prod(lex("Boolean"),[lit("🙂")],{\tag("category"("constant.language"))}), false, false, , ), diff --git a/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalClass.rsc b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalClass.rsc index 438e3d5..51627d4 100644 --- a/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalClass.rsc +++ b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalClass.rsc @@ -11,6 +11,8 @@ import lang::textmate::ConversionUnit; // Based on `lang::rascal::\syntax::Rascal` +start syntax Start = Class; + syntax Class = simpleCharclass: "[" Range* ranges "]" | complement: "!" Class charClass @@ -37,7 +39,7 @@ lexical UnicodeEscape | ascii: "\\" [a] [0-7] [0-9A-Fa-f] ; -Grammar rsc = preprocess(grammar(#Class)); +Grammar rsc = preprocess(grammar(#Start)); list[ConversionUnit] units = [ unit(rsc, prod(lex(DELIMITERS_PRODUCTION_NAME),[alt({lit("-"),lit(")"),lit("("),lit("!"),lit("||"),lit("&&")})],{}), false, false, , ), diff --git a/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalComment.rsc b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalComment.rsc index 0b564ce..ff2f1d1 100644 --- a/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalComment.rsc +++ b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalComment.rsc @@ -10,12 +10,14 @@ import lang::textmate::ConversionUnit; // Based on `lang::rascal::\syntax::Rascal` +start syntax Start = Comment; + lexical Comment = @category="Comment" "/*" (![*] | [*] !>> [/])* "*/" | @category="Comment" "//" ![\n]* !>> [\ \t\r \u00A0 \u1680 \u2000-\u200A \u202F \u205F \u3000] $ ; -Grammar rsc = preprocess(grammar(#Comment)); +Grammar rsc = preprocess(grammar(#Start)); list[ConversionUnit] units = [ unit(rsc, prod(lex("Comment"),[lit("//"),conditional(\iter-star(\char-class([range(1,9),range(11,1114111)])),{\not-follow(\char-class([range(9,9),range(13,13),range(32,32),range(160,160),range(5760,5760),range(8192,8202),range(8239,8239),range(8287,8287),range(12288,12288)])),\end-of-line()})],{\tag("category"("comment"))}), false, false, , ), diff --git a/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalConcrete.rsc b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalConcrete.rsc index ee6df07..cbf925f 100644 --- a/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalConcrete.rsc +++ b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalConcrete.rsc @@ -11,6 +11,8 @@ import lang::textmate::ConversionUnit; // Based on `lang::rascal::\syntax::Rascal` +start syntax Start = Concrete; + lexical Concrete = typed: /* "(" LAYOUTLIST l1 Sym symbol LAYOUTLIST l2 ")" LAYOUTLIST l3 */ "`" ConcretePart* parts "`"; @@ -27,7 +29,7 @@ lexical ConcretePart syntax ConcreteHole = \one: "\<" /* Sym symbol Name name */ "\>"; -Grammar rsc = preprocess(grammar(#Concrete)); +Grammar rsc = preprocess(grammar(#Start)); list[ConversionUnit] units = [ unit(rsc, prod(lex(DELIMITERS_PRODUCTION_NAME),[alt({lit("\n"),lit("\'")})],{}), false, false, , ), diff --git a/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalStringConstant.rsc b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalStringConstant.rsc index 3f94eca..2ccb630 100644 --- a/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalStringConstant.rsc +++ b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalStringConstant.rsc @@ -11,6 +11,8 @@ import lang::textmate::ConversionUnit; // Based on `lang::rascal::\syntax::Rascal` +start syntax Start = StringConstant; + lexical StringConstant = @category="Constant" "\"" StringCharacter* chars "\"" ; @@ -27,7 +29,7 @@ lexical UnicodeEscape | ascii: "\\" [a] [0-7] [0-9A-Fa-f] ; -Grammar rsc = preprocess(grammar(#StringConstant)); +Grammar rsc = preprocess(grammar(#Start)); list[ConversionUnit] units = [ unit(rsc, prod(lex(DELIMITERS_PRODUCTION_NAME),[alt({lit("\n"),lit("\'"),lit("\\")})],{}), false, false, , ), diff --git a/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalStringLiteral.rsc b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalStringLiteral.rsc index ef27235..7e719bf 100644 --- a/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalStringLiteral.rsc +++ b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalStringLiteral.rsc @@ -11,6 +11,8 @@ import lang::textmate::ConversionUnit; // Based on `lang::rascal::\syntax::Rascal` +start syntax Start = StringLiteral; + syntax StringLiteral = template: PreStringChars pre StringTemplate template StringTail tail | interpolated: PreStringChars pre Expression expression StringTail tail @@ -66,7 +68,7 @@ syntax Expression | lessThan : Expression lhs "\<" !>> "-" Expression rhs | greaterThan : Expression lhs "\>" Expression rhs ); -Grammar rsc = preprocess(grammar(#StringLiteral)); +Grammar rsc = preprocess(grammar(#Start)); list[ConversionUnit] units = [ unit(rsc, prod(lex(DELIMITERS_PRODUCTION_NAME),[alt({lit("-"),lit(","),lit(")"),lit("("),lit("\n"),lit("\'"),lit("\<="),lit("}"),lit("\\"),lit("\>="),lit(";"),lit("{")})],{}), false, false, , ), diff --git a/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalTag.rsc b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalTag.rsc index 7cf92fc..372c8dc 100644 --- a/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalTag.rsc +++ b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalTag.rsc @@ -11,6 +11,8 @@ import lang::textmate::ConversionUnit; // Based on `lang::rascal::\syntax::Rascal` +start syntax Start = Tag; + syntax Tag = @Folded @category="Comment" \default : "@" Name name TagString contents | @Folded @category="Comment" empty : "@" Name name @@ -37,7 +39,7 @@ lexical LAYOUT layout LAYOUTLIST = LAYOUT* !>> [\u0009-\u000D \u0020 \u0085 \u00A0 \u1680 \u180E \u2000-\u200A \u2028 \u2029 \u202F \u205F \u3000] /* !>> "//" !>> "/*" */; -Grammar rsc = preprocess(grammar(#Tag)); +Grammar rsc = preprocess(grammar(#Start)); list[ConversionUnit] units = [ unit(rsc, prod(lex(DELIMITERS_PRODUCTION_NAME),[alt({lit("="),lit("\\"),lit(";"),lit("{")})],{}), false, false, , ), From 0b35f6d9dfdc17d7aa396bb4a91f561f0be5ed44 Mon Sep 17 00:00:00 2001 From: Sung-Shik Jongmans Date: Wed, 9 Oct 2024 09:55:09 +0200 Subject: [PATCH 10/14] Add tests for nested categories --- .../conversiontests/NestedCategories.rsc | 99 ++++++++ .../conversiontests/NestedCategories.test | 211 ++++++++++++++++++ 2 files changed, 310 insertions(+) create mode 100644 rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/NestedCategories.rsc create mode 100644 rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/NestedCategories.test diff --git a/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/NestedCategories.rsc b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/NestedCategories.rsc new file mode 100644 index 0000000..d1229eb --- /dev/null +++ b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/NestedCategories.rsc @@ -0,0 +1,99 @@ +module lang::textmate::conversiontests::NestedCategories + +import Grammar; +import ParseTree; +import util::Maybe; + +import lang::textmate::Conversion; +import lang::textmate::ConversionConstants; +import lang::textmate::ConversionTests; +import lang::textmate::ConversionUnit; + +start syntax Start + = A01 | A02 | A03 | A04 | A05 | A06 | A07 | A08 | A09 | A10 | A11 | A12; + +lexical A01 = @category="a" B01 [\ ] C01; +lexical B01 = @category="b" D01 [\ ] "bar01"; +lexical C01 = @category="c" D01 [\ ] "baz01"; +lexical D01 = @category="d" "foo01"; + +lexical A02 = @category="a" B02 [\ ] C02; +lexical B02 = @category="b" D02 [\ ] "bar02"; +lexical C02 = @category="c" D02 [\ ] "baz02"; +lexical D02 = "foo02"; + +lexical A03 = @category="a" B03 [\ ] C03; +lexical B03 = @category="b" D03 [\ ] "bar03"; +lexical C03 = D03 [\ ] "baz03"; +lexical D03 = @category="d" "foo03"; + +lexical A04 = @category="a" B04 [\ ] C04; +lexical B04 = @category="b" D04 [\ ] "bar04"; +lexical C04 = D04 [\ ] "baz04"; +lexical D04 = "foo04"; + +lexical A05 = @category="a" B05 [\ ] C05; +lexical B05 = D05 [\ ] "bar05"; +lexical C05 = D05 [\ ] "baz05"; +lexical D05 = @category="d" "foo05"; + +lexical A06 = @category="a" B06 [\ ] C06; +lexical B06 = D06 [\ ] "bar06"; +lexical C06 = D06 [\ ] "baz06"; +lexical D06 = "foo06"; + +lexical A07 = B07 [\ ] C07; +lexical B07 = @category="b" D07 [\ ] "bar07"; +lexical C07 = @category="c" D07 [\ ] "baz07"; +lexical D07 = @category="d" "foo07"; + +lexical A08 = B08 [\ ] C08; +lexical B08 = @category="b" D08 [\ ] "bar08"; +lexical C08 = @category="c" D08 [\ ] "baz08"; +lexical D08 = "foo08"; + +lexical A09 = B09 [\ ] C09; +lexical B09 = @category="b" D09 [\ ] "bar09"; +lexical C09 = D09 [\ ] "baz09"; +lexical D09 = @category="d" "foo09"; // Design decision: D09 should be converted + // to a TextMate rule, because it's + // reachable via C09, which doesn't have a + // category + +lexical A10 = B10 [\ ] C10; +lexical B10 = @category="b" D10 [\ ] "bar10"; +lexical C10 = D10 [\ ] "baz10"; +lexical D10 = "foo10"; + +lexical A11 = B11 [\ ] C11; +lexical B11 = D11 [\ ] "bar11"; +lexical C11 = D11 [\ ] "baz11"; +lexical D11 = @category="d" "foo11"; + +lexical A12 = B12 [\ ] C12; +lexical B12 = D12 [\ ] "bar12"; +lexical C12 = D12 [\ ] "baz12"; +lexical D12 = "foo12"; + +Grammar rsc = preprocess(grammar(#Start)); + +list[ConversionUnit] units = [ + unit(rsc, prod(lex("C07"),[lex("D07"),lit(" "),lit("baz07")],{\tag("category"("c"))}), false, false, , ), + unit(rsc, prod(lex("C08"),[lex("D08"),lit(" "),lit("baz08")],{\tag("category"("c"))}), false, false, , ), + unit(rsc, prod(lex("A01"),[lex("B01"),lit(" "),lex("C01")],{\tag("category"("a"))}), false, false, , ), + unit(rsc, prod(lex("A02"),[lex("B02"),lit(" "),lex("C02")],{\tag("category"("a"))}), false, false, , ), + unit(rsc, prod(lex("A03"),[lex("B03"),lit(" "),lex("C03")],{\tag("category"("a"))}), false, false, , ), + unit(rsc, prod(lex("A04"),[lex("B04"),lit(" "),lex("C04")],{\tag("category"("a"))}), false, false, , ), + unit(rsc, prod(lex("A05"),[lex("B05"),lit(" "),lex("C05")],{\tag("category"("a"))}), false, false, , ), + unit(rsc, prod(lex("A06"),[lex("B06"),lit(" "),lex("C06")],{\tag("category"("a"))}), false, false, , ), + unit(rsc, prod(lex("B07"),[lex("D07"),lit(" "),lit("bar07")],{\tag("category"("b"))}), false, false, , ), + unit(rsc, prod(lex("B08"),[lex("D08"),lit(" "),lit("bar08")],{\tag("category"("b"))}), false, false, , ), + unit(rsc, prod(lex("B09"),[lex("D09"),lit(" "),lit("bar09")],{\tag("category"("b"))}), false, false, , ), + unit(rsc, prod(lex("B10"),[lex("D10"),lit(" "),lit("bar10")],{\tag("category"("b"))}), false, false, , ), + unit(rsc, prod(lex("D09"),[lit("foo09")],{\tag("category"("d"))}), false, false, , ), + unit(rsc, prod(lex("D11"),[lit("foo11")],{\tag("category"("d"))}), false, false, , ), + unit(rsc, prod(lex(KEYWORDS_PRODUCTION_NAME),[alt({lit("foo07"),lit("foo06"),lit("foo09"),lit("foo08"),lit("foo03"),lit("foo02"),lit("foo05"),lit("foo04"),lit("foo10"),lit("baz09"),lit("foo11"),lit("baz06"),lit("baz05"),lit("baz08"),lit("baz07"),lit("baz02"),lit("baz04"),lit("baz03"),lit("bar06"),lit("bar05"),lit("bar02"),lit("bar04"),lit("bar03"),lit("bar11"),lit("bar10"),lit("foo12"),lit("foo01"),lit("baz12"),lit("baz01"),lit("bar09"),lit("baz11"),lit("bar08"),lit("baz10"),lit("bar07"),lit("bar12"),lit("bar01")})],{\tag("category"("keyword.control"))}), false, false, , ) +]; + +test bool analyzeTest() = doAnalyzeTest(rsc, units, name = "NestedCategories"); +test bool transformTest() = doTransformTest(units, <15, 0, 0>, name = "NestedCategories"); diff --git a/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/NestedCategories.test b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/NestedCategories.test new file mode 100644 index 0000000..500b498 --- /dev/null +++ b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/NestedCategories.test @@ -0,0 +1,211 @@ +# SYNTAX TEST "NestedCategories" + + foo01 bar01 foo01 baz01 +# ^ a +# ^^^^^^ b +# ^^^^^^ c +# ^^^^^ ^^^^^ d + + foo01 bar01 +# ^^^^^^^^^^^ -b +# ^^^^^ -d + + foo01 baz01 +# ^^^^^^^^^^^ -c +# ^^^^^ -d + + foo01 +# ^^^^^ -d + + foo02 bar02 foo02 baz02 +# ^ a +# ^^^^^^^^^^^ b +# ^^^^^^^^^^^ c +# ^^^^^ ^^^^^ -d + + foo02 bar02 +# ^^^^^^^^^^^ -b +# ^^^^^ -d + + foo02 baz02 +# ^^^^^^^^^^^ -c +# ^^^^^ -d + + foo02 +# ^^^^^ -d + + foo03 bar03 foo03 baz03 +# ^ ^^^^^^ a +# ^^^^^^ b +# ^^^^^^^^^^^ -c +# ^^^^^ ^^^^^ d + + foo03 bar03 +# ^^^^^^^^^^^ -b +# ^^^^^ -d + + foo03 baz03 +# ^^^^^^^^^^^ -c +# ^^^^^ -d + + foo03 +# ^^^^^ -d + + foo04 bar04 foo04 baz04 +# ^^^^^^^^^^^^ a +# ^^^^^^^^^^^ b +# ^^^^^^^^^^^ -c +# ^^^^^ ^^^^^ -d + + foo04 bar04 +# ^^^^^^^^^^^ -b +# ^^^^^ -d + + foo04 baz04 +# ^^^^^^^^^^^ -c +# ^^^^^ -d + + foo04 +# ^^^^^ -d + + foo05 bar05 foo05 baz05 +# ^^^^^^^ ^^^^^^ a +# ^^^^^^^^^^^ -b +# ^^^^^^^^^^^ -c +# ^^^^^ ^^^^^ d + + foo05 bar05 +# ^^^^^^^^^^^ -b +# ^^^^^ -d + + foo05 baz05 +# ^^^^^^^^^^^ -c +# ^^^^^ -d + + foo05 +# ^^^^^ -d + + foo06 bar06 foo06 baz06 +# ^^^^^^^^^^^^^^^^^^^^^^^ a +# ^^^^^^^^^^^ -b +# ^^^^^^^^^^^ -c +# ^^^^^ ^^^^^ -d + + foo06 bar06 +# ^^^^^^^^^^^ -b +# ^^^^^ -d + + foo06 baz06 +# ^^^^^^^^^^^ -c +# ^^^^^ -d + + foo06 +# ^^^^^ -d + + foo07 bar07 foo07 baz07 +# ^^^^^^^^^^^^^^^^^^^^^^^ -a +# ^^^^^^ b +# ^^^^^^ c +# ^^^^^ ^^^^^ d + + foo07 bar07 +# ^^^^^^ b +# ^^^^^ d + + foo07 baz07 +# ^^^^^^ c +# ^^^^^ d + + foo07 +# ^^^^^ -d + + foo08 bar08 foo08 baz08 +# ^^^^^^^^^^^^^^^^^^^^^^^ -a +# ^^^^^^^^^^^ b +# ^^^^^^^^^^^ c +# ^^^^^ ^^^^^ -d + + foo08 bar08 +# ^^^^^^^^^^^ b +# ^^^^^ -d + + foo08 baz08 +# ^^^^^^^^^^^ c +# ^^^^^ -d + + foo08 +# ^^^^^ -d + + foo09 bar09 foo09 baz09 +# ^^^^^^^^^^^^^^^^^^^^^^^ -a +# ^^^^^^ b +# ^^^^^^^^^^^ -c +# ^^^^^ ^^^^^ d + + foo09 bar09 +# ^^^^^^ b +# ^^^^^ d + + foo09 baz09 +# ^^^^^^^^^^^ -c +# ^^^^^ d + + foo09 +# ^^^^^ d + + foo10 bar10 foo10 baz10 +# ^^^^^^^^^^^^^^^^^^^^^^^ -a +# ^^^^^^^^^^^ b +# ^^^^^^^^^^^ -c +# ^^^^^ ^^^^^ -d + + foo10 bar10 +# ^^^^^^^^^^^ b +# ^^^^^ -d + + foo10 baz10 +# ^^^^^^^^^^^ -c +# ^^^^^ -d + + foo10 +# ^^^^^ -d + + foo11 bar11 foo11 baz11 +# ^^^^^^^^^^^^^^^^^^^^^^^ -a +# ^^^^^^^^^^^ -b +# ^^^^^^^^^^^ -c +# ^^^^^ ^^^^^ d + + foo11 bar11 +# ^^^^^^^^^^^ -b +# ^^^^^ d + + foo11 baz11 +# ^^^^^^^^^^^ -c +# ^^^^^ d + + foo11 +# ^^^^^ d + + foo12 bar12 foo12 baz12 +# ^^^^^^^^^^^^^^^^^^^^^^^ -a +# ^^^^^^^^^^^ -b +# ^^^^^^^^^^^ -c +# ^^^^^ ^^^^^ -d + + foo11 bar11 foo11 baz11 +# ^^^^^^^^^^^^^^^^^^^^^^^ -a +# ^^^^^^^^^^^ -b +# ^^^^^^^^^^^ -c +# ^^^^^ ^^^^^ d + + foo12 bar12 +# ^^^^^^^^^^^ -b +# ^^^^^ -d + + foo12 baz12 +# ^^^^^^^^^^^ -c +# ^^^^^ -d + + foo12 +# ^^^^^ -d \ No newline at end of file From 562dc55a4cbc6b20d55c44cebd78253872f12f21 Mon Sep 17 00:00:00 2001 From: Sung-Shik Jongmans Date: Wed, 9 Oct 2024 13:15:22 +0200 Subject: [PATCH 11/14] Clean up obsolete code --- .../main/rascal/lang/rascal/grammar/Util.rsc | 1 - .../rascal/grammar/analyze/Dependencies.rsc | 40 +++++-------------- 2 files changed, 10 insertions(+), 31 deletions(-) diff --git a/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/Util.rsc b/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/Util.rsc index 8a6c611..6ad80af 100644 --- a/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/Util.rsc +++ b/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/Util.rsc @@ -37,7 +37,6 @@ bool tryParse(Grammar g, Symbol s, str input, bool allowAmbiguity = false) { Checks if symbol `s` is recursive in grammar `g` } -// TODO: Compute a map and memoize the results bool isRecursive(Grammar g, Symbol s, set[Symbol] checking = {}) = s in checking ? true diff --git a/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Dependencies.rsc b/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Dependencies.rsc index 4d01999..ba1076d 100644 --- a/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Dependencies.rsc +++ b/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Dependencies.rsc @@ -16,7 +16,6 @@ import Grammar; import ParseTree; import Relation; import Set; -import util::Maybe; import lang::rascal::grammar::Util; @@ -50,7 +49,7 @@ Dependencies deps(Graph[Production] g) { = deps(removeNodes(g, getNodes(g, p, getAncestors = removeAncestors))); list[Production] getProds() = toList(g.nodes); - + return deps(retainProds, removeProds, getProds); } @@ -82,10 +81,14 @@ alias Graph[&Node] = tuple[ rel[&Node, &Node] edges]; @synopsis { - Representation of predicates to select nodes in a graph + Representation of predicates to select nodes in a graph based on their own + properties, their ancestors, and their descendants } -alias Predicate[&Node] = bool(&Node n); +alias Predicate[&Node] = bool( + &Node n, + set[&Node] ancestors /* of `n` in the graph */, + set[&Node] descendants /* of `n` in the graph */); @synopsis{ Gets the nodes of graph `g` that satisfy predicate `p`, optionally including @@ -94,13 +97,13 @@ alias Predicate[&Node] = bool(&Node n); set[&Node] getNodes(Graph[&Node] g, Predicate[&Node] p, bool getAncestors = false, bool getDescendants = false) { - + // Compute ancestors/descendants of nodes rel[&Node, &Node] descendants = g.edges+; rel[&Node, &Node] ancestors = invert(descendants); // Select nodes - nodes = {n | n <- g.nodes, p(n)}; + nodes = {n | n <- g.nodes, p(n, ancestors[n] ? {}, descendants[n] ? {})}; nodes += ({} | it + (ancestors[n] ? {}) | getAncestors, n <- nodes); nodes += ({} | it + (descendants[n] ? {}) | getDescendants, n <- nodes); return nodes; @@ -118,27 +121,4 @@ Graph[&Node] retainNodes(Graph[&Node] g, set[&Node] nodes) } Graph[&Node] removeNodes(Graph[&Node] g, set[&Node] nodes) - = ; - -@synopsis{ - Gets the closest ancestors that satisfy predicate `p` in each branch upward - from node `n` in graph `g`, optionally including `\default` when none of the - ancestors in a branch satisfy `p` -} - -set[&Node] getClosestAncestors( - Graph[&Node] g, Predicate[&Node] p, &Node n, - set[&Node] getting = {}, Maybe[&Node] \default = nothing()) { - - if (n in getting) { - return {}; - } else { - set[&Node] parents = invert(g.edges)[n]; - if ({} == parents && just(_) := \default) { - return {\default.val}; - } else { - set[&Node] recur(&Node parent) = getClosestAncestors(g, p, parent, getting = getting + n, \default = \default); - return {*(p(parent) ? {parent} : recur(parent)) | parent <- parents}; - } - } -} \ No newline at end of file + = ; \ No newline at end of file From c1ededb00beb5fac88b745500d659c518172773f Mon Sep 17 00:00:00 2001 From: Sung-Shik Jongmans Date: Wed, 9 Oct 2024 14:04:04 +0200 Subject: [PATCH 12/14] Rename function `lookup` to make its name more consistent with another function --- .../main/rascal/lang/oniguruma/Conversion.rsc | 12 ++++---- .../main/rascal/lang/rascal/grammar/Util.rsc | 28 ++++++++----------- .../rascal/grammar/analyze/Categories.rsc | 2 +- .../rascal/grammar/analyze/Delimiters.rsc | 10 +++---- .../lang/rascal/grammar/analyze/Newlines.rsc | 22 +++++++-------- .../lang/rascal/grammar/analyze/Symbols.rsc | 4 +-- .../main/rascal/lang/textmate/Conversion.rsc | 24 ++++++++-------- 7 files changed, 49 insertions(+), 53 deletions(-) diff --git a/rascal-textmate-core/src/main/rascal/lang/oniguruma/Conversion.rsc b/rascal-textmate-core/src/main/rascal/lang/oniguruma/Conversion.rsc index 98e0fdd..900068b 100644 --- a/rascal-textmate-core/src/main/rascal/lang/oniguruma/Conversion.rsc +++ b/rascal-textmate-core/src/main/rascal/lang/oniguruma/Conversion.rsc @@ -60,7 +60,7 @@ RegExp toRegExp(Grammar g, list[Symbol] symbols, set[Attr] attributes) { RegExp toRegExp(Grammar g, \label(_, symbol)) = toRegExp(g, symbol); RegExp toRegExp(Grammar g, \parameter(_, _)) { - throw "Presumably unreachable..."; } // Covered by `lookup` (which substitutes actuals for formals) + throw "Presumably unreachable..."; } // Covered by `prodsOf` (which substitutes actuals for formals) // `ParseTree`: Start RegExp toRegExp(Grammar g, \start(symbol)) @@ -68,7 +68,7 @@ RegExp toRegExp(Grammar g, \start(symbol)) // `ParseTree`: Non-terminals RegExp toRegExp(Grammar g, Symbol s) - = infix("|", [toRegExp(g, p) | p <- lookup(g, s)]) when isNonTerminalType(s); + = infix("|", [toRegExp(g, p) | p <- prodsOf(g, s)]) when isNonTerminalType(s); // `ParseTree`: Terminals RegExp toRegExp(Grammar _, \lit(string)) @@ -103,7 +103,7 @@ RegExp toRegExp(Grammar g, \conditional(symbol, conditions)) { prefixConditions = [c | c <- conditions, isPrefixCondition(c)]; suffixConditions = [c | c <- conditions, isSuffixCondition(c)]; deleteConditions = [c | c <- conditions, isDeleteCondition(c)]; - + // Convert except conditions (depends on previous conversion) if (_ <- exceptConditions) { if (/\choice(symbol, alternatives) := g) { @@ -112,7 +112,7 @@ RegExp toRegExp(Grammar g, \conditional(symbol, conditions)) { = \label(l, _) := def ? \except(l) notin exceptConditions : true; - + re = infix("|", toRegExps(g, {a | a <- alternatives, keep(a)})); } } @@ -130,7 +130,7 @@ RegExp toRegExp(Grammar g, \conditional(symbol, conditions)) { // Convert delete conditions (depends on previous conversions) if (_ <- deleteConditions) { RegExp delete = infix("|", [toRegExp(g, s) | \delete(s) <- deleteConditions]); - + // TODO: Explain this complicated conversion... str string = "(?=(?\)(?\.*)$)(?!(?:)\\k\$)\\k\"; list[str] categories = ["", *re.categories, "", *delete.categories]; @@ -196,7 +196,7 @@ str encode(int char) = preEncoded[char] ? "\\x{}"; private set[int] charRange(str from, str to) = {*[charAt(from, 0)..charAt(to, 0) + 1]}; private str toHex(int i) - = i < 16 + = i < 16 ? hex[i] : toHex(i / 16) + toHex(i % 16); diff --git a/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/Util.rsc b/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/Util.rsc index 6ad80af..ad09528 100644 --- a/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/Util.rsc +++ b/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/Util.rsc @@ -40,8 +40,8 @@ bool tryParse(Grammar g, Symbol s, str input, bool allowAmbiguity = false) { bool isRecursive(Grammar g, Symbol s, set[Symbol] checking = {}) = s in checking ? true - : any(p <- lookup(g, delabel(s)), - /Symbol child := p.symbols, + : any(p <- prodsOf(g, delabel(s)), + /Symbol child := p.symbols, isRecursive(g, child, checking = checking + s)); @synopsis{ @@ -72,7 +72,7 @@ alias Pointer = tuple[Production p, int index]; ``` lexical X = Y; - lexical Y = alt1: "[" "[" "[" Z1 "]" "]" "]" | alt2: "<" Z2 ">"; + lexical Y = alt1: "[" "[" "[" Z1 "]" "]" "]" | alt2: "<" Z2 ">"; lexical Z1 = "foo" "bar"; lexical Z2 = "baz"; ``` @@ -82,7 +82,7 @@ alias Pointer = tuple[Production p, int index]; - `` - `` - `` - + The list of pointers to `"qux"` is just empty. } @@ -94,7 +94,7 @@ list[Pointer] find(Grammar g, Production p, Symbol s, Direction dir = forward()) if (ith == needle) { return []; } - for (isNonTerminalType(ith), child <- lookup(g, ith)) { + for (isNonTerminalType(ith), child <- prodsOf(g, ith)) { if (list[Pointer] l: [_, *_] := doFind(doing + haystack, child, s)) { return [] + l; } @@ -108,30 +108,26 @@ list[Pointer] find(Grammar g, Production p, Symbol s, Direction dir = forward()) } @synopsis{ - Lookdowns a list of productions for symbol `s` in grammar `g` + Gets the list of productions that contain symbol `s` in grammar `g` } -// TODO: Rename this function because the current name makes little sense in -// isolation (it's supposed to be the opposite of `lookup`, but in that sense, -// the directions are illogical) - -set[Production] lookdown(Grammar g, Symbol s) +set[Production] prodsWith(Grammar g, Symbol s) = {parent | /parent: prod(_, /Symbol _: s, _) := g}; @synopsis{ - Lookups a list of productions for symbol `s` in grammar `g`, replacing + Gets the list of productions of symbol `s` in grammar `g`, replacing formal parameters with actual parameters when needed } -list[Production] lookup(Grammar g, s: \parameterized-sort(name, actual)) +list[Production] prodsOf(Grammar g, s: \parameterized-sort(name, actual)) = [subst(p, formal, actual) | /p: prod(\parameterized-sort(name, formal), _, _) := g.rules[s] ? []] + [subst(p, formal, actual) | /p: prod(label(_, \parameterized-sort(name, formal)), _, _) := g.rules[s] ? []]; -list[Production] lookup(Grammar g, s: \parameterized-lex(name, actual)) +list[Production] prodsOf(Grammar g, s: \parameterized-lex(name, actual)) = [subst(p, formal, actual) | /p: prod(\parameterized-lex(name, formal), _, _) := g.rules[s] ? []] + [subst(p, formal, actual) | /p: prod(label(_, \parameterized-lex(name, formal)), _, _) := g.rules[s] ? []]; -default list[Production] lookup(Grammar g, Symbol s) +default list[Production] prodsOf(Grammar g, Symbol s) = [p | /p: prod(s, _, _) := g.rules[s] ? []] + [p | /p: prod(label(_, s), _, _) := g.rules[s] ? []]; @@ -143,7 +139,7 @@ default list[Production] lookup(Grammar g, Symbol s) &T subst(&T t, list[Symbol] from, list[Symbol] to) = subst(t, toMapUnique(zip2(from, to))) when size(from) == size(to); - + private &T subst(&T t, map[Symbol, Symbol] m) = visit (t) { case Symbol s => m[s] when s in m }; diff --git a/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Categories.rsc b/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Categories.rsc index b21edf3..9d42b7a 100644 --- a/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Categories.rsc +++ b/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Categories.rsc @@ -33,7 +33,7 @@ private map[Production, set[str]] getCategoriesByProduction(Grammar g) { // If the new categories of `p` are different from the old ones, then // propagate these changes to the children of `p` - for (old != new, /Symbol s := p.symbols, child <- lookup(g, delabel(s))) { + for (old != new, /Symbol s := p.symbols, child <- prodsOf(g, delabel(s))) { doGet(child, new); } } diff --git a/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Delimiters.rsc b/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Delimiters.rsc index 1b15067..d7b40f9 100644 --- a/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Delimiters.rsc +++ b/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Delimiters.rsc @@ -49,7 +49,7 @@ DelimiterPair getInnerDelimiterPair(Grammar g, Symbol s, bool getOnlyFirst = fal ``` lexical X = Y; lexical Y = Y1 | Y2; - lexical Y1 = "[" Z "]"; + lexical Y1 = "[" Z "]"; lexical Y2 = "[" Z ")" [a-z]; lexical Z = [a-z]; ``` @@ -83,7 +83,7 @@ private map[Symbol, Maybe[Symbol]] getInnerDelimiterBySymbol(Grammar g, Directio @memo private map[Production, Maybe[Symbol]] getInnerDelimiterByProduction(Grammar g, Direction direction, bool getOnlyFirst = false) { map[Production, Maybe[Symbol]] ret = (p: nothing() | /p: prod(_, _, _) := g); - + solve (ret) { for (p <- ret, ret[p] == nothing()) { for (s <- reorder(p.symbols, direction)) { @@ -108,7 +108,7 @@ private map[Production, Maybe[Symbol]] getInnerDelimiterByProduction(Grammar g, } private set[Production] getChildren(Grammar g, Symbol s) - = {*lookup(g, s)}; + = {*prodsOf(g, s)}; @synopsis{ Gets the unique rightmost delimiter (`begin`) and the unique leftmost @@ -122,7 +122,7 @@ private set[Production] getChildren(Grammar g, Symbol s) ``` lexical X = Y; lexical Y = Y1 | Y2; - lexical Y1 = "[" Z "]"; + lexical Y1 = "[" Z "]"; lexical Y2 = "[" Z ")" [a-z]; lexical Z = [a-z]; ``` @@ -166,7 +166,7 @@ private map[Symbol, Maybe[Symbol]] getOuterDelimiterBySymbol(Grammar g, Directio ret[s] = unique(delimiters); } } - + return ret; } diff --git a/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Newlines.rsc b/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Newlines.rsc index b4e0d0b..5d3ffc8 100644 --- a/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Newlines.rsc +++ b/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Newlines.rsc @@ -55,7 +55,7 @@ private map[Production, Maybe[set[Segment]]] getSegmentsByProduction(Grammar g) } private Maybe[set[Segment]] getSegmentsWithEnvironment( - Grammar g, list[Symbol] symbols, + Grammar g, list[Symbol] symbols, map[Production, Maybe[set[Segment]]] env) { // General idea: Recursively traverse `symbols` from left to right, while @@ -73,9 +73,9 @@ private Maybe[set[Segment]] getSegmentsWithEnvironment( set[Symbol] nested = {s | /Symbol s := head}; Maybe[set[Segment]] finished = get(running, [], final = tail == []); - + // If the head contains a non-terminal, then: (1) finish the running - // segment; (2) lookup the segments of the non-terminals in the + // segment; (2) look up the segments of the non-terminals in the // environment, if any; (3) compute the segments of the tail. Return the // union of 1-3. if (any(s <- nested, isNonTerminalType(s))) { @@ -85,7 +85,7 @@ private Maybe[set[Segment]] getSegmentsWithEnvironment( sets += finished; // (2) - sets += for (s <- nested, isNonTerminalType(s), p <- lookup(g, s)) { + sets += for (s <- nested, isNonTerminalType(s), p <- prodsOf(g, s)) { bool isInitial(Segment seg) = seg.initial && running.initial && running.symbols == []; @@ -93,7 +93,7 @@ private Maybe[set[Segment]] getSegmentsWithEnvironment( = seg.final && tail == []; Segment update(Segment seg) = seg[initial = isInitial(seg)][final = isFinal(seg)]; - + append just(segs) := env[p] ? just({update(seg) | seg <- segs}) : nothing(); } @@ -103,7 +103,7 @@ private Maybe[set[Segment]] getSegmentsWithEnvironment( // Return union return (sets[0] | union(it, \set) | \set <- sets[1..]); } - + // If the head doesn't contain a non-terminal, but it has a newline, // then: (1) finish the running segment; (2) compute the segments of the // tail. Return the union of 1-2. Note: the head, as it has a newline, @@ -111,13 +111,13 @@ private Maybe[set[Segment]] getSegmentsWithEnvironment( else if (any(s <- nested, hasNewline(g, s))) { return union(finished, get(segment([]), tail)); } - + // If the head doesn't contain a non-terminal, and if it doesn't have a // newline, then add the head to the running segment and proceed with // the tail. else { Segment old = running; - Segment new = old[symbols = old.symbols + head]; + Segment new = old[symbols = old.symbols + head]; return get(new, tail); } } @@ -130,7 +130,7 @@ private Maybe[set[Segment]] getSegmentsWithEnvironment( } bool hasNewline(Grammar g, Symbol s) { - return any(p <- lookup(g, delabel(s)), hasNewline(g, p)); + return any(p <- prodsOf(g, delabel(s)), hasNewline(g, p)); } @synopsis{ @@ -149,7 +149,7 @@ private map[Production, bool] hasNewlineByProduction(Grammar g) { for (p <- ret, !ret[p]) { set[Symbol] nonTerminals = {s | /Symbol s := p.symbols, isNonTerminalType(s)}; ret[p] = ret[p] || any(/r: range(_, _) := p.symbols, hasNewline(r)) - || any(s <- nonTerminals, Production child <- lookup(g, s), ret[child]); + || any(s <- nonTerminals, Production child <- prodsOf(g, s), ret[child]); } } @@ -165,7 +165,7 @@ private map[Production, bool] hasNewlineByProduction(Grammar g) { bool hasNewline(str s) = LF in chars(s); - + bool hasNewline(range(begin, end)) = begin <= LF && LF <= end; diff --git a/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Symbols.rsc b/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Symbols.rsc index fb94466..b05aa39 100644 --- a/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Symbols.rsc +++ b/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Symbols.rsc @@ -56,7 +56,7 @@ private map[Symbol, Maybe[set[Symbol]]] firstBySymbol(Grammar g, bool(Symbol) pr for (s <- ret, nothing() == ret[s]) { if (predicate(s)) { ret[s] = just({s}); - } else if (list[Production] prods: [_, *_] := lookup(g, s)) { + } else if (list[Production] prods: [_, *_] := prodsOf(g, s)) { ret[s] = (just({}) | union(it, firstOf(reorder(p.symbols, dir))) | p <- prods); } else { ret[s] = just({\empty()}); @@ -84,7 +84,7 @@ set[Symbol] follow(Grammar g, Symbol s) @memo private map[Symbol, Maybe[set[Symbol]]] followBySymbol(Grammar g, bool(Symbol) predicate, Direction dir) { map[Symbol, Maybe[set[Symbol]]] ret = (delabel(s): nothing() | s <- g.rules); // Non-terminals - + Maybe[set[Symbol]] followOf(Symbol parent, []) = ret[delabel(parent)]; Maybe[set[Symbol]] followOf(Symbol parent, [h, *t]) diff --git a/rascal-textmate-core/src/main/rascal/lang/textmate/Conversion.rsc b/rascal-textmate-core/src/main/rascal/lang/textmate/Conversion.rsc index 1e0d5c9..fdeb7c5 100644 --- a/rascal-textmate-core/src/main/rascal/lang/textmate/Conversion.rsc +++ b/rascal-textmate-core/src/main/rascal/lang/textmate/Conversion.rsc @@ -131,7 +131,7 @@ private RscGrammar replaceLegacySemanticTokenTypes(RscGrammar rsc) - one synthetic *delimiters* production; - zero-or-more *user-defined* productions (from `rsc`); - one synthetic *keywords* production. - + Each production in the list (including the synthetic ones) is *suitable for conversion* to a TextMate rule. A production is "suitable for conversion" when it satisfies each of the following conditions: @@ -139,7 +139,7 @@ private RscGrammar replaceLegacySemanticTokenTypes(RscGrammar rsc) - it does not match newlines; - it does not match the empty word; - it has a `@category` tag. - + See the walkthrough for further motivation and examples. } @@ -187,7 +187,7 @@ list[ConversionUnit] analyze(RscGrammar rsc, str name) { // If each parent of `p` has a category, then ignore `p` (the parents of // `p` will be used for highlighting instead) - set[Production] parents = lookdown(rsc, delabel(p.def)); + set[Production] parents = prodsWith(rsc, delabel(p.def)); if (!any(parent <- parents, NO_CATEGORY in getCategories(rsc, parent))) { continue; } @@ -295,7 +295,7 @@ private list[ConversionUnit] addInnerRules(list[ConversionUnit] units) { bool guard = nothing() := u.innerDelimiters.begin; TmRule r = toTmRule(toRegExp(u.rsc, u.prod, guard = guard)) [name = "/inner/single/"]; - + rules = insertIn(rules, (u: r)); } @@ -311,8 +311,8 @@ private list[ConversionUnit] addInnerRules(list[ConversionUnit] units) { // Simple case: each unit does have an `end` inner delimiter if (_ <- group && all(u <- group, just(_) := u.innerDelimiters.end)) { - - // Create a set of pointers to the first (resp. last) occurrence + + // Create a set of pointers to the first (resp. last) occurrence // of `pivot` in each unit, when `pivot` is a `begin` delimiter // (resp. an `end` delimiter) of the group. If `pivot` occurs // elsewhere in the grammar as well, then skip the conversion @@ -320,9 +320,9 @@ private list[ConversionUnit] addInnerRules(list[ConversionUnit] units) { // avoid tokenization mistakes in which the other occurrences of // `pivot` in the input are mistakenly interpreted as the // beginning or ending of a unit in the group. - + Symbol pivot = key.val; - + set[Pointer] pointers = {}; pointers += pivot in begins ? {*find(rsc, u.prod, pivot, dir = forward()) [-1..] | u <- group} : {}; pointers += pivot in ends ? {*find(rsc, u.prod, pivot, dir = backward())[-1..] | u <- group} : {}; @@ -342,7 +342,7 @@ private list[ConversionUnit] addInnerRules(list[ConversionUnit] units) { toRegExp(rsc, [\alt(ends)], {t}), [toTmRule(toRegExp(rsc, [s], {t})) | s <- toTerminals(segs)]) [name = "/inner/multi/"]; - + rules = insertIn(rules, (u: r | u <- group)); } @@ -370,7 +370,7 @@ private list[ConversionUnit] addInnerRules(list[ConversionUnit] units) { // and an `end` delimiter, then generate a // begin/end pattern to highlight these delimiters // and all content in between. - + set[Segment] segs = getSegments(rsc, suffix); segs = {removeBeginEnd(seg, {begin}, {end}) | seg <- segs}; @@ -379,7 +379,7 @@ private list[ConversionUnit] addInnerRules(list[ConversionUnit] units) { toRegExp(rsc, [end], {t}), [toTmRule(toRegExp(rsc, [s], {t})) | s <- toTerminals(segs)]); } - + else { // If the suffix has a `begin` delimiter, but not // an `end` delimiter, then generate a match pattern @@ -475,7 +475,7 @@ private list[ConversionUnit] addOuterRules(list[ConversionUnit] units) { toRegExp(rsc, [\alt(ends)], {}), [include("#") | TmRule r <- innerRules]) [name = "/outer/"]; - + rules = insertIn(rules, (u: r | u <- group)); } } From a866514b75ea0e8e59b8798659268b98758b1c48 Mon Sep 17 00:00:00 2001 From: Sung-Shik Jongmans Date: Tue, 22 Oct 2024 10:27:51 +0200 Subject: [PATCH 13/14] Simplify boolean expression --- rascal-textmate-core/.gitignore | 4 +++- .../src/main/rascal/lang/rascal/grammar/Util.rsc | 8 +++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/rascal-textmate-core/.gitignore b/rascal-textmate-core/.gitignore index 5feb907..8ec187b 100644 --- a/rascal-textmate-core/.gitignore +++ b/rascal-textmate-core/.gitignore @@ -1,2 +1,4 @@ target -node_modules \ No newline at end of file +node_modules + +src/main/rascal/Scratch.rsc \ No newline at end of file diff --git a/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/Util.rsc b/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/Util.rsc index ad09528..eb40a3a 100644 --- a/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/Util.rsc +++ b/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/Util.rsc @@ -38,11 +38,9 @@ bool tryParse(Grammar g, Symbol s, str input, bool allowAmbiguity = false) { } bool isRecursive(Grammar g, Symbol s, set[Symbol] checking = {}) - = s in checking - ? true - : any(p <- prodsOf(g, delabel(s)), - /Symbol child := p.symbols, - isRecursive(g, child, checking = checking + s)); + = s in checking || any(p <- prodsOf(g, delabel(s)), + /Symbol child := p.symbols, + isRecursive(g, child, checking = checking + s)); @synopsis{ Checks if production `p` is recursive in grammar `g` From 10284f9383a52f57715710cb43a44932069a0372 Mon Sep 17 00:00:00 2001 From: Sung-Shik Jongmans Date: Tue, 22 Oct 2024 10:30:31 +0200 Subject: [PATCH 14/14] Add editor config --- rascal-textmate-core/.editorconfig | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 rascal-textmate-core/.editorconfig diff --git a/rascal-textmate-core/.editorconfig b/rascal-textmate-core/.editorconfig new file mode 100644 index 0000000..6eeed87 --- /dev/null +++ b/rascal-textmate-core/.editorconfig @@ -0,0 +1,21 @@ +# Editor configuration, see http://editorconfig.org +root = true + +[*] +charset = utf-8 +indent_style = space +indent_size = 2 +insert_final_newline = true +trim_trailing_whitespace = true +max_line_length = 80 + +[*.sh] +end_of_line = lf + +[*.java] +indent_size = 4 +max_line_length = 120 + +[*.rsc] +indent_size = 4 +max_line_length = 120