SWAT-engineering · sungshik · Nov 18, 2024 · Sep 25, 2024 · Sep 30, 2024 · Nov 15, 2024
diff --git a/rascal-textmate-core/src/main/rascal/lang/textmate/Conversion.rsc b/rascal-textmate-core/src/main/rascal/lang/textmate/Conversion.rsc
@@ -58,14 +58,17 @@ alias RscGrammar = Grammar;
 }
 
 @description{
-    The conversion consists of two stages:
+    The conversion consists of three stages:
+      - preprocessing (function `preprocess`);
       - analysis (function `analyze`);
       - transformation (function `transform`).
 
-    The aim of the analysis stage is to select those productions of the Rascal
-    grammar that are "suitable for conversion" to TextMate rules. The aim of the
-    transformation stage is to subsequently convert those productions and
-    produce a TextMate grammar.
+    The aim of the preprocessing stage is to slightly massage the Rascal grammar
+    to make analysis and transformation easier (e.g., replace singleton ranges
+    with just the corresponding literal). The aim of the analysis stage is to
+    select those productions of the Rascal grammar that are "suitable for
+    conversion" to TextMate rules. The aim of the transformation stage is to
+    subsequently convert those productions and produce a TextMate grammar.
 
     To be able to cleanly separate analysis and transformation, productions
     selected during the analysis stage are wrapped into *conversion units* that
@@ -161,8 +164,6 @@ private RscGrammar replaceLegacySemanticTokenTypes(RscGrammar rsc)
     Each production in the list (including the synthetic ones) is *suitable for
     conversion* to a TextMate rule. A production is "suitable for conversion"
     when it satisfies each of the following conditions:
-      - it is non-recursive;
-      - it does not match newlines;
       - it does not match the empty word;
       - it has a `@category` tag.
 
@@ -171,36 +172,42 @@ private RscGrammar replaceLegacySemanticTokenTypes(RscGrammar rsc)
 
 @description{
     The analysis consists of three stages:
-      1. selection of user-defined productions;
-      2. creation of synthetic delimiters production;
-      3. creation of synthetic keywords production.
-
-    In stage 1, a dependency graph among all productions that occur in `rsc`
-    (specifically: `prod` constructors) is created. This dependency graph is
-    subsequently pruned to keep only the suitable-for-conversion productions:
-      - first, productions with a cyclic dependency on themselves are removed;
-      - next, productions that only involve single-line matching are retained;
-      - next, productions that only involve non-empty word matching are retained;
-      - next, productions that have a `@category` tag are retained.
+     1. selection of user-defined productions;
+     2. creation of synthetic delimiters production;
+     3. creation of synthetic keywords production;
+     4. wrapping of productions inside conversion units.
+
+    In stage 1, each user-defined production (specifically: `prod` constructor)
+    that occurs in `rsc` is selected for conversion when it fulfils the
+    following requirements:
+      - it has a unique `@category` tag;
+      - it doesn't match the empty word.
 
     In stage 2, the set of all delimiters that occur in `rsc` is created. This
     set is subsequently reduced by removing:
       - strict prefixes of delimiters;
-      - delimiters that enclose user-defined productions;
-      - delimiters that occur at the beginning of user-defined productions.
+      - delimiters that also occur as outer delimiters of
+        suitable-for-conversion productions;
+      - delimiters that also occur as inner delimiters of
+        suitable-for-conversion productions.
 
     In stage 3, the set of all keywords that occur in `rsc` is created.
+
+    In stage 4, each suitable-for-conversion production is wrapped in a
+    conversion unit with additional metadata (e.g., the inner/outer delimiters
+    of the production). The list of conversion units is subsequently reduced
+    by removing strict prefixes, and sorted.
 }
 
 list[ConversionUnit] analyze(RscGrammar rsc, str name) {
     str jobLabel = "Analyzing<name == "" ? "" : " (<name>)">";
     jobStart(jobLabel, work = 6);
 
-    // Analyze productions
+    // Stage 1: Analyze productions
     jobStep(jobLabel, "Analyzing productions");
     list[Production] prods = [p | /p: prod(_, _, _) <- rsc];
 
-    // Analyze categories
+    // Stage 1: Analyze categories
     jobStep(jobLabel, "Analyzing categories");
     prods = for (p <- prods) {
 
@@ -221,24 +228,24 @@ list[ConversionUnit] analyze(RscGrammar rsc, str name) {
         append p;
     }
 
-    // Analyze emptiness
+    // Stage 1: Analyze emptiness
     jobStep(jobLabel, "Analyzing emptiness");
     prods = [p | p <- prods, !tryParse(rsc, delabel(p.def), "")];
 
-    // Analyze delimiters
+    // Stage 2: Analyze delimiters
     jobStep(jobLabel, "Analyzing delimiters");
     set[Symbol] delimiters = {s | /Symbol s := rsc, isDelimiter(delabel(s))};
     delimiters &= removeStrictPrefixes(delimiters);
     delimiters -= {s | p <- prods, /just(s) := getOuterDelimiterPair(rsc, p)};
     delimiters -= {s | p <- prods, /just(s) := getInnerDelimiterPair(rsc, p, getOnlyFirst = true)};
     list[Production] prodsDelimiters = [prod(lex(DELIMITERS_PRODUCTION_NAME), [\alt(delimiters)], {})];
 
-    // Analyze keywords
+    // Stage 3: Analyze keywords
     jobStep(jobLabel, "Analyzing keywords");
     set[Symbol] keywords = {s | /Symbol s := rsc, isKeyword(delabel(s))};
     list[Production] prodsKeywords = [prod(lex(KEYWORDS_PRODUCTION_NAME), [\alt(keywords)], {\tag("category"("keyword.control"))})];
 
-    // Prepare units
+    // Stage 4: Prepare units
     jobStep(jobLabel, "Preparing units");
     bool isEmptyProd(prod(_, [\alt(alternatives)], _))
         = alternatives == {};
@@ -260,8 +267,13 @@ list[ConversionUnit] analyze(RscGrammar rsc, str name) {
 
 @description{
     The transformation consists of two stages:
-      1. creation of TextMate rules;
-      2. composition of TextMate rules into a TextMate grammar.
+     1. creation of TextMate rules;
+     2. composition of TextMate rules into a TextMate grammar.
+
+    Stage 1 is organized as a pipeline that, step-by-step, adds names and rules
+    to the conversion units. First, it adds unique names. Next, it adds "inner
+    rules". Last, it adds "outer rules". See module
+    `lang::textmate::ConversionUnit` for an explanation of inner/outer rules.
 }
 
 TmGrammar transform(list[ConversionUnit] units, str name, NameGeneration nameGeneration = long()) {

diff --git a/rascal-textmate-core/src/main/rascal/lang/textmate/NameGeneration.rsc b/rascal-textmate-core/src/main/rascal/lang/textmate/NameGeneration.rsc
@@ -49,9 +49,6 @@ alias NameGenerator = str(Production);
 
 NameGenerator newNameGenerator(list[Production] prods, short()) {
 
-    // Define auxiliary functions to compute names for symbols
-
-
     // Define auxiliary function to count the number of occurrences of a name
     int count(str name) = (0 | it + 1 | p <- prods, toName(p.def) == name);