Skip to content

Commit

Permalink
Merge pull request #8 from SWAT-engineering/shorter-rule-names
Browse files Browse the repository at this point in the history
Shorter rule names
  • Loading branch information
sungshik authored Jul 19, 2024
2 parents 23cca94 + 146771e commit 814cbe7
Show file tree
Hide file tree
Showing 10 changed files with 151 additions and 98 deletions.
3 changes: 2 additions & 1 deletion rascal-textmate-core/src/main/rascal/VSCode.rsc
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,12 @@ import Grammar;
import lang::rascal::\syntax::Rascal;
import lang::textmate::Conversion;
import lang::textmate::Grammar;
import lang::textmate::NameGeneration;

int main() {
str scopeName = "source.rascalmpl.injection";
RscGrammar rsc = getRscGrammar();
TmGrammar tm = toTmGrammar(rsc, scopeName)[injectionSelector = "R:source.rascalmpl"];
TmGrammar tm = toTmGrammar(rsc, scopeName, nameGeneration = short())[injectionSelector = "R:source.rascalmpl"];
toJSON(tm, indent = 2, l = |project://vscode-extension/syntaxes/rascal.tmLanguage.json|);
return 0;
}
Expand Down
33 changes: 16 additions & 17 deletions rascal-textmate-core/src/main/rascal/lang/textmate/Conversion.rsc
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import lang::rascal::grammar::analyze::Delimiters;
import lang::rascal::grammar::analyze::Dependencies;
import lang::rascal::grammar::analyze::Newlines;
import lang::textmate::Grammar;
import lang::textmate::NameGeneration;

alias RscGrammar = Grammar;

Expand All @@ -41,8 +42,8 @@ data ConversionUnit = unit(
may contain additional meta-data needed during the transformation stage.
}

TmGrammar toTmGrammar(RscGrammar rsc, ScopeName scopeName)
= transform(analyze(rsc)) [scopeName = scopeName];
// Converts Rascal grammar `rsc` to a TextMate grammar: first `rsc` is analyzed,
// then the result of the analysis is transformed (generating rule names
// according to `nameGeneration`; `long()` by default), and finally field
// `scopeName` of the resulting grammar is set to the given `scopeName`.
TmGrammar toTmGrammar(RscGrammar rsc, ScopeName scopeName, NameGeneration nameGeneration = long())
= transform(analyze(rsc), nameGeneration = nameGeneration) [scopeName = scopeName];

@synopsis{
Analyzes Rascal grammar `rsc`. Returns a list of productions, in the form of
Expand Down Expand Up @@ -113,12 +114,12 @@ list[ConversionUnit] analyze(RscGrammar rsc) {
delimiters -= getStrictPrefixes(delimiters);
delimiters -= {s | prod(_, [s, *_], _) <- prods, isDelimiter(delabel(s))};
delimiters -= {s | prod(def, _, _) <- prods, /s := getDelimiterPairs(rsc, delabel(def))};
list[Production] prodsDelimiters = [prod(lex("delimiters"), [\alt(delimiters)], {})];
list[Production] prodsDelimiters = [prod(lex(DELIMITERS_PRODUCTION_NAME), [\alt(delimiters)], {})];
// Analyze keywords
println("[LOG] Analyzing keywords");
set[Symbol] keywords = {s | /Symbol s := rsc, isKeyword(delabel(s))};
list[Production] prodsKeywords = [prod(lex("keywords"), [\alt(keywords)], {\tag("category"("keyword.control"))})];
list[Production] prodsKeywords = [prod(lex(KEYWORDS_PRODUCTION_NAME), [\alt(keywords)], {\tag("category"("keyword.control"))})];

// Return
bool isEmptyProd(prod(_, [\alt(alternatives)], _)) = alternatives == {};
Expand All @@ -130,6 +131,9 @@ list[ConversionUnit] analyze(RscGrammar rsc) {
return units;
}

// Names of the lexical productions that `analyze` creates to collect the
// delimiters and the keywords of a grammar, respectively (i.e., these
// productions are of the form `prod(lex(NAME), [\alt(...)], ...)`).
public str DELIMITERS_PRODUCTION_NAME = "$delimiters";
public str KEYWORDS_PRODUCTION_NAME = "$keywords";

@synopsis{
Transforms a list of productions, in the form of conversion units, to a
TextMate grammar
Expand All @@ -141,11 +145,12 @@ list[ConversionUnit] analyze(RscGrammar rsc) {
2. composition of TextMate rules into a TextMate grammar.
}

TmGrammar transform(list[ConversionUnit] units) {
TmGrammar transform(list[ConversionUnit] units, NameGeneration nameGeneration = long()) {

// Transform productions to rules
println("[LOG] Transforming productions to rules");
list[TmRule] rules = [toTmRule(u) | u <- units];
NameGenerator g = newNameGenerator([u.prod | u <- units], nameGeneration);
list[TmRule] rules = [toTmRule(u, g) | u <- units];

// Transform rules to grammar
println("[LOG] Transforming rules to grammar");
Expand All @@ -170,19 +175,13 @@ TmGrammar transform(list[ConversionUnit] units) {
Converts a conversion unit to a TextMate rule
}

TmRule toTmRule(ConversionUnit u)
= toTmRule(u.rsc, u.prod);

// TODO: Check if the size of rule names materially affects VS Code. Currently,
// we use stringified productions as names, which is quite useful for debugging,
// but maybe it leads to performance issues. If so, we should add a conversion
// configuration flag to control generation of "long names" vs "short names" (as
// long as names continue to be unique, everything should continue to work ok).
// Converts conversion unit `u` to a TextMate rule. The name of the rule is
// obtained by applying name generator `g` to the production of `u`.
TmRule toTmRule(ConversionUnit u, NameGenerator g)
= toTmRule(u.rsc, u.prod, g(u.prod));

private TmRule toTmRule(RscGrammar rsc, p: prod(def, _, _))
private TmRule toTmRule(RscGrammar rsc, p: prod(def, _, _), str name)
= {<begin, end>} := getDelimiterPairs(rsc, delabel(def)) // TODO: Support non-singleton sets of delimiter pairs
? toTmRule(toRegExp(rsc, begin), toRegExp(rsc, end), "<begin>:<end>", [toTmRule(toRegExp(rsc, p), "<p>")])
: toTmRule(toRegExp(rsc, p), "<p>");
? toTmRule(toRegExp(rsc, begin), toRegExp(rsc, end), "<begin.string><end.string>", [toTmRule(toRegExp(rsc, p), name)])
: toTmRule(toRegExp(rsc, p), name);

// Creates a `match` rule, called `name`, from regular expression `re`: the
// string of `re` is the pattern to match, and the captures are computed from
// the categories of `re`.
private TmRule toTmRule(RegExp re, str name)
= match(re.string, captures = toCaptures(re.categories), name = name);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
@synopsis{
Types and functions to generate names for TextMate rules
}

module lang::textmate::NameGeneration

import Grammar;
import ParseTree;
import String;
import lang::rascal::format::Grammar;

// Schemes to generate names for TextMate rules. Given a production `p` of the
// form `prod(label(l, sort(s)), _, _)`...
data NameGeneration
    = short()                    // ...the generated name is of the form `<s>.<l>` (with `s` in lower case)
    | long(bool pretty = false); // ...the generated name is of the form `<p>` (optionally pretty-printed)

// A name generator is a function that takes a production and returns a name
// (a string) for it.
alias NameGenerator = str(Production);

@synopsis{
    Creates a name generator for list of productions `prods`, using the
    `short()` name generation scheme.
}

NameGenerator newNameGenerator(list[Production] prods, short()) {

    // Define auxiliary functions to compute names for symbols: the lower-cased
    // name of a `sort` or `lex`, suffixed with `.<label>` when the symbol is
    // labeled. Only these three kinds of symbols are handled.
    str toName(sort(name)) = toLowerCase(name);
    str toName(lex(name)) = toLowerCase(name);
    str toName(label(name, symbol)) = "<toName(symbol)>.<name>";

    // Count the number of occurrences of each name in a single pass over
    // `prods` (instead of rescanning `prods` for every distinct name)
    map[str, int] occurrences = ();
    for (p <- prods) {
        occurrences[toName(p.def)] ? 0 += 1;
    }

    // Determine which names should be suffixed with an index (i.e., this is the
    // case when multiple productions would otherwise get the same name)
    map[str, int] nextIndex = (n: 0 | n <- occurrences, occurrences[n] > 1);

    // Return the generator. Note: the generator is stateful. Each call for a
    // production whose name needs an index increments the index for that name,
    // so calling the generator twice for the same production yields two
    // different names.
    return str(Production p) {
        str name = toName(p.def);
        if (name in nextIndex) { // Suffix an index if needed
            nextIndex += (name: nextIndex[name] + 1);
            name += ".<nextIndex[name]>";
        }
        return name;
    };
}

// Creates a name generator for the `long(...)` name generation scheme. The
// list of productions is ignored: the name of a production `p` is just `p`
// converted to a string (pretty-printed using `prod2rascal` if `pretty` is
// set).
NameGenerator newNameGenerator(list[Production] _, long(pretty = pretty)) {
    return str(Production p) {
        if (pretty) {
            return "<prod2rascal(p)>";
        } else {
            return "<p>";
        }
    };
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@ import lang::pico::\syntax::Main;
Grammar rsc = grammar(#Program);

list[ConversionUnit] units = [
unit(rsc, prod(lex("delimiters"),[alt({lit("-"),lit(","),lit(")"),lit("("),lit("+"),lit("||"),lit(":="),lit("\""),lit(";"),lit("nil-type")})],{})),
unit(rsc, prod(lex(DELIMITERS_PRODUCTION_NAME),[alt({lit("-"),lit(","),lit(")"),lit("("),lit("+"),lit("||"),lit(":="),lit("\""),lit(";"),lit("nil-type")})],{})),
unit(rsc, prod(lex("WhitespaceAndComment"),[lit("%%"),conditional(\iter-star(\char-class([range(1,9),range(11,1114111)])),{\end-of-line()})],{\tag("category"("Comment"))})),
unit(rsc, prod(lex("keywords"),[alt({lit("do"),lit("declare"),lit("fi"),lit("else"),lit("end"),lit("od"),lit("begin"),lit("natural"),lit("then"),lit("if"),lit("while"),lit("string")})],{\tag("category"("keyword.control"))}))
unit(rsc, prod(lex(KEYWORDS_PRODUCTION_NAME),[alt({lit("do"),lit("declare"),lit("fi"),lit("else"),lit("end"),lit("od"),lit("begin"),lit("natural"),lit("then"),lit("if"),lit("while"),lit("string")})],{\tag("category"("keyword.control"))}))
];

test bool analyzeTest() = doAnalyzeTest(rsc, units);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,14 +49,14 @@ lexical WhitespaceAndComment
Grammar rsc = grammar(#Program);

list[ConversionUnit] units = [
unit(rsc, prod(lex("delimiters"),[alt({lit("-"),lit(","),lit(")"),lit("("),lit("+"),lit("||"),lit(":="),lit("\""),lit(";")})],{})),
unit(rsc, prod(lex(DELIMITERS_PRODUCTION_NAME),[alt({lit("-"),lit(","),lit(")"),lit("("),lit("+"),lit("||"),lit(":="),lit("\""),lit(";")})],{})),
unit(rsc, prod(label("natural",sort("Type")),[lit("natural")],{\tag("category"("storage.type"))})),
unit(rsc, prod(label("natcon",sort("Expression")),[label("natcon",lex("Natural"))],{\tag("category"("constant.numeric"))})),
unit(rsc, prod(label("string",sort("Type")),[lit("string")],{\tag("category"("storage.type"))})),
unit(rsc, prod(lex("WhitespaceAndComment"),[lit("%%"),conditional(\iter-star(\char-class([range(1,9),range(11,1114111)])),{\end-of-line()})],{\tag("category"("comment.line"))})),
unit(rsc, prod(label("nil",sort("Type")),[lit("nil-type")],{\tag("category"("storage.type"))})),
unit(rsc, prod(label("id",sort("Expression")),[label("name",lex("Id"))],{\tag("category"("variable.other"))})),
unit(rsc, prod(lex("keywords"),[alt({lit("do"),lit("declare"),lit("fi"),lit("else"),lit("end"),lit("od"),lit("begin"),lit("natural"),lit("then"),lit("if"),lit("while"),lit("string")})],{\tag("category"("keyword.control"))}))
unit(rsc, prod(lex(KEYWORDS_PRODUCTION_NAME),[alt({lit("do"),lit("declare"),lit("fi"),lit("else"),lit("end"),lit("od"),lit("begin"),lit("natural"),lit("then"),lit("if"),lit("while"),lit("string")})],{\tag("category"("keyword.control"))}))
];

test bool analyzeTest() = doAnalyzeTest(rsc, units);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import lang::rascal::\syntax::Rascal;
Grammar rsc = grammar(#Module);

list[ConversionUnit] units = [
unit(rsc, prod(lex("delimiters"),[alt({lit("bottom-up-break"),lit(")"),lit("≫"),lit("\n"),lit("!:="),lit("\'"),lit("!="),lit("\>="),lit("://"),lit("non-assoc"),lit("&="),lit("\<-"),lit("*="),lit("+="),lit("top-down-break"),lit(","),lit("..."),lit("/="),lit("⇨"),lit("("),lit("*/"),lit("%"),lit("!\<\<"),lit("=\>"),lit("!\>\>"),lit("||"),lit("\>\>"),lit("::"),lit("⚠"),lit("&&"),lit("@"),lit(":="),lit("#"),lit("\<==\>"),lit("\""),lit("\<\<="),lit("}"),lit("?="),lit("\<:"),lit("==\>"),lit("^"),lit("/*"),lit(";"),lit("{"),lit("-="),lit("$T")})],{})),
unit(rsc, prod(lex(DELIMITERS_PRODUCTION_NAME),[alt({lit("bottom-up-break"),lit(")"),lit("≫"),lit("\n"),lit("!:="),lit("\'"),lit("!="),lit("\>="),lit("://"),lit("non-assoc"),lit("&="),lit("\<-"),lit("*="),lit("+="),lit("top-down-break"),lit(","),lit("..."),lit("/="),lit("⇨"),lit("("),lit("*/"),lit("%"),lit("!\<\<"),lit("=\>"),lit("!\>\>"),lit("||"),lit("\>\>"),lit("::"),lit("⚠"),lit("&&"),lit("@"),lit(":="),lit("#"),lit("\<==\>"),lit("\""),lit("\<\<="),lit("}"),lit("?="),lit("\<:"),lit("==\>"),lit("^"),lit("/*"),lit(";"),lit("{"),lit("-="),lit("$T")})],{})),
unit(rsc, prod(label("gt",lex("ConcretePart")),[lit("\\\>")],{\tag("category"("MetaSkipped"))})),
unit(rsc, prod(label("text",lex("ConcretePart")),[conditional(iter(\char-class([range(1,9),range(11,59),range(61,61),range(63,91),range(93,95),range(97,1114111)])),{\not-follow(\char-class([range(1,9),range(11,59),range(61,61),range(63,91),range(93,95),range(97,1114111)]))})],{\tag("category"("MetaSkipped"))})),
unit(rsc, prod(lex("Comment"),[lit("//"),conditional(\iter-star(\char-class([range(1,9),range(11,1114111)])),{\not-follow(\char-class([range(9,9),range(13,13),range(32,32),range(160,160),range(5760,5760),range(8192,8202),range(8239,8239),range(8287,8287),range(12288,12288)])),\end-of-line()})],{\tag("category"("Comment"))})),
Expand All @@ -18,7 +18,7 @@ list[ConversionUnit] units = [
unit(rsc, prod(label("bq",lex("ConcretePart")),[lit("\\`")],{\tag("category"("MetaSkipped"))})),
unit(rsc, prod(lex("Char"),[lex("UnicodeEscape")],{\tag("category"("Constant"))})),
unit(rsc, prod(label("bs",lex("ConcretePart")),[lit("\\\\")],{\tag("category"("MetaSkipped"))})),
unit(rsc, prod(lex("keywords"),[alt({lit("lexical"),lit("loc"),lit("if"),lit("assoc"),lit("test"),lit("lrel"),lit("throws"),lit("clear"),lit("module"),lit("any"),lit("int"),lit("quit"),lit("o"),lit("anno"),lit("true"),lit("public"),lit("keyword"),lit("for"),lit("tuple"),lit("bracket"),lit("bag"),lit("it"),lit("visit"),lit("do"),lit("data"),lit("layout"),lit("bool"),lit("edit"),lit("join"),lit("is"),lit("import"),lit("view"),lit("in"),lit("rat"),lit("modules"),lit("continue"),lit("left"),lit("num"),lit("assert"),lit("throw"),lit("one"),lit("help"),lit("default"),lit("all"),lit("global"),lit("syntax"),lit("false"),lit("finally"),lit("private"),lit("mod"),lit("java"),lit("node"),lit("start"),lit("set"),lit("right"),lit("variable"),lit("map"),lit("10"),lit("on"),lit("break"),lit("dynamic"),lit("solve"),lit("fail"),lit("unimport"),lit("outermost"),lit("real"),lit("list"),lit("insert"),lit("innermost"),lit("declarations"),lit("else"),lit("rel"),lit("function"),lit("notin"),lit("filter"),lit("datetime"),lit("catch"),lit("try"),lit("renaming"),lit("tag"),lit("has"),lit("Z"),lit("when"),lit("type"),lit("append"),lit("extend"),lit("switch"),lit("void"),lit("history"),lit("T"),lit("while"),lit("str"),lit("value"),lit("undeclare"),lit("case"),lit("alias"),lit("return"),lit("0")})],{\tag("category"("keyword.control"))}))
unit(rsc, prod(lex(KEYWORDS_PRODUCTION_NAME),[alt({lit("lexical"),lit("loc"),lit("if"),lit("assoc"),lit("test"),lit("lrel"),lit("throws"),lit("clear"),lit("module"),lit("any"),lit("int"),lit("quit"),lit("o"),lit("anno"),lit("true"),lit("public"),lit("keyword"),lit("for"),lit("tuple"),lit("bracket"),lit("bag"),lit("it"),lit("visit"),lit("do"),lit("data"),lit("layout"),lit("bool"),lit("edit"),lit("join"),lit("is"),lit("import"),lit("view"),lit("in"),lit("rat"),lit("modules"),lit("continue"),lit("left"),lit("num"),lit("assert"),lit("throw"),lit("one"),lit("help"),lit("default"),lit("all"),lit("global"),lit("syntax"),lit("false"),lit("finally"),lit("private"),lit("mod"),lit("java"),lit("node"),lit("start"),lit("set"),lit("right"),lit("variable"),lit("map"),lit("10"),lit("on"),lit("break"),lit("dynamic"),lit("solve"),lit("fail"),lit("unimport"),lit("outermost"),lit("real"),lit("list"),lit("insert"),lit("innermost"),lit("declarations"),lit("else"),lit("rel"),lit("function"),lit("notin"),lit("filter"),lit("datetime"),lit("catch"),lit("try"),lit("renaming"),lit("tag"),lit("has"),lit("Z"),lit("when"),lit("type"),lit("append"),lit("extend"),lit("switch"),lit("void"),lit("history"),lit("T"),lit("while"),lit("str"),lit("value"),lit("undeclare"),lit("case"),lit("alias"),lit("return"),lit("0")})],{\tag("category"("keyword.control"))}))
];

test bool analyzeTest() = doAnalyzeTest(rsc, units);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,10 @@ lexical UnicodeEscape
Grammar rsc = grammar(#Class);

list[ConversionUnit] units = [
unit(rsc, prod(lex("delimiters"),[alt({lit("-"),lit(")"),lit("("),lit("!"),lit("||"),lit("&&")})],{})),
unit(rsc, prod(lex(DELIMITERS_PRODUCTION_NAME),[alt({lit("-"),lit(")"),lit("("),lit("!"),lit("||"),lit("&&")})],{})),
unit(rsc, prod(lex("Char"),[lit("\\"),\char-class([range(32,32),range(34,34),range(39,39),range(45,45),range(60,60),range(62,62),range(91,93),range(98,98),range(102,102),range(110,110),range(114,114),range(116,116)])],{\tag("category"("Constant"))})),
unit(rsc, prod(lex("Char"),[lex("UnicodeEscape")],{\tag("category"("Constant"))})),
unit(rsc, prod(lex("keywords"),[alt({lit("10"),lit("0")})],{\tag("category"("keyword.control"))}))
unit(rsc, prod(lex(KEYWORDS_PRODUCTION_NAME),[alt({lit("10"),lit("0")})],{\tag("category"("keyword.control"))}))
];

test bool analyzeTest() = doAnalyzeTest(rsc, units);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ syntax ConcreteHole
Grammar rsc = grammar(#Concrete);

list[ConversionUnit] units = [
unit(rsc, prod(lex("delimiters"),[alt({lit("\n"),lit("\'"),lit("\<"),lit("\>")})],{})),
unit(rsc, prod(lex(DELIMITERS_PRODUCTION_NAME),[alt({lit("\n"),lit("\'"),lit("\<"),lit("\>")})],{})),
unit(rsc, prod(label("gt",lex("ConcretePart")),[lit("\\\>")],{\tag("category"("MetaSkipped"))})),
unit(rsc, prod(label("text",lex("ConcretePart")),[conditional(iter(\char-class([range(1,9),range(11,59),range(61,61),range(63,91),range(93,95),range(97,1114111)])),{\not-follow(\char-class([range(1,9),range(11,59),range(61,61),range(63,91),range(93,95),range(97,1114111)]))})],{\tag("category"("MetaSkipped"))})),
unit(rsc, prod(label("bs",lex("ConcretePart")),[lit("\\\\")],{\tag("category"("MetaSkipped"))})),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -280,12 +280,12 @@ lexical Boolean
Grammar rsc = grammar(#Value);

list[ConversionUnit] units = [
unit(rsc, prod(lex("delimiters"),[alt({lit(","),lit("+"),lit("*/"),lit("}"),lit("|"),lit("?"),lit("://"),lit("/*"),lit("{")})],{})),
unit(rsc, prod(lex(DELIMITERS_PRODUCTION_NAME),[alt({lit(","),lit("+"),lit("*/"),lit("}"),lit("|"),lit("?"),lit("://"),lit("/*"),lit("{")})],{})),
unit(rsc, prod(label("alnum",lex("RegExpBody")),[conditional(iter(lex("Alnum")),{\not-follow(\char-class([range(48,57),range(65,90),range(97,122)]))})],{\tag("category"("markup.italic"))})),
unit(rsc, prod(lex("String"),[lit("\""),\iter-star(lex("Alnum")),lit("\"")],{\tag("category"("string.quoted.double"))})),
unit(rsc, prod(lex("Number"),[conditional(iter(lex("Digit")),{\not-follow(\char-class([range(48,57)]))})],{\tag("category"("constant.numeric"))})),
unit(rsc, prod(label("line",lex("Comment")),[lit("//"),conditional(\iter-star(alt({lex("Blank"),lex("Alnum")})),{\end-of-line()})],{\tag("category"("comment.line.double-slash"))})),
unit(rsc, prod(lex("keywords"),[alt({lit("true"),lit("false")})],{\tag("category"("keyword.control"))}))
unit(rsc, prod(lex(KEYWORDS_PRODUCTION_NAME),[alt({lit("true"),lit("false")})],{\tag("category"("keyword.control"))}))
];

test bool analyzeTest() = doAnalyzeTest(rsc, units);
Expand Down
Loading

0 comments on commit 814cbe7

Please sign in to comment.