Skip to content

Commit

Permalink
Merge pull request #19 from SWAT-engineering/no-conversion-when-delim…
Browse files Browse the repository at this point in the history
…iter-occurs-elsewhere-too

No conversion when delimiter occurs elsewhere too
  • Loading branch information
sungshik authored Sep 23, 2024
2 parents 899fb3d + 416bc7d commit 5eda17f
Show file tree
Hide file tree
Showing 18 changed files with 161 additions and 323 deletions.
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
@synopsis{
Utility functions to work with grammars, productions, and symbols.
Utility functions to work with grammars, productions, and symbols
}

module lang::rascal::grammar::Util
Expand All @@ -9,6 +9,8 @@ import Grammar;
import ParseTree;
import String;

import util::ListUtil;

@synopsis{
Utility functions for grammars
}
Expand Down Expand Up @@ -47,6 +49,62 @@ bool isRecursive(Grammar g, Symbol s) {
return check({}, s);
}
@synopsis{
Representation of a pointer to a symbol in (the list of symbols of) a
production. This is useful to distinguish between different occurrences of
the same symbol in a grammar (i.e., they have different pointers).
}
alias Pointer = tuple[Production p, int index];
@synopsis{
Finds the list of pointers -- a *trace* -- to the first occurrence of symbol
`s`, if any, starting from production `p`, optionally in a particular
direction (default: `forward()`). That is: if `<p1,i>` is followed by
`<p2,_>` in the returned list, then `p1.symbols[i]` is a non-terminal and
`p2` is one of its productions.
}
@description{
For instance, consider the following grammar:
```
lexical X = Y;
lexical Y = alt1: "[" "[" "[" Z1 "]" "]" "]" | alt2: "<" Z2 ">";
lexical Z1 = "foo" "bar";
lexical Z2 = "baz";
```
The list of pointers to `"bar"`, starting from `X`, is:
- `<X,0>`
- `<Y.alt1,3>`
- `<Z1,1>`
The list of pointers to `"qux"` is just empty.
}
list[Pointer] find(Grammar g, Production p, Symbol s, Direction dir = forward()) {
// Recursive search helper. `doing` holds the productions that are already
// being explored on the current path; a production that is in `doing` is
// not explored again, so the search does not loop on recursive grammars.
// Returns the trace to the first occurrence of `needle` found from
// `haystack`, or the empty list when there is none.
list[Pointer] doFind(set[Production] doing, Production haystack, Symbol needle) {
// Visit the symbol indices of `haystack` in the order produced by `reorder`
// for `dir` (presumably ascending for `forward()` and descending for
// `backward()` -- confirm in `util::ListUtil`).
for (haystack notin doing, i <- reorder([0..size(haystack.symbols)], dir)) {
// Labels are stripped before the symbol is compared with `needle`
Symbol ith = delabel(haystack.symbols[i]);
if (ith == needle) {
// Direct hit: the trace ends at this pointer
return [<haystack, i>];
}
// Otherwise, when `ith` is a non-terminal, descend into each production
// that `lookup` returns for it
for (isNonTerminalType(ith), child <- lookup(g, ith)) {
// A non-empty result means the needle was found below `child`; prefix
// the trace with the pointer to the current symbol. Note that `s` (from
// the enclosing `find`) is passed instead of `needle`; these are always
// equal, because `doFind` is only ever called with `s` as its needle.
if (list[Pointer] l: [_, *_] := doFind(doing + haystack, child, s)) {
return [<haystack, i>] + l;
}
}
}
// No occurrence found from `haystack` (or `haystack` is already in `doing`)
return [];
}
return doFind({}, p, s);
}
@synopsis{
Looks up a list of productions for symbol `s` in grammar `g`, replacing
formal parameters with actual parameters when needed
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -193,9 +193,9 @@ default Maybe[Symbol] unique(set[Maybe[Symbol]] _) = nothing();
}
bool isDelimiter(lit(string))
= /^\w+$/ !:= string;
= /^\W+$/ := string;
bool isDelimiter(cilit(string))
= /^\w+$/ !:= string;
= isDelimiter(lit(string));
default bool isDelimiter(Symbol _)
= false;
Expand All @@ -205,9 +205,9 @@ default bool isDelimiter(Symbol _)
}
bool isKeyword(lit(string))
= /^\w+$/ := string;
= /^\w.*$/ := string;
bool isKeyword(cilit(string))
= /^\w+$/ := string;
= isKeyword(lit(string));
default bool isKeyword(Symbol _)
= false;
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,16 @@ private list[ConversionUnit] addInnerRules(list[ConversionUnit] units) {
// Convert all units in the group to match patterns (including,
// optimistically, multi-line units as-if they are single-line)
for (u <- group, !u.recursive) {
TmRule r = toTmRule(toRegExp(u.rsc, u.prod, guard = true))
// Add the guard (i.e., look-behind condition to match layout) only
// when the units in the group don't begin with a delimiter. Why is
// this? We *don't* want `32` to be highlighted as a number in
// `int aer32 = 34`. However, we *do* want `>bar"` to be highlighted
// as a string in `"foo<x==5>bar"`. As a heuristic, if the token
// starts with a delimiter (e.g., `>`), then it should be allowed
// for its occurrence to not be preceded by layout.
bool guard = nothing() := u.innerDelimiters.begin;
TmRule r = toTmRule(toRegExp(u.rsc, u.prod, guard = guard))
[name = "/inner/single/<u.name>"];
rules = insertIn(rules, (u: r));
Expand All @@ -217,6 +226,25 @@ private list[ConversionUnit] addInnerRules(list[ConversionUnit] units) {
// Simple case: each unit does have an `end` inner delimiter
if (_ <- group && all(u <- group, just(_) := u.innerDelimiters.end)) {
// Create a set of pointers to the first (resp. last) occurrence
// of `pivot` in each unit, when `pivot` is a `begin` delimiter
// (resp. an `end` delimiter) of the group. If `pivot` occurs
// elsewhere in the grammar as well, then skip the conversion
// of these multi-line units to a begin/end pattern. This is to
// avoid tokenization mistakes in which the other occurrences of
// `pivot` in the input are mistakenly interpreted as the
// beginning or ending of a unit in the group.
Symbol pivot = key.val;
set[Pointer] pointers = {};
pointers += pivot in begins ? {*find(rsc, u.prod, pivot, dir = forward()) [-1..] | u <- group} : {};
pointers += pivot in ends ? {*find(rsc, u.prod, pivot, dir = backward())[-1..] | u <- group} : {};
if (any(/p: prod(_, [*before, pivot, *_], _) := rsc.rules, <p, size(before)> notin pointers)) {
continue;
}
// Compute a set of segments that need to be consumed between
// the `begin` delimiter and the `end` delimiters. Each of these
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,6 @@ bool doTransformTest(list[ConversionUnit] units, RepositoryStats expect, str nam
assert actual.include == expect.include : "Actual number of top-level include patterns in repository: <actual.include>. Expected: <expect.include>.";

// Test behavioral properties of the TextMate grammar

loc lTest = lProject + "/src/main/rascal/lang/textmate/conversiontests/<name>.test";
loc lTester = lProject + "/node_modules/vscode-tmgrammar-test";
if (!exists(lTest)) {
Expand All @@ -103,7 +102,21 @@ bool doTransformTest(list[ConversionUnit] units, RepositoryStats expect, str nam
resolveLocation(lTest).path[(windows ? 1 : 0)..]
];

if (<output, exitCode> := execWithCode(lExec, args = args) && exitCode != 0) {
// TODO: The following function serves as a workaround for a race
// in (the Java-part of) the implementation of `execWithCode`. A fix is
// already available but not yet released. When it is, this function
// should be removed (and `execWithCode` called directly). See also:
// https://github.com/usethesource/rascal/commit/1ce9e59dfd7098327bbaf55a985c2a643ff52861
tuple[str, int] execWithCodeUntilSuccess() {
// Calls `execWithCode` with the `lExec` and `args` of the enclosing scope
// and returns its result. Any exception is caught, logged, and followed by
// a recursive retry. NOTE: there is no upper bound on the number of
// retries, so a failure that keeps recurring makes this recurse
// indefinitely.
try {
return execWithCode(lExec, args = args);
} catch e: {
println("[LOG] Retrying after unexpected exception: <e>");
return execWithCodeUntilSuccess();
}
}

if (<output, exitCode> := execWithCodeUntilSuccess() && exitCode != 0) {
println(output);
assert false : "Actual tokenization does not match expected tokenization (see output above for details)";
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@ import lang::pico::\syntax::Main;
Grammar rsc = preprocess(grammar(#Program));

list[ConversionUnit] units = [
unit(rsc, prod(lex(DELIMITERS_PRODUCTION_NAME),[alt({lit("-"),lit(","),lit(")"),lit("("),lit("+"),lit("||"),lit(":="),lit("\""),lit(";"),lit("nil-type")})],{}), false, false, <nothing(),nothing()>, <nothing(),nothing()>),
unit(rsc, prod(lex(DELIMITERS_PRODUCTION_NAME),[alt({lit("-"),lit(","),lit(")"),lit("("),lit("+"),lit("||"),lit(":="),lit("\""),lit(";")})],{}), false, false, <nothing(),nothing()>, <nothing(),nothing()>),
unit(rsc, prod(lex("WhitespaceAndComment"),[lit("%%"),conditional(\iter-star(\char-class([range(1,9),range(11,1114111)])),{\end-of-line()})],{\tag("category"("Comment"))}), false, false, <nothing(),nothing()>, <just(lit("%%")),nothing()>),
unit(rsc, prod(lex("WhitespaceAndComment"),[lit("%"),iter(\char-class([range(1,36),range(38,1114111)])),lit("%")],{\tag("category"("Comment"))}), false, true, <nothing(),nothing()>, <just(lit("%")),just(lit("%"))>),
unit(rsc, prod(lex(KEYWORDS_PRODUCTION_NAME),[alt({lit("do"),lit("declare"),lit("fi"),lit("else"),lit("end"),lit("od"),lit("begin"),lit("natural"),lit("then"),lit("if"),lit("while"),lit("string")})],{\tag("category"("keyword.control"))}), false, false, <nothing(),nothing()>, <nothing(),nothing()>)
unit(rsc, prod(lex(KEYWORDS_PRODUCTION_NAME),[alt({lit("do"),lit("declare"),lit("fi"),lit("else"),lit("end"),lit("od"),lit("nil-type"),lit("begin"),lit("natural"),lit("then"),lit("if"),lit("while"),lit("string")})],{\tag("category"("keyword.control"))}), false, false, <nothing(),nothing()>, <nothing(),nothing()>)
];

test bool analyzeTest() = doAnalyzeTest(rsc, units);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,14 +76,14 @@ Grammar rsc = preprocess(grammar(#Program));
list[ConversionUnit] units = [
unit(rsc, prod(lex(DELIMITERS_PRODUCTION_NAME),[alt({lit("-"),lit(","),lit(")"),lit("("),lit("+"),lit("||"),lit(":="),lit("\\")})],{}), false, false, <nothing(),nothing()>, <nothing(),nothing()>),
unit(rsc, prod(label("natural",sort("Type")),[lit("natural")],{\tag("category"("storage.type"))}), false, false, <just(lit(":")),just(lit(";"))>, <nothing(),nothing()>),
unit(rsc, prod(label("nil",sort("Type")),[lit("nil-type")],{\tag("category"("storage.type"))}), false, false, <just(lit(":")),just(lit(";"))>, <just(lit("nil-type")),just(lit("nil-type"))>),
unit(rsc, prod(label("nil",sort("Type")),[lit("nil-type")],{\tag("category"("storage.type"))}), false, false, <just(lit(":")),just(lit(";"))>, <nothing(),nothing()>),
unit(rsc, prod(label("string",sort("Type")),[lit("string")],{\tag("category"("storage.type"))}), false, false, <just(lit(":")),just(lit(";"))>, <nothing(),nothing()>),
unit(rsc, prod(lex("WhitespaceAndComment"),[lit("%%"),conditional(\iter-star(\char-class([range(1,9),range(11,1114111)])),{\end-of-line()})],{\tag("category"("comment.line"))}), false, false, <nothing(),nothing()>, <just(lit("%%")),nothing()>),
unit(rsc, prod(lex("WhitespaceAndComment"),[lit("%"),iter(\char-class([range(1,36),range(38,1114111)])),lit("%")],{\tag("category"("comment.block"))}), false, true, <nothing(),nothing()>, <just(lit("%")),just(lit("%"))>),
unit(rsc, prod(label("strcon",sort("Expression")),[label("string",lex("String"))],{\tag("category"("string.quoted.double"))}), false, true, <nothing(),nothing()>, <just(lit("\"")),just(lit("\""))>),
unit(rsc, prod(label("id",sort("Expression")),[label("name",lex("Id"))],{\tag("category"("variable.other"))}), false, false, <nothing(),nothing()>, <nothing(),nothing()>),
unit(rsc, prod(label("natcon",sort("Expression")),[label("natcon",lex("Natural"))],{\tag("category"("constant.numeric"))}), false, false, <nothing(),nothing()>, <nothing(),nothing()>),
unit(rsc, prod(lex(KEYWORDS_PRODUCTION_NAME),[alt({lit("do"),lit("declare"),lit("fi"),lit("else"),lit("end"),lit("od"),lit("begin"),lit("natural"),lit("then"),lit("if"),lit("while"),lit("string")})],{\tag("category"("keyword.control"))}), false, false, <nothing(),nothing()>, <nothing(),nothing()>)
unit(rsc, prod(lex(KEYWORDS_PRODUCTION_NAME),[alt({lit("do"),lit("declare"),lit("fi"),lit("else"),lit("end"),lit("od"),lit("nil-type"),lit("begin"),lit("natural"),lit("then"),lit("if"),lit("while"),lit("string")})],{\tag("category"("keyword.control"))}), false, false, <nothing(),nothing()>, <nothing(),nothing()>)
];

test bool analyzeTest() = doAnalyzeTest(rsc, units);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import lang::rascal::\syntax::Rascal;
Grammar rsc = preprocess(grammar(#Module));

list[ConversionUnit] units = [
unit(rsc, prod(lex(DELIMITERS_PRODUCTION_NAME),[alt({lit("bottom-up-break"),lit(")"),lit("("),lit("%"),lit("!:="),lit("\<==\>"),lit("\<\<="),lit("!="),lit("\>="),lit("://"),lit("non-assoc"),lit("&="),lit("\<-"),lit("*="),lit("+="),lit("top-down-break"),lit(","),lit("..."),lit("/="),lit("!\<\<"),lit("=\>"),lit("!\>\>"),lit("||"),lit("\>\>"),lit("::"),lit("&&"),lit(":="),lit("#"),lit("?="),lit("\<:"),lit("==\>"),lit("^"),lit(";"),lit("{"),lit("-="),lit("$T")})],{}), false, false, <nothing(),nothing()>, <nothing(),nothing()>),
unit(rsc, prod(lex(DELIMITERS_PRODUCTION_NAME),[alt({lit(","),lit(")"),lit("("),lit("%"),lit("\<==\>"),lit("\<\<="),lit("!="),lit("\>="),lit("://"),lit("&="),lit("\<-"),lit("-="),lit("*="),lit("+="),lit("..."),lit("/="),lit("!:="),lit("$"),lit("!\<\<"),lit("=\>"),lit("!\>\>"),lit("||"),lit("\>\>"),lit("::"),lit("&&"),lit(":="),lit("#"),lit("?="),lit("\<:"),lit("==\>"),lit("^"),lit(";"),lit("{")})],{}), false, false, <nothing(),nothing()>, <nothing(),nothing()>),
unit(rsc, prod(label("stderrOutput",lex("Output")),[conditional(lit("⚠"),{\begin-of-line()}),\iter-star(\char-class([range(1,9),range(11,12),range(14,1114111)])),lit("\n")],{\tag("category"("StdErr"))}), false, false, <nothing(),nothing()>, <just(lit("⚠")),just(lit("\n"))>),
unit(rsc, prod(label("stdoutOutput",lex("Output")),[conditional(lit("≫"),{\begin-of-line()}),\iter-star(\char-class([range(1,9),range(11,12),range(14,1114111)])),lit("\n")],{\tag("category"("StdOut"))}), false, false, <nothing(),nothing()>, <just(lit("≫")),just(lit("\n"))>),
unit(rsc, prod(label("resultOutput",lex("Output")),[lit("⇨"),\iter-star(\char-class([range(1,9),range(11,12),range(14,1114111)])),lit("\n")],{\tag("category"("Result"))}), false, false, <nothing(),nothing()>, <just(lit("⇨")),just(lit("\n"))>),
Expand All @@ -35,8 +35,8 @@ list[ConversionUnit] units = [
unit(rsc, prod(lex("CaseInsensitiveStringConstant"),[lit("\'"),label("chars",\iter-star(lex("StringCharacter"))),lit("\'")],{\tag("category"("Constant"))}), false, true, <nothing(),nothing()>, <just(lit("\'")),just(lit("\'"))>),
unit(rsc, prod(lex("PreStringChars"),[lit("\""),\iter-star(lex("StringCharacter")),lit("\<")],{\tag("category"("Constant"))}), false, true, <nothing(),nothing()>, <just(lit("\"")),just(lit("\<"))>),
unit(rsc, prod(lex("StringConstant"),[lit("\""),label("chars",\iter-star(lex("StringCharacter"))),lit("\"")],{\tag("category"("Constant"))}), false, true, <nothing(),nothing()>, <just(lit("\"")),just(lit("\""))>),
unit(rsc, prod(lex(KEYWORDS_PRODUCTION_NAME),[alt({lit("lexical"),lit("loc"),lit("if"),lit("assoc"),lit("test"),lit("lrel"),lit("throws"),lit("clear"),lit("module"),lit("any"),lit("int"),lit("quit"),lit("o"),lit("anno"),lit("true"),lit("public"),lit("keyword"),lit("for"),lit("tuple"),lit("bracket"),lit("bag"),lit("it"),lit("visit"),lit("do"),lit("data"),lit("layout"),lit("bool"),lit("edit"),lit("join"),lit("is"),lit("import"),lit("view"),lit("in"),lit("rat"),lit("modules"),lit("continue"),lit("left"),lit("num"),lit("assert"),lit("throw"),lit("one"),lit("help"),lit("default"),lit("all"),lit("global"),lit("syntax"),lit("false"),lit("finally"),lit("private"),lit("mod"),lit("java"),lit("node"),lit("start"),lit("set"),lit("right"),lit("variable"),lit("map"),lit("10"),lit("on"),lit("break"),lit("dynamic"),lit("solve"),lit("fail"),lit("unimport"),lit("outermost"),lit("real"),lit("list"),lit("insert"),lit("innermost"),lit("declarations"),lit("else"),lit("rel"),lit("function"),lit("notin"),lit("filter"),lit("datetime"),lit("catch"),lit("try"),lit("renaming"),lit("tag"),lit("has"),lit("Z"),lit("when"),lit("type"),lit("append"),lit("extend"),lit("switch"),lit("void"),lit("history"),lit("T"),lit("while"),lit("str"),lit("value"),lit("undeclare"),lit("case"),lit("alias"),lit("return"),lit("0")})],{\tag("category"("keyword.control"))}), false, false, <nothing(),nothing()>, <nothing(),nothing()>)
unit(rsc, prod(lex(KEYWORDS_PRODUCTION_NAME),[alt({lit("lexical"),lit("loc"),lit("test"),lit("lrel"),lit("throws"),lit("clear"),lit("top-down-break"),lit("module"),lit("any"),lit("int"),lit("quit"),lit("bottom-up-break"),lit("o"),lit("anno"),lit("true"),lit("public"),lit("keyword"),lit("for"),lit("tuple"),lit("bracket"),lit("bag"),lit("it"),lit("visit"),lit("do"),lit("data"),lit("layout"),lit("bool"),lit("edit"),lit("join"),lit("is"),lit("import"),lit("view"),lit("in"),lit("rat"),lit("modules"),lit("continue"),lit("left"),lit("num"),lit("assert"),lit("throw"),lit("one"),lit("help"),lit("default"),lit("all"),lit("global"),lit("syntax"),lit("false"),lit("finally"),lit("private"),lit("mod"),lit("java"),lit("node"),lit("start"),lit("set"),lit("if"),lit("bottom-up"),lit("right"),lit("variable"),lit("map"),lit("10"),lit("on"),lit("break"),lit("dynamic"),lit("solve"),lit("fail"),lit("unimport"),lit("outermost"),lit("real"),lit("list"),lit("insert"),lit("innermost"),lit("declarations"),lit("else"),lit("rel"),lit("function"),lit("notin"),lit("filter"),lit("datetime"),lit("catch"),lit("try"),lit("renaming"),lit("tag"),lit("has"),lit("top-down"),lit("Z"),lit("when"),lit("type"),lit("append"),lit("extend"),lit("non-assoc"),lit("assoc"),lit("switch"),lit("void"),lit("history"),lit("T"),lit("while"),lit("str"),lit("value"),lit("undeclare"),lit("case"),lit("alias"),lit("return"),lit("0")})],{\tag("category"("keyword.control"))}), false, false, <nothing(),nothing()>, <nothing(),nothing()>)
];

test bool analyzeTest() = doAnalyzeTest(rsc, units);
test bool transformTest() = doTransformTest(units, <20, 8, 0>);
test bool transformTest() = doTransformTest(units, <20, 4, 0>, name = "Rascal");
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# SYNTAX TEST "Rascal"

"foo bar"
# ^^^^^^^^^ Constant

"foo<x + 1>bar"
# ^^^^^ ^^^^^ Constant
# ^^^^^ -Constant

### TODO: The following test shows that, currently, multi-line strings are
### disabled. This is because the converter determines that:
### - `>` doesn't uniquely delineate interpolation (it could also be
### greater-than in expressions or prioritize-before in grammars);
### - `"` doesn't uniquely delineate strings (it could also be the end of
### interpolation).
### Therefore, to avoid excessive tokenization errors, the converter doesn't
### generate begin/end patterns that begin with `>` or `"`. This might be
### improved in the future.

"foo
# ^^^^ -Constant
bar"
# ^^^^ -Constant
Original file line number Diff line number Diff line change
Expand Up @@ -48,4 +48,4 @@ list[ConversionUnit] units = [
];

test bool analyzeTest() = doAnalyzeTest(rsc, units);
test bool transformTest() = doTransformTest(units, <5, 1, 0>);
test bool transformTest() = doTransformTest(units, <5, 1, 0>, name = "RascalClass");
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# SYNTAX TEST "RascalClass"
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,4 @@ list[ConversionUnit] units = [
];

test bool analyzeTest() = doAnalyzeTest(rsc, units);
test bool transformTest() = doTransformTest(units, <7, 1, 0>);
test bool transformTest() = doTransformTest(units, <7, 1, 0>, name = "RascalConcrete");
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# SYNTAX TEST "RascalConcrete"
Original file line number Diff line number Diff line change
Expand Up @@ -78,4 +78,4 @@ list[ConversionUnit] units = [
];

test bool analyzeTest() = doAnalyzeTest(rsc, units);
test bool transformTest() = doTransformTest(units, <6, 2, 0>, name = "RascalStringLiteral");
test bool transformTest() = doTransformTest(units, <6, 0, 0>, name = "RascalStringLiteral");
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,5 @@

"foo <5 > 6> bar"
# ^^^^^^ Constant
# ^^ -Constant
# ^^^^^^^^^ Constant
## TODO: Improve this? (Probably very hard to do with TextMate...)
# ^^^^^ -Constant
# ^^^^^^ Constant
Original file line number Diff line number Diff line change
Expand Up @@ -298,4 +298,4 @@ list[ConversionUnit] units = [
];

test bool analyzeTest() = doAnalyzeTest(rsc, units);
test bool transformTest() = doTransformTest(units, <7, 2, 0>);
test bool transformTest() = doTransformTest(units, <7, 2, 0>, name = "Walkthrough");
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# SYNTAX TEST "Walkthrough"
Loading

0 comments on commit 5eda17f

Please sign in to comment.