diff --git a/rascal-textmate-core/src/main/rascal/lang/oniguruma/Conversion.rsc b/rascal-textmate-core/src/main/rascal/lang/oniguruma/Conversion.rsc index 6091c11..9203ae3 100644 --- a/rascal-textmate-core/src/main/rascal/lang/oniguruma/Conversion.rsc +++ b/rascal-textmate-core/src/main/rascal/lang/oniguruma/Conversion.rsc @@ -170,10 +170,10 @@ str encode(list[int] chars, bool withBounds = false) ? "\\b\\b" : intercalate("", [encode(i) | i <- chars]); -str encode(int char) - = char in alnum - ? stringChar(char) - : "\\x{}"; +str encode(int char) = preEncoded[char] ? "\\x{}"; + + +private set[int] charRange(str from, str to) = {*[charAt(from, 0)..charAt(to, 0) + 1]}; private str toHex(int i) = i < 16 @@ -184,5 +184,24 @@ private list[str] hex = ["" | i <- [0..10]] + ["A", "B", "C", "D", "E", "F"]; -private set[int] alnum - = {*[48..58], *[65..91], *[97..123]}; \ No newline at end of file +private set[int] printable + = charRange("0", "9") + + charRange("a", "z") + + charRange("A", "Z") + ; + +private map[int, str] escapes = ( + 0x09: "\\t", + 0x0A: "\\n", + 0x0D: "\\r", + 0x20: "\\x20" // spaces look a bit strange in a regex, although they are valid, people tend to read over them as layout +) + ( c : "\\" | c <- [0x21..0x7F], c notin printable); // regular ascii characters that might have special meaning in a regex + + +private map[int, str] addFallback(map[int, str] defined) + = ( char : "\\x" | char <- [0..256], char notin defined) + + defined + ; + +private map[int, str] preEncoded + = addFallback(escapes + ( c : stringChar(c) | c <- printable)); \ No newline at end of file diff --git a/vscode-extension/syntaxes/rascal.tmLanguage.json b/vscode-extension/syntaxes/rascal.tmLanguage.json index b747e31..05935b3 100644 --- a/vscode-extension/syntaxes/rascal.tmLanguage.json +++ b/vscode-extension/syntaxes/rascal.tmLanguage.json @@ -1,7 +1,7 @@ { "repository": { "prod(lex(\"Comment\"),[lit(\"//\"),conditional(\\iter-star(\\char-class([range(1,9),range(11,1114111)])),{\\not-follow(\\char-class([range(9,9),range(13,13),range(32,32),range(160,160),range(5760,5760),range(8192,8202),range(8239,8239),range(8287,8287),range(12288,12288)])),\\end-of-line()})],{tag(\"category\"(\"comment\"))})": { - "match": "((?:\\x{2F}\\x{2F})(?:(?:[\\x{1}-\\x{9}]|[\\x{B}-\\x{10FFFF}])*?(?!(?:[\\x{9}-\\x{9}]|[\\x{D}-\\x{D}]|[\\x{20}-\\x{20}]|[\\x{A0}-\\x{A0}]|[\\x{1680}-\\x{1680}]|[\\x{2000}-\\x{200A}]|[\\x{202F}-\\x{202F}]|[\\x{205F}-\\x{205F}]|[\\x{3000}-\\x{3000}]))(?:$)))", + "match": "((?:\\/\\/)(?:(?:[\\x01-\\t]|[\\x0B-\\x{10FFFF}])*?(?!(?:[\\t-\\t]|[\\r-\\r]|[\\x20-\\x20]|[\\xA0-\\xA0]|[\\x{1680}-\\x{1680}]|[\\x{2000}-\\x{200A}]|[\\x{202F}-\\x{202F}]|[\\x{205F}-\\x{205F}]|[\\x{3000}-\\x{3000}]))(?:$)))", "name": "prod(lex(\"Comment\"),[lit(\"//\"),conditional(\\iter-star(\\char-class([range(1,9),range(11,1114111)])),{\\not-follow(\\char-class([range(9,9),range(13,13),range(32,32),range(160,160),range(5760,5760),range(8192,8202),range(8239,8239),range(8287,8287),range(12288,12288)])),\\end-of-line()})],{tag(\"category\"(\"comment\"))})", "captures": { "1": { @@ -19,16 +19,16 @@ } }, "prod(lex(\"delimiters\"),[alt({lit(\"bottom-up-break\"),lit(\")\"),lit(\"≫\"),lit(\"\\n\"),lit(\"!:=\"),lit(\"\\'\"),lit(\"!=\"),lit(\"\\>=\"),lit(\"://\"),lit(\"non-assoc\"),lit(\"&=\"),lit(\"\\<-\"),lit(\"*=\"),lit(\"+=\"),lit(\"top-down-break\"),lit(\",\"),lit(\"...\"),lit(\"/=\"),lit(\"⇨\"),lit(\"(\"),lit(\"*/\"),lit(\"%\"),lit(\"!\\<\\<\"),lit(\"=\\>\"),lit(\"!\\>\\>\"),lit(\"||\"),lit(\"\\>\\>\"),lit(\"::\"),lit(\"⚠\"),lit(\"&&\"),lit(\"@\"),lit(\":=\"),lit(\"#\"),lit(\"\\<==\\>\"),lit(\"\\\"\"),lit(\"\\<\\<=\"),lit(\"}\"),lit(\"?=\"),lit(\"\\<:\"),lit(\"==\\>\"),lit(\"^\"),lit(\"/*\"),lit(\";\"),lit(\"{\"),lit(\"-=\"),lit(\"$T\")})],{})": { - "match": "(?:(?:bottom\\x{2D}up\\x{2D}break)|(?:\\x{29})|(?:\\x{226B})|(?:\\x{A})|(?:\\x{21}\\x{3A}\\x{3D})|(?:\\x{27})|(?:\\x{21}\\x{3D})|(?:\\x{3E}\\x{3D})|(?:\\x{3A}\\x{2F}\\x{2F})|(?:non\\x{2D}assoc)|(?:\\x{26}\\x{3D})|(?:\\x{3C}\\x{2D})|(?:\\x{2A}\\x{3D})|(?:\\x{2B}\\x{3D})|(?:top\\x{2D}down\\x{2D}break)|(?:\\x{2C})|(?:\\x{2E}\\x{2E}\\x{2E})|(?:\\x{2F}\\x{3D})|(?:\\x{21E8})|(?:\\x{28})|(?:\\x{2A}\\x{2F})|(?:\\x{25})|(?:\\x{21}\\x{3C}\\x{3C})|(?:\\x{3D}\\x{3E})|(?:\\x{21}\\x{3E}\\x{3E})|(?:\\x{7C}\\x{7C})|(?:\\x{3E}\\x{3E})|(?:\\x{3A}\\x{3A})|(?:\\x{26A0})|(?:\\x{26}\\x{26})|(?:\\x{40})|(?:\\x{3A}\\x{3D})|(?:\\x{23})|(?:\\x{3C}\\x{3D}\\x{3D}\\x{3E})|(?:\\x{22})|(?:\\x{3C}\\x{3C}\\x{3D})|(?:\\x{7D})|(?:\\x{3F}\\x{3D})|(?:\\x{3C}\\x{3A})|(?:\\x{3D}\\x{3D}\\x{3E})|(?:\\x{5E})|(?:\\x{2F}\\x{2A})|(?:\\x{3B})|(?:\\x{7B})|(?:\\x{2D}\\x{3D})|(?:\\x{24}T))", + "match": "(?:(?:bottom\\-up\\-break)|(?:\\))|(?:\\x{226B})|(?:\\n)|(?:\\!\\:\\=)|(?:\\')|(?:\\!\\=)|(?:\\>\\=)|(?:\\:\\/\\/)|(?:non\\-assoc)|(?:\\&\\=)|(?:\\<\\-)|(?:\\*\\=)|(?:\\+\\=)|(?:top\\-down\\-break)|(?:\\,)|(?:\\.\\.\\.)|(?:\\/\\=)|(?:\\x{21E8})|(?:\\()|(?:\\*\\/)|(?:\\%)|(?:\\!\\<\\<)|(?:\\=\\>)|(?:\\!\\>\\>)|(?:\\|\\|)|(?:\\>\\>)|(?:\\:\\:)|(?:\\x{26A0})|(?:\\&\\&)|(?:\\@)|(?:\\:\\=)|(?:\\#)|(?:\\<\\=\\=\\>)|(?:\\\")|(?:\\<\\<\\=)|(?:\\})|(?:\\?\\=)|(?:\\<\\:)|(?:\\=\\=\\>)|(?:\\^)|(?:\\/\\*)|(?:\\;)|(?:\\{)|(?:\\-\\=)|(?:\\$T))", "name": "prod(lex(\"delimiters\"),[alt({lit(\"bottom-up-break\"),lit(\")\"),lit(\"≫\"),lit(\"\\n\"),lit(\"!:=\"),lit(\"\\'\"),lit(\"!=\"),lit(\"\\>=\"),lit(\"://\"),lit(\"non-assoc\"),lit(\"&=\"),lit(\"\\<-\"),lit(\"*=\"),lit(\"+=\"),lit(\"top-down-break\"),lit(\",\"),lit(\"...\"),lit(\"/=\"),lit(\"⇨\"),lit(\"(\"),lit(\"*/\"),lit(\"%\"),lit(\"!\\<\\<\"),lit(\"=\\>\"),lit(\"!\\>\\>\"),lit(\"||\"),lit(\"\\>\\>\"),lit(\"::\"),lit(\"⚠\"),lit(\"&&\"),lit(\"@\"),lit(\":=\"),lit(\"#\"),lit(\"\\<==\\>\"),lit(\"\\\"\"),lit(\"\\<\\<=\"),lit(\"}\"),lit(\"?=\"),lit(\"\\<:\"),lit(\"==\\>\"),lit(\"^\"),lit(\"/*\"),lit(\";\"),lit(\"{\"),lit(\"-=\"),lit(\"$T\")})],{})", - "captures": [] + "captures": {} }, "lit(\"`\"):lit(\"`\")": { - "begin": "(?:\\x{60})", - "end": "(?:\\x{60})", + "begin": "(?:\\`)", + "end": "(?:\\`)", "patterns": [ { - "match": "(\\x{5C}\\x{5C})", + "match": "(\\\\\\\\)", "name": "prod(label(\"bs\",lex(\"ConcretePart\")),[lit(\"\\\\\\\\\")],{tag(\"category\"(\"string\"))})", "captures": { "1": { @@ -37,7 +37,7 @@ } }, { - "match": "(\\x{5C}\\x{3E})", + "match": "(\\\\\\>)", "name": "prod(label(\"gt\",lex(\"ConcretePart\")),[lit(\"\\\\\\>\")],{tag(\"category\"(\"string\"))})", "captures": { "1": { @@ -46,7 +46,7 @@ } }, { - "match": "((?:[\\x{1}-\\x{9}]|[\\x{B}-\\x{3B}]|[\\x{3D}-\\x{3D}]|[\\x{3F}-\\x{5B}]|[\\x{5D}-\\x{5F}]|[a-\\x{10FFFF}])+?(?!(?:[\\x{1}-\\x{9}]|[\\x{B}-\\x{3B}]|[\\x{3D}-\\x{3D}]|[\\x{3F}-\\x{5B}]|[\\x{5D}-\\x{5F}]|[a-\\x{10FFFF}])))", + "match": "((?:[\\x01-\\t]|[\\x0B-\\;]|[\\=-\\=]|[\\?-\\[]|[\\]-\\_]|[a-\\x{10FFFF}])+?(?!(?:[\\x01-\\t]|[\\x0B-\\;]|[\\=-\\=]|[\\?-\\[]|[\\]-\\_]|[a-\\x{10FFFF}])))", "name": "prod(label(\"text\",lex(\"ConcretePart\")),[conditional(iter(\\char-class([range(1,9),range(11,59),range(61,61),range(63,91),range(93,95),range(97,1114111)])),{\\not-follow(\\char-class([range(1,9),range(11,59),range(61,61),range(63,91),range(93,95),range(97,1114111)]))})],{tag(\"category\"(\"string\"))})", "captures": { "1": { @@ -55,7 +55,7 @@ } }, { - "match": "(\\x{5C}\\x{3C})", + "match": "(\\\\\\<)", "name": "prod(label(\"lt\",lex(\"ConcretePart\")),[lit(\"\\\\\\<\")],{tag(\"category\"(\"string\"))})", "captures": { "1": { @@ -64,7 +64,7 @@ } }, { - "match": "(\\x{5C}\\x{60})", + "match": "(\\\\\\`)", "name": "prod(label(\"bq\",lex(\"ConcretePart\")),[lit(\"\\\\`\")],{tag(\"category\"(\"string\"))})", "captures": { "1": { @@ -76,11 +76,11 @@ "name": "lit(\"`\"):lit(\"`\")" }, "lit(\"[\"):lit(\"]\")": { - "begin": "(?:\\x{5B})", - "end": "(?:\\x{5D})", + "begin": "(?:\\[)", + "end": "(?:\\])", "patterns": [ { - "match": "((?:\\x{5C})(?:[\\x{20}-\\x{20}]|[\\x{22}-\\x{22}]|[\\x{27}-\\x{27}]|[\\x{2D}-\\x{2D}]|[\\x{3C}-\\x{3C}]|[\\x{3E}-\\x{3E}]|[\\x{5B}-\\x{5D}]|[b-b]|[f-f]|[n-n]|[r-r]|[t-t]))", + "match": "((?:\\\\)(?:[\\x20-\\x20]|[\\\"-\\\"]|[\\'-\\']|[\\--\\-]|[\\<-\\<]|[\\>-\\>]|[\\[-\\]]|[b-b]|[f-f]|[n-n]|[r-r]|[t-t]))", "name": "prod(lex(\"Char\"),[lit(\"\\\\\"),\\char-class([range(32,32),range(34,34),range(39,39),range(45,45),range(60,60),range(62,62),range(91,93),range(98,98),range(102,102),range(110,110),range(114,114),range(116,116)])],{tag(\"category\"(\"constant\"))})", "captures": { "1": { @@ -89,7 +89,7 @@ } }, { - "match": "((?:(?:\\x{5C})[U-U](?:(?:\\b10\\b)|(?:(?:\\b0\\b)(?:[0-9]|[A-F]|[a-f])))(?:[0-9]|[A-F]|[a-f])(?:[0-9]|[A-F]|[a-f])(?:[0-9]|[A-F]|[a-f])(?:[0-9]|[A-F]|[a-f]))|(?:(?:\\x{5C})[u-u](?:[0-9]|[A-F]|[a-f])(?:[0-9]|[A-F]|[a-f])(?:[0-9]|[A-F]|[a-f])(?:[0-9]|[A-F]|[a-f]))|(?:(?:\\x{5C})[a-a][0-7](?:[0-9]|[A-F]|[a-f])))", + "match": "((?:(?:\\\\)[U-U](?:(?:\\b10\\b)|(?:(?:\\b0\\b)(?:[0-9]|[A-F]|[a-f])))(?:[0-9]|[A-F]|[a-f])(?:[0-9]|[A-F]|[a-f])(?:[0-9]|[A-F]|[a-f])(?:[0-9]|[A-F]|[a-f]))|(?:(?:\\\\)[u-u](?:[0-9]|[A-F]|[a-f])(?:[0-9]|[A-F]|[a-f])(?:[0-9]|[A-F]|[a-f])(?:[0-9]|[A-F]|[a-f]))|(?:(?:\\\\)[a-a][0-7](?:[0-9]|[A-F]|[a-f])))", "name": "prod(lex(\"Char\"),[lex(\"UnicodeEscape\")],{tag(\"category\"(\"constant\"))})", "captures": { "1": {