From aae40abdd579eafe46ea7af1dd1b3abd79479662 Mon Sep 17 00:00:00 2001 From: Davy Landman Date: Tue, 9 Jul 2024 14:39:43 +0200 Subject: [PATCH] Precalculate all ascii values --- .../main/rascal/lang/oniguruma/Conversion.rsc | 80 +++++++------------ .../syntaxes/rascal.tmLanguage.json | 6 +- 2 files changed, 30 insertions(+), 56 deletions(-) diff --git a/rascal-textmate-core/src/main/rascal/lang/oniguruma/Conversion.rsc b/rascal-textmate-core/src/main/rascal/lang/oniguruma/Conversion.rsc index 2de88a4..c9266f5 100644 --- a/rascal-textmate-core/src/main/rascal/lang/oniguruma/Conversion.rsc +++ b/rascal-textmate-core/src/main/rascal/lang/oniguruma/Conversion.rsc @@ -170,59 +170,14 @@ str encode(list[int] chars, bool withBounds = false) ? "\\b\\b" : intercalate("", [encode(i) | i <- chars]); -str encode(int char) { - if (char in alnum) { - return stringChar(char); - } - if (char in shorthands) { - return shorthands[char]; - } - if (char < 256) { - return "\\x"; - } - return "\\x{}"; - -} +str encode(int char) + = char in preEncoded + ? preEncoded[char] + : "\\x{}" + ; -map[int, str] shorthands = ( - 0x09: "\\t", - 0x0A: "\\n", - 0x0D: "\\r", - 0x20: " ", - 0x21: "\\!", - 0x22: "\"", - 0x23: "\\#", - 0x24: "\\$", - 0x25: "\\%", - 0x26: "\\&", - 0x27: "\'", - 0x28: "\\(", - 0x29: "\\)", - 0x2A: "\\*", - 0x2B: "\\+", - 0x2C: "\\,", - 0x2D: "\\-", - 0x2E: "\\.", - 0x2F: "\\/", - 0x3A: "\\:", - 0x3B: "\\;", - 0x3C: "\\\<", - 0x3D: "\\=", - 0x3E: "\\\>", - 0x3F: "\\?", - 0x40: "\\@", - 0x5B: "\\[", - 0x5C: "\\\\", - 0x5D: "\\]", - 0x5E: "\\^", - 0x5F: "\\_", - 0x60: "\\`", - 0x7B: "\\{", - 0x7C: "\\|", - 0x7D: "\\{", - 0x7E: "\\~" -); +private set[int] charRange(str from, str to) = {*[charAt(from, 0)..charAt(to, 0) + 1]}; private str toHex(int i) = i < 16 @@ -233,5 +188,24 @@ private list[str] hex = ["" | i <- [0..10]] + ["A", "B", "C", "D", "E", "F"]; -private set[int] alnum - = {*[48..58], *[65..91], *[97..123]}; \ No newline at end of file +private set[int] printable + = charRange("0", "9") + + charRange("a", "z") + + charRange("A", "Z") + ; + +private map[int, str] escapes = ( + 0x09: "\\t", + 0x0A: "\\n", + 0x0D: "\\r", + 0x20: "\\x20" // spaces look a bit strange in a regex, although they are valid, people tend to read over them as layout +) + ( c : "\\" | c <- [0x21..0x7F], c notin printable); // regular ascii characters that might have special meaning in a regex + + +private map[int, str] addFallBack(map[int, str] defined) + = ( char : "\\x" | char <- [0..256], char notin defined) + + defined + ; + +private map[int, str] preEncoded + = addFallBack(escapes + ( c : stringChar(c) | c <- printable)); \ No newline at end of file diff --git a/vscode-extension/syntaxes/rascal.tmLanguage.json b/vscode-extension/syntaxes/rascal.tmLanguage.json index 4db3fcb..05935b3 100644 --- a/vscode-extension/syntaxes/rascal.tmLanguage.json +++ b/vscode-extension/syntaxes/rascal.tmLanguage.json @@ -1,7 +1,7 @@ { "repository": { "prod(lex(\"Comment\"),[lit(\"//\"),conditional(\\iter-star(\\char-class([range(1,9),range(11,1114111)])),{\\not-follow(\\char-class([range(9,9),range(13,13),range(32,32),range(160,160),range(5760,5760),range(8192,8202),range(8239,8239),range(8287,8287),range(12288,12288)])),\\end-of-line()})],{tag(\"category\"(\"comment\"))})": { - "match": "((?:\\/\\/)(?:(?:[\\x01-\\t]|[\\x0B-\\x{10FFFF}])*?(?!(?:[\\t-\\t]|[\\r-\\r]|[ - ]|[\\xA0-\\xA0]|[\\x{1680}-\\x{1680}]|[\\x{2000}-\\x{200A}]|[\\x{202F}-\\x{202F}]|[\\x{205F}-\\x{205F}]|[\\x{3000}-\\x{3000}]))(?:$)))", + "match": "((?:\\/\\/)(?:(?:[\\x01-\\t]|[\\x0B-\\x{10FFFF}])*?(?!(?:[\\t-\\t]|[\\r-\\r]|[\\x20-\\x20]|[\\xA0-\\xA0]|[\\x{1680}-\\x{1680}]|[\\x{2000}-\\x{200A}]|[\\x{202F}-\\x{202F}]|[\\x{205F}-\\x{205F}]|[\\x{3000}-\\x{3000}]))(?:$)))", "name": "prod(lex(\"Comment\"),[lit(\"//\"),conditional(\\iter-star(\\char-class([range(1,9),range(11,1114111)])),{\\not-follow(\\char-class([range(9,9),range(13,13),range(32,32),range(160,160),range(5760,5760),range(8192,8202),range(8239,8239),range(8287,8287),range(12288,12288)])),\\end-of-line()})],{tag(\"category\"(\"comment\"))})", "captures": { "1": { @@ -19,7 +19,7 @@ } }, "prod(lex(\"delimiters\"),[alt({lit(\"bottom-up-break\"),lit(\")\"),lit(\"≫\"),lit(\"\\n\"),lit(\"!:=\"),lit(\"\\'\"),lit(\"!=\"),lit(\"\\>=\"),lit(\"://\"),lit(\"non-assoc\"),lit(\"&=\"),lit(\"\\<-\"),lit(\"*=\"),lit(\"+=\"),lit(\"top-down-break\"),lit(\",\"),lit(\"...\"),lit(\"/=\"),lit(\"⇨\"),lit(\"(\"),lit(\"*/\"),lit(\"%\"),lit(\"!\\<\\<\"),lit(\"=\\>\"),lit(\"!\\>\\>\"),lit(\"||\"),lit(\"\\>\\>\"),lit(\"::\"),lit(\"⚠\"),lit(\"&&\"),lit(\"@\"),lit(\":=\"),lit(\"#\"),lit(\"\\<==\\>\"),lit(\"\\\"\"),lit(\"\\<\\<=\"),lit(\"}\"),lit(\"?=\"),lit(\"\\<:\"),lit(\"==\\>\"),lit(\"^\"),lit(\"/*\"),lit(\";\"),lit(\"{\"),lit(\"-=\"),lit(\"$T\")})],{})": { - "match": "(?:(?:bottom\\-up\\-break)|(?:\\))|(?:\\x{226B})|(?:\\n)|(?:\\!\\:\\=)|(?:')|(?:\\!\\=)|(?:\\>\\=)|(?:\\:\\/\\/)|(?:non\\-assoc)|(?:\\&\\=)|(?:\\<\\-)|(?:\\*\\=)|(?:\\+\\=)|(?:top\\-down\\-break)|(?:\\,)|(?:\\.\\.\\.)|(?:\\/\\=)|(?:\\x{21E8})|(?:\\()|(?:\\*\\/)|(?:\\%)|(?:\\!\\<\\<)|(?:\\=\\>)|(?:\\!\\>\\>)|(?:\\|\\|)|(?:\\>\\>)|(?:\\:\\:)|(?:\\x{26A0})|(?:\\&\\&)|(?:\\@)|(?:\\:\\=)|(?:\\#)|(?:\\<\\=\\=\\>)|(?:\")|(?:\\<\\<\\=)|(?:\\{)|(?:\\?\\=)|(?:\\<\\:)|(?:\\=\\=\\>)|(?:\\^)|(?:\\/\\*)|(?:\\;)|(?:\\{)|(?:\\-\\=)|(?:\\$T))", + "match": "(?:(?:bottom\\-up\\-break)|(?:\\))|(?:\\x{226B})|(?:\\n)|(?:\\!\\:\\=)|(?:\\')|(?:\\!\\=)|(?:\\>\\=)|(?:\\:\\/\\/)|(?:non\\-assoc)|(?:\\&\\=)|(?:\\<\\-)|(?:\\*\\=)|(?:\\+\\=)|(?:top\\-down\\-break)|(?:\\,)|(?:\\.\\.\\.)|(?:\\/\\=)|(?:\\x{21E8})|(?:\\()|(?:\\*\\/)|(?:\\%)|(?:\\!\\<\\<)|(?:\\=\\>)|(?:\\!\\>\\>)|(?:\\|\\|)|(?:\\>\\>)|(?:\\:\\:)|(?:\\x{26A0})|(?:\\&\\&)|(?:\\@)|(?:\\:\\=)|(?:\\#)|(?:\\<\\=\\=\\>)|(?:\\\")|(?:\\<\\<\\=)|(?:\\})|(?:\\?\\=)|(?:\\<\\:)|(?:\\=\\=\\>)|(?:\\^)|(?:\\/\\*)|(?:\\;)|(?:\\{)|(?:\\-\\=)|(?:\\$T))", "name": "prod(lex(\"delimiters\"),[alt({lit(\"bottom-up-break\"),lit(\")\"),lit(\"≫\"),lit(\"\\n\"),lit(\"!:=\"),lit(\"\\'\"),lit(\"!=\"),lit(\"\\>=\"),lit(\"://\"),lit(\"non-assoc\"),lit(\"&=\"),lit(\"\\<-\"),lit(\"*=\"),lit(\"+=\"),lit(\"top-down-break\"),lit(\",\"),lit(\"...\"),lit(\"/=\"),lit(\"⇨\"),lit(\"(\"),lit(\"*/\"),lit(\"%\"),lit(\"!\\<\\<\"),lit(\"=\\>\"),lit(\"!\\>\\>\"),lit(\"||\"),lit(\"\\>\\>\"),lit(\"::\"),lit(\"⚠\"),lit(\"&&\"),lit(\"@\"),lit(\":=\"),lit(\"#\"),lit(\"\\<==\\>\"),lit(\"\\\"\"),lit(\"\\<\\<=\"),lit(\"}\"),lit(\"?=\"),lit(\"\\<:\"),lit(\"==\\>\"),lit(\"^\"),lit(\"/*\"),lit(\";\"),lit(\"{\"),lit(\"-=\"),lit(\"$T\")})],{})", "captures": {} }, @@ -80,7 +80,7 @@ "end": "(?:\\])", "patterns": [ { - "match": "((?:\\\\)(?:[ - ]|[\"-\"]|['-']|[\\--\\-]|[\\<-\\<]|[\\>-\\>]|[\\[-\\]]|[b-b]|[f-f]|[n-n]|[r-r]|[t-t]))", + "match": "((?:\\\\)(?:[\\x20-\\x20]|[\\\"-\\\"]|[\\'-\\']|[\\--\\-]|[\\<-\\<]|[\\>-\\>]|[\\[-\\]]|[b-b]|[f-f]|[n-n]|[r-r]|[t-t]))", "name": "prod(lex(\"Char\"),[lit(\"\\\\\"),\\char-class([range(32,32),range(34,34),range(39,39),range(45,45),range(60,60),range(62,62),range(91,93),range(98,98),range(102,102),range(110,110),range(114,114),range(116,116)])],{tag(\"category\"(\"constant\"))})", "captures": { "1": {