Skip to content

Commit

Permalink
Precalculate all ascii values
Browse files Browse the repository at this point in the history
  • Loading branch information
DavyLandman committed Jul 9, 2024
1 parent d9a3f21 commit aae40ab
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 56 deletions.
80 changes: 27 additions & 53 deletions rascal-textmate-core/src/main/rascal/lang/oniguruma/Conversion.rsc
Original file line number Diff line number Diff line change
Expand Up @@ -170,59 +170,14 @@ str encode(list[int] chars, bool withBounds = false)
? "\\b<encode(chars, withBounds = false)>\\b"
: intercalate("", [encode(i) | i <- chars]);
// Encodes a single code point as regex text. The first matching rule wins:
//   1. members of `alnum` are emitted as the character itself;
//   2. keys of `shorthands` are emitted as their mapped text;
//   3. other code points below 256 become a two-digit `\xHH` escape;
//   4. everything else becomes a `\x{H...}` escape.
str encode(int char)
= char in alnum
? stringChar(char)
: (char in shorthands
? shorthands[char]
: (char < 256
? "\\x<right(toHex(char),2, "0")>" // left-pad the hex digits with "0" to width 2
: "\\x{<toHex(char)>}"));
// Encodes a single code point as regex text: a lookup in the `preEncoded`
// table when the code point is a key there, otherwise a `\x{H...}` escape
// built from its hexadecimal representation.
str encode(int char) {
if (char in preEncoded) {
return preEncoded[char];
}
return "\\x{<toHex(char)>}";
}
// Maps code points to the text emitted for them in a regex:
//   - tab, newline and carriage return map to `\t`, `\n`, `\r`;
//   - space, double quote and single quote map to themselves;
//   - the remaining ASCII punctuation maps to itself prefixed by a backslash.
map[int, str] shorthands = (
0x09: "\\t",
0x0A: "\\n",
0x0D: "\\r",
0x20: " ",
0x21: "\\!",
0x22: "\"",
0x23: "\\#",
0x24: "\\$",
0x25: "\\%",
0x26: "\\&",
0x27: "\'",
0x28: "\\(",
0x29: "\\)",
0x2A: "\\*",
0x2B: "\\+",
0x2C: "\\,",
0x2D: "\\-",
0x2E: "\\.",
0x2F: "\\/",
0x3A: "\\:",
0x3B: "\\;",
0x3C: "\\\<",
0x3D: "\\=",
0x3E: "\\\>",
0x3F: "\\?",
0x40: "\\@",
0x5B: "\\[",
0x5C: "\\\\",
0x5D: "\\]",
0x5E: "\\^",
0x5F: "\\_",
0x60: "\\`",
0x7B: "\\{",
0x7C: "\\|",
0x7D: "\\}", // fixed: was "\\{", which encoded a closing brace as an opening brace
0x7E: "\\~"
);
// Returns the set of code points running from the first character of `from`
// up to and including the first character of `to`.
private set[int] charRange(str from, str to) {
    int first = charAt(from, 0);
    int last = charAt(to, 0);
    // Rascal ranges exclude their upper bound, hence the `+ 1`.
    return {c | c <- [first..last + 1]};
}
private str toHex(int i)
= i < 16
Expand All @@ -233,5 +188,24 @@ private list[str] hex
= ["<i>" | i <- [0..10]]
+ ["A", "B", "C", "D", "E", "F"];
// Code points of the ASCII alphanumeric characters.
private set[int] alnum
= {*[48..58]}   // '0'..'9'
+ {*[65..91]}   // 'A'..'Z'
+ {*[97..123]}  // 'a'..'z'
;
// Code points that are emitted as the character itself: the digits and the
// lower- and upper-case ASCII letters. Note that, despite the name, no
// punctuation is included.
private set[int] printable = {
    *charRange("0", "9"),
    *charRange("a", "z"),
    *charRange("A", "Z")
};
// Code points that are emitted as an escape sequence.
private map[int, str] escapes
= ( c : "\\<stringChar(c)>" | c <- [0x21..0x7F], c notin printable ) // ASCII characters that may carry special meaning in a regex get a backslash prefix
+ (
    0x09: "\\t",
    0x0A: "\\n",
    0x0D: "\\r",
    0x20: "\\x20" // a literal space is valid in a regex, but readers tend to skip over it as layout
);
// Returns `defined` extended so that every code point in [0, 256) has an
// entry: code points that `defined` does not cover are mapped to a two-digit
// `\xHH` escape; existing entries are kept as they are.
private map[int, str] addFallBack(map[int, str] defined) {
    map[int, str] result = defined;
    for (char <- [0..256], char notin defined) {
        result[char] = "\\x<right(toHex(char),2, "0")>";
    }
    return result;
}
// Lookup table with the encoding of every code point below 256:
// `printable` code points map to the character itself, `escapes` supplies the
// escape sequences, and `addFallBack` fills whatever remains.
private map[int, str] preEncoded {
    map[int, str] literals = ( c : stringChar(c) | c <- printable );
    return addFallBack(literals + escapes);
}
6 changes: 3 additions & 3 deletions vscode-extension/syntaxes/rascal.tmLanguage.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"repository": {
"prod(lex(\"Comment\"),[lit(\"//\"),conditional(\\iter-star(\\char-class([range(1,9),range(11,1114111)])),{\\not-follow(\\char-class([range(9,9),range(13,13),range(32,32),range(160,160),range(5760,5760),range(8192,8202),range(8239,8239),range(8287,8287),range(12288,12288)])),\\end-of-line()})],{tag(\"category\"(\"comment\"))})": {
"match": "((?:\\/\\/)(?:(?:[\\x01-\\t]|[\\x0B-\\x{10FFFF}])*?(?!(?:[\\t-\\t]|[\\r-\\r]|[ - ]|[\\xA0-\\xA0]|[\\x{1680}-\\x{1680}]|[\\x{2000}-\\x{200A}]|[\\x{202F}-\\x{202F}]|[\\x{205F}-\\x{205F}]|[\\x{3000}-\\x{3000}]))(?:$)))",
"match": "((?:\\/\\/)(?:(?:[\\x01-\\t]|[\\x0B-\\x{10FFFF}])*?(?!(?:[\\t-\\t]|[\\r-\\r]|[\\x20-\\x20]|[\\xA0-\\xA0]|[\\x{1680}-\\x{1680}]|[\\x{2000}-\\x{200A}]|[\\x{202F}-\\x{202F}]|[\\x{205F}-\\x{205F}]|[\\x{3000}-\\x{3000}]))(?:$)))",
"name": "prod(lex(\"Comment\"),[lit(\"//\"),conditional(\\iter-star(\\char-class([range(1,9),range(11,1114111)])),{\\not-follow(\\char-class([range(9,9),range(13,13),range(32,32),range(160,160),range(5760,5760),range(8192,8202),range(8239,8239),range(8287,8287),range(12288,12288)])),\\end-of-line()})],{tag(\"category\"(\"comment\"))})",
"captures": {
"1": {
Expand All @@ -19,7 +19,7 @@
}
},
"prod(lex(\"delimiters\"),[alt({lit(\"bottom-up-break\"),lit(\")\"),lit(\"\"),lit(\"\\n\"),lit(\"!:=\"),lit(\"\\'\"),lit(\"!=\"),lit(\"\\>=\"),lit(\"://\"),lit(\"non-assoc\"),lit(\"&=\"),lit(\"\\<-\"),lit(\"*=\"),lit(\"+=\"),lit(\"top-down-break\"),lit(\",\"),lit(\"...\"),lit(\"/=\"),lit(\"\"),lit(\"(\"),lit(\"*/\"),lit(\"%\"),lit(\"!\\<\\<\"),lit(\"=\\>\"),lit(\"!\\>\\>\"),lit(\"||\"),lit(\"\\>\\>\"),lit(\"::\"),lit(\"\"),lit(\"&&\"),lit(\"@\"),lit(\":=\"),lit(\"#\"),lit(\"\\<==\\>\"),lit(\"\\\"\"),lit(\"\\<\\<=\"),lit(\"}\"),lit(\"?=\"),lit(\"\\<:\"),lit(\"==\\>\"),lit(\"^\"),lit(\"/*\"),lit(\";\"),lit(\"{\"),lit(\"-=\"),lit(\"$T\")})],{})": {
"match": "(?:(?:bottom\\-up\\-break)|(?:\\))|(?:\\x{226B})|(?:\\n)|(?:\\!\\:\\=)|(?:')|(?:\\!\\=)|(?:\\>\\=)|(?:\\:\\/\\/)|(?:non\\-assoc)|(?:\\&\\=)|(?:\\<\\-)|(?:\\*\\=)|(?:\\+\\=)|(?:top\\-down\\-break)|(?:\\,)|(?:\\.\\.\\.)|(?:\\/\\=)|(?:\\x{21E8})|(?:\\()|(?:\\*\\/)|(?:\\%)|(?:\\!\\<\\<)|(?:\\=\\>)|(?:\\!\\>\\>)|(?:\\|\\|)|(?:\\>\\>)|(?:\\:\\:)|(?:\\x{26A0})|(?:\\&\\&)|(?:\\@)|(?:\\:\\=)|(?:\\#)|(?:\\<\\=\\=\\>)|(?:\")|(?:\\<\\<\\=)|(?:\\{)|(?:\\?\\=)|(?:\\<\\:)|(?:\\=\\=\\>)|(?:\\^)|(?:\\/\\*)|(?:\\;)|(?:\\{)|(?:\\-\\=)|(?:\\$T))",
"match": "(?:(?:bottom\\-up\\-break)|(?:\\))|(?:\\x{226B})|(?:\\n)|(?:\\!\\:\\=)|(?:\\')|(?:\\!\\=)|(?:\\>\\=)|(?:\\:\\/\\/)|(?:non\\-assoc)|(?:\\&\\=)|(?:\\<\\-)|(?:\\*\\=)|(?:\\+\\=)|(?:top\\-down\\-break)|(?:\\,)|(?:\\.\\.\\.)|(?:\\/\\=)|(?:\\x{21E8})|(?:\\()|(?:\\*\\/)|(?:\\%)|(?:\\!\\<\\<)|(?:\\=\\>)|(?:\\!\\>\\>)|(?:\\|\\|)|(?:\\>\\>)|(?:\\:\\:)|(?:\\x{26A0})|(?:\\&\\&)|(?:\\@)|(?:\\:\\=)|(?:\\#)|(?:\\<\\=\\=\\>)|(?:\\\")|(?:\\<\\<\\=)|(?:\\})|(?:\\?\\=)|(?:\\<\\:)|(?:\\=\\=\\>)|(?:\\^)|(?:\\/\\*)|(?:\\;)|(?:\\{)|(?:\\-\\=)|(?:\\$T))",
"name": "prod(lex(\"delimiters\"),[alt({lit(\"bottom-up-break\"),lit(\")\"),lit(\"\"),lit(\"\\n\"),lit(\"!:=\"),lit(\"\\'\"),lit(\"!=\"),lit(\"\\>=\"),lit(\"://\"),lit(\"non-assoc\"),lit(\"&=\"),lit(\"\\<-\"),lit(\"*=\"),lit(\"+=\"),lit(\"top-down-break\"),lit(\",\"),lit(\"...\"),lit(\"/=\"),lit(\"\"),lit(\"(\"),lit(\"*/\"),lit(\"%\"),lit(\"!\\<\\<\"),lit(\"=\\>\"),lit(\"!\\>\\>\"),lit(\"||\"),lit(\"\\>\\>\"),lit(\"::\"),lit(\"\"),lit(\"&&\"),lit(\"@\"),lit(\":=\"),lit(\"#\"),lit(\"\\<==\\>\"),lit(\"\\\"\"),lit(\"\\<\\<=\"),lit(\"}\"),lit(\"?=\"),lit(\"\\<:\"),lit(\"==\\>\"),lit(\"^\"),lit(\"/*\"),lit(\";\"),lit(\"{\"),lit(\"-=\"),lit(\"$T\")})],{})",
"captures": {}
},
Expand Down Expand Up @@ -80,7 +80,7 @@
"end": "(?:\\])",
"patterns": [
{
"match": "((?:\\\\)(?:[ - ]|[\"-\"]|['-']|[\\--\\-]|[\\<-\\<]|[\\>-\\>]|[\\[-\\]]|[b-b]|[f-f]|[n-n]|[r-r]|[t-t]))",
"match": "((?:\\\\)(?:[\\x20-\\x20]|[\\\"-\\\"]|[\\'-\\']|[\\--\\-]|[\\<-\\<]|[\\>-\\>]|[\\[-\\]]|[b-b]|[f-f]|[n-n]|[r-r]|[t-t]))",
"name": "prod(lex(\"Char\"),[lit(\"\\\\\"),\\char-class([range(32,32),range(34,34),range(39,39),range(45,45),range(60,60),range(62,62),range(91,93),range(98,98),range(102,102),range(110,110),range(114,114),range(116,116)])],{tag(\"category\"(\"constant\"))})",
"captures": {
"1": {
Expand Down

0 comments on commit aae40ab

Please sign in to comment.