From 09144ea97186ae64343b6e02a0a204eef130f4ea Mon Sep 17 00:00:00 2001 From: peterz Date: Mon, 3 Jul 2023 18:47:21 +0300 Subject: [PATCH 01/17] Use -1 as max value for eden and survivor spaces --- .../svm/core/genscavenge/AbstractMemoryPoolMXBean.java | 8 +++++--- .../core/genscavenge/GenScavengeMemoryPoolMXBeans.java | 4 ++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/substratevm/src/com.oracle.svm.core.genscavenge/src/com/oracle/svm/core/genscavenge/AbstractMemoryPoolMXBean.java b/substratevm/src/com.oracle.svm.core.genscavenge/src/com/oracle/svm/core/genscavenge/AbstractMemoryPoolMXBean.java index f2a23be29ba3..d155352a01be 100644 --- a/substratevm/src/com.oracle.svm.core.genscavenge/src/com/oracle/svm/core/genscavenge/AbstractMemoryPoolMXBean.java +++ b/substratevm/src/com.oracle.svm.core.genscavenge/src/com/oracle/svm/core/genscavenge/AbstractMemoryPoolMXBean.java @@ -45,12 +45,14 @@ public abstract class AbstractMemoryPoolMXBean extends AbstractMXBean implements MemoryPoolMXBean { + protected static final UnsignedWord UNDEFINED = WordFactory.signed(UNDEFINED_MEMORY_USAGE); + private static final UnsignedWord UNINITIALIZED = WordFactory.zero(); + private final String name; private final String[] managerNames; protected final UninterruptibleUtils.AtomicUnsigned peakUsage = new UninterruptibleUtils.AtomicUnsigned(); - private static final UnsignedWord UNDEFINED = WordFactory.zero(); - protected UnsignedWord initialValue = UNDEFINED; + protected UnsignedWord initialValue = UNINITIALIZED; @Platforms(Platform.HOSTED_ONLY.class) protected AbstractMemoryPoolMXBean(String name, String... managerNames) { @@ -59,7 +61,7 @@ protected AbstractMemoryPoolMXBean(String name, String... 
managerNames) { } UnsignedWord getInitialValue() { - if (initialValue.equal(UNDEFINED)) { + if (initialValue.equal(UNINITIALIZED)) { initialValue = computeInitialValue(); } return initialValue; diff --git a/substratevm/src/com.oracle.svm.core.genscavenge/src/com/oracle/svm/core/genscavenge/GenScavengeMemoryPoolMXBeans.java b/substratevm/src/com.oracle.svm.core.genscavenge/src/com/oracle/svm/core/genscavenge/GenScavengeMemoryPoolMXBeans.java index 441b4e6d6b33..2352e150bec0 100644 --- a/substratevm/src/com.oracle.svm.core.genscavenge/src/com/oracle/svm/core/genscavenge/GenScavengeMemoryPoolMXBeans.java +++ b/substratevm/src/com.oracle.svm.core.genscavenge/src/com/oracle/svm/core/genscavenge/GenScavengeMemoryPoolMXBeans.java @@ -81,7 +81,7 @@ UnsignedWord computeInitialValue() { @Override UnsignedWord getMaximumValue() { - return GCImpl.getPolicy().getMaximumEdenSize(); + return UNDEFINED; } @Override @@ -124,7 +124,7 @@ UnsignedWord computeInitialValue() { @Override UnsignedWord getMaximumValue() { - return GCImpl.getPolicy().getMaximumSurvivorSize(); + return UNDEFINED; } @Override From 00b287215ee4a8f4394d2103a05634df3922b18a Mon Sep 17 00:00:00 2001 From: peterz Date: Mon, 21 Aug 2023 15:28:03 +0300 Subject: [PATCH 02/17] Use -1 as max value for all memory pools --- .../svm/core/genscavenge/GenScavengeMemoryPoolMXBeans.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/substratevm/src/com.oracle.svm.core.genscavenge/src/com/oracle/svm/core/genscavenge/GenScavengeMemoryPoolMXBeans.java b/substratevm/src/com.oracle.svm.core.genscavenge/src/com/oracle/svm/core/genscavenge/GenScavengeMemoryPoolMXBeans.java index 2352e150bec0..53216cb91664 100644 --- a/substratevm/src/com.oracle.svm.core.genscavenge/src/com/oracle/svm/core/genscavenge/GenScavengeMemoryPoolMXBeans.java +++ b/substratevm/src/com.oracle.svm.core.genscavenge/src/com/oracle/svm/core/genscavenge/GenScavengeMemoryPoolMXBeans.java @@ -162,7 +162,7 @@ UnsignedWord computeInitialValue() 
{ @Override UnsignedWord getMaximumValue() { - return GCImpl.getPolicy().getMaximumOldSize(); + return UNDEFINED; } @Override @@ -200,7 +200,7 @@ UnsignedWord computeInitialValue() { @Override UnsignedWord getMaximumValue() { - return GCImpl.getPolicy().getMaximumHeapSize(); + return UNDEFINED; } @Override From d2a1fe28975499ad60a30bf024026aea3496374a Mon Sep 17 00:00:00 2001 From: Josef Haider Date: Mon, 21 Aug 2023 12:22:08 +0200 Subject: [PATCH 03/17] TRegex: casefolding infrastructure rework and finalization of OracleDB flavor. --- regex/ci/ci.jsonnet | 13 +- .../regex/tregex/test/OracleDBTests.java | 686 +++++- .../regex/tregex/test/PythonTests.java | 4 +- .../truffle/regex/analysis/RegexUnifier.java | 6 + .../regex/charset/ClassSetContents.java | 14 +- .../truffle/regex/charset/Constants.java | 13 +- .../TRegexBacktrackingNFAExecutorNode.java | 6 +- .../regex/tregex/parser/CaseFoldData.java | 1912 +++++++++++++++ .../regex/tregex/parser/CaseFoldTable.java | 2098 ----------------- ...oldingTrie.java => CaseUnfoldingTrie.java} | 43 +- .../regex/tregex/parser/JSRegexLexer.java | 13 +- .../regex/tregex/parser/JSRegexParser.java | 45 +- .../regex/tregex/parser/JSRegexValidator.java | 7 + ...ng.java => MultiCharacterCaseFolding.java} | 122 +- .../regex/tregex/parser/RegexASTBuilder.java | 6 +- .../regex/tregex/parser/RegexLexer.java | 169 +- .../truffle/regex/tregex/parser/Token.java | 64 +- .../parser/flavors/ECMAScriptFlavor.java | 6 +- .../parser/flavors/OracleDBConstants.java | 355 +++ .../tregex/parser/flavors/OracleDBFlavor.java | 13 +- .../parser/flavors/OracleDBRegexLexer.java | 67 +- .../parser/flavors/OracleDBRegexParser.java | 128 +- .../tregex/parser/flavors/PythonFlavor.java | 4 +- .../parser/flavors/PythonRegexLexer.java | 14 +- .../parser/flavors/PythonRegexParser.java | 39 +- .../parser/flavors/RubyCaseFoldingData.java | 1584 ------------- .../tregex/parser/flavors/RubyFlavor.java | 19 +- .../parser/flavors/RubyRegexParser.java | 146 +- 
.../tools/casefolding/.gitignore | 2 + .../tools/casefolding/Cargo.lock | 1661 +++++++++++++ .../tools/casefolding/Cargo.toml | 13 + .../tools/casefolding/src/main.rs | 1987 ++++++++++++++++ .../tools/generate_case_fold_table.clj | 406 ---- .../tools/generate_nonunicode_fold_table.py | 79 - .../tools/generate_ruby_case_folding.py | 115 - .../generate_special_casing_equivalences.py | 70 - .../tools/run_scripts.sh | 18 +- .../tools/unicode-script.sh | 108 - .../tools/update_case_fold_table.py | 80 - 39 files changed, 7218 insertions(+), 4917 deletions(-) create mode 100644 regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/CaseFoldData.java delete mode 100644 regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/CaseFoldTable.java rename regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/{flavors/RubyCaseUnfoldingTrie.java => CaseUnfoldingTrie.java} (77%) rename regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/{flavors/RubyCaseFolding.java => MultiCharacterCaseFolding.java} (64%) create mode 100644 regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/OracleDBConstants.java delete mode 100644 regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/RubyCaseFoldingData.java create mode 100644 regex/src/com.oracle.truffle.regex/tools/casefolding/.gitignore create mode 100644 regex/src/com.oracle.truffle.regex/tools/casefolding/Cargo.lock create mode 100644 regex/src/com.oracle.truffle.regex/tools/casefolding/Cargo.toml create mode 100644 regex/src/com.oracle.truffle.regex/tools/casefolding/src/main.rs delete mode 100755 regex/src/com.oracle.truffle.regex/tools/generate_case_fold_table.clj delete mode 100755 regex/src/com.oracle.truffle.regex/tools/generate_nonunicode_fold_table.py delete mode 100755 regex/src/com.oracle.truffle.regex/tools/generate_ruby_case_folding.py delete mode 100755 
regex/src/com.oracle.truffle.regex/tools/generate_special_casing_equivalences.py delete mode 100755 regex/src/com.oracle.truffle.regex/tools/unicode-script.sh delete mode 100755 regex/src/com.oracle.truffle.regex/tools/update_case_fold_table.py diff --git a/regex/ci/ci.jsonnet b/regex/ci/ci.jsonnet index a94b9707d80f..af51df34ca95 100644 --- a/regex/ci/ci.jsonnet +++ b/regex/ci/ci.jsonnet @@ -14,6 +14,15 @@ targets: ["gate"], }, + local regex_gate_jdkLatest = regex_common + common.deps.eclipse + common.deps.jdt + { + name: 'gate-regex-jdk' + self.jdk_version, + run: [ + ["mx", "build"], + ["mx", "unittest", "com.oracle.truffle.regex"], + ], + targets: ["gate"], + }, + local regex_gate_lite = regex_common + { name: 'gate-regex-mac-lite-jdk' + self.jdk_version, run: [ @@ -40,5 +49,7 @@ ] for jdk in [ common.labsjdk21, ] - ]), + ]) + [ + common.linux_amd64 + common.labsjdkLatest + regex_gate_jdkLatest, + ], } diff --git a/regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/test/OracleDBTests.java b/regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/test/OracleDBTests.java index 14e843ffc635..b57c4b8fdda0 100644 --- a/regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/test/OracleDBTests.java +++ b/regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/test/OracleDBTests.java @@ -107,18 +107,36 @@ public void testQuantifiers() { test("x{4294967296}", "", "x{4294967296}", 0, true, 0, 13); test("x{4294967297}", "", "x{4294967297}", 0, true, 0, 13); test("x??", "", "x", 0, true, 0, 0); - test("x???", "", "x", 0, true, 0, 1); test("x{2}+", "", "x", 0, false); test("x{2}+", "", "xx", 0, true, 0, 2); test("x{2}+", "", "xxx", 0, true, 0, 2); test("x{2}+", "", "xxxx", 0, true, 0, 4); test("x{2}*", "", "xxxx", 0, true, 0, 4); test("x{2}*?", "", "xxxx", 0, true, 0, 0); - test("x{2}*??", "", "xxxx", 0, true, 0, 2); test("x{2}*???", "", "xxxx", 0, true, 0, 0); test("\\A*x\\Z+", "", "x", 
0, true, 0, 1); test("\\A*x\\Z+", "", "xx", 0, true, 1, 2); test("\\A+x\\Z+", "", "xx", 0, false); + test("x????", "", "x?", 0, true, 0, 0); + test("x????", "", "xx?", 0, true, 0, 0); + test("x??????", "", "x?", 0, true, 0, 0); + test("x??????", "", "xx?", 0, true, 0, 0); + test("x{2}?", "", "xxxxx", 0, true, 0, 2); + test("x{2}??", "", "xxxxx", 0, true, 0, 2); + test("x{2}+", "", "xxxxx", 0, true, 0, 4); + test("x{2}*", "", "xxxxx", 0, true, 0, 4); + + // known to fail, suspected to be caused by LXR bug 35718208 + + // test("x???", "", "x", 0, true, 0, 1); + // test("x{2}*??", "", "xxxx", 0, true, 0, 2); + // test("x???", "", "x?", 0, true, 0, 1); + // test("x???", "", "xx?", 0, true, 0, 1); + // test("x?????", "", "x?", 0, true, 0, 1); + // test("x?????", "", "xx?", 0, true, 0, 1); + // test("(a{0,1})*b\\1", "", "aab", 0, true, 1, 3, 2, 2); + // test("(a{0,1})*b\\1", "", "aaba", 0, true, 1, 3, 2, 2); + // test("(a{0,1})*b\\1", "", "aabaa", 0, true, 1, 3, 2, 2); } @Test @@ -154,15 +172,8 @@ public void testCharClasses() { expectSyntaxError("[[.a.]-[:lower:]]+", "", "invalid range in regular expression", 7); expectSyntaxError("[[=a=]-[:lower:]]+", "", "invalid range in regular expression", 7); test("[[:upper:]-[.a.]]+", "", "a-A", 0, true, 0, 3); - test("[[=a=]]", "", "\u00e4", 0, false); - test("[[=c=]]", "", "\u010D", 0, false); test("[[=c=]-c]", "", "\u010d-=c", 0, true, 3, 4); - test("[[=c=]-]+", "", "\u010d-=c", 0, true, 1, 2); - // TODO: collator support - // test("[[=a=]]", "", "\u00e4", 0, true, 0, 1); - // test("[[=c=]]", "", "\u010D", 0, true, 0, 1); - // test("[[=c=]-c]", "", "\u010d-=c", 0, true, 3, 4); - // test("[[=c=]-]+", "", "\u010d-=c", 0, true, 0, 2); + test("[[=c=]-]+", "", "\u010d-=c", 0, true, 0, 2); } @Test @@ -170,7 +181,7 @@ public void testBackReferences() { expectSyntaxError("(\\2())", "", "invalid back reference in regular expression", 1); test("(\\1a)", "", "aa", 0, false); test("(\\1a|){2}", "", "aa", 0, true, 0, 0, 0, 0); - 
test("(\\1a|)*", "", "aa", 0, true, 0, 0, -1, -1); + test("(\\1a|)*", "", "aa", 0, true, 0, 0, 0, 0); test("(()b|\\2a){2}", "", "ba", 0, true, 0, 2, 1, 2, 0, 0); test("(a\\1)", "", "aa", 0, false); test("(a|b\\1){2}", "", "aba", 0, true, 0, 3, 1, 3); @@ -217,4 +228,657 @@ public void testSpecialGroups() { test(String.format("(?%s)", s), "", "?" + s, 0, true, 1, s.length() + 1, 1, s.length() + 1); } } + + @Test + public void generatedTests() { + /* GENERATED CODE BEGIN - KEEP THIS MARKER FOR AUTOMATIC UPDATES */ + test("abracadabra$", "", "abracadabracadabra", 0, true, 7, 18); + test("a...b", "", "abababbb", 0, true, 2, 7); + test("XXXXXX", "", "..XXXXXX", 0, true, 2, 8); + test("\\)", "", "()", 0, true, 1, 2); + test("a]", "", "a]a", 0, true, 0, 2); + test("}", "", "}", 0, true, 0, 1); + test("\\}", "", "}", 0, true, 0, 1); + test("\\]", "", "]", 0, true, 0, 1); + test("]", "", "]", 0, true, 0, 1); + test("]", "", "]", 0, true, 0, 1); + test("{", "", "{", 0, true, 0, 1); + test("}", "", "}", 0, true, 0, 1); + test("^a", "", "ax", 0, true, 0, 1); + test("\\^a", "", "a^a", 0, true, 1, 3); + test("a\\^", "", "a^", 0, true, 0, 2); + test("a$", "", "aa", 0, true, 1, 2); + test("a\\$", "", "a$", 0, true, 0, 2); + test("a($)", "", "aa", 0, true, 1, 2, 2, 2); + test("a*(^a)", "", "aa", 0, true, 0, 1, 0, 1); + test("(..)*(...)*", "", "a", 0, true, 0, 0, -1, -1, -1, -1); + test("(..)*(...)*", "", "abcd", 0, true, 0, 4, 2, 4, -1, -1); + test("(ab|a)(bc|c)", "", "abc", 0, true, 0, 3, 0, 2, 2, 3); + test("(ab)c|abc", "", "abc", 0, true, 0, 3, 0, 2); + test("a{0}b", "", "ab", 0, true, 1, 2); + test("(a*)(b?)(b+)b{3}", "", "aaabbbbbbb", 0, true, 0, 10, 0, 3, 3, 4, 4, 7); + test("(a*)(b{0,1})(b{1,})b{3}", "", "aaabbbbbbb", 0, true, 0, 10, 0, 3, 3, 4, 4, 7); + test("a{9876543210}", "", "a", 0, false); + test("((a|a)|a)", "", "a", 0, true, 0, 1, 0, 1, 0, 1); + test("(a*)(a|aa)", "", "aaaa", 0, true, 0, 4, 0, 3, 3, 4); + test("a*(a.|aa)", "", "aaaa", 0, true, 0, 4, 2, 4); + 
test("a(b)|c(d)|a(e)f", "", "aef", 0, true, 0, 3, -1, -1, -1, -1, 1, 2); + test("(a|b)?.*", "", "b", 0, true, 0, 1, 0, 1); + test("(a|b)c|a(b|c)", "", "ac", 0, true, 0, 2, 0, 1, -1, -1); + test("(a|b)c|a(b|c)", "", "ab", 0, true, 0, 2, -1, -1, 1, 2); + test("(a|b)*c|(a|ab)*c", "", "abc", 0, true, 0, 3, 1, 2, -1, -1); + test("(a|b)*c|(a|ab)*c", "", "xc", 0, true, 1, 2, -1, -1, -1, -1); + test("(.a|.b).*|.*(.a|.b)", "", "xa", 0, true, 0, 2, 0, 2, -1, -1); + test("a?(ab|ba)ab", "", "abab", 0, true, 0, 4, 0, 2); + test("a?(ac{0}b|ba)ab", "", "abab", 0, true, 0, 4, 0, 2); + test("ab|abab", "", "abbabab", 0, true, 0, 2); + test("aba|bab|bba", "", "baaabbbaba", 0, true, 5, 8); + test("aba|bab", "", "baaabbbaba", 0, true, 6, 9); + test("(aa|aaa)*|(a|aaaaa)", "", "aa", 0, true, 0, 2, 0, 2, -1, -1); + test("(a.|.a.)*|(a|.a...)", "", "aa", 0, true, 0, 2, 0, 2, -1, -1); + test("ab|a", "", "xabc", 0, true, 1, 3); + test("ab|a", "", "xxabc", 0, true, 2, 4); + test("(Ab|cD)*", "", "aBcD", 0, true, 0, 0, -1, -1); + test("[^-]", "", "--a", 0, true, 2, 3); + test("[a-]*", "", "--a", 0, true, 0, 3); + test("[a-m-]*", "", "--amoma--", 0, true, 0, 4); + test(":::1:::0:|:::1:1:0:", "", ":::0:::1:::1:::0:", 0, true, 8, 17); + test(":::1:::0:|:::1:1:1:", "", ":::0:::1:::1:::0:", 0, true, 8, 17); + test("[[:upper:]]", "", "A", 0, true, 0, 1); + test("[[:lower:]]+", "", "`az{", 0, true, 1, 3); + test("[[:upper:]]+", "", "@AZ[", 0, true, 1, 3); + test("[[-]]", "", "[[-]]", 0, true, 2, 4); + test("\\n", "", "\\n", 0, true, 1, 2); + test("\\n", "", "\\n", 0, true, 1, 2); + test("[^a]", "", "\\n", 0, true, 0, 1); + test("\\na", "", "\\na", 0, true, 1, 3); + test("(a)(b)(c)", "", "abc", 0, true, 0, 3, 0, 1, 1, 2, 2, 3); + test("xxx", "", "xxx", 0, true, 0, 3); + test("(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\\* */?)0*[6-7]))([^0-9]|$)", "", "feb 6,", 0, true, 0, 6, 0, 0, 0, 5, 0, 5, 0, 4, 5, 6); + test("(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\\* */?)0*[6-7]))([^0-9]|$)", "", "2/7", 0, true, 0, 3, 0, 0, 0, 3, 
0, 3, 0, 2, 3, 3); + test("(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\\* */?)0*[6-7]))([^0-9]|$)", "", "feb 1,Feb 6", 0, true, 5, 11, 5, 6, 6, 11, 6, 11, 6, 10, 11, 11); + test("((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))", "", "x", 0, true, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1); + test("((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))*", "", "xx", 0, true, 0, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2); + test("a?(ab|ba)*", "", "ababababababababababababababababababababababababababababababababababababababababa", 0, true, 0, 81, 79, 81); + test("abaa|abbaa|abbbaa|abbbbaa", "", "ababbabbbabbbabbbbabbbbaa", 0, true, 18, 25); + test("abaa|abbaa|abbbaa|abbbbaa", "", "ababbabbbabbbabbbbabaa", 0, true, 18, 22); + test("aaac|aabc|abac|abbc|baac|babc|bbac|bbbc", "", "baaabbbabac", 0, true, 7, 11); + test(".*", "", "\\x01\\xff", 0, true, 0, 8); + test("aaaa|bbbb|cccc|ddddd|eeeeee|fffffff|gggg|hhhh|iiiii|jjjjj|kkkkk|llll", "", "XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa", 0, true, 53, 57); + test("aaaa\\nbbbb\\ncccc\\nddddd\\neeeeee\\nfffffff\\ngggg\\nhhhh\\niiiii\\njjjjj\\nkkkkk\\nllll", "", "XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa", 0, false); + test("a*a*a*a*a*b", "", "aaaaaaaaab", 0, true, 0, 10); + test("^", "", "a", 0, true, 0, 0); + test("$", "", "a", 0, true, 1, 1); + test("^$", "", "a", 0, false); + test("^a$", "", "a", 0, true, 0, 1); + test("abc", "", "abc", 0, true, 0, 3); + test("abc", "", "xabcy", 0, true, 1, 4); + test("abc", "", "ababc", 0, true, 2, 5); + test("ab*c", "", "abc", 0, true, 0, 3); + test("ab*bc", "", "abc", 0, true, 0, 3); + test("ab*bc", "", "abbc", 0, true, 0, 4); + test("ab*bc", "", "abbbbc", 0, true, 0, 6); + test("ab+bc", "", "abbc", 0, true, 0, 4); + test("ab+bc", "", "abbbbc", 0, true, 0, 6); + test("ab?bc", "", "abbc", 0, true, 0, 4); + test("ab?bc", "", "abc", 0, true, 0, 3); + test("ab?c", "", "abc", 0, true, 0, 3); + test("^abc$", "", "abc", 0, 
true, 0, 3); + test("^abc", "", "abcc", 0, true, 0, 3); + test("abc$", "", "aabc", 0, true, 1, 4); + test("^", "", "abc", 0, true, 0, 0); + test("$", "", "abc", 0, true, 3, 3); + test("a.c", "", "abc", 0, true, 0, 3); + test("a.c", "", "axc", 0, true, 0, 3); + test("a.*c", "", "axyzc", 0, true, 0, 5); + test("a[bc]d", "", "abd", 0, true, 0, 3); + test("a[b-d]e", "", "ace", 0, true, 0, 3); + test("a[b-d]", "", "aac", 0, true, 1, 3); + test("a[-b]", "", "a-", 0, true, 0, 2); + test("a[b-]", "", "a-", 0, true, 0, 2); + test("a]", "", "a]", 0, true, 0, 2); + test("a[]]b", "", "a]b", 0, true, 0, 3); + test("a[^bc]d", "", "aed", 0, true, 0, 3); + test("a[^-b]c", "", "adc", 0, true, 0, 3); + test("a[^]b]c", "", "adc", 0, true, 0, 3); + test("ab|cd", "", "abc", 0, true, 0, 2); + test("ab|cd", "", "abcd", 0, true, 0, 2); + test("a\\(b", "", "a(b", 0, true, 0, 3); + test("a\\(*b", "", "ab", 0, true, 0, 2); + test("a\\(*b", "", "a((b", 0, true, 0, 4); + test("((a))", "", "abc", 0, true, 0, 1, 0, 1, 0, 1); + test("(a)b(c)", "", "abc", 0, true, 0, 3, 0, 1, 2, 3); + test("a+b+c", "", "aabbabc", 0, true, 4, 7); + test("a*", "", "aaa", 0, true, 0, 3); + test("(a*)*", "", "-", 0, true, 0, 0, 0, 0); + test("(a*)+", "", "-", 0, true, 0, 0, 0, 0); + test("(a*|b)*", "", "-", 0, true, 0, 0, 0, 0); + test("(a+|b)*", "", "ab", 0, true, 0, 2, 1, 2); + test("(a+|b)+", "", "ab", 0, true, 0, 2, 1, 2); + test("(a+|b)?", "", "ab", 0, true, 0, 1, 0, 1); + test("[^ab]*", "", "cde", 0, true, 0, 3); + test("(^)*", "", "-", 0, true, 0, 0, 0, 0); + test("a*", "", "a", 0, true, 0, 1); + test("([abc])*d", "", "abbbcd", 0, true, 0, 6, 4, 5); + test("([abc])*bcd", "", "abcd", 0, true, 0, 4, 0, 1); + test("a|b|c|d|e", "", "e", 0, true, 0, 1); + test("(a|b|c|d|e)f", "", "ef", 0, true, 0, 2, 0, 1); + test("((a*|b))*", "", "-", 0, true, 0, 0, 0, 0, 0, 0); + test("abcd*efg", "", "abcdefg", 0, true, 0, 7); + test("ab*", "", "xabyabbbz", 0, true, 1, 3); + test("ab*", "", "xayabbbz", 0, true, 1, 2); + 
test("(ab|cd)e", "", "abcde", 0, true, 2, 5, 2, 4); + test("[abhgefdc]ij", "", "hij", 0, true, 0, 3); + test("(a|b)c*d", "", "abcd", 0, true, 1, 4, 1, 2); + test("(ab|ab*)bc", "", "abc", 0, true, 0, 3, 0, 1); + test("a([bc]*)c*", "", "abc", 0, true, 0, 3, 1, 3); + test("a([bc]*)(c*d)", "", "abcd", 0, true, 0, 4, 1, 3, 3, 4); + test("a([bc]+)(c*d)", "", "abcd", 0, true, 0, 4, 1, 3, 3, 4); + test("a([bc]*)(c+d)", "", "abcd", 0, true, 0, 4, 1, 2, 2, 4); + test("a[bcd]*dcdcde", "", "adcdcde", 0, true, 0, 7); + test("(ab|a)b*c", "", "abc", 0, true, 0, 3, 0, 2); + test("((a)(b)c)(d)", "", "abcd", 0, true, 0, 4, 0, 3, 0, 1, 1, 2, 3, 4); + test("[A-Za-z_][A-Za-z0-9_]*", "", "alpha", 0, true, 0, 5); + test("^a(bc+|b[eh])g|.h$", "", "abh", 0, true, 1, 3, -1, -1); + test("(bc+d$|ef*g.|h?i(j|k))", "", "effgz", 0, true, 0, 5, 0, 5, -1, -1); + test("(bc+d$|ef*g.|h?i(j|k))", "", "ij", 0, true, 0, 2, 0, 2, 1, 2); + test("(bc+d$|ef*g.|h?i(j|k))", "", "reffgz", 0, true, 1, 6, 1, 6, -1, -1); + test("(((((((((a)))))))))", "", "a", 0, true, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1); + test("multiple words", "", "multiple words yeah", 0, true, 0, 14); + test("(.*)c(.*)", "", "abcde", 0, true, 0, 5, 0, 2, 3, 5); + test("abcd", "", "abcd", 0, true, 0, 4); + test("a(bc)d", "", "abcd", 0, true, 0, 4, 1, 3); + test("a[\u0001-\u0003]?c", "", "a\u0002c", 0, true, 0, 3); + test("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Muammar Qaddafi", 0, true, 0, 15, -1, -1, 10, 12); + test("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Mo'ammar Gadhafi", 0, true, 0, 16, -1, -1, 11, 13); + test("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Muammar Kaddafi", 0, true, 0, 15, -1, -1, 10, 12); + test("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Muammar Qadhafi", 0, true, 0, 15, -1, -1, 10, 12); + test("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Muammar 
Gadafi", 0, true, 0, 14, -1, -1, 10, 11); + test("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Mu'ammar Qadafi", 0, true, 0, 15, -1, -1, 11, 12); + test("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Moamar Gaddafi", 0, true, 0, 14, -1, -1, 9, 11); + test("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Mu'ammar Qadhdhafi", 0, true, 0, 18, -1, -1, 13, 15); + test("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Muammar Khaddafi", 0, true, 0, 16, -1, -1, 11, 13); + test("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Muammar Ghaddafy", 0, true, 0, 16, -1, -1, 11, 13); + test("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Muammar Ghadafi", 0, true, 0, 15, -1, -1, 11, 12); + test("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Muammar Ghaddafi", 0, true, 0, 16, -1, -1, 11, 13); + test("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Muamar Kaddafi", 0, true, 0, 14, -1, -1, 9, 11); + test("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Muammar Quathafi", 0, true, 0, 16, -1, -1, 11, 13); + test("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Muammar Gheddafi", 0, true, 0, 16, -1, -1, 11, 13); + test("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Moammar Khadafy", 0, true, 0, 15, -1, -1, 11, 12); + test("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Moammar Qudhafi", 0, true, 0, 15, -1, -1, 10, 12); + test("a+(b|c)*d+", "", "aabcdd", 0, true, 0, 6, 3, 4); + test("^.+$", "", "vivi", 0, true, 0, 4); + test("^(.+)$", "", "vivi", 0, true, 0, 4, 0, 4); + test("^([^!.]+).att.com!(.+)$", "", "gryphon.att.com!eby", 0, true, 0, 19, 0, 7, 16, 19); + test("^([^!]+!)?([^!]+)$", "", "bas", 0, true, 0, 3, -1, -1, 0, 3); + test("^([^!]+!)?([^!]+)$", "", "bar!bas", 0, true, 0, 
7, 0, 4, 4, 7); + test("^([^!]+!)?([^!]+)$", "", "foo!bas", 0, true, 0, 7, 0, 4, 4, 7); + test("^.+!([^!]+!)([^!]+)$", "", "foo!bar!bas", 0, true, 0, 11, 4, 8, 8, 11); + test("((foo)|(bar))!bas", "", "bar!bas", 0, true, 0, 7, 0, 3, -1, -1, 0, 3); + test("((foo)|(bar))!bas", "", "foo!bar!bas", 0, true, 4, 11, 4, 7, -1, -1, 4, 7); + test("((foo)|(bar))!bas", "", "foo!bas", 0, true, 0, 7, 0, 3, 0, 3, -1, -1); + test("((foo)|bar)!bas", "", "bar!bas", 0, true, 0, 7, 0, 3, -1, -1); + test("((foo)|bar)!bas", "", "foo!bar!bas", 0, true, 4, 11, 4, 7, -1, -1); + test("((foo)|bar)!bas", "", "foo!bas", 0, true, 0, 7, 0, 3, 0, 3); + test("(foo|(bar))!bas", "", "bar!bas", 0, true, 0, 7, 0, 3, 0, 3); + test("(foo|(bar))!bas", "", "foo!bar!bas", 0, true, 4, 11, 4, 7, 4, 7); + test("(foo|(bar))!bas", "", "foo!bas", 0, true, 0, 7, 0, 3, -1, -1); + test("(foo|bar)!bas", "", "bar!bas", 0, true, 0, 7, 0, 3); + test("(foo|bar)!bas", "", "foo!bar!bas", 0, true, 4, 11, 4, 7); + test("(foo|bar)!bas", "", "foo!bas", 0, true, 0, 7, 0, 3); + test("^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", "", "foo!bar!bas", 0, true, 0, 11, 0, 11, -1, -1, -1, -1, 4, 8, 8, 11); + test("^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$", "", "bas", 0, true, 0, 3, -1, -1, 0, 3, -1, -1, -1, -1); + test("^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$", "", "bar!bas", 0, true, 0, 7, 0, 4, 4, 7, -1, -1, -1, -1); + test("^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$", "", "foo!bar!bas", 0, true, 0, 11, -1, -1, -1, -1, 4, 8, 8, 11); + test("^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$", "", "foo!bas", 0, true, 0, 7, 0, 4, 4, 7, -1, -1, -1, -1); + test("^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", "", "bas", 0, true, 0, 3, 0, 3, -1, -1, 0, 3, -1, -1, -1, -1); + test("^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", "", "bar!bas", 0, true, 0, 7, 0, 7, 0, 4, 4, 7, -1, -1, -1, -1); + test("^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", "", "foo!bar!bas", 0, true, 0, 11, 0, 11, -1, -1, -1, -1, 4, 8, 8, 11); + test("^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", "", 
"foo!bas", 0, true, 0, 7, 0, 7, 0, 4, 4, 7, -1, -1, -1, -1); + test(".*(/XXX).*", "", "/XXX", 0, true, 0, 4, 0, 4); + test(".*(\\\\XXX).*", "", "\\XXX", 0, true, 0, 4, 0, 4); + test("\\\\XXX", "", "\\XXX", 0, true, 0, 4); + test(".*(/000).*", "", "/000", 0, true, 0, 4, 0, 4); + test(".*(\\\\000).*", "", "\\000", 0, true, 0, 4, 0, 4); + test("\\\\000", "", "\\000", 0, true, 0, 4); + test("aa*", "", "xaxaax", 0, true, 1, 2); + test("(a*)(ab)*(b*)", "", "abc", 0, true, 0, 2, 0, 1, -1, -1, 1, 2); + test("(a*)(ab)*(b*)", "", "abc", 0, true, 0, 2, 0, 1, -1, -1, 1, 2); + test("((a*)(ab)*)((b*)(a*))", "", "aba", 0, true, 0, 3, 0, 1, 0, 1, -1, -1, 1, 3, 1, 2, 2, 3); + test("((a*)(ab)*)((b*)(a*))", "", "aba", 0, true, 0, 3, 0, 1, 0, 1, -1, -1, 1, 3, 1, 2, 2, 3); + test("(...?.?)*", "", "xxxxxx", 0, true, 0, 6, 4, 6); + test("(...?.?)*", "", "xxxxxx", 0, true, 0, 6, 4, 6); + test("(...?.?)*", "", "xxxxxx", 0, true, 0, 6, 4, 6); + test("(a|ab)(bc|c)", "", "abcabc", 0, true, 0, 3, 0, 1, 1, 3); + test("(a|ab)(bc|c)", "", "abcabc", 0, true, 0, 3, 0, 1, 1, 3); + test("(aba|a*b)(aba|a*b)", "", "ababa", 0, true, 0, 4, 0, 3, 3, 4); + test("(aba|a*b)(aba|a*b)", "", "ababa", 0, true, 0, 4, 0, 3, 3, 4); + test("a(b)*\\1", "", "a", 0, false); + test("a(b)*\\1", "", "a", 0, false); + test("a(b)*\\1", "", "abab", 0, false); + test("(a*){2}", "", "xxxxx", 0, true, 0, 0, 0, 0); + test("(a*){2}", "", "xxxxx", 0, true, 0, 0, 0, 0); + test("a(b)*\\1", "", "abab", 0, false); + test("a(b)*\\1", "", "abab", 0, false); + test("a(b)*\\1", "", "abab", 0, false); + test("(a*)*", "", "a", 0, true, 0, 1, 1, 1); + test("(a*)*", "", "ax", 0, true, 0, 1, 1, 1); + test("(a*)*", "", "a", 0, true, 0, 1, 1, 1); + test("(aba|a*b)*", "", "ababa", 0, true, 0, 4, 3, 4); + test("(aba|a*b)*", "", "ababa", 0, true, 0, 4, 3, 4); + test("(aba|a*b)*", "", "ababa", 0, true, 0, 4, 3, 4); + test("(a(b)?)+", "", "aba", 0, true, 0, 3, 2, 3, 1, 2); + test("(a(b)?)+", "", "aba", 0, true, 0, 3, 2, 3, 1, 2); + 
test("(a(b)*)*\\2", "", "abab", 0, true, 0, 4, 2, 3, 1, 2); + test("(a(b)*)*\\2", "", "abab", 0, true, 0, 4, 2, 3, 1, 2); + test("(a?)((ab)?)(b?)a?(ab)?b?", "", "abab", 0, true, 0, 4, 0, 1, 1, 1, -1, -1, 1, 2, -1, -1); + test(".*(.*)", "", "ab", 0, true, 0, 2, 2, 2); + test(".*(.*)", "", "ab", 0, true, 0, 2, 2, 2); + test("(a|ab)(c|bcd)", "", "abcd", 0, true, 0, 4, 0, 1, 1, 4); + test("(a|ab)(bcd|c)", "", "abcd", 0, true, 0, 4, 0, 1, 1, 4); + test("(ab|a)(c|bcd)", "", "abcd", 0, true, 0, 3, 0, 2, 2, 3); + test("(ab|a)(bcd|c)", "", "abcd", 0, true, 0, 3, 0, 2, 2, 3); + test("((a|ab)(c|bcd))(d*)", "", "abcd", 0, true, 0, 4, 0, 4, 0, 1, 1, 4, 4, 4); + test("((a|ab)(bcd|c))(d*)", "", "abcd", 0, true, 0, 4, 0, 4, 0, 1, 1, 4, 4, 4); + test("((ab|a)(c|bcd))(d*)", "", "abcd", 0, true, 0, 4, 0, 3, 0, 2, 2, 3, 3, 4); + test("((ab|a)(bcd|c))(d*)", "", "abcd", 0, true, 0, 4, 0, 3, 0, 2, 2, 3, 3, 4); + test("(a|ab)((c|bcd)(d*))", "", "abcd", 0, true, 0, 4, 0, 1, 1, 4, 1, 4, 4, 4); + test("(a|ab)((bcd|c)(d*))", "", "abcd", 0, true, 0, 4, 0, 1, 1, 4, 1, 4, 4, 4); + test("(ab|a)((c|bcd)(d*))", "", "abcd", 0, true, 0, 4, 0, 2, 2, 4, 2, 3, 3, 4); + test("(ab|a)((bcd|c)(d*))", "", "abcd", 0, true, 0, 4, 0, 2, 2, 4, 2, 3, 3, 4); + test("(a*)(b|abc)", "", "abc", 0, true, 0, 2, 0, 1, 1, 2); + test("(a*)(abc|b)", "", "abc", 0, true, 0, 2, 0, 1, 1, 2); + test("((a*)(b|abc))(c*)", "", "abc", 0, true, 0, 3, 0, 2, 0, 1, 1, 2, 2, 3); + test("((a*)(abc|b))(c*)", "", "abc", 0, true, 0, 3, 0, 2, 0, 1, 1, 2, 2, 3); + test("(a*)((b|abc)(c*))", "", "abc", 0, true, 0, 3, 0, 1, 1, 3, 1, 2, 2, 3); + test("(a*)((abc|b)(c*))", "", "abc", 0, true, 0, 3, 0, 1, 1, 3, 1, 2, 2, 3); + test("(a*)(b|abc)", "", "abc", 0, true, 0, 2, 0, 1, 1, 2); + test("(a*)(abc|b)", "", "abc", 0, true, 0, 2, 0, 1, 1, 2); + test("((a*)(b|abc))(c*)", "", "abc", 0, true, 0, 3, 0, 2, 0, 1, 1, 2, 2, 3); + test("((a*)(abc|b))(c*)", "", "abc", 0, true, 0, 3, 0, 2, 0, 1, 1, 2, 2, 3); + test("(a*)((b|abc)(c*))", "", "abc", 0, true, 0, 
3, 0, 1, 1, 3, 1, 2, 2, 3); + test("(a*)((abc|b)(c*))", "", "abc", 0, true, 0, 3, 0, 1, 1, 3, 1, 2, 2, 3); + test("(a|ab)", "", "ab", 0, true, 0, 1, 0, 1); + test("(ab|a)", "", "ab", 0, true, 0, 2, 0, 2); + test("(a|ab)(b*)", "", "ab", 0, true, 0, 2, 0, 1, 1, 2); + test("(ab|a)(b*)", "", "ab", 0, true, 0, 2, 0, 2, 2, 2); + test("a+", "", "xaax", 0, true, 1, 3); + test(".(a*).", "", "xaax", 0, true, 0, 4, 1, 3); + test("(a?)((ab)?)", "", "ab", 0, true, 0, 1, 0, 1, 1, 1, -1, -1); + test("(a?)((ab)?)(b?)", "", "ab", 0, true, 0, 2, 0, 1, 1, 1, -1, -1, 1, 2); + test("((a?)((ab)?))(b?)", "", "ab", 0, true, 0, 2, 0, 1, 0, 1, 1, 1, -1, -1, 1, 2); + test("(a?)(((ab)?)(b?))", "", "ab", 0, true, 0, 2, 0, 1, 1, 2, 1, 1, -1, -1, 1, 2); + test("(.?)", "", "x", 0, true, 0, 1, 0, 1); + test("(.?){1}", "", "x", 0, true, 0, 1, 0, 1); + test("(.?)(.?)", "", "x", 0, true, 0, 1, 0, 1, 1, 1); + test("(.?){2}", "", "x", 0, true, 0, 1, 1, 1); + test("(.?)*", "", "x", 0, true, 0, 1, 1, 1); + test("(.?.?)", "", "xxx", 0, true, 0, 2, 0, 2); + test("(.?.?){1}", "", "xxx", 0, true, 0, 2, 0, 2); + test("(.?.?)(.?.?)", "", "xxx", 0, true, 0, 3, 0, 2, 2, 3); + test("(.?.?){2}", "", "xxx", 0, true, 0, 3, 2, 3); + test("(.?.?)(.?.?)(.?.?)", "", "xxx", 0, true, 0, 3, 0, 2, 2, 3, 3, 3); + test("(.?.?){3}", "", "xxx", 0, true, 0, 3, 3, 3); + test("(.?.?)*", "", "xxx", 0, true, 0, 3, 3, 3); + test("a?((ab)?)(b?)", "", "ab", 0, true, 0, 2, 1, 1, -1, -1, 1, 2); + test("(a?)((ab)?)b?", "", "ab", 0, true, 0, 2, 0, 1, 1, 1, -1, -1); + test("a?((ab)?)b?", "", "ab", 0, true, 0, 2, 1, 1, -1, -1); + test("(a*){2}", "", "xxxxx", 0, true, 0, 0, 0, 0); + test("(ab?)(b?a)", "", "aba", 0, true, 0, 3, 0, 2, 2, 3); + test("(a|ab)(ba|a)", "", "aba", 0, true, 0, 3, 0, 1, 1, 3); + test("(a|ab|ba)", "", "aba", 0, true, 0, 1, 0, 1); + test("(a|ab|ba)(a|ab|ba)", "", "aba", 0, true, 0, 3, 0, 1, 1, 3); + test("(a|ab|ba)*", "", "aba", 0, true, 0, 3, 1, 3); + test("(aba|a*b)", "", "ababa", 0, true, 0, 3, 0, 3); + 
test("(aba|a*b)(aba|a*b)", "", "ababa", 0, true, 0, 4, 0, 3, 3, 4); + test("(aba|a*b)*", "", "ababa", 0, true, 0, 4, 3, 4); + test("(aba|ab|a)", "", "ababa", 0, true, 0, 3, 0, 3); + test("(aba|ab|a)(aba|ab|a)", "", "ababa", 0, true, 0, 5, 0, 2, 2, 5); + test("(aba|ab|a)*", "", "ababa", 0, true, 0, 3, 0, 3); + test("(a(b)?)", "", "aba", 0, true, 0, 2, 0, 2, 1, 2); + test("(a(b)?)(a(b)?)", "", "aba", 0, true, 0, 3, 0, 2, 1, 2, 2, 3, -1, -1); + test("(a(b)?)+", "", "aba", 0, true, 0, 3, 2, 3, 1, 2); + test("(.*)(.*)", "", "xx", 0, true, 0, 2, 0, 2, 2, 2); + test(".*(.*)", "", "xx", 0, true, 0, 2, 2, 2); + test("(a.*z|b.*y)", "", "azbazby", 0, true, 0, 5, 0, 5); + test("(a.*z|b.*y)(a.*z|b.*y)", "", "azbazby", 0, true, 0, 7, 0, 5, 5, 7); + test("(a.*z|b.*y)*", "", "azbazby", 0, true, 0, 7, 5, 7); + test("(.|..)(.*)", "", "ab", 0, true, 0, 2, 0, 1, 1, 2); + test("((..)*(...)*)", "", "xxx", 0, true, 0, 2, 0, 2, 0, 2, -1, -1); + test("((..)*(...)*)((..)*(...)*)", "", "xxx", 0, true, 0, 2, 0, 2, 0, 2, -1, -1, 2, 2, -1, -1, -1, -1); + test("((..)*(...)*)*", "", "xxx", 0, true, 0, 2, 2, 2, 0, 2, -1, -1); + test("(a{0,1})*b\\1", "", "ab", 0, true, 0, 2, 1, 1); + test("(a*)*b\\1", "", "ab", 0, true, 0, 2, 1, 1); + test("(a*)b\\1*", "", "ab", 0, true, 0, 2, 0, 1); + test("(a*)*b\\1*", "", "ab", 0, true, 0, 2, 1, 1); + test("(a{0,1})*b(\\1)", "", "ab", 0, true, 0, 2, 1, 1, 2, 2); + test("(a*)*b(\\1)", "", "ab", 0, true, 0, 2, 1, 1, 2, 2); + test("(a*)b(\\1)*", "", "ab", 0, true, 0, 2, 0, 1, -1, -1); + test("(a*)*b(\\1)*", "", "ab", 0, true, 0, 2, 1, 1, 2, 2); + test("(a{0,1})*b\\1", "", "aba", 0, true, 0, 2, 1, 1); + test("(a*)*b\\1", "", "aba", 0, true, 0, 2, 1, 1); + test("(a*)b\\1*", "", "aba", 0, true, 0, 3, 0, 1); + test("(a*)*b\\1*", "", "aba", 0, true, 0, 2, 1, 1); + test("(a*)*b(\\1)*", "", "aba", 0, true, 0, 2, 1, 1, 2, 2); + test("(a{0,1})*b\\1", "", "abaa", 0, true, 0, 2, 1, 1); + test("(a*)*b\\1", "", "abaa", 0, true, 0, 2, 1, 1); + test("(a*)b\\1*", "", "abaa", 0, 
true, 0, 4, 0, 1); + test("(a*)*b\\1*", "", "abaa", 0, true, 0, 2, 1, 1); + test("(a*)*b(\\1)*", "", "abaa", 0, true, 0, 2, 1, 1, 2, 2); + test("(a*)*b\\1", "", "aab", 0, true, 0, 3, 2, 2); + test("(a*)b\\1*", "", "aab", 0, true, 0, 3, 0, 2); + test("(a*)*b\\1*", "", "aab", 0, true, 0, 3, 2, 2); + test("(a*)*b(\\1)*", "", "aab", 0, true, 0, 3, 2, 2, 3, 3); + test("(a*)*b\\1", "", "aaba", 0, true, 0, 3, 2, 2); + test("(a*)b\\1*", "", "aaba", 0, true, 0, 3, 0, 2); + test("(a*)*b\\1*", "", "aaba", 0, true, 0, 3, 2, 2); + test("(a*)*b(\\1)*", "", "aaba", 0, true, 0, 3, 2, 2, 3, 3); + test("(a*)*b\\1", "", "aabaa", 0, true, 0, 3, 2, 2); + test("(a*)b\\1*", "", "aabaa", 0, true, 0, 5, 0, 2); + test("(a*)*b\\1*", "", "aabaa", 0, true, 0, 3, 2, 2); + test("(a*)*b(\\1)*", "", "aabaa", 0, true, 0, 3, 2, 2, 3, 3); + test("(x)*a\\1", "", "a", 0, false); + test("(x)*a\\1*", "", "a", 0, true, 0, 1, -1, -1); + test("(x)*a(\\1)", "", "a", 0, false); + test("(x)*a(\\1)*", "", "a", 0, true, 0, 1, -1, -1, -1, -1); + test("(aa(b(b))?)+", "", "aabbaa", 0, true, 0, 6, 4, 6, 2, 4, 3, 4); + test("(a(b)?)+", "", "aba", 0, true, 0, 3, 2, 3, 1, 2); + test("([ab]+)([bc]+)([cd]*)", "", "abcd", 0, true, 0, 4, 0, 2, 2, 3, 3, 4); + test("([ab]*)([bc]*)([cd]*)\\1", "", "abcdaa", 0, true, 0, 5, 0, 1, 1, 3, 3, 4); + test("([ab]*)([bc]*)([cd]*)\\1", "", "abcdab", 0, true, 0, 6, 0, 2, 2, 3, 3, 4); + test("([ab]*)([bc]*)([cd]*)\\1*", "", "abcdaa", 0, true, 0, 4, 0, 2, 2, 3, 3, 4); + test("([ab]*)([bc]*)([cd]*)\\1*", "", "abcdab", 0, true, 0, 6, 0, 2, 2, 3, 3, 4); + test("^(A([^B]*))?(B(.*))?", "", "Aa", 0, true, 0, 2, 0, 2, 1, 2, -1, -1, -1, -1); + test("^(A([^B]*))?(B(.*))?", "", "Bb", 0, true, 0, 2, -1, -1, -1, -1, 0, 2, 1, 2); + test(".*([AB]).*\\1", "", "ABA", 0, true, 0, 3, 0, 1); + test("[^A]*A", "", "\\nA", 0, true, 0, 3); + test("(a|ab)(c|bcd)(d*)", "", "abcd", 0, true, 0, 4, 0, 1, 1, 4, 4, 4); + test("(a|ab)(bcd|c)(d*)", "", "abcd", 0, true, 0, 4, 0, 1, 1, 4, 4, 4); + test("(ab|a)(c|bcd)(d*)", 
"", "abcd", 0, true, 0, 4, 0, 2, 2, 3, 3, 4); + test("(ab|a)(bcd|c)(d*)", "", "abcd", 0, true, 0, 4, 0, 2, 2, 3, 3, 4); + test("(a*)(b|abc)(c*)", "", "abc", 0, true, 0, 3, 0, 1, 1, 2, 2, 3); + test("(a*)(abc|b)(c*)", "", "abc", 0, true, 0, 3, 0, 1, 1, 2, 2, 3); + test("(a*)(b|abc)(c*)", "", "abc", 0, true, 0, 3, 0, 1, 1, 2, 2, 3); + test("(a*)(abc|b)(c*)", "", "abc", 0, true, 0, 3, 0, 1, 1, 2, 2, 3); + test("(a|ab)(c|bcd)(d|.*)", "", "abcd", 0, true, 0, 4, 0, 1, 1, 4, 4, 4); + test("(a|ab)(bcd|c)(d|.*)", "", "abcd", 0, true, 0, 4, 0, 1, 1, 4, 4, 4); + test("(ab|a)(c|bcd)(d|.*)", "", "abcd", 0, true, 0, 4, 0, 2, 2, 3, 3, 4); + test("(ab|a)(bcd|c)(d|.*)", "", "abcd", 0, true, 0, 4, 0, 2, 2, 3, 3, 4); + test("(a*)*", "", "a", 0, true, 0, 1, 1, 1); + test("(a*)*", "", "x", 0, true, 0, 0, 0, 0); + test("(a*)*", "", "aaaaaa", 0, true, 0, 6, 6, 6); + test("(a*)*", "", "aaaaaax", 0, true, 0, 6, 6, 6); + test("(a*)+", "", "a", 0, true, 0, 1, 1, 1); + test("(a*)+", "", "x", 0, true, 0, 0, 0, 0); + test("(a*)+", "", "aaaaaa", 0, true, 0, 6, 6, 6); + test("(a*)+", "", "aaaaaax", 0, true, 0, 6, 6, 6); + test("(a+)*", "", "a", 0, true, 0, 1, 0, 1); + test("(a+)*", "", "x", 0, true, 0, 0, -1, -1); + test("(a+)*", "", "aaaaaa", 0, true, 0, 6, 0, 6); + test("(a+)*", "", "aaaaaax", 0, true, 0, 6, 0, 6); + test("(a+)+", "", "a", 0, true, 0, 1, 0, 1); + test("(a+)+", "", "x", 0, false); + test("(a+)+", "", "aaaaaa", 0, true, 0, 6, 0, 6); + test("(a+)+", "", "aaaaaax", 0, true, 0, 6, 0, 6); + test("([a]*)*", "", "a", 0, true, 0, 1, 1, 1); + test("([a]*)*", "", "x", 0, true, 0, 0, 0, 0); + test("([a]*)*", "", "aaaaaa", 0, true, 0, 6, 6, 6); + test("([a]*)*", "", "aaaaaax", 0, true, 0, 6, 6, 6); + test("([a]*)+", "", "a", 0, true, 0, 1, 1, 1); + test("([a]*)+", "", "x", 0, true, 0, 0, 0, 0); + test("([a]*)+", "", "aaaaaa", 0, true, 0, 6, 6, 6); + test("([a]*)+", "", "aaaaaax", 0, true, 0, 6, 6, 6); + test("([^b]*)*", "", "a", 0, true, 0, 1, 1, 1); + test("([^b]*)*", "", "b", 0, true, 0, 
0, 0, 0); + test("([^b]*)*", "", "aaaaaa", 0, true, 0, 6, 6, 6); + test("([^b]*)*", "", "aaaaaab", 0, true, 0, 6, 6, 6); + test("([ab]*)*", "", "a", 0, true, 0, 1, 1, 1); + test("([ab]*)*", "", "aaaaaa", 0, true, 0, 6, 6, 6); + test("([ab]*)*", "", "ababab", 0, true, 0, 6, 6, 6); + test("([ab]*)*", "", "bababa", 0, true, 0, 6, 6, 6); + test("([ab]*)*", "", "b", 0, true, 0, 1, 1, 1); + test("([ab]*)*", "", "bbbbbb", 0, true, 0, 6, 6, 6); + test("([ab]*)*", "", "aaaabcde", 0, true, 0, 5, 5, 5); + test("([^a]*)*", "", "b", 0, true, 0, 1, 1, 1); + test("([^a]*)*", "", "bbbbbb", 0, true, 0, 6, 6, 6); + test("([^a]*)*", "", "aaaaaa", 0, true, 0, 0, 0, 0); + test("([^ab]*)*", "", "ccccxx", 0, true, 0, 6, 6, 6); + test("([^ab]*)*", "", "ababab", 0, true, 0, 0, 0, 0); + test("((z)+|a)*", "", "zabcde", 0, true, 0, 2, 1, 2, 0, 1); + test("a+?", "", "aaaaaa", 0, true, 0, 1); + test("(a)", "", "aaa", 0, true, 0, 1, 0, 1); + test("(a*?)", "", "aaa", 0, true, 0, 0, 0, 0); + test("(a)*?", "", "aaa", 0, true, 0, 0, -1, -1); + test("(a*?)*?", "", "aaa", 0, true, 0, 0, -1, -1); + test("(a*)*(x)", "", "x", 0, true, 0, 1, 0, 0, 0, 1); + test("(a*)*(x)", "", "ax", 0, true, 0, 2, 1, 1, 1, 2); + test("(a*)*(x)", "", "axa", 0, true, 0, 2, 1, 1, 1, 2); + test("(a*)*(x)(\\1)", "", "x", 0, true, 0, 1, 0, 0, 0, 1, 1, 1); + test("(a*)*(x)(\\1)", "", "ax", 0, true, 0, 2, 1, 1, 1, 2, 2, 2); + test("(a*)*(x)(\\1)", "", "axa", 0, true, 0, 2, 1, 1, 1, 2, 2, 2); + test("(a*)*(x)(\\1)(x)", "", "axax", 0, true, 0, 4, 0, 1, 1, 2, 2, 3, 3, 4); + test("(a*)*(x)(\\1)(x)", "", "axxa", 0, true, 0, 3, 1, 1, 1, 2, 2, 2, 2, 3); + test("(a*)*(x)", "", "x", 0, true, 0, 1, 0, 0, 0, 1); + test("(a*)*(x)", "", "ax", 0, true, 0, 2, 1, 1, 1, 2); + test("(a*)*(x)", "", "axa", 0, true, 0, 2, 1, 1, 1, 2); + test("(a*)+(x)", "", "x", 0, true, 0, 1, 0, 0, 0, 1); + test("(a*)+(x)", "", "ax", 0, true, 0, 2, 1, 1, 1, 2); + test("(a*)+(x)", "", "axa", 0, true, 0, 2, 1, 1, 1, 2); + test("(a*){2}(x)", "", "x", 0, true, 0, 1, 0, 
0, 0, 1); + test("(a*){2}(x)", "", "ax", 0, true, 0, 2, 1, 1, 1, 2); + test("(a*){2}(x)", "", "axa", 0, true, 0, 2, 1, 1, 1, 2); + test("((..)|(.))", "", "a", 0, true, 0, 1, 0, 1, -1, -1, 0, 1); + test("((..)|(.))((..)|(.))", "", "a", 0, false); + test("((..)|(.))((..)|(.))((..)|(.))", "", "a", 0, false); + test("((..)|(.)){1}", "", "a", 0, true, 0, 1, 0, 1, -1, -1, 0, 1); + test("((..)|(.)){2}", "", "a", 0, false); + test("((..)|(.)){3}", "", "a", 0, false); + test("((..)|(.))*", "", "a", 0, true, 0, 1, 0, 1, -1, -1, 0, 1); + test("((..)|(.))", "", "aa", 0, true, 0, 2, 0, 2, 0, 2, -1, -1); + test("((..)|(.))((..)|(.))", "", "aa", 0, true, 0, 2, 0, 1, -1, -1, 0, 1, 1, 2, -1, -1, 1, 2); + test("((..)|(.))((..)|(.))((..)|(.))", "", "aa", 0, false); + test("((..)|(.)){1}", "", "aa", 0, true, 0, 2, 0, 2, 0, 2, -1, -1); + test("((..)|(.)){2}", "", "aa", 0, true, 0, 2, 1, 2, -1, -1, 1, 2); + test("((..)|(.)){3}", "", "aa", 0, false); + test("((..)|(.))*", "", "aa", 0, true, 0, 2, 0, 2, 0, 2, -1, -1); + test("((..)|(.))", "", "aaa", 0, true, 0, 2, 0, 2, 0, 2, -1, -1); + test("((..)|(.))((..)|(.))", "", "aaa", 0, true, 0, 3, 0, 2, 0, 2, -1, -1, 2, 3, -1, -1, 2, 3); + test("((..)|(.))((..)|(.))((..)|(.))", "", "aaa", 0, true, 0, 3, 0, 1, -1, -1, 0, 1, 1, 2, -1, -1, 1, 2, 2, 3, -1, -1, 2, 3); + test("((..)|(.)){1}", "", "aaa", 0, true, 0, 2, 0, 2, 0, 2, -1, -1); + test("((..)|(.)){2}", "", "aaa", 0, true, 0, 3, 2, 3, 0, 2, 2, 3); + test("((..)|(.)){3}", "", "aaa", 0, true, 0, 3, 2, 3, -1, -1, 2, 3); + test("((..)|(.))*", "", "aaa", 0, true, 0, 3, 2, 3, 0, 2, 2, 3); + test("((..)|(.))", "", "aaaa", 0, true, 0, 2, 0, 2, 0, 2, -1, -1); + test("((..)|(.))((..)|(.))", "", "aaaa", 0, true, 0, 4, 0, 2, 0, 2, -1, -1, 2, 4, 2, 4, -1, -1); + test("((..)|(.))((..)|(.))((..)|(.))", "", "aaaa", 0, true, 0, 4, 0, 2, 0, 2, -1, -1, 2, 3, -1, -1, 2, 3, 3, 4, -1, -1, 3, 4); + test("((..)|(.)){1}", "", "aaaa", 0, true, 0, 2, 0, 2, 0, 2, -1, -1); + test("((..)|(.)){2}", "", "aaaa", 0, true, 0, 
4, 2, 4, 2, 4, -1, -1); + test("((..)|(.)){3}", "", "aaaa", 0, true, 0, 4, 3, 4, 0, 2, 3, 4); + test("((..)|(.))*", "", "aaaa", 0, true, 0, 4, 2, 4, 2, 4, -1, -1); + test("((..)|(.))", "", "aaaaa", 0, true, 0, 2, 0, 2, 0, 2, -1, -1); + test("((..)|(.))((..)|(.))", "", "aaaaa", 0, true, 0, 4, 0, 2, 0, 2, -1, -1, 2, 4, 2, 4, -1, -1); + test("((..)|(.))((..)|(.))((..)|(.))", "", "aaaaa", 0, true, 0, 5, 0, 2, 0, 2, -1, -1, 2, 4, 2, 4, -1, -1, 4, 5, -1, -1, 4, 5); + test("((..)|(.)){1}", "", "aaaaa", 0, true, 0, 2, 0, 2, 0, 2, -1, -1); + test("((..)|(.)){2}", "", "aaaaa", 0, true, 0, 4, 2, 4, 2, 4, -1, -1); + test("((..)|(.)){3}", "", "aaaaa", 0, true, 0, 5, 4, 5, 2, 4, 4, 5); + test("((..)|(.))*", "", "aaaaa", 0, true, 0, 5, 4, 5, 2, 4, 4, 5); + test("((..)|(.))", "", "aaaaaa", 0, true, 0, 2, 0, 2, 0, 2, -1, -1); + test("((..)|(.))((..)|(.))", "", "aaaaaa", 0, true, 0, 4, 0, 2, 0, 2, -1, -1, 2, 4, 2, 4, -1, -1); + test("((..)|(.))((..)|(.))((..)|(.))", "", "aaaaaa", 0, true, 0, 6, 0, 2, 0, 2, -1, -1, 2, 4, 2, 4, -1, -1, 4, 6, 4, 6, -1, -1); + test("((..)|(.)){1}", "", "aaaaaa", 0, true, 0, 2, 0, 2, 0, 2, -1, -1); + test("((..)|(.)){2}", "", "aaaaaa", 0, true, 0, 4, 2, 4, 2, 4, -1, -1); + test("((..)|(.)){3}", "", "aaaaaa", 0, true, 0, 6, 4, 6, 4, 6, -1, -1); + test("((..)|(.))*", "", "aaaaaa", 0, true, 0, 6, 4, 6, 4, 6, -1, -1); + test("X(.?){0,}Y", "", "X1234567Y", 0, true, 0, 9, 8, 8); + test("X(.?){1,}Y", "", "X1234567Y", 0, true, 0, 9, 8, 8); + test("X(.?){2,}Y", "", "X1234567Y", 0, true, 0, 9, 8, 8); + test("X(.?){3,}Y", "", "X1234567Y", 0, true, 0, 9, 8, 8); + test("X(.?){4,}Y", "", "X1234567Y", 0, true, 0, 9, 8, 8); + test("X(.?){5,}Y", "", "X1234567Y", 0, true, 0, 9, 8, 8); + test("X(.?){6,}Y", "", "X1234567Y", 0, true, 0, 9, 8, 8); + test("X(.?){7,}Y", "", "X1234567Y", 0, true, 0, 9, 8, 8); + test("X(.?){8,}Y", "", "X1234567Y", 0, true, 0, 9, 8, 8); + test("X(.?){0,8}Y", "", "X1234567Y", 0, true, 0, 9, 8, 8); + test("X(.?){1,8}Y", "", "X1234567Y", 0, true, 0, 
9, 8, 8); + test("X(.?){2,8}Y", "", "X1234567Y", 0, true, 0, 9, 8, 8); + test("X(.?){3,8}Y", "", "X1234567Y", 0, true, 0, 9, 8, 8); + test("X(.?){4,8}Y", "", "X1234567Y", 0, true, 0, 9, 8, 8); + test("X(.?){5,8}Y", "", "X1234567Y", 0, true, 0, 9, 8, 8); + test("X(.?){6,8}Y", "", "X1234567Y", 0, true, 0, 9, 8, 8); + test("X(.?){7,8}Y", "", "X1234567Y", 0, true, 0, 9, 8, 8); + test("X(.?){8,8}Y", "", "X1234567Y", 0, true, 0, 9, 8, 8); + test("(a|ab|c|bcd){0,}(d*)", "", "ababcd", 0, true, 0, 1, 0, 1, 1, 1); + test("(a|ab|c|bcd){1,}(d*)", "", "ababcd", 0, true, 0, 1, 0, 1, 1, 1); + test("(a|ab|c|bcd){2,}(d*)", "", "ababcd", 0, true, 0, 6, 3, 6, 6, 6); + test("(a|ab|c|bcd){3,}(d*)", "", "ababcd", 0, true, 0, 6, 3, 6, 6, 6); + test("(a|ab|c|bcd){4,}(d*)", "", "ababcd", 0, false); + test("(a|ab|c|bcd){0,10}(d*)", "", "ababcd", 0, true, 0, 1, 0, 1, 1, 1); + test("(a|ab|c|bcd){1,10}(d*)", "", "ababcd", 0, true, 0, 1, 0, 1, 1, 1); + test("(a|ab|c|bcd){2,10}(d*)", "", "ababcd", 0, true, 0, 6, 3, 6, 6, 6); + test("(a|ab|c|bcd){3,10}(d*)", "", "ababcd", 0, true, 0, 6, 3, 6, 6, 6); + test("(a|ab|c|bcd){4,10}(d*)", "", "ababcd", 0, false); + test("(a|ab|c|bcd)*(d*)", "", "ababcd", 0, true, 0, 1, 0, 1, 1, 1); + test("(a|ab|c|bcd)+(d*)", "", "ababcd", 0, true, 0, 1, 0, 1, 1, 1); + test("(ab|a|c|bcd){0,}(d*)", "", "ababcd", 0, true, 0, 6, 4, 5, 5, 6); + test("(ab|a|c|bcd){1,}(d*)", "", "ababcd", 0, true, 0, 6, 4, 5, 5, 6); + test("(ab|a|c|bcd){2,}(d*)", "", "ababcd", 0, true, 0, 6, 4, 5, 5, 6); + test("(ab|a|c|bcd){3,}(d*)", "", "ababcd", 0, true, 0, 6, 4, 5, 5, 6); + test("(ab|a|c|bcd){4,}(d*)", "", "ababcd", 0, false); + test("(ab|a|c|bcd){0,10}(d*)", "", "ababcd", 0, true, 0, 6, 4, 5, 5, 6); + test("(ab|a|c|bcd){1,10}(d*)", "", "ababcd", 0, true, 0, 6, 4, 5, 5, 6); + test("(ab|a|c|bcd){2,10}(d*)", "", "ababcd", 0, true, 0, 6, 4, 5, 5, 6); + test("(ab|a|c|bcd){3,10}(d*)", "", "ababcd", 0, true, 0, 6, 4, 5, 5, 6); + test("(ab|a|c|bcd){4,10}(d*)", "", "ababcd", 0, false); + 
test("(ab|a|c|bcd)*(d*)", "", "ababcd", 0, true, 0, 6, 4, 5, 5, 6); + test("(ab|a|c|bcd)+(d*)", "", "ababcd", 0, true, 0, 6, 4, 5, 5, 6); + test("(a|ab)(c|bcd)(d*)", "", "abcd", 0, true, 0, 4, 0, 1, 1, 4, 4, 4); + test("(a|ab)(bcd|c)(d*)", "", "abcd", 0, true, 0, 4, 0, 1, 1, 4, 4, 4); + test("(ab|a)(c|bcd)(d*)", "", "abcd", 0, true, 0, 4, 0, 2, 2, 3, 3, 4); + test("(ab|a)(bcd|c)(d*)", "", "abcd", 0, true, 0, 4, 0, 2, 2, 3, 3, 4); + test("(a*)(b|abc)(c*)", "", "abc", 0, true, 0, 3, 0, 1, 1, 2, 2, 3); + test("(a*)(abc|b)(c*)", "", "abc", 0, true, 0, 3, 0, 1, 1, 2, 2, 3); + test("(a*)(b|abc)(c*)", "", "abc", 0, true, 0, 3, 0, 1, 1, 2, 2, 3); + test("(a*)(abc|b)(c*)", "", "abc", 0, true, 0, 3, 0, 1, 1, 2, 2, 3); + test("(a|ab)(c|bcd)(d|.*)", "", "abcd", 0, true, 0, 4, 0, 1, 1, 4, 4, 4); + test("(a|ab)(bcd|c)(d|.*)", "", "abcd", 0, true, 0, 4, 0, 1, 1, 4, 4, 4); + test("(ab|a)(c|bcd)(d|.*)", "", "abcd", 0, true, 0, 4, 0, 2, 2, 3, 3, 4); + test("(ab|a)(bcd|c)(d|.*)", "", "abcd", 0, true, 0, 4, 0, 2, 2, 3, 3, 4); + test("(a|ab)(c|bcd)(d*)", "", "abcd", 0, true, 0, 4, 0, 1, 1, 4, 4, 4); + test("(a|ab)(bcd|c)(d*)", "", "abcd", 0, true, 0, 4, 0, 1, 1, 4, 4, 4); + test("(ab|a)(c|bcd)(d*)", "", "abcd", 0, true, 0, 4, 0, 2, 2, 3, 3, 4); + test("(ab|a)(bcd|c)(d*)", "", "abcd", 0, true, 0, 4, 0, 2, 2, 3, 3, 4); + test("(a*)(b|abc)(c*)", "", "abc", 0, true, 0, 3, 0, 1, 1, 2, 2, 3); + test("(a*)(abc|b)(c*)", "", "abc", 0, true, 0, 3, 0, 1, 1, 2, 2, 3); + test("(a*)(b|abc)(c*)", "", "abc", 0, true, 0, 3, 0, 1, 1, 2, 2, 3); + test("(a*)(abc|b)(c*)", "", "abc", 0, true, 0, 3, 0, 1, 1, 2, 2, 3); + test("(a|ab)(c|bcd)(d|.*)", "", "abcd", 0, true, 0, 4, 0, 1, 1, 4, 4, 4); + test("(a|ab)(bcd|c)(d|.*)", "", "abcd", 0, true, 0, 4, 0, 1, 1, 4, 4, 4); + test("(ab|a)(c|bcd)(d|.*)", "", "abcd", 0, true, 0, 4, 0, 2, 2, 3, 3, 4); + test("(ab|a)(bcd|c)(d|.*)", "", "abcd", 0, true, 0, 4, 0, 2, 2, 3, 3, 4); + test("\ufb00", "i", "FF", 0, true, 0, 2); + test("(\ufb00)\\1", "i", "FFFF", 0, true, 0, 4, 
0, 2); + test("(\ufb00)\\1", "i", "FF\ufb00", 0, false); + test("(\ufb00)\\1", "i", "\ufb00FF", 0, false); + test("\ufb01", "i", "FI", 0, true, 0, 2); + test("(\ufb01)\\1", "i", "FIFI", 0, true, 0, 4, 0, 2); + test("\ufb02", "i", "FL", 0, true, 0, 2); + test("\ufb03", "i", "FFI", 0, true, 0, 3); + test("\ufb04", "i", "FFL", 0, true, 0, 3); + test("\ufb00I", "i", "\ufb03", 0, true, 0, 1); + test("\ufb03", "i", "\ufb00I", 0, true, 0, 2); + test("F\ufb01", "i", "\ufb03", 0, true, 0, 1); + test("\ufb03", "i", "F\ufb01", 0, true, 0, 2); + test("\ufb00L", "i", "\ufb04", 0, true, 0, 1); + test("\ufb04", "i", "\ufb00L", 0, true, 0, 2); + test("F\ufb02", "i", "\ufb04", 0, true, 0, 1); + test("\ufb04", "i", "F\ufb02", 0, true, 0, 2); + test("[\ufb04[=a=]o]+", "i", "F\ufb02a\u00c4\u00f6", 0, true, 0, 4); + test("\u1f50", "i", "\u03c5\u0313", 0, true, 0, 2); + test("\u1f52", "i", "\u03c5\u0313\u0300", 0, true, 0, 3); + test("\u1f54", "i", "\u03c5\u0313\u0301", 0, true, 0, 3); + test("\u1f56", "i", "\u03c5\u0313\u0342", 0, true, 0, 3); + test("\u1f50\u0300", "i", "\u1f52", 0, true, 0, 1); + test("\u1f52", "i", "\u1f50\u0300", 0, true, 0, 2); + test("\u1f50\u0301", "i", "\u1f54", 0, true, 0, 1); + test("\u1f54", "i", "\u1f50\u0301", 0, true, 0, 2); + test("\u1f50\u0342", "i", "\u1f56", 0, true, 0, 1); + test("\u1f56", "i", "\u1f50\u0342", 0, true, 0, 2); + test("\u1fb6", "i", "\u03b1\u0342", 0, true, 0, 2); + test("\u1fb7", "i", "\u03b1\u0342\u03b9", 0, true, 0, 3); + test("\u1fb6\u03b9", "i", "\u1fb7", 0, true, 0, 1); + test("\u1fb7", "i", "\u1fb6\u03b9", 0, true, 0, 2); + test("\u1fc6", "i", "\u03b7\u0342", 0, true, 0, 2); + test("\u1fc7", "i", "\u03b7\u0342\u03b9", 0, true, 0, 3); + test("\u1fc6\u03b9", "i", "\u1fc7", 0, true, 0, 1); + test("\u1fc7", "i", "\u1fc6\u03b9", 0, true, 0, 2); + test("\u1ff6", "i", "\u03c9\u0342", 0, true, 0, 2); + test("\u1ff7", "i", "\u03c9\u0342\u03b9", 0, true, 0, 3); + test("\u1ff6\u03b9", "i", "\u1ff7", 0, true, 0, 1); + test("\u1ff7", "i", 
"\u1ff6\u03b9", 0, true, 0, 2); + test("f*", "i", "ff", 0, true, 0, 2); + test("f*", "i", "\ufb00", 0, true, 0, 0); + test("f+", "i", "ff", 0, true, 0, 2); + test("f+", "i", "\ufb00", 0, false); + test("f{1,}", "i", "ff", 0, true, 0, 2); + test("f{1,}", "i", "\ufb00", 0, false); + test("f{1,2}", "i", "ff", 0, true, 0, 2); + test("f{1,2}", "i", "\ufb00", 0, false); + test("f{,2}", "i", "ff", 0, false); + test("f{,2}", "i", "\ufb00", 0, false); + test("ff?", "i", "ff", 0, true, 0, 2); + test("ff?", "i", "\ufb00", 0, false); + test("f{2}", "i", "ff", 0, true, 0, 2); + test("f{2}", "i", "\ufb00", 0, false); + test("f{2,2}", "i", "ff", 0, true, 0, 2); + test("f{2,2}", "i", "\ufb00", 0, false); + test("K", "i", "\u212a", 0, true, 0, 1); + test("k", "i", "\u212a", 0, true, 0, 1); + test("\\w", "i", "\u212a", 0, true, 0, 1); + test("\\W", "i", "\u212a", 0, false); + test("[\\w]", "i", "\u212a", 0, false); + test("[\\w]+", "i", "a\\wWc", 0, true, 1, 4); + test("[\\W]+", "i", "a\\wWc", 0, true, 1, 4); + test("[\\d]+", "i", "0\\dD9", 0, true, 1, 4); + test("[\\D]+", "i", "a\\dDc", 0, true, 1, 4); + test("[\\s]+", "i", " \\sS\u0009", 0, true, 1, 4); + test("[\\S]+", "i", " \\sS\u0009", 0, true, 1, 4); + test("[kx]", "i", "\u212a", 0, true, 0, 1); + test("ff", "i", "\ufb00", 0, true, 0, 1); + test("[f]f", "i", "\ufb00", 0, false); + test("f[f]", "i", "\ufb00", 0, false); + test("[f][f]", "i", "\ufb00", 0, false); + test("(?:f)f", "i", "\ufb00", 0, false); + test("f(?:f)", "i", "\ufb00", 0, false); + test("(?:f)(?:f)", "i", "\ufb00", 0, false); + test("\\A[\ufb00]\\z", "i", "\ufb00", 0, true, 0, 1); + test("\\A[\ufb00]\\z", "i", "ff", 0, true, 0, 2); + test("\\A[^\ufb00]\\z", "i", "\ufb00", 0, false); + test("\\A[^\ufb00]\\z", "i", "ff", 0, false); + test("\\A[^[^\ufb00]]\\z", "i", "\ufb00", 0, false); + test("\\A[^[^\ufb00]]\\z", "i", "ff", 0, false); + test("\\A[[^[^\ufb00]]]\\z", "i", "\ufb00", 0, false); + test("\\A[[^[^\ufb00]]]\\z", "i", "ff", 0, false); + test("[^a-c]", 
"i", "A", 0, false); + test("[[^a-c]]", "i", "A", 0, false); + test("[^a]", "i", "a", 0, false); + test("[[^a]]", "i", "a", 0, false); + test("\\A\\W\\z", "i", "\ufb00", 0, false); + test("\\A\\W\\z", "i", "ff", 0, false); + test("\\A[\\p{L}]\\z", "i", "\ufb00", 0, false); + test("\\A[\\p{L}]\\z", "i", "ff", 0, false); + test("\\A\\W\\z", "i", "\ufb03", 0, false); + test("\\A\\W\\z", "i", "ffi", 0, false); + test("\\A\\W\\z", "i", "\ufb00i", 0, false); + test("\\A[\\p{L}]\\z", "i", "\ufb03", 0, false); + test("\\A[\\p{L}]\\z", "i", "ffi", 0, false); + test("\\A[\\p{L}]\\z", "i", "\ufb00i", 0, false); + test("([[=a=]])\\1", "i", "aA", 0, true, 0, 2, 0, 1); + test("([[=a=]])\\1", "i", "Aa", 0, true, 0, 2, 0, 1); + test("([[=a=]])\\1", "i", "a\u00e4", 0, false); + test("([[=a=]])\\1", "i", "a\u00c4", 0, false); + test("([[=a=]])\\1", "i", "\u00e4a", 0, false); + test("([[=a=]])\\1", "i", "\u00c4a", 0, false); + test("([[=a=]])\\1", "i", "\u00c4A", 0, false); + /* GENERATED CODE END - KEEP THIS MARKER FOR AUTOMATIC UPDATES */ + } } diff --git a/regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/test/PythonTests.java b/regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/test/PythonTests.java index 98246584b233..b3e450b79284 100644 --- a/regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/test/PythonTests.java +++ b/regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/test/PythonTests.java @@ -40,14 +40,14 @@ */ package com.oracle.truffle.regex.tregex.test; -import com.oracle.truffle.regex.tregex.TRegexOptions; -import com.oracle.truffle.regex.tregex.string.Encodings; import org.graalvm.polyglot.PolyglotException; import org.graalvm.polyglot.Value; import org.junit.Assert; import org.junit.Test; import com.oracle.truffle.regex.errors.PyErrorMessages; +import com.oracle.truffle.regex.tregex.TRegexOptions; +import com.oracle.truffle.regex.tregex.string.Encodings; public class 
PythonTests extends RegexTestBase { diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/analysis/RegexUnifier.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/analysis/RegexUnifier.java index e983fa1ebe99..97e9bad06d40 100644 --- a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/analysis/RegexUnifier.java +++ b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/analysis/RegexUnifier.java @@ -127,6 +127,9 @@ public String getUnifiedPattern() throws RegexSyntaxException { case groupEnd: dump.append(")"); break; + case literalChar: + dump.append("x"); + break; case charClass: if (((Token.CharacterClass) token).getCodePointSet().matchesSingleChar()) { dump.append("x"); @@ -134,6 +137,9 @@ public String getUnifiedPattern() throws RegexSyntaxException { dump.append("[c]"); } break; + case charClassEnd: + dump.append("[c]"); + break; } } dump.append("/"); diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/charset/ClassSetContents.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/charset/ClassSetContents.java index e8977c3a4543..6754fec3e688 100644 --- a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/charset/ClassSetContents.java +++ b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/charset/ClassSetContents.java @@ -40,7 +40,7 @@ */ package com.oracle.truffle.regex.charset; -import com.oracle.truffle.regex.tregex.parser.CaseFoldTable; +import com.oracle.truffle.regex.tregex.parser.CaseFoldData; import com.oracle.truffle.regex.tregex.util.json.JsonConvertible; import com.oracle.truffle.regex.tregex.util.json.JsonValue; import org.graalvm.collections.EconomicSet; @@ -93,7 +93,7 @@ public static ClassSetContents createRange(int lo, int hi) { } public static ClassSetContents createPOSIXCollationElement(int codePoint) { - return new ClassSetContents(Kind.POSIXCollationElement, CodePointSet.create(codePoint), 
EconomicSet.create(), true); + return new ClassSetContents(Kind.POSIXCollationElement, CodePointSet.create(codePoint), EconomicSet.create(), false); } public static ClassSetContents createPOSIXCollationElement(String string) { @@ -103,7 +103,7 @@ public static ClassSetContents createPOSIXCollationElement(String string) { } public static ClassSetContents createPOSIXCollationEquivalenceClass(int codePoint) { - return new ClassSetContents(Kind.POSIXCollationEquivalenceClass, CodePointSet.create(codePoint), EconomicSet.create(), true); + return new ClassSetContents(Kind.POSIXCollationEquivalenceClass, CodePointSet.create(codePoint), EconomicSet.create(), false); } public static ClassSetContents createPOSIXCollationEquivalenceClass(String string) { @@ -115,9 +115,9 @@ public static ClassSetContents createPOSIXCollationEquivalenceClass(String strin public ClassSetContents caseFold(CodePointSetAccumulator tmp) { EconomicSet foldedStrings = EconomicSet.create(strings.size()); for (String string : strings) { - foldedStrings.add(CaseFoldTable.simpleCaseFold(string)); + foldedStrings.add(CaseFoldData.icuSimpleCaseFold(string)); } - return new ClassSetContents(kind, CaseFoldTable.simpleCaseFold(codePointSet, tmp), foldedStrings, mayContainStrings); + return new ClassSetContents(kind, CaseFoldData.simpleCaseFold(codePointSet, tmp), foldedStrings, mayContainStrings); } public EconomicSet getStrings() { @@ -136,6 +136,10 @@ public boolean isRange() { return kind == Kind.Range; } + public boolean isPosixCollationEquivalenceClass() { + return kind == Kind.POSIXCollationEquivalenceClass; + } + public boolean isAllowedInRange() { return kind == Kind.Character || kind == Kind.POSIXCollationElement || kind == Kind.POSIXCollationEquivalenceClass; } diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/charset/Constants.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/charset/Constants.java index f8157658d8b4..b927b17ebd8f 100644 --- 
a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/charset/Constants.java +++ b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/charset/Constants.java @@ -41,7 +41,7 @@ package com.oracle.truffle.regex.charset; import com.oracle.truffle.regex.tregex.buffer.CompilationBuffer; -import com.oracle.truffle.regex.tregex.parser.CaseFoldTable; +import com.oracle.truffle.regex.tregex.parser.CaseFoldData; import com.oracle.truffle.regex.tregex.string.Encodings; public final class Constants { @@ -253,13 +253,14 @@ public final class Constants { HEX_CHARS }; - public static final CodePointSet FOLDABLE_CHARACTERS = CodePointSet.createNoDedup(CaseFoldTable.SIMPLE_CASE_FOLDING_ENTRIES); + public static final CodePointSet WORD_CHARS_UNICODE_SETS_IGNORE_CASE; - public static final CodePointSet FOLDED_CHARACTERS = FOLDABLE_CHARACTERS.createInverse(Encodings.UTF_16); + static { + CodePointSetAccumulator tmp = new CodePointSetAccumulator(); + WORD_CHARS_UNICODE_SETS_IGNORE_CASE = CaseFoldData.simpleCaseFold(WORD_CHARS, tmp); + } - public static final CodePointSet WORD_CHARS_UNICODE_SETS_IGNORE_CASE = CaseFoldTable.simpleCaseFold(WORD_CHARS, new CodePointSetAccumulator()); - - public static final CodePointSet NON_WORD_CHARS_UNICODE_SETS_IGNORE_CASE = WORD_CHARS_UNICODE_SETS_IGNORE_CASE.createInverse(FOLDABLE_CHARACTERS, + public static final CodePointSet NON_WORD_CHARS_UNICODE_SETS_IGNORE_CASE = WORD_CHARS_UNICODE_SETS_IGNORE_CASE.createInverse(CaseFoldData.FOLDABLE_CHARACTERS, new CompilationBuffer(Encodings.UTF_16)); } diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/nodes/nfa/TRegexBacktrackingNFAExecutorNode.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/nodes/nfa/TRegexBacktrackingNFAExecutorNode.java index d5a10da063eb..f74d36b5ab10 100644 --- a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/nodes/nfa/TRegexBacktrackingNFAExecutorNode.java +++ 
b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/nodes/nfa/TRegexBacktrackingNFAExecutorNode.java @@ -825,7 +825,7 @@ protected void updateState(TRegexBacktrackingNFAExecutorLocals locals, PureNFATr * OracleDBFlavor. */ assert isForward(); - for (int i = 0; i < nGuards; i += 1) { + for (int i = 0; i < nGuards; i++) { QuantifierGuard guard = transition.getQuantifierGuards()[i]; CompilerAsserts.partialEvaluationConstant(guard); if (guard.getKind() == QuantifierGuard.Kind.updateRecursiveBackrefPointer) { @@ -895,7 +895,6 @@ protected boolean tryUpdateState(VirtualFrame frame, TRegexBacktrackingNFAExecut CompilerAsserts.partialEvaluationConstant(transition); PureNFAState target = transition.getTarget(isForward()); CompilerAsserts.partialEvaluationConstant(target); - assert !isRecursiveBackreferences() : "not implemented"; if (transition.hasCaretGuard() && index != 0) { return false; } @@ -965,6 +964,9 @@ protected boolean tryUpdateState(VirtualFrame frame, TRegexBacktrackingNFAExecut locals.setLastGroup(guard.getIndex() / 2); } break; + case updateRecursiveBackrefPointer: + locals.saveRecursiveBackrefGroupStart(guard.getIndex()); + break; case enterZeroWidth: locals.setZeroWidthQuantifierGuardIndex(q); locals.setZeroWidthQuantifierResults(q); diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/CaseFoldData.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/CaseFoldData.java new file mode 100644 index 000000000000..1eaddb621203 --- /dev/null +++ b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/CaseFoldData.java @@ -0,0 +1,1912 @@ +/* + * Copyright (c) 2023, 2023, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * The Universal Permissive License (UPL), Version 1.0 + * + * Subject to the condition set forth below, permission is hereby granted to any + * person obtaining a copy of this software, associated documentation and/or + * data (collectively the "Software"), free of charge and under any and all + * copyright rights in the Software, and any and all patent rights owned or + * freely licensable by each licensor hereunder covering either (i) the + * unmodified Software as contributed to or provided by such licensor, or (ii) + * the Larger Works (as defined below), to deal in both + * + * (a) the Software, and + * + * (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if + * one is included with the Software each a "Larger Work" to which the Software + * is contributed by such licensors), + * + * without restriction, including without limitation the rights to copy, create + * derivative works of, display, perform, and distribute the Software and make, + * use, sell, offer for sale, import, export, have made, and have sold the + * Software and the Larger Work(s), and to sublicense the foregoing rights on + * either these or other terms. + * + * This license is subject to the following condition: + * + * The above copyright notice and either this complete permission notice or at a + * minimum a reference to the UPL must be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +package com.oracle.truffle.regex.tregex.parser; + +import java.util.function.BiConsumer; +import java.util.function.BiPredicate; + +import org.graalvm.shadowed.com.ibm.icu.lang.UCharacter; + +import com.oracle.truffle.api.CompilerDirectives; +import com.oracle.truffle.regex.charset.CodePointSet; +import com.oracle.truffle.regex.charset.CodePointSetAccumulator; +import com.oracle.truffle.regex.charset.Range; +import com.oracle.truffle.regex.charset.RangesBuffer; +import com.oracle.truffle.regex.charset.SortedListOfRanges; +import com.oracle.truffle.regex.tregex.buffer.IntRangesBuffer; +import com.oracle.truffle.regex.tregex.string.Encodings; + +/** Holder for case-folding data: selects the equivalence table, fold table or unfolding trie that belongs to a given algorithm and offers simple case-fold helpers. */ public class CaseFoldData { + + /* Entry kinds read from ranges[index * 4 + 2] by CaseFoldEquivalenceTable; they decide how a table range is mapped to its equivalents. */ private static final int INTEGER_OFFSET = 1; /* equivalents are the range shifted by the delta stored at ranges[index * 4 + 3] */ + private static final int DIRECT_MAPPING = 2; /* equivalents are the CodePointSet found in directMappings at the index stored at ranges[index * 4 + 3] */ + private static final int ALTERNATING_UL = 3; /* equivalents computed as ((c - 1) ^ 1) + 1, i.e. an odd code point is paired with the even one that follows it */ + private static final int ALTERNATING_AL = 4; /* equivalents computed as c ^ 1, i.e. an even code point is paired with the odd one that follows it */ + private static final int DIRECT_SINGLE = 5; /* NOTE(review): not handled by CaseFoldEquivalenceTable.apply; presumably consumed by another table type - confirm */ + + /** Algorithms whose case-insensitive matching is driven by a CaseFoldEquivalenceTable, chosen by the private getTable overload. */ public enum CaseFoldUnfoldAlgorithm { + ECMAScriptNonUnicode, + ECMAScriptUnicode, + PythonAscii, + PythonUnicode; + + /** Returns a predicate that reports whether two code points are equal ignoring case according to the equivalence table of this algorithm. */ public BiPredicate getEqualsPredicate() { + return (codePointA, codePointB) -> getTable(this).equalsIgnoreCase(codePointA, codePointB); + } + } + + /** Algorithms that have a CaseFoldTable and a CaseUnfoldingTrie, chosen by the public getTable overload and by getUnfoldingTrie. */ public enum CaseFoldAlgorithm { + Ruby, + OracleDB, + OracleDBAI + } + + /** Selects the equivalence table constant for the given algorithm; any value without a case label is treated as unreachable. */ private static CaseFoldEquivalenceTable getTable(CaseFoldUnfoldAlgorithm algorithm) { + switch (algorithm) { + case ECMAScriptNonUnicode: + return JS_NON_UNICODE; + case ECMAScriptUnicode: + return UNICODE_15_0_0_SIMPLE; + case PythonAscii: + return PYTHON_ASCII; + case PythonUnicode: + return PYTHON_UNICODE; + default: + throw CompilerDirectives.shouldNotReachHere(); + } + } + + /** Selects the fold table constant for the given algorithm; any value without a case label is treated as unreachable. */ public static CaseFoldTable getTable(CaseFoldAlgorithm algorithm) { + switch (algorithm) { + case Ruby: + return UNICODE_15_0_0_FULL; + case OracleDB: + return ORACLE_DB; + case OracleDBAI: + return ORACLE_DB_AI; + default: + throw CompilerDirectives.shouldNotReachHere(); + } + } + + /** Selects the unfolding trie constant for the given algorithm; any value without a case label is treated as unreachable. */ public static CaseUnfoldingTrie
getUnfoldingTrie(CaseFoldAlgorithm algorithm) { + switch (algorithm) { + case Ruby: + return UNFOLDING_TRIE_RUBY; + case OracleDB: + return UNFOLDING_TRIE_ORACLE_DB; + case OracleDBAI: + return UNFOLDING_TRIE_ORACLE_DB_AI; + default: + throw CompilerDirectives.shouldNotReachHere(); + } + } + + /** Folds every code point of the string with icuSimpleCaseFold(int) and returns a new string built from the folded code points. */ public static String icuSimpleCaseFold(String string) { + int[] folded = string.codePoints().map(CaseFoldData::icuSimpleCaseFold).toArray(); + return new String(folded, 0, folded.length); + } + + /** Folds a single code point by delegating to ICU UCharacter.foldCase with FOLD_CASE_DEFAULT. */ public static int icuSimpleCaseFold(int codePoint) { + return UCharacter.foldCase(codePoint, UCharacter.FOLD_CASE_DEFAULT); + } + + /** Shorthand for CodePointSet.createNoDedup. NOTE(review): presumably the arguments are sorted lo, hi pairs - confirm against CodePointSet. */ static CodePointSet rangeSet(int... ranges) { + return CodePointSet.createNoDedup(ranges); + } + + /** Copies codePointSet into tmp, then runs the ranges of tmp through the equivalence table of the given algorithm, which adds its results to codePointSet. NOTE(review): tmp serves as scratch space and is presumably overwritten by copyTo - confirm. */ public static void applyCaseFoldUnfold(CodePointSetAccumulator codePointSet, CodePointSetAccumulator tmp, CaseFoldUnfoldAlgorithm algorithm) { + codePointSet.copyTo(tmp); + getTable(algorithm).applyCaseFold(codePointSet, tmp); + } + + /** Adds codePointSet to tmp, adds the UNICODE_15_0_0_SIMPLE results for every range of codePointSet, restricts tmp to FOLDED_CHARACTERS and returns the content of tmp as a CodePointSet. NOTE(review): tmp is not cleared here, so callers presumably pass an empty accumulator - confirm. */ public static CodePointSet simpleCaseFold(CodePointSet codePointSet, CodePointSetAccumulator tmp) { + tmp.addSet(codePointSet); + UNICODE_15_0_0_SIMPLE.applyCaseFold(tmp, codePointSet); + tmp.intersectWith(FOLDED_CHARACTERS); + return tmp.toCodePointSet(); + } + + /** + * Maps characters to their respective set of equivalent characters in case-insensitive context, + * e.g. {@code A -> [Aa]}.
+ */ + public static final class CaseFoldEquivalenceTable implements SortedListOfRanges { + + private final CaseFoldEquivalenceTable parent; + private final CodePointSet[] directMappings; + private final int[] ranges; + + CaseFoldEquivalenceTable(CaseFoldEquivalenceTable parent, CodePointSet[] directMappings, int[] ranges) { + this.parent = parent; + this.directMappings = directMappings; + this.ranges = ranges; + } + + void applyCaseFold(CodePointSetAccumulator dst, Iterable src) { + for (Range r : src) { + applyCaseFold(dst, r); + } + } + + private void applyCaseFold(CodePointSetAccumulator dst, Range r) { + int search = binarySearch(r.lo); + if (binarySearchExactMatch(search, r.lo, r.hi)) { + apply(dst, search, r.lo, r.hi); + return; + } + int firstIntersection = binarySearchGetFirstIntersecting(search, r.lo, r.hi); + if (binarySearchNoIntersectingFound(firstIntersection)) { + if (parent != null) { + parent.applyCaseFold(dst, r); + } + return; + } + int lastIntersectionHi = r.lo - 1; + for (int j = firstIntersection; j < size(); j++) { + if (rightOf(j, r.lo, r.hi)) { + break; + } + assert intersects(j, r.lo, r.hi); + int intersectionLo = Math.max(getLo(j), r.lo); + int intersectionHi = Math.min(getHi(j), r.hi); + apply(dst, j, intersectionLo, intersectionHi); + if (parent != null && intersectionLo > lastIntersectionHi + 1) { + parent.applyCaseFold(dst, new Range(lastIntersectionHi + 1, intersectionLo - 1)); + } + lastIntersectionHi = intersectionHi; + } + if (parent != null && r.hi > lastIntersectionHi) { + parent.applyCaseFold(dst, new Range(lastIntersectionHi + 1, r.hi)); + } + } + + private void apply(CodePointSetAccumulator codePointSet, int tblEntryIndex, int intersectionLo, int intersectionHi) { + switch (ranges[tblEntryIndex * 4 + 2]) { + case INTEGER_OFFSET: + int delta = ranges[tblEntryIndex * 4 + 3]; + addRange(codePointSet, intersectionLo + delta, intersectionHi + delta); + break; + case DIRECT_MAPPING: + CodePointSet set = 
directMappings[ranges[tblEntryIndex * 4 + 3]]; + codePointSet.addSet(set.createIntersection(Encodings.UTF_8.getFullSet(), new IntRangesBuffer())); + break; + case ALTERNATING_UL: + int loUL = Math.min(((intersectionLo - 1) ^ 1) + 1, ((intersectionHi - 1) ^ 1) + 1); + int hiUL = Math.max(((intersectionLo - 1) ^ 1) + 1, ((intersectionHi - 1) ^ 1) + 1); + if (!SortedListOfRanges.contains(intersectionLo, intersectionHi, loUL, hiUL)) { + addRange(codePointSet, loUL, hiUL); + } + break; + case ALTERNATING_AL: + int loAL = Math.min(intersectionLo ^ 1, intersectionHi ^ 1); + int hiAL = Math.max(intersectionLo ^ 1, intersectionHi ^ 1); + if (!SortedListOfRanges.contains(intersectionLo, intersectionHi, loAL, hiAL)) { + addRange(codePointSet, loAL, hiAL); + } + break; + default: + throw CompilerDirectives.shouldNotReachHere(); + } + } + + private static void addRange(CodePointSetAccumulator codePointSet, int lo, int hi) { + if (lo < 0x11_0000) { + codePointSet.addRange(lo, Math.min(hi, 0x10_ffff)); + } + } + + boolean equalsIgnoreCase(int codePointA, int codePointB) { + if (codePointA == codePointB) { + return true; + } + int search = binarySearch(codePointA); + if (binarySearchExactMatch(search, codePointA, codePointA)) { + return equalsIgnoreCase(search, codePointA, codePointB); + } + int firstIntersection = binarySearchGetFirstIntersecting(search, codePointA, codePointA); + if (binarySearchNoIntersectingFound(firstIntersection) || rightOf(firstIntersection, codePointA, codePointA)) { + return parent != null && parent.equalsIgnoreCase(codePointA, codePointB); + } + assert intersects(firstIntersection, codePointA, codePointA); + return equalsIgnoreCase(firstIntersection, codePointA, codePointB); + } + + private boolean equalsIgnoreCase(int tblEntryIndex, int codePointA, int codePointB) { + switch (ranges[tblEntryIndex * 4 + 2]) { + case INTEGER_OFFSET: + int delta = ranges[tblEntryIndex * 4 + 3]; + return codePointA + delta == codePointB; + case DIRECT_MAPPING: + 
CodePointSet set = directMappings[ranges[tblEntryIndex * 4 + 3]]; + return set.contains(codePointB); + case ALTERNATING_UL: + return ((codePointA - 1) ^ 1) + 1 == codePointB; + case ALTERNATING_AL: + return (codePointA ^ 1) == codePointB; + default: + throw CompilerDirectives.shouldNotReachHere(); + } + } + + @Override + public int getLo(int i) { + return ranges[i * 4]; + } + + @Override + public int getHi(int i) { + return ranges[i * 4 + 1]; + } + + @Override + public int size() { + return ranges.length / 4; + } + + @Override + public void appendRangesTo(RangesBuffer buffer, int startIndex, int endIndex) { + throw CompilerDirectives.shouldNotReachHere(); + } + } + + public static final class CaseFoldTable implements SortedListOfRanges { + + private final CaseFoldTable parent; + private final int[] ranges; + + CaseFoldTable(CaseFoldTable parent, int[] ranges) { + this.parent = parent; + this.ranges = ranges; + } + + public int[] caseFold(int codepoint) { + final int[][] ret = new int[1][]; + caseFold(new Range(codepoint, codepoint), (cp, caseFolded) -> ret[0] = caseFolded); + return ret[0]; + } + + public void caseFold(CodePointSetAccumulator cps, BiConsumer caseFoldItem) { + for (Range r : cps) { + caseFold(r, caseFoldItem); + } + } + + private void caseFold(Range r, BiConsumer caseFoldItem) { + int search = binarySearch(r.lo); + if (binarySearchExactMatch(search, r.lo, r.hi)) { + apply(search, r.lo, r.hi, caseFoldItem); + return; + } + int firstIntersection = binarySearchGetFirstIntersecting(search, r.lo, r.hi); + if (binarySearchNoIntersectingFound(firstIntersection)) { + if (parent != null) { + parent.caseFold(r, caseFoldItem); + } + return; + } + int lastIntersectionHi = r.lo - 1; + for (int j = firstIntersection; j < size(); j++) { + if (rightOf(j, r.lo, r.hi)) { + break; + } + assert intersects(j, r.lo, r.hi); + int intersectionLo = Math.max(getLo(j), r.lo); + int intersectionHi = Math.min(getHi(j), r.hi); + apply(j, intersectionLo, intersectionHi, 
caseFoldItem); + if (parent != null && intersectionLo > lastIntersectionHi + 1) { + parent.caseFold(new Range(lastIntersectionHi + 1, intersectionLo - 1), caseFoldItem); + } + lastIntersectionHi = intersectionHi; + } + if (parent != null && r.hi > lastIntersectionHi) { + parent.caseFold(new Range(lastIntersectionHi + 1, r.hi), caseFoldItem); + } + } + + private void apply(int tblEntryIndex, int intersectionLo, int intersectionHi, BiConsumer caseFoldItem) { + int kind = ranges[tblEntryIndex * 4 + 2]; + switch (kind) { + case INTEGER_OFFSET: + int delta = ranges[tblEntryIndex * 4 + 3]; + if (delta != 0) { + for (int i = intersectionLo; i <= intersectionHi; i++) { + applyMapping(i, i + delta, caseFoldItem); + } + } + break; + case ALTERNATING_AL, ALTERNATING_UL: + int loUL = kind == ALTERNATING_UL ? intersectionLo | 1 : intersectionLo + (intersectionLo & 1); + for (int i = loUL; i <= intersectionHi; i += 2) { + applyMapping(i, i + 1, caseFoldItem); + } + break; + case DIRECT_SINGLE: + int dst = ranges[tblEntryIndex * 4 + 3]; + for (int i = intersectionLo; i <= intersectionHi; i++) { + applyMapping(i, dst, caseFoldItem); + } + break; + default: + throw CompilerDirectives.shouldNotReachHere(); + } + } + + private static void applyMapping(int from, int to, BiConsumer caseFoldItem) { + assert from <= 0x10_ffff; + caseFoldItem.accept(from, mappingToCodepoints(to)); + } + + private static int[] mappingToCodepoints(int mapping) { + if (mapping > 0x10_ffff) { + return MULTI_CHAR_SEQUENCES[mapping - 0x11_0000].codePoints().toArray(); + } else { + return new int[]{mapping}; + } + } + + private CaseUnfoldingTrie createCaseUnfoldTrie() { + CaseUnfoldingTrie trie = new CaseUnfoldingTrie(0); + if (parent == null) { + for (int i = 0; i < ranges.length; i += 4) { + switch (ranges[i + 2]) { + case INTEGER_OFFSET -> { + for (int j = ranges[i]; j <= ranges[i + 1]; j++) { + trie.add(j, mappingToCodepoints(j + ranges[i + 3]), 0); + } + } + case ALTERNATING_UL, ALTERNATING_AL -> { + for 
(int j = ranges[i]; j <= ranges[i + 1]; j += 2) { + trie.add(j, mappingToCodepoints(j + 1), 0); + } + } + case DIRECT_SINGLE -> { + for (int j = ranges[i]; j <= ranges[i + 1]; j++) { + trie.add(j, mappingToCodepoints(ranges[i + 3]), 0); + } + } + default -> throw CompilerDirectives.shouldNotReachHere(); + } + } + } else { + caseFold(new Range(0, 0x10_ffff), (from, to) -> trie.add(from, to, 0)); + } + return trie; + } + + @Override + public int getLo(int i) { + return ranges[i * 4]; + } + + @Override + public int getHi(int i) { + return ranges[i * 4 + 1]; + } + + @Override + public int size() { + return ranges.length / 4; + } + + @Override + public void appendRangesTo(RangesBuffer buffer, int startIndex, int endIndex) { + throw CompilerDirectives.shouldNotReachHere(); + } + } + + public static final CaseFoldEquivalenceTable PYTHON_ASCII = new CaseFoldEquivalenceTable(null, new CodePointSet[0], new int[]{ + 0x000041, 0x00005a, INTEGER_OFFSET, 32, + 0x000061, 0x00007a, INTEGER_OFFSET, -32 + }); + + /* GENERATED CODE BEGIN - KEEP THIS MARKER FOR AUTOMATIC UPDATES */ + + public static final String[] MULTI_CHAR_SEQUENCES = { + "i\u0307", + "SS", + "FF", + "FI", + "FL", + "FFI", + "FFL", + "ST", + "\u0535\u0552", + "\u0544\u0546", + "\u0544\u0535", + "\u0544\u053b", + "\u054e\u0546", + "\u0544\u053d", + "\u02bcN", + "\u0399\u0308\u0301", + "\u03a5\u0308\u0301", + "J\u030c", + "H\u0331", + "T\u0308", + "W\u030a", + "Y\u030a", + "A\u02be", + "\u03a5\u0313", + "\u03a5\u0313\u0300", + "\u03a5\u0313\u0301", + "\u03a5\u0313\u0342", + "\u0391\u0342", + "\u0397\u0342", + "\u0399\u0308\u0300", + "\u0399\u0342", + "\u0399\u0308\u0342", + "\u03a5\u0308\u0300", + "\u03a1\u0313", + "\u03a5\u0342", + "\u03a5\u0308\u0342", + "\u03a9\u0342", + "\u1f08\u0399", + "\u1f09\u0399", + "\u1f0a\u0399", + "\u1f0b\u0399", + "\u1f0c\u0399", + "\u1f0d\u0399", + "\u1f0e\u0399", + "\u1f0f\u0399", + "\u1f28\u0399", + "\u1f29\u0399", + "\u1f2a\u0399", + "\u1f2b\u0399", + "\u1f2c\u0399", + 
"\u1f2d\u0399", + "\u1f2e\u0399", + "\u1f2f\u0399", + "\u1f68\u0399", + "\u1f69\u0399", + "\u1f6a\u0399", + "\u1f6b\u0399", + "\u1f6c\u0399", + "\u1f6d\u0399", + "\u1f6e\u0399", + "\u1f6f\u0399", + "\u0391\u0399", + "\u0397\u0399", + "\u03a9\u0399", + "\u1fba\u0399", + "\u0386\u0399", + "\u1fca\u0399", + "\u0389\u0399", + "\u1ffa\u0399", + "\u038f\u0399", + "\u0391\u0342\u0399", + "\u0397\u0342\u0399", + "\u03a9\u0342\u0399", + "ss", + "\u02bcn", + "j\u030c", + "\u03b9\u0308\u0301", + "\u03c5\u0308\u0301", + "\u0565\u0582", + "h\u0331", + "t\u0308", + "w\u030a", + "y\u030a", + "a\u02be", + "\u03c5\u0313", + "\u03c5\u0313\u0300", + "\u03c5\u0313\u0301", + "\u03c5\u0313\u0342", + "\u1f00\u03b9", + "\u1f01\u03b9", + "\u1f02\u03b9", + "\u1f03\u03b9", + "\u1f04\u03b9", + "\u1f05\u03b9", + "\u1f06\u03b9", + "\u1f07\u03b9", + "\u1f20\u03b9", + "\u1f21\u03b9", + "\u1f22\u03b9", + "\u1f23\u03b9", + "\u1f24\u03b9", + "\u1f25\u03b9", + "\u1f26\u03b9", + "\u1f27\u03b9", + "\u1f60\u03b9", + "\u1f61\u03b9", + "\u1f62\u03b9", + "\u1f63\u03b9", + "\u1f64\u03b9", + "\u1f65\u03b9", + "\u1f66\u03b9", + "\u1f67\u03b9", + "\u1f70\u03b9", + "\u03b1\u03b9", + "\u03ac\u03b9", + "\u03b1\u0342", + "\u03b1\u0342\u03b9", + "\u1f74\u03b9", + "\u03b7\u03b9", + "\u03ae\u03b9", + "\u03b7\u0342", + "\u03b7\u0342\u03b9", + "\u03b9\u0308\u0300", + "\u03b9\u0342", + "\u03b9\u0308\u0342", + "\u03c5\u0308\u0300", + "\u03c1\u0313", + "\u03c5\u0342", + "\u03c5\u0308\u0342", + "\u1f7c\u03b9", + "\u03c9\u03b9", + "\u03ce\u03b9", + "\u03c9\u0342", + "\u03c9\u0342\u03b9", + "ff", + "fi", + "fl", + "ffi", + "ffl", + "st", + "\u0574\u0576", + "\u0574\u0565", + "\u0574\u056b", + "\u057e\u0576", + "\u0574\u056d", + "ij", + "oe", + "lj", + "nj", + "dz", + "d\u0292", + "d\u0291", + "ts", + "t\u0283", + "t\u0255", + "co", + "no", + "sm", + "del", + "tm", + "ii", + "iii", + "iv", + "vi", + "vii", + "ix", + "xi", + "xii", + "fo", + }; + private static final CaseFoldEquivalenceTable UNICODE_15_0_0_SIMPLE = new 
CaseFoldEquivalenceTable(null, new CodePointSet[]{ + rangeSet(0x00004b, 0x00004b, 0x00006b, 0x00006b, 0x00212a, 0x00212a), + rangeSet(0x000053, 0x000053, 0x000073, 0x000073, 0x00017f, 0x00017f), + rangeSet(0x0000b5, 0x0000b5, 0x00039c, 0x00039c, 0x0003bc, 0x0003bc), + rangeSet(0x0000c5, 0x0000c5, 0x0000e5, 0x0000e5, 0x00212b, 0x00212b), + rangeSet(0x0001c4, 0x0001c6), + rangeSet(0x0001c7, 0x0001c9), + rangeSet(0x0001ca, 0x0001cc), + rangeSet(0x0001f1, 0x0001f3), + rangeSet(0x000345, 0x000345, 0x000399, 0x000399, 0x0003b9, 0x0003b9, 0x001fbe, 0x001fbe), + rangeSet(0x000392, 0x000392, 0x0003b2, 0x0003b2, 0x0003d0, 0x0003d0), + rangeSet(0x000395, 0x000395, 0x0003b5, 0x0003b5, 0x0003f5, 0x0003f5), + rangeSet(0x000398, 0x000398, 0x0003b8, 0x0003b8, 0x0003d1, 0x0003d1, 0x0003f4, 0x0003f4), + rangeSet(0x00039a, 0x00039a, 0x0003ba, 0x0003ba, 0x0003f0, 0x0003f0), + rangeSet(0x0003a0, 0x0003a0, 0x0003c0, 0x0003c0, 0x0003d6, 0x0003d6), + rangeSet(0x0003a1, 0x0003a1, 0x0003c1, 0x0003c1, 0x0003f1, 0x0003f1), + rangeSet(0x0003a3, 0x0003a3, 0x0003c2, 0x0003c3), + rangeSet(0x0003a6, 0x0003a6, 0x0003c6, 0x0003c6, 0x0003d5, 0x0003d5), + rangeSet(0x0003a9, 0x0003a9, 0x0003c9, 0x0003c9, 0x002126, 0x002126), + rangeSet(0x000412, 0x000412, 0x000432, 0x000432, 0x001c80, 0x001c80), + rangeSet(0x000414, 0x000414, 0x000434, 0x000434, 0x001c81, 0x001c81), + rangeSet(0x00041e, 0x00041e, 0x00043e, 0x00043e, 0x001c82, 0x001c82), + rangeSet(0x000421, 0x000421, 0x000441, 0x000441, 0x001c83, 0x001c83), + rangeSet(0x000422, 0x000422, 0x000442, 0x000442, 0x001c84, 0x001c85), + rangeSet(0x00042a, 0x00042a, 0x00044a, 0x00044a, 0x001c86, 0x001c86), + rangeSet(0x000462, 0x000463, 0x001c87, 0x001c87), + rangeSet(0x001c88, 0x001c88, 0x00a64a, 0x00a64b), + rangeSet(0x001e60, 0x001e61, 0x001e9b, 0x001e9b), + }, new int[]{ + 0x000041, 0x00004a, INTEGER_OFFSET, 32, + 0x00004b, 0x00004b, DIRECT_MAPPING, 0, + 0x00004c, 0x000052, INTEGER_OFFSET, 32, + 0x000053, 0x000053, DIRECT_MAPPING, 1, + 0x000054, 0x00005a, 
INTEGER_OFFSET, 32, + 0x000061, 0x00006a, INTEGER_OFFSET, -32, + 0x00006b, 0x00006b, DIRECT_MAPPING, 0, + 0x00006c, 0x000072, INTEGER_OFFSET, -32, + 0x000073, 0x000073, DIRECT_MAPPING, 1, + 0x000074, 0x00007a, INTEGER_OFFSET, -32, + 0x0000b5, 0x0000b5, DIRECT_MAPPING, 2, + 0x0000c0, 0x0000c4, INTEGER_OFFSET, 32, + 0x0000c5, 0x0000c5, DIRECT_MAPPING, 3, + 0x0000c6, 0x0000d6, INTEGER_OFFSET, 32, + 0x0000d8, 0x0000de, INTEGER_OFFSET, 32, + 0x0000df, 0x0000df, INTEGER_OFFSET, 7615, + 0x0000e0, 0x0000e4, INTEGER_OFFSET, -32, + 0x0000e5, 0x0000e5, DIRECT_MAPPING, 3, + 0x0000e6, 0x0000f6, INTEGER_OFFSET, -32, + 0x0000f8, 0x0000fe, INTEGER_OFFSET, -32, + 0x0000ff, 0x0000ff, INTEGER_OFFSET, 121, + 0x000100, 0x00012f, ALTERNATING_AL, 0, + 0x000132, 0x000137, ALTERNATING_AL, 0, + 0x000139, 0x000148, ALTERNATING_UL, 0, + 0x00014a, 0x000177, ALTERNATING_AL, 0, + 0x000178, 0x000178, INTEGER_OFFSET, -121, + 0x000179, 0x00017e, ALTERNATING_UL, 0, + 0x00017f, 0x00017f, DIRECT_MAPPING, 1, + 0x000180, 0x000180, INTEGER_OFFSET, 195, + 0x000181, 0x000181, INTEGER_OFFSET, 210, + 0x000182, 0x000185, ALTERNATING_AL, 0, + 0x000186, 0x000186, INTEGER_OFFSET, 206, + 0x000187, 0x000188, ALTERNATING_UL, 0, + 0x000189, 0x00018a, INTEGER_OFFSET, 205, + 0x00018b, 0x00018c, ALTERNATING_UL, 0, + 0x00018e, 0x00018e, INTEGER_OFFSET, 79, + 0x00018f, 0x00018f, INTEGER_OFFSET, 202, + 0x000190, 0x000190, INTEGER_OFFSET, 203, + 0x000191, 0x000192, ALTERNATING_UL, 0, + 0x000193, 0x000193, INTEGER_OFFSET, 205, + 0x000194, 0x000194, INTEGER_OFFSET, 207, + 0x000195, 0x000195, INTEGER_OFFSET, 97, + 0x000196, 0x000196, INTEGER_OFFSET, 211, + 0x000197, 0x000197, INTEGER_OFFSET, 209, + 0x000198, 0x000199, ALTERNATING_AL, 0, + 0x00019a, 0x00019a, INTEGER_OFFSET, 163, + 0x00019c, 0x00019c, INTEGER_OFFSET, 211, + 0x00019d, 0x00019d, INTEGER_OFFSET, 213, + 0x00019e, 0x00019e, INTEGER_OFFSET, 130, + 0x00019f, 0x00019f, INTEGER_OFFSET, 214, + 0x0001a0, 0x0001a5, ALTERNATING_AL, 0, + 0x0001a6, 0x0001a6, INTEGER_OFFSET, 
218, + 0x0001a7, 0x0001a8, ALTERNATING_UL, 0, + 0x0001a9, 0x0001a9, INTEGER_OFFSET, 218, + 0x0001ac, 0x0001ad, ALTERNATING_AL, 0, + 0x0001ae, 0x0001ae, INTEGER_OFFSET, 218, + 0x0001af, 0x0001b0, ALTERNATING_UL, 0, + 0x0001b1, 0x0001b2, INTEGER_OFFSET, 217, + 0x0001b3, 0x0001b6, ALTERNATING_UL, 0, + 0x0001b7, 0x0001b7, INTEGER_OFFSET, 219, + 0x0001b8, 0x0001b9, ALTERNATING_AL, 0, + 0x0001bc, 0x0001bd, ALTERNATING_AL, 0, + 0x0001bf, 0x0001bf, INTEGER_OFFSET, 56, + 0x0001c4, 0x0001c6, DIRECT_MAPPING, 4, + 0x0001c7, 0x0001c9, DIRECT_MAPPING, 5, + 0x0001ca, 0x0001cc, DIRECT_MAPPING, 6, + 0x0001cd, 0x0001dc, ALTERNATING_UL, 0, + 0x0001dd, 0x0001dd, INTEGER_OFFSET, -79, + 0x0001de, 0x0001ef, ALTERNATING_AL, 0, + 0x0001f1, 0x0001f3, DIRECT_MAPPING, 7, + 0x0001f4, 0x0001f5, ALTERNATING_AL, 0, + 0x0001f6, 0x0001f6, INTEGER_OFFSET, -97, + 0x0001f7, 0x0001f7, INTEGER_OFFSET, -56, + 0x0001f8, 0x00021f, ALTERNATING_AL, 0, + 0x000220, 0x000220, INTEGER_OFFSET, -130, + 0x000222, 0x000233, ALTERNATING_AL, 0, + 0x00023a, 0x00023a, INTEGER_OFFSET, 10795, + 0x00023b, 0x00023c, ALTERNATING_UL, 0, + 0x00023d, 0x00023d, INTEGER_OFFSET, -163, + 0x00023e, 0x00023e, INTEGER_OFFSET, 10792, + 0x00023f, 0x000240, INTEGER_OFFSET, 10815, + 0x000241, 0x000242, ALTERNATING_UL, 0, + 0x000243, 0x000243, INTEGER_OFFSET, -195, + 0x000244, 0x000244, INTEGER_OFFSET, 69, + 0x000245, 0x000245, INTEGER_OFFSET, 71, + 0x000246, 0x00024f, ALTERNATING_AL, 0, + 0x000250, 0x000250, INTEGER_OFFSET, 10783, + 0x000251, 0x000251, INTEGER_OFFSET, 10780, + 0x000252, 0x000252, INTEGER_OFFSET, 10782, + 0x000253, 0x000253, INTEGER_OFFSET, -210, + 0x000254, 0x000254, INTEGER_OFFSET, -206, + 0x000256, 0x000257, INTEGER_OFFSET, -205, + 0x000259, 0x000259, INTEGER_OFFSET, -202, + 0x00025b, 0x00025b, INTEGER_OFFSET, -203, + 0x00025c, 0x00025c, INTEGER_OFFSET, 42319, + 0x000260, 0x000260, INTEGER_OFFSET, -205, + 0x000261, 0x000261, INTEGER_OFFSET, 42315, + 0x000263, 0x000263, INTEGER_OFFSET, -207, + 0x000265, 0x000265, 
INTEGER_OFFSET, 42280, + 0x000266, 0x000266, INTEGER_OFFSET, 42308, + 0x000268, 0x000268, INTEGER_OFFSET, -209, + 0x000269, 0x000269, INTEGER_OFFSET, -211, + 0x00026a, 0x00026a, INTEGER_OFFSET, 42308, + 0x00026b, 0x00026b, INTEGER_OFFSET, 10743, + 0x00026c, 0x00026c, INTEGER_OFFSET, 42305, + 0x00026f, 0x00026f, INTEGER_OFFSET, -211, + 0x000271, 0x000271, INTEGER_OFFSET, 10749, + 0x000272, 0x000272, INTEGER_OFFSET, -213, + 0x000275, 0x000275, INTEGER_OFFSET, -214, + 0x00027d, 0x00027d, INTEGER_OFFSET, 10727, + 0x000280, 0x000280, INTEGER_OFFSET, -218, + 0x000282, 0x000282, INTEGER_OFFSET, 42307, + 0x000283, 0x000283, INTEGER_OFFSET, -218, + 0x000287, 0x000287, INTEGER_OFFSET, 42282, + 0x000288, 0x000288, INTEGER_OFFSET, -218, + 0x000289, 0x000289, INTEGER_OFFSET, -69, + 0x00028a, 0x00028b, INTEGER_OFFSET, -217, + 0x00028c, 0x00028c, INTEGER_OFFSET, -71, + 0x000292, 0x000292, INTEGER_OFFSET, -219, + 0x00029d, 0x00029d, INTEGER_OFFSET, 42261, + 0x00029e, 0x00029e, INTEGER_OFFSET, 42258, + 0x000345, 0x000345, DIRECT_MAPPING, 8, + 0x000370, 0x000373, ALTERNATING_AL, 0, + 0x000376, 0x000377, ALTERNATING_AL, 0, + 0x00037b, 0x00037d, INTEGER_OFFSET, 130, + 0x00037f, 0x00037f, INTEGER_OFFSET, 116, + 0x000386, 0x000386, INTEGER_OFFSET, 38, + 0x000388, 0x00038a, INTEGER_OFFSET, 37, + 0x00038c, 0x00038c, INTEGER_OFFSET, 64, + 0x00038e, 0x00038f, INTEGER_OFFSET, 63, + 0x000391, 0x000391, INTEGER_OFFSET, 32, + 0x000392, 0x000392, DIRECT_MAPPING, 9, + 0x000393, 0x000394, INTEGER_OFFSET, 32, + 0x000395, 0x000395, DIRECT_MAPPING, 10, + 0x000396, 0x000397, INTEGER_OFFSET, 32, + 0x000398, 0x000398, DIRECT_MAPPING, 11, + 0x000399, 0x000399, DIRECT_MAPPING, 8, + 0x00039a, 0x00039a, DIRECT_MAPPING, 12, + 0x00039b, 0x00039b, INTEGER_OFFSET, 32, + 0x00039c, 0x00039c, DIRECT_MAPPING, 2, + 0x00039d, 0x00039f, INTEGER_OFFSET, 32, + 0x0003a0, 0x0003a0, DIRECT_MAPPING, 13, + 0x0003a1, 0x0003a1, DIRECT_MAPPING, 14, + 0x0003a3, 0x0003a3, DIRECT_MAPPING, 15, + 0x0003a4, 0x0003a5, INTEGER_OFFSET, 
32, + 0x0003a6, 0x0003a6, DIRECT_MAPPING, 16, + 0x0003a7, 0x0003a8, INTEGER_OFFSET, 32, + 0x0003a9, 0x0003a9, DIRECT_MAPPING, 17, + 0x0003aa, 0x0003ab, INTEGER_OFFSET, 32, + 0x0003ac, 0x0003ac, INTEGER_OFFSET, -38, + 0x0003ad, 0x0003af, INTEGER_OFFSET, -37, + 0x0003b1, 0x0003b1, INTEGER_OFFSET, -32, + 0x0003b2, 0x0003b2, DIRECT_MAPPING, 9, + 0x0003b3, 0x0003b4, INTEGER_OFFSET, -32, + 0x0003b5, 0x0003b5, DIRECT_MAPPING, 10, + 0x0003b6, 0x0003b7, INTEGER_OFFSET, -32, + 0x0003b8, 0x0003b8, DIRECT_MAPPING, 11, + 0x0003b9, 0x0003b9, DIRECT_MAPPING, 8, + 0x0003ba, 0x0003ba, DIRECT_MAPPING, 12, + 0x0003bb, 0x0003bb, INTEGER_OFFSET, -32, + 0x0003bc, 0x0003bc, DIRECT_MAPPING, 2, + 0x0003bd, 0x0003bf, INTEGER_OFFSET, -32, + 0x0003c0, 0x0003c0, DIRECT_MAPPING, 13, + 0x0003c1, 0x0003c1, DIRECT_MAPPING, 14, + 0x0003c2, 0x0003c3, DIRECT_MAPPING, 15, + 0x0003c4, 0x0003c5, INTEGER_OFFSET, -32, + 0x0003c6, 0x0003c6, DIRECT_MAPPING, 16, + 0x0003c7, 0x0003c8, INTEGER_OFFSET, -32, + 0x0003c9, 0x0003c9, DIRECT_MAPPING, 17, + 0x0003ca, 0x0003cb, INTEGER_OFFSET, -32, + 0x0003cc, 0x0003cc, INTEGER_OFFSET, -64, + 0x0003cd, 0x0003ce, INTEGER_OFFSET, -63, + 0x0003cf, 0x0003cf, INTEGER_OFFSET, 8, + 0x0003d0, 0x0003d0, DIRECT_MAPPING, 9, + 0x0003d1, 0x0003d1, DIRECT_MAPPING, 11, + 0x0003d5, 0x0003d5, DIRECT_MAPPING, 16, + 0x0003d6, 0x0003d6, DIRECT_MAPPING, 13, + 0x0003d7, 0x0003d7, INTEGER_OFFSET, -8, + 0x0003d8, 0x0003ef, ALTERNATING_AL, 0, + 0x0003f0, 0x0003f0, DIRECT_MAPPING, 12, + 0x0003f1, 0x0003f1, DIRECT_MAPPING, 14, + 0x0003f2, 0x0003f2, INTEGER_OFFSET, 7, + 0x0003f3, 0x0003f3, INTEGER_OFFSET, -116, + 0x0003f4, 0x0003f4, DIRECT_MAPPING, 11, + 0x0003f5, 0x0003f5, DIRECT_MAPPING, 10, + 0x0003f7, 0x0003f8, ALTERNATING_UL, 0, + 0x0003f9, 0x0003f9, INTEGER_OFFSET, -7, + 0x0003fa, 0x0003fb, ALTERNATING_AL, 0, + 0x0003fd, 0x0003ff, INTEGER_OFFSET, -130, + 0x000400, 0x00040f, INTEGER_OFFSET, 80, + 0x000410, 0x000411, INTEGER_OFFSET, 32, + 0x000412, 0x000412, DIRECT_MAPPING, 18, + 0x000413, 
0x000413, INTEGER_OFFSET, 32, + 0x000414, 0x000414, DIRECT_MAPPING, 19, + 0x000415, 0x00041d, INTEGER_OFFSET, 32, + 0x00041e, 0x00041e, DIRECT_MAPPING, 20, + 0x00041f, 0x000420, INTEGER_OFFSET, 32, + 0x000421, 0x000421, DIRECT_MAPPING, 21, + 0x000422, 0x000422, DIRECT_MAPPING, 22, + 0x000423, 0x000429, INTEGER_OFFSET, 32, + 0x00042a, 0x00042a, DIRECT_MAPPING, 23, + 0x00042b, 0x00042f, INTEGER_OFFSET, 32, + 0x000430, 0x000431, INTEGER_OFFSET, -32, + 0x000432, 0x000432, DIRECT_MAPPING, 18, + 0x000433, 0x000433, INTEGER_OFFSET, -32, + 0x000434, 0x000434, DIRECT_MAPPING, 19, + 0x000435, 0x00043d, INTEGER_OFFSET, -32, + 0x00043e, 0x00043e, DIRECT_MAPPING, 20, + 0x00043f, 0x000440, INTEGER_OFFSET, -32, + 0x000441, 0x000441, DIRECT_MAPPING, 21, + 0x000442, 0x000442, DIRECT_MAPPING, 22, + 0x000443, 0x000449, INTEGER_OFFSET, -32, + 0x00044a, 0x00044a, DIRECT_MAPPING, 23, + 0x00044b, 0x00044f, INTEGER_OFFSET, -32, + 0x000450, 0x00045f, INTEGER_OFFSET, -80, + 0x000460, 0x000461, ALTERNATING_AL, 0, + 0x000462, 0x000463, DIRECT_MAPPING, 24, + 0x000464, 0x000481, ALTERNATING_AL, 0, + 0x00048a, 0x0004bf, ALTERNATING_AL, 0, + 0x0004c0, 0x0004c0, INTEGER_OFFSET, 15, + 0x0004c1, 0x0004ce, ALTERNATING_UL, 0, + 0x0004cf, 0x0004cf, INTEGER_OFFSET, -15, + 0x0004d0, 0x00052f, ALTERNATING_AL, 0, + 0x000531, 0x000556, INTEGER_OFFSET, 48, + 0x000561, 0x000586, INTEGER_OFFSET, -48, + 0x0010a0, 0x0010c5, INTEGER_OFFSET, 7264, + 0x0010c7, 0x0010c7, INTEGER_OFFSET, 7264, + 0x0010cd, 0x0010cd, INTEGER_OFFSET, 7264, + 0x0010d0, 0x0010fa, INTEGER_OFFSET, 3008, + 0x0010fd, 0x0010ff, INTEGER_OFFSET, 3008, + 0x0013a0, 0x0013ef, INTEGER_OFFSET, 38864, + 0x0013f0, 0x0013f5, INTEGER_OFFSET, 8, + 0x0013f8, 0x0013fd, INTEGER_OFFSET, -8, + 0x001c80, 0x001c80, DIRECT_MAPPING, 18, + 0x001c81, 0x001c81, DIRECT_MAPPING, 19, + 0x001c82, 0x001c82, DIRECT_MAPPING, 20, + 0x001c83, 0x001c83, DIRECT_MAPPING, 21, + 0x001c84, 0x001c85, DIRECT_MAPPING, 22, + 0x001c86, 0x001c86, DIRECT_MAPPING, 23, + 0x001c87, 0x001c87, 
DIRECT_MAPPING, 24, + 0x001c88, 0x001c88, DIRECT_MAPPING, 25, + 0x001c90, 0x001cba, INTEGER_OFFSET, -3008, + 0x001cbd, 0x001cbf, INTEGER_OFFSET, -3008, + 0x001d79, 0x001d79, INTEGER_OFFSET, 35332, + 0x001d7d, 0x001d7d, INTEGER_OFFSET, 3814, + 0x001d8e, 0x001d8e, INTEGER_OFFSET, 35384, + 0x001e00, 0x001e5f, ALTERNATING_AL, 0, + 0x001e60, 0x001e61, DIRECT_MAPPING, 26, + 0x001e62, 0x001e95, ALTERNATING_AL, 0, + 0x001e9b, 0x001e9b, DIRECT_MAPPING, 26, + 0x001e9e, 0x001e9e, INTEGER_OFFSET, -7615, + 0x001ea0, 0x001eff, ALTERNATING_AL, 0, + 0x001f00, 0x001f07, INTEGER_OFFSET, 8, + 0x001f08, 0x001f0f, INTEGER_OFFSET, -8, + 0x001f10, 0x001f15, INTEGER_OFFSET, 8, + 0x001f18, 0x001f1d, INTEGER_OFFSET, -8, + 0x001f20, 0x001f27, INTEGER_OFFSET, 8, + 0x001f28, 0x001f2f, INTEGER_OFFSET, -8, + 0x001f30, 0x001f37, INTEGER_OFFSET, 8, + 0x001f38, 0x001f3f, INTEGER_OFFSET, -8, + 0x001f40, 0x001f45, INTEGER_OFFSET, 8, + 0x001f48, 0x001f4d, INTEGER_OFFSET, -8, + 0x001f51, 0x001f51, INTEGER_OFFSET, 8, + 0x001f53, 0x001f53, INTEGER_OFFSET, 8, + 0x001f55, 0x001f55, INTEGER_OFFSET, 8, + 0x001f57, 0x001f57, INTEGER_OFFSET, 8, + 0x001f59, 0x001f59, INTEGER_OFFSET, -8, + 0x001f5b, 0x001f5b, INTEGER_OFFSET, -8, + 0x001f5d, 0x001f5d, INTEGER_OFFSET, -8, + 0x001f5f, 0x001f5f, INTEGER_OFFSET, -8, + 0x001f60, 0x001f67, INTEGER_OFFSET, 8, + 0x001f68, 0x001f6f, INTEGER_OFFSET, -8, + 0x001f70, 0x001f71, INTEGER_OFFSET, 74, + 0x001f72, 0x001f75, INTEGER_OFFSET, 86, + 0x001f76, 0x001f77, INTEGER_OFFSET, 100, + 0x001f78, 0x001f79, INTEGER_OFFSET, 128, + 0x001f7a, 0x001f7b, INTEGER_OFFSET, 112, + 0x001f7c, 0x001f7d, INTEGER_OFFSET, 126, + 0x001f80, 0x001f87, INTEGER_OFFSET, 8, + 0x001f88, 0x001f8f, INTEGER_OFFSET, -8, + 0x001f90, 0x001f97, INTEGER_OFFSET, 8, + 0x001f98, 0x001f9f, INTEGER_OFFSET, -8, + 0x001fa0, 0x001fa7, INTEGER_OFFSET, 8, + 0x001fa8, 0x001faf, INTEGER_OFFSET, -8, + 0x001fb0, 0x001fb1, INTEGER_OFFSET, 8, + 0x001fb3, 0x001fb3, INTEGER_OFFSET, 9, + 0x001fb8, 0x001fb9, INTEGER_OFFSET, -8, + 
0x001fba, 0x001fbb, INTEGER_OFFSET, -74, + 0x001fbc, 0x001fbc, INTEGER_OFFSET, -9, + 0x001fbe, 0x001fbe, DIRECT_MAPPING, 8, + 0x001fc3, 0x001fc3, INTEGER_OFFSET, 9, + 0x001fc8, 0x001fcb, INTEGER_OFFSET, -86, + 0x001fcc, 0x001fcc, INTEGER_OFFSET, -9, + 0x001fd0, 0x001fd1, INTEGER_OFFSET, 8, + 0x001fd8, 0x001fd9, INTEGER_OFFSET, -8, + 0x001fda, 0x001fdb, INTEGER_OFFSET, -100, + 0x001fe0, 0x001fe1, INTEGER_OFFSET, 8, + 0x001fe5, 0x001fe5, INTEGER_OFFSET, 7, + 0x001fe8, 0x001fe9, INTEGER_OFFSET, -8, + 0x001fea, 0x001feb, INTEGER_OFFSET, -112, + 0x001fec, 0x001fec, INTEGER_OFFSET, -7, + 0x001ff3, 0x001ff3, INTEGER_OFFSET, 9, + 0x001ff8, 0x001ff9, INTEGER_OFFSET, -128, + 0x001ffa, 0x001ffb, INTEGER_OFFSET, -126, + 0x001ffc, 0x001ffc, INTEGER_OFFSET, -9, + 0x002126, 0x002126, DIRECT_MAPPING, 17, + 0x00212a, 0x00212a, DIRECT_MAPPING, 0, + 0x00212b, 0x00212b, DIRECT_MAPPING, 3, + 0x002132, 0x002132, INTEGER_OFFSET, 28, + 0x00214e, 0x00214e, INTEGER_OFFSET, -28, + 0x002160, 0x00216f, INTEGER_OFFSET, 16, + 0x002170, 0x00217f, INTEGER_OFFSET, -16, + 0x002183, 0x002184, ALTERNATING_UL, 0, + 0x0024b6, 0x0024cf, INTEGER_OFFSET, 26, + 0x0024d0, 0x0024e9, INTEGER_OFFSET, -26, + 0x002c00, 0x002c2f, INTEGER_OFFSET, 48, + 0x002c30, 0x002c5f, INTEGER_OFFSET, -48, + 0x002c60, 0x002c61, ALTERNATING_AL, 0, + 0x002c62, 0x002c62, INTEGER_OFFSET, -10743, + 0x002c63, 0x002c63, INTEGER_OFFSET, -3814, + 0x002c64, 0x002c64, INTEGER_OFFSET, -10727, + 0x002c65, 0x002c65, INTEGER_OFFSET, -10795, + 0x002c66, 0x002c66, INTEGER_OFFSET, -10792, + 0x002c67, 0x002c6c, ALTERNATING_UL, 0, + 0x002c6d, 0x002c6d, INTEGER_OFFSET, -10780, + 0x002c6e, 0x002c6e, INTEGER_OFFSET, -10749, + 0x002c6f, 0x002c6f, INTEGER_OFFSET, -10783, + 0x002c70, 0x002c70, INTEGER_OFFSET, -10782, + 0x002c72, 0x002c73, ALTERNATING_AL, 0, + 0x002c75, 0x002c76, ALTERNATING_UL, 0, + 0x002c7e, 0x002c7f, INTEGER_OFFSET, -10815, + 0x002c80, 0x002ce3, ALTERNATING_AL, 0, + 0x002ceb, 0x002cee, ALTERNATING_UL, 0, + 0x002cf2, 0x002cf3, 
ALTERNATING_AL, 0, + 0x002d00, 0x002d25, INTEGER_OFFSET, -7264, + 0x002d27, 0x002d27, INTEGER_OFFSET, -7264, + 0x002d2d, 0x002d2d, INTEGER_OFFSET, -7264, + 0x00a640, 0x00a649, ALTERNATING_AL, 0, + 0x00a64a, 0x00a64b, DIRECT_MAPPING, 25, + 0x00a64c, 0x00a66d, ALTERNATING_AL, 0, + 0x00a680, 0x00a69b, ALTERNATING_AL, 0, + 0x00a722, 0x00a72f, ALTERNATING_AL, 0, + 0x00a732, 0x00a76f, ALTERNATING_AL, 0, + 0x00a779, 0x00a77c, ALTERNATING_UL, 0, + 0x00a77d, 0x00a77d, INTEGER_OFFSET, -35332, + 0x00a77e, 0x00a787, ALTERNATING_AL, 0, + 0x00a78b, 0x00a78c, ALTERNATING_UL, 0, + 0x00a78d, 0x00a78d, INTEGER_OFFSET, -42280, + 0x00a790, 0x00a793, ALTERNATING_AL, 0, + 0x00a794, 0x00a794, INTEGER_OFFSET, 48, + 0x00a796, 0x00a7a9, ALTERNATING_AL, 0, + 0x00a7aa, 0x00a7aa, INTEGER_OFFSET, -42308, + 0x00a7ab, 0x00a7ab, INTEGER_OFFSET, -42319, + 0x00a7ac, 0x00a7ac, INTEGER_OFFSET, -42315, + 0x00a7ad, 0x00a7ad, INTEGER_OFFSET, -42305, + 0x00a7ae, 0x00a7ae, INTEGER_OFFSET, -42308, + 0x00a7b0, 0x00a7b0, INTEGER_OFFSET, -42258, + 0x00a7b1, 0x00a7b1, INTEGER_OFFSET, -42282, + 0x00a7b2, 0x00a7b2, INTEGER_OFFSET, -42261, + 0x00a7b3, 0x00a7b3, INTEGER_OFFSET, 928, + 0x00a7b4, 0x00a7c3, ALTERNATING_AL, 0, + 0x00a7c4, 0x00a7c4, INTEGER_OFFSET, -48, + 0x00a7c5, 0x00a7c5, INTEGER_OFFSET, -42307, + 0x00a7c6, 0x00a7c6, INTEGER_OFFSET, -35384, + 0x00a7c7, 0x00a7ca, ALTERNATING_UL, 0, + 0x00a7d0, 0x00a7d1, ALTERNATING_AL, 0, + 0x00a7d6, 0x00a7d9, ALTERNATING_AL, 0, + 0x00a7f5, 0x00a7f6, ALTERNATING_UL, 0, + 0x00ab53, 0x00ab53, INTEGER_OFFSET, -928, + 0x00ab70, 0x00abbf, INTEGER_OFFSET, -38864, + 0x00ff21, 0x00ff3a, INTEGER_OFFSET, 32, + 0x00ff41, 0x00ff5a, INTEGER_OFFSET, -32, + 0x010400, 0x010427, INTEGER_OFFSET, 40, + 0x010428, 0x01044f, INTEGER_OFFSET, -40, + 0x0104b0, 0x0104d3, INTEGER_OFFSET, 40, + 0x0104d8, 0x0104fb, INTEGER_OFFSET, -40, + 0x010570, 0x01057a, INTEGER_OFFSET, 39, + 0x01057c, 0x01058a, INTEGER_OFFSET, 39, + 0x01058c, 0x010592, INTEGER_OFFSET, 39, + 0x010594, 0x010595, INTEGER_OFFSET, 
39, + 0x010597, 0x0105a1, INTEGER_OFFSET, -39, + 0x0105a3, 0x0105b1, INTEGER_OFFSET, -39, + 0x0105b3, 0x0105b9, INTEGER_OFFSET, -39, + 0x0105bb, 0x0105bc, INTEGER_OFFSET, -39, + 0x010c80, 0x010cb2, INTEGER_OFFSET, 64, + 0x010cc0, 0x010cf2, INTEGER_OFFSET, -64, + 0x0118a0, 0x0118bf, INTEGER_OFFSET, 32, + 0x0118c0, 0x0118df, INTEGER_OFFSET, -32, + 0x016e40, 0x016e5f, INTEGER_OFFSET, 32, + 0x016e60, 0x016e7f, INTEGER_OFFSET, -32, + 0x01e900, 0x01e921, INTEGER_OFFSET, 34, + 0x01e922, 0x01e943, INTEGER_OFFSET, -34, + }); + private static final CaseFoldEquivalenceTable JS_NON_UNICODE = new CaseFoldEquivalenceTable(UNICODE_15_0_0_SIMPLE, new CodePointSet[]{ + rangeSet(0x000398, 0x000398, 0x0003b8, 0x0003b8, 0x0003d1, 0x0003d1), + }, new int[]{ + 0x00004b, 0x00005a, INTEGER_OFFSET, 32, + 0x00006b, 0x00007a, INTEGER_OFFSET, -32, + 0x0000c5, 0x0000d6, INTEGER_OFFSET, 32, + 0x0000df, 0x0000df, INTEGER_OFFSET, 0, + 0x0000e5, 0x0000f6, INTEGER_OFFSET, -32, + 0x00017f, 0x00017f, INTEGER_OFFSET, 0, + 0x000398, 0x000398, DIRECT_MAPPING, 0, + 0x0003a9, 0x0003ab, INTEGER_OFFSET, 32, + 0x0003b8, 0x0003b8, DIRECT_MAPPING, 0, + 0x0003c9, 0x0003cb, INTEGER_OFFSET, -32, + 0x0003d1, 0x0003d1, DIRECT_MAPPING, 0, + 0x0003f4, 0x0003f4, INTEGER_OFFSET, 0, + 0x001e9e, 0x001e9e, INTEGER_OFFSET, 0, + 0x001f80, 0x001f87, INTEGER_OFFSET, 0, + 0x001f88, 0x001f8f, INTEGER_OFFSET, 0, + 0x001f90, 0x001f97, INTEGER_OFFSET, 0, + 0x001f98, 0x001f9f, INTEGER_OFFSET, 0, + 0x001fa0, 0x001fa7, INTEGER_OFFSET, 0, + 0x001fa8, 0x001faf, INTEGER_OFFSET, 0, + 0x001fb3, 0x001fb3, INTEGER_OFFSET, 0, + 0x001fbc, 0x001fbc, INTEGER_OFFSET, 0, + 0x001fc3, 0x001fc3, INTEGER_OFFSET, 0, + 0x001fcc, 0x001fcc, INTEGER_OFFSET, 0, + 0x001ff3, 0x001ff3, INTEGER_OFFSET, 0, + 0x001ffc, 0x001ffc, INTEGER_OFFSET, 0, + 0x002126, 0x002126, INTEGER_OFFSET, 0, + 0x00212a, 0x00212a, INTEGER_OFFSET, 0, + 0x00212b, 0x00212b, INTEGER_OFFSET, 0, + 0x010400, 0x010427, INTEGER_OFFSET, 0, + 0x010428, 0x01044f, INTEGER_OFFSET, 0, + 0x0104b0, 
0x0104d3, INTEGER_OFFSET, 0, + 0x0104d8, 0x0104fb, INTEGER_OFFSET, 0, + 0x010570, 0x01057a, INTEGER_OFFSET, 0, + 0x01057c, 0x01058a, INTEGER_OFFSET, 0, + 0x01058c, 0x010592, INTEGER_OFFSET, 0, + 0x010594, 0x010595, INTEGER_OFFSET, 0, + 0x010597, 0x0105a1, INTEGER_OFFSET, 0, + 0x0105a3, 0x0105b1, INTEGER_OFFSET, 0, + 0x0105b3, 0x0105b9, INTEGER_OFFSET, 0, + 0x0105bb, 0x0105bc, INTEGER_OFFSET, 0, + 0x010c80, 0x010cb2, INTEGER_OFFSET, 0, + 0x010cc0, 0x010cf2, INTEGER_OFFSET, 0, + 0x0118a0, 0x0118bf, INTEGER_OFFSET, 0, + 0x0118c0, 0x0118df, INTEGER_OFFSET, 0, + 0x016e40, 0x016e5f, INTEGER_OFFSET, 0, + 0x016e60, 0x016e7f, INTEGER_OFFSET, 0, + 0x01e900, 0x01e921, INTEGER_OFFSET, 0, + 0x01e922, 0x01e943, INTEGER_OFFSET, 0, + }); + private static final CaseFoldEquivalenceTable PYTHON_UNICODE = new CaseFoldEquivalenceTable(UNICODE_15_0_0_SIMPLE, new CodePointSet[]{ + rangeSet(0x000049, 0x000049, 0x000069, 0x000069, 0x000130, 0x000131), + }, new int[]{ + 0x000049, 0x000049, DIRECT_MAPPING, 0, + 0x000069, 0x000069, DIRECT_MAPPING, 0, + 0x000130, 0x000131, DIRECT_MAPPING, 0, + 0x000390, 0x000390, INTEGER_OFFSET, 7235, + 0x0003b0, 0x0003b0, INTEGER_OFFSET, 7219, + 0x001fd3, 0x001fd3, INTEGER_OFFSET, -7235, + 0x001fe3, 0x001fe3, INTEGER_OFFSET, -7219, + 0x00fb05, 0x00fb06, ALTERNATING_UL, 0, + }); + private static final CaseFoldTable UNICODE_15_0_0_FULL = new CaseFoldTable(null, new int[]{ + 0x000041, 0x00005a, INTEGER_OFFSET, 32, + 0x0000b5, 0x0000b5, INTEGER_OFFSET, 775, + 0x0000c0, 0x0000d6, INTEGER_OFFSET, 32, + 0x0000d8, 0x0000de, INTEGER_OFFSET, 32, + 0x0000df, 0x0000df, INTEGER_OFFSET, 1113962, + 0x000100, 0x00012e, ALTERNATING_AL, 0, + 0x000130, 0x000130, INTEGER_OFFSET, 1113808, + 0x000132, 0x000136, ALTERNATING_AL, 0, + 0x000139, 0x000147, ALTERNATING_UL, 0, + 0x000149, 0x000149, INTEGER_OFFSET, 1113857, + 0x00014a, 0x000176, ALTERNATING_AL, 0, + 0x000178, 0x000178, INTEGER_OFFSET, -121, + 0x000179, 0x00017d, ALTERNATING_UL, 0, + 0x00017f, 0x00017f, INTEGER_OFFSET, 
-268, + 0x000181, 0x000181, INTEGER_OFFSET, 210, + 0x000182, 0x000184, ALTERNATING_AL, 0, + 0x000186, 0x000186, INTEGER_OFFSET, 206, + 0x000187, 0x000187, ALTERNATING_UL, 0, + 0x000189, 0x00018a, INTEGER_OFFSET, 205, + 0x00018b, 0x00018b, ALTERNATING_UL, 0, + 0x00018e, 0x00018e, INTEGER_OFFSET, 79, + 0x00018f, 0x00018f, INTEGER_OFFSET, 202, + 0x000190, 0x000190, INTEGER_OFFSET, 203, + 0x000191, 0x000191, ALTERNATING_UL, 0, + 0x000193, 0x000193, INTEGER_OFFSET, 205, + 0x000194, 0x000194, INTEGER_OFFSET, 207, + 0x000196, 0x000196, INTEGER_OFFSET, 211, + 0x000197, 0x000197, INTEGER_OFFSET, 209, + 0x000198, 0x000198, ALTERNATING_AL, 0, + 0x00019c, 0x00019c, INTEGER_OFFSET, 211, + 0x00019d, 0x00019d, INTEGER_OFFSET, 213, + 0x00019f, 0x00019f, INTEGER_OFFSET, 214, + 0x0001a0, 0x0001a4, ALTERNATING_AL, 0, + 0x0001a6, 0x0001a6, INTEGER_OFFSET, 218, + 0x0001a7, 0x0001a7, ALTERNATING_UL, 0, + 0x0001a9, 0x0001a9, INTEGER_OFFSET, 218, + 0x0001ac, 0x0001ac, ALTERNATING_AL, 0, + 0x0001ae, 0x0001ae, INTEGER_OFFSET, 218, + 0x0001af, 0x0001af, ALTERNATING_UL, 0, + 0x0001b1, 0x0001b2, INTEGER_OFFSET, 217, + 0x0001b3, 0x0001b5, ALTERNATING_UL, 0, + 0x0001b7, 0x0001b7, INTEGER_OFFSET, 219, + 0x0001b8, 0x0001b8, ALTERNATING_AL, 0, + 0x0001bc, 0x0001bc, ALTERNATING_AL, 0, + 0x0001c4, 0x0001c5, DIRECT_SINGLE, 454, + 0x0001c7, 0x0001c8, DIRECT_SINGLE, 457, + 0x0001ca, 0x0001cb, DIRECT_SINGLE, 460, + 0x0001cd, 0x0001db, ALTERNATING_UL, 0, + 0x0001de, 0x0001ee, ALTERNATING_AL, 0, + 0x0001f0, 0x0001f0, INTEGER_OFFSET, 1113691, + 0x0001f1, 0x0001f2, DIRECT_SINGLE, 499, + 0x0001f4, 0x0001f4, ALTERNATING_AL, 0, + 0x0001f6, 0x0001f6, INTEGER_OFFSET, -97, + 0x0001f7, 0x0001f7, INTEGER_OFFSET, -56, + 0x0001f8, 0x00021e, ALTERNATING_AL, 0, + 0x000220, 0x000220, INTEGER_OFFSET, -130, + 0x000222, 0x000232, ALTERNATING_AL, 0, + 0x00023a, 0x00023a, INTEGER_OFFSET, 10795, + 0x00023b, 0x00023b, ALTERNATING_UL, 0, + 0x00023d, 0x00023d, INTEGER_OFFSET, -163, + 0x00023e, 0x00023e, INTEGER_OFFSET, 10792, + 
0x000241, 0x000241, ALTERNATING_UL, 0, + 0x000243, 0x000243, INTEGER_OFFSET, -195, + 0x000244, 0x000244, INTEGER_OFFSET, 69, + 0x000245, 0x000245, INTEGER_OFFSET, 71, + 0x000246, 0x00024e, ALTERNATING_AL, 0, + 0x000345, 0x000345, INTEGER_OFFSET, 116, + 0x000370, 0x000372, ALTERNATING_AL, 0, + 0x000376, 0x000376, ALTERNATING_AL, 0, + 0x00037f, 0x00037f, INTEGER_OFFSET, 116, + 0x000386, 0x000386, INTEGER_OFFSET, 38, + 0x000388, 0x00038a, INTEGER_OFFSET, 37, + 0x00038c, 0x00038c, INTEGER_OFFSET, 64, + 0x00038e, 0x00038f, INTEGER_OFFSET, 63, + 0x000390, 0x000390, INTEGER_OFFSET, 1113276, + 0x000391, 0x0003a1, INTEGER_OFFSET, 32, + 0x0003a3, 0x0003ab, INTEGER_OFFSET, 32, + 0x0003b0, 0x0003b0, INTEGER_OFFSET, 1113245, + 0x0003c2, 0x0003c2, ALTERNATING_AL, 0, + 0x0003cf, 0x0003cf, INTEGER_OFFSET, 8, + 0x0003d0, 0x0003d0, INTEGER_OFFSET, -30, + 0x0003d1, 0x0003d1, INTEGER_OFFSET, -25, + 0x0003d5, 0x0003d5, INTEGER_OFFSET, -15, + 0x0003d6, 0x0003d6, INTEGER_OFFSET, -22, + 0x0003d8, 0x0003ee, ALTERNATING_AL, 0, + 0x0003f0, 0x0003f0, INTEGER_OFFSET, -54, + 0x0003f1, 0x0003f1, INTEGER_OFFSET, -48, + 0x0003f4, 0x0003f4, INTEGER_OFFSET, -60, + 0x0003f5, 0x0003f5, INTEGER_OFFSET, -64, + 0x0003f7, 0x0003f7, ALTERNATING_UL, 0, + 0x0003f9, 0x0003f9, INTEGER_OFFSET, -7, + 0x0003fa, 0x0003fa, ALTERNATING_AL, 0, + 0x0003fd, 0x0003ff, INTEGER_OFFSET, -130, + 0x000400, 0x00040f, INTEGER_OFFSET, 80, + 0x000410, 0x00042f, INTEGER_OFFSET, 32, + 0x000460, 0x000480, ALTERNATING_AL, 0, + 0x00048a, 0x0004be, ALTERNATING_AL, 0, + 0x0004c0, 0x0004c0, INTEGER_OFFSET, 15, + 0x0004c1, 0x0004cd, ALTERNATING_UL, 0, + 0x0004d0, 0x00052e, ALTERNATING_AL, 0, + 0x000531, 0x000556, INTEGER_OFFSET, 48, + 0x000587, 0x000587, INTEGER_OFFSET, 1112775, + 0x0010a0, 0x0010c5, INTEGER_OFFSET, 7264, + 0x0010c7, 0x0010c7, INTEGER_OFFSET, 7264, + 0x0010cd, 0x0010cd, INTEGER_OFFSET, 7264, + 0x0013f8, 0x0013fd, INTEGER_OFFSET, -8, + 0x001c80, 0x001c80, INTEGER_OFFSET, -6222, + 0x001c81, 0x001c81, INTEGER_OFFSET, -6221, 
+ 0x001c82, 0x001c82, INTEGER_OFFSET, -6212, + 0x001c83, 0x001c84, INTEGER_OFFSET, -6210, + 0x001c85, 0x001c85, INTEGER_OFFSET, -6211, + 0x001c86, 0x001c86, INTEGER_OFFSET, -6204, + 0x001c87, 0x001c87, INTEGER_OFFSET, -6180, + 0x001c88, 0x001c88, INTEGER_OFFSET, 35267, + 0x001c90, 0x001cba, INTEGER_OFFSET, -3008, + 0x001cbd, 0x001cbf, INTEGER_OFFSET, -3008, + 0x001e00, 0x001e94, ALTERNATING_AL, 0, + 0x001e96, 0x001e9a, INTEGER_OFFSET, 1106361, + 0x001e9b, 0x001e9b, INTEGER_OFFSET, -58, + 0x001e9e, 0x001e9e, INTEGER_OFFSET, 1106347, + 0x001ea0, 0x001efe, ALTERNATING_AL, 0, + 0x001f08, 0x001f0f, INTEGER_OFFSET, -8, + 0x001f18, 0x001f1d, INTEGER_OFFSET, -8, + 0x001f28, 0x001f2f, INTEGER_OFFSET, -8, + 0x001f38, 0x001f3f, INTEGER_OFFSET, -8, + 0x001f48, 0x001f4d, INTEGER_OFFSET, -8, + 0x001f50, 0x001f50, INTEGER_OFFSET, 1106180, + 0x001f52, 0x001f52, INTEGER_OFFSET, 1106179, + 0x001f54, 0x001f54, INTEGER_OFFSET, 1106178, + 0x001f56, 0x001f56, INTEGER_OFFSET, 1106177, + 0x001f59, 0x001f59, INTEGER_OFFSET, -8, + 0x001f5b, 0x001f5b, INTEGER_OFFSET, -8, + 0x001f5d, 0x001f5d, INTEGER_OFFSET, -8, + 0x001f5f, 0x001f5f, INTEGER_OFFSET, -8, + 0x001f68, 0x001f6f, INTEGER_OFFSET, -8, + 0x001f80, 0x001f87, INTEGER_OFFSET, 1106136, + 0x001f88, 0x001f97, INTEGER_OFFSET, 1106128, + 0x001f98, 0x001fa7, INTEGER_OFFSET, 1106120, + 0x001fa8, 0x001faf, INTEGER_OFFSET, 1106112, + 0x001fb2, 0x001fb4, INTEGER_OFFSET, 1106110, + 0x001fb6, 0x001fb7, INTEGER_OFFSET, 1106109, + 0x001fb8, 0x001fb9, INTEGER_OFFSET, -8, + 0x001fba, 0x001fbb, INTEGER_OFFSET, -74, + 0x001fbc, 0x001fbc, INTEGER_OFFSET, 1106101, + 0x001fbe, 0x001fbe, INTEGER_OFFSET, -7173, + 0x001fc2, 0x001fc4, INTEGER_OFFSET, 1106099, + 0x001fc6, 0x001fc7, INTEGER_OFFSET, 1106098, + 0x001fc8, 0x001fcb, INTEGER_OFFSET, -86, + 0x001fcc, 0x001fcc, INTEGER_OFFSET, 1106090, + 0x001fd2, 0x001fd2, INTEGER_OFFSET, 1106088, + 0x001fd3, 0x001fd3, INTEGER_OFFSET, 1106041, + 0x001fd6, 0x001fd7, INTEGER_OFFSET, 1106085, + 0x001fd8, 0x001fd9, 
INTEGER_OFFSET, -8, + 0x001fda, 0x001fdb, INTEGER_OFFSET, -100, + 0x001fe2, 0x001fe2, INTEGER_OFFSET, 1106075, + 0x001fe3, 0x001fe3, INTEGER_OFFSET, 1106026, + 0x001fe4, 0x001fe4, INTEGER_OFFSET, 1106074, + 0x001fe6, 0x001fe7, INTEGER_OFFSET, 1106073, + 0x001fe8, 0x001fe9, INTEGER_OFFSET, -8, + 0x001fea, 0x001feb, INTEGER_OFFSET, -112, + 0x001fec, 0x001fec, INTEGER_OFFSET, -7, + 0x001ff2, 0x001ff4, INTEGER_OFFSET, 1106063, + 0x001ff6, 0x001ff7, INTEGER_OFFSET, 1106062, + 0x001ff8, 0x001ff9, INTEGER_OFFSET, -128, + 0x001ffa, 0x001ffb, INTEGER_OFFSET, -126, + 0x001ffc, 0x001ffc, INTEGER_OFFSET, 1106054, + 0x002126, 0x002126, INTEGER_OFFSET, -7517, + 0x00212a, 0x00212a, INTEGER_OFFSET, -8383, + 0x00212b, 0x00212b, INTEGER_OFFSET, -8262, + 0x002132, 0x002132, INTEGER_OFFSET, 28, + 0x002160, 0x00216f, INTEGER_OFFSET, 16, + 0x002183, 0x002183, ALTERNATING_UL, 0, + 0x0024b6, 0x0024cf, INTEGER_OFFSET, 26, + 0x002c00, 0x002c2f, INTEGER_OFFSET, 48, + 0x002c60, 0x002c60, ALTERNATING_AL, 0, + 0x002c62, 0x002c62, INTEGER_OFFSET, -10743, + 0x002c63, 0x002c63, INTEGER_OFFSET, -3814, + 0x002c64, 0x002c64, INTEGER_OFFSET, -10727, + 0x002c67, 0x002c6b, ALTERNATING_UL, 0, + 0x002c6d, 0x002c6d, INTEGER_OFFSET, -10780, + 0x002c6e, 0x002c6e, INTEGER_OFFSET, -10749, + 0x002c6f, 0x002c6f, INTEGER_OFFSET, -10783, + 0x002c70, 0x002c70, INTEGER_OFFSET, -10782, + 0x002c72, 0x002c72, ALTERNATING_AL, 0, + 0x002c75, 0x002c75, ALTERNATING_UL, 0, + 0x002c7e, 0x002c7f, INTEGER_OFFSET, -10815, + 0x002c80, 0x002ce2, ALTERNATING_AL, 0, + 0x002ceb, 0x002ced, ALTERNATING_UL, 0, + 0x002cf2, 0x002cf2, ALTERNATING_AL, 0, + 0x00a640, 0x00a66c, ALTERNATING_AL, 0, + 0x00a680, 0x00a69a, ALTERNATING_AL, 0, + 0x00a722, 0x00a72e, ALTERNATING_AL, 0, + 0x00a732, 0x00a76e, ALTERNATING_AL, 0, + 0x00a779, 0x00a77b, ALTERNATING_UL, 0, + 0x00a77d, 0x00a77d, INTEGER_OFFSET, -35332, + 0x00a77e, 0x00a786, ALTERNATING_AL, 0, + 0x00a78b, 0x00a78b, ALTERNATING_UL, 0, + 0x00a78d, 0x00a78d, INTEGER_OFFSET, -42280, + 0x00a790, 
0x00a792, ALTERNATING_AL, 0, + 0x00a796, 0x00a7a8, ALTERNATING_AL, 0, + 0x00a7aa, 0x00a7aa, INTEGER_OFFSET, -42308, + 0x00a7ab, 0x00a7ab, INTEGER_OFFSET, -42319, + 0x00a7ac, 0x00a7ac, INTEGER_OFFSET, -42315, + 0x00a7ad, 0x00a7ad, INTEGER_OFFSET, -42305, + 0x00a7ae, 0x00a7ae, INTEGER_OFFSET, -42308, + 0x00a7b0, 0x00a7b0, INTEGER_OFFSET, -42258, + 0x00a7b1, 0x00a7b1, INTEGER_OFFSET, -42282, + 0x00a7b2, 0x00a7b2, INTEGER_OFFSET, -42261, + 0x00a7b3, 0x00a7b3, INTEGER_OFFSET, 928, + 0x00a7b4, 0x00a7c2, ALTERNATING_AL, 0, + 0x00a7c4, 0x00a7c4, INTEGER_OFFSET, -48, + 0x00a7c5, 0x00a7c5, INTEGER_OFFSET, -42307, + 0x00a7c6, 0x00a7c6, INTEGER_OFFSET, -35384, + 0x00a7c7, 0x00a7c9, ALTERNATING_UL, 0, + 0x00a7d0, 0x00a7d0, ALTERNATING_AL, 0, + 0x00a7d6, 0x00a7d8, ALTERNATING_AL, 0, + 0x00a7f5, 0x00a7f5, ALTERNATING_UL, 0, + 0x00ab70, 0x00abbf, INTEGER_OFFSET, -38864, + 0x00fb00, 0x00fb05, INTEGER_OFFSET, 1049990, + 0x00fb06, 0x00fb06, INTEGER_OFFSET, 1049989, + 0x00fb13, 0x00fb17, INTEGER_OFFSET, 1049977, + 0x00ff21, 0x00ff3a, INTEGER_OFFSET, 32, + 0x010400, 0x010427, INTEGER_OFFSET, 40, + 0x0104b0, 0x0104d3, INTEGER_OFFSET, 40, + 0x010570, 0x01057a, INTEGER_OFFSET, 39, + 0x01057c, 0x01058a, INTEGER_OFFSET, 39, + 0x01058c, 0x010592, INTEGER_OFFSET, 39, + 0x010594, 0x010595, INTEGER_OFFSET, 39, + 0x010c80, 0x010cb2, INTEGER_OFFSET, 64, + 0x0118a0, 0x0118bf, INTEGER_OFFSET, 32, + 0x016e40, 0x016e5f, INTEGER_OFFSET, 32, + 0x01e900, 0x01e921, INTEGER_OFFSET, 34, + }); + private static final CaseFoldTable ORACLE_DB = new CaseFoldTable(UNICODE_15_0_0_FULL, new int[]{ + 0x002c2f, 0x002c2f, INTEGER_OFFSET, 0, + 0x00a7bf, 0x00a7c1, INTEGER_OFFSET, 0, + 0x00a7c7, 0x00a7c9, INTEGER_OFFSET, 0, + 0x00a7d0, 0x00a7d0, INTEGER_OFFSET, 0, + 0x00a7d6, 0x00a7d8, INTEGER_OFFSET, 0, + 0x00a7f5, 0x00a7f5, INTEGER_OFFSET, 0, + 0x010570, 0x01057a, INTEGER_OFFSET, 0, + 0x01057c, 0x01058a, INTEGER_OFFSET, 0, + 0x01058c, 0x010592, INTEGER_OFFSET, 0, + 0x010594, 0x010595, INTEGER_OFFSET, 0, + }); + 
private static final CaseFoldTable ORACLE_DB_AI = new CaseFoldTable(null, new int[]{ + 0x000041, 0x00005a, INTEGER_OFFSET, 32, + 0x000084, 0x000084, ALTERNATING_AL, 0, + 0x0000a9, 0x0000a9, INTEGER_OFFSET, -70, + 0x0000aa, 0x0000aa, INTEGER_OFFSET, -73, + 0x0000ae, 0x0000ae, INTEGER_OFFSET, -60, + 0x0000b2, 0x0000b3, INTEGER_OFFSET, -128, + 0x0000b5, 0x0000b5, INTEGER_OFFSET, 775, + 0x0000b9, 0x0000b9, INTEGER_OFFSET, -136, + 0x0000ba, 0x0000ba, INTEGER_OFFSET, -75, + 0x0000c0, 0x0000c5, DIRECT_SINGLE, 97, + 0x0000c6, 0x0000c6, INTEGER_OFFSET, 32, + 0x0000c7, 0x0000c7, INTEGER_OFFSET, -100, + 0x0000c8, 0x0000cb, DIRECT_SINGLE, 101, + 0x0000cc, 0x0000cf, DIRECT_SINGLE, 105, + 0x0000d0, 0x0000d0, INTEGER_OFFSET, 32, + 0x0000d1, 0x0000d2, INTEGER_OFFSET, -99, + 0x0000d3, 0x0000d8, DIRECT_SINGLE, 111, + 0x0000d9, 0x0000dc, DIRECT_SINGLE, 117, + 0x0000dd, 0x0000dd, INTEGER_OFFSET, -100, + 0x0000de, 0x0000de, INTEGER_OFFSET, 32, + 0x0000df, 0x0000df, INTEGER_OFFSET, 1113962, + 0x0000e0, 0x0000e5, DIRECT_SINGLE, 97, + 0x0000e7, 0x0000e7, INTEGER_OFFSET, -132, + 0x0000e8, 0x0000eb, DIRECT_SINGLE, 101, + 0x0000ec, 0x0000ef, DIRECT_SINGLE, 105, + 0x0000f1, 0x0000f2, INTEGER_OFFSET, -131, + 0x0000f3, 0x0000f8, DIRECT_SINGLE, 111, + 0x0000f9, 0x0000fc, DIRECT_SINGLE, 117, + 0x0000fd, 0x0000ff, DIRECT_SINGLE, 121, + 0x000100, 0x000105, DIRECT_SINGLE, 97, + 0x000106, 0x00010d, DIRECT_SINGLE, 99, + 0x00010e, 0x000111, DIRECT_SINGLE, 100, + 0x000112, 0x00011b, DIRECT_SINGLE, 101, + 0x00011c, 0x000123, DIRECT_SINGLE, 103, + 0x000124, 0x000127, DIRECT_SINGLE, 104, + 0x000128, 0x000131, DIRECT_SINGLE, 105, + 0x000132, 0x000133, DIRECT_SINGLE, 1114257, + 0x000134, 0x000135, DIRECT_SINGLE, 106, + 0x000136, 0x000138, DIRECT_SINGLE, 107, + 0x000139, 0x000140, DIRECT_SINGLE, 108, + 0x000141, 0x000141, ALTERNATING_UL, 0, + 0x000142, 0x000142, INTEGER_OFFSET, -214, + 0x000143, 0x000148, DIRECT_SINGLE, 110, + 0x00014a, 0x00014a, ALTERNATING_AL, 0, + 0x00014c, 0x000151, DIRECT_SINGLE, 111, + 
0x000152, 0x000153, DIRECT_SINGLE, 1114258, + 0x000154, 0x000159, DIRECT_SINGLE, 114, + 0x00015a, 0x000161, DIRECT_SINGLE, 115, + 0x000162, 0x000165, DIRECT_SINGLE, 116, + 0x000166, 0x000166, ALTERNATING_AL, 0, + 0x000167, 0x000168, INTEGER_OFFSET, -243, + 0x000169, 0x000173, DIRECT_SINGLE, 117, + 0x000174, 0x000175, DIRECT_SINGLE, 119, + 0x000176, 0x000178, DIRECT_SINGLE, 121, + 0x000179, 0x00017e, DIRECT_SINGLE, 122, + 0x00017f, 0x00017f, INTEGER_OFFSET, -268, + 0x000181, 0x000181, INTEGER_OFFSET, 210, + 0x000182, 0x000184, ALTERNATING_AL, 0, + 0x000186, 0x000186, INTEGER_OFFSET, 206, + 0x000187, 0x000187, ALTERNATING_UL, 0, + 0x000189, 0x00018a, INTEGER_OFFSET, 205, + 0x00018b, 0x00018b, ALTERNATING_UL, 0, + 0x00018e, 0x00018e, INTEGER_OFFSET, 79, + 0x00018f, 0x00018f, INTEGER_OFFSET, 202, + 0x000190, 0x000190, INTEGER_OFFSET, 203, + 0x000191, 0x000191, ALTERNATING_UL, 0, + 0x000193, 0x000193, INTEGER_OFFSET, 205, + 0x000194, 0x000194, INTEGER_OFFSET, 207, + 0x000196, 0x000196, INTEGER_OFFSET, 211, + 0x000197, 0x000197, INTEGER_OFFSET, 209, + 0x000198, 0x000198, ALTERNATING_AL, 0, + 0x00019c, 0x00019c, INTEGER_OFFSET, 211, + 0x00019d, 0x00019d, INTEGER_OFFSET, 213, + 0x00019f, 0x00019f, INTEGER_OFFSET, 214, + 0x0001a0, 0x0001a1, DIRECT_SINGLE, 111, + 0x0001a2, 0x0001a4, ALTERNATING_AL, 0, + 0x0001a6, 0x0001a6, INTEGER_OFFSET, 218, + 0x0001a7, 0x0001a7, ALTERNATING_UL, 0, + 0x0001a9, 0x0001a9, INTEGER_OFFSET, 218, + 0x0001ac, 0x0001ac, ALTERNATING_AL, 0, + 0x0001ae, 0x0001ae, INTEGER_OFFSET, 218, + 0x0001af, 0x0001b0, DIRECT_SINGLE, 117, + 0x0001b1, 0x0001b2, INTEGER_OFFSET, 217, + 0x0001b3, 0x0001b5, ALTERNATING_UL, 0, + 0x0001b7, 0x0001b7, INTEGER_OFFSET, 219, + 0x0001b8, 0x0001b8, ALTERNATING_AL, 0, + 0x0001bc, 0x0001bc, ALTERNATING_AL, 0, + 0x0001c4, 0x0001c6, DIRECT_SINGLE, 499, + 0x0001c7, 0x0001c9, DIRECT_SINGLE, 1114259, + 0x0001ca, 0x0001cc, DIRECT_SINGLE, 1114260, + 0x0001cd, 0x0001ce, DIRECT_SINGLE, 97, + 0x0001cf, 0x0001d0, DIRECT_SINGLE, 105, + 
0x0001d1, 0x0001d2, DIRECT_SINGLE, 111, + 0x0001d3, 0x0001dc, DIRECT_SINGLE, 117, + 0x0001de, 0x0001e1, DIRECT_SINGLE, 97, + 0x0001e2, 0x0001e3, DIRECT_SINGLE, 230, + 0x0001e4, 0x0001e4, ALTERNATING_AL, 0, + 0x0001e6, 0x0001e7, DIRECT_SINGLE, 103, + 0x0001e8, 0x0001e9, DIRECT_SINGLE, 107, + 0x0001ea, 0x0001ed, DIRECT_SINGLE, 111, + 0x0001ee, 0x0001ee, INTEGER_OFFSET, -55, + 0x0001ef, 0x0001ef, INTEGER_OFFSET, 163, + 0x0001f0, 0x0001f0, INTEGER_OFFSET, -390, + 0x0001f1, 0x0001f3, DIRECT_SINGLE, 1114261, + 0x0001f4, 0x0001f5, DIRECT_SINGLE, 103, + 0x0001f6, 0x0001f6, INTEGER_OFFSET, -97, + 0x0001f7, 0x0001f7, INTEGER_OFFSET, -56, + 0x0001f8, 0x0001f9, DIRECT_SINGLE, 110, + 0x0001fa, 0x0001fb, DIRECT_SINGLE, 97, + 0x0001fc, 0x0001fd, DIRECT_SINGLE, 230, + 0x0001fe, 0x0001ff, DIRECT_SINGLE, 111, + 0x000200, 0x000203, DIRECT_SINGLE, 97, + 0x000204, 0x000207, DIRECT_SINGLE, 101, + 0x000208, 0x00020b, DIRECT_SINGLE, 105, + 0x00020c, 0x00020f, DIRECT_SINGLE, 111, + 0x000210, 0x000213, DIRECT_SINGLE, 114, + 0x000214, 0x000217, DIRECT_SINGLE, 117, + 0x000218, 0x000219, DIRECT_SINGLE, 115, + 0x00021a, 0x00021b, DIRECT_SINGLE, 116, + 0x00021c, 0x00021c, ALTERNATING_AL, 0, + 0x00021e, 0x00021f, DIRECT_SINGLE, 104, + 0x000222, 0x000224, ALTERNATING_AL, 0, + 0x000226, 0x000227, DIRECT_SINGLE, 97, + 0x000228, 0x000229, DIRECT_SINGLE, 101, + 0x00022a, 0x000231, DIRECT_SINGLE, 111, + 0x000232, 0x000233, DIRECT_SINGLE, 121, + 0x0002a3, 0x0002a8, INTEGER_OFFSET, 1113586, + 0x0002b0, 0x0002b0, INTEGER_OFFSET, -584, + 0x0002b1, 0x0002b1, INTEGER_OFFSET, -75, + 0x0002b2, 0x0002b2, INTEGER_OFFSET, -584, + 0x0002b3, 0x0002b3, INTEGER_OFFSET, -577, + 0x0002b4, 0x0002b4, INTEGER_OFFSET, -59, + 0x0002b5, 0x0002b5, INTEGER_OFFSET, -58, + 0x0002b6, 0x0002b6, INTEGER_OFFSET, -53, + 0x0002b7, 0x0002b7, INTEGER_OFFSET, -576, + 0x0002b8, 0x0002b8, INTEGER_OFFSET, -575, + 0x0002e0, 0x0002e0, INTEGER_OFFSET, -125, + 0x0002e1, 0x0002e1, INTEGER_OFFSET, -629, + 0x0002e2, 0x0002e2, INTEGER_OFFSET, -623, 
+ 0x0002e4, 0x0002e4, INTEGER_OFFSET, -79, + 0x000344, 0x000344, INTEGER_OFFSET, -60, + 0x000385, 0x000385, INTEGER_OFFSET, -733, + 0x000386, 0x000386, INTEGER_OFFSET, 43, + 0x000388, 0x000388, INTEGER_OFFSET, 45, + 0x000389, 0x000389, INTEGER_OFFSET, 46, + 0x00038a, 0x00038a, INTEGER_OFFSET, 47, + 0x00038c, 0x00038c, INTEGER_OFFSET, 51, + 0x00038e, 0x00038e, INTEGER_OFFSET, 55, + 0x00038f, 0x00038f, INTEGER_OFFSET, 58, + 0x000390, 0x000390, INTEGER_OFFSET, 41, + 0x000391, 0x0003a1, INTEGER_OFFSET, 32, + 0x0003a3, 0x0003a9, INTEGER_OFFSET, 32, + 0x0003aa, 0x0003aa, INTEGER_OFFSET, 15, + 0x0003ab, 0x0003ab, INTEGER_OFFSET, 26, + 0x0003ac, 0x0003ac, INTEGER_OFFSET, 5, + 0x0003ad, 0x0003ad, INTEGER_OFFSET, 8, + 0x0003ae, 0x0003ae, INTEGER_OFFSET, 9, + 0x0003af, 0x0003af, INTEGER_OFFSET, 10, + 0x0003b0, 0x0003b0, INTEGER_OFFSET, 21, + 0x0003c2, 0x0003c2, ALTERNATING_AL, 0, + 0x0003ca, 0x0003ca, INTEGER_OFFSET, -17, + 0x0003cb, 0x0003cb, INTEGER_OFFSET, -6, + 0x0003cc, 0x0003cc, INTEGER_OFFSET, -13, + 0x0003cd, 0x0003cd, INTEGER_OFFSET, -8, + 0x0003ce, 0x0003ce, INTEGER_OFFSET, -5, + 0x0003d0, 0x0003d0, INTEGER_OFFSET, -30, + 0x0003d1, 0x0003d1, INTEGER_OFFSET, -25, + 0x0003d2, 0x0003d4, DIRECT_SINGLE, 965, + 0x0003d5, 0x0003d5, INTEGER_OFFSET, -15, + 0x0003d6, 0x0003d6, INTEGER_OFFSET, -22, + 0x0003da, 0x0003ee, ALTERNATING_AL, 0, + 0x0003f0, 0x0003f0, INTEGER_OFFSET, -54, + 0x0003f1, 0x0003f1, INTEGER_OFFSET, -48, + 0x0003f2, 0x0003f2, INTEGER_OFFSET, -47, + 0x0003f4, 0x0003f4, INTEGER_OFFSET, -60, + 0x000400, 0x000401, DIRECT_SINGLE, 1077, + 0x000402, 0x000402, INTEGER_OFFSET, 80, + 0x000403, 0x000403, INTEGER_OFFSET, 48, + 0x000404, 0x000406, INTEGER_OFFSET, 80, + 0x000407, 0x000407, INTEGER_OFFSET, 79, + 0x000408, 0x00040b, INTEGER_OFFSET, 80, + 0x00040c, 0x00040c, INTEGER_OFFSET, 46, + 0x00040d, 0x00040d, INTEGER_OFFSET, 43, + 0x00040e, 0x00040e, INTEGER_OFFSET, 53, + 0x00040f, 0x00040f, INTEGER_OFFSET, 80, + 0x000410, 0x000418, INTEGER_OFFSET, 32, + 0x000419, 
0x000419, INTEGER_OFFSET, 31, + 0x00041a, 0x00042f, INTEGER_OFFSET, 32, + 0x000439, 0x000439, INTEGER_OFFSET, -1, + 0x000450, 0x000451, DIRECT_SINGLE, 1077, + 0x000453, 0x000453, INTEGER_OFFSET, -32, + 0x000457, 0x000457, INTEGER_OFFSET, -1, + 0x00045c, 0x00045c, INTEGER_OFFSET, -34, + 0x00045d, 0x00045d, INTEGER_OFFSET, -37, + 0x00045e, 0x00045e, INTEGER_OFFSET, -27, + 0x000460, 0x000474, ALTERNATING_AL, 0, + 0x000476, 0x000477, DIRECT_SINGLE, 1141, + 0x000478, 0x000480, ALTERNATING_AL, 0, + 0x00048c, 0x00048e, ALTERNATING_AL, 0, + 0x000490, 0x000491, DIRECT_SINGLE, 1075, + 0x000492, 0x0004be, ALTERNATING_AL, 0, + 0x0004c1, 0x0004c2, DIRECT_SINGLE, 1078, + 0x0004c3, 0x0004c3, ALTERNATING_UL, 0, + 0x0004c7, 0x0004c7, ALTERNATING_UL, 0, + 0x0004cb, 0x0004cb, ALTERNATING_UL, 0, + 0x0004d0, 0x0004d3, DIRECT_SINGLE, 1072, + 0x0004d4, 0x0004d4, ALTERNATING_AL, 0, + 0x0004d6, 0x0004d7, DIRECT_SINGLE, 1077, + 0x0004d8, 0x0004d8, ALTERNATING_AL, 0, + 0x0004da, 0x0004db, DIRECT_SINGLE, 1241, + 0x0004dc, 0x0004dd, DIRECT_SINGLE, 1078, + 0x0004de, 0x0004df, DIRECT_SINGLE, 1079, + 0x0004e0, 0x0004e0, ALTERNATING_AL, 0, + 0x0004e2, 0x0004e5, DIRECT_SINGLE, 1080, + 0x0004e6, 0x0004e7, DIRECT_SINGLE, 1086, + 0x0004e8, 0x0004e8, ALTERNATING_AL, 0, + 0x0004ea, 0x0004eb, DIRECT_SINGLE, 1257, + 0x0004ec, 0x0004ed, DIRECT_SINGLE, 1101, + 0x0004ee, 0x0004f3, DIRECT_SINGLE, 1091, + 0x0004f4, 0x0004f5, DIRECT_SINGLE, 1095, + 0x0004f8, 0x0004f9, DIRECT_SINGLE, 1099, + 0x000531, 0x000556, INTEGER_OFFSET, 48, + 0x0005da, 0x0005da, ALTERNATING_AL, 0, + 0x0005dd, 0x0005df, ALTERNATING_UL, 0, + 0x0005e3, 0x0005e5, ALTERNATING_UL, 0, + 0x000622, 0x000623, DIRECT_SINGLE, 1575, + 0x000624, 0x000624, INTEGER_OFFSET, 36, + 0x000625, 0x000625, INTEGER_OFFSET, 2, + 0x000626, 0x000626, INTEGER_OFFSET, 36, + 0x000660, 0x000669, INTEGER_OFFSET, -1584, + 0x0006c0, 0x0006c0, INTEGER_OFFSET, 21, + 0x0006c2, 0x0006c2, INTEGER_OFFSET, -1, + 0x0006d3, 0x0006d3, INTEGER_OFFSET, -1, + 0x0006f0, 0x0006f9, 
INTEGER_OFFSET, -1728, + 0x000929, 0x000929, INTEGER_OFFSET, -1, + 0x000931, 0x000931, INTEGER_OFFSET, -1, + 0x000934, 0x000934, INTEGER_OFFSET, -1, + 0x0009cb, 0x0009cc, DIRECT_SINGLE, 2503, + 0x000b48, 0x000b4c, DIRECT_SINGLE, 2887, + 0x000b94, 0x000b94, INTEGER_OFFSET, -2, + 0x000bca, 0x000bcb, INTEGER_OFFSET, -4, + 0x000bcc, 0x000bcc, INTEGER_OFFSET, -6, + 0x000c48, 0x000c48, INTEGER_OFFSET, -2, + 0x000cc0, 0x000cc0, INTEGER_OFFSET, -1, + 0x000cc7, 0x000ccb, DIRECT_SINGLE, 3270, + 0x000d4a, 0x000d4b, INTEGER_OFFSET, -4, + 0x000d4c, 0x000d4c, INTEGER_OFFSET, -6, + 0x000dda, 0x000dde, DIRECT_SINGLE, 3545, + 0x000f73, 0x000f81, DIRECT_SINGLE, 3953, + 0x001026, 0x001026, INTEGER_OFFSET, -1, + 0x0010a0, 0x0010c5, INTEGER_OFFSET, 48, + 0x001e00, 0x001e01, DIRECT_SINGLE, 97, + 0x001e02, 0x001e07, DIRECT_SINGLE, 98, + 0x001e08, 0x001e09, DIRECT_SINGLE, 99, + 0x001e0a, 0x001e13, DIRECT_SINGLE, 100, + 0x001e14, 0x001e1d, DIRECT_SINGLE, 101, + 0x001e1e, 0x001e1f, DIRECT_SINGLE, 102, + 0x001e20, 0x001e21, DIRECT_SINGLE, 103, + 0x001e22, 0x001e2b, DIRECT_SINGLE, 104, + 0x001e2c, 0x001e2f, DIRECT_SINGLE, 105, + 0x001e30, 0x001e35, DIRECT_SINGLE, 107, + 0x001e36, 0x001e3d, DIRECT_SINGLE, 108, + 0x001e3e, 0x001e43, DIRECT_SINGLE, 109, + 0x001e44, 0x001e4b, DIRECT_SINGLE, 110, + 0x001e4c, 0x001e53, DIRECT_SINGLE, 111, + 0x001e54, 0x001e57, DIRECT_SINGLE, 112, + 0x001e58, 0x001e5f, DIRECT_SINGLE, 114, + 0x001e60, 0x001e69, DIRECT_SINGLE, 115, + 0x001e6a, 0x001e71, DIRECT_SINGLE, 116, + 0x001e72, 0x001e7b, DIRECT_SINGLE, 117, + 0x001e7c, 0x001e7f, DIRECT_SINGLE, 118, + 0x001e80, 0x001e89, DIRECT_SINGLE, 119, + 0x001e8a, 0x001e8d, DIRECT_SINGLE, 120, + 0x001e8e, 0x001e8f, DIRECT_SINGLE, 121, + 0x001e90, 0x001e95, DIRECT_SINGLE, 122, + 0x001e96, 0x001e96, INTEGER_OFFSET, -7726, + 0x001e97, 0x001e97, INTEGER_OFFSET, -7715, + 0x001e98, 0x001e98, INTEGER_OFFSET, -7713, + 0x001e99, 0x001e99, INTEGER_OFFSET, -7712, + 0x001e9a, 0x001e9a, INTEGER_OFFSET, 1106361, + 0x001e9b, 0x001e9b, 
INTEGER_OFFSET, -7720, + 0x001ea0, 0x001eb7, DIRECT_SINGLE, 97, + 0x001eb8, 0x001ec7, DIRECT_SINGLE, 101, + 0x001ec8, 0x001ecb, DIRECT_SINGLE, 105, + 0x001ecc, 0x001ee3, DIRECT_SINGLE, 111, + 0x001ee4, 0x001ef1, DIRECT_SINGLE, 117, + 0x001ef2, 0x001ef9, DIRECT_SINGLE, 121, + 0x001f00, 0x001f0f, DIRECT_SINGLE, 945, + 0x001f10, 0x001f1d, DIRECT_SINGLE, 949, + 0x001f20, 0x001f2f, DIRECT_SINGLE, 951, + 0x001f30, 0x001f3f, DIRECT_SINGLE, 953, + 0x001f40, 0x001f4d, DIRECT_SINGLE, 959, + 0x001f50, 0x001f5f, DIRECT_SINGLE, 965, + 0x001f60, 0x001f6f, DIRECT_SINGLE, 969, + 0x001f70, 0x001f70, INTEGER_OFFSET, -7103, + 0x001f72, 0x001f72, INTEGER_OFFSET, -7101, + 0x001f74, 0x001f74, INTEGER_OFFSET, -7101, + 0x001f76, 0x001f76, INTEGER_OFFSET, -7101, + 0x001f78, 0x001f78, INTEGER_OFFSET, -7097, + 0x001f7a, 0x001f7a, INTEGER_OFFSET, -7093, + 0x001f7c, 0x001f7c, INTEGER_OFFSET, -7091, + 0x001f80, 0x001f8f, DIRECT_SINGLE, 945, + 0x001f90, 0x001f9f, DIRECT_SINGLE, 951, + 0x001fa0, 0x001faf, DIRECT_SINGLE, 969, + 0x001fb0, 0x001fba, DIRECT_SINGLE, 945, + 0x001fbb, 0x001fbb, INTEGER_OFFSET, -74, + 0x001fbc, 0x001fbc, INTEGER_OFFSET, -7179, + 0x001fbe, 0x001fbe, INTEGER_OFFSET, -7173, + 0x001fc1, 0x001fc1, INTEGER_OFFSET, -7961, + 0x001fc2, 0x001fc7, DIRECT_SINGLE, 951, + 0x001fc8, 0x001fc8, INTEGER_OFFSET, -7187, + 0x001fc9, 0x001fc9, INTEGER_OFFSET, -86, + 0x001fca, 0x001fca, INTEGER_OFFSET, -7187, + 0x001fcb, 0x001fcb, INTEGER_OFFSET, -86, + 0x001fcc, 0x001fcc, INTEGER_OFFSET, -7189, + 0x001fcd, 0x001fcf, DIRECT_SINGLE, 8127, + 0x001fd0, 0x001fda, DIRECT_SINGLE, 953, + 0x001fdb, 0x001fdb, INTEGER_OFFSET, -100, + 0x001fdd, 0x001fdf, DIRECT_SINGLE, 8190, + 0x001fe0, 0x001fe2, DIRECT_SINGLE, 965, + 0x001fe4, 0x001fe5, DIRECT_SINGLE, 961, + 0x001fe6, 0x001fea, DIRECT_SINGLE, 965, + 0x001feb, 0x001feb, INTEGER_OFFSET, -112, + 0x001fec, 0x001fec, INTEGER_OFFSET, -7211, + 0x001fed, 0x001fed, INTEGER_OFFSET, -8005, + 0x001ff2, 0x001ff7, DIRECT_SINGLE, 969, + 0x001ff8, 0x001ff8, 
INTEGER_OFFSET, -7225, + 0x001ff9, 0x001ff9, INTEGER_OFFSET, -128, + 0x001ffa, 0x001ffa, INTEGER_OFFSET, -7217, + 0x001ffb, 0x001ffb, INTEGER_OFFSET, -126, + 0x001ffc, 0x001ffc, INTEGER_OFFSET, -7219, + 0x002070, 0x002070, INTEGER_OFFSET, -8256, + 0x002074, 0x002079, INTEGER_OFFSET, -8256, + 0x00207f, 0x00207f, INTEGER_OFFSET, -8209, + 0x002080, 0x002089, INTEGER_OFFSET, -8272, + 0x002102, 0x002103, DIRECT_SINGLE, 99, + 0x002105, 0x002105, INTEGER_OFFSET, 1105814, + 0x002109, 0x00210b, INTEGER_OFFSET, -8355, + 0x00210c, 0x00210f, DIRECT_SINGLE, 104, + 0x002110, 0x002111, DIRECT_SINGLE, 105, + 0x002112, 0x002113, DIRECT_SINGLE, 108, + 0x002115, 0x002115, INTEGER_OFFSET, -8359, + 0x002116, 0x002116, INTEGER_OFFSET, 1105798, + 0x002119, 0x00211b, INTEGER_OFFSET, -8361, + 0x00211c, 0x00211d, DIRECT_SINGLE, 114, + 0x002120, 0x002122, INTEGER_OFFSET, 1105789, + 0x002124, 0x002124, INTEGER_OFFSET, -8362, + 0x002126, 0x002126, INTEGER_OFFSET, -7517, + 0x002128, 0x002128, INTEGER_OFFSET, -8366, + 0x00212a, 0x00212a, INTEGER_OFFSET, -8383, + 0x00212b, 0x00212c, INTEGER_OFFSET, -8394, + 0x00212f, 0x002130, DIRECT_SINGLE, 101, + 0x002131, 0x002131, INTEGER_OFFSET, -8395, + 0x002133, 0x002133, INTEGER_OFFSET, -8390, + 0x002134, 0x002134, INTEGER_OFFSET, -8389, + 0x00215f, 0x00215f, INTEGER_OFFSET, -8494, + 0x002160, 0x002160, INTEGER_OFFSET, -8439, + 0x002161, 0x002163, INTEGER_OFFSET, 1105727, + 0x002164, 0x002164, INTEGER_OFFSET, -8430, + 0x002165, 0x002166, INTEGER_OFFSET, 1105726, + 0x002167, 0x002168, INTEGER_OFFSET, 1105725, + 0x002169, 0x002169, INTEGER_OFFSET, -8433, + 0x00216a, 0x00216b, INTEGER_OFFSET, 1105724, + 0x00216c, 0x00216c, INTEGER_OFFSET, -8448, + 0x00216d, 0x00216e, INTEGER_OFFSET, -8458, + 0x00216f, 0x00216f, INTEGER_OFFSET, -8450, + 0x002170, 0x002170, INTEGER_OFFSET, -8455, + 0x002171, 0x002173, INTEGER_OFFSET, 1105711, + 0x002174, 0x002174, INTEGER_OFFSET, -8446, + 0x002175, 0x002176, INTEGER_OFFSET, 1105710, + 0x002177, 0x002178, INTEGER_OFFSET, 
1105709, + 0x002179, 0x002179, INTEGER_OFFSET, -8449, + 0x00217a, 0x00217b, INTEGER_OFFSET, 1105708, + 0x00217c, 0x00217c, INTEGER_OFFSET, -8464, + 0x00217d, 0x00217e, INTEGER_OFFSET, -8474, + 0x00217f, 0x00217f, INTEGER_OFFSET, -8466, + 0x00219a, 0x00219a, INTEGER_OFFSET, -10, + 0x00219b, 0x00219b, INTEGER_OFFSET, -9, + 0x0021ae, 0x0021ae, INTEGER_OFFSET, -26, + 0x0021cd, 0x0021cd, INTEGER_OFFSET, 3, + 0x0021ce, 0x0021ce, INTEGER_OFFSET, 6, + 0x0021cf, 0x0021cf, INTEGER_OFFSET, 3, + 0x002204, 0x002204, INTEGER_OFFSET, -1, + 0x002209, 0x002209, INTEGER_OFFSET, -1, + 0x00220c, 0x00220c, INTEGER_OFFSET, -1, + 0x002222, 0x002222, ALTERNATING_AL, 0, + 0x002224, 0x002224, INTEGER_OFFSET, -1, + 0x002226, 0x002226, INTEGER_OFFSET, -1, + 0x002241, 0x002241, INTEGER_OFFSET, -5, + 0x002244, 0x002244, INTEGER_OFFSET, -1, + 0x002247, 0x002247, INTEGER_OFFSET, -2, + 0x002249, 0x002249, INTEGER_OFFSET, -1, + 0x002260, 0x002260, INTEGER_OFFSET, -8739, + 0x002262, 0x002262, INTEGER_OFFSET, -1, + 0x00226d, 0x00226d, INTEGER_OFFSET, -32, + 0x00226e, 0x00226e, INTEGER_OFFSET, -8754, + 0x00226f, 0x00226f, INTEGER_OFFSET, -8753, + 0x002270, 0x002271, INTEGER_OFFSET, -12, + 0x002274, 0x002275, INTEGER_OFFSET, -2, + 0x002278, 0x002279, INTEGER_OFFSET, -2, + 0x002280, 0x002281, INTEGER_OFFSET, -6, + 0x002284, 0x002285, INTEGER_OFFSET, -2, + 0x002288, 0x002289, INTEGER_OFFSET, -2, + 0x0022ac, 0x0022ac, INTEGER_OFFSET, -10, + 0x0022ad, 0x0022ae, INTEGER_OFFSET, -5, + 0x0022af, 0x0022af, INTEGER_OFFSET, -4, + 0x0022e0, 0x0022e1, INTEGER_OFFSET, -100, + 0x0022e2, 0x0022e3, INTEGER_OFFSET, -81, + 0x0022ea, 0x0022ed, INTEGER_OFFSET, -56, + 0x002460, 0x002468, INTEGER_OFFSET, -9263, + 0x002474, 0x00247c, INTEGER_OFFSET, -9283, + 0x002488, 0x002490, INTEGER_OFFSET, -9303, + 0x00249c, 0x0024b5, INTEGER_OFFSET, -9275, + 0x0024b6, 0x0024cf, INTEGER_OFFSET, -9301, + 0x0024d0, 0x0024e9, INTEGER_OFFSET, -9327, + 0x0024ea, 0x0024ea, INTEGER_OFFSET, -9402, + 0x00277d, 0x00277e, INTEGER_OFFSET, -10053, + 
0x002787, 0x002788, INTEGER_OFFSET, -10063, + 0x002791, 0x002792, INTEGER_OFFSET, -10073, + 0x003007, 0x003007, INTEGER_OFFSET, -12247, + 0x003021, 0x003029, INTEGER_OFFSET, -12272, + 0x00304c, 0x00304c, INTEGER_OFFSET, -1, + 0x00304e, 0x00304e, INTEGER_OFFSET, -1, + 0x003050, 0x003050, INTEGER_OFFSET, -1, + 0x003052, 0x003052, INTEGER_OFFSET, -1, + 0x003054, 0x003054, INTEGER_OFFSET, -1, + 0x003056, 0x003056, INTEGER_OFFSET, -1, + 0x003058, 0x003058, INTEGER_OFFSET, -1, + 0x00305a, 0x00305a, INTEGER_OFFSET, -1, + 0x00305c, 0x00305c, INTEGER_OFFSET, -1, + 0x00305e, 0x00305e, INTEGER_OFFSET, -1, + 0x003060, 0x003060, INTEGER_OFFSET, -1, + 0x003062, 0x003062, INTEGER_OFFSET, -1, + 0x003065, 0x003065, INTEGER_OFFSET, -1, + 0x003067, 0x003067, INTEGER_OFFSET, -1, + 0x003069, 0x003069, INTEGER_OFFSET, -1, + 0x003070, 0x003071, DIRECT_SINGLE, 12399, + 0x003073, 0x003074, DIRECT_SINGLE, 12402, + 0x003076, 0x003077, DIRECT_SINGLE, 12405, + 0x003079, 0x00307a, DIRECT_SINGLE, 12408, + 0x00307c, 0x00307d, DIRECT_SINGLE, 12411, + 0x003094, 0x003094, INTEGER_OFFSET, -78, + 0x00309e, 0x00309e, INTEGER_OFFSET, -1, + 0x0030ac, 0x0030ac, INTEGER_OFFSET, -1, + 0x0030ae, 0x0030ae, INTEGER_OFFSET, -1, + 0x0030b0, 0x0030b0, INTEGER_OFFSET, -1, + 0x0030b2, 0x0030b2, INTEGER_OFFSET, -1, + 0x0030b4, 0x0030b4, INTEGER_OFFSET, -1, + 0x0030b6, 0x0030b6, INTEGER_OFFSET, -1, + 0x0030b8, 0x0030b8, INTEGER_OFFSET, -1, + 0x0030ba, 0x0030ba, INTEGER_OFFSET, -1, + 0x0030bc, 0x0030bc, INTEGER_OFFSET, -1, + 0x0030be, 0x0030be, INTEGER_OFFSET, -1, + 0x0030c0, 0x0030c0, INTEGER_OFFSET, -1, + 0x0030c2, 0x0030c2, INTEGER_OFFSET, -1, + 0x0030c5, 0x0030c5, INTEGER_OFFSET, -1, + 0x0030c7, 0x0030c7, INTEGER_OFFSET, -1, + 0x0030c9, 0x0030c9, INTEGER_OFFSET, -1, + 0x0030d0, 0x0030d1, DIRECT_SINGLE, 12495, + 0x0030d3, 0x0030d4, DIRECT_SINGLE, 12498, + 0x0030d6, 0x0030d7, DIRECT_SINGLE, 12501, + 0x0030d9, 0x0030da, DIRECT_SINGLE, 12504, + 0x0030dc, 0x0030dd, DIRECT_SINGLE, 12507, + 0x0030f4, 0x0030f4, 
INTEGER_OFFSET, -78, + 0x0030f7, 0x0030fa, INTEGER_OFFSET, -8, + 0x0030fe, 0x0030fe, INTEGER_OFFSET, -1, + 0x00f8e2, 0x00f8e3, DIRECT_SINGLE, 1102, + 0x00f8e4, 0x00f8e5, DIRECT_SINGLE, 1099, + 0x00f8e6, 0x00f8e7, DIRECT_SINGLE, 1098, + 0x00f8e8, 0x00f8e9, DIRECT_SINGLE, 1091, + 0x00f8ea, 0x00f8eb, DIRECT_SINGLE, 1086, + 0x00f8ec, 0x00f8ed, DIRECT_SINGLE, 1080, + 0x00f8ee, 0x00f8ef, DIRECT_SINGLE, 1101, + 0x00f8f0, 0x00f8f1, DIRECT_SINGLE, 1072, + 0x00f8f6, 0x00f8f6, INTEGER_OFFSET, -63615, + 0x00f8f7, 0x00f8f7, INTEGER_OFFSET, -63625, + 0x00f8f8, 0x00f8f8, INTEGER_OFFSET, -63631, + 0x00f8f9, 0x00f8f9, INTEGER_OFFSET, -63618, + 0x00f8fa, 0x00f8fa, INTEGER_OFFSET, -63633, + 0x00fb00, 0x00fb00, INTEGER_OFFSET, 1050024, + 0x00fb01, 0x00fb05, INTEGER_OFFSET, 1049990, + 0x00fb06, 0x00fb06, INTEGER_OFFSET, 1049989, + 0x00ff10, 0x00ff19, INTEGER_OFFSET, -65248, + 0x00ff21, 0x00ff3a, INTEGER_OFFSET, -65216, + 0x00ff41, 0x00ff5a, INTEGER_OFFSET, -65248, + 0x010400, 0x010425, INTEGER_OFFSET, 40, + }); + public static final CodePointSet FOLDABLE_CHARACTERS = rangeSet(0x000041, 0x00005a, 0x0000b5, 0x0000b5, 0x0000c0, 0x0000d6, 0x0000d8, 0x0000de, 0x000100, 0x000100, 0x000102, 0x000102, 0x000104, + 0x000104, 0x000106, 0x000106, 0x000108, 0x000108, 0x00010a, 0x00010a, 0x00010c, 0x00010c, 0x00010e, 0x00010e, 0x000110, 0x000110, 0x000112, 0x000112, 0x000114, 0x000114, 0x000116, + 0x000116, 0x000118, 0x000118, 0x00011a, 0x00011a, 0x00011c, 0x00011c, 0x00011e, 0x00011e, 0x000120, 0x000120, 0x000122, 0x000122, 0x000124, 0x000124, 0x000126, 0x000126, 0x000128, + 0x000128, 0x00012a, 0x00012a, 0x00012c, 0x00012c, 0x00012e, 0x00012e, 0x000132, 0x000132, 0x000134, 0x000134, 0x000136, 0x000136, 0x000139, 0x000139, 0x00013b, 0x00013b, 0x00013d, + 0x00013d, 0x00013f, 0x00013f, 0x000141, 0x000141, 0x000143, 0x000143, 0x000145, 0x000145, 0x000147, 0x000147, 0x00014a, 0x00014a, 0x00014c, 0x00014c, 0x00014e, 0x00014e, 0x000150, + 0x000150, 0x000152, 0x000152, 0x000154, 0x000154, 0x000156, 
0x000156, 0x000158, 0x000158, 0x00015a, 0x00015a, 0x00015c, 0x00015c, 0x00015e, 0x00015e, 0x000160, 0x000160, 0x000162, + 0x000162, 0x000164, 0x000164, 0x000166, 0x000166, 0x000168, 0x000168, 0x00016a, 0x00016a, 0x00016c, 0x00016c, 0x00016e, 0x00016e, 0x000170, 0x000170, 0x000172, 0x000172, 0x000174, + 0x000174, 0x000176, 0x000176, 0x000178, 0x000179, 0x00017b, 0x00017b, 0x00017d, 0x00017d, 0x00017f, 0x00017f, 0x000181, 0x000182, 0x000184, 0x000184, 0x000186, 0x000187, 0x000189, + 0x00018b, 0x00018e, 0x000191, 0x000193, 0x000194, 0x000196, 0x000198, 0x00019c, 0x00019d, 0x00019f, 0x0001a0, 0x0001a2, 0x0001a2, 0x0001a4, 0x0001a4, 0x0001a6, 0x0001a7, 0x0001a9, + 0x0001a9, 0x0001ac, 0x0001ac, 0x0001ae, 0x0001af, 0x0001b1, 0x0001b3, 0x0001b5, 0x0001b5, 0x0001b7, 0x0001b8, 0x0001bc, 0x0001bc, 0x0001c4, 0x0001c5, 0x0001c7, 0x0001c8, 0x0001ca, + 0x0001cb, 0x0001cd, 0x0001cd, 0x0001cf, 0x0001cf, 0x0001d1, 0x0001d1, 0x0001d3, 0x0001d3, 0x0001d5, 0x0001d5, 0x0001d7, 0x0001d7, 0x0001d9, 0x0001d9, 0x0001db, 0x0001db, 0x0001de, + 0x0001de, 0x0001e0, 0x0001e0, 0x0001e2, 0x0001e2, 0x0001e4, 0x0001e4, 0x0001e6, 0x0001e6, 0x0001e8, 0x0001e8, 0x0001ea, 0x0001ea, 0x0001ec, 0x0001ec, 0x0001ee, 0x0001ee, 0x0001f1, + 0x0001f2, 0x0001f4, 0x0001f4, 0x0001f6, 0x0001f8, 0x0001fa, 0x0001fa, 0x0001fc, 0x0001fc, 0x0001fe, 0x0001fe, 0x000200, 0x000200, 0x000202, 0x000202, 0x000204, 0x000204, 0x000206, + 0x000206, 0x000208, 0x000208, 0x00020a, 0x00020a, 0x00020c, 0x00020c, 0x00020e, 0x00020e, 0x000210, 0x000210, 0x000212, 0x000212, 0x000214, 0x000214, 0x000216, 0x000216, 0x000218, + 0x000218, 0x00021a, 0x00021a, 0x00021c, 0x00021c, 0x00021e, 0x00021e, 0x000220, 0x000220, 0x000222, 0x000222, 0x000224, 0x000224, 0x000226, 0x000226, 0x000228, 0x000228, 0x00022a, + 0x00022a, 0x00022c, 0x00022c, 0x00022e, 0x00022e, 0x000230, 0x000230, 0x000232, 0x000232, 0x00023a, 0x00023b, 0x00023d, 0x00023e, 0x000241, 0x000241, 0x000243, 0x000246, 0x000248, + 0x000248, 0x00024a, 0x00024a, 0x00024c, 0x00024c, 
0x00024e, 0x00024e, 0x000345, 0x000345, 0x000370, 0x000370, 0x000372, 0x000372, 0x000376, 0x000376, 0x00037f, 0x00037f, 0x000386, + 0x000386, 0x000388, 0x00038a, 0x00038c, 0x00038c, 0x00038e, 0x00038f, 0x000391, 0x0003a1, 0x0003a3, 0x0003ab, 0x0003c2, 0x0003c2, 0x0003cf, 0x0003d1, 0x0003d5, 0x0003d6, 0x0003d8, + 0x0003d8, 0x0003da, 0x0003da, 0x0003dc, 0x0003dc, 0x0003de, 0x0003de, 0x0003e0, 0x0003e0, 0x0003e2, 0x0003e2, 0x0003e4, 0x0003e4, 0x0003e6, 0x0003e6, 0x0003e8, 0x0003e8, 0x0003ea, + 0x0003ea, 0x0003ec, 0x0003ec, 0x0003ee, 0x0003ee, 0x0003f0, 0x0003f1, 0x0003f4, 0x0003f5, 0x0003f7, 0x0003f7, 0x0003f9, 0x0003fa, 0x0003fd, 0x00042f, 0x000460, 0x000460, 0x000462, + 0x000462, 0x000464, 0x000464, 0x000466, 0x000466, 0x000468, 0x000468, 0x00046a, 0x00046a, 0x00046c, 0x00046c, 0x00046e, 0x00046e, 0x000470, 0x000470, 0x000472, 0x000472, 0x000474, + 0x000474, 0x000476, 0x000476, 0x000478, 0x000478, 0x00047a, 0x00047a, 0x00047c, 0x00047c, 0x00047e, 0x00047e, 0x000480, 0x000480, 0x00048a, 0x00048a, 0x00048c, 0x00048c, 0x00048e, + 0x00048e, 0x000490, 0x000490, 0x000492, 0x000492, 0x000494, 0x000494, 0x000496, 0x000496, 0x000498, 0x000498, 0x00049a, 0x00049a, 0x00049c, 0x00049c, 0x00049e, 0x00049e, 0x0004a0, + 0x0004a0, 0x0004a2, 0x0004a2, 0x0004a4, 0x0004a4, 0x0004a6, 0x0004a6, 0x0004a8, 0x0004a8, 0x0004aa, 0x0004aa, 0x0004ac, 0x0004ac, 0x0004ae, 0x0004ae, 0x0004b0, 0x0004b0, 0x0004b2, + 0x0004b2, 0x0004b4, 0x0004b4, 0x0004b6, 0x0004b6, 0x0004b8, 0x0004b8, 0x0004ba, 0x0004ba, 0x0004bc, 0x0004bc, 0x0004be, 0x0004be, 0x0004c0, 0x0004c1, 0x0004c3, 0x0004c3, 0x0004c5, + 0x0004c5, 0x0004c7, 0x0004c7, 0x0004c9, 0x0004c9, 0x0004cb, 0x0004cb, 0x0004cd, 0x0004cd, 0x0004d0, 0x0004d0, 0x0004d2, 0x0004d2, 0x0004d4, 0x0004d4, 0x0004d6, 0x0004d6, 0x0004d8, + 0x0004d8, 0x0004da, 0x0004da, 0x0004dc, 0x0004dc, 0x0004de, 0x0004de, 0x0004e0, 0x0004e0, 0x0004e2, 0x0004e2, 0x0004e4, 0x0004e4, 0x0004e6, 0x0004e6, 0x0004e8, 0x0004e8, 0x0004ea, + 0x0004ea, 0x0004ec, 0x0004ec, 0x0004ee, 
0x0004ee, 0x0004f0, 0x0004f0, 0x0004f2, 0x0004f2, 0x0004f4, 0x0004f4, 0x0004f6, 0x0004f6, 0x0004f8, 0x0004f8, 0x0004fa, 0x0004fa, 0x0004fc, + 0x0004fc, 0x0004fe, 0x0004fe, 0x000500, 0x000500, 0x000502, 0x000502, 0x000504, 0x000504, 0x000506, 0x000506, 0x000508, 0x000508, 0x00050a, 0x00050a, 0x00050c, 0x00050c, 0x00050e, + 0x00050e, 0x000510, 0x000510, 0x000512, 0x000512, 0x000514, 0x000514, 0x000516, 0x000516, 0x000518, 0x000518, 0x00051a, 0x00051a, 0x00051c, 0x00051c, 0x00051e, 0x00051e, 0x000520, + 0x000520, 0x000522, 0x000522, 0x000524, 0x000524, 0x000526, 0x000526, 0x000528, 0x000528, 0x00052a, 0x00052a, 0x00052c, 0x00052c, 0x00052e, 0x00052e, 0x000531, 0x000556, 0x0010a0, + 0x0010c5, 0x0010c7, 0x0010c7, 0x0010cd, 0x0010cd, 0x0013f8, 0x0013fd, 0x001c80, 0x001c88, 0x001c90, 0x001cba, 0x001cbd, 0x001cbf, 0x001e00, 0x001e00, 0x001e02, 0x001e02, 0x001e04, + 0x001e04, 0x001e06, 0x001e06, 0x001e08, 0x001e08, 0x001e0a, 0x001e0a, 0x001e0c, 0x001e0c, 0x001e0e, 0x001e0e, 0x001e10, 0x001e10, 0x001e12, 0x001e12, 0x001e14, 0x001e14, 0x001e16, + 0x001e16, 0x001e18, 0x001e18, 0x001e1a, 0x001e1a, 0x001e1c, 0x001e1c, 0x001e1e, 0x001e1e, 0x001e20, 0x001e20, 0x001e22, 0x001e22, 0x001e24, 0x001e24, 0x001e26, 0x001e26, 0x001e28, + 0x001e28, 0x001e2a, 0x001e2a, 0x001e2c, 0x001e2c, 0x001e2e, 0x001e2e, 0x001e30, 0x001e30, 0x001e32, 0x001e32, 0x001e34, 0x001e34, 0x001e36, 0x001e36, 0x001e38, 0x001e38, 0x001e3a, + 0x001e3a, 0x001e3c, 0x001e3c, 0x001e3e, 0x001e3e, 0x001e40, 0x001e40, 0x001e42, 0x001e42, 0x001e44, 0x001e44, 0x001e46, 0x001e46, 0x001e48, 0x001e48, 0x001e4a, 0x001e4a, 0x001e4c, + 0x001e4c, 0x001e4e, 0x001e4e, 0x001e50, 0x001e50, 0x001e52, 0x001e52, 0x001e54, 0x001e54, 0x001e56, 0x001e56, 0x001e58, 0x001e58, 0x001e5a, 0x001e5a, 0x001e5c, 0x001e5c, 0x001e5e, + 0x001e5e, 0x001e60, 0x001e60, 0x001e62, 0x001e62, 0x001e64, 0x001e64, 0x001e66, 0x001e66, 0x001e68, 0x001e68, 0x001e6a, 0x001e6a, 0x001e6c, 0x001e6c, 0x001e6e, 0x001e6e, 0x001e70, + 0x001e70, 0x001e72, 0x001e72, 
0x001e74, 0x001e74, 0x001e76, 0x001e76, 0x001e78, 0x001e78, 0x001e7a, 0x001e7a, 0x001e7c, 0x001e7c, 0x001e7e, 0x001e7e, 0x001e80, 0x001e80, 0x001e82, + 0x001e82, 0x001e84, 0x001e84, 0x001e86, 0x001e86, 0x001e88, 0x001e88, 0x001e8a, 0x001e8a, 0x001e8c, 0x001e8c, 0x001e8e, 0x001e8e, 0x001e90, 0x001e90, 0x001e92, 0x001e92, 0x001e94, + 0x001e94, 0x001e9b, 0x001e9b, 0x001e9e, 0x001e9e, 0x001ea0, 0x001ea0, 0x001ea2, 0x001ea2, 0x001ea4, 0x001ea4, 0x001ea6, 0x001ea6, 0x001ea8, 0x001ea8, 0x001eaa, 0x001eaa, 0x001eac, + 0x001eac, 0x001eae, 0x001eae, 0x001eb0, 0x001eb0, 0x001eb2, 0x001eb2, 0x001eb4, 0x001eb4, 0x001eb6, 0x001eb6, 0x001eb8, 0x001eb8, 0x001eba, 0x001eba, 0x001ebc, 0x001ebc, 0x001ebe, + 0x001ebe, 0x001ec0, 0x001ec0, 0x001ec2, 0x001ec2, 0x001ec4, 0x001ec4, 0x001ec6, 0x001ec6, 0x001ec8, 0x001ec8, 0x001eca, 0x001eca, 0x001ecc, 0x001ecc, 0x001ece, 0x001ece, 0x001ed0, + 0x001ed0, 0x001ed2, 0x001ed2, 0x001ed4, 0x001ed4, 0x001ed6, 0x001ed6, 0x001ed8, 0x001ed8, 0x001eda, 0x001eda, 0x001edc, 0x001edc, 0x001ede, 0x001ede, 0x001ee0, 0x001ee0, 0x001ee2, + 0x001ee2, 0x001ee4, 0x001ee4, 0x001ee6, 0x001ee6, 0x001ee8, 0x001ee8, 0x001eea, 0x001eea, 0x001eec, 0x001eec, 0x001eee, 0x001eee, 0x001ef0, 0x001ef0, 0x001ef2, 0x001ef2, 0x001ef4, + 0x001ef4, 0x001ef6, 0x001ef6, 0x001ef8, 0x001ef8, 0x001efa, 0x001efa, 0x001efc, 0x001efc, 0x001efe, 0x001efe, 0x001f08, 0x001f0f, 0x001f18, 0x001f1d, 0x001f28, 0x001f2f, 0x001f38, + 0x001f3f, 0x001f48, 0x001f4d, 0x001f59, 0x001f59, 0x001f5b, 0x001f5b, 0x001f5d, 0x001f5d, 0x001f5f, 0x001f5f, 0x001f68, 0x001f6f, 0x001f88, 0x001f8f, 0x001f98, 0x001f9f, 0x001fa8, + 0x001faf, 0x001fb8, 0x001fbc, 0x001fbe, 0x001fbe, 0x001fc8, 0x001fcc, 0x001fd8, 0x001fdb, 0x001fe8, 0x001fec, 0x001ff8, 0x001ffc, 0x002126, 0x002126, 0x00212a, 0x00212b, 0x002132, + 0x002132, 0x002160, 0x00216f, 0x002183, 0x002183, 0x0024b6, 0x0024cf, 0x002c00, 0x002c2f, 0x002c60, 0x002c60, 0x002c62, 0x002c64, 0x002c67, 0x002c67, 0x002c69, 0x002c69, 0x002c6b, + 0x002c6b, 0x002c6d, 
0x002c70, 0x002c72, 0x002c72, 0x002c75, 0x002c75, 0x002c7e, 0x002c80, 0x002c82, 0x002c82, 0x002c84, 0x002c84, 0x002c86, 0x002c86, 0x002c88, 0x002c88, 0x002c8a, + 0x002c8a, 0x002c8c, 0x002c8c, 0x002c8e, 0x002c8e, 0x002c90, 0x002c90, 0x002c92, 0x002c92, 0x002c94, 0x002c94, 0x002c96, 0x002c96, 0x002c98, 0x002c98, 0x002c9a, 0x002c9a, 0x002c9c, + 0x002c9c, 0x002c9e, 0x002c9e, 0x002ca0, 0x002ca0, 0x002ca2, 0x002ca2, 0x002ca4, 0x002ca4, 0x002ca6, 0x002ca6, 0x002ca8, 0x002ca8, 0x002caa, 0x002caa, 0x002cac, 0x002cac, 0x002cae, + 0x002cae, 0x002cb0, 0x002cb0, 0x002cb2, 0x002cb2, 0x002cb4, 0x002cb4, 0x002cb6, 0x002cb6, 0x002cb8, 0x002cb8, 0x002cba, 0x002cba, 0x002cbc, 0x002cbc, 0x002cbe, 0x002cbe, 0x002cc0, + 0x002cc0, 0x002cc2, 0x002cc2, 0x002cc4, 0x002cc4, 0x002cc6, 0x002cc6, 0x002cc8, 0x002cc8, 0x002cca, 0x002cca, 0x002ccc, 0x002ccc, 0x002cce, 0x002cce, 0x002cd0, 0x002cd0, 0x002cd2, + 0x002cd2, 0x002cd4, 0x002cd4, 0x002cd6, 0x002cd6, 0x002cd8, 0x002cd8, 0x002cda, 0x002cda, 0x002cdc, 0x002cdc, 0x002cde, 0x002cde, 0x002ce0, 0x002ce0, 0x002ce2, 0x002ce2, 0x002ceb, + 0x002ceb, 0x002ced, 0x002ced, 0x002cf2, 0x002cf2, 0x00a640, 0x00a640, 0x00a642, 0x00a642, 0x00a644, 0x00a644, 0x00a646, 0x00a646, 0x00a648, 0x00a648, 0x00a64a, 0x00a64a, 0x00a64c, + 0x00a64c, 0x00a64e, 0x00a64e, 0x00a650, 0x00a650, 0x00a652, 0x00a652, 0x00a654, 0x00a654, 0x00a656, 0x00a656, 0x00a658, 0x00a658, 0x00a65a, 0x00a65a, 0x00a65c, 0x00a65c, 0x00a65e, + 0x00a65e, 0x00a660, 0x00a660, 0x00a662, 0x00a662, 0x00a664, 0x00a664, 0x00a666, 0x00a666, 0x00a668, 0x00a668, 0x00a66a, 0x00a66a, 0x00a66c, 0x00a66c, 0x00a680, 0x00a680, 0x00a682, + 0x00a682, 0x00a684, 0x00a684, 0x00a686, 0x00a686, 0x00a688, 0x00a688, 0x00a68a, 0x00a68a, 0x00a68c, 0x00a68c, 0x00a68e, 0x00a68e, 0x00a690, 0x00a690, 0x00a692, 0x00a692, 0x00a694, + 0x00a694, 0x00a696, 0x00a696, 0x00a698, 0x00a698, 0x00a69a, 0x00a69a, 0x00a722, 0x00a722, 0x00a724, 0x00a724, 0x00a726, 0x00a726, 0x00a728, 0x00a728, 0x00a72a, 0x00a72a, 0x00a72c, + 0x00a72c, 
0x00a72e, 0x00a72e, 0x00a732, 0x00a732, 0x00a734, 0x00a734, 0x00a736, 0x00a736, 0x00a738, 0x00a738, 0x00a73a, 0x00a73a, 0x00a73c, 0x00a73c, 0x00a73e, 0x00a73e, 0x00a740, + 0x00a740, 0x00a742, 0x00a742, 0x00a744, 0x00a744, 0x00a746, 0x00a746, 0x00a748, 0x00a748, 0x00a74a, 0x00a74a, 0x00a74c, 0x00a74c, 0x00a74e, 0x00a74e, 0x00a750, 0x00a750, 0x00a752, + 0x00a752, 0x00a754, 0x00a754, 0x00a756, 0x00a756, 0x00a758, 0x00a758, 0x00a75a, 0x00a75a, 0x00a75c, 0x00a75c, 0x00a75e, 0x00a75e, 0x00a760, 0x00a760, 0x00a762, 0x00a762, 0x00a764, + 0x00a764, 0x00a766, 0x00a766, 0x00a768, 0x00a768, 0x00a76a, 0x00a76a, 0x00a76c, 0x00a76c, 0x00a76e, 0x00a76e, 0x00a779, 0x00a779, 0x00a77b, 0x00a77b, 0x00a77d, 0x00a77e, 0x00a780, + 0x00a780, 0x00a782, 0x00a782, 0x00a784, 0x00a784, 0x00a786, 0x00a786, 0x00a78b, 0x00a78b, 0x00a78d, 0x00a78d, 0x00a790, 0x00a790, 0x00a792, 0x00a792, 0x00a796, 0x00a796, 0x00a798, + 0x00a798, 0x00a79a, 0x00a79a, 0x00a79c, 0x00a79c, 0x00a79e, 0x00a79e, 0x00a7a0, 0x00a7a0, 0x00a7a2, 0x00a7a2, 0x00a7a4, 0x00a7a4, 0x00a7a6, 0x00a7a6, 0x00a7a8, 0x00a7a8, 0x00a7aa, + 0x00a7ae, 0x00a7b0, 0x00a7b4, 0x00a7b6, 0x00a7b6, 0x00a7b8, 0x00a7b8, 0x00a7ba, 0x00a7ba, 0x00a7bc, 0x00a7bc, 0x00a7be, 0x00a7be, 0x00a7c0, 0x00a7c0, 0x00a7c2, 0x00a7c2, 0x00a7c4, + 0x00a7c7, 0x00a7c9, 0x00a7c9, 0x00a7d0, 0x00a7d0, 0x00a7d6, 0x00a7d6, 0x00a7d8, 0x00a7d8, 0x00a7f5, 0x00a7f5, 0x00ab70, 0x00abbf, 0x00ff21, 0x00ff3a, 0x010400, 0x010427, 0x0104b0, + 0x0104d3, 0x010570, 0x01057a, 0x01057c, 0x01058a, 0x01058c, 0x010592, 0x010594, 0x010595, 0x010c80, 0x010cb2, 0x0118a0, 0x0118bf, 0x016e40, 0x016e5f, 0x01e900, 0x01e921); + + /* GENERATED CODE END - KEEP THIS MARKER FOR AUTOMATIC UPDATES */ + + private static final CaseUnfoldingTrie UNFOLDING_TRIE_RUBY = UNICODE_15_0_0_FULL.createCaseUnfoldTrie(); + private static final CaseUnfoldingTrie UNFOLDING_TRIE_ORACLE_DB = ORACLE_DB.createCaseUnfoldTrie(); + private static final CaseUnfoldingTrie UNFOLDING_TRIE_ORACLE_DB_AI = 
ORACLE_DB_AI.createCaseUnfoldTrie(); + + public static final CodePointSet FOLDED_CHARACTERS = FOLDABLE_CHARACTERS.createInverse(Encodings.UTF_32); + +} diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/CaseFoldTable.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/CaseFoldTable.java deleted file mode 100644 index aa926064402c..000000000000 --- a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/CaseFoldTable.java +++ /dev/null @@ -1,2098 +0,0 @@ -/* - * Copyright (c) 2018, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * The Universal Permissive License (UPL), Version 1.0 - * - * Subject to the condition set forth below, permission is hereby granted to any - * person obtaining a copy of this software, associated documentation and/or - * data (collectively the "Software"), free of charge and under any and all - * copyright rights in the Software, and any and all patent rights owned or - * freely licensable by each licensor hereunder covering either (i) the - * unmodified Software as contributed to or provided by such licensor, or (ii) - * the Larger Works (as defined below), to deal in both - * - * (a) the Software, and - * - * (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if - * one is included with the Software each a "Larger Work" to which the Software - * is contributed by such licensors), - * - * without restriction, including without limitation the rights to copy, create - * derivative works of, display, perform, and distribute the Software and make, - * use, sell, offer for sale, import, export, have made, and have sold the - * Software and the Larger Work(s), and to sublicense the foregoing rights on - * either these or other terms. 
- * - * This license is subject to the following condition: - * - * The above copyright notice and either this complete permission notice or at a - * minimum a reference to the UPL must be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -package com.oracle.truffle.regex.tregex.parser; - -import java.util.function.BiPredicate; - -import org.graalvm.shadowed.com.ibm.icu.lang.UCharacter; - -import com.oracle.truffle.api.CompilerDirectives; -import com.oracle.truffle.regex.charset.CodePointSet; -import com.oracle.truffle.regex.charset.CodePointSetAccumulator; -import com.oracle.truffle.regex.charset.Constants; -import com.oracle.truffle.regex.charset.Range; -import com.oracle.truffle.regex.charset.RangesBuffer; -import com.oracle.truffle.regex.charset.SortedListOfRanges; - -public class CaseFoldTable { - - public enum CaseFoldingAlgorithm { - ECMAScriptNonUnicode, - ECMAScriptUnicode, - PythonAscii, - PythonUnicode; - - public BiPredicate getEqualsPredicate() { - return (codePointA, codePointB) -> getTable(this).equalsIgnoreCase(codePointA, codePointB); - } - } - - private static CaseFoldTableImpl getTable(CaseFoldingAlgorithm algorithm) { - switch (algorithm) { - case ECMAScriptNonUnicode: - return NON_UNICODE_TABLE_ENTRIES; - case ECMAScriptUnicode: - return UNICODE_TABLE_ENTRIES; - case PythonAscii: - return PYTHON_ASCII_TABLE_ENTRIES; - case PythonUnicode: - return PYTHON_UNICODE_TABLE_ENTRIES; - default: - throw CompilerDirectives.shouldNotReachHere(); - } 
- } - - public static void applyCaseFoldUnfold(CodePointSetAccumulator codePointSet, CodePointSetAccumulator tmp, CaseFoldingAlgorithm algorithm) { - codePointSet.copyTo(tmp); - getTable(algorithm).applyCaseFold(codePointSet, tmp); - } - - public static CodePointSet simpleCaseFold(CodePointSet codePointSet, CodePointSetAccumulator tmp) { - tmp.addSet(codePointSet); - SIMPLE_CASE_FOLDING_ENTRIES.applyCaseFold(tmp, codePointSet); - tmp.intersectWith(Constants.FOLDED_CHARACTERS); - return tmp.toCodePointSet(); - } - - public static String simpleCaseFold(String string) { - int[] folded = string.codePoints().map(CaseFoldTable::simpleCaseFold).toArray(); - return new String(folded, 0, folded.length); - } - - public static int simpleCaseFold(int codePoint) { - return UCharacter.foldCase(codePoint, UCharacter.FOLD_CASE_DEFAULT); - } - - private static CodePointSet rangeSet(int... ranges) { - return CodePointSet.createNoDedup(ranges); - } - - private static final int INTEGER_OFFSET = 1; - private static final int DIRECT_MAPPING = 2; - private static final int ALTERNATING_UL = 3; - private static final int ALTERNATING_AL = 4; - - private static final class CaseFoldTableImpl implements SortedListOfRanges { - - private final int[] ranges; - - CaseFoldTableImpl(int[] ranges) { - this.ranges = ranges; - } - - void applyCaseFold(CodePointSetAccumulator dst, Iterable src) { - for (Range r : src) { - int search = binarySearch(r.lo); - if (binarySearchExactMatch(search, r.lo, r.hi)) { - apply(dst, search, r.lo, r.hi); - continue; - } - int firstIntersection = binarySearchGetFirstIntersecting(search, r.lo, r.hi); - if (binarySearchNoIntersectingFound(firstIntersection)) { - continue; - } - for (int j = firstIntersection; j < size(); j++) { - if (rightOf(j, r.lo, r.hi)) { - break; - } - assert intersects(j, r.lo, r.hi); - int intersectionLo = Math.max(getLo(j), r.lo); - int intersectionHi = Math.min(getHi(j), r.hi); - apply(dst, j, intersectionLo, intersectionHi); - } - } - } - - 
private void apply(CodePointSetAccumulator codePointSet, int tblEntryIndex, int intersectionLo, int intersectionHi) { - switch (ranges[tblEntryIndex * 4 + 2]) { - case INTEGER_OFFSET: - int delta = ranges[tblEntryIndex * 4 + 3]; - codePointSet.addRange(intersectionLo + delta, intersectionHi + delta); - break; - case DIRECT_MAPPING: - CodePointSet set = CHARACTER_SET_TABLE[ranges[tblEntryIndex * 4 + 3]]; - codePointSet.addSet(set); - break; - case ALTERNATING_UL: - int loUL = Math.min(((intersectionLo - 1) ^ 1) + 1, ((intersectionHi - 1) ^ 1) + 1); - int hiUL = Math.max(((intersectionLo - 1) ^ 1) + 1, ((intersectionHi - 1) ^ 1) + 1); - if (!SortedListOfRanges.contains(intersectionLo, intersectionHi, loUL, hiUL)) { - codePointSet.addRange(loUL, hiUL); - } - break; - case ALTERNATING_AL: - int loAL = Math.min(intersectionLo ^ 1, intersectionHi ^ 1); - int hiAL = Math.max(intersectionLo ^ 1, intersectionHi ^ 1); - if (!SortedListOfRanges.contains(intersectionLo, intersectionHi, loAL, hiAL)) { - codePointSet.addRange(loAL, hiAL); - } - break; - default: - throw CompilerDirectives.shouldNotReachHere(); - } - } - - boolean equalsIgnoreCase(int codePointA, int codePointB) { - if (codePointA == codePointB) { - return true; - } - int search = binarySearch(codePointA); - if (binarySearchExactMatch(search, codePointA, codePointA)) { - return equalsIgnoreCase(search, codePointA, codePointB); - } - int firstIntersection = binarySearchGetFirstIntersecting(search, codePointA, codePointA); - if (binarySearchNoIntersectingFound(firstIntersection) || rightOf(firstIntersection, codePointA, codePointA)) { - return false; - } - assert intersects(firstIntersection, codePointA, codePointA); - return equalsIgnoreCase(firstIntersection, codePointA, codePointB); - } - - private boolean equalsIgnoreCase(int tblEntryIndex, int codePointA, int codePointB) { - switch (ranges[tblEntryIndex * 4 + 2]) { - case INTEGER_OFFSET: - int delta = ranges[tblEntryIndex * 4 + 3]; - return codePointA + delta 
== codePointB; - case DIRECT_MAPPING: - CodePointSet set = CHARACTER_SET_TABLE[ranges[tblEntryIndex * 4 + 3]]; - return set.contains(codePointB); - case ALTERNATING_UL: - return ((codePointA - 1) ^ 1) + 1 == codePointB; - case ALTERNATING_AL: - return (codePointA ^ 1) == codePointB; - default: - throw CompilerDirectives.shouldNotReachHere(); - } - } - - @Override - public int getLo(int i) { - return ranges[i * 4]; - } - - @Override - public int getHi(int i) { - return ranges[i * 4 + 1]; - } - - @Override - public int size() { - return ranges.length / 4; - } - - @Override - public void appendRangesTo(RangesBuffer buffer, int startIndex, int endIndex) { - throw CompilerDirectives.shouldNotReachHere(); - } - } - - /* GENERATED CODE BEGIN - KEEP THIS MARKER FOR AUTOMATIC UPDATES */ - - private static final CodePointSet[] CHARACTER_SET_TABLE = new CodePointSet[]{ - rangeSet(0x0000b5, 0x0000b5, 0x00039c, 0x00039c, 0x0003bc, 0x0003bc), - rangeSet(0x0001c4, 0x0001c6), - rangeSet(0x0001c7, 0x0001c9), - rangeSet(0x0001ca, 0x0001cc), - rangeSet(0x0001f1, 0x0001f3), - rangeSet(0x000345, 0x000345, 0x000399, 0x000399, 0x0003b9, 0x0003b9, 0x001fbe, 0x001fbe), - rangeSet(0x000392, 0x000392, 0x0003b2, 0x0003b2, 0x0003d0, 0x0003d0), - rangeSet(0x000395, 0x000395, 0x0003b5, 0x0003b5, 0x0003f5, 0x0003f5), - rangeSet(0x000398, 0x000398, 0x0003b8, 0x0003b8, 0x0003d1, 0x0003d1), - rangeSet(0x00039a, 0x00039a, 0x0003ba, 0x0003ba, 0x0003f0, 0x0003f0), - rangeSet(0x0003a0, 0x0003a0, 0x0003c0, 0x0003c0, 0x0003d6, 0x0003d6), - rangeSet(0x0003a1, 0x0003a1, 0x0003c1, 0x0003c1, 0x0003f1, 0x0003f1), - rangeSet(0x0003a3, 0x0003a3, 0x0003c2, 0x0003c3), - rangeSet(0x0003a6, 0x0003a6, 0x0003c6, 0x0003c6, 0x0003d5, 0x0003d5), - rangeSet(0x000412, 0x000412, 0x000432, 0x000432, 0x001c80, 0x001c80), - rangeSet(0x000414, 0x000414, 0x000434, 0x000434, 0x001c81, 0x001c81), - rangeSet(0x00041e, 0x00041e, 0x00043e, 0x00043e, 0x001c82, 0x001c82), - rangeSet(0x000421, 0x000421, 0x000441, 0x000441, 0x001c83, 
0x001c83), - rangeSet(0x000422, 0x000422, 0x000442, 0x000442, 0x001c84, 0x001c85), - rangeSet(0x00042a, 0x00042a, 0x00044a, 0x00044a, 0x001c86, 0x001c86), - rangeSet(0x000462, 0x000463, 0x001c87, 0x001c87), - rangeSet(0x001c88, 0x001c88, 0x00a64a, 0x00a64b), - rangeSet(0x001e60, 0x001e61, 0x001e9b, 0x001e9b), - rangeSet(0x00004b, 0x00004b, 0x00006b, 0x00006b, 0x00212a, 0x00212a), - rangeSet(0x000053, 0x000053, 0x000073, 0x000073, 0x00017f, 0x00017f), - rangeSet(0x0000c5, 0x0000c5, 0x0000e5, 0x0000e5, 0x00212b, 0x00212b), - rangeSet(0x000398, 0x000398, 0x0003b8, 0x0003b8, 0x0003d1, 0x0003d1, 0x0003f4, 0x0003f4), - rangeSet(0x0003a9, 0x0003a9, 0x0003c9, 0x0003c9, 0x002126, 0x002126), - rangeSet(0x000049, 0x000049, 0x000069, 0x000069, 0x000130, 0x000131)}; - - public static final CaseFoldTableImpl NON_UNICODE_TABLE_ENTRIES = new CaseFoldTableImpl(new int[]{ - 0x000041, 0x00005a, INTEGER_OFFSET, 32, - 0x000061, 0x00007a, INTEGER_OFFSET, -32, - 0x0000b5, 0x0000b5, DIRECT_MAPPING, 0, - 0x0000c0, 0x0000d6, INTEGER_OFFSET, 32, - 0x0000d8, 0x0000de, INTEGER_OFFSET, 32, - 0x0000e0, 0x0000f6, INTEGER_OFFSET, -32, - 0x0000f8, 0x0000fe, INTEGER_OFFSET, -32, - 0x0000ff, 0x0000ff, INTEGER_OFFSET, 121, - 0x000100, 0x00012f, ALTERNATING_AL, 0, - 0x000132, 0x000137, ALTERNATING_AL, 0, - 0x000139, 0x000148, ALTERNATING_UL, 0, - 0x00014a, 0x000177, ALTERNATING_AL, 0, - 0x000178, 0x000178, INTEGER_OFFSET, -121, - 0x000179, 0x00017e, ALTERNATING_UL, 0, - 0x000180, 0x000180, INTEGER_OFFSET, 195, - 0x000181, 0x000181, INTEGER_OFFSET, 210, - 0x000182, 0x000185, ALTERNATING_AL, 0, - 0x000186, 0x000186, INTEGER_OFFSET, 206, - 0x000187, 0x000188, ALTERNATING_UL, 0, - 0x000189, 0x00018a, INTEGER_OFFSET, 205, - 0x00018b, 0x00018c, ALTERNATING_UL, 0, - 0x00018e, 0x00018e, INTEGER_OFFSET, 79, - 0x00018f, 0x00018f, INTEGER_OFFSET, 202, - 0x000190, 0x000190, INTEGER_OFFSET, 203, - 0x000191, 0x000192, ALTERNATING_UL, 0, - 0x000193, 0x000193, INTEGER_OFFSET, 205, - 0x000194, 0x000194, INTEGER_OFFSET, 
207, - 0x000195, 0x000195, INTEGER_OFFSET, 97, - 0x000196, 0x000196, INTEGER_OFFSET, 211, - 0x000197, 0x000197, INTEGER_OFFSET, 209, - 0x000198, 0x000199, ALTERNATING_AL, 0, - 0x00019a, 0x00019a, INTEGER_OFFSET, 163, - 0x00019c, 0x00019c, INTEGER_OFFSET, 211, - 0x00019d, 0x00019d, INTEGER_OFFSET, 213, - 0x00019e, 0x00019e, INTEGER_OFFSET, 130, - 0x00019f, 0x00019f, INTEGER_OFFSET, 214, - 0x0001a0, 0x0001a5, ALTERNATING_AL, 0, - 0x0001a6, 0x0001a6, INTEGER_OFFSET, 218, - 0x0001a7, 0x0001a8, ALTERNATING_UL, 0, - 0x0001a9, 0x0001a9, INTEGER_OFFSET, 218, - 0x0001ac, 0x0001ad, ALTERNATING_AL, 0, - 0x0001ae, 0x0001ae, INTEGER_OFFSET, 218, - 0x0001af, 0x0001b0, ALTERNATING_UL, 0, - 0x0001b1, 0x0001b2, INTEGER_OFFSET, 217, - 0x0001b3, 0x0001b6, ALTERNATING_UL, 0, - 0x0001b7, 0x0001b7, INTEGER_OFFSET, 219, - 0x0001b8, 0x0001b9, ALTERNATING_AL, 0, - 0x0001bc, 0x0001bd, ALTERNATING_AL, 0, - 0x0001bf, 0x0001bf, INTEGER_OFFSET, 56, - 0x0001c4, 0x0001c6, DIRECT_MAPPING, 1, - 0x0001c7, 0x0001c9, DIRECT_MAPPING, 2, - 0x0001ca, 0x0001cc, DIRECT_MAPPING, 3, - 0x0001cd, 0x0001dc, ALTERNATING_UL, 0, - 0x0001dd, 0x0001dd, INTEGER_OFFSET, -79, - 0x0001de, 0x0001ef, ALTERNATING_AL, 0, - 0x0001f1, 0x0001f3, DIRECT_MAPPING, 4, - 0x0001f4, 0x0001f5, ALTERNATING_AL, 0, - 0x0001f6, 0x0001f6, INTEGER_OFFSET, -97, - 0x0001f7, 0x0001f7, INTEGER_OFFSET, -56, - 0x0001f8, 0x00021f, ALTERNATING_AL, 0, - 0x000220, 0x000220, INTEGER_OFFSET, -130, - 0x000222, 0x000233, ALTERNATING_AL, 0, - 0x00023a, 0x00023a, INTEGER_OFFSET, 10795, - 0x00023b, 0x00023c, ALTERNATING_UL, 0, - 0x00023d, 0x00023d, INTEGER_OFFSET, -163, - 0x00023e, 0x00023e, INTEGER_OFFSET, 10792, - 0x00023f, 0x000240, INTEGER_OFFSET, 10815, - 0x000241, 0x000242, ALTERNATING_UL, 0, - 0x000243, 0x000243, INTEGER_OFFSET, -195, - 0x000244, 0x000244, INTEGER_OFFSET, 69, - 0x000245, 0x000245, INTEGER_OFFSET, 71, - 0x000246, 0x00024f, ALTERNATING_AL, 0, - 0x000250, 0x000250, INTEGER_OFFSET, 10783, - 0x000251, 0x000251, INTEGER_OFFSET, 10780, - 
0x000252, 0x000252, INTEGER_OFFSET, 10782, - 0x000253, 0x000253, INTEGER_OFFSET, -210, - 0x000254, 0x000254, INTEGER_OFFSET, -206, - 0x000256, 0x000257, INTEGER_OFFSET, -205, - 0x000259, 0x000259, INTEGER_OFFSET, -202, - 0x00025b, 0x00025b, INTEGER_OFFSET, -203, - 0x00025c, 0x00025c, INTEGER_OFFSET, 42319, - 0x000260, 0x000260, INTEGER_OFFSET, -205, - 0x000261, 0x000261, INTEGER_OFFSET, 42315, - 0x000263, 0x000263, INTEGER_OFFSET, -207, - 0x000265, 0x000265, INTEGER_OFFSET, 42280, - 0x000266, 0x000266, INTEGER_OFFSET, 42308, - 0x000268, 0x000268, INTEGER_OFFSET, -209, - 0x000269, 0x000269, INTEGER_OFFSET, -211, - 0x00026a, 0x00026a, INTEGER_OFFSET, 42308, - 0x00026b, 0x00026b, INTEGER_OFFSET, 10743, - 0x00026c, 0x00026c, INTEGER_OFFSET, 42305, - 0x00026f, 0x00026f, INTEGER_OFFSET, -211, - 0x000271, 0x000271, INTEGER_OFFSET, 10749, - 0x000272, 0x000272, INTEGER_OFFSET, -213, - 0x000275, 0x000275, INTEGER_OFFSET, -214, - 0x00027d, 0x00027d, INTEGER_OFFSET, 10727, - 0x000280, 0x000280, INTEGER_OFFSET, -218, - 0x000282, 0x000282, INTEGER_OFFSET, 42307, - 0x000283, 0x000283, INTEGER_OFFSET, -218, - 0x000287, 0x000287, INTEGER_OFFSET, 42282, - 0x000288, 0x000288, INTEGER_OFFSET, -218, - 0x000289, 0x000289, INTEGER_OFFSET, -69, - 0x00028a, 0x00028b, INTEGER_OFFSET, -217, - 0x00028c, 0x00028c, INTEGER_OFFSET, -71, - 0x000292, 0x000292, INTEGER_OFFSET, -219, - 0x00029d, 0x00029d, INTEGER_OFFSET, 42261, - 0x00029e, 0x00029e, INTEGER_OFFSET, 42258, - 0x000345, 0x000345, DIRECT_MAPPING, 5, - 0x000370, 0x000373, ALTERNATING_AL, 0, - 0x000376, 0x000377, ALTERNATING_AL, 0, - 0x00037b, 0x00037d, INTEGER_OFFSET, 130, - 0x00037f, 0x00037f, INTEGER_OFFSET, 116, - 0x000386, 0x000386, INTEGER_OFFSET, 38, - 0x000388, 0x00038a, INTEGER_OFFSET, 37, - 0x00038c, 0x00038c, INTEGER_OFFSET, 64, - 0x00038e, 0x00038f, INTEGER_OFFSET, 63, - 0x000391, 0x000391, INTEGER_OFFSET, 32, - 0x000392, 0x000392, DIRECT_MAPPING, 6, - 0x000393, 0x000394, INTEGER_OFFSET, 32, - 0x000395, 0x000395, 
DIRECT_MAPPING, 7, - 0x000396, 0x000397, INTEGER_OFFSET, 32, - 0x000398, 0x000398, DIRECT_MAPPING, 8, - 0x000399, 0x000399, DIRECT_MAPPING, 5, - 0x00039a, 0x00039a, DIRECT_MAPPING, 9, - 0x00039b, 0x00039b, INTEGER_OFFSET, 32, - 0x00039c, 0x00039c, DIRECT_MAPPING, 0, - 0x00039d, 0x00039f, INTEGER_OFFSET, 32, - 0x0003a0, 0x0003a0, DIRECT_MAPPING, 10, - 0x0003a1, 0x0003a1, DIRECT_MAPPING, 11, - 0x0003a3, 0x0003a3, DIRECT_MAPPING, 12, - 0x0003a4, 0x0003a5, INTEGER_OFFSET, 32, - 0x0003a6, 0x0003a6, DIRECT_MAPPING, 13, - 0x0003a7, 0x0003ab, INTEGER_OFFSET, 32, - 0x0003ac, 0x0003ac, INTEGER_OFFSET, -38, - 0x0003ad, 0x0003af, INTEGER_OFFSET, -37, - 0x0003b1, 0x0003b1, INTEGER_OFFSET, -32, - 0x0003b2, 0x0003b2, DIRECT_MAPPING, 6, - 0x0003b3, 0x0003b4, INTEGER_OFFSET, -32, - 0x0003b5, 0x0003b5, DIRECT_MAPPING, 7, - 0x0003b6, 0x0003b7, INTEGER_OFFSET, -32, - 0x0003b8, 0x0003b8, DIRECT_MAPPING, 8, - 0x0003b9, 0x0003b9, DIRECT_MAPPING, 5, - 0x0003ba, 0x0003ba, DIRECT_MAPPING, 9, - 0x0003bb, 0x0003bb, INTEGER_OFFSET, -32, - 0x0003bc, 0x0003bc, DIRECT_MAPPING, 0, - 0x0003bd, 0x0003bf, INTEGER_OFFSET, -32, - 0x0003c0, 0x0003c0, DIRECT_MAPPING, 10, - 0x0003c1, 0x0003c1, DIRECT_MAPPING, 11, - 0x0003c2, 0x0003c3, DIRECT_MAPPING, 12, - 0x0003c4, 0x0003c5, INTEGER_OFFSET, -32, - 0x0003c6, 0x0003c6, DIRECT_MAPPING, 13, - 0x0003c7, 0x0003cb, INTEGER_OFFSET, -32, - 0x0003cc, 0x0003cc, INTEGER_OFFSET, -64, - 0x0003cd, 0x0003ce, INTEGER_OFFSET, -63, - 0x0003cf, 0x0003cf, INTEGER_OFFSET, 8, - 0x0003d0, 0x0003d0, DIRECT_MAPPING, 6, - 0x0003d1, 0x0003d1, DIRECT_MAPPING, 8, - 0x0003d5, 0x0003d5, DIRECT_MAPPING, 13, - 0x0003d6, 0x0003d6, DIRECT_MAPPING, 10, - 0x0003d7, 0x0003d7, INTEGER_OFFSET, -8, - 0x0003d8, 0x0003ef, ALTERNATING_AL, 0, - 0x0003f0, 0x0003f0, DIRECT_MAPPING, 9, - 0x0003f1, 0x0003f1, DIRECT_MAPPING, 11, - 0x0003f2, 0x0003f2, INTEGER_OFFSET, 7, - 0x0003f3, 0x0003f3, INTEGER_OFFSET, -116, - 0x0003f5, 0x0003f5, DIRECT_MAPPING, 7, - 0x0003f7, 0x0003f8, ALTERNATING_UL, 0, - 0x0003f9, 
0x0003f9, INTEGER_OFFSET, -7, - 0x0003fa, 0x0003fb, ALTERNATING_AL, 0, - 0x0003fd, 0x0003ff, INTEGER_OFFSET, -130, - 0x000400, 0x00040f, INTEGER_OFFSET, 80, - 0x000410, 0x000411, INTEGER_OFFSET, 32, - 0x000412, 0x000412, DIRECT_MAPPING, 14, - 0x000413, 0x000413, INTEGER_OFFSET, 32, - 0x000414, 0x000414, DIRECT_MAPPING, 15, - 0x000415, 0x00041d, INTEGER_OFFSET, 32, - 0x00041e, 0x00041e, DIRECT_MAPPING, 16, - 0x00041f, 0x000420, INTEGER_OFFSET, 32, - 0x000421, 0x000421, DIRECT_MAPPING, 17, - 0x000422, 0x000422, DIRECT_MAPPING, 18, - 0x000423, 0x000429, INTEGER_OFFSET, 32, - 0x00042a, 0x00042a, DIRECT_MAPPING, 19, - 0x00042b, 0x00042f, INTEGER_OFFSET, 32, - 0x000430, 0x000431, INTEGER_OFFSET, -32, - 0x000432, 0x000432, DIRECT_MAPPING, 14, - 0x000433, 0x000433, INTEGER_OFFSET, -32, - 0x000434, 0x000434, DIRECT_MAPPING, 15, - 0x000435, 0x00043d, INTEGER_OFFSET, -32, - 0x00043e, 0x00043e, DIRECT_MAPPING, 16, - 0x00043f, 0x000440, INTEGER_OFFSET, -32, - 0x000441, 0x000441, DIRECT_MAPPING, 17, - 0x000442, 0x000442, DIRECT_MAPPING, 18, - 0x000443, 0x000449, INTEGER_OFFSET, -32, - 0x00044a, 0x00044a, DIRECT_MAPPING, 19, - 0x00044b, 0x00044f, INTEGER_OFFSET, -32, - 0x000450, 0x00045f, INTEGER_OFFSET, -80, - 0x000460, 0x000461, ALTERNATING_AL, 0, - 0x000462, 0x000463, DIRECT_MAPPING, 20, - 0x000464, 0x000481, ALTERNATING_AL, 0, - 0x00048a, 0x0004bf, ALTERNATING_AL, 0, - 0x0004c0, 0x0004c0, INTEGER_OFFSET, 15, - 0x0004c1, 0x0004ce, ALTERNATING_UL, 0, - 0x0004cf, 0x0004cf, INTEGER_OFFSET, -15, - 0x0004d0, 0x00052f, ALTERNATING_AL, 0, - 0x000531, 0x000556, INTEGER_OFFSET, 48, - 0x000561, 0x000586, INTEGER_OFFSET, -48, - 0x0010a0, 0x0010c5, INTEGER_OFFSET, 7264, - 0x0010c7, 0x0010c7, INTEGER_OFFSET, 7264, - 0x0010cd, 0x0010cd, INTEGER_OFFSET, 7264, - 0x0010d0, 0x0010fa, INTEGER_OFFSET, 3008, - 0x0010fd, 0x0010ff, INTEGER_OFFSET, 3008, - 0x0013a0, 0x0013ef, INTEGER_OFFSET, 38864, - 0x0013f0, 0x0013f5, INTEGER_OFFSET, 8, - 0x0013f8, 0x0013fd, INTEGER_OFFSET, -8, - 0x001c80, 
0x001c80, DIRECT_MAPPING, 14, - 0x001c81, 0x001c81, DIRECT_MAPPING, 15, - 0x001c82, 0x001c82, DIRECT_MAPPING, 16, - 0x001c83, 0x001c83, DIRECT_MAPPING, 17, - 0x001c84, 0x001c85, DIRECT_MAPPING, 18, - 0x001c86, 0x001c86, DIRECT_MAPPING, 19, - 0x001c87, 0x001c87, DIRECT_MAPPING, 20, - 0x001c88, 0x001c88, DIRECT_MAPPING, 21, - 0x001c90, 0x001cba, INTEGER_OFFSET, -3008, - 0x001cbd, 0x001cbf, INTEGER_OFFSET, -3008, - 0x001d79, 0x001d79, INTEGER_OFFSET, 35332, - 0x001d7d, 0x001d7d, INTEGER_OFFSET, 3814, - 0x001d8e, 0x001d8e, INTEGER_OFFSET, 35384, - 0x001e00, 0x001e5f, ALTERNATING_AL, 0, - 0x001e60, 0x001e61, DIRECT_MAPPING, 22, - 0x001e62, 0x001e95, ALTERNATING_AL, 0, - 0x001e9b, 0x001e9b, DIRECT_MAPPING, 22, - 0x001ea0, 0x001eff, ALTERNATING_AL, 0, - 0x001f00, 0x001f07, INTEGER_OFFSET, 8, - 0x001f08, 0x001f0f, INTEGER_OFFSET, -8, - 0x001f10, 0x001f15, INTEGER_OFFSET, 8, - 0x001f18, 0x001f1d, INTEGER_OFFSET, -8, - 0x001f20, 0x001f27, INTEGER_OFFSET, 8, - 0x001f28, 0x001f2f, INTEGER_OFFSET, -8, - 0x001f30, 0x001f37, INTEGER_OFFSET, 8, - 0x001f38, 0x001f3f, INTEGER_OFFSET, -8, - 0x001f40, 0x001f45, INTEGER_OFFSET, 8, - 0x001f48, 0x001f4d, INTEGER_OFFSET, -8, - 0x001f51, 0x001f51, INTEGER_OFFSET, 8, - 0x001f53, 0x001f53, INTEGER_OFFSET, 8, - 0x001f55, 0x001f55, INTEGER_OFFSET, 8, - 0x001f57, 0x001f57, INTEGER_OFFSET, 8, - 0x001f59, 0x001f59, INTEGER_OFFSET, -8, - 0x001f5b, 0x001f5b, INTEGER_OFFSET, -8, - 0x001f5d, 0x001f5d, INTEGER_OFFSET, -8, - 0x001f5f, 0x001f5f, INTEGER_OFFSET, -8, - 0x001f60, 0x001f67, INTEGER_OFFSET, 8, - 0x001f68, 0x001f6f, INTEGER_OFFSET, -8, - 0x001f70, 0x001f71, INTEGER_OFFSET, 74, - 0x001f72, 0x001f75, INTEGER_OFFSET, 86, - 0x001f76, 0x001f77, INTEGER_OFFSET, 100, - 0x001f78, 0x001f79, INTEGER_OFFSET, 128, - 0x001f7a, 0x001f7b, INTEGER_OFFSET, 112, - 0x001f7c, 0x001f7d, INTEGER_OFFSET, 126, - 0x001fb0, 0x001fb1, INTEGER_OFFSET, 8, - 0x001fb8, 0x001fb9, INTEGER_OFFSET, -8, - 0x001fba, 0x001fbb, INTEGER_OFFSET, -74, - 0x001fbe, 0x001fbe, 
DIRECT_MAPPING, 5, - 0x001fc8, 0x001fcb, INTEGER_OFFSET, -86, - 0x001fd0, 0x001fd1, INTEGER_OFFSET, 8, - 0x001fd8, 0x001fd9, INTEGER_OFFSET, -8, - 0x001fda, 0x001fdb, INTEGER_OFFSET, -100, - 0x001fe0, 0x001fe1, INTEGER_OFFSET, 8, - 0x001fe5, 0x001fe5, INTEGER_OFFSET, 7, - 0x001fe8, 0x001fe9, INTEGER_OFFSET, -8, - 0x001fea, 0x001feb, INTEGER_OFFSET, -112, - 0x001fec, 0x001fec, INTEGER_OFFSET, -7, - 0x001ff8, 0x001ff9, INTEGER_OFFSET, -128, - 0x001ffa, 0x001ffb, INTEGER_OFFSET, -126, - 0x002132, 0x002132, INTEGER_OFFSET, 28, - 0x00214e, 0x00214e, INTEGER_OFFSET, -28, - 0x002160, 0x00216f, INTEGER_OFFSET, 16, - 0x002170, 0x00217f, INTEGER_OFFSET, -16, - 0x002183, 0x002184, ALTERNATING_UL, 0, - 0x0024b6, 0x0024cf, INTEGER_OFFSET, 26, - 0x0024d0, 0x0024e9, INTEGER_OFFSET, -26, - 0x002c00, 0x002c2f, INTEGER_OFFSET, 48, - 0x002c30, 0x002c5f, INTEGER_OFFSET, -48, - 0x002c60, 0x002c61, ALTERNATING_AL, 0, - 0x002c62, 0x002c62, INTEGER_OFFSET, -10743, - 0x002c63, 0x002c63, INTEGER_OFFSET, -3814, - 0x002c64, 0x002c64, INTEGER_OFFSET, -10727, - 0x002c65, 0x002c65, INTEGER_OFFSET, -10795, - 0x002c66, 0x002c66, INTEGER_OFFSET, -10792, - 0x002c67, 0x002c6c, ALTERNATING_UL, 0, - 0x002c6d, 0x002c6d, INTEGER_OFFSET, -10780, - 0x002c6e, 0x002c6e, INTEGER_OFFSET, -10749, - 0x002c6f, 0x002c6f, INTEGER_OFFSET, -10783, - 0x002c70, 0x002c70, INTEGER_OFFSET, -10782, - 0x002c72, 0x002c73, ALTERNATING_AL, 0, - 0x002c75, 0x002c76, ALTERNATING_UL, 0, - 0x002c7e, 0x002c7f, INTEGER_OFFSET, -10815, - 0x002c80, 0x002ce3, ALTERNATING_AL, 0, - 0x002ceb, 0x002cee, ALTERNATING_UL, 0, - 0x002cf2, 0x002cf3, ALTERNATING_AL, 0, - 0x002d00, 0x002d25, INTEGER_OFFSET, -7264, - 0x002d27, 0x002d27, INTEGER_OFFSET, -7264, - 0x002d2d, 0x002d2d, INTEGER_OFFSET, -7264, - 0x00a640, 0x00a649, ALTERNATING_AL, 0, - 0x00a64a, 0x00a64b, DIRECT_MAPPING, 21, - 0x00a64c, 0x00a66d, ALTERNATING_AL, 0, - 0x00a680, 0x00a69b, ALTERNATING_AL, 0, - 0x00a722, 0x00a72f, ALTERNATING_AL, 0, - 0x00a732, 0x00a76f, ALTERNATING_AL, 0, - 
0x00a779, 0x00a77c, ALTERNATING_UL, 0, - 0x00a77d, 0x00a77d, INTEGER_OFFSET, -35332, - 0x00a77e, 0x00a787, ALTERNATING_AL, 0, - 0x00a78b, 0x00a78c, ALTERNATING_UL, 0, - 0x00a78d, 0x00a78d, INTEGER_OFFSET, -42280, - 0x00a790, 0x00a793, ALTERNATING_AL, 0, - 0x00a794, 0x00a794, INTEGER_OFFSET, 48, - 0x00a796, 0x00a7a9, ALTERNATING_AL, 0, - 0x00a7aa, 0x00a7aa, INTEGER_OFFSET, -42308, - 0x00a7ab, 0x00a7ab, INTEGER_OFFSET, -42319, - 0x00a7ac, 0x00a7ac, INTEGER_OFFSET, -42315, - 0x00a7ad, 0x00a7ad, INTEGER_OFFSET, -42305, - 0x00a7ae, 0x00a7ae, INTEGER_OFFSET, -42308, - 0x00a7b0, 0x00a7b0, INTEGER_OFFSET, -42258, - 0x00a7b1, 0x00a7b1, INTEGER_OFFSET, -42282, - 0x00a7b2, 0x00a7b2, INTEGER_OFFSET, -42261, - 0x00a7b3, 0x00a7b3, INTEGER_OFFSET, 928, - 0x00a7b4, 0x00a7c3, ALTERNATING_AL, 0, - 0x00a7c4, 0x00a7c4, INTEGER_OFFSET, -48, - 0x00a7c5, 0x00a7c5, INTEGER_OFFSET, -42307, - 0x00a7c6, 0x00a7c6, INTEGER_OFFSET, -35384, - 0x00a7c7, 0x00a7ca, ALTERNATING_UL, 0, - 0x00a7d0, 0x00a7d1, ALTERNATING_AL, 0, - 0x00a7d6, 0x00a7d9, ALTERNATING_AL, 0, - 0x00a7f5, 0x00a7f6, ALTERNATING_UL, 0, - 0x00ab53, 0x00ab53, INTEGER_OFFSET, -928, - 0x00ab70, 0x00abbf, INTEGER_OFFSET, -38864, - 0x00ff21, 0x00ff3a, INTEGER_OFFSET, 32, - 0x00ff41, 0x00ff5a, INTEGER_OFFSET, -32 - }); - - public static final CaseFoldTableImpl UNICODE_TABLE_ENTRIES = new CaseFoldTableImpl(new int[]{ - 0x000041, 0x00004a, INTEGER_OFFSET, 32, - 0x00004b, 0x00004b, DIRECT_MAPPING, 23, - 0x00004c, 0x000052, INTEGER_OFFSET, 32, - 0x000053, 0x000053, DIRECT_MAPPING, 24, - 0x000054, 0x00005a, INTEGER_OFFSET, 32, - 0x000061, 0x00006a, INTEGER_OFFSET, -32, - 0x00006b, 0x00006b, DIRECT_MAPPING, 23, - 0x00006c, 0x000072, INTEGER_OFFSET, -32, - 0x000073, 0x000073, DIRECT_MAPPING, 24, - 0x000074, 0x00007a, INTEGER_OFFSET, -32, - 0x0000b5, 0x0000b5, DIRECT_MAPPING, 0, - 0x0000c0, 0x0000c4, INTEGER_OFFSET, 32, - 0x0000c5, 0x0000c5, DIRECT_MAPPING, 25, - 0x0000c6, 0x0000d6, INTEGER_OFFSET, 32, - 0x0000d8, 0x0000de, INTEGER_OFFSET, 32, 
- 0x0000df, 0x0000df, INTEGER_OFFSET, 7615, - 0x0000e0, 0x0000e4, INTEGER_OFFSET, -32, - 0x0000e5, 0x0000e5, DIRECT_MAPPING, 25, - 0x0000e6, 0x0000f6, INTEGER_OFFSET, -32, - 0x0000f8, 0x0000fe, INTEGER_OFFSET, -32, - 0x0000ff, 0x0000ff, INTEGER_OFFSET, 121, - 0x000100, 0x00012f, ALTERNATING_AL, 0, - 0x000132, 0x000137, ALTERNATING_AL, 0, - 0x000139, 0x000148, ALTERNATING_UL, 0, - 0x00014a, 0x000177, ALTERNATING_AL, 0, - 0x000178, 0x000178, INTEGER_OFFSET, -121, - 0x000179, 0x00017e, ALTERNATING_UL, 0, - 0x00017f, 0x00017f, DIRECT_MAPPING, 24, - 0x000180, 0x000180, INTEGER_OFFSET, 195, - 0x000181, 0x000181, INTEGER_OFFSET, 210, - 0x000182, 0x000185, ALTERNATING_AL, 0, - 0x000186, 0x000186, INTEGER_OFFSET, 206, - 0x000187, 0x000188, ALTERNATING_UL, 0, - 0x000189, 0x00018a, INTEGER_OFFSET, 205, - 0x00018b, 0x00018c, ALTERNATING_UL, 0, - 0x00018e, 0x00018e, INTEGER_OFFSET, 79, - 0x00018f, 0x00018f, INTEGER_OFFSET, 202, - 0x000190, 0x000190, INTEGER_OFFSET, 203, - 0x000191, 0x000192, ALTERNATING_UL, 0, - 0x000193, 0x000193, INTEGER_OFFSET, 205, - 0x000194, 0x000194, INTEGER_OFFSET, 207, - 0x000195, 0x000195, INTEGER_OFFSET, 97, - 0x000196, 0x000196, INTEGER_OFFSET, 211, - 0x000197, 0x000197, INTEGER_OFFSET, 209, - 0x000198, 0x000199, ALTERNATING_AL, 0, - 0x00019a, 0x00019a, INTEGER_OFFSET, 163, - 0x00019c, 0x00019c, INTEGER_OFFSET, 211, - 0x00019d, 0x00019d, INTEGER_OFFSET, 213, - 0x00019e, 0x00019e, INTEGER_OFFSET, 130, - 0x00019f, 0x00019f, INTEGER_OFFSET, 214, - 0x0001a0, 0x0001a5, ALTERNATING_AL, 0, - 0x0001a6, 0x0001a6, INTEGER_OFFSET, 218, - 0x0001a7, 0x0001a8, ALTERNATING_UL, 0, - 0x0001a9, 0x0001a9, INTEGER_OFFSET, 218, - 0x0001ac, 0x0001ad, ALTERNATING_AL, 0, - 0x0001ae, 0x0001ae, INTEGER_OFFSET, 218, - 0x0001af, 0x0001b0, ALTERNATING_UL, 0, - 0x0001b1, 0x0001b2, INTEGER_OFFSET, 217, - 0x0001b3, 0x0001b6, ALTERNATING_UL, 0, - 0x0001b7, 0x0001b7, INTEGER_OFFSET, 219, - 0x0001b8, 0x0001b9, ALTERNATING_AL, 0, - 0x0001bc, 0x0001bd, ALTERNATING_AL, 0, - 0x0001bf, 
0x0001bf, INTEGER_OFFSET, 56, - 0x0001c4, 0x0001c6, DIRECT_MAPPING, 1, - 0x0001c7, 0x0001c9, DIRECT_MAPPING, 2, - 0x0001ca, 0x0001cc, DIRECT_MAPPING, 3, - 0x0001cd, 0x0001dc, ALTERNATING_UL, 0, - 0x0001dd, 0x0001dd, INTEGER_OFFSET, -79, - 0x0001de, 0x0001ef, ALTERNATING_AL, 0, - 0x0001f1, 0x0001f3, DIRECT_MAPPING, 4, - 0x0001f4, 0x0001f5, ALTERNATING_AL, 0, - 0x0001f6, 0x0001f6, INTEGER_OFFSET, -97, - 0x0001f7, 0x0001f7, INTEGER_OFFSET, -56, - 0x0001f8, 0x00021f, ALTERNATING_AL, 0, - 0x000220, 0x000220, INTEGER_OFFSET, -130, - 0x000222, 0x000233, ALTERNATING_AL, 0, - 0x00023a, 0x00023a, INTEGER_OFFSET, 10795, - 0x00023b, 0x00023c, ALTERNATING_UL, 0, - 0x00023d, 0x00023d, INTEGER_OFFSET, -163, - 0x00023e, 0x00023e, INTEGER_OFFSET, 10792, - 0x00023f, 0x000240, INTEGER_OFFSET, 10815, - 0x000241, 0x000242, ALTERNATING_UL, 0, - 0x000243, 0x000243, INTEGER_OFFSET, -195, - 0x000244, 0x000244, INTEGER_OFFSET, 69, - 0x000245, 0x000245, INTEGER_OFFSET, 71, - 0x000246, 0x00024f, ALTERNATING_AL, 0, - 0x000250, 0x000250, INTEGER_OFFSET, 10783, - 0x000251, 0x000251, INTEGER_OFFSET, 10780, - 0x000252, 0x000252, INTEGER_OFFSET, 10782, - 0x000253, 0x000253, INTEGER_OFFSET, -210, - 0x000254, 0x000254, INTEGER_OFFSET, -206, - 0x000256, 0x000257, INTEGER_OFFSET, -205, - 0x000259, 0x000259, INTEGER_OFFSET, -202, - 0x00025b, 0x00025b, INTEGER_OFFSET, -203, - 0x00025c, 0x00025c, INTEGER_OFFSET, 42319, - 0x000260, 0x000260, INTEGER_OFFSET, -205, - 0x000261, 0x000261, INTEGER_OFFSET, 42315, - 0x000263, 0x000263, INTEGER_OFFSET, -207, - 0x000265, 0x000265, INTEGER_OFFSET, 42280, - 0x000266, 0x000266, INTEGER_OFFSET, 42308, - 0x000268, 0x000268, INTEGER_OFFSET, -209, - 0x000269, 0x000269, INTEGER_OFFSET, -211, - 0x00026a, 0x00026a, INTEGER_OFFSET, 42308, - 0x00026b, 0x00026b, INTEGER_OFFSET, 10743, - 0x00026c, 0x00026c, INTEGER_OFFSET, 42305, - 0x00026f, 0x00026f, INTEGER_OFFSET, -211, - 0x000271, 0x000271, INTEGER_OFFSET, 10749, - 0x000272, 0x000272, INTEGER_OFFSET, -213, - 0x000275, 
0x000275, INTEGER_OFFSET, -214, - 0x00027d, 0x00027d, INTEGER_OFFSET, 10727, - 0x000280, 0x000280, INTEGER_OFFSET, -218, - 0x000282, 0x000282, INTEGER_OFFSET, 42307, - 0x000283, 0x000283, INTEGER_OFFSET, -218, - 0x000287, 0x000287, INTEGER_OFFSET, 42282, - 0x000288, 0x000288, INTEGER_OFFSET, -218, - 0x000289, 0x000289, INTEGER_OFFSET, -69, - 0x00028a, 0x00028b, INTEGER_OFFSET, -217, - 0x00028c, 0x00028c, INTEGER_OFFSET, -71, - 0x000292, 0x000292, INTEGER_OFFSET, -219, - 0x00029d, 0x00029d, INTEGER_OFFSET, 42261, - 0x00029e, 0x00029e, INTEGER_OFFSET, 42258, - 0x000345, 0x000345, DIRECT_MAPPING, 5, - 0x000370, 0x000373, ALTERNATING_AL, 0, - 0x000376, 0x000377, ALTERNATING_AL, 0, - 0x00037b, 0x00037d, INTEGER_OFFSET, 130, - 0x00037f, 0x00037f, INTEGER_OFFSET, 116, - 0x000386, 0x000386, INTEGER_OFFSET, 38, - 0x000388, 0x00038a, INTEGER_OFFSET, 37, - 0x00038c, 0x00038c, INTEGER_OFFSET, 64, - 0x00038e, 0x00038f, INTEGER_OFFSET, 63, - 0x000391, 0x000391, INTEGER_OFFSET, 32, - 0x000392, 0x000392, DIRECT_MAPPING, 6, - 0x000393, 0x000394, INTEGER_OFFSET, 32, - 0x000395, 0x000395, DIRECT_MAPPING, 7, - 0x000396, 0x000397, INTEGER_OFFSET, 32, - 0x000398, 0x000398, DIRECT_MAPPING, 26, - 0x000399, 0x000399, DIRECT_MAPPING, 5, - 0x00039a, 0x00039a, DIRECT_MAPPING, 9, - 0x00039b, 0x00039b, INTEGER_OFFSET, 32, - 0x00039c, 0x00039c, DIRECT_MAPPING, 0, - 0x00039d, 0x00039f, INTEGER_OFFSET, 32, - 0x0003a0, 0x0003a0, DIRECT_MAPPING, 10, - 0x0003a1, 0x0003a1, DIRECT_MAPPING, 11, - 0x0003a3, 0x0003a3, DIRECT_MAPPING, 12, - 0x0003a4, 0x0003a5, INTEGER_OFFSET, 32, - 0x0003a6, 0x0003a6, DIRECT_MAPPING, 13, - 0x0003a7, 0x0003a8, INTEGER_OFFSET, 32, - 0x0003a9, 0x0003a9, DIRECT_MAPPING, 27, - 0x0003aa, 0x0003ab, INTEGER_OFFSET, 32, - 0x0003ac, 0x0003ac, INTEGER_OFFSET, -38, - 0x0003ad, 0x0003af, INTEGER_OFFSET, -37, - 0x0003b1, 0x0003b1, INTEGER_OFFSET, -32, - 0x0003b2, 0x0003b2, DIRECT_MAPPING, 6, - 0x0003b3, 0x0003b4, INTEGER_OFFSET, -32, - 0x0003b5, 0x0003b5, DIRECT_MAPPING, 7, - 0x0003b6, 
0x0003b7, INTEGER_OFFSET, -32, - 0x0003b8, 0x0003b8, DIRECT_MAPPING, 26, - 0x0003b9, 0x0003b9, DIRECT_MAPPING, 5, - 0x0003ba, 0x0003ba, DIRECT_MAPPING, 9, - 0x0003bb, 0x0003bb, INTEGER_OFFSET, -32, - 0x0003bc, 0x0003bc, DIRECT_MAPPING, 0, - 0x0003bd, 0x0003bf, INTEGER_OFFSET, -32, - 0x0003c0, 0x0003c0, DIRECT_MAPPING, 10, - 0x0003c1, 0x0003c1, DIRECT_MAPPING, 11, - 0x0003c2, 0x0003c3, DIRECT_MAPPING, 12, - 0x0003c4, 0x0003c5, INTEGER_OFFSET, -32, - 0x0003c6, 0x0003c6, DIRECT_MAPPING, 13, - 0x0003c7, 0x0003c8, INTEGER_OFFSET, -32, - 0x0003c9, 0x0003c9, DIRECT_MAPPING, 27, - 0x0003ca, 0x0003cb, INTEGER_OFFSET, -32, - 0x0003cc, 0x0003cc, INTEGER_OFFSET, -64, - 0x0003cd, 0x0003ce, INTEGER_OFFSET, -63, - 0x0003cf, 0x0003cf, INTEGER_OFFSET, 8, - 0x0003d0, 0x0003d0, DIRECT_MAPPING, 6, - 0x0003d1, 0x0003d1, DIRECT_MAPPING, 26, - 0x0003d5, 0x0003d5, DIRECT_MAPPING, 13, - 0x0003d6, 0x0003d6, DIRECT_MAPPING, 10, - 0x0003d7, 0x0003d7, INTEGER_OFFSET, -8, - 0x0003d8, 0x0003ef, ALTERNATING_AL, 0, - 0x0003f0, 0x0003f0, DIRECT_MAPPING, 9, - 0x0003f1, 0x0003f1, DIRECT_MAPPING, 11, - 0x0003f2, 0x0003f2, INTEGER_OFFSET, 7, - 0x0003f3, 0x0003f3, INTEGER_OFFSET, -116, - 0x0003f4, 0x0003f4, DIRECT_MAPPING, 26, - 0x0003f5, 0x0003f5, DIRECT_MAPPING, 7, - 0x0003f7, 0x0003f8, ALTERNATING_UL, 0, - 0x0003f9, 0x0003f9, INTEGER_OFFSET, -7, - 0x0003fa, 0x0003fb, ALTERNATING_AL, 0, - 0x0003fd, 0x0003ff, INTEGER_OFFSET, -130, - 0x000400, 0x00040f, INTEGER_OFFSET, 80, - 0x000410, 0x000411, INTEGER_OFFSET, 32, - 0x000412, 0x000412, DIRECT_MAPPING, 14, - 0x000413, 0x000413, INTEGER_OFFSET, 32, - 0x000414, 0x000414, DIRECT_MAPPING, 15, - 0x000415, 0x00041d, INTEGER_OFFSET, 32, - 0x00041e, 0x00041e, DIRECT_MAPPING, 16, - 0x00041f, 0x000420, INTEGER_OFFSET, 32, - 0x000421, 0x000421, DIRECT_MAPPING, 17, - 0x000422, 0x000422, DIRECT_MAPPING, 18, - 0x000423, 0x000429, INTEGER_OFFSET, 32, - 0x00042a, 0x00042a, DIRECT_MAPPING, 19, - 0x00042b, 0x00042f, INTEGER_OFFSET, 32, - 0x000430, 0x000431, 
INTEGER_OFFSET, -32, - 0x000432, 0x000432, DIRECT_MAPPING, 14, - 0x000433, 0x000433, INTEGER_OFFSET, -32, - 0x000434, 0x000434, DIRECT_MAPPING, 15, - 0x000435, 0x00043d, INTEGER_OFFSET, -32, - 0x00043e, 0x00043e, DIRECT_MAPPING, 16, - 0x00043f, 0x000440, INTEGER_OFFSET, -32, - 0x000441, 0x000441, DIRECT_MAPPING, 17, - 0x000442, 0x000442, DIRECT_MAPPING, 18, - 0x000443, 0x000449, INTEGER_OFFSET, -32, - 0x00044a, 0x00044a, DIRECT_MAPPING, 19, - 0x00044b, 0x00044f, INTEGER_OFFSET, -32, - 0x000450, 0x00045f, INTEGER_OFFSET, -80, - 0x000460, 0x000461, ALTERNATING_AL, 0, - 0x000462, 0x000463, DIRECT_MAPPING, 20, - 0x000464, 0x000481, ALTERNATING_AL, 0, - 0x00048a, 0x0004bf, ALTERNATING_AL, 0, - 0x0004c0, 0x0004c0, INTEGER_OFFSET, 15, - 0x0004c1, 0x0004ce, ALTERNATING_UL, 0, - 0x0004cf, 0x0004cf, INTEGER_OFFSET, -15, - 0x0004d0, 0x00052f, ALTERNATING_AL, 0, - 0x000531, 0x000556, INTEGER_OFFSET, 48, - 0x000561, 0x000586, INTEGER_OFFSET, -48, - 0x0010a0, 0x0010c5, INTEGER_OFFSET, 7264, - 0x0010c7, 0x0010c7, INTEGER_OFFSET, 7264, - 0x0010cd, 0x0010cd, INTEGER_OFFSET, 7264, - 0x0010d0, 0x0010fa, INTEGER_OFFSET, 3008, - 0x0010fd, 0x0010ff, INTEGER_OFFSET, 3008, - 0x0013a0, 0x0013ef, INTEGER_OFFSET, 38864, - 0x0013f0, 0x0013f5, INTEGER_OFFSET, 8, - 0x0013f8, 0x0013fd, INTEGER_OFFSET, -8, - 0x001c80, 0x001c80, DIRECT_MAPPING, 14, - 0x001c81, 0x001c81, DIRECT_MAPPING, 15, - 0x001c82, 0x001c82, DIRECT_MAPPING, 16, - 0x001c83, 0x001c83, DIRECT_MAPPING, 17, - 0x001c84, 0x001c85, DIRECT_MAPPING, 18, - 0x001c86, 0x001c86, DIRECT_MAPPING, 19, - 0x001c87, 0x001c87, DIRECT_MAPPING, 20, - 0x001c88, 0x001c88, DIRECT_MAPPING, 21, - 0x001c90, 0x001cba, INTEGER_OFFSET, -3008, - 0x001cbd, 0x001cbf, INTEGER_OFFSET, -3008, - 0x001d79, 0x001d79, INTEGER_OFFSET, 35332, - 0x001d7d, 0x001d7d, INTEGER_OFFSET, 3814, - 0x001d8e, 0x001d8e, INTEGER_OFFSET, 35384, - 0x001e00, 0x001e5f, ALTERNATING_AL, 0, - 0x001e60, 0x001e61, DIRECT_MAPPING, 22, - 0x001e62, 0x001e95, ALTERNATING_AL, 0, - 0x001e9b, 
0x001e9b, DIRECT_MAPPING, 22, - 0x001e9e, 0x001e9e, INTEGER_OFFSET, -7615, - 0x001ea0, 0x001eff, ALTERNATING_AL, 0, - 0x001f00, 0x001f07, INTEGER_OFFSET, 8, - 0x001f08, 0x001f0f, INTEGER_OFFSET, -8, - 0x001f10, 0x001f15, INTEGER_OFFSET, 8, - 0x001f18, 0x001f1d, INTEGER_OFFSET, -8, - 0x001f20, 0x001f27, INTEGER_OFFSET, 8, - 0x001f28, 0x001f2f, INTEGER_OFFSET, -8, - 0x001f30, 0x001f37, INTEGER_OFFSET, 8, - 0x001f38, 0x001f3f, INTEGER_OFFSET, -8, - 0x001f40, 0x001f45, INTEGER_OFFSET, 8, - 0x001f48, 0x001f4d, INTEGER_OFFSET, -8, - 0x001f51, 0x001f51, INTEGER_OFFSET, 8, - 0x001f53, 0x001f53, INTEGER_OFFSET, 8, - 0x001f55, 0x001f55, INTEGER_OFFSET, 8, - 0x001f57, 0x001f57, INTEGER_OFFSET, 8, - 0x001f59, 0x001f59, INTEGER_OFFSET, -8, - 0x001f5b, 0x001f5b, INTEGER_OFFSET, -8, - 0x001f5d, 0x001f5d, INTEGER_OFFSET, -8, - 0x001f5f, 0x001f5f, INTEGER_OFFSET, -8, - 0x001f60, 0x001f67, INTEGER_OFFSET, 8, - 0x001f68, 0x001f6f, INTEGER_OFFSET, -8, - 0x001f70, 0x001f71, INTEGER_OFFSET, 74, - 0x001f72, 0x001f75, INTEGER_OFFSET, 86, - 0x001f76, 0x001f77, INTEGER_OFFSET, 100, - 0x001f78, 0x001f79, INTEGER_OFFSET, 128, - 0x001f7a, 0x001f7b, INTEGER_OFFSET, 112, - 0x001f7c, 0x001f7d, INTEGER_OFFSET, 126, - 0x001f80, 0x001f87, INTEGER_OFFSET, 8, - 0x001f88, 0x001f8f, INTEGER_OFFSET, -8, - 0x001f90, 0x001f97, INTEGER_OFFSET, 8, - 0x001f98, 0x001f9f, INTEGER_OFFSET, -8, - 0x001fa0, 0x001fa7, INTEGER_OFFSET, 8, - 0x001fa8, 0x001faf, INTEGER_OFFSET, -8, - 0x001fb0, 0x001fb1, INTEGER_OFFSET, 8, - 0x001fb3, 0x001fb3, INTEGER_OFFSET, 9, - 0x001fb8, 0x001fb9, INTEGER_OFFSET, -8, - 0x001fba, 0x001fbb, INTEGER_OFFSET, -74, - 0x001fbc, 0x001fbc, INTEGER_OFFSET, -9, - 0x001fbe, 0x001fbe, DIRECT_MAPPING, 5, - 0x001fc3, 0x001fc3, INTEGER_OFFSET, 9, - 0x001fc8, 0x001fcb, INTEGER_OFFSET, -86, - 0x001fcc, 0x001fcc, INTEGER_OFFSET, -9, - 0x001fd0, 0x001fd1, INTEGER_OFFSET, 8, - 0x001fd8, 0x001fd9, INTEGER_OFFSET, -8, - 0x001fda, 0x001fdb, INTEGER_OFFSET, -100, - 0x001fe0, 0x001fe1, INTEGER_OFFSET, 8, - 
0x001fe5, 0x001fe5, INTEGER_OFFSET, 7, - 0x001fe8, 0x001fe9, INTEGER_OFFSET, -8, - 0x001fea, 0x001feb, INTEGER_OFFSET, -112, - 0x001fec, 0x001fec, INTEGER_OFFSET, -7, - 0x001ff3, 0x001ff3, INTEGER_OFFSET, 9, - 0x001ff8, 0x001ff9, INTEGER_OFFSET, -128, - 0x001ffa, 0x001ffb, INTEGER_OFFSET, -126, - 0x001ffc, 0x001ffc, INTEGER_OFFSET, -9, - 0x002126, 0x002126, DIRECT_MAPPING, 27, - 0x00212a, 0x00212a, DIRECT_MAPPING, 23, - 0x00212b, 0x00212b, DIRECT_MAPPING, 25, - 0x002132, 0x002132, INTEGER_OFFSET, 28, - 0x00214e, 0x00214e, INTEGER_OFFSET, -28, - 0x002160, 0x00216f, INTEGER_OFFSET, 16, - 0x002170, 0x00217f, INTEGER_OFFSET, -16, - 0x002183, 0x002184, ALTERNATING_UL, 0, - 0x0024b6, 0x0024cf, INTEGER_OFFSET, 26, - 0x0024d0, 0x0024e9, INTEGER_OFFSET, -26, - 0x002c00, 0x002c2f, INTEGER_OFFSET, 48, - 0x002c30, 0x002c5f, INTEGER_OFFSET, -48, - 0x002c60, 0x002c61, ALTERNATING_AL, 0, - 0x002c62, 0x002c62, INTEGER_OFFSET, -10743, - 0x002c63, 0x002c63, INTEGER_OFFSET, -3814, - 0x002c64, 0x002c64, INTEGER_OFFSET, -10727, - 0x002c65, 0x002c65, INTEGER_OFFSET, -10795, - 0x002c66, 0x002c66, INTEGER_OFFSET, -10792, - 0x002c67, 0x002c6c, ALTERNATING_UL, 0, - 0x002c6d, 0x002c6d, INTEGER_OFFSET, -10780, - 0x002c6e, 0x002c6e, INTEGER_OFFSET, -10749, - 0x002c6f, 0x002c6f, INTEGER_OFFSET, -10783, - 0x002c70, 0x002c70, INTEGER_OFFSET, -10782, - 0x002c72, 0x002c73, ALTERNATING_AL, 0, - 0x002c75, 0x002c76, ALTERNATING_UL, 0, - 0x002c7e, 0x002c7f, INTEGER_OFFSET, -10815, - 0x002c80, 0x002ce3, ALTERNATING_AL, 0, - 0x002ceb, 0x002cee, ALTERNATING_UL, 0, - 0x002cf2, 0x002cf3, ALTERNATING_AL, 0, - 0x002d00, 0x002d25, INTEGER_OFFSET, -7264, - 0x002d27, 0x002d27, INTEGER_OFFSET, -7264, - 0x002d2d, 0x002d2d, INTEGER_OFFSET, -7264, - 0x00a640, 0x00a649, ALTERNATING_AL, 0, - 0x00a64a, 0x00a64b, DIRECT_MAPPING, 21, - 0x00a64c, 0x00a66d, ALTERNATING_AL, 0, - 0x00a680, 0x00a69b, ALTERNATING_AL, 0, - 0x00a722, 0x00a72f, ALTERNATING_AL, 0, - 0x00a732, 0x00a76f, ALTERNATING_AL, 0, - 0x00a779, 0x00a77c, 
ALTERNATING_UL, 0, - 0x00a77d, 0x00a77d, INTEGER_OFFSET, -35332, - 0x00a77e, 0x00a787, ALTERNATING_AL, 0, - 0x00a78b, 0x00a78c, ALTERNATING_UL, 0, - 0x00a78d, 0x00a78d, INTEGER_OFFSET, -42280, - 0x00a790, 0x00a793, ALTERNATING_AL, 0, - 0x00a794, 0x00a794, INTEGER_OFFSET, 48, - 0x00a796, 0x00a7a9, ALTERNATING_AL, 0, - 0x00a7aa, 0x00a7aa, INTEGER_OFFSET, -42308, - 0x00a7ab, 0x00a7ab, INTEGER_OFFSET, -42319, - 0x00a7ac, 0x00a7ac, INTEGER_OFFSET, -42315, - 0x00a7ad, 0x00a7ad, INTEGER_OFFSET, -42305, - 0x00a7ae, 0x00a7ae, INTEGER_OFFSET, -42308, - 0x00a7b0, 0x00a7b0, INTEGER_OFFSET, -42258, - 0x00a7b1, 0x00a7b1, INTEGER_OFFSET, -42282, - 0x00a7b2, 0x00a7b2, INTEGER_OFFSET, -42261, - 0x00a7b3, 0x00a7b3, INTEGER_OFFSET, 928, - 0x00a7b4, 0x00a7c3, ALTERNATING_AL, 0, - 0x00a7c4, 0x00a7c4, INTEGER_OFFSET, -48, - 0x00a7c5, 0x00a7c5, INTEGER_OFFSET, -42307, - 0x00a7c6, 0x00a7c6, INTEGER_OFFSET, -35384, - 0x00a7c7, 0x00a7ca, ALTERNATING_UL, 0, - 0x00a7d0, 0x00a7d1, ALTERNATING_AL, 0, - 0x00a7d6, 0x00a7d9, ALTERNATING_AL, 0, - 0x00a7f5, 0x00a7f6, ALTERNATING_UL, 0, - 0x00ab53, 0x00ab53, INTEGER_OFFSET, -928, - 0x00ab70, 0x00abbf, INTEGER_OFFSET, -38864, - 0x00ff21, 0x00ff3a, INTEGER_OFFSET, 32, - 0x00ff41, 0x00ff5a, INTEGER_OFFSET, -32, - 0x010400, 0x010427, INTEGER_OFFSET, 40, - 0x010428, 0x01044f, INTEGER_OFFSET, -40, - 0x0104b0, 0x0104d3, INTEGER_OFFSET, 40, - 0x0104d8, 0x0104fb, INTEGER_OFFSET, -40, - 0x010570, 0x01057a, INTEGER_OFFSET, 39, - 0x01057c, 0x01058a, INTEGER_OFFSET, 39, - 0x01058c, 0x010592, INTEGER_OFFSET, 39, - 0x010594, 0x010595, INTEGER_OFFSET, 39, - 0x010597, 0x0105a1, INTEGER_OFFSET, -39, - 0x0105a3, 0x0105b1, INTEGER_OFFSET, -39, - 0x0105b3, 0x0105b9, INTEGER_OFFSET, -39, - 0x0105bb, 0x0105bc, INTEGER_OFFSET, -39, - 0x010c80, 0x010cb2, INTEGER_OFFSET, 64, - 0x010cc0, 0x010cf2, INTEGER_OFFSET, -64, - 0x0118a0, 0x0118bf, INTEGER_OFFSET, 32, - 0x0118c0, 0x0118df, INTEGER_OFFSET, -32, - 0x016e40, 0x016e5f, INTEGER_OFFSET, 32, - 0x016e60, 0x016e7f, 
INTEGER_OFFSET, -32, - 0x01e900, 0x01e921, INTEGER_OFFSET, 34, - 0x01e922, 0x01e943, INTEGER_OFFSET, -34 - }); - - public static final CaseFoldTableImpl PYTHON_ASCII_TABLE_ENTRIES = new CaseFoldTableImpl(new int[]{ - 0x000041, 0x00005a, INTEGER_OFFSET, 32, - 0x000061, 0x00007a, INTEGER_OFFSET, -32 - }); - - public static final CaseFoldTableImpl PYTHON_UNICODE_TABLE_ENTRIES = new CaseFoldTableImpl(new int[]{ - 0x000041, 0x000048, INTEGER_OFFSET, 32, - 0x000049, 0x000049, DIRECT_MAPPING, 28, - 0x00004a, 0x00004a, INTEGER_OFFSET, 32, - 0x00004b, 0x00004b, DIRECT_MAPPING, 23, - 0x00004c, 0x000052, INTEGER_OFFSET, 32, - 0x000053, 0x000053, DIRECT_MAPPING, 24, - 0x000054, 0x00005a, INTEGER_OFFSET, 32, - 0x000061, 0x000068, INTEGER_OFFSET, -32, - 0x000069, 0x000069, DIRECT_MAPPING, 28, - 0x00006a, 0x00006a, INTEGER_OFFSET, -32, - 0x00006b, 0x00006b, DIRECT_MAPPING, 23, - 0x00006c, 0x000072, INTEGER_OFFSET, -32, - 0x000073, 0x000073, DIRECT_MAPPING, 24, - 0x000074, 0x00007a, INTEGER_OFFSET, -32, - 0x0000b5, 0x0000b5, DIRECT_MAPPING, 0, - 0x0000c0, 0x0000c4, INTEGER_OFFSET, 32, - 0x0000c5, 0x0000c5, DIRECT_MAPPING, 25, - 0x0000c6, 0x0000d6, INTEGER_OFFSET, 32, - 0x0000d8, 0x0000de, INTEGER_OFFSET, 32, - 0x0000df, 0x0000df, INTEGER_OFFSET, 7615, - 0x0000e0, 0x0000e4, INTEGER_OFFSET, -32, - 0x0000e5, 0x0000e5, DIRECT_MAPPING, 25, - 0x0000e6, 0x0000f6, INTEGER_OFFSET, -32, - 0x0000f8, 0x0000fe, INTEGER_OFFSET, -32, - 0x0000ff, 0x0000ff, INTEGER_OFFSET, 121, - 0x000100, 0x00012f, ALTERNATING_AL, 0, - 0x000130, 0x000131, DIRECT_MAPPING, 28, - 0x000132, 0x000137, ALTERNATING_AL, 0, - 0x000139, 0x000148, ALTERNATING_UL, 0, - 0x00014a, 0x000177, ALTERNATING_AL, 0, - 0x000178, 0x000178, INTEGER_OFFSET, -121, - 0x000179, 0x00017e, ALTERNATING_UL, 0, - 0x00017f, 0x00017f, DIRECT_MAPPING, 24, - 0x000180, 0x000180, INTEGER_OFFSET, 195, - 0x000181, 0x000181, INTEGER_OFFSET, 210, - 0x000182, 0x000185, ALTERNATING_AL, 0, - 0x000186, 0x000186, INTEGER_OFFSET, 206, - 0x000187, 0x000188, 
ALTERNATING_UL, 0, - 0x000189, 0x00018a, INTEGER_OFFSET, 205, - 0x00018b, 0x00018c, ALTERNATING_UL, 0, - 0x00018e, 0x00018e, INTEGER_OFFSET, 79, - 0x00018f, 0x00018f, INTEGER_OFFSET, 202, - 0x000190, 0x000190, INTEGER_OFFSET, 203, - 0x000191, 0x000192, ALTERNATING_UL, 0, - 0x000193, 0x000193, INTEGER_OFFSET, 205, - 0x000194, 0x000194, INTEGER_OFFSET, 207, - 0x000195, 0x000195, INTEGER_OFFSET, 97, - 0x000196, 0x000196, INTEGER_OFFSET, 211, - 0x000197, 0x000197, INTEGER_OFFSET, 209, - 0x000198, 0x000199, ALTERNATING_AL, 0, - 0x00019a, 0x00019a, INTEGER_OFFSET, 163, - 0x00019c, 0x00019c, INTEGER_OFFSET, 211, - 0x00019d, 0x00019d, INTEGER_OFFSET, 213, - 0x00019e, 0x00019e, INTEGER_OFFSET, 130, - 0x00019f, 0x00019f, INTEGER_OFFSET, 214, - 0x0001a0, 0x0001a5, ALTERNATING_AL, 0, - 0x0001a6, 0x0001a6, INTEGER_OFFSET, 218, - 0x0001a7, 0x0001a8, ALTERNATING_UL, 0, - 0x0001a9, 0x0001a9, INTEGER_OFFSET, 218, - 0x0001ac, 0x0001ad, ALTERNATING_AL, 0, - 0x0001ae, 0x0001ae, INTEGER_OFFSET, 218, - 0x0001af, 0x0001b0, ALTERNATING_UL, 0, - 0x0001b1, 0x0001b2, INTEGER_OFFSET, 217, - 0x0001b3, 0x0001b6, ALTERNATING_UL, 0, - 0x0001b7, 0x0001b7, INTEGER_OFFSET, 219, - 0x0001b8, 0x0001b9, ALTERNATING_AL, 0, - 0x0001bc, 0x0001bd, ALTERNATING_AL, 0, - 0x0001bf, 0x0001bf, INTEGER_OFFSET, 56, - 0x0001c4, 0x0001c6, DIRECT_MAPPING, 1, - 0x0001c7, 0x0001c9, DIRECT_MAPPING, 2, - 0x0001ca, 0x0001cc, DIRECT_MAPPING, 3, - 0x0001cd, 0x0001dc, ALTERNATING_UL, 0, - 0x0001dd, 0x0001dd, INTEGER_OFFSET, -79, - 0x0001de, 0x0001ef, ALTERNATING_AL, 0, - 0x0001f1, 0x0001f3, DIRECT_MAPPING, 4, - 0x0001f4, 0x0001f5, ALTERNATING_AL, 0, - 0x0001f6, 0x0001f6, INTEGER_OFFSET, -97, - 0x0001f7, 0x0001f7, INTEGER_OFFSET, -56, - 0x0001f8, 0x00021f, ALTERNATING_AL, 0, - 0x000220, 0x000220, INTEGER_OFFSET, -130, - 0x000222, 0x000233, ALTERNATING_AL, 0, - 0x00023a, 0x00023a, INTEGER_OFFSET, 10795, - 0x00023b, 0x00023c, ALTERNATING_UL, 0, - 0x00023d, 0x00023d, INTEGER_OFFSET, -163, - 0x00023e, 0x00023e, INTEGER_OFFSET, 
10792, - 0x00023f, 0x000240, INTEGER_OFFSET, 10815, - 0x000241, 0x000242, ALTERNATING_UL, 0, - 0x000243, 0x000243, INTEGER_OFFSET, -195, - 0x000244, 0x000244, INTEGER_OFFSET, 69, - 0x000245, 0x000245, INTEGER_OFFSET, 71, - 0x000246, 0x00024f, ALTERNATING_AL, 0, - 0x000250, 0x000250, INTEGER_OFFSET, 10783, - 0x000251, 0x000251, INTEGER_OFFSET, 10780, - 0x000252, 0x000252, INTEGER_OFFSET, 10782, - 0x000253, 0x000253, INTEGER_OFFSET, -210, - 0x000254, 0x000254, INTEGER_OFFSET, -206, - 0x000256, 0x000257, INTEGER_OFFSET, -205, - 0x000259, 0x000259, INTEGER_OFFSET, -202, - 0x00025b, 0x00025b, INTEGER_OFFSET, -203, - 0x00025c, 0x00025c, INTEGER_OFFSET, 42319, - 0x000260, 0x000260, INTEGER_OFFSET, -205, - 0x000261, 0x000261, INTEGER_OFFSET, 42315, - 0x000263, 0x000263, INTEGER_OFFSET, -207, - 0x000265, 0x000265, INTEGER_OFFSET, 42280, - 0x000266, 0x000266, INTEGER_OFFSET, 42308, - 0x000268, 0x000268, INTEGER_OFFSET, -209, - 0x000269, 0x000269, INTEGER_OFFSET, -211, - 0x00026a, 0x00026a, INTEGER_OFFSET, 42308, - 0x00026b, 0x00026b, INTEGER_OFFSET, 10743, - 0x00026c, 0x00026c, INTEGER_OFFSET, 42305, - 0x00026f, 0x00026f, INTEGER_OFFSET, -211, - 0x000271, 0x000271, INTEGER_OFFSET, 10749, - 0x000272, 0x000272, INTEGER_OFFSET, -213, - 0x000275, 0x000275, INTEGER_OFFSET, -214, - 0x00027d, 0x00027d, INTEGER_OFFSET, 10727, - 0x000280, 0x000280, INTEGER_OFFSET, -218, - 0x000282, 0x000282, INTEGER_OFFSET, 42307, - 0x000283, 0x000283, INTEGER_OFFSET, -218, - 0x000287, 0x000287, INTEGER_OFFSET, 42282, - 0x000288, 0x000288, INTEGER_OFFSET, -218, - 0x000289, 0x000289, INTEGER_OFFSET, -69, - 0x00028a, 0x00028b, INTEGER_OFFSET, -217, - 0x00028c, 0x00028c, INTEGER_OFFSET, -71, - 0x000292, 0x000292, INTEGER_OFFSET, -219, - 0x00029d, 0x00029d, INTEGER_OFFSET, 42261, - 0x00029e, 0x00029e, INTEGER_OFFSET, 42258, - 0x000345, 0x000345, DIRECT_MAPPING, 5, - 0x000370, 0x000373, ALTERNATING_AL, 0, - 0x000376, 0x000377, ALTERNATING_AL, 0, - 0x00037b, 0x00037d, INTEGER_OFFSET, 130, - 0x00037f, 
0x00037f, INTEGER_OFFSET, 116, - 0x000386, 0x000386, INTEGER_OFFSET, 38, - 0x000388, 0x00038a, INTEGER_OFFSET, 37, - 0x00038c, 0x00038c, INTEGER_OFFSET, 64, - 0x00038e, 0x00038f, INTEGER_OFFSET, 63, - 0x000390, 0x000390, INTEGER_OFFSET, 7235, - 0x000391, 0x000391, INTEGER_OFFSET, 32, - 0x000392, 0x000392, DIRECT_MAPPING, 6, - 0x000393, 0x000394, INTEGER_OFFSET, 32, - 0x000395, 0x000395, DIRECT_MAPPING, 7, - 0x000396, 0x000397, INTEGER_OFFSET, 32, - 0x000398, 0x000398, DIRECT_MAPPING, 26, - 0x000399, 0x000399, DIRECT_MAPPING, 5, - 0x00039a, 0x00039a, DIRECT_MAPPING, 9, - 0x00039b, 0x00039b, INTEGER_OFFSET, 32, - 0x00039c, 0x00039c, DIRECT_MAPPING, 0, - 0x00039d, 0x00039f, INTEGER_OFFSET, 32, - 0x0003a0, 0x0003a0, DIRECT_MAPPING, 10, - 0x0003a1, 0x0003a1, DIRECT_MAPPING, 11, - 0x0003a3, 0x0003a3, DIRECT_MAPPING, 12, - 0x0003a4, 0x0003a5, INTEGER_OFFSET, 32, - 0x0003a6, 0x0003a6, DIRECT_MAPPING, 13, - 0x0003a7, 0x0003a8, INTEGER_OFFSET, 32, - 0x0003a9, 0x0003a9, DIRECT_MAPPING, 27, - 0x0003aa, 0x0003ab, INTEGER_OFFSET, 32, - 0x0003ac, 0x0003ac, INTEGER_OFFSET, -38, - 0x0003ad, 0x0003af, INTEGER_OFFSET, -37, - 0x0003b0, 0x0003b0, INTEGER_OFFSET, 7219, - 0x0003b1, 0x0003b1, INTEGER_OFFSET, -32, - 0x0003b2, 0x0003b2, DIRECT_MAPPING, 6, - 0x0003b3, 0x0003b4, INTEGER_OFFSET, -32, - 0x0003b5, 0x0003b5, DIRECT_MAPPING, 7, - 0x0003b6, 0x0003b7, INTEGER_OFFSET, -32, - 0x0003b8, 0x0003b8, DIRECT_MAPPING, 26, - 0x0003b9, 0x0003b9, DIRECT_MAPPING, 5, - 0x0003ba, 0x0003ba, DIRECT_MAPPING, 9, - 0x0003bb, 0x0003bb, INTEGER_OFFSET, -32, - 0x0003bc, 0x0003bc, DIRECT_MAPPING, 0, - 0x0003bd, 0x0003bf, INTEGER_OFFSET, -32, - 0x0003c0, 0x0003c0, DIRECT_MAPPING, 10, - 0x0003c1, 0x0003c1, DIRECT_MAPPING, 11, - 0x0003c2, 0x0003c3, DIRECT_MAPPING, 12, - 0x0003c4, 0x0003c5, INTEGER_OFFSET, -32, - 0x0003c6, 0x0003c6, DIRECT_MAPPING, 13, - 0x0003c7, 0x0003c8, INTEGER_OFFSET, -32, - 0x0003c9, 0x0003c9, DIRECT_MAPPING, 27, - 0x0003ca, 0x0003cb, INTEGER_OFFSET, -32, - 0x0003cc, 0x0003cc, 
INTEGER_OFFSET, -64, - 0x0003cd, 0x0003ce, INTEGER_OFFSET, -63, - 0x0003cf, 0x0003cf, INTEGER_OFFSET, 8, - 0x0003d0, 0x0003d0, DIRECT_MAPPING, 6, - 0x0003d1, 0x0003d1, DIRECT_MAPPING, 26, - 0x0003d5, 0x0003d5, DIRECT_MAPPING, 13, - 0x0003d6, 0x0003d6, DIRECT_MAPPING, 10, - 0x0003d7, 0x0003d7, INTEGER_OFFSET, -8, - 0x0003d8, 0x0003ef, ALTERNATING_AL, 0, - 0x0003f0, 0x0003f0, DIRECT_MAPPING, 9, - 0x0003f1, 0x0003f1, DIRECT_MAPPING, 11, - 0x0003f2, 0x0003f2, INTEGER_OFFSET, 7, - 0x0003f3, 0x0003f3, INTEGER_OFFSET, -116, - 0x0003f4, 0x0003f4, DIRECT_MAPPING, 26, - 0x0003f5, 0x0003f5, DIRECT_MAPPING, 7, - 0x0003f7, 0x0003f8, ALTERNATING_UL, 0, - 0x0003f9, 0x0003f9, INTEGER_OFFSET, -7, - 0x0003fa, 0x0003fb, ALTERNATING_AL, 0, - 0x0003fd, 0x0003ff, INTEGER_OFFSET, -130, - 0x000400, 0x00040f, INTEGER_OFFSET, 80, - 0x000410, 0x000411, INTEGER_OFFSET, 32, - 0x000412, 0x000412, DIRECT_MAPPING, 14, - 0x000413, 0x000413, INTEGER_OFFSET, 32, - 0x000414, 0x000414, DIRECT_MAPPING, 15, - 0x000415, 0x00041d, INTEGER_OFFSET, 32, - 0x00041e, 0x00041e, DIRECT_MAPPING, 16, - 0x00041f, 0x000420, INTEGER_OFFSET, 32, - 0x000421, 0x000421, DIRECT_MAPPING, 17, - 0x000422, 0x000422, DIRECT_MAPPING, 18, - 0x000423, 0x000429, INTEGER_OFFSET, 32, - 0x00042a, 0x00042a, DIRECT_MAPPING, 19, - 0x00042b, 0x00042f, INTEGER_OFFSET, 32, - 0x000430, 0x000431, INTEGER_OFFSET, -32, - 0x000432, 0x000432, DIRECT_MAPPING, 14, - 0x000433, 0x000433, INTEGER_OFFSET, -32, - 0x000434, 0x000434, DIRECT_MAPPING, 15, - 0x000435, 0x00043d, INTEGER_OFFSET, -32, - 0x00043e, 0x00043e, DIRECT_MAPPING, 16, - 0x00043f, 0x000440, INTEGER_OFFSET, -32, - 0x000441, 0x000441, DIRECT_MAPPING, 17, - 0x000442, 0x000442, DIRECT_MAPPING, 18, - 0x000443, 0x000449, INTEGER_OFFSET, -32, - 0x00044a, 0x00044a, DIRECT_MAPPING, 19, - 0x00044b, 0x00044f, INTEGER_OFFSET, -32, - 0x000450, 0x00045f, INTEGER_OFFSET, -80, - 0x000460, 0x000461, ALTERNATING_AL, 0, - 0x000462, 0x000463, DIRECT_MAPPING, 20, - 0x000464, 0x000481, ALTERNATING_AL, 0, - 
0x00048a, 0x0004bf, ALTERNATING_AL, 0, - 0x0004c0, 0x0004c0, INTEGER_OFFSET, 15, - 0x0004c1, 0x0004ce, ALTERNATING_UL, 0, - 0x0004cf, 0x0004cf, INTEGER_OFFSET, -15, - 0x0004d0, 0x00052f, ALTERNATING_AL, 0, - 0x000531, 0x000556, INTEGER_OFFSET, 48, - 0x000561, 0x000586, INTEGER_OFFSET, -48, - 0x0010a0, 0x0010c5, INTEGER_OFFSET, 7264, - 0x0010c7, 0x0010c7, INTEGER_OFFSET, 7264, - 0x0010cd, 0x0010cd, INTEGER_OFFSET, 7264, - 0x0010d0, 0x0010fa, INTEGER_OFFSET, 3008, - 0x0010fd, 0x0010ff, INTEGER_OFFSET, 3008, - 0x0013a0, 0x0013ef, INTEGER_OFFSET, 38864, - 0x0013f0, 0x0013f5, INTEGER_OFFSET, 8, - 0x0013f8, 0x0013fd, INTEGER_OFFSET, -8, - 0x001c80, 0x001c80, DIRECT_MAPPING, 14, - 0x001c81, 0x001c81, DIRECT_MAPPING, 15, - 0x001c82, 0x001c82, DIRECT_MAPPING, 16, - 0x001c83, 0x001c83, DIRECT_MAPPING, 17, - 0x001c84, 0x001c85, DIRECT_MAPPING, 18, - 0x001c86, 0x001c86, DIRECT_MAPPING, 19, - 0x001c87, 0x001c87, DIRECT_MAPPING, 20, - 0x001c88, 0x001c88, DIRECT_MAPPING, 21, - 0x001c90, 0x001cba, INTEGER_OFFSET, -3008, - 0x001cbd, 0x001cbf, INTEGER_OFFSET, -3008, - 0x001d79, 0x001d79, INTEGER_OFFSET, 35332, - 0x001d7d, 0x001d7d, INTEGER_OFFSET, 3814, - 0x001d8e, 0x001d8e, INTEGER_OFFSET, 35384, - 0x001e00, 0x001e5f, ALTERNATING_AL, 0, - 0x001e60, 0x001e61, DIRECT_MAPPING, 22, - 0x001e62, 0x001e95, ALTERNATING_AL, 0, - 0x001e9b, 0x001e9b, DIRECT_MAPPING, 22, - 0x001e9e, 0x001e9e, INTEGER_OFFSET, -7615, - 0x001ea0, 0x001eff, ALTERNATING_AL, 0, - 0x001f00, 0x001f07, INTEGER_OFFSET, 8, - 0x001f08, 0x001f0f, INTEGER_OFFSET, -8, - 0x001f10, 0x001f15, INTEGER_OFFSET, 8, - 0x001f18, 0x001f1d, INTEGER_OFFSET, -8, - 0x001f20, 0x001f27, INTEGER_OFFSET, 8, - 0x001f28, 0x001f2f, INTEGER_OFFSET, -8, - 0x001f30, 0x001f37, INTEGER_OFFSET, 8, - 0x001f38, 0x001f3f, INTEGER_OFFSET, -8, - 0x001f40, 0x001f45, INTEGER_OFFSET, 8, - 0x001f48, 0x001f4d, INTEGER_OFFSET, -8, - 0x001f51, 0x001f51, INTEGER_OFFSET, 8, - 0x001f53, 0x001f53, INTEGER_OFFSET, 8, - 0x001f55, 0x001f55, INTEGER_OFFSET, 8, - 
0x001f57, 0x001f57, INTEGER_OFFSET, 8, - 0x001f59, 0x001f59, INTEGER_OFFSET, -8, - 0x001f5b, 0x001f5b, INTEGER_OFFSET, -8, - 0x001f5d, 0x001f5d, INTEGER_OFFSET, -8, - 0x001f5f, 0x001f5f, INTEGER_OFFSET, -8, - 0x001f60, 0x001f67, INTEGER_OFFSET, 8, - 0x001f68, 0x001f6f, INTEGER_OFFSET, -8, - 0x001f70, 0x001f71, INTEGER_OFFSET, 74, - 0x001f72, 0x001f75, INTEGER_OFFSET, 86, - 0x001f76, 0x001f77, INTEGER_OFFSET, 100, - 0x001f78, 0x001f79, INTEGER_OFFSET, 128, - 0x001f7a, 0x001f7b, INTEGER_OFFSET, 112, - 0x001f7c, 0x001f7d, INTEGER_OFFSET, 126, - 0x001f80, 0x001f87, INTEGER_OFFSET, 8, - 0x001f88, 0x001f8f, INTEGER_OFFSET, -8, - 0x001f90, 0x001f97, INTEGER_OFFSET, 8, - 0x001f98, 0x001f9f, INTEGER_OFFSET, -8, - 0x001fa0, 0x001fa7, INTEGER_OFFSET, 8, - 0x001fa8, 0x001faf, INTEGER_OFFSET, -8, - 0x001fb0, 0x001fb1, INTEGER_OFFSET, 8, - 0x001fb3, 0x001fb3, INTEGER_OFFSET, 9, - 0x001fb8, 0x001fb9, INTEGER_OFFSET, -8, - 0x001fba, 0x001fbb, INTEGER_OFFSET, -74, - 0x001fbc, 0x001fbc, INTEGER_OFFSET, -9, - 0x001fbe, 0x001fbe, DIRECT_MAPPING, 5, - 0x001fc3, 0x001fc3, INTEGER_OFFSET, 9, - 0x001fc8, 0x001fcb, INTEGER_OFFSET, -86, - 0x001fcc, 0x001fcc, INTEGER_OFFSET, -9, - 0x001fd0, 0x001fd1, INTEGER_OFFSET, 8, - 0x001fd3, 0x001fd3, INTEGER_OFFSET, -7235, - 0x001fd8, 0x001fd9, INTEGER_OFFSET, -8, - 0x001fda, 0x001fdb, INTEGER_OFFSET, -100, - 0x001fe0, 0x001fe1, INTEGER_OFFSET, 8, - 0x001fe3, 0x001fe3, INTEGER_OFFSET, -7219, - 0x001fe5, 0x001fe5, INTEGER_OFFSET, 7, - 0x001fe8, 0x001fe9, INTEGER_OFFSET, -8, - 0x001fea, 0x001feb, INTEGER_OFFSET, -112, - 0x001fec, 0x001fec, INTEGER_OFFSET, -7, - 0x001ff3, 0x001ff3, INTEGER_OFFSET, 9, - 0x001ff8, 0x001ff9, INTEGER_OFFSET, -128, - 0x001ffa, 0x001ffb, INTEGER_OFFSET, -126, - 0x001ffc, 0x001ffc, INTEGER_OFFSET, -9, - 0x002126, 0x002126, DIRECT_MAPPING, 27, - 0x00212a, 0x00212a, DIRECT_MAPPING, 23, - 0x00212b, 0x00212b, DIRECT_MAPPING, 25, - 0x002132, 0x002132, INTEGER_OFFSET, 28, - 0x00214e, 0x00214e, INTEGER_OFFSET, -28, - 0x002160, 
0x00216f, INTEGER_OFFSET, 16, - 0x002170, 0x00217f, INTEGER_OFFSET, -16, - 0x002183, 0x002184, ALTERNATING_UL, 0, - 0x0024b6, 0x0024cf, INTEGER_OFFSET, 26, - 0x0024d0, 0x0024e9, INTEGER_OFFSET, -26, - 0x002c00, 0x002c2f, INTEGER_OFFSET, 48, - 0x002c30, 0x002c5f, INTEGER_OFFSET, -48, - 0x002c60, 0x002c61, ALTERNATING_AL, 0, - 0x002c62, 0x002c62, INTEGER_OFFSET, -10743, - 0x002c63, 0x002c63, INTEGER_OFFSET, -3814, - 0x002c64, 0x002c64, INTEGER_OFFSET, -10727, - 0x002c65, 0x002c65, INTEGER_OFFSET, -10795, - 0x002c66, 0x002c66, INTEGER_OFFSET, -10792, - 0x002c67, 0x002c6c, ALTERNATING_UL, 0, - 0x002c6d, 0x002c6d, INTEGER_OFFSET, -10780, - 0x002c6e, 0x002c6e, INTEGER_OFFSET, -10749, - 0x002c6f, 0x002c6f, INTEGER_OFFSET, -10783, - 0x002c70, 0x002c70, INTEGER_OFFSET, -10782, - 0x002c72, 0x002c73, ALTERNATING_AL, 0, - 0x002c75, 0x002c76, ALTERNATING_UL, 0, - 0x002c7e, 0x002c7f, INTEGER_OFFSET, -10815, - 0x002c80, 0x002ce3, ALTERNATING_AL, 0, - 0x002ceb, 0x002cee, ALTERNATING_UL, 0, - 0x002cf2, 0x002cf3, ALTERNATING_AL, 0, - 0x002d00, 0x002d25, INTEGER_OFFSET, -7264, - 0x002d27, 0x002d27, INTEGER_OFFSET, -7264, - 0x002d2d, 0x002d2d, INTEGER_OFFSET, -7264, - 0x00a640, 0x00a649, ALTERNATING_AL, 0, - 0x00a64a, 0x00a64b, DIRECT_MAPPING, 21, - 0x00a64c, 0x00a66d, ALTERNATING_AL, 0, - 0x00a680, 0x00a69b, ALTERNATING_AL, 0, - 0x00a722, 0x00a72f, ALTERNATING_AL, 0, - 0x00a732, 0x00a76f, ALTERNATING_AL, 0, - 0x00a779, 0x00a77c, ALTERNATING_UL, 0, - 0x00a77d, 0x00a77d, INTEGER_OFFSET, -35332, - 0x00a77e, 0x00a787, ALTERNATING_AL, 0, - 0x00a78b, 0x00a78c, ALTERNATING_UL, 0, - 0x00a78d, 0x00a78d, INTEGER_OFFSET, -42280, - 0x00a790, 0x00a793, ALTERNATING_AL, 0, - 0x00a794, 0x00a794, INTEGER_OFFSET, 48, - 0x00a796, 0x00a7a9, ALTERNATING_AL, 0, - 0x00a7aa, 0x00a7aa, INTEGER_OFFSET, -42308, - 0x00a7ab, 0x00a7ab, INTEGER_OFFSET, -42319, - 0x00a7ac, 0x00a7ac, INTEGER_OFFSET, -42315, - 0x00a7ad, 0x00a7ad, INTEGER_OFFSET, -42305, - 0x00a7ae, 0x00a7ae, INTEGER_OFFSET, -42308, - 0x00a7b0, 
0x00a7b0, INTEGER_OFFSET, -42258, - 0x00a7b1, 0x00a7b1, INTEGER_OFFSET, -42282, - 0x00a7b2, 0x00a7b2, INTEGER_OFFSET, -42261, - 0x00a7b3, 0x00a7b3, INTEGER_OFFSET, 928, - 0x00a7b4, 0x00a7c3, ALTERNATING_AL, 0, - 0x00a7c4, 0x00a7c4, INTEGER_OFFSET, -48, - 0x00a7c5, 0x00a7c5, INTEGER_OFFSET, -42307, - 0x00a7c6, 0x00a7c6, INTEGER_OFFSET, -35384, - 0x00a7c7, 0x00a7ca, ALTERNATING_UL, 0, - 0x00a7d0, 0x00a7d1, ALTERNATING_AL, 0, - 0x00a7d6, 0x00a7d9, ALTERNATING_AL, 0, - 0x00a7f5, 0x00a7f6, ALTERNATING_UL, 0, - 0x00ab53, 0x00ab53, INTEGER_OFFSET, -928, - 0x00ab70, 0x00abbf, INTEGER_OFFSET, -38864, - 0x00fb05, 0x00fb06, ALTERNATING_UL, 0, - 0x00ff21, 0x00ff3a, INTEGER_OFFSET, 32, - 0x00ff41, 0x00ff5a, INTEGER_OFFSET, -32, - 0x010400, 0x010427, INTEGER_OFFSET, 40, - 0x010428, 0x01044f, INTEGER_OFFSET, -40, - 0x0104b0, 0x0104d3, INTEGER_OFFSET, 40, - 0x0104d8, 0x0104fb, INTEGER_OFFSET, -40, - 0x010570, 0x01057a, INTEGER_OFFSET, 39, - 0x01057c, 0x01058a, INTEGER_OFFSET, 39, - 0x01058c, 0x010592, INTEGER_OFFSET, 39, - 0x010594, 0x010595, INTEGER_OFFSET, 39, - 0x010597, 0x0105a1, INTEGER_OFFSET, -39, - 0x0105a3, 0x0105b1, INTEGER_OFFSET, -39, - 0x0105b3, 0x0105b9, INTEGER_OFFSET, -39, - 0x0105bb, 0x0105bc, INTEGER_OFFSET, -39, - 0x010c80, 0x010cb2, INTEGER_OFFSET, 64, - 0x010cc0, 0x010cf2, INTEGER_OFFSET, -64, - 0x0118a0, 0x0118bf, INTEGER_OFFSET, 32, - 0x0118c0, 0x0118df, INTEGER_OFFSET, -32, - 0x016e40, 0x016e5f, INTEGER_OFFSET, 32, - 0x016e60, 0x016e7f, INTEGER_OFFSET, -32, - 0x01e900, 0x01e921, INTEGER_OFFSET, 34, - 0x01e922, 0x01e943, INTEGER_OFFSET, -34 - }); - - public static final CaseFoldTableImpl SIMPLE_CASE_FOLDING_ENTRIES = new CaseFoldTableImpl(new int[]{ - 0x000041, 0x00005a, INTEGER_OFFSET, 32, - 0x0000b5, 0x0000b5, INTEGER_OFFSET, 775, - 0x0000c0, 0x0000d6, INTEGER_OFFSET, 32, - 0x0000d8, 0x0000de, INTEGER_OFFSET, 32, - 0x000100, 0x000100, INTEGER_OFFSET, 1, - 0x000102, 0x000102, INTEGER_OFFSET, 1, - 0x000104, 0x000104, INTEGER_OFFSET, 1, - 0x000106, 0x000106, 
INTEGER_OFFSET, 1, - 0x000108, 0x000108, INTEGER_OFFSET, 1, - 0x00010a, 0x00010a, INTEGER_OFFSET, 1, - 0x00010c, 0x00010c, INTEGER_OFFSET, 1, - 0x00010e, 0x00010e, INTEGER_OFFSET, 1, - 0x000110, 0x000110, INTEGER_OFFSET, 1, - 0x000112, 0x000112, INTEGER_OFFSET, 1, - 0x000114, 0x000114, INTEGER_OFFSET, 1, - 0x000116, 0x000116, INTEGER_OFFSET, 1, - 0x000118, 0x000118, INTEGER_OFFSET, 1, - 0x00011a, 0x00011a, INTEGER_OFFSET, 1, - 0x00011c, 0x00011c, INTEGER_OFFSET, 1, - 0x00011e, 0x00011e, INTEGER_OFFSET, 1, - 0x000120, 0x000120, INTEGER_OFFSET, 1, - 0x000122, 0x000122, INTEGER_OFFSET, 1, - 0x000124, 0x000124, INTEGER_OFFSET, 1, - 0x000126, 0x000126, INTEGER_OFFSET, 1, - 0x000128, 0x000128, INTEGER_OFFSET, 1, - 0x00012a, 0x00012a, INTEGER_OFFSET, 1, - 0x00012c, 0x00012c, INTEGER_OFFSET, 1, - 0x00012e, 0x00012e, INTEGER_OFFSET, 1, - 0x000132, 0x000132, INTEGER_OFFSET, 1, - 0x000134, 0x000134, INTEGER_OFFSET, 1, - 0x000136, 0x000136, INTEGER_OFFSET, 1, - 0x000139, 0x000139, INTEGER_OFFSET, 1, - 0x00013b, 0x00013b, INTEGER_OFFSET, 1, - 0x00013d, 0x00013d, INTEGER_OFFSET, 1, - 0x00013f, 0x00013f, INTEGER_OFFSET, 1, - 0x000141, 0x000141, INTEGER_OFFSET, 1, - 0x000143, 0x000143, INTEGER_OFFSET, 1, - 0x000145, 0x000145, INTEGER_OFFSET, 1, - 0x000147, 0x000147, INTEGER_OFFSET, 1, - 0x00014a, 0x00014a, INTEGER_OFFSET, 1, - 0x00014c, 0x00014c, INTEGER_OFFSET, 1, - 0x00014e, 0x00014e, INTEGER_OFFSET, 1, - 0x000150, 0x000150, INTEGER_OFFSET, 1, - 0x000152, 0x000152, INTEGER_OFFSET, 1, - 0x000154, 0x000154, INTEGER_OFFSET, 1, - 0x000156, 0x000156, INTEGER_OFFSET, 1, - 0x000158, 0x000158, INTEGER_OFFSET, 1, - 0x00015a, 0x00015a, INTEGER_OFFSET, 1, - 0x00015c, 0x00015c, INTEGER_OFFSET, 1, - 0x00015e, 0x00015e, INTEGER_OFFSET, 1, - 0x000160, 0x000160, INTEGER_OFFSET, 1, - 0x000162, 0x000162, INTEGER_OFFSET, 1, - 0x000164, 0x000164, INTEGER_OFFSET, 1, - 0x000166, 0x000166, INTEGER_OFFSET, 1, - 0x000168, 0x000168, INTEGER_OFFSET, 1, - 0x00016a, 0x00016a, INTEGER_OFFSET, 1, - 0x00016c, 
0x00016c, INTEGER_OFFSET, 1, - 0x00016e, 0x00016e, INTEGER_OFFSET, 1, - 0x000170, 0x000170, INTEGER_OFFSET, 1, - 0x000172, 0x000172, INTEGER_OFFSET, 1, - 0x000174, 0x000174, INTEGER_OFFSET, 1, - 0x000176, 0x000176, INTEGER_OFFSET, 1, - 0x000178, 0x000178, INTEGER_OFFSET, -121, - 0x000179, 0x000179, INTEGER_OFFSET, 1, - 0x00017b, 0x00017b, INTEGER_OFFSET, 1, - 0x00017d, 0x00017d, INTEGER_OFFSET, 1, - 0x00017f, 0x00017f, INTEGER_OFFSET, -268, - 0x000181, 0x000181, INTEGER_OFFSET, 210, - 0x000182, 0x000182, INTEGER_OFFSET, 1, - 0x000184, 0x000184, INTEGER_OFFSET, 1, - 0x000186, 0x000186, INTEGER_OFFSET, 206, - 0x000187, 0x000187, INTEGER_OFFSET, 1, - 0x000189, 0x00018a, INTEGER_OFFSET, 205, - 0x00018b, 0x00018b, INTEGER_OFFSET, 1, - 0x00018e, 0x00018e, INTEGER_OFFSET, 79, - 0x00018f, 0x00018f, INTEGER_OFFSET, 202, - 0x000190, 0x000190, INTEGER_OFFSET, 203, - 0x000191, 0x000191, INTEGER_OFFSET, 1, - 0x000193, 0x000193, INTEGER_OFFSET, 205, - 0x000194, 0x000194, INTEGER_OFFSET, 207, - 0x000196, 0x000196, INTEGER_OFFSET, 211, - 0x000197, 0x000197, INTEGER_OFFSET, 209, - 0x000198, 0x000198, INTEGER_OFFSET, 1, - 0x00019c, 0x00019c, INTEGER_OFFSET, 211, - 0x00019d, 0x00019d, INTEGER_OFFSET, 213, - 0x00019f, 0x00019f, INTEGER_OFFSET, 214, - 0x0001a0, 0x0001a0, INTEGER_OFFSET, 1, - 0x0001a2, 0x0001a2, INTEGER_OFFSET, 1, - 0x0001a4, 0x0001a4, INTEGER_OFFSET, 1, - 0x0001a6, 0x0001a6, INTEGER_OFFSET, 218, - 0x0001a7, 0x0001a7, INTEGER_OFFSET, 1, - 0x0001a9, 0x0001a9, INTEGER_OFFSET, 218, - 0x0001ac, 0x0001ac, INTEGER_OFFSET, 1, - 0x0001ae, 0x0001ae, INTEGER_OFFSET, 218, - 0x0001af, 0x0001af, INTEGER_OFFSET, 1, - 0x0001b1, 0x0001b2, INTEGER_OFFSET, 217, - 0x0001b3, 0x0001b3, INTEGER_OFFSET, 1, - 0x0001b5, 0x0001b5, INTEGER_OFFSET, 1, - 0x0001b7, 0x0001b7, INTEGER_OFFSET, 219, - 0x0001b8, 0x0001b8, INTEGER_OFFSET, 1, - 0x0001bc, 0x0001bc, INTEGER_OFFSET, 1, - 0x0001c4, 0x0001c4, INTEGER_OFFSET, 2, - 0x0001c5, 0x0001c5, INTEGER_OFFSET, 1, - 0x0001c7, 0x0001c7, INTEGER_OFFSET, 2, - 
0x0001c8, 0x0001c8, INTEGER_OFFSET, 1, - 0x0001ca, 0x0001ca, INTEGER_OFFSET, 2, - 0x0001cb, 0x0001cb, INTEGER_OFFSET, 1, - 0x0001cd, 0x0001cd, INTEGER_OFFSET, 1, - 0x0001cf, 0x0001cf, INTEGER_OFFSET, 1, - 0x0001d1, 0x0001d1, INTEGER_OFFSET, 1, - 0x0001d3, 0x0001d3, INTEGER_OFFSET, 1, - 0x0001d5, 0x0001d5, INTEGER_OFFSET, 1, - 0x0001d7, 0x0001d7, INTEGER_OFFSET, 1, - 0x0001d9, 0x0001d9, INTEGER_OFFSET, 1, - 0x0001db, 0x0001db, INTEGER_OFFSET, 1, - 0x0001de, 0x0001de, INTEGER_OFFSET, 1, - 0x0001e0, 0x0001e0, INTEGER_OFFSET, 1, - 0x0001e2, 0x0001e2, INTEGER_OFFSET, 1, - 0x0001e4, 0x0001e4, INTEGER_OFFSET, 1, - 0x0001e6, 0x0001e6, INTEGER_OFFSET, 1, - 0x0001e8, 0x0001e8, INTEGER_OFFSET, 1, - 0x0001ea, 0x0001ea, INTEGER_OFFSET, 1, - 0x0001ec, 0x0001ec, INTEGER_OFFSET, 1, - 0x0001ee, 0x0001ee, INTEGER_OFFSET, 1, - 0x0001f1, 0x0001f1, INTEGER_OFFSET, 2, - 0x0001f2, 0x0001f2, INTEGER_OFFSET, 1, - 0x0001f4, 0x0001f4, INTEGER_OFFSET, 1, - 0x0001f6, 0x0001f6, INTEGER_OFFSET, -97, - 0x0001f7, 0x0001f7, INTEGER_OFFSET, -56, - 0x0001f8, 0x0001f8, INTEGER_OFFSET, 1, - 0x0001fa, 0x0001fa, INTEGER_OFFSET, 1, - 0x0001fc, 0x0001fc, INTEGER_OFFSET, 1, - 0x0001fe, 0x0001fe, INTEGER_OFFSET, 1, - 0x000200, 0x000200, INTEGER_OFFSET, 1, - 0x000202, 0x000202, INTEGER_OFFSET, 1, - 0x000204, 0x000204, INTEGER_OFFSET, 1, - 0x000206, 0x000206, INTEGER_OFFSET, 1, - 0x000208, 0x000208, INTEGER_OFFSET, 1, - 0x00020a, 0x00020a, INTEGER_OFFSET, 1, - 0x00020c, 0x00020c, INTEGER_OFFSET, 1, - 0x00020e, 0x00020e, INTEGER_OFFSET, 1, - 0x000210, 0x000210, INTEGER_OFFSET, 1, - 0x000212, 0x000212, INTEGER_OFFSET, 1, - 0x000214, 0x000214, INTEGER_OFFSET, 1, - 0x000216, 0x000216, INTEGER_OFFSET, 1, - 0x000218, 0x000218, INTEGER_OFFSET, 1, - 0x00021a, 0x00021a, INTEGER_OFFSET, 1, - 0x00021c, 0x00021c, INTEGER_OFFSET, 1, - 0x00021e, 0x00021e, INTEGER_OFFSET, 1, - 0x000220, 0x000220, INTEGER_OFFSET, -130, - 0x000222, 0x000222, INTEGER_OFFSET, 1, - 0x000224, 0x000224, INTEGER_OFFSET, 1, - 0x000226, 0x000226, 
INTEGER_OFFSET, 1, - 0x000228, 0x000228, INTEGER_OFFSET, 1, - 0x00022a, 0x00022a, INTEGER_OFFSET, 1, - 0x00022c, 0x00022c, INTEGER_OFFSET, 1, - 0x00022e, 0x00022e, INTEGER_OFFSET, 1, - 0x000230, 0x000230, INTEGER_OFFSET, 1, - 0x000232, 0x000232, INTEGER_OFFSET, 1, - 0x00023a, 0x00023a, INTEGER_OFFSET, 10795, - 0x00023b, 0x00023b, INTEGER_OFFSET, 1, - 0x00023d, 0x00023d, INTEGER_OFFSET, -163, - 0x00023e, 0x00023e, INTEGER_OFFSET, 10792, - 0x000241, 0x000241, INTEGER_OFFSET, 1, - 0x000243, 0x000243, INTEGER_OFFSET, -195, - 0x000244, 0x000244, INTEGER_OFFSET, 69, - 0x000245, 0x000245, INTEGER_OFFSET, 71, - 0x000246, 0x000246, INTEGER_OFFSET, 1, - 0x000248, 0x000248, INTEGER_OFFSET, 1, - 0x00024a, 0x00024a, INTEGER_OFFSET, 1, - 0x00024c, 0x00024c, INTEGER_OFFSET, 1, - 0x00024e, 0x00024e, INTEGER_OFFSET, 1, - 0x000345, 0x000345, INTEGER_OFFSET, 116, - 0x000370, 0x000370, INTEGER_OFFSET, 1, - 0x000372, 0x000372, INTEGER_OFFSET, 1, - 0x000376, 0x000376, INTEGER_OFFSET, 1, - 0x00037f, 0x00037f, INTEGER_OFFSET, 116, - 0x000386, 0x000386, INTEGER_OFFSET, 38, - 0x000388, 0x00038a, INTEGER_OFFSET, 37, - 0x00038c, 0x00038c, INTEGER_OFFSET, 64, - 0x00038e, 0x00038f, INTEGER_OFFSET, 63, - 0x000391, 0x0003a1, INTEGER_OFFSET, 32, - 0x0003a3, 0x0003ab, INTEGER_OFFSET, 32, - 0x0003c2, 0x0003c2, INTEGER_OFFSET, 1, - 0x0003cf, 0x0003cf, INTEGER_OFFSET, 8, - 0x0003d0, 0x0003d0, INTEGER_OFFSET, -30, - 0x0003d1, 0x0003d1, INTEGER_OFFSET, -25, - 0x0003d5, 0x0003d5, INTEGER_OFFSET, -15, - 0x0003d6, 0x0003d6, INTEGER_OFFSET, -22, - 0x0003d8, 0x0003d8, INTEGER_OFFSET, 1, - 0x0003da, 0x0003da, INTEGER_OFFSET, 1, - 0x0003dc, 0x0003dc, INTEGER_OFFSET, 1, - 0x0003de, 0x0003de, INTEGER_OFFSET, 1, - 0x0003e0, 0x0003e0, INTEGER_OFFSET, 1, - 0x0003e2, 0x0003e2, INTEGER_OFFSET, 1, - 0x0003e4, 0x0003e4, INTEGER_OFFSET, 1, - 0x0003e6, 0x0003e6, INTEGER_OFFSET, 1, - 0x0003e8, 0x0003e8, INTEGER_OFFSET, 1, - 0x0003ea, 0x0003ea, INTEGER_OFFSET, 1, - 0x0003ec, 0x0003ec, INTEGER_OFFSET, 1, - 0x0003ee, 
0x0003ee, INTEGER_OFFSET, 1, - 0x0003f0, 0x0003f0, INTEGER_OFFSET, -54, - 0x0003f1, 0x0003f1, INTEGER_OFFSET, -48, - 0x0003f4, 0x0003f4, INTEGER_OFFSET, -60, - 0x0003f5, 0x0003f5, INTEGER_OFFSET, -64, - 0x0003f7, 0x0003f7, INTEGER_OFFSET, 1, - 0x0003f9, 0x0003f9, INTEGER_OFFSET, -7, - 0x0003fa, 0x0003fa, INTEGER_OFFSET, 1, - 0x0003fd, 0x0003ff, INTEGER_OFFSET, -130, - 0x000400, 0x00040f, INTEGER_OFFSET, 80, - 0x000410, 0x00042f, INTEGER_OFFSET, 32, - 0x000460, 0x000460, INTEGER_OFFSET, 1, - 0x000462, 0x000462, INTEGER_OFFSET, 1, - 0x000464, 0x000464, INTEGER_OFFSET, 1, - 0x000466, 0x000466, INTEGER_OFFSET, 1, - 0x000468, 0x000468, INTEGER_OFFSET, 1, - 0x00046a, 0x00046a, INTEGER_OFFSET, 1, - 0x00046c, 0x00046c, INTEGER_OFFSET, 1, - 0x00046e, 0x00046e, INTEGER_OFFSET, 1, - 0x000470, 0x000470, INTEGER_OFFSET, 1, - 0x000472, 0x000472, INTEGER_OFFSET, 1, - 0x000474, 0x000474, INTEGER_OFFSET, 1, - 0x000476, 0x000476, INTEGER_OFFSET, 1, - 0x000478, 0x000478, INTEGER_OFFSET, 1, - 0x00047a, 0x00047a, INTEGER_OFFSET, 1, - 0x00047c, 0x00047c, INTEGER_OFFSET, 1, - 0x00047e, 0x00047e, INTEGER_OFFSET, 1, - 0x000480, 0x000480, INTEGER_OFFSET, 1, - 0x00048a, 0x00048a, INTEGER_OFFSET, 1, - 0x00048c, 0x00048c, INTEGER_OFFSET, 1, - 0x00048e, 0x00048e, INTEGER_OFFSET, 1, - 0x000490, 0x000490, INTEGER_OFFSET, 1, - 0x000492, 0x000492, INTEGER_OFFSET, 1, - 0x000494, 0x000494, INTEGER_OFFSET, 1, - 0x000496, 0x000496, INTEGER_OFFSET, 1, - 0x000498, 0x000498, INTEGER_OFFSET, 1, - 0x00049a, 0x00049a, INTEGER_OFFSET, 1, - 0x00049c, 0x00049c, INTEGER_OFFSET, 1, - 0x00049e, 0x00049e, INTEGER_OFFSET, 1, - 0x0004a0, 0x0004a0, INTEGER_OFFSET, 1, - 0x0004a2, 0x0004a2, INTEGER_OFFSET, 1, - 0x0004a4, 0x0004a4, INTEGER_OFFSET, 1, - 0x0004a6, 0x0004a6, INTEGER_OFFSET, 1, - 0x0004a8, 0x0004a8, INTEGER_OFFSET, 1, - 0x0004aa, 0x0004aa, INTEGER_OFFSET, 1, - 0x0004ac, 0x0004ac, INTEGER_OFFSET, 1, - 0x0004ae, 0x0004ae, INTEGER_OFFSET, 1, - 0x0004b0, 0x0004b0, INTEGER_OFFSET, 1, - 0x0004b2, 0x0004b2, 
INTEGER_OFFSET, 1, - 0x0004b4, 0x0004b4, INTEGER_OFFSET, 1, - 0x0004b6, 0x0004b6, INTEGER_OFFSET, 1, - 0x0004b8, 0x0004b8, INTEGER_OFFSET, 1, - 0x0004ba, 0x0004ba, INTEGER_OFFSET, 1, - 0x0004bc, 0x0004bc, INTEGER_OFFSET, 1, - 0x0004be, 0x0004be, INTEGER_OFFSET, 1, - 0x0004c0, 0x0004c0, INTEGER_OFFSET, 15, - 0x0004c1, 0x0004c1, INTEGER_OFFSET, 1, - 0x0004c3, 0x0004c3, INTEGER_OFFSET, 1, - 0x0004c5, 0x0004c5, INTEGER_OFFSET, 1, - 0x0004c7, 0x0004c7, INTEGER_OFFSET, 1, - 0x0004c9, 0x0004c9, INTEGER_OFFSET, 1, - 0x0004cb, 0x0004cb, INTEGER_OFFSET, 1, - 0x0004cd, 0x0004cd, INTEGER_OFFSET, 1, - 0x0004d0, 0x0004d0, INTEGER_OFFSET, 1, - 0x0004d2, 0x0004d2, INTEGER_OFFSET, 1, - 0x0004d4, 0x0004d4, INTEGER_OFFSET, 1, - 0x0004d6, 0x0004d6, INTEGER_OFFSET, 1, - 0x0004d8, 0x0004d8, INTEGER_OFFSET, 1, - 0x0004da, 0x0004da, INTEGER_OFFSET, 1, - 0x0004dc, 0x0004dc, INTEGER_OFFSET, 1, - 0x0004de, 0x0004de, INTEGER_OFFSET, 1, - 0x0004e0, 0x0004e0, INTEGER_OFFSET, 1, - 0x0004e2, 0x0004e2, INTEGER_OFFSET, 1, - 0x0004e4, 0x0004e4, INTEGER_OFFSET, 1, - 0x0004e6, 0x0004e6, INTEGER_OFFSET, 1, - 0x0004e8, 0x0004e8, INTEGER_OFFSET, 1, - 0x0004ea, 0x0004ea, INTEGER_OFFSET, 1, - 0x0004ec, 0x0004ec, INTEGER_OFFSET, 1, - 0x0004ee, 0x0004ee, INTEGER_OFFSET, 1, - 0x0004f0, 0x0004f0, INTEGER_OFFSET, 1, - 0x0004f2, 0x0004f2, INTEGER_OFFSET, 1, - 0x0004f4, 0x0004f4, INTEGER_OFFSET, 1, - 0x0004f6, 0x0004f6, INTEGER_OFFSET, 1, - 0x0004f8, 0x0004f8, INTEGER_OFFSET, 1, - 0x0004fa, 0x0004fa, INTEGER_OFFSET, 1, - 0x0004fc, 0x0004fc, INTEGER_OFFSET, 1, - 0x0004fe, 0x0004fe, INTEGER_OFFSET, 1, - 0x000500, 0x000500, INTEGER_OFFSET, 1, - 0x000502, 0x000502, INTEGER_OFFSET, 1, - 0x000504, 0x000504, INTEGER_OFFSET, 1, - 0x000506, 0x000506, INTEGER_OFFSET, 1, - 0x000508, 0x000508, INTEGER_OFFSET, 1, - 0x00050a, 0x00050a, INTEGER_OFFSET, 1, - 0x00050c, 0x00050c, INTEGER_OFFSET, 1, - 0x00050e, 0x00050e, INTEGER_OFFSET, 1, - 0x000510, 0x000510, INTEGER_OFFSET, 1, - 0x000512, 0x000512, INTEGER_OFFSET, 1, - 0x000514, 
0x000514, INTEGER_OFFSET, 1, - 0x000516, 0x000516, INTEGER_OFFSET, 1, - 0x000518, 0x000518, INTEGER_OFFSET, 1, - 0x00051a, 0x00051a, INTEGER_OFFSET, 1, - 0x00051c, 0x00051c, INTEGER_OFFSET, 1, - 0x00051e, 0x00051e, INTEGER_OFFSET, 1, - 0x000520, 0x000520, INTEGER_OFFSET, 1, - 0x000522, 0x000522, INTEGER_OFFSET, 1, - 0x000524, 0x000524, INTEGER_OFFSET, 1, - 0x000526, 0x000526, INTEGER_OFFSET, 1, - 0x000528, 0x000528, INTEGER_OFFSET, 1, - 0x00052a, 0x00052a, INTEGER_OFFSET, 1, - 0x00052c, 0x00052c, INTEGER_OFFSET, 1, - 0x00052e, 0x00052e, INTEGER_OFFSET, 1, - 0x000531, 0x000556, INTEGER_OFFSET, 48, - 0x0010a0, 0x0010c5, INTEGER_OFFSET, 7264, - 0x0010c7, 0x0010c7, INTEGER_OFFSET, 7264, - 0x0010cd, 0x0010cd, INTEGER_OFFSET, 7264, - 0x0013f8, 0x0013fd, INTEGER_OFFSET, -8, - 0x001c80, 0x001c80, INTEGER_OFFSET, -6222, - 0x001c81, 0x001c81, INTEGER_OFFSET, -6221, - 0x001c82, 0x001c82, INTEGER_OFFSET, -6212, - 0x001c83, 0x001c84, INTEGER_OFFSET, -6210, - 0x001c85, 0x001c85, INTEGER_OFFSET, -6211, - 0x001c86, 0x001c86, INTEGER_OFFSET, -6204, - 0x001c87, 0x001c87, INTEGER_OFFSET, -6180, - 0x001c88, 0x001c88, INTEGER_OFFSET, 35267, - 0x001c90, 0x001cba, INTEGER_OFFSET, -3008, - 0x001cbd, 0x001cbf, INTEGER_OFFSET, -3008, - 0x001e00, 0x001e00, INTEGER_OFFSET, 1, - 0x001e02, 0x001e02, INTEGER_OFFSET, 1, - 0x001e04, 0x001e04, INTEGER_OFFSET, 1, - 0x001e06, 0x001e06, INTEGER_OFFSET, 1, - 0x001e08, 0x001e08, INTEGER_OFFSET, 1, - 0x001e0a, 0x001e0a, INTEGER_OFFSET, 1, - 0x001e0c, 0x001e0c, INTEGER_OFFSET, 1, - 0x001e0e, 0x001e0e, INTEGER_OFFSET, 1, - 0x001e10, 0x001e10, INTEGER_OFFSET, 1, - 0x001e12, 0x001e12, INTEGER_OFFSET, 1, - 0x001e14, 0x001e14, INTEGER_OFFSET, 1, - 0x001e16, 0x001e16, INTEGER_OFFSET, 1, - 0x001e18, 0x001e18, INTEGER_OFFSET, 1, - 0x001e1a, 0x001e1a, INTEGER_OFFSET, 1, - 0x001e1c, 0x001e1c, INTEGER_OFFSET, 1, - 0x001e1e, 0x001e1e, INTEGER_OFFSET, 1, - 0x001e20, 0x001e20, INTEGER_OFFSET, 1, - 0x001e22, 0x001e22, INTEGER_OFFSET, 1, - 0x001e24, 0x001e24, 
INTEGER_OFFSET, 1, - 0x001e26, 0x001e26, INTEGER_OFFSET, 1, - 0x001e28, 0x001e28, INTEGER_OFFSET, 1, - 0x001e2a, 0x001e2a, INTEGER_OFFSET, 1, - 0x001e2c, 0x001e2c, INTEGER_OFFSET, 1, - 0x001e2e, 0x001e2e, INTEGER_OFFSET, 1, - 0x001e30, 0x001e30, INTEGER_OFFSET, 1, - 0x001e32, 0x001e32, INTEGER_OFFSET, 1, - 0x001e34, 0x001e34, INTEGER_OFFSET, 1, - 0x001e36, 0x001e36, INTEGER_OFFSET, 1, - 0x001e38, 0x001e38, INTEGER_OFFSET, 1, - 0x001e3a, 0x001e3a, INTEGER_OFFSET, 1, - 0x001e3c, 0x001e3c, INTEGER_OFFSET, 1, - 0x001e3e, 0x001e3e, INTEGER_OFFSET, 1, - 0x001e40, 0x001e40, INTEGER_OFFSET, 1, - 0x001e42, 0x001e42, INTEGER_OFFSET, 1, - 0x001e44, 0x001e44, INTEGER_OFFSET, 1, - 0x001e46, 0x001e46, INTEGER_OFFSET, 1, - 0x001e48, 0x001e48, INTEGER_OFFSET, 1, - 0x001e4a, 0x001e4a, INTEGER_OFFSET, 1, - 0x001e4c, 0x001e4c, INTEGER_OFFSET, 1, - 0x001e4e, 0x001e4e, INTEGER_OFFSET, 1, - 0x001e50, 0x001e50, INTEGER_OFFSET, 1, - 0x001e52, 0x001e52, INTEGER_OFFSET, 1, - 0x001e54, 0x001e54, INTEGER_OFFSET, 1, - 0x001e56, 0x001e56, INTEGER_OFFSET, 1, - 0x001e58, 0x001e58, INTEGER_OFFSET, 1, - 0x001e5a, 0x001e5a, INTEGER_OFFSET, 1, - 0x001e5c, 0x001e5c, INTEGER_OFFSET, 1, - 0x001e5e, 0x001e5e, INTEGER_OFFSET, 1, - 0x001e60, 0x001e60, INTEGER_OFFSET, 1, - 0x001e62, 0x001e62, INTEGER_OFFSET, 1, - 0x001e64, 0x001e64, INTEGER_OFFSET, 1, - 0x001e66, 0x001e66, INTEGER_OFFSET, 1, - 0x001e68, 0x001e68, INTEGER_OFFSET, 1, - 0x001e6a, 0x001e6a, INTEGER_OFFSET, 1, - 0x001e6c, 0x001e6c, INTEGER_OFFSET, 1, - 0x001e6e, 0x001e6e, INTEGER_OFFSET, 1, - 0x001e70, 0x001e70, INTEGER_OFFSET, 1, - 0x001e72, 0x001e72, INTEGER_OFFSET, 1, - 0x001e74, 0x001e74, INTEGER_OFFSET, 1, - 0x001e76, 0x001e76, INTEGER_OFFSET, 1, - 0x001e78, 0x001e78, INTEGER_OFFSET, 1, - 0x001e7a, 0x001e7a, INTEGER_OFFSET, 1, - 0x001e7c, 0x001e7c, INTEGER_OFFSET, 1, - 0x001e7e, 0x001e7e, INTEGER_OFFSET, 1, - 0x001e80, 0x001e80, INTEGER_OFFSET, 1, - 0x001e82, 0x001e82, INTEGER_OFFSET, 1, - 0x001e84, 0x001e84, INTEGER_OFFSET, 1, - 0x001e86, 
0x001e86, INTEGER_OFFSET, 1, - 0x001e88, 0x001e88, INTEGER_OFFSET, 1, - 0x001e8a, 0x001e8a, INTEGER_OFFSET, 1, - 0x001e8c, 0x001e8c, INTEGER_OFFSET, 1, - 0x001e8e, 0x001e8e, INTEGER_OFFSET, 1, - 0x001e90, 0x001e90, INTEGER_OFFSET, 1, - 0x001e92, 0x001e92, INTEGER_OFFSET, 1, - 0x001e94, 0x001e94, INTEGER_OFFSET, 1, - 0x001e9b, 0x001e9b, INTEGER_OFFSET, -58, - 0x001e9e, 0x001e9e, INTEGER_OFFSET, -7615, - 0x001ea0, 0x001ea0, INTEGER_OFFSET, 1, - 0x001ea2, 0x001ea2, INTEGER_OFFSET, 1, - 0x001ea4, 0x001ea4, INTEGER_OFFSET, 1, - 0x001ea6, 0x001ea6, INTEGER_OFFSET, 1, - 0x001ea8, 0x001ea8, INTEGER_OFFSET, 1, - 0x001eaa, 0x001eaa, INTEGER_OFFSET, 1, - 0x001eac, 0x001eac, INTEGER_OFFSET, 1, - 0x001eae, 0x001eae, INTEGER_OFFSET, 1, - 0x001eb0, 0x001eb0, INTEGER_OFFSET, 1, - 0x001eb2, 0x001eb2, INTEGER_OFFSET, 1, - 0x001eb4, 0x001eb4, INTEGER_OFFSET, 1, - 0x001eb6, 0x001eb6, INTEGER_OFFSET, 1, - 0x001eb8, 0x001eb8, INTEGER_OFFSET, 1, - 0x001eba, 0x001eba, INTEGER_OFFSET, 1, - 0x001ebc, 0x001ebc, INTEGER_OFFSET, 1, - 0x001ebe, 0x001ebe, INTEGER_OFFSET, 1, - 0x001ec0, 0x001ec0, INTEGER_OFFSET, 1, - 0x001ec2, 0x001ec2, INTEGER_OFFSET, 1, - 0x001ec4, 0x001ec4, INTEGER_OFFSET, 1, - 0x001ec6, 0x001ec6, INTEGER_OFFSET, 1, - 0x001ec8, 0x001ec8, INTEGER_OFFSET, 1, - 0x001eca, 0x001eca, INTEGER_OFFSET, 1, - 0x001ecc, 0x001ecc, INTEGER_OFFSET, 1, - 0x001ece, 0x001ece, INTEGER_OFFSET, 1, - 0x001ed0, 0x001ed0, INTEGER_OFFSET, 1, - 0x001ed2, 0x001ed2, INTEGER_OFFSET, 1, - 0x001ed4, 0x001ed4, INTEGER_OFFSET, 1, - 0x001ed6, 0x001ed6, INTEGER_OFFSET, 1, - 0x001ed8, 0x001ed8, INTEGER_OFFSET, 1, - 0x001eda, 0x001eda, INTEGER_OFFSET, 1, - 0x001edc, 0x001edc, INTEGER_OFFSET, 1, - 0x001ede, 0x001ede, INTEGER_OFFSET, 1, - 0x001ee0, 0x001ee0, INTEGER_OFFSET, 1, - 0x001ee2, 0x001ee2, INTEGER_OFFSET, 1, - 0x001ee4, 0x001ee4, INTEGER_OFFSET, 1, - 0x001ee6, 0x001ee6, INTEGER_OFFSET, 1, - 0x001ee8, 0x001ee8, INTEGER_OFFSET, 1, - 0x001eea, 0x001eea, INTEGER_OFFSET, 1, - 0x001eec, 0x001eec, INTEGER_OFFSET, 
1, - 0x001eee, 0x001eee, INTEGER_OFFSET, 1, - 0x001ef0, 0x001ef0, INTEGER_OFFSET, 1, - 0x001ef2, 0x001ef2, INTEGER_OFFSET, 1, - 0x001ef4, 0x001ef4, INTEGER_OFFSET, 1, - 0x001ef6, 0x001ef6, INTEGER_OFFSET, 1, - 0x001ef8, 0x001ef8, INTEGER_OFFSET, 1, - 0x001efa, 0x001efa, INTEGER_OFFSET, 1, - 0x001efc, 0x001efc, INTEGER_OFFSET, 1, - 0x001efe, 0x001efe, INTEGER_OFFSET, 1, - 0x001f08, 0x001f0f, INTEGER_OFFSET, -8, - 0x001f18, 0x001f1d, INTEGER_OFFSET, -8, - 0x001f28, 0x001f2f, INTEGER_OFFSET, -8, - 0x001f38, 0x001f3f, INTEGER_OFFSET, -8, - 0x001f48, 0x001f4d, INTEGER_OFFSET, -8, - 0x001f59, 0x001f59, INTEGER_OFFSET, -8, - 0x001f5b, 0x001f5b, INTEGER_OFFSET, -8, - 0x001f5d, 0x001f5d, INTEGER_OFFSET, -8, - 0x001f5f, 0x001f5f, INTEGER_OFFSET, -8, - 0x001f68, 0x001f6f, INTEGER_OFFSET, -8, - 0x001f88, 0x001f8f, INTEGER_OFFSET, -8, - 0x001f98, 0x001f9f, INTEGER_OFFSET, -8, - 0x001fa8, 0x001faf, INTEGER_OFFSET, -8, - 0x001fb8, 0x001fb9, INTEGER_OFFSET, -8, - 0x001fba, 0x001fbb, INTEGER_OFFSET, -74, - 0x001fbc, 0x001fbc, INTEGER_OFFSET, -9, - 0x001fbe, 0x001fbe, INTEGER_OFFSET, -7173, - 0x001fc8, 0x001fcb, INTEGER_OFFSET, -86, - 0x001fcc, 0x001fcc, INTEGER_OFFSET, -9, - 0x001fd8, 0x001fd9, INTEGER_OFFSET, -8, - 0x001fda, 0x001fdb, INTEGER_OFFSET, -100, - 0x001fe8, 0x001fe9, INTEGER_OFFSET, -8, - 0x001fea, 0x001feb, INTEGER_OFFSET, -112, - 0x001fec, 0x001fec, INTEGER_OFFSET, -7, - 0x001ff8, 0x001ff9, INTEGER_OFFSET, -128, - 0x001ffa, 0x001ffb, INTEGER_OFFSET, -126, - 0x001ffc, 0x001ffc, INTEGER_OFFSET, -9, - 0x002126, 0x002126, INTEGER_OFFSET, -7517, - 0x00212a, 0x00212a, INTEGER_OFFSET, -8383, - 0x00212b, 0x00212b, INTEGER_OFFSET, -8262, - 0x002132, 0x002132, INTEGER_OFFSET, 28, - 0x002160, 0x00216f, INTEGER_OFFSET, 16, - 0x002183, 0x002183, INTEGER_OFFSET, 1, - 0x0024b6, 0x0024cf, INTEGER_OFFSET, 26, - 0x002c00, 0x002c2f, INTEGER_OFFSET, 48, - 0x002c60, 0x002c60, INTEGER_OFFSET, 1, - 0x002c62, 0x002c62, INTEGER_OFFSET, -10743, - 0x002c63, 0x002c63, INTEGER_OFFSET, -3814, - 
0x002c64, 0x002c64, INTEGER_OFFSET, -10727, - 0x002c67, 0x002c67, INTEGER_OFFSET, 1, - 0x002c69, 0x002c69, INTEGER_OFFSET, 1, - 0x002c6b, 0x002c6b, INTEGER_OFFSET, 1, - 0x002c6d, 0x002c6d, INTEGER_OFFSET, -10780, - 0x002c6e, 0x002c6e, INTEGER_OFFSET, -10749, - 0x002c6f, 0x002c6f, INTEGER_OFFSET, -10783, - 0x002c70, 0x002c70, INTEGER_OFFSET, -10782, - 0x002c72, 0x002c72, INTEGER_OFFSET, 1, - 0x002c75, 0x002c75, INTEGER_OFFSET, 1, - 0x002c7e, 0x002c7f, INTEGER_OFFSET, -10815, - 0x002c80, 0x002c80, INTEGER_OFFSET, 1, - 0x002c82, 0x002c82, INTEGER_OFFSET, 1, - 0x002c84, 0x002c84, INTEGER_OFFSET, 1, - 0x002c86, 0x002c86, INTEGER_OFFSET, 1, - 0x002c88, 0x002c88, INTEGER_OFFSET, 1, - 0x002c8a, 0x002c8a, INTEGER_OFFSET, 1, - 0x002c8c, 0x002c8c, INTEGER_OFFSET, 1, - 0x002c8e, 0x002c8e, INTEGER_OFFSET, 1, - 0x002c90, 0x002c90, INTEGER_OFFSET, 1, - 0x002c92, 0x002c92, INTEGER_OFFSET, 1, - 0x002c94, 0x002c94, INTEGER_OFFSET, 1, - 0x002c96, 0x002c96, INTEGER_OFFSET, 1, - 0x002c98, 0x002c98, INTEGER_OFFSET, 1, - 0x002c9a, 0x002c9a, INTEGER_OFFSET, 1, - 0x002c9c, 0x002c9c, INTEGER_OFFSET, 1, - 0x002c9e, 0x002c9e, INTEGER_OFFSET, 1, - 0x002ca0, 0x002ca0, INTEGER_OFFSET, 1, - 0x002ca2, 0x002ca2, INTEGER_OFFSET, 1, - 0x002ca4, 0x002ca4, INTEGER_OFFSET, 1, - 0x002ca6, 0x002ca6, INTEGER_OFFSET, 1, - 0x002ca8, 0x002ca8, INTEGER_OFFSET, 1, - 0x002caa, 0x002caa, INTEGER_OFFSET, 1, - 0x002cac, 0x002cac, INTEGER_OFFSET, 1, - 0x002cae, 0x002cae, INTEGER_OFFSET, 1, - 0x002cb0, 0x002cb0, INTEGER_OFFSET, 1, - 0x002cb2, 0x002cb2, INTEGER_OFFSET, 1, - 0x002cb4, 0x002cb4, INTEGER_OFFSET, 1, - 0x002cb6, 0x002cb6, INTEGER_OFFSET, 1, - 0x002cb8, 0x002cb8, INTEGER_OFFSET, 1, - 0x002cba, 0x002cba, INTEGER_OFFSET, 1, - 0x002cbc, 0x002cbc, INTEGER_OFFSET, 1, - 0x002cbe, 0x002cbe, INTEGER_OFFSET, 1, - 0x002cc0, 0x002cc0, INTEGER_OFFSET, 1, - 0x002cc2, 0x002cc2, INTEGER_OFFSET, 1, - 0x002cc4, 0x002cc4, INTEGER_OFFSET, 1, - 0x002cc6, 0x002cc6, INTEGER_OFFSET, 1, - 0x002cc8, 0x002cc8, INTEGER_OFFSET, 1, - 
0x002cca, 0x002cca, INTEGER_OFFSET, 1, - 0x002ccc, 0x002ccc, INTEGER_OFFSET, 1, - 0x002cce, 0x002cce, INTEGER_OFFSET, 1, - 0x002cd0, 0x002cd0, INTEGER_OFFSET, 1, - 0x002cd2, 0x002cd2, INTEGER_OFFSET, 1, - 0x002cd4, 0x002cd4, INTEGER_OFFSET, 1, - 0x002cd6, 0x002cd6, INTEGER_OFFSET, 1, - 0x002cd8, 0x002cd8, INTEGER_OFFSET, 1, - 0x002cda, 0x002cda, INTEGER_OFFSET, 1, - 0x002cdc, 0x002cdc, INTEGER_OFFSET, 1, - 0x002cde, 0x002cde, INTEGER_OFFSET, 1, - 0x002ce0, 0x002ce0, INTEGER_OFFSET, 1, - 0x002ce2, 0x002ce2, INTEGER_OFFSET, 1, - 0x002ceb, 0x002ceb, INTEGER_OFFSET, 1, - 0x002ced, 0x002ced, INTEGER_OFFSET, 1, - 0x002cf2, 0x002cf2, INTEGER_OFFSET, 1, - 0x00a640, 0x00a640, INTEGER_OFFSET, 1, - 0x00a642, 0x00a642, INTEGER_OFFSET, 1, - 0x00a644, 0x00a644, INTEGER_OFFSET, 1, - 0x00a646, 0x00a646, INTEGER_OFFSET, 1, - 0x00a648, 0x00a648, INTEGER_OFFSET, 1, - 0x00a64a, 0x00a64a, INTEGER_OFFSET, 1, - 0x00a64c, 0x00a64c, INTEGER_OFFSET, 1, - 0x00a64e, 0x00a64e, INTEGER_OFFSET, 1, - 0x00a650, 0x00a650, INTEGER_OFFSET, 1, - 0x00a652, 0x00a652, INTEGER_OFFSET, 1, - 0x00a654, 0x00a654, INTEGER_OFFSET, 1, - 0x00a656, 0x00a656, INTEGER_OFFSET, 1, - 0x00a658, 0x00a658, INTEGER_OFFSET, 1, - 0x00a65a, 0x00a65a, INTEGER_OFFSET, 1, - 0x00a65c, 0x00a65c, INTEGER_OFFSET, 1, - 0x00a65e, 0x00a65e, INTEGER_OFFSET, 1, - 0x00a660, 0x00a660, INTEGER_OFFSET, 1, - 0x00a662, 0x00a662, INTEGER_OFFSET, 1, - 0x00a664, 0x00a664, INTEGER_OFFSET, 1, - 0x00a666, 0x00a666, INTEGER_OFFSET, 1, - 0x00a668, 0x00a668, INTEGER_OFFSET, 1, - 0x00a66a, 0x00a66a, INTEGER_OFFSET, 1, - 0x00a66c, 0x00a66c, INTEGER_OFFSET, 1, - 0x00a680, 0x00a680, INTEGER_OFFSET, 1, - 0x00a682, 0x00a682, INTEGER_OFFSET, 1, - 0x00a684, 0x00a684, INTEGER_OFFSET, 1, - 0x00a686, 0x00a686, INTEGER_OFFSET, 1, - 0x00a688, 0x00a688, INTEGER_OFFSET, 1, - 0x00a68a, 0x00a68a, INTEGER_OFFSET, 1, - 0x00a68c, 0x00a68c, INTEGER_OFFSET, 1, - 0x00a68e, 0x00a68e, INTEGER_OFFSET, 1, - 0x00a690, 0x00a690, INTEGER_OFFSET, 1, - 0x00a692, 0x00a692, 
INTEGER_OFFSET, 1, - 0x00a694, 0x00a694, INTEGER_OFFSET, 1, - 0x00a696, 0x00a696, INTEGER_OFFSET, 1, - 0x00a698, 0x00a698, INTEGER_OFFSET, 1, - 0x00a69a, 0x00a69a, INTEGER_OFFSET, 1, - 0x00a722, 0x00a722, INTEGER_OFFSET, 1, - 0x00a724, 0x00a724, INTEGER_OFFSET, 1, - 0x00a726, 0x00a726, INTEGER_OFFSET, 1, - 0x00a728, 0x00a728, INTEGER_OFFSET, 1, - 0x00a72a, 0x00a72a, INTEGER_OFFSET, 1, - 0x00a72c, 0x00a72c, INTEGER_OFFSET, 1, - 0x00a72e, 0x00a72e, INTEGER_OFFSET, 1, - 0x00a732, 0x00a732, INTEGER_OFFSET, 1, - 0x00a734, 0x00a734, INTEGER_OFFSET, 1, - 0x00a736, 0x00a736, INTEGER_OFFSET, 1, - 0x00a738, 0x00a738, INTEGER_OFFSET, 1, - 0x00a73a, 0x00a73a, INTEGER_OFFSET, 1, - 0x00a73c, 0x00a73c, INTEGER_OFFSET, 1, - 0x00a73e, 0x00a73e, INTEGER_OFFSET, 1, - 0x00a740, 0x00a740, INTEGER_OFFSET, 1, - 0x00a742, 0x00a742, INTEGER_OFFSET, 1, - 0x00a744, 0x00a744, INTEGER_OFFSET, 1, - 0x00a746, 0x00a746, INTEGER_OFFSET, 1, - 0x00a748, 0x00a748, INTEGER_OFFSET, 1, - 0x00a74a, 0x00a74a, INTEGER_OFFSET, 1, - 0x00a74c, 0x00a74c, INTEGER_OFFSET, 1, - 0x00a74e, 0x00a74e, INTEGER_OFFSET, 1, - 0x00a750, 0x00a750, INTEGER_OFFSET, 1, - 0x00a752, 0x00a752, INTEGER_OFFSET, 1, - 0x00a754, 0x00a754, INTEGER_OFFSET, 1, - 0x00a756, 0x00a756, INTEGER_OFFSET, 1, - 0x00a758, 0x00a758, INTEGER_OFFSET, 1, - 0x00a75a, 0x00a75a, INTEGER_OFFSET, 1, - 0x00a75c, 0x00a75c, INTEGER_OFFSET, 1, - 0x00a75e, 0x00a75e, INTEGER_OFFSET, 1, - 0x00a760, 0x00a760, INTEGER_OFFSET, 1, - 0x00a762, 0x00a762, INTEGER_OFFSET, 1, - 0x00a764, 0x00a764, INTEGER_OFFSET, 1, - 0x00a766, 0x00a766, INTEGER_OFFSET, 1, - 0x00a768, 0x00a768, INTEGER_OFFSET, 1, - 0x00a76a, 0x00a76a, INTEGER_OFFSET, 1, - 0x00a76c, 0x00a76c, INTEGER_OFFSET, 1, - 0x00a76e, 0x00a76e, INTEGER_OFFSET, 1, - 0x00a779, 0x00a779, INTEGER_OFFSET, 1, - 0x00a77b, 0x00a77b, INTEGER_OFFSET, 1, - 0x00a77d, 0x00a77d, INTEGER_OFFSET, -35332, - 0x00a77e, 0x00a77e, INTEGER_OFFSET, 1, - 0x00a780, 0x00a780, INTEGER_OFFSET, 1, - 0x00a782, 0x00a782, INTEGER_OFFSET, 1, - 
0x00a784, 0x00a784, INTEGER_OFFSET, 1, - 0x00a786, 0x00a786, INTEGER_OFFSET, 1, - 0x00a78b, 0x00a78b, INTEGER_OFFSET, 1, - 0x00a78d, 0x00a78d, INTEGER_OFFSET, -42280, - 0x00a790, 0x00a790, INTEGER_OFFSET, 1, - 0x00a792, 0x00a792, INTEGER_OFFSET, 1, - 0x00a796, 0x00a796, INTEGER_OFFSET, 1, - 0x00a798, 0x00a798, INTEGER_OFFSET, 1, - 0x00a79a, 0x00a79a, INTEGER_OFFSET, 1, - 0x00a79c, 0x00a79c, INTEGER_OFFSET, 1, - 0x00a79e, 0x00a79e, INTEGER_OFFSET, 1, - 0x00a7a0, 0x00a7a0, INTEGER_OFFSET, 1, - 0x00a7a2, 0x00a7a2, INTEGER_OFFSET, 1, - 0x00a7a4, 0x00a7a4, INTEGER_OFFSET, 1, - 0x00a7a6, 0x00a7a6, INTEGER_OFFSET, 1, - 0x00a7a8, 0x00a7a8, INTEGER_OFFSET, 1, - 0x00a7aa, 0x00a7aa, INTEGER_OFFSET, -42308, - 0x00a7ab, 0x00a7ab, INTEGER_OFFSET, -42319, - 0x00a7ac, 0x00a7ac, INTEGER_OFFSET, -42315, - 0x00a7ad, 0x00a7ad, INTEGER_OFFSET, -42305, - 0x00a7ae, 0x00a7ae, INTEGER_OFFSET, -42308, - 0x00a7b0, 0x00a7b0, INTEGER_OFFSET, -42258, - 0x00a7b1, 0x00a7b1, INTEGER_OFFSET, -42282, - 0x00a7b2, 0x00a7b2, INTEGER_OFFSET, -42261, - 0x00a7b3, 0x00a7b3, INTEGER_OFFSET, 928, - 0x00a7b4, 0x00a7b4, INTEGER_OFFSET, 1, - 0x00a7b6, 0x00a7b6, INTEGER_OFFSET, 1, - 0x00a7b8, 0x00a7b8, INTEGER_OFFSET, 1, - 0x00a7ba, 0x00a7ba, INTEGER_OFFSET, 1, - 0x00a7bc, 0x00a7bc, INTEGER_OFFSET, 1, - 0x00a7be, 0x00a7be, INTEGER_OFFSET, 1, - 0x00a7c0, 0x00a7c0, INTEGER_OFFSET, 1, - 0x00a7c2, 0x00a7c2, INTEGER_OFFSET, 1, - 0x00a7c4, 0x00a7c4, INTEGER_OFFSET, -48, - 0x00a7c5, 0x00a7c5, INTEGER_OFFSET, -42307, - 0x00a7c6, 0x00a7c6, INTEGER_OFFSET, -35384, - 0x00a7c7, 0x00a7c7, INTEGER_OFFSET, 1, - 0x00a7c9, 0x00a7c9, INTEGER_OFFSET, 1, - 0x00a7d0, 0x00a7d0, INTEGER_OFFSET, 1, - 0x00a7d6, 0x00a7d6, INTEGER_OFFSET, 1, - 0x00a7d8, 0x00a7d8, INTEGER_OFFSET, 1, - 0x00a7f5, 0x00a7f5, INTEGER_OFFSET, 1, - 0x00ab70, 0x00abbf, INTEGER_OFFSET, -38864, - 0x00ff21, 0x00ff3a, INTEGER_OFFSET, 32, - 0x010400, 0x010427, INTEGER_OFFSET, 40, - 0x0104b0, 0x0104d3, INTEGER_OFFSET, 40, - 0x010570, 0x01057a, INTEGER_OFFSET, 39, - 
0x01057c, 0x01058a, INTEGER_OFFSET, 39, - 0x01058c, 0x010592, INTEGER_OFFSET, 39, - 0x010594, 0x010595, INTEGER_OFFSET, 39, - 0x010c80, 0x010cb2, INTEGER_OFFSET, 64, - 0x0118a0, 0x0118bf, INTEGER_OFFSET, 32, - 0x016e40, 0x016e5f, INTEGER_OFFSET, 32, - 0x01e900, 0x01e921, INTEGER_OFFSET, 34 - }); - - /* GENERATED CODE END - KEEP THIS MARKER FOR AUTOMATIC UPDATES */ - -} diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/RubyCaseUnfoldingTrie.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/CaseUnfoldingTrie.java similarity index 77% rename from regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/RubyCaseUnfoldingTrie.java rename to regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/CaseUnfoldingTrie.java index c73997b35135..b9d372e47c7a 100644 --- a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/RubyCaseUnfoldingTrie.java +++ b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/CaseUnfoldingTrie.java @@ -38,7 +38,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -package com.oracle.truffle.regex.tregex.parser.flavors; +package com.oracle.truffle.regex.tregex.parser; import java.util.ArrayList; import java.util.Collections; @@ -47,20 +47,13 @@ import org.graalvm.collections.EconomicMap; -public final class RubyCaseUnfoldingTrie { - - public static final RubyCaseUnfoldingTrie CASE_UNFOLD; - - static { - CASE_UNFOLD = new RubyCaseUnfoldingTrie(0); - RubyCaseFoldingData.CASE_FOLD.forEach((k, v) -> CASE_UNFOLD.add(k, v, 0)); - } +public final class CaseUnfoldingTrie { private final List codepoints; - private final EconomicMap childNodes; + private final EconomicMap childNodes; private final int depth; - public RubyCaseUnfoldingTrie(int depth) { + public CaseUnfoldingTrie(int depth) { this.codepoints = new ArrayList<>(); this.childNodes = EconomicMap.create(); this.depth = depth; @@ -73,7 +66,7 @@ public void add(int codepoint, int[] caseFoldedString, int offset) { } if (!hasChildAt(caseFoldedString[offset])) { - childNodes.put(caseFoldedString[offset], new RubyCaseUnfoldingTrie(depth + 1)); + childNodes.put(caseFoldedString[offset], new CaseUnfoldingTrie(depth + 1)); } getChildAt(caseFoldedString[offset]).add(codepoint, caseFoldedString, offset + 1); } @@ -82,7 +75,7 @@ public boolean hasChildAt(int index) { return childNodes.containsKey(index); } - public RubyCaseUnfoldingTrie getChildAt(int index) { + public CaseUnfoldingTrie getChildAt(int index) { return childNodes.get(index); } @@ -122,19 +115,19 @@ public int getCodepoint() { } } - public static List findUnfoldings(List caseFolded) { - List states = new ArrayList<>(); - List nextStates = new ArrayList<>(); + public static List findUnfoldings(CaseFoldData.CaseFoldAlgorithm algorithm, List caseFolded) { + List states = new ArrayList<>(); + List nextStates = new ArrayList<>(); List unfoldings = new ArrayList<>(); for (int i = 0; i < caseFolded.size(); i++) { int codepoint = caseFolded.get(i); - states.add(RubyCaseUnfoldingTrie.CASE_UNFOLD); + 
states.add(CaseFoldData.getUnfoldingTrie(algorithm)); - for (RubyCaseUnfoldingTrie state : states) { + for (CaseUnfoldingTrie state : states) { if (state.hasChildAt(codepoint)) { - RubyCaseUnfoldingTrie newState = state.getChildAt(codepoint); + CaseUnfoldingTrie newState = state.getChildAt(codepoint); nextStates.add(newState); for (int unfoldedCodepoint : newState.getCodepoints()) { unfoldings.add(new Unfolding(i + 1 - newState.getDepth(), newState.getDepth(), unfoldedCodepoint)); @@ -142,7 +135,7 @@ public static List findUnfoldings(List caseFolded) { } } - List statesTmp = states; + List statesTmp = states; states = nextStates; nextStates = statesTmp; @@ -154,8 +147,8 @@ public static List findUnfoldings(List caseFolded) { return unfoldings; } - public static List findSingleCharUnfoldings(int[] caseFolded) { - RubyCaseUnfoldingTrie state = CASE_UNFOLD; + public static List findSingleCharUnfoldings(CaseFoldData.CaseFoldAlgorithm algorithm, int[] caseFolded) { + CaseUnfoldingTrie state = CaseFoldData.getUnfoldingTrie(algorithm); for (int codepoint : caseFolded) { assert state.hasChildAt(codepoint); @@ -165,9 +158,9 @@ public static List findSingleCharUnfoldings(int[] caseFolded) { return state.getCodepoints(); } - public static List findSingleCharUnfoldings(int caseFolded) { - if (CASE_UNFOLD.hasChildAt(caseFolded)) { - return CASE_UNFOLD.getChildAt(caseFolded).getCodepoints(); + public static List findSingleCharUnfoldings(CaseFoldData.CaseFoldAlgorithm algorithm, int caseFolded) { + if (CaseFoldData.getUnfoldingTrie(algorithm).hasChildAt(caseFolded)) { + return CaseFoldData.getUnfoldingTrie(algorithm).getChildAt(caseFolded).getCodepoints(); } else { return Collections.emptyList(); } diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/JSRegexLexer.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/JSRegexLexer.java index c6a0aaa71dbf..232c1a6422ba 100644 --- 
a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/JSRegexLexer.java +++ b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/JSRegexLexer.java @@ -158,14 +158,15 @@ protected boolean featureEnabledClassSetExpressions() { @Override protected void caseFoldUnfold(CodePointSetAccumulator charClass) { - CaseFoldTable.CaseFoldingAlgorithm caseFolding = flags.isEitherUnicode() ? CaseFoldTable.CaseFoldingAlgorithm.ECMAScriptUnicode : CaseFoldTable.CaseFoldingAlgorithm.ECMAScriptNonUnicode; - CaseFoldTable.applyCaseFoldUnfold(charClass, compilationBuffer.getCodePointSetAccumulator1(), caseFolding); + CaseFoldData.CaseFoldUnfoldAlgorithm caseFolding = flags.isEitherUnicode() ? CaseFoldData.CaseFoldUnfoldAlgorithm.ECMAScriptUnicode : CaseFoldData.CaseFoldUnfoldAlgorithm.ECMAScriptNonUnicode; + CodePointSetAccumulator tmp = compilationBuffer.getCodePointSetAccumulator1(); + CaseFoldData.applyCaseFoldUnfold(charClass, tmp, caseFolding); } @Override protected CodePointSet complementClassSet(CodePointSet codePointSet) { if (flags.isUnicodeSets() && flags.isIgnoreCase()) { - return codePointSet.createInverse(Constants.FOLDED_CHARACTERS, compilationBuffer); + return codePointSet.createInverse(CaseFoldData.FOLDED_CHARACTERS, compilationBuffer); } else { return codePointSet.createInverse(source.getEncoding()); } @@ -201,7 +202,7 @@ protected int getMaxBackReferenceDigits() { } @Override - protected CodePointSet getPredefinedCharClass(char c) { + protected CodePointSet getPredefinedCharClass(char c, boolean inCharClass) { switch (c) { case 's': if (source.getOptions().isU180EWhitespace()) { @@ -269,7 +270,7 @@ protected Token handleBoundedQuantifierSyntaxError() throws RegexSyntaxException throw syntaxError(JsErrorMessages.INCOMPLETE_QUANTIFIER); } position = getLastTokenPosition() + 1; - return charClass('{'); + return literalChar('{'); } @Override @@ -475,7 +476,7 @@ protected Token parseCustomEscape(char c) { } 
handleInvalidBackReference(groupName); } else { - return charClass(c); + return literalChar(c); } } return null; diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/JSRegexParser.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/JSRegexParser.java index 87509fe7d7f6..4e6c03426264 100644 --- a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/JSRegexParser.java +++ b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/JSRegexParser.java @@ -44,6 +44,9 @@ import java.util.List; import java.util.Map; +import org.graalvm.collections.EconomicMap; +import org.graalvm.collections.Equivalence; + import com.oracle.truffle.api.CompilerDirectives; import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; import com.oracle.truffle.regex.AbstractRegexObject; @@ -52,9 +55,11 @@ import com.oracle.truffle.regex.RegexOptions; import com.oracle.truffle.regex.RegexSource; import com.oracle.truffle.regex.RegexSyntaxException; +import com.oracle.truffle.regex.charset.ClassSetContents; +import com.oracle.truffle.regex.charset.CodePointSet; +import com.oracle.truffle.regex.charset.CodePointSetAccumulator; import com.oracle.truffle.regex.errors.JsErrorMessages; import com.oracle.truffle.regex.tregex.buffer.CompilationBuffer; -import com.oracle.truffle.regex.tregex.parser.CaseFoldTable.CaseFoldingAlgorithm; import com.oracle.truffle.regex.tregex.parser.ast.Group; import com.oracle.truffle.regex.tregex.parser.ast.RegexAST; import com.oracle.truffle.regex.tregex.parser.ast.RegexASTRootNode; @@ -62,17 +67,17 @@ import com.oracle.truffle.regex.tregex.parser.ast.Sequence; import com.oracle.truffle.regex.tregex.parser.ast.Term; import com.oracle.truffle.regex.tregex.string.Encodings; -import org.graalvm.collections.EconomicMap; -import org.graalvm.collections.Equivalence; public final class JSRegexParser implements RegexParser { - private static final EnumSet 
QUANTIFIER_PREV = EnumSet.of(Token.Kind.charClass, Token.Kind.classSet, Token.Kind.groupEnd, Token.Kind.backReference); + private static final EnumSet QUANTIFIER_PREV = EnumSet.of(Token.Kind.literalChar, Token.Kind.charClass, Token.Kind.charClassEnd, Token.Kind.classSet, Token.Kind.groupEnd, + Token.Kind.backReference); private final RegexParserGlobals globals; private final RegexSource source; private final RegexFlags flags; private final JSRegexLexer lexer; private final RegexASTBuilder astBuilder; + private final CodePointSetAccumulator curCharClass = new CodePointSetAccumulator(); @TruffleBoundary public JSRegexParser(RegexLanguage language, RegexSource source, CompilationBuffer compilationBuffer) throws RegexSyntaxException { @@ -204,11 +209,30 @@ private RegexAST parse(boolean rootCapture) throws RegexSyntaxException { } astBuilder.popGroup(token); break; + case literalChar: + literalChar(((Token.LiteralCharacter) token).getCodePoint()); + break; case charClass: astBuilder.addCharClass((Token.CharacterClass) token); break; + case charClassBegin: + curCharClass.clear(); + break; + case charClassAtom: + ClassSetContents contents = ((Token.CharacterClassAtom) token).getContents(); + assert contents.isCodePointSetOnly(); + curCharClass.addSet(contents.getCodePointSet()); + break; + case charClassEnd: + boolean wasSingleChar = !lexer.isCurCharClassInverted() && curCharClass.matchesSingleChar(); + if (flags.isIgnoreCase()) { + lexer.caseFoldUnfold(curCharClass); + } + CodePointSet cps = curCharClass.toCodePointSet(); + astBuilder.addCharClass(lexer.isCurCharClassInverted() ? cps.createInverse(source.getEncoding()) : cps, wasSingleChar); + break; case classSet: - astBuilder.addClassSet((Token.ClassSet) token, flags.isIgnoreCase() ? CaseFoldingAlgorithm.ECMAScriptUnicode : null); + astBuilder.addClassSet((Token.ClassSet) token, flags.isIgnoreCase() ? 
CaseFoldData.CaseFoldUnfoldAlgorithm.ECMAScriptUnicode : null); break; default: throw CompilerDirectives.shouldNotReachHere(); @@ -222,6 +246,17 @@ private RegexAST parse(boolean rootCapture) throws RegexSyntaxException { return ast; } + private void literalChar(int codePoint) { + if (flags.isIgnoreCase()) { + curCharClass.clear(); + curCharClass.addCodePoint(codePoint); + lexer.caseFoldUnfold(curCharClass); + astBuilder.addCharClass(curCharClass.toCodePointSet(), true); + } else { + astBuilder.addCharClass(CodePointSet.create(codePoint)); + } + } + private static boolean isNestedInLookBehindAssertion(Term t) { RegexASTSubtreeRootNode parent = t.getSubTreeParent(); while (parent.isLookAroundAssertion()) { diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/JSRegexValidator.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/JSRegexValidator.java index 4a75a393dcb3..99770375d5ab 100644 --- a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/JSRegexValidator.java +++ b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/JSRegexValidator.java @@ -123,7 +123,11 @@ private void parseDryRun() throws RegexSyntaxException { case wordBoundary: case nonWordBoundary: case backReference: + case literalChar: case charClass: + case charClassBegin: + case charClassAtom: + case charClassEnd: case classSet: curTermState = CurTermState.Other; break; @@ -180,6 +184,9 @@ private void parseDryRun() throws RegexSyntaxException { throw CompilerDirectives.shouldNotReachHere(); } } + if (lexer.inCharacterClass()) { + throw syntaxError(JsErrorMessages.UNMATCHED_LEFT_BRACKET); + } if (!syntaxStack.isEmpty()) { throw syntaxError(JsErrorMessages.UNTERMINATED_GROUP); } diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/RubyCaseFolding.java 
b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/MultiCharacterCaseFolding.java similarity index 64% rename from regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/RubyCaseFolding.java rename to regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/MultiCharacterCaseFolding.java index 2c120e0a36dd..68d8653b379c 100644 --- a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/RubyCaseFolding.java +++ b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/MultiCharacterCaseFolding.java @@ -38,22 +38,28 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -package com.oracle.truffle.regex.tregex.parser.flavors; +package com.oracle.truffle.regex.tregex.parser; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; +import java.util.function.BiConsumer; +import java.util.function.BiPredicate; import java.util.stream.Collectors; import com.oracle.truffle.regex.UnsupportedRegexException; import com.oracle.truffle.regex.charset.CodePointSet; import com.oracle.truffle.regex.charset.CodePointSetAccumulator; -import com.oracle.truffle.regex.tregex.parser.RegexASTBuilder; -import com.oracle.truffle.regex.tregex.parser.flavors.RubyCaseUnfoldingTrie.Unfolding; +import com.oracle.truffle.regex.charset.Range; +import com.oracle.truffle.regex.tregex.parser.CaseUnfoldingTrie.Unfolding; +import org.graalvm.collections.Pair; -public class RubyCaseFolding { +import static com.oracle.truffle.regex.tregex.parser.RegexLexer.isAscii; - public static void caseFoldUnfoldString(int[] codepoints, CodePointSet encodingRange, RegexASTBuilder astBuilder) { - caseFoldUnfoldString(codepoints, encodingRange, false, astBuilder); +public class MultiCharacterCaseFolding { + + public static void caseFoldUnfoldString(CaseFoldData.CaseFoldAlgorithm algorithm, int[] codepoints, CodePointSet 
encodingRange, RegexASTBuilder astBuilder) { + caseFoldUnfoldString(algorithm, codepoints, encodingRange, false, astBuilder); } /** @@ -66,11 +72,11 @@ public static void caseFoldUnfoldString(int[] codepoints, CodePointSet encodingR * the variants * @param astBuilder where to append the matcher */ - public static void caseFoldUnfoldString(int[] codepoints, CodePointSet encodingRange, boolean dropAsciiOnStart, RegexASTBuilder astBuilder) { - List caseFolded = caseFold(codepoints); + public static void caseFoldUnfoldString(CaseFoldData.CaseFoldAlgorithm algorithm, int[] codepoints, CodePointSet encodingRange, boolean dropAsciiOnStart, RegexASTBuilder astBuilder) { + List caseFolded = caseFold(algorithm, codepoints); - List unfoldings = RubyCaseUnfoldingTrie.findUnfoldings(caseFolded); - // We assume that if `codepoints` was in the encodingRange, than so will be `caseFolded`. + List unfoldings = CaseUnfoldingTrie.findUnfoldings(algorithm, caseFolded); + // We assume that if `codepoints` was in the encodingRange, then so will be `caseFolded`. // The only way that we could introduce out-of-range characters is through the unfoldings, // so just filter those should be enough to prevent generating out-of-range matchers. unfoldings = unfoldings.stream().filter(u -> encodingRange.contains(u.getCodepoint())).collect(Collectors.toList()); @@ -92,7 +98,7 @@ public static void caseFoldUnfoldString(int[] codepoints, CodePointSet encodingR // If the following mandatory string that we would add would be at the // beginning of the matcher and it would match an ASCII character, then we // return a dead matcher instead (if dropAsciiOnStart is set). 
- if (dropAsciiOnStart && end == 0 && RubyRegexParser.isAscii(caseFolded.get(end))) { + if (dropAsciiOnStart && end == 0 && RegexLexer.isAscii(caseFolded.get(end))) { astBuilder.popGroup(); astBuilder.replaceCurTermWithDeadNode(); return; @@ -108,7 +114,7 @@ public static void caseFoldUnfoldString(int[] codepoints, CodePointSet encodingR unfoldSegment(astBuilder, caseFolded, unfoldings.subList(unfoldingsStartIndex, unfoldingsEndIndex), start, end, 0, dropAsciiOnStart); if (end < caseFolded.size()) { - if (dropAsciiOnStart && end == 0 && RubyRegexParser.isAscii(caseFolded.get(end))) { + if (dropAsciiOnStart && end == 0 && RegexLexer.isAscii(caseFolded.get(end))) { astBuilder.popGroup(); astBuilder.replaceCurTermWithDeadNode(); return; @@ -119,14 +125,14 @@ public static void caseFoldUnfoldString(int[] codepoints, CodePointSet encodingR astBuilder.popGroup(); } - public static int[] caseFold(int codePoint) { - return RubyCaseFoldingData.CASE_FOLD.get(codePoint); + public static int[] caseFold(CaseFoldData.CaseFoldAlgorithm algorithm, int codePoint) { + return CaseFoldData.getTable(algorithm).caseFold(codePoint); } - private static List caseFold(int[] codepoints) { + private static List caseFold(CaseFoldData.CaseFoldAlgorithm algorithm, int[] codepoints) { List caseFolded = new ArrayList<>(); for (int codepoint : codepoints) { - int[] folded = caseFold(codepoint); + int[] folded = caseFold(algorithm, codepoint); if (folded == null) { caseFolded.add(codepoint); } else { @@ -198,7 +204,7 @@ private static void unfoldSegment(RegexASTBuilder astBuilder, List case // The only possible unfoldings at this position have length == 1. We can express all the // choices by using a character class. 
CodePointSetAccumulator acc = new CodePointSetAccumulator(); - if (!dropAsciiOnStart || start != 0 || !RubyRegexParser.isAscii(caseFolded.get(start))) { + if (!dropAsciiOnStart || start != 0 || !RegexLexer.isAscii(caseFolded.get(start))) { acc.addCodePoint(caseFolded.get(start)); } int unfoldingsNextIndex = 0; @@ -207,7 +213,7 @@ private static void unfoldSegment(RegexASTBuilder astBuilder, List case // length > 0. assert unfoldings.get(unfoldingsNextIndex).getLength() == 1; int codepoint = unfoldings.get(unfoldingsNextIndex).getCodepoint(); - if (!dropAsciiOnStart || start != 0 || !RubyRegexParser.isAscii(codepoint)) { + if (!dropAsciiOnStart || start != 0 || !RegexLexer.isAscii(codepoint)) { acc.addCodePoint(codepoint); } unfoldingsNextIndex++; @@ -215,4 +221,84 @@ private static void unfoldSegment(RegexASTBuilder astBuilder, List case astBuilder.addCharClass(acc.toCodePointSet(), false); unfoldSegment(astBuilder, caseFolded, unfoldings.subList(unfoldingsNextIndex, unfoldings.size()), start + 1, end, backtrackingDepth, dropAsciiOnStart); } + + /** + * Calls the argument on any element of the character class which has a case-folding. + */ + private static void caseFoldCharClass(CaseFoldData.CaseFoldAlgorithm algorithm, CodePointSetAccumulator charClass, BiConsumer caseFoldItem) { + CaseFoldData.getTable(algorithm).caseFold(charClass, caseFoldItem); + } + + /** + * This method modifies {@code charClass} to contains its closure on case mapping. + */ + public static void caseClosure(CaseFoldData.CaseFoldAlgorithm algorithm, CodePointSetAccumulator charClass, CodePointSetAccumulator tmp, BiPredicate filter, + CodePointSet allowedCodePoints) { + tmp.clear(); + + caseFoldCharClass(algorithm, charClass, (from, to) -> { + if (to.length == 1) { + // Add the case-folded version to the character class... + if (filter.test(from, to[0])) { + tmp.addCodePoint(to[0]); + } + } + // ... and also any characters which case-fold to the same. 
+ for (int unfolding : CaseUnfoldingTrie.findSingleCharUnfoldings(algorithm, to)) { + if (unfolding != from && filter.test(from, unfolding)) { + tmp.addCodePoint(unfolding); + } + } + }); + + // We also handle all the characters which might have no case-folding, i.e. they case-fold + // to themselves. + for (Range r : charClass) { + for (int codepoint = r.lo; codepoint <= r.hi; codepoint++) { + for (int unfolding : CaseUnfoldingTrie.findSingleCharUnfoldings(algorithm, codepoint)) { + if (filter.test(codepoint, unfolding)) { + tmp.addCodePoint(unfolding); + } + } + } + } + + // Only include characters that are admissible in the given encoding. + tmp.intersectWith(allowedCodePoints); + + charClass.addSet(tmp.get()); + } + + /** + * Finds any characters in {@code charClass} that have multi-codepoint expansions. + * + * @return a list of pairs, with the first element being the expanded codepoint and the second + * element the expansion + */ + public static List> caseClosureMultiCodePoint(CaseFoldData.CaseFoldAlgorithm algorithm, CodePointSetAccumulator charClass) { + List> multiCodePointExpansions = new ArrayList<>(); + + caseFoldCharClass(algorithm, charClass, (from, to) -> { + if (to.length > 1) { + assert !isAscii(from); + multiCodePointExpansions.add(Pair.create(from, to)); + } + }); + + return multiCodePointExpansions; + } + + public static boolean equalsIgnoreCase(CaseFoldData.CaseFoldAlgorithm algorithm, int codePointA, int codePointB) { + int[] foldedA = caseFold(algorithm, codePointA); + int[] foldedB = caseFold(algorithm, codePointB); + if (foldedA == null && foldedB == null) { + return codePointA == codePointB; + } else if (foldedA == null) { + return foldedB.length == 1 && codePointA == foldedB[0]; + } else if (foldedB == null) { + return foldedA.length == 1 && foldedA[0] == codePointB; + } else { + return Arrays.equals(foldedA, foldedB); + } + } } diff --git 
a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/RegexASTBuilder.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/RegexASTBuilder.java index e765265d2781..084a866498af 100644 --- a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/RegexASTBuilder.java +++ b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/RegexASTBuilder.java @@ -534,7 +534,7 @@ private Term translateUnicodeCharClass(CodePointSet codePointSet, Token token, b * @param token aside from the source sections, the token most importantly contains the set of * code points and strings to be included in the class set */ - public void addClassSet(Token.ClassSet token, CaseFoldTable.CaseFoldingAlgorithm caseUnfoldAlgo) { + public void addClassSet(Token.ClassSet token, CaseFoldData.CaseFoldUnfoldAlgorithm caseUnfoldAlgo) { CodePointSetAccumulator buf = compilationBuffer.getCodePointSetAccumulator1(); ClassSetContents contents = token.getContents(); @@ -552,7 +552,7 @@ public void addClassSet(Token.ClassSet token, CaseFoldTable.CaseFoldingAlgorithm if (caseUnfoldAlgo != null) { buf.clear(); buf.addCodePoint(cp); - CaseFoldTable.applyCaseFoldUnfold(buf, compilationBuffer.getCodePointSetAccumulator2(), caseUnfoldAlgo); + CaseFoldData.applyCaseFoldUnfold(buf, compilationBuffer.getCodePointSetAccumulator2(), caseUnfoldAlgo); addCharClass(buf.toCodePointSet()); } else { addCharClass(CodePointSet.create(cp)); @@ -565,7 +565,7 @@ public void addClassSet(Token.ClassSet token, CaseFoldTable.CaseFoldingAlgorithm if (caseUnfoldAlgo != null) { buf.clear(); buf.addSet(contents.getCodePointSet()); - CaseFoldTable.applyCaseFoldUnfold(buf, compilationBuffer.getCodePointSetAccumulator2(), caseUnfoldAlgo); + CaseFoldData.applyCaseFoldUnfold(buf, compilationBuffer.getCodePointSetAccumulator2(), caseUnfoldAlgo); addCharClass(buf.toCodePointSet()); } else { addCharClass(contents.getCodePointSet()); diff --git 
a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/RegexLexer.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/RegexLexer.java index a9aa529d3e62..7b739fdf95b5 100644 --- a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/RegexLexer.java +++ b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/RegexLexer.java @@ -75,13 +75,16 @@ public abstract class RegexLexer { protected final String pattern; private final Encoding encoding; private final CodePointSetAccumulator curCharClass = new CodePointSetAccumulator(); + private boolean curCharClassInverted; /** * The index of the next character in {@link #pattern} to be parsed. */ protected int position = 0; protected Map> namedCaptureGroups = null; private int curStartIndex = 0; + private int curCharClassStartIndex = -1; private int charClassCurAtomStartIndex = 0; + private int charClassEmitInvalidRangeAtoms = 0; private int nGroups = 1; private boolean identifiedAllGroups = false; protected final CompilationBuffer compilationBuffer; @@ -250,7 +253,7 @@ public RegexLexer(RegexSource source, CompilationBuffer compilationBuffer) { * Note that the CodePointSet returned by this function has already been case-folded and * negated. */ - protected abstract CodePointSet getPredefinedCharClass(char c); + protected abstract CodePointSet getPredefinedCharClass(char c, boolean inCharClass); /** * The maximum value allowed while parsing bounded quantifiers. 
Larger values will cause a call @@ -443,30 +446,38 @@ protected void retreat() { } public boolean hasNext() { - if (featureEnabledLineComments()) { - int p; - do { - p = position; + if (!inCharacterClass()) { + if (featureEnabledLineComments()) { + int p; + do { + p = position; + skipWhitespace(); + if (consumingLookahead("#")) { + skipComment('\n'); + } else if (featureEnabledGroupComments() && consumingLookahead("(?#")) { + if (!skipComment(')')) { + handleUnfinishedGroupComment(); + } + } + } while (p != position); + } else if (featureEnabledIgnoreWhiteSpace()) { skipWhitespace(); - if (consumingLookahead("#")) { - skipComment('\n'); - } else if (featureEnabledGroupComments() && consumingLookahead("(?#")) { + } + if (featureEnabledGroupComments()) { + while (consumingLookahead("(?#")) { if (!skipComment(')')) { handleUnfinishedGroupComment(); } } - } while (p != position); - } else if (featureEnabledIgnoreWhiteSpace()) { - skipWhitespace(); - } - if (featureEnabledGroupComments()) { - while (consumingLookahead("(?#")) { - if (!skipComment(')')) { - handleUnfinishedGroupComment(); - } } } - return !atEnd(); + if (atEnd()) { + if (inCharacterClass()) { + throw handleUnmatchedLeftBracket(); + } + return false; + } + return true; } private boolean skipComment(char terminator) { @@ -505,6 +516,10 @@ public int getLastTokenPosition() { return curStartIndex; } + public int getLastCharacterClassBeginPosition() { + return curCharClassStartIndex - 1; + } + protected int getLastAtomPosition() { return Math.max(curStartIndex, charClassCurAtomStartIndex); } @@ -609,6 +624,14 @@ protected boolean atEnd() { return position >= pattern.length(); } + public boolean inCharacterClass() { + return curCharClassStartIndex >= 0; + } + + public boolean isCurCharClassInverted() { + return curCharClassInverted; + } + /** * Sets the {@link com.oracle.truffle.api.source.SourceSection} of a given {@link Token} in * respect of {@link RegexSource#getSource()}. 
@@ -718,39 +741,35 @@ private void identifyCaptureGroups() throws RegexSyntaxException { position = restoreIndex; } - protected Token charClass(int codePoint) { - if (featureEnabledIgnoreCase()) { - curCharClass.clear(); - curCharClass.appendRange(codePoint, codePoint); - return charClass(false); - } else { - return Token.createCharClass(CodePointSet.create(codePoint), true); - } + protected Token literalChar(int codePoint) { + return Token.createLiteralCharacter(codePoint); } private Token charClass(CodePointSet codePointSet) { if (featureEnabledIgnoreCase()) { curCharClass.clear(); curCharClass.addSet(codePointSet); - return charClass(false); + boolean wasSingleChar = curCharClass.matchesSingleChar(); + if (featureEnabledIgnoreCase()) { + caseFoldUnfold(curCharClass); + } + return Token.createCharClass(curCharClass.toCodePointSet(), wasSingleChar); } else { return Token.createCharClass(codePointSet); } } - private Token charClass(boolean invert) { - boolean wasSingleChar = !invert && curCharClass.matchesSingleChar(); - if (featureEnabledIgnoreCase()) { - caseFoldUnfold(curCharClass); - } - CodePointSet cps = curCharClass.toCodePointSet(); - return Token.createCharClass(invert ? cps.createInverse(encoding) : cps, wasSingleChar); - } - /* lexer */ private Token getNext() throws RegexSyntaxException { final char c = consumeChar(); + if (inCharacterClass()) { + if (c == ']' && (!featureEnabledCharClassFirstBracketIsLiteral() || position != curCharClassStartIndex + (curCharClassInverted ? 
2 : 1))) { + curCharClassStartIndex = -1; + return Token.createCharacterClassEnd(); + } + return Token.createCharacterClassAtom(parseCharClassAtom(c)); + } switch (c) { case '.': return Token.createCharClass(getDotCodePointSet()); @@ -765,7 +784,7 @@ private Token getNext() throws RegexSyntaxException { return parseQuantifier(c); case '}': handleUnmatchedRightBrace(); - return charClass(c); + return literalChar(c); case '|': return Token.createAlternation(); case '(': @@ -773,14 +792,19 @@ private Token getNext() throws RegexSyntaxException { case ')': return Token.createGroupEnd(); case '[': - return parseCharClass(); + if (featureEnabledClassSetExpressions()) { + return Token.createClassSetExpression(parseClassSetExpression()); + } + curCharClassStartIndex = position; + curCharClassInverted = consumingLookahead("^"); + return Token.createCharacterClassBegin(); case ']': handleUnmatchedRightBracket(); - return charClass(c); + return literalChar(c); case '\\': return parseEscape(); default: - return charClass(toCodePoint(c)); + return literalChar(toCodePoint(c)); } } @@ -825,7 +849,7 @@ private Token parseEscape() throws RegexSyntaxException { // the case-folding step in the `charClass` method and call `Token::createCharClass` // directly. 
if (isPredefCharClass(c)) { - return Token.createCharClass(getPredefinedCharClass(c)); + return Token.createCharClass(getPredefinedCharClass(c, false)); } else if (featureEnabledUnicodePropertyEscapes() && (c == 'p' || c == 'P')) { ClassSetContents unicodePropertyContents = parseUnicodeCharacterProperty(c == 'P'); if (featureEnabledClassSetExpressions()) { @@ -835,7 +859,7 @@ private Token parseEscape() throws RegexSyntaxException { return charClass(unicodePropertyContents.getCodePointSet()); } } else { - return charClass(parseEscapeChar(c, false)); + return literalChar(parseEscapeChar(c, false)); } } @@ -1033,23 +1057,6 @@ private int countZeros(int fromIndex) { return countFrom((c) -> c == '0', fromIndex); } - private Token parseCharClass() throws RegexSyntaxException { - if (featureEnabledClassSetExpressions()) { - return Token.createClassSetExpression(parseClassSetExpression()); - } - final boolean invert = consumingLookahead("^"); - curCharClass.clear(); - int startPos = position; - while (!atEnd()) { - final char c = consumeChar(); - if (c == ']' && (!featureEnabledCharClassFirstBracketIsLiteral() || position != startPos + 1)) { - return charClass(invert); - } - parseCharClassRange(c); - } - throw handleUnmatchedLeftBracket(); - } - private ClassSetContents parseCharClassAtomPredefCharClass(char c) throws RegexSyntaxException { if (c == '\\') { if (atEnd()) { @@ -1132,7 +1139,7 @@ private int parseCharClassAtomCodePoint(char c) throws RegexSyntaxException { } } - private ClassSetContents parseCharClassAtom(char c) throws RegexSyntaxException { + private ClassSetContents parseCharClassAtomInner(char c) throws RegexSyntaxException { ClassSetContents cc = parseCharClassAtomPredefCharClass(c); if (cc != null) { return cc; @@ -1140,45 +1147,45 @@ private ClassSetContents parseCharClassAtom(char c) throws RegexSyntaxException return ClassSetContents.createCharacter(parseCharClassAtomCodePoint(c)); } - private void parseCharClassRange(char c) throws 
RegexSyntaxException { + private ClassSetContents parseCharClassAtom(char c) throws RegexSyntaxException { int startPos = position - 1; charClassCurAtomStartIndex = position - 1; - ClassSetContents firstAtom = parseCharClassAtom(c); + ClassSetContents firstAtom = parseCharClassAtomInner(c); + if (charClassEmitInvalidRangeAtoms > 0) { + charClassEmitInvalidRangeAtoms--; + return firstAtom; + } if (consumingLookahead("-")) { if (atEnd() || lookahead("]")) { - addCharClassAtom(firstAtom); - curCharClass.addRange('-', '-'); + position--; + return firstAtom; } else { char nextC = consumeChar(); charClassCurAtomStartIndex = position - 1; - ClassSetContents secondAtom = parseCharClassAtom(nextC); + ClassSetContents secondAtom = parseCharClassAtomInner(nextC); // Runtime Semantics: CharacterRangeOrUnion(firstAtom, secondAtom) if (!firstAtom.isAllowedInRange() || !secondAtom.isAllowedInRange()) { handleCCRangeWithPredefCharClass(startPos, firstAtom, secondAtom); - addCharClassAtom(firstAtom); - addCharClassAtom(secondAtom); - curCharClass.addRange('-', '-'); + // no syntax error thrown, so we have to emit the range as three separate atoms + position = charClassCurAtomStartIndex - 1; + charClassEmitInvalidRangeAtoms = 2; + return firstAtom; } else { if (secondAtom.getCodePoint() < firstAtom.getCodePoint()) { throw handleCCRangeOutOfOrder(startPos); } else { - curCharClass.addRange(firstAtom.getCodePoint(), secondAtom.getCodePoint()); + return ClassSetContents.createRange(firstAtom.getCodePoint(), secondAtom.getCodePoint()); } } } } else { - addCharClassAtom(firstAtom); + return firstAtom; } } - private void addCharClassAtom(ClassSetContents atom) { - assert atom.isCodePointSetOnly(); - curCharClass.addSet(atom.getCodePointSet()); - } - private ClassSetContents parseEscapeCharClass(char c) throws RegexSyntaxException { if (isPredefCharClass(c)) { - return ClassSetContents.createCharacterClass(getPredefinedCharClass(c)); + return 
ClassSetContents.createCharacterClass(getPredefinedCharClass(c, true)); } else if (featureEnabledUnicodePropertyEscapes() && (c == 'p' || c == 'P')) { return parseUnicodeCharacterProperty(c == 'P'); } else { @@ -1465,15 +1472,19 @@ private static boolean isPredefCharClass(char c) { return PREDEFINED_CHAR_CLASSES.get(c); } - protected static boolean isDecimalDigit(int c) { + public static boolean isDecimalDigit(int c) { return '0' <= c && c <= '9'; } - protected static boolean isOctalDigit(int c) { + public static boolean isOctalDigit(int c) { return '0' <= c && c <= '7'; } - protected static boolean isHexDigit(int c) { + public static boolean isHexDigit(int c) { return '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F'; } + + public static boolean isAscii(int c) { + return Integer.compareUnsigned(c, 128) < 0; + } } diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/Token.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/Token.java index 6bb029e6a3f9..afa570e820d4 100644 --- a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/Token.java +++ b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/Token.java @@ -71,7 +71,11 @@ public enum Kind { lookAheadAssertionBegin, lookBehindAssertionBegin, groupEnd, + literalChar, charClass, + charClassBegin, + charClassAtom, + charClassEnd, classSet, inlineFlags, conditionalBackreference @@ -87,6 +91,8 @@ public enum Kind { private static final Token ALTERNATION = new Token(Kind.alternation); private static final Token CAPTURE_GROUP_BEGIN = new Token(Kind.captureGroupBegin); private static final Token NON_CAPTURE_GROUP_BEGIN = new Token(Kind.nonCaptureGroupBegin); + private static final Token CHAR_CLASS_BEGIN = new Token(Kind.charClassBegin); + private static final Token CHAR_CLASS_END = new Token(Kind.charClassEnd); private static final Token LOOK_AHEAD_ASSERTION_BEGIN = new 
LookAheadAssertionBegin(false); private static final Token NEGATIVE_LOOK_AHEAD_ASSERTION_BEGIN = new LookAheadAssertionBegin(true); private static final Token LOOK_BEHIND_ASSERTION_BEGIN = new LookBehindAssertionBegin(false); @@ -157,6 +163,10 @@ public static Quantifier createQuantifier(int min, int max, boolean greedy) { return new Quantifier(min, max, greedy); } + public static LiteralCharacter createLiteralCharacter(int codePoint) { + return new LiteralCharacter(codePoint); + } + public static CharacterClass createCharClass(CodePointSet codePointSet) { return new CharacterClass(codePointSet, false); } @@ -169,6 +179,18 @@ public static ClassSet createClassSetExpression(ClassSetContents contents) { return new ClassSet(contents); } + public static Token createCharacterClassBegin() { + return CHAR_CLASS_BEGIN; + } + + public static Token createCharacterClassAtom(ClassSetContents contents) { + return new CharacterClassAtom(contents); + } + + public static Token createCharacterClassEnd() { + return CHAR_CLASS_END; + } + public static Token createLookAheadAssertionBegin(boolean negated) { return negated ? 
NEGATIVE_LOOK_AHEAD_ASSERTION_BEGIN : LOOK_AHEAD_ASSERTION_BEGIN; } @@ -359,6 +381,46 @@ public JsonObject toJson() { } } + public static final class LiteralCharacter extends Token { + + private final int codePoint; + + public LiteralCharacter(int codePoint) { + super(Kind.literalChar); + this.codePoint = codePoint; + } + + @TruffleBoundary + @Override + public JsonObject toJson() { + return super.toJson().append(Json.prop("codePoint", codePoint)); + } + + public int getCodePoint() { + return codePoint; + } + } + + public static final class CharacterClassAtom extends Token { + + private final ClassSetContents contents; + + public CharacterClassAtom(ClassSetContents contents) { + super(Kind.charClassAtom); + this.contents = contents; + } + + @TruffleBoundary + @Override + public JsonObject toJson() { + return super.toJson().append(Json.prop("contents", contents)); + } + + public ClassSetContents getContents() { + return contents; + } + } + public static final class CharacterClass extends Token { private final CodePointSet codePointSet; @@ -420,7 +482,7 @@ public BackReference(Token.Kind kind, int[] groupNumbers, boolean namedReference @TruffleBoundary @Override public JsonObject toJson() { - return super.toJson().append(Json.prop("groupNumbers", Arrays.stream(groupNumbers).mapToObj(x -> Json.val(x)))); + return super.toJson().append(Json.prop("groupNumbers", Arrays.stream(groupNumbers).mapToObj(Json::val))); } public int[] getGroupNumbers() { diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/ECMAScriptFlavor.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/ECMAScriptFlavor.java index e91b67d0d5e1..07f154078c2f 100644 --- a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/ECMAScriptFlavor.java +++ b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/ECMAScriptFlavor.java @@ -43,7 +43,7 @@ import 
com.oracle.truffle.regex.RegexLanguage; import com.oracle.truffle.regex.RegexSource; import com.oracle.truffle.regex.tregex.buffer.CompilationBuffer; -import com.oracle.truffle.regex.tregex.parser.CaseFoldTable; +import com.oracle.truffle.regex.tregex.parser.CaseFoldData; import com.oracle.truffle.regex.tregex.parser.JSRegexParser; import com.oracle.truffle.regex.tregex.parser.JSRegexValidator; import com.oracle.truffle.regex.tregex.parser.RegexParser; @@ -73,9 +73,9 @@ public RegexParser createParser(RegexLanguage language, RegexSource source, Comp @Override public BiPredicate getEqualsIgnoreCasePredicate(RegexAST ast) { if (ast.getFlags().isEitherUnicode()) { - return CaseFoldTable.CaseFoldingAlgorithm.ECMAScriptUnicode.getEqualsPredicate(); + return CaseFoldData.CaseFoldUnfoldAlgorithm.ECMAScriptUnicode.getEqualsPredicate(); } else { - return CaseFoldTable.CaseFoldingAlgorithm.ECMAScriptNonUnicode.getEqualsPredicate(); + return CaseFoldData.CaseFoldUnfoldAlgorithm.ECMAScriptNonUnicode.getEqualsPredicate(); } } } diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/OracleDBConstants.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/OracleDBConstants.java new file mode 100644 index 000000000000..c47e6253b6e5 --- /dev/null +++ b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/OracleDBConstants.java @@ -0,0 +1,355 @@ +/* + * Copyright (c) 2023, 2023, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * The Universal Permissive License (UPL), Version 1.0 + * + * Subject to the condition set forth below, permission is hereby granted to any + * person obtaining a copy of this software, associated documentation and/or + * data (collectively the "Software"), free of charge and under any and all + * copyright rights in the Software, and any and all patent rights owned or + * freely licensable by each licensor hereunder covering either (i) the + * unmodified Software as contributed to or provided by such licensor, or (ii) + * the Larger Works (as defined below), to deal in both + * + * (a) the Software, and + * + * (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if + * one is included with the Software each a "Larger Work" to which the Software + * is contributed by such licensors), + * + * without restriction, including without limitation the rights to copy, create + * derivative works of, display, perform, and distribute the Software and make, + * use, sell, offer for sale, import, export, have made, and have sold the + * Software and the Larger Work(s), and to sublicense the foregoing rights on + * either these or other terms. + * + * This license is subject to the following condition: + * + * The above copyright notice and either this complete permission notice or at a + * minimum a reference to the UPL must be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +package com.oracle.truffle.regex.tregex.parser.flavors; + +import org.graalvm.collections.EconomicMap; + +import com.oracle.truffle.regex.charset.CodePointSet; + +final class OracleDBConstants { + + // This map contains the character sets of POSIX character classes like [[:alpha:]] and + // [[:punct:]]. + static final EconomicMap POSIX_CHAR_CLASSES = EconomicMap.create(12); + // \w + static final CodePointSet WORD_CHARACTERS; + + static { + + /* GENERATED CODE BEGIN - KEEP THIS MARKER FOR AUTOMATIC UPDATES */ + + POSIX_CHAR_CLASSES.put("alpha", CodePointSet.createNoDedup( + 0x000041, 0x00005a, 0x000061, 0x00007a, 0x0000aa, 0x0000aa, 0x0000b5, 0x0000b5, 0x0000ba, 0x0000ba, 0x0000c0, 0x0000d6, 0x0000d8, 0x0000f6, 0x0000f8, 0x0002b8, 0x0002bb, + 0x0002c1, 0x0002c7, 0x0002c7, 0x0002c9, 0x0002cb, 0x0002ce, 0x0002ce, 0x0002d1, 0x0002d1, 0x0002d9, 0x0002d9, 0x0002e0, 0x0002e4, 0x0002ec, 0x0002ec, 0x0002ee, 0x0002ee, + 0x000300, 0x000374, 0x000376, 0x00037d, 0x00037f, 0x000383, 0x000386, 0x000386, 0x000388, 0x0003f5, 0x0003f7, 0x000481, 0x000483, 0x000559, 0x000560, 0x000588, 0x00058b, + 0x00058c, 0x000590, 0x0005af, 0x0005ba, 0x0005ba, 0x0005c5, 0x0005c5, 0x0005c7, 0x0005f2, 0x0005f5, 0x0005ff, 0x000610, 0x00061a, 0x00061d, 0x00061d, 0x000620, 0x00065f, + 0x00066e, 0x0006d3, 0x0006d5, 0x0006dc, 0x0006df, 0x0006e8, 0x0006ea, 0x0006ef, 0x0006fa, 0x0006fc, 0x0006ff, 0x0006ff, 0x00070e, 0x00070e, 0x000710, 0x0007bf, 0x0007ca, + 0x0007f5, 0x0007fa, 0x0007fd, 0x000800, 0x00082f, 0x00083f, 0x00085d, 0x00085f, 0x0008e1, 0x0008e3, 0x000900, 0x000904, 0x00093b, 0x00094e, 0x000963, 0x000971, 0x0009e5, + 0x0009f0, 0x0009f1, 0x0009fc, 0x0009fc, 0x0009fe, 0x000a65, 0x000a70, 0x000a75, 0x000a77, 0x000ae5, 0x000af2, 0x000b65, 0x000b71, 0x000b71, 0x000b78, 0x000be5, 0x000bfb, + 0x000c65, 0x000c70, 0x000c76, 0x000c80, 0x000c83, 0x000c85, 0x000ce5, 0x000cf0, 0x000d4e, 0x000d50, 0x000d57, 0x000d5f, 0x000d65, 0x000d7a, 0x000de5, 0x000df0, 0x000df3, + 0x000df5, 0x000e2e, 0x000e30, 
0x000e3e, 0x000e40, 0x000e45, 0x000e47, 0x000e4d, 0x000e5c, 0x000ecf, 0x000eda, 0x000f00, 0x000f18, 0x000f19, 0x000f35, 0x000f35, 0x000f37, + 0x000f37, 0x000f39, 0x000f39, 0x000f3e, 0x000f84, 0x000f86, 0x000fbd, 0x000fc6, 0x000fc6, 0x000fcd, 0x000fcd, 0x000fdb, 0x00103f, 0x001050, 0x00108f, 0x00109a, 0x00109d, + 0x0010a0, 0x0010fa, 0x0010fc, 0x00135f, 0x00137d, 0x00138f, 0x00139a, 0x0013ff, 0x001401, 0x00166c, 0x00166f, 0x00167f, 0x001681, 0x00169a, 0x00169d, 0x0016ea, 0x0016ee, + 0x001734, 0x001737, 0x0017d3, 0x0017d7, 0x0017d7, 0x0017dc, 0x0017df, 0x0017ea, 0x0017ef, 0x0017fa, 0x0017ff, 0x00180b, 0x00180d, 0x00180f, 0x00180f, 0x00181a, 0x00193f, + 0x001941, 0x001943, 0x001950, 0x0019cf, 0x0019db, 0x0019dd, 0x001a00, 0x001a1d, 0x001a20, 0x001a7f, 0x001a8a, 0x001a8f, 0x001a9a, 0x001a9f, 0x001aa7, 0x001aa7, 0x001aae, + 0x001b4f, 0x001b6b, 0x001b73, 0x001b7d, 0x001baf, 0x001bba, 0x001bfb, 0x001c00, 0x001c3a, 0x001c4a, 0x001c4f, 0x001c5a, 0x001c7d, 0x001c80, 0x001cbf, 0x001cc8, 0x001cd2, + 0x001cd4, 0x001fbc, 0x001fbe, 0x001fbe, 0x001fc2, 0x001fcc, 0x001fd0, 0x001fdc, 0x001fe0, 0x001fec, 0x001ff0, 0x001ffc, 0x001fff, 0x001fff, 0x002015, 0x002015, 0x002065, + 0x002065, 0x002071, 0x002073, 0x00208f, 0x00209f, 0x0020c0, 0x0020ff, 0x002102, 0x002102, 0x002107, 0x002107, 0x00210a, 0x002113, 0x002115, 0x002115, 0x002119, 0x00211d, + 0x002124, 0x002124, 0x002128, 0x002128, 0x00212a, 0x00212a, 0x00212c, 0x00212d, 0x00212f, 0x002139, 0x00213c, 0x00213f, 0x002145, 0x002149, 0x00214e, 0x00214e, 0x002160, + 0x002188, 0x00218c, 0x00218f, 0x002400, 0x002421, 0x002427, 0x00243f, 0x00244b, 0x002487, 0x002b74, 0x002b75, 0x002b96, 0x002b97, 0x002c00, 0x002ce4, 0x002ceb, 0x002cf8, + 0x002d00, 0x002d6f, 0x002d71, 0x002dff, 0x002e2f, 0x002e2f, 0x002e50, 0x002fef, 0x002ffc, 0x002fff, 0x003003, 0x003003, 0x003005, 0x003007, 0x00301c, 0x00301c, 0x003021, + 0x00302f, 0x003031, 0x003035, 0x003038, 0x00303c, 0x003040, 0x00309a, 0x00309d, 0x00309f, 0x0030a1, 0x0030fa, 0x0030fc, 0x00318f, 
0x003192, 0x0031bf, 0x0031e4, 0x0031ff, + 0x00321f, 0x00321f, 0x003400, 0x004dbf, 0x004e00, 0x00a48f, 0x00a4c7, 0x00a4fd, 0x00a500, 0x00a60c, 0x00a610, 0x00a61f, 0x00a62a, 0x00a672, 0x00a674, 0x00a67d, 0x00a67f, + 0x00a6f1, 0x00a6f8, 0x00a6ff, 0x00a717, 0x00a71f, 0x00a722, 0x00a788, 0x00a78b, 0x00a827, 0x00a82c, 0x00a82f, 0x00a83a, 0x00a873, 0x00a878, 0x00a8cd, 0x00a8da, 0x00a8f7, + 0x00a8fb, 0x00a8fb, 0x00a8fd, 0x00a8ff, 0x00a90a, 0x00a92d, 0x00a930, 0x00a95e, 0x00a960, 0x00a9c0, 0x00a9ce, 0x00a9cf, 0x00a9da, 0x00a9dd, 0x00a9e0, 0x00a9ef, 0x00a9fa, + 0x00aa4f, 0x00aa5a, 0x00aa5b, 0x00aa60, 0x00aa76, 0x00aa7a, 0x00aadd, 0x00aae0, 0x00aaef, 0x00aaf2, 0x00ab5a, 0x00ab5c, 0x00abea, 0x00abec, 0x00abef, 0x00abfa, 0x00d7ff, + 0x00e000, 0x00fb1e, 0x00fb20, 0x00fb28, 0x00fb2a, 0x00fbb1, 0x00fbc2, 0x00fd3d, 0x00fd40, 0x00fdfb, 0x00fdfe, 0x00fe0f, 0x00fe1a, 0x00fe2f, 0x00fe53, 0x00fe53, 0x00fe67, + 0x00fe67, 0x00fe6c, 0x00fefe, 0x00ff00, 0x00ff00, 0x00ff21, 0x00ff3a, 0x00ff3f, 0x00ff3f, 0x00ff41, 0x00ff5a, 0x00ff66, 0x00ffdf, 0x00ffe7, 0x00ffe7, 0x00ffef, 0x00fff8, + 0x00fffd, 0x0100ff, 0x010103, 0x010106, 0x010134, 0x010136, 0x010140, 0x010174, 0x01018f, 0x01018f, 0x01019c, 0x01019f, 0x0101a1, 0x0101cf, 0x0101fd, 0x0102e0, 0x0102fc, + 0x01031f, 0x010324, 0x01039e, 0x0103a0, 0x0103cf, 0x0103d1, 0x01049f, 0x0104aa, 0x01056e, 0x010570, 0x010856, 0x010860, 0x010876, 0x010880, 0x0108a6, 0x0108b0, 0x0108fa, + 0x010900, 0x010915, 0x01091c, 0x01091e, 0x010920, 0x01093e, 0x010940, 0x0109bb, 0x0109be, 0x0109bf, 0x0109d0, 0x0109d1, 0x010a00, 0x010a3f, 0x010a49, 0x010a4f, 0x010a59, + 0x010a7c, 0x010a80, 0x010a9c, 0x010aa0, 0x010ac7, 0x010ac9, 0x010aea, 0x010af7, 0x010b38, 0x010b40, 0x010b57, 0x010b60, 0x010b77, 0x010b80, 0x010b98, 0x010b9d, 0x010ba8, + 0x010bb0, 0x010cf9, 0x010d00, 0x010d2f, 0x010d3a, 0x010e5f, 0x010e7f, 0x010f1c, 0x010f27, 0x010f50, 0x010f5a, 0x011046, 0x01104e, 0x011051, 0x011070, 0x0110ba, 0x0110c2, + 0x0110cc, 0x0110ce, 0x0110ef, 0x0110fa, 0x011135, 0x011144, 
0x011173, 0x011176, 0x0111c4, 0x0111c9, 0x0111cc, 0x0111ce, 0x0111cf, 0x0111da, 0x0111da, 0x0111dc, 0x0111dc, + 0x0111e0, 0x0111e0, 0x0111f5, 0x011237, 0x01123e, 0x0112a8, 0x0112aa, 0x0112ef, 0x0112fa, 0x01144a, 0x01145a, 0x01145a, 0x01145c, 0x01145c, 0x01145e, 0x0114c5, 0x0114c7, + 0x0114cf, 0x0114da, 0x0115c0, 0x0115d8, 0x011640, 0x011644, 0x01164f, 0x01165a, 0x01165f, 0x01166d, 0x0116bf, 0x0116ca, 0x01172f, 0x011740, 0x01183a, 0x01183c, 0x0118df, + 0x0118f3, 0x0119e1, 0x0119e3, 0x011a3e, 0x011a47, 0x011a99, 0x011a9d, 0x011a9d, 0x011aa3, 0x011c40, 0x011c46, 0x011c4f, 0x011c6d, 0x011c6f, 0x011c72, 0x011d4f, 0x011d5a, + 0x011d9f, 0x011daa, 0x011ef6, 0x011ef9, 0x011fbf, 0x011ff2, 0x011ffe, 0x012000, 0x01246f, 0x012475, 0x01342f, 0x013439, 0x016a5f, 0x016a6a, 0x016a6d, 0x016a70, 0x016af4, + 0x016af6, 0x016b36, 0x016b40, 0x016b43, 0x016b46, 0x016b4f, 0x016b5a, 0x016b5a, 0x016b62, 0x016e7f, 0x016e9b, 0x016fe1, 0x016fe3, 0x01bc9b, 0x01bc9d, 0x01bc9e, 0x01bca4, + 0x01cfff, 0x01d0f6, 0x01d0ff, 0x01d127, 0x01d128, 0x01d165, 0x01d169, 0x01d16d, 0x01d172, 0x01d17b, 0x01d182, 0x01d185, 0x01d18b, 0x01d1aa, 0x01d1ad, 0x01d1e9, 0x01d1ff, + 0x01d242, 0x01d244, 0x01d246, 0x01d2df, 0x01d2f4, 0x01d2ff, 0x01d357, 0x01d35f, 0x01d379, 0x01d6c0, 0x01d6c2, 0x01d6da, 0x01d6dc, 0x01d6fa, 0x01d6fc, 0x01d714, 0x01d716, + 0x01d734, 0x01d736, 0x01d74e, 0x01d750, 0x01d76e, 0x01d770, 0x01d788, 0x01d78a, 0x01d7a8, 0x01d7aa, 0x01d7c2, 0x01d7c4, 0x01d7cd, 0x01da00, 0x01da36, 0x01da3b, 0x01da6c, + 0x01da75, 0x01da75, 0x01da84, 0x01da84, 0x01da8c, 0x01e13f, 0x01e14a, 0x01e14e, 0x01e150, 0x01e2ef, 0x01e2fa, 0x01e2fe, 0x01e300, 0x01e8c6, 0x01e8d0, 0x01e94f, 0x01e95a, + 0x01e95d, 0x01e960, 0x01ec70, 0x01ecb5, 0x01ed00, 0x01ed3e, 0x01eeef, 0x01eef2, 0x01efff, 0x01f02c, 0x01f02f, 0x01f094, 0x01f09f, 0x01f0af, 0x01f0b0, 0x01f0c0, 0x01f0c0, + 0x01f0d0, 0x01f0d0, 0x01f0f6, 0x01f0ff, 0x01f10d, 0x01f10f, 0x01f16d, 0x01f16f, 0x01f1ad, 0x01f1e5, 0x01f203, 0x01f20f, 0x01f23c, 0x01f23f, 0x01f249, 0x01f24f, 
0x01f252, + 0x01f25f, 0x01f266, 0x01f2ff, 0x01f6d6, 0x01f6df, 0x01f6ed, 0x01f6ef, 0x01f6fb, 0x01f6ff, 0x01f774, 0x01f77f, 0x01f7d9, 0x01f7df, 0x01f7ec, 0x01f7ff, 0x01f80c, 0x01f80f, + 0x01f848, 0x01f84f, 0x01f85a, 0x01f85f, 0x01f888, 0x01f88f, 0x01f8ae, 0x01f8ff, 0x01f90c, 0x01f90c, 0x01f972, 0x01f972, 0x01f977, 0x01f979, 0x01f9a3, 0x01f9a4, 0x01f9ab, + 0x01f9ad, 0x01f9cb, 0x01f9cc, 0x01fa54, 0x01fa5f, 0x01fa6e, 0x01fa6f, 0x01fa74, 0x01fa77, 0x01fa7b, 0x01fa7f, 0x01fa83, 0x01fa8f, 0x01fa96, 0x0e0000, 0x0e0002, 0x0e001f, + 0x0e0080, 0x10fffd)); + + POSIX_CHAR_CLASSES.put("blank", CodePointSet.createNoDedup( + 0x000020, 0x000020, 0x001680, 0x001680, 0x002000, 0x00200a, 0x002028, 0x002029, 0x00202f, 0x00202f, 0x00205f, 0x00205f, 0x003000, 0x003000)); + + POSIX_CHAR_CLASSES.put("cntrl", CodePointSet.createNoDedup( + 0x000000, 0x00001f, 0x00007f, 0x00009f, 0x0000ad, 0x0000ad, 0x000600, 0x000605, 0x00061c, 0x00061c, 0x0006dd, 0x0006dd, 0x00070f, 0x00070f, 0x0008e2, 0x0008e2, 0x00180e, + 0x00180e, 0x00200b, 0x00200f, 0x00202a, 0x00202e, 0x002060, 0x002064, 0x002066, 0x00206f, 0x00feff, 0x00feff, 0x00fff9, 0x00fffb, 0x0110bd, 0x0110bd, 0x0110cd, 0x0110cd, + 0x013430, 0x013438, 0x01bca0, 0x01bca3, 0x01d173, 0x01d17a, 0x0e0001, 0x0e0001, 0x0e0020, 0x0e007f)); + + POSIX_CHAR_CLASSES.put("digit", CodePointSet.createNoDedup( + 0x000030, 0x000039, 0x000660, 0x000669, 0x0006f0, 0x0006f9, 0x0007c0, 0x0007c9, 0x000966, 0x00096f, 0x0009e6, 0x0009ef, 0x000a66, 0x000a6f, 0x000ae6, 0x000aef, 0x000b66, + 0x000b6f, 0x000be6, 0x000bef, 0x000c66, 0x000c6f, 0x000ce6, 0x000cef, 0x000d66, 0x000d6f, 0x000de6, 0x000def, 0x000e50, 0x000e59, 0x000ed0, 0x000ed9, 0x000f20, 0x000f29, + 0x001040, 0x001049, 0x001090, 0x001099, 0x0016ee, 0x0016f0, 0x0017e0, 0x0017e9, 0x001810, 0x001819, 0x001946, 0x00194f, 0x0019d0, 0x0019d9, 0x001a80, 0x001a89, 0x001a90, + 0x001a99, 0x001b50, 0x001b59, 0x001bb0, 0x001bb9, 0x001c40, 0x001c49, 0x001c50, 0x001c59, 0x002160, 0x002182, 0x002185, 0x002188, 0x003007, 
0x003007, 0x003021, 0x003029, + 0x003038, 0x00303a, 0x00a620, 0x00a629, 0x00a6e6, 0x00a6ef, 0x00a8d0, 0x00a8d9, 0x00a900, 0x00a909, 0x00a9d0, 0x00a9d9, 0x00a9f0, 0x00a9f9, 0x00aa50, 0x00aa59, 0x00abf0, + 0x00abf9, 0x00ff10, 0x00ff19, 0x010140, 0x010174, 0x010341, 0x010341, 0x01034a, 0x01034a, 0x0103d1, 0x0103d5, 0x0104a0, 0x0104a9, 0x010d30, 0x010d39, 0x011066, 0x01106f, + 0x0110f0, 0x0110f9, 0x011136, 0x01113f, 0x0111d0, 0x0111d9, 0x0112f0, 0x0112f9, 0x011450, 0x011459, 0x0114d0, 0x0114d9, 0x011650, 0x011659, 0x0116c0, 0x0116c9, 0x011730, + 0x011739, 0x0118e0, 0x0118e9, 0x011c50, 0x011c59, 0x011d50, 0x011d59, 0x011da0, 0x011da9, 0x012400, 0x01246e, 0x016a60, 0x016a69, 0x016b50, 0x016b59, 0x01d7ce, 0x01d7ff, + 0x01e140, 0x01e149, 0x01e2f0, 0x01e2f9, 0x01e950, 0x01e959)); + + POSIX_CHAR_CLASSES.put("graph", CodePointSet.createNoDedup( + 0x000021, 0x00007e, 0x0000a0, 0x0000ac, 0x0000ae, 0x0000b1, 0x0000b4, 0x0000b8, 0x0000ba, 0x0000bb, 0x0000bf, 0x0005ff, 0x000606, 0x00061b, 0x00061d, 0x0006dc, 0x0006de, + 0x00070e, 0x000710, 0x0008e1, 0x0008e3, 0x0009f3, 0x0009fa, 0x000b71, 0x000b78, 0x000bef, 0x000bf3, 0x000c77, 0x000c7f, 0x000d57, 0x000d5f, 0x000d6f, 0x000d79, 0x000f29, + 0x000f34, 0x001368, 0x00137d, 0x00167f, 0x001681, 0x0017ef, 0x0017fa, 0x00180d, 0x00180f, 0x0019d9, 0x0019db, 0x001fff, 0x002010, 0x002027, 0x002030, 0x00205e, 0x002065, + 0x002065, 0x002071, 0x002073, 0x00207a, 0x00207f, 0x00208a, 0x00212a, 0x00212c, 0x00214f, 0x002160, 0x002188, 0x00218a, 0x002487, 0x00249c, 0x0024e9, 0x002500, 0x002775, + 0x002794, 0x002cfc, 0x002cfe, 0x002fff, 0x003001, 0x00321f, 0x00322a, 0x003247, 0x003250, 0x003250, 0x003260, 0x00327f, 0x00328a, 0x0032b0, 0x0032c0, 0x00a82f, 0x00a836, + 0x00d7ff, 0x00e000, 0x00fefe, 0x00ff00, 0x00fff8, 0x00fffc, 0x010106, 0x010134, 0x010174, 0x010179, 0x010189, 0x01018c, 0x0102e0, 0x0102fc, 0x01031f, 0x010324, 0x010857, + 0x010860, 0x010878, 0x010880, 0x0108a6, 0x0108b0, 0x0108fa, 0x010900, 0x010915, 0x01091c, 0x0109bb, 0x0109be, 0x0109bf, 
0x0109d0, 0x0109d1, 0x010a00, 0x010a3f, 0x010a49, + 0x010a7c, 0x010a7f, 0x010a9c, 0x010aa0, 0x010aea, 0x010af0, 0x010b57, 0x010b60, 0x010b77, 0x010b80, 0x010ba8, 0x010bb0, 0x010cf9, 0x010d00, 0x010e5f, 0x010e7f, 0x010f1c, + 0x010f27, 0x010f50, 0x010f55, 0x011051, 0x011066, 0x0110bc, 0x0110be, 0x0110cc, 0x0110ce, 0x0111e0, 0x0111f5, 0x011739, 0x01173c, 0x0118e9, 0x0118f3, 0x011c59, 0x011c6d, + 0x011fbf, 0x011fd5, 0x01342f, 0x013439, 0x016b5a, 0x016b62, 0x016e7f, 0x016e97, 0x01bc9f, 0x01bca4, 0x01d172, 0x01d17b, 0x01d2df, 0x01d2f4, 0x01d35f, 0x01d379, 0x01e8c6, + 0x01e8d0, 0x01ec70, 0x01ecac, 0x01ecac, 0x01ecb0, 0x01ecb0, 0x01ecb5, 0x01ed00, 0x01ed2e, 0x01ed2e, 0x01ed3e, 0x01f0ff, 0x01f10d, 0x0e0000, 0x0e0002, 0x0e001f, 0x0e0080, + 0x10fffd)); + + POSIX_CHAR_CLASSES.put("lower", CodePointSet.createNoDedup( + 0x000061, 0x00007a, 0x0000b5, 0x0000b5, 0x0000df, 0x0000f6, 0x0000f8, 0x0000ff, 0x000101, 0x000101, 0x000103, 0x000103, 0x000105, 0x000105, 0x000107, 0x000107, 0x000109, + 0x000109, 0x00010b, 0x00010b, 0x00010d, 0x00010d, 0x00010f, 0x00010f, 0x000111, 0x000111, 0x000113, 0x000113, 0x000115, 0x000115, 0x000117, 0x000117, 0x000119, 0x000119, + 0x00011b, 0x00011b, 0x00011d, 0x00011d, 0x00011f, 0x00011f, 0x000121, 0x000121, 0x000123, 0x000123, 0x000125, 0x000125, 0x000127, 0x000127, 0x000129, 0x000129, 0x00012b, + 0x00012b, 0x00012d, 0x00012d, 0x00012f, 0x00012f, 0x000131, 0x000131, 0x000133, 0x000133, 0x000135, 0x000135, 0x000137, 0x000138, 0x00013a, 0x00013a, 0x00013c, 0x00013c, + 0x00013e, 0x00013e, 0x000140, 0x000140, 0x000142, 0x000142, 0x000144, 0x000144, 0x000146, 0x000146, 0x000148, 0x000149, 0x00014b, 0x00014b, 0x00014d, 0x00014d, 0x00014f, + 0x00014f, 0x000151, 0x000151, 0x000153, 0x000153, 0x000155, 0x000155, 0x000157, 0x000157, 0x000159, 0x000159, 0x00015b, 0x00015b, 0x00015d, 0x00015d, 0x00015f, 0x00015f, + 0x000161, 0x000161, 0x000163, 0x000163, 0x000165, 0x000165, 0x000167, 0x000167, 0x000169, 0x000169, 0x00016b, 0x00016b, 0x00016d, 0x00016d, 0x00016f, 
0x00016f, 0x000171, + 0x000171, 0x000173, 0x000173, 0x000175, 0x000175, 0x000177, 0x000177, 0x00017a, 0x00017a, 0x00017c, 0x00017c, 0x00017e, 0x000180, 0x000183, 0x000183, 0x000185, 0x000185, + 0x000188, 0x000188, 0x00018c, 0x00018d, 0x000192, 0x000192, 0x000195, 0x000195, 0x000199, 0x00019b, 0x00019e, 0x00019e, 0x0001a1, 0x0001a1, 0x0001a3, 0x0001a3, 0x0001a5, + 0x0001a5, 0x0001a8, 0x0001a8, 0x0001aa, 0x0001ab, 0x0001ad, 0x0001ad, 0x0001b0, 0x0001b0, 0x0001b4, 0x0001b4, 0x0001b6, 0x0001b6, 0x0001b9, 0x0001ba, 0x0001bd, 0x0001bf, + 0x0001c5, 0x0001c6, 0x0001c8, 0x0001c9, 0x0001cb, 0x0001cc, 0x0001ce, 0x0001ce, 0x0001d0, 0x0001d0, 0x0001d2, 0x0001d2, 0x0001d4, 0x0001d4, 0x0001d6, 0x0001d6, 0x0001d8, + 0x0001d8, 0x0001da, 0x0001da, 0x0001dc, 0x0001dd, 0x0001df, 0x0001df, 0x0001e1, 0x0001e1, 0x0001e3, 0x0001e3, 0x0001e5, 0x0001e5, 0x0001e7, 0x0001e7, 0x0001e9, 0x0001e9, + 0x0001eb, 0x0001eb, 0x0001ed, 0x0001ed, 0x0001ef, 0x0001f0, 0x0001f2, 0x0001f3, 0x0001f5, 0x0001f5, 0x0001f9, 0x0001f9, 0x0001fb, 0x0001fb, 0x0001fd, 0x0001fd, 0x0001ff, + 0x0001ff, 0x000201, 0x000201, 0x000203, 0x000203, 0x000205, 0x000205, 0x000207, 0x000207, 0x000209, 0x000209, 0x00020b, 0x00020b, 0x00020d, 0x00020d, 0x00020f, 0x00020f, + 0x000211, 0x000211, 0x000213, 0x000213, 0x000215, 0x000215, 0x000217, 0x000217, 0x000219, 0x000219, 0x00021b, 0x00021b, 0x00021d, 0x00021d, 0x00021f, 0x00021f, 0x000221, + 0x000221, 0x000223, 0x000223, 0x000225, 0x000225, 0x000227, 0x000227, 0x000229, 0x000229, 0x00022b, 0x00022b, 0x00022d, 0x00022d, 0x00022f, 0x00022f, 0x000231, 0x000231, + 0x000233, 0x000239, 0x00023c, 0x00023c, 0x00023f, 0x000240, 0x000242, 0x000242, 0x000247, 0x000247, 0x000249, 0x000249, 0x00024b, 0x00024b, 0x00024d, 0x00024d, 0x00024f, + 0x000293, 0x000295, 0x0002af, 0x000345, 0x000345, 0x000371, 0x000371, 0x000373, 0x000373, 0x000377, 0x000377, 0x00037b, 0x00037d, 0x000390, 0x000390, 0x0003ac, 0x0003ce, + 0x0003d0, 0x0003d1, 0x0003d5, 0x0003d7, 0x0003d9, 0x0003d9, 0x0003db, 0x0003db, 
0x0003dd, 0x0003dd, 0x0003df, 0x0003df, 0x0003e1, 0x0003e1, 0x0003e3, 0x0003e3, 0x0003e5, + 0x0003e5, 0x0003e7, 0x0003e7, 0x0003e9, 0x0003e9, 0x0003eb, 0x0003eb, 0x0003ed, 0x0003ed, 0x0003ef, 0x0003f3, 0x0003f5, 0x0003f5, 0x0003f8, 0x0003f8, 0x0003fb, 0x0003fc, + 0x000430, 0x00045f, 0x000461, 0x000461, 0x000463, 0x000463, 0x000465, 0x000465, 0x000467, 0x000467, 0x000469, 0x000469, 0x00046b, 0x00046b, 0x00046d, 0x00046d, 0x00046f, + 0x00046f, 0x000471, 0x000471, 0x000473, 0x000473, 0x000475, 0x000475, 0x000477, 0x000477, 0x000479, 0x000479, 0x00047b, 0x00047b, 0x00047d, 0x00047d, 0x00047f, 0x00047f, + 0x000481, 0x000481, 0x00048b, 0x00048b, 0x00048d, 0x00048d, 0x00048f, 0x00048f, 0x000491, 0x000491, 0x000493, 0x000493, 0x000495, 0x000495, 0x000497, 0x000497, 0x000499, + 0x000499, 0x00049b, 0x00049b, 0x00049d, 0x00049d, 0x00049f, 0x00049f, 0x0004a1, 0x0004a1, 0x0004a3, 0x0004a3, 0x0004a5, 0x0004a5, 0x0004a7, 0x0004a7, 0x0004a9, 0x0004a9, + 0x0004ab, 0x0004ab, 0x0004ad, 0x0004ad, 0x0004af, 0x0004af, 0x0004b1, 0x0004b1, 0x0004b3, 0x0004b3, 0x0004b5, 0x0004b5, 0x0004b7, 0x0004b7, 0x0004b9, 0x0004b9, 0x0004bb, + 0x0004bb, 0x0004bd, 0x0004bd, 0x0004bf, 0x0004bf, 0x0004c2, 0x0004c2, 0x0004c4, 0x0004c4, 0x0004c6, 0x0004c6, 0x0004c8, 0x0004c8, 0x0004ca, 0x0004ca, 0x0004cc, 0x0004cc, + 0x0004ce, 0x0004cf, 0x0004d1, 0x0004d1, 0x0004d3, 0x0004d3, 0x0004d5, 0x0004d5, 0x0004d7, 0x0004d7, 0x0004d9, 0x0004d9, 0x0004db, 0x0004db, 0x0004dd, 0x0004dd, 0x0004df, + 0x0004df, 0x0004e1, 0x0004e1, 0x0004e3, 0x0004e3, 0x0004e5, 0x0004e5, 0x0004e7, 0x0004e7, 0x0004e9, 0x0004e9, 0x0004eb, 0x0004eb, 0x0004ed, 0x0004ed, 0x0004ef, 0x0004ef, + 0x0004f1, 0x0004f1, 0x0004f3, 0x0004f3, 0x0004f5, 0x0004f5, 0x0004f7, 0x0004f7, 0x0004f9, 0x0004f9, 0x0004fb, 0x0004fb, 0x0004fd, 0x0004fd, 0x0004ff, 0x0004ff, 0x000501, + 0x000501, 0x000503, 0x000503, 0x000505, 0x000505, 0x000507, 0x000507, 0x000509, 0x000509, 0x00050b, 0x00050b, 0x00050d, 0x00050d, 0x00050f, 0x00050f, 0x000511, 0x000511, + 0x000513, 
0x000513, 0x000515, 0x000515, 0x000517, 0x000517, 0x000519, 0x000519, 0x00051b, 0x00051b, 0x00051d, 0x00051d, 0x00051f, 0x00051f, 0x000521, 0x000521, 0x000523, + 0x000523, 0x000525, 0x000525, 0x000527, 0x000527, 0x000529, 0x000529, 0x00052b, 0x00052b, 0x00052d, 0x00052d, 0x00052f, 0x00052f, 0x000560, 0x000588, 0x0010d0, 0x0010fa, + 0x0010fd, 0x0010ff, 0x0013f8, 0x0013fd, 0x001c80, 0x001c88, 0x001d00, 0x001d2b, 0x001d6b, 0x001d77, 0x001d79, 0x001d9a, 0x001e01, 0x001e01, 0x001e03, 0x001e03, 0x001e05, + 0x001e05, 0x001e07, 0x001e07, 0x001e09, 0x001e09, 0x001e0b, 0x001e0b, 0x001e0d, 0x001e0d, 0x001e0f, 0x001e0f, 0x001e11, 0x001e11, 0x001e13, 0x001e13, 0x001e15, 0x001e15, + 0x001e17, 0x001e17, 0x001e19, 0x001e19, 0x001e1b, 0x001e1b, 0x001e1d, 0x001e1d, 0x001e1f, 0x001e1f, 0x001e21, 0x001e21, 0x001e23, 0x001e23, 0x001e25, 0x001e25, 0x001e27, + 0x001e27, 0x001e29, 0x001e29, 0x001e2b, 0x001e2b, 0x001e2d, 0x001e2d, 0x001e2f, 0x001e2f, 0x001e31, 0x001e31, 0x001e33, 0x001e33, 0x001e35, 0x001e35, 0x001e37, 0x001e37, + 0x001e39, 0x001e39, 0x001e3b, 0x001e3b, 0x001e3d, 0x001e3d, 0x001e3f, 0x001e3f, 0x001e41, 0x001e41, 0x001e43, 0x001e43, 0x001e45, 0x001e45, 0x001e47, 0x001e47, 0x001e49, + 0x001e49, 0x001e4b, 0x001e4b, 0x001e4d, 0x001e4d, 0x001e4f, 0x001e4f, 0x001e51, 0x001e51, 0x001e53, 0x001e53, 0x001e55, 0x001e55, 0x001e57, 0x001e57, 0x001e59, 0x001e59, + 0x001e5b, 0x001e5b, 0x001e5d, 0x001e5d, 0x001e5f, 0x001e5f, 0x001e61, 0x001e61, 0x001e63, 0x001e63, 0x001e65, 0x001e65, 0x001e67, 0x001e67, 0x001e69, 0x001e69, 0x001e6b, + 0x001e6b, 0x001e6d, 0x001e6d, 0x001e6f, 0x001e6f, 0x001e71, 0x001e71, 0x001e73, 0x001e73, 0x001e75, 0x001e75, 0x001e77, 0x001e77, 0x001e79, 0x001e79, 0x001e7b, 0x001e7b, + 0x001e7d, 0x001e7d, 0x001e7f, 0x001e7f, 0x001e81, 0x001e81, 0x001e83, 0x001e83, 0x001e85, 0x001e85, 0x001e87, 0x001e87, 0x001e89, 0x001e89, 0x001e8b, 0x001e8b, 0x001e8d, + 0x001e8d, 0x001e8f, 0x001e8f, 0x001e91, 0x001e91, 0x001e93, 0x001e93, 0x001e95, 0x001e9d, 0x001e9f, 0x001e9f, 
0x001ea1, 0x001ea1, 0x001ea3, 0x001ea3, 0x001ea5, 0x001ea5, + 0x001ea7, 0x001ea7, 0x001ea9, 0x001ea9, 0x001eab, 0x001eab, 0x001ead, 0x001ead, 0x001eaf, 0x001eaf, 0x001eb1, 0x001eb1, 0x001eb3, 0x001eb3, 0x001eb5, 0x001eb5, 0x001eb7, + 0x001eb7, 0x001eb9, 0x001eb9, 0x001ebb, 0x001ebb, 0x001ebd, 0x001ebd, 0x001ebf, 0x001ebf, 0x001ec1, 0x001ec1, 0x001ec3, 0x001ec3, 0x001ec5, 0x001ec5, 0x001ec7, 0x001ec7, + 0x001ec9, 0x001ec9, 0x001ecb, 0x001ecb, 0x001ecd, 0x001ecd, 0x001ecf, 0x001ecf, 0x001ed1, 0x001ed1, 0x001ed3, 0x001ed3, 0x001ed5, 0x001ed5, 0x001ed7, 0x001ed7, 0x001ed9, + 0x001ed9, 0x001edb, 0x001edb, 0x001edd, 0x001edd, 0x001edf, 0x001edf, 0x001ee1, 0x001ee1, 0x001ee3, 0x001ee3, 0x001ee5, 0x001ee5, 0x001ee7, 0x001ee7, 0x001ee9, 0x001ee9, + 0x001eeb, 0x001eeb, 0x001eed, 0x001eed, 0x001eef, 0x001eef, 0x001ef1, 0x001ef1, 0x001ef3, 0x001ef3, 0x001ef5, 0x001ef5, 0x001ef7, 0x001ef7, 0x001ef9, 0x001ef9, 0x001efb, + 0x001efb, 0x001efd, 0x001efd, 0x001eff, 0x001f07, 0x001f10, 0x001f15, 0x001f20, 0x001f27, 0x001f30, 0x001f37, 0x001f40, 0x001f45, 0x001f50, 0x001f57, 0x001f60, 0x001f67, + 0x001f70, 0x001f7d, 0x001f80, 0x001f87, 0x001f90, 0x001f97, 0x001fa0, 0x001fa7, 0x001fb0, 0x001fb4, 0x001fb6, 0x001fb7, 0x001fbe, 0x001fbe, 0x001fc2, 0x001fc4, 0x001fc6, + 0x001fc7, 0x001fd0, 0x001fd3, 0x001fd6, 0x001fd7, 0x001fe0, 0x001fe7, 0x001ff2, 0x001ff4, 0x001ff6, 0x001ff7, 0x00207f, 0x00207f, 0x00210a, 0x00210a, 0x00210e, 0x00210f, + 0x002113, 0x002113, 0x00212f, 0x00212f, 0x002134, 0x002134, 0x002139, 0x002139, 0x00213c, 0x00213d, 0x002146, 0x002149, 0x00214e, 0x00214e, 0x002170, 0x00217f, 0x002184, + 0x002184, 0x0024d0, 0x0024e9, 0x002c30, 0x002c5e, 0x002c61, 0x002c61, 0x002c65, 0x002c66, 0x002c68, 0x002c68, 0x002c6a, 0x002c6a, 0x002c6c, 0x002c6c, 0x002c71, 0x002c71, + 0x002c73, 0x002c74, 0x002c76, 0x002c7b, 0x002c81, 0x002c81, 0x002c83, 0x002c83, 0x002c85, 0x002c85, 0x002c87, 0x002c87, 0x002c89, 0x002c89, 0x002c8b, 0x002c8b, 0x002c8d, + 0x002c8d, 0x002c8f, 0x002c8f, 0x002c91, 
0x002c91, 0x002c93, 0x002c93, 0x002c95, 0x002c95, 0x002c97, 0x002c97, 0x002c99, 0x002c99, 0x002c9b, 0x002c9b, 0x002c9d, 0x002c9d, + 0x002c9f, 0x002c9f, 0x002ca1, 0x002ca1, 0x002ca3, 0x002ca3, 0x002ca5, 0x002ca5, 0x002ca7, 0x002ca7, 0x002ca9, 0x002ca9, 0x002cab, 0x002cab, 0x002cad, 0x002cad, 0x002caf, + 0x002caf, 0x002cb1, 0x002cb1, 0x002cb3, 0x002cb3, 0x002cb5, 0x002cb5, 0x002cb7, 0x002cb7, 0x002cb9, 0x002cb9, 0x002cbb, 0x002cbb, 0x002cbd, 0x002cbd, 0x002cbf, 0x002cbf, + 0x002cc1, 0x002cc1, 0x002cc3, 0x002cc3, 0x002cc5, 0x002cc5, 0x002cc7, 0x002cc7, 0x002cc9, 0x002cc9, 0x002ccb, 0x002ccb, 0x002ccd, 0x002ccd, 0x002ccf, 0x002ccf, 0x002cd1, + 0x002cd1, 0x002cd3, 0x002cd3, 0x002cd5, 0x002cd5, 0x002cd7, 0x002cd7, 0x002cd9, 0x002cd9, 0x002cdb, 0x002cdb, 0x002cdd, 0x002cdd, 0x002cdf, 0x002cdf, 0x002ce1, 0x002ce1, + 0x002ce3, 0x002ce4, 0x002cec, 0x002cec, 0x002cee, 0x002cee, 0x002cf3, 0x002cf3, 0x002d00, 0x002d25, 0x002d27, 0x002d27, 0x002d2d, 0x002d2d, 0x00a641, 0x00a641, 0x00a643, + 0x00a643, 0x00a645, 0x00a645, 0x00a647, 0x00a647, 0x00a649, 0x00a649, 0x00a64b, 0x00a64b, 0x00a64d, 0x00a64d, 0x00a64f, 0x00a64f, 0x00a651, 0x00a651, 0x00a653, 0x00a653, + 0x00a655, 0x00a655, 0x00a657, 0x00a657, 0x00a659, 0x00a659, 0x00a65b, 0x00a65b, 0x00a65d, 0x00a65d, 0x00a65f, 0x00a65f, 0x00a661, 0x00a661, 0x00a663, 0x00a663, 0x00a665, + 0x00a665, 0x00a667, 0x00a667, 0x00a669, 0x00a669, 0x00a66b, 0x00a66b, 0x00a66d, 0x00a66d, 0x00a681, 0x00a681, 0x00a683, 0x00a683, 0x00a685, 0x00a685, 0x00a687, 0x00a687, + 0x00a689, 0x00a689, 0x00a68b, 0x00a68b, 0x00a68d, 0x00a68d, 0x00a68f, 0x00a68f, 0x00a691, 0x00a691, 0x00a693, 0x00a693, 0x00a695, 0x00a695, 0x00a697, 0x00a697, 0x00a699, + 0x00a699, 0x00a69b, 0x00a69b, 0x00a723, 0x00a723, 0x00a725, 0x00a725, 0x00a727, 0x00a727, 0x00a729, 0x00a729, 0x00a72b, 0x00a72b, 0x00a72d, 0x00a72d, 0x00a72f, 0x00a731, + 0x00a733, 0x00a733, 0x00a735, 0x00a735, 0x00a737, 0x00a737, 0x00a739, 0x00a739, 0x00a73b, 0x00a73b, 0x00a73d, 0x00a73d, 0x00a73f, 0x00a73f, 
0x00a741, 0x00a741, 0x00a743, + 0x00a743, 0x00a745, 0x00a745, 0x00a747, 0x00a747, 0x00a749, 0x00a749, 0x00a74b, 0x00a74b, 0x00a74d, 0x00a74d, 0x00a74f, 0x00a74f, 0x00a751, 0x00a751, 0x00a753, 0x00a753, + 0x00a755, 0x00a755, 0x00a757, 0x00a757, 0x00a759, 0x00a759, 0x00a75b, 0x00a75b, 0x00a75d, 0x00a75d, 0x00a75f, 0x00a75f, 0x00a761, 0x00a761, 0x00a763, 0x00a763, 0x00a765, + 0x00a765, 0x00a767, 0x00a767, 0x00a769, 0x00a769, 0x00a76b, 0x00a76b, 0x00a76d, 0x00a76d, 0x00a76f, 0x00a76f, 0x00a771, 0x00a778, 0x00a77a, 0x00a77a, 0x00a77c, 0x00a77c, + 0x00a77f, 0x00a77f, 0x00a781, 0x00a781, 0x00a783, 0x00a783, 0x00a785, 0x00a785, 0x00a787, 0x00a787, 0x00a78c, 0x00a78c, 0x00a78e, 0x00a78e, 0x00a791, 0x00a791, 0x00a793, + 0x00a795, 0x00a797, 0x00a797, 0x00a799, 0x00a799, 0x00a79b, 0x00a79b, 0x00a79d, 0x00a79d, 0x00a79f, 0x00a79f, 0x00a7a1, 0x00a7a1, 0x00a7a3, 0x00a7a3, 0x00a7a5, 0x00a7a5, + 0x00a7a7, 0x00a7a7, 0x00a7a9, 0x00a7a9, 0x00a7af, 0x00a7af, 0x00a7b5, 0x00a7b5, 0x00a7b7, 0x00a7b7, 0x00a7b9, 0x00a7b9, 0x00a7bb, 0x00a7bb, 0x00a7bd, 0x00a7bd, 0x00a7bf, + 0x00a7bf, 0x00a7c3, 0x00a7c3, 0x00a7fa, 0x00a7fa, 0x00ab30, 0x00ab5a, 0x00ab60, 0x00ab67, 0x00ab70, 0x00abbf, 0x00fb00, 0x00fb06, 0x00fb13, 0x00fb17, 0x00ff41, 0x00ff5a, + 0x010428, 0x01044f, 0x0104d8, 0x0104fb, 0x010cc0, 0x010cf2, 0x0118c0, 0x0118df, 0x016e60, 0x016e7f, 0x01d41a, 0x01d433, 0x01d44e, 0x01d454, 0x01d456, 0x01d467, 0x01d482, + 0x01d49b, 0x01d4b6, 0x01d4b9, 0x01d4bb, 0x01d4bb, 0x01d4bd, 0x01d4c3, 0x01d4c5, 0x01d4cf, 0x01d4ea, 0x01d503, 0x01d51e, 0x01d537, 0x01d552, 0x01d56b, 0x01d586, 0x01d59f, + 0x01d5ba, 0x01d5d3, 0x01d5ee, 0x01d607, 0x01d622, 0x01d63b, 0x01d656, 0x01d66f, 0x01d68a, 0x01d6a5, 0x01d6c2, 0x01d6da, 0x01d6dc, 0x01d6e1, 0x01d6fc, 0x01d714, 0x01d716, + 0x01d71b, 0x01d736, 0x01d74e, 0x01d750, 0x01d755, 0x01d770, 0x01d788, 0x01d78a, 0x01d78f, 0x01d7aa, 0x01d7c2, 0x01d7c4, 0x01d7c9, 0x01d7cb, 0x01d7cb, 0x01e922, 0x01e943)); + + POSIX_CHAR_CLASSES.put("print", CodePointSet.createNoDedup( + 0x000020, 
0x00007e, 0x0000a0, 0x0000ac, 0x0000ae, 0x0005ff, 0x000606, 0x00061b, 0x00061d, 0x0006dc, 0x0006de, 0x00070e, 0x000710, 0x0008e1, 0x0008e3, 0x00180d, 0x00180f, + 0x00200a, 0x002010, 0x002029, 0x00202f, 0x00205f, 0x002065, 0x002065, 0x002070, 0x00d7ff, 0x00e000, 0x00fefe, 0x00ff00, 0x00fff8, 0x00fffc, 0x0110bc, 0x0110be, 0x0110cc, + 0x0110ce, 0x01342f, 0x013439, 0x01bc9f, 0x01bca4, 0x01d172, 0x01d17b, 0x0e0000, 0x0e0002, 0x0e001f, 0x0e0080, 0x10fffd)); + + POSIX_CHAR_CLASSES.put("punct", CodePointSet.createNoDedup( + 0x000021, 0x00002f, 0x00003a, 0x000040, 0x00005b, 0x000060, 0x00007b, 0x00007e, 0x0000a0, 0x0000a9, 0x0000ab, 0x0000ac, 0x0000ae, 0x0000b1, 0x0000b4, 0x0000b4, 0x0000b6, + 0x0000b8, 0x0000bb, 0x0000bb, 0x0000bf, 0x0000bf, 0x0000d7, 0x0000d7, 0x0000f7, 0x0000f7, 0x0002b9, 0x0002ba, 0x0002c2, 0x0002c6, 0x0002c8, 0x0002c8, 0x0002cc, 0x0002cd, + 0x0002cf, 0x0002d0, 0x0002d2, 0x0002d8, 0x0002da, 0x0002df, 0x0002e5, 0x0002eb, 0x0002ed, 0x0002ed, 0x0002ef, 0x0002ff, 0x000375, 0x000375, 0x00037e, 0x00037e, 0x000384, + 0x000385, 0x000387, 0x000387, 0x0003f6, 0x0003f6, 0x000482, 0x000482, 0x00055a, 0x00055f, 0x000589, 0x00058a, 0x00058d, 0x00058f, 0x0005b0, 0x0005b9, 0x0005bb, 0x0005c4, + 0x0005c6, 0x0005c6, 0x0005f3, 0x0005f4, 0x000606, 0x00060f, 0x00061b, 0x00061b, 0x00061e, 0x00061f, 0x00066a, 0x00066d, 0x0006d4, 0x0006d4, 0x0006de, 0x0006de, 0x0006e9, + 0x0006e9, 0x0006fd, 0x0006fe, 0x000700, 0x00070d, 0x0007f6, 0x0007f9, 0x0007fe, 0x0007ff, 0x000830, 0x00083e, 0x00085e, 0x00085e, 0x000901, 0x000903, 0x00093c, 0x00094d, + 0x000964, 0x000965, 0x000970, 0x000970, 0x0009f2, 0x0009f3, 0x0009fa, 0x0009fb, 0x0009fd, 0x0009fd, 0x000a76, 0x000a76, 0x000af0, 0x000af1, 0x000b70, 0x000b70, 0x000bf3, + 0x000bfa, 0x000c77, 0x000c77, 0x000c7f, 0x000c7f, 0x000c84, 0x000c84, 0x000d4f, 0x000d4f, 0x000d79, 0x000d79, 0x000df4, 0x000df4, 0x000e2f, 0x000e2f, 0x000e3f, 0x000e3f, + 0x000e46, 0x000e46, 0x000e4e, 0x000e4f, 0x000e5a, 0x000e5b, 0x000f01, 0x000f17, 0x000f1a, 0x000f1f, 
0x000f34, 0x000f34, 0x000f36, 0x000f36, 0x000f38, 0x000f38, 0x000f3a, + 0x000f3d, 0x000f85, 0x000f85, 0x000fbe, 0x000fc5, 0x000fc7, 0x000fcc, 0x000fce, 0x000fda, 0x00104a, 0x00104f, 0x00109e, 0x00109f, 0x0010fb, 0x0010fb, 0x001360, 0x001368, + 0x001390, 0x001399, 0x001400, 0x001400, 0x00166d, 0x00166e, 0x00169b, 0x00169c, 0x0016eb, 0x0016ed, 0x001735, 0x001736, 0x0017d4, 0x0017d6, 0x0017d8, 0x0017db, 0x001800, + 0x00180a, 0x001940, 0x001940, 0x001944, 0x001945, 0x0019de, 0x0019ff, 0x001a1e, 0x001a1f, 0x001aa0, 0x001aa6, 0x001aa8, 0x001aad, 0x001b5a, 0x001b6a, 0x001b74, 0x001b7c, + 0x001bfc, 0x001bff, 0x001c3b, 0x001c3f, 0x001c7e, 0x001c7f, 0x001cc0, 0x001cc7, 0x001cd3, 0x001cd3, 0x001fbd, 0x001fbd, 0x001fbf, 0x001fc1, 0x001fcd, 0x001fcf, 0x001fdd, + 0x001fdf, 0x001fed, 0x001fef, 0x001ffd, 0x001ffe, 0x002010, 0x002014, 0x002016, 0x002027, 0x002030, 0x00205e, 0x00207a, 0x00207f, 0x00208a, 0x00208e, 0x0020a0, 0x0020bf, + 0x002100, 0x002101, 0x002103, 0x002106, 0x002108, 0x002109, 0x002114, 0x002114, 0x002116, 0x002118, 0x00211e, 0x002123, 0x002125, 0x002127, 0x002129, 0x002129, 0x00212e, + 0x00212e, 0x00213a, 0x00213b, 0x002140, 0x002144, 0x00214a, 0x00214d, 0x00214f, 0x00214f, 0x00218a, 0x00218b, 0x002190, 0x0023ff, 0x002422, 0x002426, 0x002440, 0x00244a, + 0x00249c, 0x0024e9, 0x002500, 0x002775, 0x002794, 0x002b73, 0x002b76, 0x002b95, 0x002b98, 0x002bff, 0x002ce5, 0x002cea, 0x002cf9, 0x002cfc, 0x002cfe, 0x002cff, 0x002d70, + 0x002d70, 0x002e00, 0x002e2e, 0x002e30, 0x002e4f, 0x002ff0, 0x002ffb, 0x003001, 0x003002, 0x003004, 0x003004, 0x003008, 0x00301b, 0x00301d, 0x003020, 0x003030, 0x003030, + 0x003036, 0x003037, 0x00303d, 0x00303f, 0x00309b, 0x00309c, 0x0030a0, 0x0030a0, 0x0030fb, 0x0030fb, 0x003190, 0x003191, 0x0031c0, 0x0031e3, 0x003200, 0x00321e, 0x00322a, + 0x003247, 0x003250, 0x003250, 0x003260, 0x00327f, 0x00328a, 0x0032b0, 0x0032c0, 0x0033ff, 0x004dc0, 0x004dff, 0x00a490, 0x00a4c6, 0x00a4fe, 0x00a4ff, 0x00a60d, 0x00a60f, + 0x00a673, 0x00a673, 0x00a67e, 
0x00a67e, 0x00a6f2, 0x00a6f7, 0x00a700, 0x00a716, 0x00a720, 0x00a721, 0x00a789, 0x00a78a, 0x00a828, 0x00a82b, 0x00a836, 0x00a839, 0x00a874, + 0x00a877, 0x00a8ce, 0x00a8cf, 0x00a8f8, 0x00a8fa, 0x00a8fc, 0x00a8fc, 0x00a92e, 0x00a92f, 0x00a95f, 0x00a95f, 0x00a9c1, 0x00a9cd, 0x00a9de, 0x00a9df, 0x00aa5c, 0x00aa5f, + 0x00aa77, 0x00aa79, 0x00aade, 0x00aadf, 0x00aaf0, 0x00aaf1, 0x00ab5b, 0x00ab5b, 0x00abeb, 0x00abeb, 0x00fb1f, 0x00fb1f, 0x00fb29, 0x00fb29, 0x00fbb2, 0x00fbc1, 0x00fd3e, + 0x00fd3f, 0x00fdfc, 0x00fdfd, 0x00fe10, 0x00fe19, 0x00fe30, 0x00fe52, 0x00fe54, 0x00fe66, 0x00fe68, 0x00fe6b, 0x00ff01, 0x00ff0f, 0x00ff1a, 0x00ff20, 0x00ff3b, 0x00ff3e, + 0x00ff40, 0x00ff40, 0x00ff5b, 0x00ff65, 0x00ffe0, 0x00ffe6, 0x00ffe8, 0x00ffee, 0x00fffc, 0x00fffc, 0x010100, 0x010102, 0x010137, 0x01013f, 0x010179, 0x010189, 0x01018c, + 0x01018e, 0x010190, 0x01019b, 0x0101a0, 0x0101a0, 0x0101d0, 0x0101fc, 0x01039f, 0x01039f, 0x0103d0, 0x0103d0, 0x01056f, 0x01056f, 0x010857, 0x010857, 0x010877, 0x010878, + 0x01091f, 0x01091f, 0x01093f, 0x01093f, 0x010a50, 0x010a58, 0x010a7f, 0x010a7f, 0x010ac8, 0x010ac8, 0x010af0, 0x010af6, 0x010b39, 0x010b3f, 0x010b99, 0x010b9c, 0x010f55, + 0x010f59, 0x011047, 0x01104d, 0x0110bb, 0x0110bc, 0x0110be, 0x0110c1, 0x011140, 0x011143, 0x011174, 0x011175, 0x0111c5, 0x0111c8, 0x0111cd, 0x0111cd, 0x0111db, 0x0111db, + 0x0111dd, 0x0111df, 0x011238, 0x01123d, 0x0112a9, 0x0112a9, 0x01144b, 0x01144f, 0x01145b, 0x01145b, 0x01145d, 0x01145d, 0x0114c6, 0x0114c6, 0x0115c1, 0x0115d7, 0x011641, + 0x011643, 0x011660, 0x01166c, 0x01173c, 0x01173f, 0x01183b, 0x01183b, 0x0119e2, 0x0119e2, 0x011a3f, 0x011a46, 0x011a9a, 0x011a9c, 0x011a9e, 0x011aa2, 0x011c41, 0x011c45, + 0x011c70, 0x011c71, 0x011ef7, 0x011ef8, 0x011fd5, 0x011ff1, 0x011fff, 0x011fff, 0x012470, 0x012474, 0x016a6e, 0x016a6f, 0x016af5, 0x016af5, 0x016b37, 0x016b3f, 0x016b44, + 0x016b45, 0x016e97, 0x016e9a, 0x016fe2, 0x016fe2, 0x01bc9c, 0x01bc9c, 0x01bc9f, 0x01bc9f, 0x01d000, 0x01d0f5, 0x01d100, 0x01d126, 
0x01d129, 0x01d164, 0x01d16a, 0x01d16c, + 0x01d183, 0x01d184, 0x01d18c, 0x01d1a9, 0x01d1ae, 0x01d1e8, 0x01d200, 0x01d241, 0x01d245, 0x01d245, 0x01d300, 0x01d356, 0x01d6c1, 0x01d6c1, 0x01d6db, 0x01d6db, 0x01d6fb, + 0x01d6fb, 0x01d715, 0x01d715, 0x01d735, 0x01d735, 0x01d74f, 0x01d74f, 0x01d76f, 0x01d76f, 0x01d789, 0x01d789, 0x01d7a9, 0x01d7a9, 0x01d7c3, 0x01d7c3, 0x01d800, 0x01d9ff, + 0x01da37, 0x01da3a, 0x01da6d, 0x01da74, 0x01da76, 0x01da83, 0x01da85, 0x01da8b, 0x01e14f, 0x01e14f, 0x01e2ff, 0x01e2ff, 0x01e95e, 0x01e95f, 0x01ecac, 0x01ecac, 0x01ecb0, + 0x01ecb0, 0x01ed2e, 0x01ed2e, 0x01eef0, 0x01eef1, 0x01f000, 0x01f02b, 0x01f030, 0x01f093, 0x01f0a0, 0x01f0ae, 0x01f0b1, 0x01f0bf, 0x01f0c1, 0x01f0cf, 0x01f0d1, 0x01f0f5, + 0x01f110, 0x01f16c, 0x01f170, 0x01f1ac, 0x01f1e6, 0x01f202, 0x01f210, 0x01f23b, 0x01f240, 0x01f248, 0x01f250, 0x01f251, 0x01f260, 0x01f265, 0x01f300, 0x01f6d5, 0x01f6e0, + 0x01f6ec, 0x01f6f0, 0x01f6fa, 0x01f700, 0x01f773, 0x01f780, 0x01f7d8, 0x01f7e0, 0x01f7eb, 0x01f800, 0x01f80b, 0x01f810, 0x01f847, 0x01f850, 0x01f859, 0x01f860, 0x01f887, + 0x01f890, 0x01f8ad, 0x01f900, 0x01f90b, 0x01f90d, 0x01f971, 0x01f973, 0x01f976, 0x01f97a, 0x01f9a2, 0x01f9a5, 0x01f9aa, 0x01f9ae, 0x01f9ca, 0x01f9cd, 0x01fa53, 0x01fa60, + 0x01fa6d, 0x01fa70, 0x01fa73, 0x01fa78, 0x01fa7a, 0x01fa80, 0x01fa82, 0x01fa90, 0x01fa95)); + + POSIX_CHAR_CLASSES.put("space", CodePointSet.createNoDedup( + 0x000009, 0x00000d, 0x000020, 0x000020, 0x001680, 0x001680, 0x002000, 0x00200a, 0x002028, 0x002029, 0x00202f, 0x00202f, 0x00205f, 0x00205f, 0x003000, 0x003000)); + + POSIX_CHAR_CLASSES.put("upper", CodePointSet.createNoDedup( + 0x000041, 0x00005a, 0x0000c0, 0x0000d6, 0x0000d8, 0x0000de, 0x000100, 0x000100, 0x000102, 0x000102, 0x000104, 0x000104, 0x000106, 0x000106, 0x000108, 0x000108, 0x00010a, + 0x00010a, 0x00010c, 0x00010c, 0x00010e, 0x00010e, 0x000110, 0x000110, 0x000112, 0x000112, 0x000114, 0x000114, 0x000116, 0x000116, 0x000118, 0x000118, 0x00011a, 0x00011a, + 0x00011c, 0x00011c, 
0x00011e, 0x00011e, 0x000120, 0x000120, 0x000122, 0x000122, 0x000124, 0x000124, 0x000126, 0x000126, 0x000128, 0x000128, 0x00012a, 0x00012a, 0x00012c, + 0x00012c, 0x00012e, 0x00012e, 0x000130, 0x000130, 0x000132, 0x000132, 0x000134, 0x000134, 0x000136, 0x000136, 0x000139, 0x000139, 0x00013b, 0x00013b, 0x00013d, 0x00013d, + 0x00013f, 0x00013f, 0x000141, 0x000141, 0x000143, 0x000143, 0x000145, 0x000145, 0x000147, 0x000147, 0x00014a, 0x00014a, 0x00014c, 0x00014c, 0x00014e, 0x00014e, 0x000150, + 0x000150, 0x000152, 0x000152, 0x000154, 0x000154, 0x000156, 0x000156, 0x000158, 0x000158, 0x00015a, 0x00015a, 0x00015c, 0x00015c, 0x00015e, 0x00015e, 0x000160, 0x000160, + 0x000162, 0x000162, 0x000164, 0x000164, 0x000166, 0x000166, 0x000168, 0x000168, 0x00016a, 0x00016a, 0x00016c, 0x00016c, 0x00016e, 0x00016e, 0x000170, 0x000170, 0x000172, + 0x000172, 0x000174, 0x000174, 0x000176, 0x000176, 0x000178, 0x000179, 0x00017b, 0x00017b, 0x00017d, 0x00017d, 0x000181, 0x000182, 0x000184, 0x000184, 0x000186, 0x000187, + 0x000189, 0x00018b, 0x00018e, 0x000191, 0x000193, 0x000194, 0x000196, 0x000198, 0x00019c, 0x00019d, 0x00019f, 0x0001a0, 0x0001a2, 0x0001a2, 0x0001a4, 0x0001a4, 0x0001a6, + 0x0001a7, 0x0001a9, 0x0001a9, 0x0001ac, 0x0001ac, 0x0001ae, 0x0001af, 0x0001b1, 0x0001b3, 0x0001b5, 0x0001b5, 0x0001b7, 0x0001b8, 0x0001bc, 0x0001bc, 0x0001c4, 0x0001c5, + 0x0001c7, 0x0001c8, 0x0001ca, 0x0001cb, 0x0001cd, 0x0001cd, 0x0001cf, 0x0001cf, 0x0001d1, 0x0001d1, 0x0001d3, 0x0001d3, 0x0001d5, 0x0001d5, 0x0001d7, 0x0001d7, 0x0001d9, + 0x0001d9, 0x0001db, 0x0001db, 0x0001de, 0x0001de, 0x0001e0, 0x0001e0, 0x0001e2, 0x0001e2, 0x0001e4, 0x0001e4, 0x0001e6, 0x0001e6, 0x0001e8, 0x0001e8, 0x0001ea, 0x0001ea, + 0x0001ec, 0x0001ec, 0x0001ee, 0x0001ee, 0x0001f1, 0x0001f2, 0x0001f4, 0x0001f4, 0x0001f6, 0x0001f8, 0x0001fa, 0x0001fa, 0x0001fc, 0x0001fc, 0x0001fe, 0x0001fe, 0x000200, + 0x000200, 0x000202, 0x000202, 0x000204, 0x000204, 0x000206, 0x000206, 0x000208, 0x000208, 0x00020a, 0x00020a, 0x00020c, 
0x00020c, 0x00020e, 0x00020e, 0x000210, 0x000210, + 0x000212, 0x000212, 0x000214, 0x000214, 0x000216, 0x000216, 0x000218, 0x000218, 0x00021a, 0x00021a, 0x00021c, 0x00021c, 0x00021e, 0x00021e, 0x000220, 0x000220, 0x000222, + 0x000222, 0x000224, 0x000224, 0x000226, 0x000226, 0x000228, 0x000228, 0x00022a, 0x00022a, 0x00022c, 0x00022c, 0x00022e, 0x00022e, 0x000230, 0x000230, 0x000232, 0x000232, + 0x00023a, 0x00023b, 0x00023d, 0x00023e, 0x000241, 0x000241, 0x000243, 0x000246, 0x000248, 0x000248, 0x00024a, 0x00024a, 0x00024c, 0x00024c, 0x00024e, 0x00024e, 0x000370, + 0x000370, 0x000372, 0x000372, 0x000376, 0x000376, 0x00037f, 0x00037f, 0x000386, 0x000386, 0x000388, 0x00038a, 0x00038c, 0x00038c, 0x00038e, 0x00038f, 0x000391, 0x0003a1, + 0x0003a3, 0x0003ab, 0x0003cf, 0x0003cf, 0x0003d2, 0x0003d4, 0x0003d8, 0x0003d8, 0x0003da, 0x0003da, 0x0003dc, 0x0003dc, 0x0003de, 0x0003de, 0x0003e0, 0x0003e0, 0x0003e2, + 0x0003e2, 0x0003e4, 0x0003e4, 0x0003e6, 0x0003e6, 0x0003e8, 0x0003e8, 0x0003ea, 0x0003ea, 0x0003ec, 0x0003ec, 0x0003ee, 0x0003ee, 0x0003f4, 0x0003f4, 0x0003f7, 0x0003f7, + 0x0003f9, 0x0003fa, 0x0003fd, 0x00042f, 0x000460, 0x000460, 0x000462, 0x000462, 0x000464, 0x000464, 0x000466, 0x000466, 0x000468, 0x000468, 0x00046a, 0x00046a, 0x00046c, + 0x00046c, 0x00046e, 0x00046e, 0x000470, 0x000470, 0x000472, 0x000472, 0x000474, 0x000474, 0x000476, 0x000476, 0x000478, 0x000478, 0x00047a, 0x00047a, 0x00047c, 0x00047c, + 0x00047e, 0x00047e, 0x000480, 0x000480, 0x00048a, 0x00048a, 0x00048c, 0x00048c, 0x00048e, 0x00048e, 0x000490, 0x000490, 0x000492, 0x000492, 0x000494, 0x000494, 0x000496, + 0x000496, 0x000498, 0x000498, 0x00049a, 0x00049a, 0x00049c, 0x00049c, 0x00049e, 0x00049e, 0x0004a0, 0x0004a0, 0x0004a2, 0x0004a2, 0x0004a4, 0x0004a4, 0x0004a6, 0x0004a6, + 0x0004a8, 0x0004a8, 0x0004aa, 0x0004aa, 0x0004ac, 0x0004ac, 0x0004ae, 0x0004ae, 0x0004b0, 0x0004b0, 0x0004b2, 0x0004b2, 0x0004b4, 0x0004b4, 0x0004b6, 0x0004b6, 0x0004b8, + 0x0004b8, 0x0004ba, 0x0004ba, 0x0004bc, 0x0004bc, 
0x0004be, 0x0004be, 0x0004c0, 0x0004c1, 0x0004c3, 0x0004c3, 0x0004c5, 0x0004c5, 0x0004c7, 0x0004c7, 0x0004c9, 0x0004c9, + 0x0004cb, 0x0004cb, 0x0004cd, 0x0004cd, 0x0004d0, 0x0004d0, 0x0004d2, 0x0004d2, 0x0004d4, 0x0004d4, 0x0004d6, 0x0004d6, 0x0004d8, 0x0004d8, 0x0004da, 0x0004da, 0x0004dc, + 0x0004dc, 0x0004de, 0x0004de, 0x0004e0, 0x0004e0, 0x0004e2, 0x0004e2, 0x0004e4, 0x0004e4, 0x0004e6, 0x0004e6, 0x0004e8, 0x0004e8, 0x0004ea, 0x0004ea, 0x0004ec, 0x0004ec, + 0x0004ee, 0x0004ee, 0x0004f0, 0x0004f0, 0x0004f2, 0x0004f2, 0x0004f4, 0x0004f4, 0x0004f6, 0x0004f6, 0x0004f8, 0x0004f8, 0x0004fa, 0x0004fa, 0x0004fc, 0x0004fc, 0x0004fe, + 0x0004fe, 0x000500, 0x000500, 0x000502, 0x000502, 0x000504, 0x000504, 0x000506, 0x000506, 0x000508, 0x000508, 0x00050a, 0x00050a, 0x00050c, 0x00050c, 0x00050e, 0x00050e, + 0x000510, 0x000510, 0x000512, 0x000512, 0x000514, 0x000514, 0x000516, 0x000516, 0x000518, 0x000518, 0x00051a, 0x00051a, 0x00051c, 0x00051c, 0x00051e, 0x00051e, 0x000520, + 0x000520, 0x000522, 0x000522, 0x000524, 0x000524, 0x000526, 0x000526, 0x000528, 0x000528, 0x00052a, 0x00052a, 0x00052c, 0x00052c, 0x00052e, 0x00052e, 0x000531, 0x000556, + 0x0010a0, 0x0010c5, 0x0010c7, 0x0010c7, 0x0010cd, 0x0010cd, 0x0013a0, 0x0013f5, 0x001c90, 0x001cba, 0x001cbd, 0x001cbf, 0x001e00, 0x001e00, 0x001e02, 0x001e02, 0x001e04, + 0x001e04, 0x001e06, 0x001e06, 0x001e08, 0x001e08, 0x001e0a, 0x001e0a, 0x001e0c, 0x001e0c, 0x001e0e, 0x001e0e, 0x001e10, 0x001e10, 0x001e12, 0x001e12, 0x001e14, 0x001e14, + 0x001e16, 0x001e16, 0x001e18, 0x001e18, 0x001e1a, 0x001e1a, 0x001e1c, 0x001e1c, 0x001e1e, 0x001e1e, 0x001e20, 0x001e20, 0x001e22, 0x001e22, 0x001e24, 0x001e24, 0x001e26, + 0x001e26, 0x001e28, 0x001e28, 0x001e2a, 0x001e2a, 0x001e2c, 0x001e2c, 0x001e2e, 0x001e2e, 0x001e30, 0x001e30, 0x001e32, 0x001e32, 0x001e34, 0x001e34, 0x001e36, 0x001e36, + 0x001e38, 0x001e38, 0x001e3a, 0x001e3a, 0x001e3c, 0x001e3c, 0x001e3e, 0x001e3e, 0x001e40, 0x001e40, 0x001e42, 0x001e42, 0x001e44, 0x001e44, 0x001e46, 
0x001e46, 0x001e48, + 0x001e48, 0x001e4a, 0x001e4a, 0x001e4c, 0x001e4c, 0x001e4e, 0x001e4e, 0x001e50, 0x001e50, 0x001e52, 0x001e52, 0x001e54, 0x001e54, 0x001e56, 0x001e56, 0x001e58, 0x001e58, + 0x001e5a, 0x001e5a, 0x001e5c, 0x001e5c, 0x001e5e, 0x001e5e, 0x001e60, 0x001e60, 0x001e62, 0x001e62, 0x001e64, 0x001e64, 0x001e66, 0x001e66, 0x001e68, 0x001e68, 0x001e6a, + 0x001e6a, 0x001e6c, 0x001e6c, 0x001e6e, 0x001e6e, 0x001e70, 0x001e70, 0x001e72, 0x001e72, 0x001e74, 0x001e74, 0x001e76, 0x001e76, 0x001e78, 0x001e78, 0x001e7a, 0x001e7a, + 0x001e7c, 0x001e7c, 0x001e7e, 0x001e7e, 0x001e80, 0x001e80, 0x001e82, 0x001e82, 0x001e84, 0x001e84, 0x001e86, 0x001e86, 0x001e88, 0x001e88, 0x001e8a, 0x001e8a, 0x001e8c, + 0x001e8c, 0x001e8e, 0x001e8e, 0x001e90, 0x001e90, 0x001e92, 0x001e92, 0x001e94, 0x001e94, 0x001e9e, 0x001e9e, 0x001ea0, 0x001ea0, 0x001ea2, 0x001ea2, 0x001ea4, 0x001ea4, + 0x001ea6, 0x001ea6, 0x001ea8, 0x001ea8, 0x001eaa, 0x001eaa, 0x001eac, 0x001eac, 0x001eae, 0x001eae, 0x001eb0, 0x001eb0, 0x001eb2, 0x001eb2, 0x001eb4, 0x001eb4, 0x001eb6, + 0x001eb6, 0x001eb8, 0x001eb8, 0x001eba, 0x001eba, 0x001ebc, 0x001ebc, 0x001ebe, 0x001ebe, 0x001ec0, 0x001ec0, 0x001ec2, 0x001ec2, 0x001ec4, 0x001ec4, 0x001ec6, 0x001ec6, + 0x001ec8, 0x001ec8, 0x001eca, 0x001eca, 0x001ecc, 0x001ecc, 0x001ece, 0x001ece, 0x001ed0, 0x001ed0, 0x001ed2, 0x001ed2, 0x001ed4, 0x001ed4, 0x001ed6, 0x001ed6, 0x001ed8, + 0x001ed8, 0x001eda, 0x001eda, 0x001edc, 0x001edc, 0x001ede, 0x001ede, 0x001ee0, 0x001ee0, 0x001ee2, 0x001ee2, 0x001ee4, 0x001ee4, 0x001ee6, 0x001ee6, 0x001ee8, 0x001ee8, + 0x001eea, 0x001eea, 0x001eec, 0x001eec, 0x001eee, 0x001eee, 0x001ef0, 0x001ef0, 0x001ef2, 0x001ef2, 0x001ef4, 0x001ef4, 0x001ef6, 0x001ef6, 0x001ef8, 0x001ef8, 0x001efa, + 0x001efa, 0x001efc, 0x001efc, 0x001efe, 0x001efe, 0x001f08, 0x001f0f, 0x001f18, 0x001f1d, 0x001f28, 0x001f2f, 0x001f38, 0x001f3f, 0x001f48, 0x001f4d, 0x001f59, 0x001f59, + 0x001f5b, 0x001f5b, 0x001f5d, 0x001f5d, 0x001f5f, 0x001f5f, 0x001f68, 0x001f6f, 
0x001f88, 0x001f8f, 0x001f98, 0x001f9f, 0x001fa8, 0x001faf, 0x001fb8, 0x001fbc, 0x001fc8, + 0x001fcc, 0x001fd8, 0x001fdb, 0x001fe8, 0x001fec, 0x001ff8, 0x001ffc, 0x002102, 0x002102, 0x002107, 0x002107, 0x00210b, 0x00210d, 0x002110, 0x002112, 0x002115, 0x002115, + 0x002119, 0x00211d, 0x002124, 0x002124, 0x002126, 0x002126, 0x002128, 0x002128, 0x00212a, 0x00212d, 0x002130, 0x002133, 0x00213e, 0x00213f, 0x002145, 0x002145, 0x002160, + 0x00216f, 0x002183, 0x002183, 0x0024b6, 0x0024cf, 0x002c00, 0x002c2e, 0x002c60, 0x002c60, 0x002c62, 0x002c64, 0x002c67, 0x002c67, 0x002c69, 0x002c69, 0x002c6b, 0x002c6b, + 0x002c6d, 0x002c70, 0x002c72, 0x002c72, 0x002c75, 0x002c75, 0x002c7e, 0x002c80, 0x002c82, 0x002c82, 0x002c84, 0x002c84, 0x002c86, 0x002c86, 0x002c88, 0x002c88, 0x002c8a, + 0x002c8a, 0x002c8c, 0x002c8c, 0x002c8e, 0x002c8e, 0x002c90, 0x002c90, 0x002c92, 0x002c92, 0x002c94, 0x002c94, 0x002c96, 0x002c96, 0x002c98, 0x002c98, 0x002c9a, 0x002c9a, + 0x002c9c, 0x002c9c, 0x002c9e, 0x002c9e, 0x002ca0, 0x002ca0, 0x002ca2, 0x002ca2, 0x002ca4, 0x002ca4, 0x002ca6, 0x002ca6, 0x002ca8, 0x002ca8, 0x002caa, 0x002caa, 0x002cac, + 0x002cac, 0x002cae, 0x002cae, 0x002cb0, 0x002cb0, 0x002cb2, 0x002cb2, 0x002cb4, 0x002cb4, 0x002cb6, 0x002cb6, 0x002cb8, 0x002cb8, 0x002cba, 0x002cba, 0x002cbc, 0x002cbc, + 0x002cbe, 0x002cbe, 0x002cc0, 0x002cc0, 0x002cc2, 0x002cc2, 0x002cc4, 0x002cc4, 0x002cc6, 0x002cc6, 0x002cc8, 0x002cc8, 0x002cca, 0x002cca, 0x002ccc, 0x002ccc, 0x002cce, + 0x002cce, 0x002cd0, 0x002cd0, 0x002cd2, 0x002cd2, 0x002cd4, 0x002cd4, 0x002cd6, 0x002cd6, 0x002cd8, 0x002cd8, 0x002cda, 0x002cda, 0x002cdc, 0x002cdc, 0x002cde, 0x002cde, + 0x002ce0, 0x002ce0, 0x002ce2, 0x002ce2, 0x002ceb, 0x002ceb, 0x002ced, 0x002ced, 0x002cf2, 0x002cf2, 0x00a640, 0x00a640, 0x00a642, 0x00a642, 0x00a644, 0x00a644, 0x00a646, + 0x00a646, 0x00a648, 0x00a648, 0x00a64a, 0x00a64a, 0x00a64c, 0x00a64c, 0x00a64e, 0x00a64e, 0x00a650, 0x00a650, 0x00a652, 0x00a652, 0x00a654, 0x00a654, 0x00a656, 0x00a656, + 0x00a658, 
0x00a658, 0x00a65a, 0x00a65a, 0x00a65c, 0x00a65c, 0x00a65e, 0x00a65e, 0x00a660, 0x00a660, 0x00a662, 0x00a662, 0x00a664, 0x00a664, 0x00a666, 0x00a666, 0x00a668, + 0x00a668, 0x00a66a, 0x00a66a, 0x00a66c, 0x00a66c, 0x00a680, 0x00a680, 0x00a682, 0x00a682, 0x00a684, 0x00a684, 0x00a686, 0x00a686, 0x00a688, 0x00a688, 0x00a68a, 0x00a68a, + 0x00a68c, 0x00a68c, 0x00a68e, 0x00a68e, 0x00a690, 0x00a690, 0x00a692, 0x00a692, 0x00a694, 0x00a694, 0x00a696, 0x00a696, 0x00a698, 0x00a698, 0x00a69a, 0x00a69a, 0x00a722, + 0x00a722, 0x00a724, 0x00a724, 0x00a726, 0x00a726, 0x00a728, 0x00a728, 0x00a72a, 0x00a72a, 0x00a72c, 0x00a72c, 0x00a72e, 0x00a72e, 0x00a732, 0x00a732, 0x00a734, 0x00a734, + 0x00a736, 0x00a736, 0x00a738, 0x00a738, 0x00a73a, 0x00a73a, 0x00a73c, 0x00a73c, 0x00a73e, 0x00a73e, 0x00a740, 0x00a740, 0x00a742, 0x00a742, 0x00a744, 0x00a744, 0x00a746, + 0x00a746, 0x00a748, 0x00a748, 0x00a74a, 0x00a74a, 0x00a74c, 0x00a74c, 0x00a74e, 0x00a74e, 0x00a750, 0x00a750, 0x00a752, 0x00a752, 0x00a754, 0x00a754, 0x00a756, 0x00a756, + 0x00a758, 0x00a758, 0x00a75a, 0x00a75a, 0x00a75c, 0x00a75c, 0x00a75e, 0x00a75e, 0x00a760, 0x00a760, 0x00a762, 0x00a762, 0x00a764, 0x00a764, 0x00a766, 0x00a766, 0x00a768, + 0x00a768, 0x00a76a, 0x00a76a, 0x00a76c, 0x00a76c, 0x00a76e, 0x00a76e, 0x00a779, 0x00a779, 0x00a77b, 0x00a77b, 0x00a77d, 0x00a77e, 0x00a780, 0x00a780, 0x00a782, 0x00a782, + 0x00a784, 0x00a784, 0x00a786, 0x00a786, 0x00a78b, 0x00a78b, 0x00a78d, 0x00a78d, 0x00a790, 0x00a790, 0x00a792, 0x00a792, 0x00a796, 0x00a796, 0x00a798, 0x00a798, 0x00a79a, + 0x00a79a, 0x00a79c, 0x00a79c, 0x00a79e, 0x00a79e, 0x00a7a0, 0x00a7a0, 0x00a7a2, 0x00a7a2, 0x00a7a4, 0x00a7a4, 0x00a7a6, 0x00a7a6, 0x00a7a8, 0x00a7a8, 0x00a7aa, 0x00a7ae, + 0x00a7b0, 0x00a7b4, 0x00a7b6, 0x00a7b6, 0x00a7b8, 0x00a7b8, 0x00a7ba, 0x00a7ba, 0x00a7bc, 0x00a7bc, 0x00a7be, 0x00a7be, 0x00a7c2, 0x00a7c2, 0x00a7c4, 0x00a7c6, 0x00ff21, + 0x00ff3a, 0x010400, 0x010427, 0x0104b0, 0x0104d3, 0x010c80, 0x010cb2, 0x0118a0, 0x0118bf, 0x016e40, 0x016e5f, 
0x01d400, 0x01d419, 0x01d434, 0x01d44d, 0x01d468, 0x01d481, + 0x01d49c, 0x01d49c, 0x01d49e, 0x01d49f, 0x01d4a2, 0x01d4a2, 0x01d4a5, 0x01d4a6, 0x01d4a9, 0x01d4ac, 0x01d4ae, 0x01d4b5, 0x01d4d0, 0x01d4e9, 0x01d504, 0x01d505, 0x01d507, + 0x01d50a, 0x01d50d, 0x01d514, 0x01d516, 0x01d51c, 0x01d538, 0x01d539, 0x01d53b, 0x01d53e, 0x01d540, 0x01d544, 0x01d546, 0x01d546, 0x01d54a, 0x01d550, 0x01d56c, 0x01d585, + 0x01d5a0, 0x01d5b9, 0x01d5d4, 0x01d5ed, 0x01d608, 0x01d621, 0x01d63c, 0x01d655, 0x01d670, 0x01d689, 0x01d6a8, 0x01d6c0, 0x01d6e2, 0x01d6fa, 0x01d71c, 0x01d734, 0x01d756, + 0x01d76e, 0x01d790, 0x01d7a8, 0x01d7ca, 0x01d7ca, 0x01e900, 0x01e921)); + + POSIX_CHAR_CLASSES.put("xdigit", CodePointSet.createNoDedup( + 0x000030, 0x000039, 0x000041, 0x000046, 0x000061, 0x000066, 0x00ff10, 0x00ff19, 0x00ff21, 0x00ff26, 0x00ff41, 0x00ff46)); + + /* GENERATED CODE END - KEEP THIS MARKER FOR AUTOMATIC UPDATES */ + + POSIX_CHAR_CLASSES.put("alnum", POSIX_CHAR_CLASSES.get("alpha").union(POSIX_CHAR_CLASSES.get("digit"))); + WORD_CHARACTERS = POSIX_CHAR_CLASSES.get("alnum").union(CodePointSet.create('_')); + } +} diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/OracleDBFlavor.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/OracleDBFlavor.java index 1957b5230f65..887a68208c91 100644 --- a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/OracleDBFlavor.java +++ b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/OracleDBFlavor.java @@ -42,11 +42,11 @@ import java.util.function.BiPredicate; -import org.graalvm.shadowed.com.ibm.icu.lang.UCharacter; - import com.oracle.truffle.regex.RegexLanguage; import com.oracle.truffle.regex.RegexSource; import com.oracle.truffle.regex.tregex.buffer.CompilationBuffer; +import com.oracle.truffle.regex.tregex.parser.CaseFoldData; +import 
com.oracle.truffle.regex.tregex.parser.MultiCharacterCaseFolding; import com.oracle.truffle.regex.tregex.parser.RegexParser; import com.oracle.truffle.regex.tregex.parser.RegexValidator; import com.oracle.truffle.regex.tregex.parser.ast.RegexAST; @@ -59,7 +59,8 @@ public final class OracleDBFlavor extends RegexFlavor { public static final OracleDBFlavor INSTANCE = new OracleDBFlavor(); private OracleDBFlavor() { - super(BACKREFERENCES_TO_UNMATCHED_GROUPS_FAIL | NESTED_CAPTURE_GROUPS_KEPT_ON_LOOP_REENTRY | SUPPORTS_RECURSIVE_BACKREFERENCES); + super(EMPTY_CHECKS_MONITOR_CAPTURE_GROUPS | FAILING_EMPTY_CHECKS_DONT_BACKTRACK | BACKREFERENCES_TO_UNMATCHED_GROUPS_FAIL | NESTED_CAPTURE_GROUPS_KEPT_ON_LOOP_REENTRY | + SUPPORTS_RECURSIVE_BACKREFERENCES); } @Override @@ -74,10 +75,10 @@ public RegexParser createParser(RegexLanguage language, RegexSource source, Comp @Override public BiPredicate getEqualsIgnoreCasePredicate(RegexAST ast) { - return OracleDBFlavor::equalsIgnoreCaseUnicode; + return OracleDBFlavor::equalsIgnoreCase; } - private static boolean equalsIgnoreCaseUnicode(int codePointA, int codePointB) { - return UCharacter.toLowerCase(codePointA) == UCharacter.toLowerCase(codePointB); + private static boolean equalsIgnoreCase(int codePointA, int codePointB) { + return MultiCharacterCaseFolding.equalsIgnoreCase(CaseFoldData.CaseFoldAlgorithm.OracleDB, codePointA, codePointB); } } diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/OracleDBRegexLexer.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/OracleDBRegexLexer.java index 2402c75086f9..04beda84ec91 100644 --- a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/OracleDBRegexLexer.java +++ b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/OracleDBRegexLexer.java @@ -40,7 +40,8 @@ */ package com.oracle.truffle.regex.tregex.parser.flavors; -import 
org.graalvm.collections.EconomicMap; +import static com.oracle.truffle.regex.tregex.parser.flavors.OracleDBConstants.POSIX_CHAR_CLASSES; +import static com.oracle.truffle.regex.tregex.parser.flavors.OracleDBConstants.WORD_CHARACTERS; import com.oracle.truffle.api.CompilerDirectives; import com.oracle.truffle.regex.RegexSource; @@ -49,10 +50,9 @@ import com.oracle.truffle.regex.charset.CodePointSet; import com.oracle.truffle.regex.charset.CodePointSetAccumulator; import com.oracle.truffle.regex.charset.Constants; -import com.oracle.truffle.regex.charset.UnicodeProperties; import com.oracle.truffle.regex.errors.OracleDBErrorMessages; import com.oracle.truffle.regex.tregex.buffer.CompilationBuffer; -import com.oracle.truffle.regex.tregex.parser.CaseFoldTable; +import com.oracle.truffle.regex.tregex.parser.CaseFoldData; import com.oracle.truffle.regex.tregex.parser.RegexLexer; import com.oracle.truffle.regex.tregex.parser.Token; import com.oracle.truffle.regex.tregex.string.Encodings; @@ -60,38 +60,7 @@ import com.oracle.truffle.regex.util.TBitSet; public final class OracleDBRegexLexer extends RegexLexer { - - // This map contains the character sets of POSIX character classes like [[:alpha:]] and - // [[:punct:]]. 
- private static final EconomicMap UNICODE_POSIX_CHAR_CLASSES; private static final CodePointSet EMPTY_POSIX_CHAR_CLASS = CodePointSet.create(':', ':', '[', '[', ']', ']'); - - static { - CodePointSet alpha = UnicodeProperties.getProperty("Alphabetic"); - CodePointSet digit = UnicodeProperties.getProperty("General_Category=Decimal_Number"); - CodePointSet space = UnicodeProperties.getProperty("White_Space"); - CodePointSet xdigit = CodePointSet.create('0', '9', 'A', 'F', 'a', 'f'); - - UNICODE_POSIX_CHAR_CLASSES = EconomicMap.create(12); - CompilationBuffer buffer = new CompilationBuffer(Encodings.UTF_32); - - CodePointSet blank = UnicodeProperties.getProperty("General_Category=Space_Separator").union(CodePointSet.create('\t', '\t')); - CodePointSet cntrl = UnicodeProperties.getProperty("General_Category=Control"); - CodePointSet graph = space.union(UnicodeProperties.getProperty("General_Category=Control")).union(UnicodeProperties.getProperty("General_Category=Surrogate")).union( - UnicodeProperties.getProperty("General_Category=Unassigned")).createInverse(Encodings.UTF_32); - UNICODE_POSIX_CHAR_CLASSES.put("alpha", alpha); - UNICODE_POSIX_CHAR_CLASSES.put("alnum", alpha.union(digit)); - UNICODE_POSIX_CHAR_CLASSES.put("blank", blank); - UNICODE_POSIX_CHAR_CLASSES.put("cntrl", cntrl); - UNICODE_POSIX_CHAR_CLASSES.put("digit", digit); - UNICODE_POSIX_CHAR_CLASSES.put("graph", graph); - UNICODE_POSIX_CHAR_CLASSES.put("lower", UnicodeProperties.getProperty("Lowercase")); - UNICODE_POSIX_CHAR_CLASSES.put("print", graph.union(blank).subtract(cntrl, buffer)); - UNICODE_POSIX_CHAR_CLASSES.put("punct", UnicodeProperties.getProperty("General_Category=Punctuation").union(UnicodeProperties.getProperty("General_Category=Symbol").subtract(alpha, buffer))); - UNICODE_POSIX_CHAR_CLASSES.put("space", space); - UNICODE_POSIX_CHAR_CLASSES.put("upper", UnicodeProperties.getProperty("Uppercase")); - UNICODE_POSIX_CHAR_CLASSES.put("xdigit", xdigit); - } private static final TBitSet 
WHITESPACE = TBitSet.valueOf('\n', ' '); private final OracleDBFlags flags; private final CodePointSetAccumulator caseFoldTmp = new CodePointSetAccumulator(); @@ -114,7 +83,7 @@ public boolean hasNext() { @Override protected boolean featureEnabledIgnoreCase() { - return flags.isIgnoreCase(); + return false; } @Override @@ -158,7 +127,7 @@ protected CodePointSet getPOSIXCharClass(String name) { // oracledb quirk: [::] inside a character class is treated as [:] instead of re-parsing return EMPTY_POSIX_CHAR_CLASS; } - CodePointSet cps = UNICODE_POSIX_CHAR_CLASSES.get(name); + CodePointSet cps = POSIX_CHAR_CLASSES.get(name); if (cps != null) { return cps; } @@ -224,7 +193,7 @@ protected boolean featureEnabledClassSetExpressions() { @Override protected void caseFoldUnfold(CodePointSetAccumulator charClass) { - CaseFoldTable.applyCaseFoldUnfold(charClass, caseFoldTmp, CaseFoldTable.CaseFoldingAlgorithm.ECMAScriptUnicode); + CaseFoldData.applyCaseFoldUnfold(charClass, caseFoldTmp, CaseFoldData.CaseFoldUnfoldAlgorithm.ECMAScriptUnicode); } @Override @@ -258,9 +227,13 @@ protected int getMaxBackReferenceDigits() { } @Override - protected CodePointSet getPredefinedCharClass(char c) { - assert UNICODE_POSIX_CHAR_CLASSES.containsKey(getPOSIXCharClassName(c)); - CodePointSet cps = UNICODE_POSIX_CHAR_CLASSES.get(getPOSIXCharClassName(c)); + protected CodePointSet getPredefinedCharClass(char c, boolean inCharClass) { + if (inCharClass) { + // OracleDB ignores \s \d \w inside character classes, and interprets them as literal + // characters instead + return '\\' < c ? 
CodePointSet.create('\\', '\\', c, c) : CodePointSet.create(c, c, '\\', '\\'); + } + CodePointSet cps = getPOSIXCharClass(c); if (isLowerCase(c)) { return cps; } else { @@ -276,14 +249,14 @@ private static boolean isLowerCase(char c) { return (c & 0x20) != 0; } - private static String getPOSIXCharClassName(char c) { + private static CodePointSet getPOSIXCharClass(char c) { switch (toLowerCase(c)) { case 's': - return "space"; + return POSIX_CHAR_CLASSES.get("space"); case 'd': - return "digit"; + return POSIX_CHAR_CLASSES.get("digit"); case 'w': - return "alnum"; + return WORD_CHARACTERS; default: throw CompilerDirectives.shouldNotReachHere(); } @@ -303,7 +276,7 @@ protected RegexSyntaxException handleBoundedQuantifierOutOfOrder() { protected Token handleBoundedQuantifierSyntaxError() throws RegexSyntaxException { // invalid bounded quantifiers are treated as string literals position = getLastTokenPosition() + 1; - return charClass('{'); + return literalChar('{'); } @Override @@ -311,7 +284,7 @@ protected Token handleBoundedQuantifierOverflow(long min, long max) { if (min == -1 || max == -1) { // bounded quantifiers outside uint32 range are treated as string literals position = getLastTokenPosition() + 1; - return charClass('{'); + return literalChar('{'); } if (Long.compareUnsigned(min, max) > 0) { throw handleBoundedQuantifierOutOfOrder(); @@ -326,7 +299,7 @@ protected Token handleBoundedQuantifierOverflowMin(long min, long max) { if (min == -1) { // bounded quantifiers outside uint32 range are treated as string literals position = getLastTokenPosition() + 1; - return charClass('{'); + return literalChar('{'); } // oracledb quirk: values between 0x7fff_ffff and 0xffff_ffff are treated as uint32 in the // quantifier order check, but are later "cast" to int32 by stripping the sign bit. 
diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/OracleDBRegexParser.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/OracleDBRegexParser.java index 3193adae5015..d82d5aa3fe3d 100644 --- a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/OracleDBRegexParser.java +++ b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/OracleDBRegexParser.java @@ -40,6 +40,10 @@ */ package com.oracle.truffle.regex.tregex.parser.flavors; +import java.util.List; + +import org.graalvm.collections.Pair; + import com.oracle.truffle.api.CompilerDirectives; import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; import com.oracle.truffle.regex.AbstractRegexObject; @@ -47,14 +51,20 @@ import com.oracle.truffle.regex.RegexLanguage; import com.oracle.truffle.regex.RegexSource; import com.oracle.truffle.regex.RegexSyntaxException; +import com.oracle.truffle.regex.charset.ClassSetContents; import com.oracle.truffle.regex.charset.CodePointSet; +import com.oracle.truffle.regex.charset.CodePointSetAccumulator; import com.oracle.truffle.regex.errors.OracleDBErrorMessages; import com.oracle.truffle.regex.tregex.buffer.CompilationBuffer; +import com.oracle.truffle.regex.tregex.buffer.IntArrayBuffer; +import com.oracle.truffle.regex.tregex.parser.CaseFoldData; +import com.oracle.truffle.regex.tregex.parser.MultiCharacterCaseFolding; import com.oracle.truffle.regex.tregex.parser.RegexASTBuilder; import com.oracle.truffle.regex.tregex.parser.RegexParser; import com.oracle.truffle.regex.tregex.parser.Token; import com.oracle.truffle.regex.tregex.parser.ast.RegexAST; import com.oracle.truffle.regex.tregex.parser.ast.RegexASTRootNode; +import com.oracle.truffle.regex.tregex.string.Encodings; public final class OracleDBRegexParser implements RegexParser { @@ -62,6 +72,9 @@ public final class OracleDBRegexParser implements RegexParser 
{ private final OracleDBFlags flags; private final OracleDBRegexLexer lexer; private final RegexASTBuilder astBuilder; + private CodePointSetAccumulator curCharClass = new CodePointSetAccumulator(); + private CodePointSetAccumulator curCharClassPosixEquivalenceClasses = new CodePointSetAccumulator(); + private CodePointSetAccumulator charClassTmp = new CodePointSetAccumulator(); @TruffleBoundary public OracleDBRegexParser(RegexLanguage language, RegexSource source, CompilationBuffer compilationBuffer) throws RegexSyntaxException { @@ -91,12 +104,26 @@ public AbstractRegexObject getNamedCaptureGroups() { @Override @TruffleBoundary public RegexAST parse() throws RegexSyntaxException { + IntArrayBuffer literalStringBuffer = new IntArrayBuffer(); astBuilder.pushRootGroup(); Token token = null; Token.Kind prevKind; while (lexer.hasNext()) { prevKind = token == null ? null : token.kind; token = lexer.next(); + if (token.kind != Token.Kind.literalChar && !literalStringBuffer.isEmpty()) { + int last = -1; + if (token.kind == Token.Kind.quantifier) { + last = literalStringBuffer.get(literalStringBuffer.length() - 1); + literalStringBuffer.setLength(literalStringBuffer.length() - 1); + } + addLiteralString(literalStringBuffer); + if (last >= 0) { + assert literalStringBuffer.isEmpty(); + literalStringBuffer.add(last); + addLiteralString(literalStringBuffer); + } + } switch (token.kind) { case A, z: astBuilder.addPositionAssertion(token); @@ -143,13 +170,31 @@ public RegexAST parse() throws RegexSyntaxException { // quantifiers without target are ignored break; } - astBuilder.addQuantifier((Token.Quantifier) token); + Token.Quantifier quantifier = (Token.Quantifier) token; + if (astBuilder.getCurTerm().isQuantifiableTerm() && astBuilder.getCurTerm().asQuantifiableTerm().hasQuantifier()) { + Token.Quantifier existingQuantifier = astBuilder.getCurTerm().asQuantifiableTerm().getQuantifier(); + if (existingQuantifier.getMin() > 1) { + astBuilder.wrapCurTermInGroup(); + } else { + 
astBuilder.addQuantifier(Token.createQuantifier( + Math.max(quantifier.getMin(), existingQuantifier.getMin()), + (int) Math.max(Integer.toUnsignedLong(quantifier.getMax()), Integer.toUnsignedLong(existingQuantifier.getMax())), + quantifier.isGreedy() && existingQuantifier.isGreedy())); + break; + } + } + astBuilder.addQuantifier(quantifier); break; case alternation: astBuilder.nextSequence(); break; case captureGroupBegin: - astBuilder.pushCaptureGroup(token); + if (lexer.numberOfCaptureGroupsSoFar() <= 10) { + // oracledb only tracks capture groups 0 - 9 + astBuilder.pushCaptureGroup(token); + } else { + astBuilder.pushGroup(token); + } break; case groupEnd: if (astBuilder.getCurGroup().getParent() instanceof RegexASTRootNode) { @@ -157,9 +202,28 @@ public RegexAST parse() throws RegexSyntaxException { } astBuilder.popGroup(token); break; + case literalChar: + literalStringBuffer.add(((Token.LiteralCharacter) token).getCodePoint()); + break; case charClass: astBuilder.addCharClass((Token.CharacterClass) token); break; + case charClassBegin: + curCharClass.clear(); + curCharClassPosixEquivalenceClasses.clear(); + break; + case charClassAtom: + ClassSetContents contents = ((Token.CharacterClassAtom) token).getContents(); + assert contents.isCodePointSetOnly(); + if (contents.isPosixCollationEquivalenceClass()) { + curCharClassPosixEquivalenceClasses.addSet(contents.getCodePointSet()); + } else { + curCharClass.addSet(contents.getCodePointSet()); + } + break; + case charClassEnd: + addCharClass(); + break; default: throw CompilerDirectives.shouldNotReachHere(); } @@ -167,9 +231,69 @@ public RegexAST parse() throws RegexSyntaxException { if (!astBuilder.curGroupIsRoot()) { throw syntaxError(OracleDBErrorMessages.UNTERMINATED_GROUP); } + if (!literalStringBuffer.isEmpty()) { + addLiteralString(literalStringBuffer); + } return astBuilder.popRootGroup(); } + private void addCharClass() { + boolean wasSingleChar = !lexer.isCurCharClassInverted() && 
curCharClass.matchesSingleChar() && curCharClassPosixEquivalenceClasses.isEmpty(); + if (flags.isIgnoreCase()) { + MultiCharacterCaseFolding.caseClosure(CaseFoldData.CaseFoldAlgorithm.OracleDB, curCharClass, charClassTmp, (a, b) -> true, Encodings.UTF_8.getFullSet()); + } + MultiCharacterCaseFolding.caseClosure(CaseFoldData.CaseFoldAlgorithm.OracleDBAI, curCharClassPosixEquivalenceClasses, charClassTmp, (a, b) -> true, Encodings.UTF_8.getFullSet()); + curCharClass.addSet(curCharClassPosixEquivalenceClasses.get()); + if (lexer.isCurCharClassInverted()) { + curCharClass.invert(Encodings.UTF_8); + } + if (flags.isIgnoreCase()) { + List> multiCodePointExpansions = MultiCharacterCaseFolding.caseClosureMultiCodePoint(CaseFoldData.CaseFoldAlgorithm.OracleDB, curCharClass); + List> multiCodePointExpansionsPEC = MultiCharacterCaseFolding.caseClosureMultiCodePoint(CaseFoldData.CaseFoldAlgorithm.OracleDB, curCharClassPosixEquivalenceClasses); + if (!multiCodePointExpansions.isEmpty() || !multiCodePointExpansionsPEC.isEmpty()) { + astBuilder.pushGroup(); + astBuilder.addCharClass(curCharClass.toCodePointSet()); + addMultiCodePointExpansions(multiCodePointExpansions, CaseFoldData.CaseFoldAlgorithm.OracleDB); + addMultiCodePointExpansions(multiCodePointExpansionsPEC, CaseFoldData.CaseFoldAlgorithm.OracleDBAI); + astBuilder.popGroup(); + } else { + astBuilder.addCharClass(curCharClass.toCodePointSet(), wasSingleChar); + } + } else if (!curCharClassPosixEquivalenceClasses.isEmpty()) { + List> multiCodePointExpansionsPEC = MultiCharacterCaseFolding.caseClosureMultiCodePoint(CaseFoldData.CaseFoldAlgorithm.OracleDB, curCharClassPosixEquivalenceClasses); + if (!multiCodePointExpansionsPEC.isEmpty()) { + astBuilder.pushGroup(); + astBuilder.addCharClass(curCharClass.toCodePointSet()); + addMultiCodePointExpansions(multiCodePointExpansionsPEC, CaseFoldData.CaseFoldAlgorithm.OracleDBAI); + astBuilder.popGroup(); + } else { + astBuilder.addCharClass(curCharClass.toCodePointSet(), 
wasSingleChar); + } + } else { + astBuilder.addCharClass(curCharClass.toCodePointSet(), wasSingleChar); + } + } + + private void addMultiCodePointExpansions(List> multiCodePointExpansions, CaseFoldData.CaseFoldAlgorithm algorithm) { + for (Pair pair : multiCodePointExpansions) { + astBuilder.nextSequence(); + int[] to = pair.getRight(); + boolean dropAsciiOnStart = false; + MultiCharacterCaseFolding.caseFoldUnfoldString(algorithm, to, Encodings.UTF_8.getFullSet(), dropAsciiOnStart, astBuilder); + } + } + + private void addLiteralString(IntArrayBuffer literalStringBuffer) { + if (flags.isIgnoreCase()) { + MultiCharacterCaseFolding.caseFoldUnfoldString(CaseFoldData.CaseFoldAlgorithm.OracleDB, literalStringBuffer.toArray(), Encodings.UTF_8.getFullSet(), astBuilder); + } else { + for (int i = 0; i < literalStringBuffer.length(); i++) { + astBuilder.addCharClass(CodePointSet.create(literalStringBuffer.get(i)), true); + } + } + literalStringBuffer.clear(); + } + private RegexSyntaxException syntaxError(String msg) { return RegexSyntaxException.createPattern(source, msg, lexer.getLastTokenPosition()); } diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/PythonFlavor.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/PythonFlavor.java index 0fabd94d2625..de3b43e1316d 100644 --- a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/PythonFlavor.java +++ b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/PythonFlavor.java @@ -42,12 +42,12 @@ import java.util.function.BiPredicate; +import com.oracle.truffle.regex.tregex.parser.CaseFoldData; import org.graalvm.shadowed.com.ibm.icu.lang.UCharacter; import com.oracle.truffle.regex.RegexLanguage; import com.oracle.truffle.regex.RegexSource; import com.oracle.truffle.regex.tregex.buffer.CompilationBuffer; -import com.oracle.truffle.regex.tregex.parser.CaseFoldTable; import 
com.oracle.truffle.regex.tregex.parser.RegexParser; import com.oracle.truffle.regex.tregex.parser.RegexValidator; import com.oracle.truffle.regex.tregex.parser.ast.RegexAST; @@ -84,7 +84,7 @@ public BiPredicate getEqualsIgnoreCasePredicate(RegexAST ast) return PythonFlavor::equalsIgnoreCaseUnicode; } else { assert ast.getOptions().getEncoding() == Encodings.LATIN_1; - return CaseFoldTable.CaseFoldingAlgorithm.PythonAscii.getEqualsPredicate(); + return CaseFoldData.CaseFoldUnfoldAlgorithm.PythonAscii.getEqualsPredicate(); } } diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/PythonRegexLexer.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/PythonRegexLexer.java index 1550869421e4..ff477414b300 100644 --- a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/PythonRegexLexer.java +++ b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/PythonRegexLexer.java @@ -46,6 +46,7 @@ import java.util.Locale; import java.util.Map; +import com.oracle.truffle.regex.tregex.parser.CaseFoldData; import org.graalvm.shadowed.com.ibm.icu.lang.UCharacter; import com.oracle.truffle.api.CompilerDirectives; @@ -59,7 +60,6 @@ import com.oracle.truffle.regex.charset.UnicodeProperties; import com.oracle.truffle.regex.errors.PyErrorMessages; import com.oracle.truffle.regex.tregex.buffer.CompilationBuffer; -import com.oracle.truffle.regex.tregex.parser.CaseFoldTable; import com.oracle.truffle.regex.charset.ClassSetContents; import com.oracle.truffle.regex.tregex.parser.RegexLexer; import com.oracle.truffle.regex.tregex.parser.Token; @@ -414,8 +414,8 @@ protected void caseFoldUnfold(CodePointSetAccumulator charClass) { if (getLocalFlags().isLocale()) { getLocaleData().caseFoldUnfold(charClass, caseFoldTmp); } else { - CaseFoldTable.CaseFoldingAlgorithm caseFolding = getLocalFlags().isUnicode(mode) ? 
CaseFoldTable.CaseFoldingAlgorithm.PythonUnicode : CaseFoldTable.CaseFoldingAlgorithm.PythonAscii; - CaseFoldTable.applyCaseFoldUnfold(charClass, caseFoldTmp, caseFolding); + CaseFoldData.CaseFoldUnfoldAlgorithm caseFolding = getLocalFlags().isUnicode(mode) ? CaseFoldData.CaseFoldUnfoldAlgorithm.PythonUnicode : CaseFoldData.CaseFoldUnfoldAlgorithm.PythonAscii; + CaseFoldData.applyCaseFoldUnfold(charClass, caseFoldTmp, caseFolding); } } @@ -430,7 +430,7 @@ protected ClassSetContents caseFoldClassSetAtom(ClassSetContents classSetContent } @Override - protected CodePointSet getPredefinedCharClass(char c) { + protected CodePointSet getPredefinedCharClass(char c, boolean inCharClass) { if (getLocalFlags().isUnicode(mode)) { return UNICODE_CHAR_CLASS_SETS.get(c); } @@ -487,7 +487,7 @@ protected RegexSyntaxException handleBoundedQuantifierOutOfOrder() { @Override protected Token handleBoundedQuantifierSyntaxError() throws RegexSyntaxException { position = getLastTokenPosition() + 1; - return charClass('{'); + return literalChar('{'); } @Override @@ -614,7 +614,7 @@ protected void handleUnmatchedRightBrace() { @Override protected RegexSyntaxException handleUnmatchedLeftBracket() { - return syntaxErrorAtAbs(PyErrorMessages.UNTERMINATED_CHARACTER_SET, getLastTokenPosition()); + return syntaxErrorAtAbs(PyErrorMessages.UNTERMINATED_CHARACTER_SET, getLastCharacterClassBeginPosition()); } @Override @@ -635,7 +635,7 @@ protected Token parseCustomEscape(char c) { if (codePoint > 0xff) { handleOctalOutOfRange(); } - return charClass(codePoint); + return literalChar(codePoint); } return null; } diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/PythonRegexParser.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/PythonRegexParser.java index 7f82bab72c96..33ecdefbda5d 100644 --- a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/PythonRegexParser.java +++ 
b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/PythonRegexParser.java @@ -50,7 +50,9 @@ import com.oracle.truffle.regex.RegexLanguage; import com.oracle.truffle.regex.RegexSource; import com.oracle.truffle.regex.RegexSyntaxException; +import com.oracle.truffle.regex.charset.ClassSetContents; import com.oracle.truffle.regex.charset.CodePointSet; +import com.oracle.truffle.regex.charset.CodePointSetAccumulator; import com.oracle.truffle.regex.charset.Constants; import com.oracle.truffle.regex.errors.PyErrorMessages; import com.oracle.truffle.regex.tregex.buffer.CompilationBuffer; @@ -65,7 +67,7 @@ public final class PythonRegexParser implements RegexParser { - private static final EnumSet QUANTIFIER_PREV = EnumSet.of(Token.Kind.charClass, Token.Kind.groupEnd, Token.Kind.backReference); + private static final EnumSet QUANTIFIER_PREV = EnumSet.of(Token.Kind.literalChar, Token.Kind.charClass, Token.Kind.charClassEnd, Token.Kind.groupEnd, Token.Kind.backReference); /** * Indicates whether the regex being parsed is a 'str' pattern or a 'bytes' pattern. 
@@ -73,6 +75,7 @@ public final class PythonRegexParser implements RegexParser { private final PythonREMode mode; private final PythonRegexLexer lexer; private final RegexASTBuilder astBuilder; + private final CodePointSetAccumulator curCharClass = new CodePointSetAccumulator(); public PythonRegexParser(RegexLanguage language, RegexSource source, CompilationBuffer compilationBuffer) throws RegexSyntaxException { this.mode = PythonREMode.fromEncoding(source.getEncoding()); @@ -160,7 +163,7 @@ public RegexAST parse() throws RegexSyntaxException { break; } if (getLocalFlags().isUnicode(mode)) { - astBuilder.addWordBoundaryAssertion(lexer.getPredefinedCharClass('w'), lexer.getPredefinedCharClass('W')); + astBuilder.addWordBoundaryAssertion(lexer.getPredefinedCharClass('w', false), lexer.getPredefinedCharClass('W', false)); } else if (getLocalFlags().isLocale()) { astBuilder.addWordBoundaryAssertion(lexer.getLocaleData().getWordCharacters(), lexer.getLocaleData().getNonWordCharacters()); } else { @@ -176,7 +179,7 @@ public RegexAST parse() throws RegexSyntaxException { break; } if (getLocalFlags().isUnicode(mode)) { - astBuilder.addWordNonBoundaryAssertionPython(lexer.getPredefinedCharClass('w'), lexer.getPredefinedCharClass('W')); + astBuilder.addWordNonBoundaryAssertionPython(lexer.getPredefinedCharClass('w', false), lexer.getPredefinedCharClass('W', false)); } else if (getLocalFlags().isLocale()) { astBuilder.addWordNonBoundaryAssertionPython(lexer.getLocaleData().getWordCharacters(), lexer.getLocaleData().getNonWordCharacters()); } else { @@ -228,9 +231,28 @@ public RegexAST parse() throws RegexSyntaxException { } astBuilder.popGroup(token); break; + case literalChar: + literalChar(((Token.LiteralCharacter) token).getCodePoint()); + break; case charClass: astBuilder.addCharClass((Token.CharacterClass) token); break; + case charClassBegin: + curCharClass.clear(); + break; + case charClassAtom: + ClassSetContents contents = ((Token.CharacterClassAtom) 
token).getContents(); + assert contents.isCodePointSetOnly(); + curCharClass.addSet(contents.getCodePointSet()); + break; + case charClassEnd: + boolean wasSingleChar = !lexer.isCurCharClassInverted() && curCharClass.matchesSingleChar(); + if (lexer.featureEnabledIgnoreCase()) { + lexer.caseFoldUnfold(curCharClass); + } + CodePointSet cps = curCharClass.toCodePointSet(); + astBuilder.addCharClass(lexer.isCurCharClassInverted() ? cps.createInverse(lexer.source.getEncoding()) : cps, wasSingleChar); + break; case conditionalBackreference: Token.BackReference conditionalBackRefToken = (Token.BackReference) token; verifyGroupReference(conditionalBackRefToken); @@ -263,6 +285,17 @@ public RegexAST parse() throws RegexSyntaxException { return ast; } + private void literalChar(int codePoint) { + if (lexer.featureEnabledIgnoreCase()) { + curCharClass.clear(); + curCharClass.addCodePoint(codePoint); + lexer.caseFoldUnfold(curCharClass); + astBuilder.addCharClass(curCharClass.toCodePointSet(), true); + } else { + astBuilder.addCharClass(CodePointSet.create(codePoint)); + } + } + /** * Verifies that making a back-reference to a certain group is legal in the current context. * diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/RubyCaseFoldingData.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/RubyCaseFoldingData.java deleted file mode 100644 index 4efcb62e609d..000000000000 --- a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/RubyCaseFoldingData.java +++ /dev/null @@ -1,1584 +0,0 @@ -/* - * Copyright (c) 2021, 2021, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * The Universal Permissive License (UPL), Version 1.0 - * - * Subject to the condition set forth below, permission is hereby granted to any - * person obtaining a copy of this software, associated documentation and/or - * data (collectively the "Software"), free of charge and under any and all - * copyright rights in the Software, and any and all patent rights owned or - * freely licensable by each licensor hereunder covering either (i) the - * unmodified Software as contributed to or provided by such licensor, or (ii) - * the Larger Works (as defined below), to deal in both - * - * (a) the Software, and - * - * (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if - * one is included with the Software each a "Larger Work" to which the Software - * is contributed by such licensors), - * - * without restriction, including without limitation the rights to copy, create - * derivative works of, display, perform, and distribute the Software and make, - * use, sell, offer for sale, import, export, have made, and have sold the - * Software and the Larger Work(s), and to sublicense the foregoing rights on - * either these or other terms. - * - * This license is subject to the following condition: - * - * The above copyright notice and either this complete permission notice or at a - * minimum a reference to the UPL must be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -package com.oracle.truffle.regex.tregex.parser.flavors; - -import java.util.SortedMap; -import java.util.TreeMap; - -public class RubyCaseFoldingData { - - public static final SortedMap CASE_FOLD; - - static { - CASE_FOLD = new TreeMap<>(); - - CASE_FOLD.put(0x0041, new int[]{0x0061}); - CASE_FOLD.put(0x0042, new int[]{0x0062}); - CASE_FOLD.put(0x0043, new int[]{0x0063}); - CASE_FOLD.put(0x0044, new int[]{0x0064}); - CASE_FOLD.put(0x0045, new int[]{0x0065}); - CASE_FOLD.put(0x0046, new int[]{0x0066}); - CASE_FOLD.put(0x0047, new int[]{0x0067}); - CASE_FOLD.put(0x0048, new int[]{0x0068}); - CASE_FOLD.put(0x0049, new int[]{0x0069}); - CASE_FOLD.put(0x004A, new int[]{0x006A}); - CASE_FOLD.put(0x004B, new int[]{0x006B}); - CASE_FOLD.put(0x004C, new int[]{0x006C}); - CASE_FOLD.put(0x004D, new int[]{0x006D}); - CASE_FOLD.put(0x004E, new int[]{0x006E}); - CASE_FOLD.put(0x004F, new int[]{0x006F}); - CASE_FOLD.put(0x0050, new int[]{0x0070}); - CASE_FOLD.put(0x0051, new int[]{0x0071}); - CASE_FOLD.put(0x0052, new int[]{0x0072}); - CASE_FOLD.put(0x0053, new int[]{0x0073}); - CASE_FOLD.put(0x0054, new int[]{0x0074}); - CASE_FOLD.put(0x0055, new int[]{0x0075}); - CASE_FOLD.put(0x0056, new int[]{0x0076}); - CASE_FOLD.put(0x0057, new int[]{0x0077}); - CASE_FOLD.put(0x0058, new int[]{0x0078}); - CASE_FOLD.put(0x0059, new int[]{0x0079}); - CASE_FOLD.put(0x005A, new int[]{0x007A}); - CASE_FOLD.put(0x00B5, new int[]{0x03BC}); - CASE_FOLD.put(0x00C0, new int[]{0x00E0}); - CASE_FOLD.put(0x00C1, new int[]{0x00E1}); - CASE_FOLD.put(0x00C2, new int[]{0x00E2}); - CASE_FOLD.put(0x00C3, new int[]{0x00E3}); - CASE_FOLD.put(0x00C4, new int[]{0x00E4}); - CASE_FOLD.put(0x00C5, new int[]{0x00E5}); - CASE_FOLD.put(0x00C6, new int[]{0x00E6}); - CASE_FOLD.put(0x00C7, new int[]{0x00E7}); - CASE_FOLD.put(0x00C8, new int[]{0x00E8}); - CASE_FOLD.put(0x00C9, new int[]{0x00E9}); - CASE_FOLD.put(0x00CA, new int[]{0x00EA}); - CASE_FOLD.put(0x00CB, new int[]{0x00EB}); - CASE_FOLD.put(0x00CC, new 
int[]{0x00EC}); - CASE_FOLD.put(0x00CD, new int[]{0x00ED}); - CASE_FOLD.put(0x00CE, new int[]{0x00EE}); - CASE_FOLD.put(0x00CF, new int[]{0x00EF}); - CASE_FOLD.put(0x00D0, new int[]{0x00F0}); - CASE_FOLD.put(0x00D1, new int[]{0x00F1}); - CASE_FOLD.put(0x00D2, new int[]{0x00F2}); - CASE_FOLD.put(0x00D3, new int[]{0x00F3}); - CASE_FOLD.put(0x00D4, new int[]{0x00F4}); - CASE_FOLD.put(0x00D5, new int[]{0x00F5}); - CASE_FOLD.put(0x00D6, new int[]{0x00F6}); - CASE_FOLD.put(0x00D8, new int[]{0x00F8}); - CASE_FOLD.put(0x00D9, new int[]{0x00F9}); - CASE_FOLD.put(0x00DA, new int[]{0x00FA}); - CASE_FOLD.put(0x00DB, new int[]{0x00FB}); - CASE_FOLD.put(0x00DC, new int[]{0x00FC}); - CASE_FOLD.put(0x00DD, new int[]{0x00FD}); - CASE_FOLD.put(0x00DE, new int[]{0x00FE}); - CASE_FOLD.put(0x00DF, new int[]{0x0073, 0x0073}); - CASE_FOLD.put(0x0100, new int[]{0x0101}); - CASE_FOLD.put(0x0102, new int[]{0x0103}); - CASE_FOLD.put(0x0104, new int[]{0x0105}); - CASE_FOLD.put(0x0106, new int[]{0x0107}); - CASE_FOLD.put(0x0108, new int[]{0x0109}); - CASE_FOLD.put(0x010A, new int[]{0x010B}); - CASE_FOLD.put(0x010C, new int[]{0x010D}); - CASE_FOLD.put(0x010E, new int[]{0x010F}); - CASE_FOLD.put(0x0110, new int[]{0x0111}); - CASE_FOLD.put(0x0112, new int[]{0x0113}); - CASE_FOLD.put(0x0114, new int[]{0x0115}); - CASE_FOLD.put(0x0116, new int[]{0x0117}); - CASE_FOLD.put(0x0118, new int[]{0x0119}); - CASE_FOLD.put(0x011A, new int[]{0x011B}); - CASE_FOLD.put(0x011C, new int[]{0x011D}); - CASE_FOLD.put(0x011E, new int[]{0x011F}); - CASE_FOLD.put(0x0120, new int[]{0x0121}); - CASE_FOLD.put(0x0122, new int[]{0x0123}); - CASE_FOLD.put(0x0124, new int[]{0x0125}); - CASE_FOLD.put(0x0126, new int[]{0x0127}); - CASE_FOLD.put(0x0128, new int[]{0x0129}); - CASE_FOLD.put(0x012A, new int[]{0x012B}); - CASE_FOLD.put(0x012C, new int[]{0x012D}); - CASE_FOLD.put(0x012E, new int[]{0x012F}); - CASE_FOLD.put(0x0130, new int[]{0x0069, 0x0307}); - CASE_FOLD.put(0x0132, new int[]{0x0133}); - CASE_FOLD.put(0x0134, new 
int[]{0x0135}); - CASE_FOLD.put(0x0136, new int[]{0x0137}); - CASE_FOLD.put(0x0139, new int[]{0x013A}); - CASE_FOLD.put(0x013B, new int[]{0x013C}); - CASE_FOLD.put(0x013D, new int[]{0x013E}); - CASE_FOLD.put(0x013F, new int[]{0x0140}); - CASE_FOLD.put(0x0141, new int[]{0x0142}); - CASE_FOLD.put(0x0143, new int[]{0x0144}); - CASE_FOLD.put(0x0145, new int[]{0x0146}); - CASE_FOLD.put(0x0147, new int[]{0x0148}); - CASE_FOLD.put(0x0149, new int[]{0x02BC, 0x006E}); - CASE_FOLD.put(0x014A, new int[]{0x014B}); - CASE_FOLD.put(0x014C, new int[]{0x014D}); - CASE_FOLD.put(0x014E, new int[]{0x014F}); - CASE_FOLD.put(0x0150, new int[]{0x0151}); - CASE_FOLD.put(0x0152, new int[]{0x0153}); - CASE_FOLD.put(0x0154, new int[]{0x0155}); - CASE_FOLD.put(0x0156, new int[]{0x0157}); - CASE_FOLD.put(0x0158, new int[]{0x0159}); - CASE_FOLD.put(0x015A, new int[]{0x015B}); - CASE_FOLD.put(0x015C, new int[]{0x015D}); - CASE_FOLD.put(0x015E, new int[]{0x015F}); - CASE_FOLD.put(0x0160, new int[]{0x0161}); - CASE_FOLD.put(0x0162, new int[]{0x0163}); - CASE_FOLD.put(0x0164, new int[]{0x0165}); - CASE_FOLD.put(0x0166, new int[]{0x0167}); - CASE_FOLD.put(0x0168, new int[]{0x0169}); - CASE_FOLD.put(0x016A, new int[]{0x016B}); - CASE_FOLD.put(0x016C, new int[]{0x016D}); - CASE_FOLD.put(0x016E, new int[]{0x016F}); - CASE_FOLD.put(0x0170, new int[]{0x0171}); - CASE_FOLD.put(0x0172, new int[]{0x0173}); - CASE_FOLD.put(0x0174, new int[]{0x0175}); - CASE_FOLD.put(0x0176, new int[]{0x0177}); - CASE_FOLD.put(0x0178, new int[]{0x00FF}); - CASE_FOLD.put(0x0179, new int[]{0x017A}); - CASE_FOLD.put(0x017B, new int[]{0x017C}); - CASE_FOLD.put(0x017D, new int[]{0x017E}); - CASE_FOLD.put(0x017F, new int[]{0x0073}); - CASE_FOLD.put(0x0181, new int[]{0x0253}); - CASE_FOLD.put(0x0182, new int[]{0x0183}); - CASE_FOLD.put(0x0184, new int[]{0x0185}); - CASE_FOLD.put(0x0186, new int[]{0x0254}); - CASE_FOLD.put(0x0187, new int[]{0x0188}); - CASE_FOLD.put(0x0189, new int[]{0x0256}); - CASE_FOLD.put(0x018A, new 
int[]{0x0257}); - CASE_FOLD.put(0x018B, new int[]{0x018C}); - CASE_FOLD.put(0x018E, new int[]{0x01DD}); - CASE_FOLD.put(0x018F, new int[]{0x0259}); - CASE_FOLD.put(0x0190, new int[]{0x025B}); - CASE_FOLD.put(0x0191, new int[]{0x0192}); - CASE_FOLD.put(0x0193, new int[]{0x0260}); - CASE_FOLD.put(0x0194, new int[]{0x0263}); - CASE_FOLD.put(0x0196, new int[]{0x0269}); - CASE_FOLD.put(0x0197, new int[]{0x0268}); - CASE_FOLD.put(0x0198, new int[]{0x0199}); - CASE_FOLD.put(0x019C, new int[]{0x026F}); - CASE_FOLD.put(0x019D, new int[]{0x0272}); - CASE_FOLD.put(0x019F, new int[]{0x0275}); - CASE_FOLD.put(0x01A0, new int[]{0x01A1}); - CASE_FOLD.put(0x01A2, new int[]{0x01A3}); - CASE_FOLD.put(0x01A4, new int[]{0x01A5}); - CASE_FOLD.put(0x01A6, new int[]{0x0280}); - CASE_FOLD.put(0x01A7, new int[]{0x01A8}); - CASE_FOLD.put(0x01A9, new int[]{0x0283}); - CASE_FOLD.put(0x01AC, new int[]{0x01AD}); - CASE_FOLD.put(0x01AE, new int[]{0x0288}); - CASE_FOLD.put(0x01AF, new int[]{0x01B0}); - CASE_FOLD.put(0x01B1, new int[]{0x028A}); - CASE_FOLD.put(0x01B2, new int[]{0x028B}); - CASE_FOLD.put(0x01B3, new int[]{0x01B4}); - CASE_FOLD.put(0x01B5, new int[]{0x01B6}); - CASE_FOLD.put(0x01B7, new int[]{0x0292}); - CASE_FOLD.put(0x01B8, new int[]{0x01B9}); - CASE_FOLD.put(0x01BC, new int[]{0x01BD}); - CASE_FOLD.put(0x01C4, new int[]{0x01C6}); - CASE_FOLD.put(0x01C5, new int[]{0x01C6}); - CASE_FOLD.put(0x01C7, new int[]{0x01C9}); - CASE_FOLD.put(0x01C8, new int[]{0x01C9}); - CASE_FOLD.put(0x01CA, new int[]{0x01CC}); - CASE_FOLD.put(0x01CB, new int[]{0x01CC}); - CASE_FOLD.put(0x01CD, new int[]{0x01CE}); - CASE_FOLD.put(0x01CF, new int[]{0x01D0}); - CASE_FOLD.put(0x01D1, new int[]{0x01D2}); - CASE_FOLD.put(0x01D3, new int[]{0x01D4}); - CASE_FOLD.put(0x01D5, new int[]{0x01D6}); - CASE_FOLD.put(0x01D7, new int[]{0x01D8}); - CASE_FOLD.put(0x01D9, new int[]{0x01DA}); - CASE_FOLD.put(0x01DB, new int[]{0x01DC}); - CASE_FOLD.put(0x01DE, new int[]{0x01DF}); - CASE_FOLD.put(0x01E0, new int[]{0x01E1}); - 
CASE_FOLD.put(0x01E2, new int[]{0x01E3}); - CASE_FOLD.put(0x01E4, new int[]{0x01E5}); - CASE_FOLD.put(0x01E6, new int[]{0x01E7}); - CASE_FOLD.put(0x01E8, new int[]{0x01E9}); - CASE_FOLD.put(0x01EA, new int[]{0x01EB}); - CASE_FOLD.put(0x01EC, new int[]{0x01ED}); - CASE_FOLD.put(0x01EE, new int[]{0x01EF}); - CASE_FOLD.put(0x01F0, new int[]{0x006A, 0x030C}); - CASE_FOLD.put(0x01F1, new int[]{0x01F3}); - CASE_FOLD.put(0x01F2, new int[]{0x01F3}); - CASE_FOLD.put(0x01F4, new int[]{0x01F5}); - CASE_FOLD.put(0x01F6, new int[]{0x0195}); - CASE_FOLD.put(0x01F7, new int[]{0x01BF}); - CASE_FOLD.put(0x01F8, new int[]{0x01F9}); - CASE_FOLD.put(0x01FA, new int[]{0x01FB}); - CASE_FOLD.put(0x01FC, new int[]{0x01FD}); - CASE_FOLD.put(0x01FE, new int[]{0x01FF}); - CASE_FOLD.put(0x0200, new int[]{0x0201}); - CASE_FOLD.put(0x0202, new int[]{0x0203}); - CASE_FOLD.put(0x0204, new int[]{0x0205}); - CASE_FOLD.put(0x0206, new int[]{0x0207}); - CASE_FOLD.put(0x0208, new int[]{0x0209}); - CASE_FOLD.put(0x020A, new int[]{0x020B}); - CASE_FOLD.put(0x020C, new int[]{0x020D}); - CASE_FOLD.put(0x020E, new int[]{0x020F}); - CASE_FOLD.put(0x0210, new int[]{0x0211}); - CASE_FOLD.put(0x0212, new int[]{0x0213}); - CASE_FOLD.put(0x0214, new int[]{0x0215}); - CASE_FOLD.put(0x0216, new int[]{0x0217}); - CASE_FOLD.put(0x0218, new int[]{0x0219}); - CASE_FOLD.put(0x021A, new int[]{0x021B}); - CASE_FOLD.put(0x021C, new int[]{0x021D}); - CASE_FOLD.put(0x021E, new int[]{0x021F}); - CASE_FOLD.put(0x0220, new int[]{0x019E}); - CASE_FOLD.put(0x0222, new int[]{0x0223}); - CASE_FOLD.put(0x0224, new int[]{0x0225}); - CASE_FOLD.put(0x0226, new int[]{0x0227}); - CASE_FOLD.put(0x0228, new int[]{0x0229}); - CASE_FOLD.put(0x022A, new int[]{0x022B}); - CASE_FOLD.put(0x022C, new int[]{0x022D}); - CASE_FOLD.put(0x022E, new int[]{0x022F}); - CASE_FOLD.put(0x0230, new int[]{0x0231}); - CASE_FOLD.put(0x0232, new int[]{0x0233}); - CASE_FOLD.put(0x023A, new int[]{0x2C65}); - CASE_FOLD.put(0x023B, new int[]{0x023C}); - 
CASE_FOLD.put(0x023D, new int[]{0x019A}); - CASE_FOLD.put(0x023E, new int[]{0x2C66}); - CASE_FOLD.put(0x0241, new int[]{0x0242}); - CASE_FOLD.put(0x0243, new int[]{0x0180}); - CASE_FOLD.put(0x0244, new int[]{0x0289}); - CASE_FOLD.put(0x0245, new int[]{0x028C}); - CASE_FOLD.put(0x0246, new int[]{0x0247}); - CASE_FOLD.put(0x0248, new int[]{0x0249}); - CASE_FOLD.put(0x024A, new int[]{0x024B}); - CASE_FOLD.put(0x024C, new int[]{0x024D}); - CASE_FOLD.put(0x024E, new int[]{0x024F}); - CASE_FOLD.put(0x0345, new int[]{0x03B9}); - CASE_FOLD.put(0x0370, new int[]{0x0371}); - CASE_FOLD.put(0x0372, new int[]{0x0373}); - CASE_FOLD.put(0x0376, new int[]{0x0377}); - CASE_FOLD.put(0x037F, new int[]{0x03F3}); - CASE_FOLD.put(0x0386, new int[]{0x03AC}); - CASE_FOLD.put(0x0388, new int[]{0x03AD}); - CASE_FOLD.put(0x0389, new int[]{0x03AE}); - CASE_FOLD.put(0x038A, new int[]{0x03AF}); - CASE_FOLD.put(0x038C, new int[]{0x03CC}); - CASE_FOLD.put(0x038E, new int[]{0x03CD}); - CASE_FOLD.put(0x038F, new int[]{0x03CE}); - CASE_FOLD.put(0x0390, new int[]{0x03B9, 0x0308, 0x0301}); - CASE_FOLD.put(0x0391, new int[]{0x03B1}); - CASE_FOLD.put(0x0392, new int[]{0x03B2}); - CASE_FOLD.put(0x0393, new int[]{0x03B3}); - CASE_FOLD.put(0x0394, new int[]{0x03B4}); - CASE_FOLD.put(0x0395, new int[]{0x03B5}); - CASE_FOLD.put(0x0396, new int[]{0x03B6}); - CASE_FOLD.put(0x0397, new int[]{0x03B7}); - CASE_FOLD.put(0x0398, new int[]{0x03B8}); - CASE_FOLD.put(0x0399, new int[]{0x03B9}); - CASE_FOLD.put(0x039A, new int[]{0x03BA}); - CASE_FOLD.put(0x039B, new int[]{0x03BB}); - CASE_FOLD.put(0x039C, new int[]{0x03BC}); - CASE_FOLD.put(0x039D, new int[]{0x03BD}); - CASE_FOLD.put(0x039E, new int[]{0x03BE}); - CASE_FOLD.put(0x039F, new int[]{0x03BF}); - CASE_FOLD.put(0x03A0, new int[]{0x03C0}); - CASE_FOLD.put(0x03A1, new int[]{0x03C1}); - CASE_FOLD.put(0x03A3, new int[]{0x03C3}); - CASE_FOLD.put(0x03A4, new int[]{0x03C4}); - CASE_FOLD.put(0x03A5, new int[]{0x03C5}); - CASE_FOLD.put(0x03A6, new int[]{0x03C6}); - 
CASE_FOLD.put(0x03A7, new int[]{0x03C7}); - CASE_FOLD.put(0x03A8, new int[]{0x03C8}); - CASE_FOLD.put(0x03A9, new int[]{0x03C9}); - CASE_FOLD.put(0x03AA, new int[]{0x03CA}); - CASE_FOLD.put(0x03AB, new int[]{0x03CB}); - CASE_FOLD.put(0x03B0, new int[]{0x03C5, 0x0308, 0x0301}); - CASE_FOLD.put(0x03C2, new int[]{0x03C3}); - CASE_FOLD.put(0x03CF, new int[]{0x03D7}); - CASE_FOLD.put(0x03D0, new int[]{0x03B2}); - CASE_FOLD.put(0x03D1, new int[]{0x03B8}); - CASE_FOLD.put(0x03D5, new int[]{0x03C6}); - CASE_FOLD.put(0x03D6, new int[]{0x03C0}); - CASE_FOLD.put(0x03D8, new int[]{0x03D9}); - CASE_FOLD.put(0x03DA, new int[]{0x03DB}); - CASE_FOLD.put(0x03DC, new int[]{0x03DD}); - CASE_FOLD.put(0x03DE, new int[]{0x03DF}); - CASE_FOLD.put(0x03E0, new int[]{0x03E1}); - CASE_FOLD.put(0x03E2, new int[]{0x03E3}); - CASE_FOLD.put(0x03E4, new int[]{0x03E5}); - CASE_FOLD.put(0x03E6, new int[]{0x03E7}); - CASE_FOLD.put(0x03E8, new int[]{0x03E9}); - CASE_FOLD.put(0x03EA, new int[]{0x03EB}); - CASE_FOLD.put(0x03EC, new int[]{0x03ED}); - CASE_FOLD.put(0x03EE, new int[]{0x03EF}); - CASE_FOLD.put(0x03F0, new int[]{0x03BA}); - CASE_FOLD.put(0x03F1, new int[]{0x03C1}); - CASE_FOLD.put(0x03F4, new int[]{0x03B8}); - CASE_FOLD.put(0x03F5, new int[]{0x03B5}); - CASE_FOLD.put(0x03F7, new int[]{0x03F8}); - CASE_FOLD.put(0x03F9, new int[]{0x03F2}); - CASE_FOLD.put(0x03FA, new int[]{0x03FB}); - CASE_FOLD.put(0x03FD, new int[]{0x037B}); - CASE_FOLD.put(0x03FE, new int[]{0x037C}); - CASE_FOLD.put(0x03FF, new int[]{0x037D}); - CASE_FOLD.put(0x0400, new int[]{0x0450}); - CASE_FOLD.put(0x0401, new int[]{0x0451}); - CASE_FOLD.put(0x0402, new int[]{0x0452}); - CASE_FOLD.put(0x0403, new int[]{0x0453}); - CASE_FOLD.put(0x0404, new int[]{0x0454}); - CASE_FOLD.put(0x0405, new int[]{0x0455}); - CASE_FOLD.put(0x0406, new int[]{0x0456}); - CASE_FOLD.put(0x0407, new int[]{0x0457}); - CASE_FOLD.put(0x0408, new int[]{0x0458}); - CASE_FOLD.put(0x0409, new int[]{0x0459}); - CASE_FOLD.put(0x040A, new int[]{0x045A}); - 
CASE_FOLD.put(0x040B, new int[]{0x045B}); - CASE_FOLD.put(0x040C, new int[]{0x045C}); - CASE_FOLD.put(0x040D, new int[]{0x045D}); - CASE_FOLD.put(0x040E, new int[]{0x045E}); - CASE_FOLD.put(0x040F, new int[]{0x045F}); - CASE_FOLD.put(0x0410, new int[]{0x0430}); - CASE_FOLD.put(0x0411, new int[]{0x0431}); - CASE_FOLD.put(0x0412, new int[]{0x0432}); - CASE_FOLD.put(0x0413, new int[]{0x0433}); - CASE_FOLD.put(0x0414, new int[]{0x0434}); - CASE_FOLD.put(0x0415, new int[]{0x0435}); - CASE_FOLD.put(0x0416, new int[]{0x0436}); - CASE_FOLD.put(0x0417, new int[]{0x0437}); - CASE_FOLD.put(0x0418, new int[]{0x0438}); - CASE_FOLD.put(0x0419, new int[]{0x0439}); - CASE_FOLD.put(0x041A, new int[]{0x043A}); - CASE_FOLD.put(0x041B, new int[]{0x043B}); - CASE_FOLD.put(0x041C, new int[]{0x043C}); - CASE_FOLD.put(0x041D, new int[]{0x043D}); - CASE_FOLD.put(0x041E, new int[]{0x043E}); - CASE_FOLD.put(0x041F, new int[]{0x043F}); - CASE_FOLD.put(0x0420, new int[]{0x0440}); - CASE_FOLD.put(0x0421, new int[]{0x0441}); - CASE_FOLD.put(0x0422, new int[]{0x0442}); - CASE_FOLD.put(0x0423, new int[]{0x0443}); - CASE_FOLD.put(0x0424, new int[]{0x0444}); - CASE_FOLD.put(0x0425, new int[]{0x0445}); - CASE_FOLD.put(0x0426, new int[]{0x0446}); - CASE_FOLD.put(0x0427, new int[]{0x0447}); - CASE_FOLD.put(0x0428, new int[]{0x0448}); - CASE_FOLD.put(0x0429, new int[]{0x0449}); - CASE_FOLD.put(0x042A, new int[]{0x044A}); - CASE_FOLD.put(0x042B, new int[]{0x044B}); - CASE_FOLD.put(0x042C, new int[]{0x044C}); - CASE_FOLD.put(0x042D, new int[]{0x044D}); - CASE_FOLD.put(0x042E, new int[]{0x044E}); - CASE_FOLD.put(0x042F, new int[]{0x044F}); - CASE_FOLD.put(0x0460, new int[]{0x0461}); - CASE_FOLD.put(0x0462, new int[]{0x0463}); - CASE_FOLD.put(0x0464, new int[]{0x0465}); - CASE_FOLD.put(0x0466, new int[]{0x0467}); - CASE_FOLD.put(0x0468, new int[]{0x0469}); - CASE_FOLD.put(0x046A, new int[]{0x046B}); - CASE_FOLD.put(0x046C, new int[]{0x046D}); - CASE_FOLD.put(0x046E, new int[]{0x046F}); - 
CASE_FOLD.put(0x0470, new int[]{0x0471}); - CASE_FOLD.put(0x0472, new int[]{0x0473}); - CASE_FOLD.put(0x0474, new int[]{0x0475}); - CASE_FOLD.put(0x0476, new int[]{0x0477}); - CASE_FOLD.put(0x0478, new int[]{0x0479}); - CASE_FOLD.put(0x047A, new int[]{0x047B}); - CASE_FOLD.put(0x047C, new int[]{0x047D}); - CASE_FOLD.put(0x047E, new int[]{0x047F}); - CASE_FOLD.put(0x0480, new int[]{0x0481}); - CASE_FOLD.put(0x048A, new int[]{0x048B}); - CASE_FOLD.put(0x048C, new int[]{0x048D}); - CASE_FOLD.put(0x048E, new int[]{0x048F}); - CASE_FOLD.put(0x0490, new int[]{0x0491}); - CASE_FOLD.put(0x0492, new int[]{0x0493}); - CASE_FOLD.put(0x0494, new int[]{0x0495}); - CASE_FOLD.put(0x0496, new int[]{0x0497}); - CASE_FOLD.put(0x0498, new int[]{0x0499}); - CASE_FOLD.put(0x049A, new int[]{0x049B}); - CASE_FOLD.put(0x049C, new int[]{0x049D}); - CASE_FOLD.put(0x049E, new int[]{0x049F}); - CASE_FOLD.put(0x04A0, new int[]{0x04A1}); - CASE_FOLD.put(0x04A2, new int[]{0x04A3}); - CASE_FOLD.put(0x04A4, new int[]{0x04A5}); - CASE_FOLD.put(0x04A6, new int[]{0x04A7}); - CASE_FOLD.put(0x04A8, new int[]{0x04A9}); - CASE_FOLD.put(0x04AA, new int[]{0x04AB}); - CASE_FOLD.put(0x04AC, new int[]{0x04AD}); - CASE_FOLD.put(0x04AE, new int[]{0x04AF}); - CASE_FOLD.put(0x04B0, new int[]{0x04B1}); - CASE_FOLD.put(0x04B2, new int[]{0x04B3}); - CASE_FOLD.put(0x04B4, new int[]{0x04B5}); - CASE_FOLD.put(0x04B6, new int[]{0x04B7}); - CASE_FOLD.put(0x04B8, new int[]{0x04B9}); - CASE_FOLD.put(0x04BA, new int[]{0x04BB}); - CASE_FOLD.put(0x04BC, new int[]{0x04BD}); - CASE_FOLD.put(0x04BE, new int[]{0x04BF}); - CASE_FOLD.put(0x04C0, new int[]{0x04CF}); - CASE_FOLD.put(0x04C1, new int[]{0x04C2}); - CASE_FOLD.put(0x04C3, new int[]{0x04C4}); - CASE_FOLD.put(0x04C5, new int[]{0x04C6}); - CASE_FOLD.put(0x04C7, new int[]{0x04C8}); - CASE_FOLD.put(0x04C9, new int[]{0x04CA}); - CASE_FOLD.put(0x04CB, new int[]{0x04CC}); - CASE_FOLD.put(0x04CD, new int[]{0x04CE}); - CASE_FOLD.put(0x04D0, new int[]{0x04D1}); - 
CASE_FOLD.put(0x04D2, new int[]{0x04D3}); - CASE_FOLD.put(0x04D4, new int[]{0x04D5}); - CASE_FOLD.put(0x04D6, new int[]{0x04D7}); - CASE_FOLD.put(0x04D8, new int[]{0x04D9}); - CASE_FOLD.put(0x04DA, new int[]{0x04DB}); - CASE_FOLD.put(0x04DC, new int[]{0x04DD}); - CASE_FOLD.put(0x04DE, new int[]{0x04DF}); - CASE_FOLD.put(0x04E0, new int[]{0x04E1}); - CASE_FOLD.put(0x04E2, new int[]{0x04E3}); - CASE_FOLD.put(0x04E4, new int[]{0x04E5}); - CASE_FOLD.put(0x04E6, new int[]{0x04E7}); - CASE_FOLD.put(0x04E8, new int[]{0x04E9}); - CASE_FOLD.put(0x04EA, new int[]{0x04EB}); - CASE_FOLD.put(0x04EC, new int[]{0x04ED}); - CASE_FOLD.put(0x04EE, new int[]{0x04EF}); - CASE_FOLD.put(0x04F0, new int[]{0x04F1}); - CASE_FOLD.put(0x04F2, new int[]{0x04F3}); - CASE_FOLD.put(0x04F4, new int[]{0x04F5}); - CASE_FOLD.put(0x04F6, new int[]{0x04F7}); - CASE_FOLD.put(0x04F8, new int[]{0x04F9}); - CASE_FOLD.put(0x04FA, new int[]{0x04FB}); - CASE_FOLD.put(0x04FC, new int[]{0x04FD}); - CASE_FOLD.put(0x04FE, new int[]{0x04FF}); - CASE_FOLD.put(0x0500, new int[]{0x0501}); - CASE_FOLD.put(0x0502, new int[]{0x0503}); - CASE_FOLD.put(0x0504, new int[]{0x0505}); - CASE_FOLD.put(0x0506, new int[]{0x0507}); - CASE_FOLD.put(0x0508, new int[]{0x0509}); - CASE_FOLD.put(0x050A, new int[]{0x050B}); - CASE_FOLD.put(0x050C, new int[]{0x050D}); - CASE_FOLD.put(0x050E, new int[]{0x050F}); - CASE_FOLD.put(0x0510, new int[]{0x0511}); - CASE_FOLD.put(0x0512, new int[]{0x0513}); - CASE_FOLD.put(0x0514, new int[]{0x0515}); - CASE_FOLD.put(0x0516, new int[]{0x0517}); - CASE_FOLD.put(0x0518, new int[]{0x0519}); - CASE_FOLD.put(0x051A, new int[]{0x051B}); - CASE_FOLD.put(0x051C, new int[]{0x051D}); - CASE_FOLD.put(0x051E, new int[]{0x051F}); - CASE_FOLD.put(0x0520, new int[]{0x0521}); - CASE_FOLD.put(0x0522, new int[]{0x0523}); - CASE_FOLD.put(0x0524, new int[]{0x0525}); - CASE_FOLD.put(0x0526, new int[]{0x0527}); - CASE_FOLD.put(0x0528, new int[]{0x0529}); - CASE_FOLD.put(0x052A, new int[]{0x052B}); - 
CASE_FOLD.put(0x052C, new int[]{0x052D}); - CASE_FOLD.put(0x052E, new int[]{0x052F}); - CASE_FOLD.put(0x0531, new int[]{0x0561}); - CASE_FOLD.put(0x0532, new int[]{0x0562}); - CASE_FOLD.put(0x0533, new int[]{0x0563}); - CASE_FOLD.put(0x0534, new int[]{0x0564}); - CASE_FOLD.put(0x0535, new int[]{0x0565}); - CASE_FOLD.put(0x0536, new int[]{0x0566}); - CASE_FOLD.put(0x0537, new int[]{0x0567}); - CASE_FOLD.put(0x0538, new int[]{0x0568}); - CASE_FOLD.put(0x0539, new int[]{0x0569}); - CASE_FOLD.put(0x053A, new int[]{0x056A}); - CASE_FOLD.put(0x053B, new int[]{0x056B}); - CASE_FOLD.put(0x053C, new int[]{0x056C}); - CASE_FOLD.put(0x053D, new int[]{0x056D}); - CASE_FOLD.put(0x053E, new int[]{0x056E}); - CASE_FOLD.put(0x053F, new int[]{0x056F}); - CASE_FOLD.put(0x0540, new int[]{0x0570}); - CASE_FOLD.put(0x0541, new int[]{0x0571}); - CASE_FOLD.put(0x0542, new int[]{0x0572}); - CASE_FOLD.put(0x0543, new int[]{0x0573}); - CASE_FOLD.put(0x0544, new int[]{0x0574}); - CASE_FOLD.put(0x0545, new int[]{0x0575}); - CASE_FOLD.put(0x0546, new int[]{0x0576}); - CASE_FOLD.put(0x0547, new int[]{0x0577}); - CASE_FOLD.put(0x0548, new int[]{0x0578}); - CASE_FOLD.put(0x0549, new int[]{0x0579}); - CASE_FOLD.put(0x054A, new int[]{0x057A}); - CASE_FOLD.put(0x054B, new int[]{0x057B}); - CASE_FOLD.put(0x054C, new int[]{0x057C}); - CASE_FOLD.put(0x054D, new int[]{0x057D}); - CASE_FOLD.put(0x054E, new int[]{0x057E}); - CASE_FOLD.put(0x054F, new int[]{0x057F}); - CASE_FOLD.put(0x0550, new int[]{0x0580}); - CASE_FOLD.put(0x0551, new int[]{0x0581}); - CASE_FOLD.put(0x0552, new int[]{0x0582}); - CASE_FOLD.put(0x0553, new int[]{0x0583}); - CASE_FOLD.put(0x0554, new int[]{0x0584}); - CASE_FOLD.put(0x0555, new int[]{0x0585}); - CASE_FOLD.put(0x0556, new int[]{0x0586}); - CASE_FOLD.put(0x0587, new int[]{0x0565, 0x0582}); - CASE_FOLD.put(0x10A0, new int[]{0x2D00}); - CASE_FOLD.put(0x10A1, new int[]{0x2D01}); - CASE_FOLD.put(0x10A2, new int[]{0x2D02}); - CASE_FOLD.put(0x10A3, new int[]{0x2D03}); - 
CASE_FOLD.put(0x10A4, new int[]{0x2D04}); - CASE_FOLD.put(0x10A5, new int[]{0x2D05}); - CASE_FOLD.put(0x10A6, new int[]{0x2D06}); - CASE_FOLD.put(0x10A7, new int[]{0x2D07}); - CASE_FOLD.put(0x10A8, new int[]{0x2D08}); - CASE_FOLD.put(0x10A9, new int[]{0x2D09}); - CASE_FOLD.put(0x10AA, new int[]{0x2D0A}); - CASE_FOLD.put(0x10AB, new int[]{0x2D0B}); - CASE_FOLD.put(0x10AC, new int[]{0x2D0C}); - CASE_FOLD.put(0x10AD, new int[]{0x2D0D}); - CASE_FOLD.put(0x10AE, new int[]{0x2D0E}); - CASE_FOLD.put(0x10AF, new int[]{0x2D0F}); - CASE_FOLD.put(0x10B0, new int[]{0x2D10}); - CASE_FOLD.put(0x10B1, new int[]{0x2D11}); - CASE_FOLD.put(0x10B2, new int[]{0x2D12}); - CASE_FOLD.put(0x10B3, new int[]{0x2D13}); - CASE_FOLD.put(0x10B4, new int[]{0x2D14}); - CASE_FOLD.put(0x10B5, new int[]{0x2D15}); - CASE_FOLD.put(0x10B6, new int[]{0x2D16}); - CASE_FOLD.put(0x10B7, new int[]{0x2D17}); - CASE_FOLD.put(0x10B8, new int[]{0x2D18}); - CASE_FOLD.put(0x10B9, new int[]{0x2D19}); - CASE_FOLD.put(0x10BA, new int[]{0x2D1A}); - CASE_FOLD.put(0x10BB, new int[]{0x2D1B}); - CASE_FOLD.put(0x10BC, new int[]{0x2D1C}); - CASE_FOLD.put(0x10BD, new int[]{0x2D1D}); - CASE_FOLD.put(0x10BE, new int[]{0x2D1E}); - CASE_FOLD.put(0x10BF, new int[]{0x2D1F}); - CASE_FOLD.put(0x10C0, new int[]{0x2D20}); - CASE_FOLD.put(0x10C1, new int[]{0x2D21}); - CASE_FOLD.put(0x10C2, new int[]{0x2D22}); - CASE_FOLD.put(0x10C3, new int[]{0x2D23}); - CASE_FOLD.put(0x10C4, new int[]{0x2D24}); - CASE_FOLD.put(0x10C5, new int[]{0x2D25}); - CASE_FOLD.put(0x10C7, new int[]{0x2D27}); - CASE_FOLD.put(0x10CD, new int[]{0x2D2D}); - CASE_FOLD.put(0x13F8, new int[]{0x13F0}); - CASE_FOLD.put(0x13F9, new int[]{0x13F1}); - CASE_FOLD.put(0x13FA, new int[]{0x13F2}); - CASE_FOLD.put(0x13FB, new int[]{0x13F3}); - CASE_FOLD.put(0x13FC, new int[]{0x13F4}); - CASE_FOLD.put(0x13FD, new int[]{0x13F5}); - CASE_FOLD.put(0x1C80, new int[]{0x0432}); - CASE_FOLD.put(0x1C81, new int[]{0x0434}); - CASE_FOLD.put(0x1C82, new int[]{0x043E}); - 
CASE_FOLD.put(0x1C83, new int[]{0x0441}); - CASE_FOLD.put(0x1C84, new int[]{0x0442}); - CASE_FOLD.put(0x1C85, new int[]{0x0442}); - CASE_FOLD.put(0x1C86, new int[]{0x044A}); - CASE_FOLD.put(0x1C87, new int[]{0x0463}); - CASE_FOLD.put(0x1C88, new int[]{0xA64B}); - CASE_FOLD.put(0x1C90, new int[]{0x10D0}); - CASE_FOLD.put(0x1C91, new int[]{0x10D1}); - CASE_FOLD.put(0x1C92, new int[]{0x10D2}); - CASE_FOLD.put(0x1C93, new int[]{0x10D3}); - CASE_FOLD.put(0x1C94, new int[]{0x10D4}); - CASE_FOLD.put(0x1C95, new int[]{0x10D5}); - CASE_FOLD.put(0x1C96, new int[]{0x10D6}); - CASE_FOLD.put(0x1C97, new int[]{0x10D7}); - CASE_FOLD.put(0x1C98, new int[]{0x10D8}); - CASE_FOLD.put(0x1C99, new int[]{0x10D9}); - CASE_FOLD.put(0x1C9A, new int[]{0x10DA}); - CASE_FOLD.put(0x1C9B, new int[]{0x10DB}); - CASE_FOLD.put(0x1C9C, new int[]{0x10DC}); - CASE_FOLD.put(0x1C9D, new int[]{0x10DD}); - CASE_FOLD.put(0x1C9E, new int[]{0x10DE}); - CASE_FOLD.put(0x1C9F, new int[]{0x10DF}); - CASE_FOLD.put(0x1CA0, new int[]{0x10E0}); - CASE_FOLD.put(0x1CA1, new int[]{0x10E1}); - CASE_FOLD.put(0x1CA2, new int[]{0x10E2}); - CASE_FOLD.put(0x1CA3, new int[]{0x10E3}); - CASE_FOLD.put(0x1CA4, new int[]{0x10E4}); - CASE_FOLD.put(0x1CA5, new int[]{0x10E5}); - CASE_FOLD.put(0x1CA6, new int[]{0x10E6}); - CASE_FOLD.put(0x1CA7, new int[]{0x10E7}); - CASE_FOLD.put(0x1CA8, new int[]{0x10E8}); - CASE_FOLD.put(0x1CA9, new int[]{0x10E9}); - CASE_FOLD.put(0x1CAA, new int[]{0x10EA}); - CASE_FOLD.put(0x1CAB, new int[]{0x10EB}); - CASE_FOLD.put(0x1CAC, new int[]{0x10EC}); - CASE_FOLD.put(0x1CAD, new int[]{0x10ED}); - CASE_FOLD.put(0x1CAE, new int[]{0x10EE}); - CASE_FOLD.put(0x1CAF, new int[]{0x10EF}); - CASE_FOLD.put(0x1CB0, new int[]{0x10F0}); - CASE_FOLD.put(0x1CB1, new int[]{0x10F1}); - CASE_FOLD.put(0x1CB2, new int[]{0x10F2}); - CASE_FOLD.put(0x1CB3, new int[]{0x10F3}); - CASE_FOLD.put(0x1CB4, new int[]{0x10F4}); - CASE_FOLD.put(0x1CB5, new int[]{0x10F5}); - CASE_FOLD.put(0x1CB6, new int[]{0x10F6}); - 
CASE_FOLD.put(0x1CB7, new int[]{0x10F7}); - CASE_FOLD.put(0x1CB8, new int[]{0x10F8}); - CASE_FOLD.put(0x1CB9, new int[]{0x10F9}); - CASE_FOLD.put(0x1CBA, new int[]{0x10FA}); - CASE_FOLD.put(0x1CBD, new int[]{0x10FD}); - CASE_FOLD.put(0x1CBE, new int[]{0x10FE}); - CASE_FOLD.put(0x1CBF, new int[]{0x10FF}); - CASE_FOLD.put(0x1E00, new int[]{0x1E01}); - CASE_FOLD.put(0x1E02, new int[]{0x1E03}); - CASE_FOLD.put(0x1E04, new int[]{0x1E05}); - CASE_FOLD.put(0x1E06, new int[]{0x1E07}); - CASE_FOLD.put(0x1E08, new int[]{0x1E09}); - CASE_FOLD.put(0x1E0A, new int[]{0x1E0B}); - CASE_FOLD.put(0x1E0C, new int[]{0x1E0D}); - CASE_FOLD.put(0x1E0E, new int[]{0x1E0F}); - CASE_FOLD.put(0x1E10, new int[]{0x1E11}); - CASE_FOLD.put(0x1E12, new int[]{0x1E13}); - CASE_FOLD.put(0x1E14, new int[]{0x1E15}); - CASE_FOLD.put(0x1E16, new int[]{0x1E17}); - CASE_FOLD.put(0x1E18, new int[]{0x1E19}); - CASE_FOLD.put(0x1E1A, new int[]{0x1E1B}); - CASE_FOLD.put(0x1E1C, new int[]{0x1E1D}); - CASE_FOLD.put(0x1E1E, new int[]{0x1E1F}); - CASE_FOLD.put(0x1E20, new int[]{0x1E21}); - CASE_FOLD.put(0x1E22, new int[]{0x1E23}); - CASE_FOLD.put(0x1E24, new int[]{0x1E25}); - CASE_FOLD.put(0x1E26, new int[]{0x1E27}); - CASE_FOLD.put(0x1E28, new int[]{0x1E29}); - CASE_FOLD.put(0x1E2A, new int[]{0x1E2B}); - CASE_FOLD.put(0x1E2C, new int[]{0x1E2D}); - CASE_FOLD.put(0x1E2E, new int[]{0x1E2F}); - CASE_FOLD.put(0x1E30, new int[]{0x1E31}); - CASE_FOLD.put(0x1E32, new int[]{0x1E33}); - CASE_FOLD.put(0x1E34, new int[]{0x1E35}); - CASE_FOLD.put(0x1E36, new int[]{0x1E37}); - CASE_FOLD.put(0x1E38, new int[]{0x1E39}); - CASE_FOLD.put(0x1E3A, new int[]{0x1E3B}); - CASE_FOLD.put(0x1E3C, new int[]{0x1E3D}); - CASE_FOLD.put(0x1E3E, new int[]{0x1E3F}); - CASE_FOLD.put(0x1E40, new int[]{0x1E41}); - CASE_FOLD.put(0x1E42, new int[]{0x1E43}); - CASE_FOLD.put(0x1E44, new int[]{0x1E45}); - CASE_FOLD.put(0x1E46, new int[]{0x1E47}); - CASE_FOLD.put(0x1E48, new int[]{0x1E49}); - CASE_FOLD.put(0x1E4A, new int[]{0x1E4B}); - 
CASE_FOLD.put(0x1E4C, new int[]{0x1E4D}); - CASE_FOLD.put(0x1E4E, new int[]{0x1E4F}); - CASE_FOLD.put(0x1E50, new int[]{0x1E51}); - CASE_FOLD.put(0x1E52, new int[]{0x1E53}); - CASE_FOLD.put(0x1E54, new int[]{0x1E55}); - CASE_FOLD.put(0x1E56, new int[]{0x1E57}); - CASE_FOLD.put(0x1E58, new int[]{0x1E59}); - CASE_FOLD.put(0x1E5A, new int[]{0x1E5B}); - CASE_FOLD.put(0x1E5C, new int[]{0x1E5D}); - CASE_FOLD.put(0x1E5E, new int[]{0x1E5F}); - CASE_FOLD.put(0x1E60, new int[]{0x1E61}); - CASE_FOLD.put(0x1E62, new int[]{0x1E63}); - CASE_FOLD.put(0x1E64, new int[]{0x1E65}); - CASE_FOLD.put(0x1E66, new int[]{0x1E67}); - CASE_FOLD.put(0x1E68, new int[]{0x1E69}); - CASE_FOLD.put(0x1E6A, new int[]{0x1E6B}); - CASE_FOLD.put(0x1E6C, new int[]{0x1E6D}); - CASE_FOLD.put(0x1E6E, new int[]{0x1E6F}); - CASE_FOLD.put(0x1E70, new int[]{0x1E71}); - CASE_FOLD.put(0x1E72, new int[]{0x1E73}); - CASE_FOLD.put(0x1E74, new int[]{0x1E75}); - CASE_FOLD.put(0x1E76, new int[]{0x1E77}); - CASE_FOLD.put(0x1E78, new int[]{0x1E79}); - CASE_FOLD.put(0x1E7A, new int[]{0x1E7B}); - CASE_FOLD.put(0x1E7C, new int[]{0x1E7D}); - CASE_FOLD.put(0x1E7E, new int[]{0x1E7F}); - CASE_FOLD.put(0x1E80, new int[]{0x1E81}); - CASE_FOLD.put(0x1E82, new int[]{0x1E83}); - CASE_FOLD.put(0x1E84, new int[]{0x1E85}); - CASE_FOLD.put(0x1E86, new int[]{0x1E87}); - CASE_FOLD.put(0x1E88, new int[]{0x1E89}); - CASE_FOLD.put(0x1E8A, new int[]{0x1E8B}); - CASE_FOLD.put(0x1E8C, new int[]{0x1E8D}); - CASE_FOLD.put(0x1E8E, new int[]{0x1E8F}); - CASE_FOLD.put(0x1E90, new int[]{0x1E91}); - CASE_FOLD.put(0x1E92, new int[]{0x1E93}); - CASE_FOLD.put(0x1E94, new int[]{0x1E95}); - CASE_FOLD.put(0x1E96, new int[]{0x0068, 0x0331}); - CASE_FOLD.put(0x1E97, new int[]{0x0074, 0x0308}); - CASE_FOLD.put(0x1E98, new int[]{0x0077, 0x030A}); - CASE_FOLD.put(0x1E99, new int[]{0x0079, 0x030A}); - CASE_FOLD.put(0x1E9A, new int[]{0x0061, 0x02BE}); - CASE_FOLD.put(0x1E9B, new int[]{0x1E61}); - CASE_FOLD.put(0x1E9E, new int[]{0x0073, 0x0073}); - 
CASE_FOLD.put(0x1EA0, new int[]{0x1EA1}); - CASE_FOLD.put(0x1EA2, new int[]{0x1EA3}); - CASE_FOLD.put(0x1EA4, new int[]{0x1EA5}); - CASE_FOLD.put(0x1EA6, new int[]{0x1EA7}); - CASE_FOLD.put(0x1EA8, new int[]{0x1EA9}); - CASE_FOLD.put(0x1EAA, new int[]{0x1EAB}); - CASE_FOLD.put(0x1EAC, new int[]{0x1EAD}); - CASE_FOLD.put(0x1EAE, new int[]{0x1EAF}); - CASE_FOLD.put(0x1EB0, new int[]{0x1EB1}); - CASE_FOLD.put(0x1EB2, new int[]{0x1EB3}); - CASE_FOLD.put(0x1EB4, new int[]{0x1EB5}); - CASE_FOLD.put(0x1EB6, new int[]{0x1EB7}); - CASE_FOLD.put(0x1EB8, new int[]{0x1EB9}); - CASE_FOLD.put(0x1EBA, new int[]{0x1EBB}); - CASE_FOLD.put(0x1EBC, new int[]{0x1EBD}); - CASE_FOLD.put(0x1EBE, new int[]{0x1EBF}); - CASE_FOLD.put(0x1EC0, new int[]{0x1EC1}); - CASE_FOLD.put(0x1EC2, new int[]{0x1EC3}); - CASE_FOLD.put(0x1EC4, new int[]{0x1EC5}); - CASE_FOLD.put(0x1EC6, new int[]{0x1EC7}); - CASE_FOLD.put(0x1EC8, new int[]{0x1EC9}); - CASE_FOLD.put(0x1ECA, new int[]{0x1ECB}); - CASE_FOLD.put(0x1ECC, new int[]{0x1ECD}); - CASE_FOLD.put(0x1ECE, new int[]{0x1ECF}); - CASE_FOLD.put(0x1ED0, new int[]{0x1ED1}); - CASE_FOLD.put(0x1ED2, new int[]{0x1ED3}); - CASE_FOLD.put(0x1ED4, new int[]{0x1ED5}); - CASE_FOLD.put(0x1ED6, new int[]{0x1ED7}); - CASE_FOLD.put(0x1ED8, new int[]{0x1ED9}); - CASE_FOLD.put(0x1EDA, new int[]{0x1EDB}); - CASE_FOLD.put(0x1EDC, new int[]{0x1EDD}); - CASE_FOLD.put(0x1EDE, new int[]{0x1EDF}); - CASE_FOLD.put(0x1EE0, new int[]{0x1EE1}); - CASE_FOLD.put(0x1EE2, new int[]{0x1EE3}); - CASE_FOLD.put(0x1EE4, new int[]{0x1EE5}); - CASE_FOLD.put(0x1EE6, new int[]{0x1EE7}); - CASE_FOLD.put(0x1EE8, new int[]{0x1EE9}); - CASE_FOLD.put(0x1EEA, new int[]{0x1EEB}); - CASE_FOLD.put(0x1EEC, new int[]{0x1EED}); - CASE_FOLD.put(0x1EEE, new int[]{0x1EEF}); - CASE_FOLD.put(0x1EF0, new int[]{0x1EF1}); - CASE_FOLD.put(0x1EF2, new int[]{0x1EF3}); - CASE_FOLD.put(0x1EF4, new int[]{0x1EF5}); - CASE_FOLD.put(0x1EF6, new int[]{0x1EF7}); - CASE_FOLD.put(0x1EF8, new int[]{0x1EF9}); - 
CASE_FOLD.put(0x1EFA, new int[]{0x1EFB}); - CASE_FOLD.put(0x1EFC, new int[]{0x1EFD}); - CASE_FOLD.put(0x1EFE, new int[]{0x1EFF}); - CASE_FOLD.put(0x1F08, new int[]{0x1F00}); - CASE_FOLD.put(0x1F09, new int[]{0x1F01}); - CASE_FOLD.put(0x1F0A, new int[]{0x1F02}); - CASE_FOLD.put(0x1F0B, new int[]{0x1F03}); - CASE_FOLD.put(0x1F0C, new int[]{0x1F04}); - CASE_FOLD.put(0x1F0D, new int[]{0x1F05}); - CASE_FOLD.put(0x1F0E, new int[]{0x1F06}); - CASE_FOLD.put(0x1F0F, new int[]{0x1F07}); - CASE_FOLD.put(0x1F18, new int[]{0x1F10}); - CASE_FOLD.put(0x1F19, new int[]{0x1F11}); - CASE_FOLD.put(0x1F1A, new int[]{0x1F12}); - CASE_FOLD.put(0x1F1B, new int[]{0x1F13}); - CASE_FOLD.put(0x1F1C, new int[]{0x1F14}); - CASE_FOLD.put(0x1F1D, new int[]{0x1F15}); - CASE_FOLD.put(0x1F28, new int[]{0x1F20}); - CASE_FOLD.put(0x1F29, new int[]{0x1F21}); - CASE_FOLD.put(0x1F2A, new int[]{0x1F22}); - CASE_FOLD.put(0x1F2B, new int[]{0x1F23}); - CASE_FOLD.put(0x1F2C, new int[]{0x1F24}); - CASE_FOLD.put(0x1F2D, new int[]{0x1F25}); - CASE_FOLD.put(0x1F2E, new int[]{0x1F26}); - CASE_FOLD.put(0x1F2F, new int[]{0x1F27}); - CASE_FOLD.put(0x1F38, new int[]{0x1F30}); - CASE_FOLD.put(0x1F39, new int[]{0x1F31}); - CASE_FOLD.put(0x1F3A, new int[]{0x1F32}); - CASE_FOLD.put(0x1F3B, new int[]{0x1F33}); - CASE_FOLD.put(0x1F3C, new int[]{0x1F34}); - CASE_FOLD.put(0x1F3D, new int[]{0x1F35}); - CASE_FOLD.put(0x1F3E, new int[]{0x1F36}); - CASE_FOLD.put(0x1F3F, new int[]{0x1F37}); - CASE_FOLD.put(0x1F48, new int[]{0x1F40}); - CASE_FOLD.put(0x1F49, new int[]{0x1F41}); - CASE_FOLD.put(0x1F4A, new int[]{0x1F42}); - CASE_FOLD.put(0x1F4B, new int[]{0x1F43}); - CASE_FOLD.put(0x1F4C, new int[]{0x1F44}); - CASE_FOLD.put(0x1F4D, new int[]{0x1F45}); - CASE_FOLD.put(0x1F50, new int[]{0x03C5, 0x0313}); - CASE_FOLD.put(0x1F52, new int[]{0x03C5, 0x0313, 0x0300}); - CASE_FOLD.put(0x1F54, new int[]{0x03C5, 0x0313, 0x0301}); - CASE_FOLD.put(0x1F56, new int[]{0x03C5, 0x0313, 0x0342}); - CASE_FOLD.put(0x1F59, new int[]{0x1F51}); - 
CASE_FOLD.put(0x1F5B, new int[]{0x1F53}); - CASE_FOLD.put(0x1F5D, new int[]{0x1F55}); - CASE_FOLD.put(0x1F5F, new int[]{0x1F57}); - CASE_FOLD.put(0x1F68, new int[]{0x1F60}); - CASE_FOLD.put(0x1F69, new int[]{0x1F61}); - CASE_FOLD.put(0x1F6A, new int[]{0x1F62}); - CASE_FOLD.put(0x1F6B, new int[]{0x1F63}); - CASE_FOLD.put(0x1F6C, new int[]{0x1F64}); - CASE_FOLD.put(0x1F6D, new int[]{0x1F65}); - CASE_FOLD.put(0x1F6E, new int[]{0x1F66}); - CASE_FOLD.put(0x1F6F, new int[]{0x1F67}); - CASE_FOLD.put(0x1F80, new int[]{0x1F00, 0x03B9}); - CASE_FOLD.put(0x1F81, new int[]{0x1F01, 0x03B9}); - CASE_FOLD.put(0x1F82, new int[]{0x1F02, 0x03B9}); - CASE_FOLD.put(0x1F83, new int[]{0x1F03, 0x03B9}); - CASE_FOLD.put(0x1F84, new int[]{0x1F04, 0x03B9}); - CASE_FOLD.put(0x1F85, new int[]{0x1F05, 0x03B9}); - CASE_FOLD.put(0x1F86, new int[]{0x1F06, 0x03B9}); - CASE_FOLD.put(0x1F87, new int[]{0x1F07, 0x03B9}); - CASE_FOLD.put(0x1F88, new int[]{0x1F00, 0x03B9}); - CASE_FOLD.put(0x1F89, new int[]{0x1F01, 0x03B9}); - CASE_FOLD.put(0x1F8A, new int[]{0x1F02, 0x03B9}); - CASE_FOLD.put(0x1F8B, new int[]{0x1F03, 0x03B9}); - CASE_FOLD.put(0x1F8C, new int[]{0x1F04, 0x03B9}); - CASE_FOLD.put(0x1F8D, new int[]{0x1F05, 0x03B9}); - CASE_FOLD.put(0x1F8E, new int[]{0x1F06, 0x03B9}); - CASE_FOLD.put(0x1F8F, new int[]{0x1F07, 0x03B9}); - CASE_FOLD.put(0x1F90, new int[]{0x1F20, 0x03B9}); - CASE_FOLD.put(0x1F91, new int[]{0x1F21, 0x03B9}); - CASE_FOLD.put(0x1F92, new int[]{0x1F22, 0x03B9}); - CASE_FOLD.put(0x1F93, new int[]{0x1F23, 0x03B9}); - CASE_FOLD.put(0x1F94, new int[]{0x1F24, 0x03B9}); - CASE_FOLD.put(0x1F95, new int[]{0x1F25, 0x03B9}); - CASE_FOLD.put(0x1F96, new int[]{0x1F26, 0x03B9}); - CASE_FOLD.put(0x1F97, new int[]{0x1F27, 0x03B9}); - CASE_FOLD.put(0x1F98, new int[]{0x1F20, 0x03B9}); - CASE_FOLD.put(0x1F99, new int[]{0x1F21, 0x03B9}); - CASE_FOLD.put(0x1F9A, new int[]{0x1F22, 0x03B9}); - CASE_FOLD.put(0x1F9B, new int[]{0x1F23, 0x03B9}); - CASE_FOLD.put(0x1F9C, new int[]{0x1F24, 0x03B9}); - 
CASE_FOLD.put(0x1F9D, new int[]{0x1F25, 0x03B9}); - CASE_FOLD.put(0x1F9E, new int[]{0x1F26, 0x03B9}); - CASE_FOLD.put(0x1F9F, new int[]{0x1F27, 0x03B9}); - CASE_FOLD.put(0x1FA0, new int[]{0x1F60, 0x03B9}); - CASE_FOLD.put(0x1FA1, new int[]{0x1F61, 0x03B9}); - CASE_FOLD.put(0x1FA2, new int[]{0x1F62, 0x03B9}); - CASE_FOLD.put(0x1FA3, new int[]{0x1F63, 0x03B9}); - CASE_FOLD.put(0x1FA4, new int[]{0x1F64, 0x03B9}); - CASE_FOLD.put(0x1FA5, new int[]{0x1F65, 0x03B9}); - CASE_FOLD.put(0x1FA6, new int[]{0x1F66, 0x03B9}); - CASE_FOLD.put(0x1FA7, new int[]{0x1F67, 0x03B9}); - CASE_FOLD.put(0x1FA8, new int[]{0x1F60, 0x03B9}); - CASE_FOLD.put(0x1FA9, new int[]{0x1F61, 0x03B9}); - CASE_FOLD.put(0x1FAA, new int[]{0x1F62, 0x03B9}); - CASE_FOLD.put(0x1FAB, new int[]{0x1F63, 0x03B9}); - CASE_FOLD.put(0x1FAC, new int[]{0x1F64, 0x03B9}); - CASE_FOLD.put(0x1FAD, new int[]{0x1F65, 0x03B9}); - CASE_FOLD.put(0x1FAE, new int[]{0x1F66, 0x03B9}); - CASE_FOLD.put(0x1FAF, new int[]{0x1F67, 0x03B9}); - CASE_FOLD.put(0x1FB2, new int[]{0x1F70, 0x03B9}); - CASE_FOLD.put(0x1FB3, new int[]{0x03B1, 0x03B9}); - CASE_FOLD.put(0x1FB4, new int[]{0x03AC, 0x03B9}); - CASE_FOLD.put(0x1FB6, new int[]{0x03B1, 0x0342}); - CASE_FOLD.put(0x1FB7, new int[]{0x03B1, 0x0342, 0x03B9}); - CASE_FOLD.put(0x1FB8, new int[]{0x1FB0}); - CASE_FOLD.put(0x1FB9, new int[]{0x1FB1}); - CASE_FOLD.put(0x1FBA, new int[]{0x1F70}); - CASE_FOLD.put(0x1FBB, new int[]{0x1F71}); - CASE_FOLD.put(0x1FBC, new int[]{0x03B1, 0x03B9}); - CASE_FOLD.put(0x1FBE, new int[]{0x03B9}); - CASE_FOLD.put(0x1FC2, new int[]{0x1F74, 0x03B9}); - CASE_FOLD.put(0x1FC3, new int[]{0x03B7, 0x03B9}); - CASE_FOLD.put(0x1FC4, new int[]{0x03AE, 0x03B9}); - CASE_FOLD.put(0x1FC6, new int[]{0x03B7, 0x0342}); - CASE_FOLD.put(0x1FC7, new int[]{0x03B7, 0x0342, 0x03B9}); - CASE_FOLD.put(0x1FC8, new int[]{0x1F72}); - CASE_FOLD.put(0x1FC9, new int[]{0x1F73}); - CASE_FOLD.put(0x1FCA, new int[]{0x1F74}); - CASE_FOLD.put(0x1FCB, new int[]{0x1F75}); - CASE_FOLD.put(0x1FCC, new 
int[]{0x03B7, 0x03B9}); - CASE_FOLD.put(0x1FD2, new int[]{0x03B9, 0x0308, 0x0300}); - CASE_FOLD.put(0x1FD3, new int[]{0x03B9, 0x0308, 0x0301}); - CASE_FOLD.put(0x1FD6, new int[]{0x03B9, 0x0342}); - CASE_FOLD.put(0x1FD7, new int[]{0x03B9, 0x0308, 0x0342}); - CASE_FOLD.put(0x1FD8, new int[]{0x1FD0}); - CASE_FOLD.put(0x1FD9, new int[]{0x1FD1}); - CASE_FOLD.put(0x1FDA, new int[]{0x1F76}); - CASE_FOLD.put(0x1FDB, new int[]{0x1F77}); - CASE_FOLD.put(0x1FE2, new int[]{0x03C5, 0x0308, 0x0300}); - CASE_FOLD.put(0x1FE3, new int[]{0x03C5, 0x0308, 0x0301}); - CASE_FOLD.put(0x1FE4, new int[]{0x03C1, 0x0313}); - CASE_FOLD.put(0x1FE6, new int[]{0x03C5, 0x0342}); - CASE_FOLD.put(0x1FE7, new int[]{0x03C5, 0x0308, 0x0342}); - CASE_FOLD.put(0x1FE8, new int[]{0x1FE0}); - CASE_FOLD.put(0x1FE9, new int[]{0x1FE1}); - CASE_FOLD.put(0x1FEA, new int[]{0x1F7A}); - CASE_FOLD.put(0x1FEB, new int[]{0x1F7B}); - CASE_FOLD.put(0x1FEC, new int[]{0x1FE5}); - CASE_FOLD.put(0x1FF2, new int[]{0x1F7C, 0x03B9}); - CASE_FOLD.put(0x1FF3, new int[]{0x03C9, 0x03B9}); - CASE_FOLD.put(0x1FF4, new int[]{0x03CE, 0x03B9}); - CASE_FOLD.put(0x1FF6, new int[]{0x03C9, 0x0342}); - CASE_FOLD.put(0x1FF7, new int[]{0x03C9, 0x0342, 0x03B9}); - CASE_FOLD.put(0x1FF8, new int[]{0x1F78}); - CASE_FOLD.put(0x1FF9, new int[]{0x1F79}); - CASE_FOLD.put(0x1FFA, new int[]{0x1F7C}); - CASE_FOLD.put(0x1FFB, new int[]{0x1F7D}); - CASE_FOLD.put(0x1FFC, new int[]{0x03C9, 0x03B9}); - CASE_FOLD.put(0x2126, new int[]{0x03C9}); - CASE_FOLD.put(0x212A, new int[]{0x006B}); - CASE_FOLD.put(0x212B, new int[]{0x00E5}); - CASE_FOLD.put(0x2132, new int[]{0x214E}); - CASE_FOLD.put(0x2160, new int[]{0x2170}); - CASE_FOLD.put(0x2161, new int[]{0x2171}); - CASE_FOLD.put(0x2162, new int[]{0x2172}); - CASE_FOLD.put(0x2163, new int[]{0x2173}); - CASE_FOLD.put(0x2164, new int[]{0x2174}); - CASE_FOLD.put(0x2165, new int[]{0x2175}); - CASE_FOLD.put(0x2166, new int[]{0x2176}); - CASE_FOLD.put(0x2167, new int[]{0x2177}); - CASE_FOLD.put(0x2168, new 
int[]{0x2178}); - CASE_FOLD.put(0x2169, new int[]{0x2179}); - CASE_FOLD.put(0x216A, new int[]{0x217A}); - CASE_FOLD.put(0x216B, new int[]{0x217B}); - CASE_FOLD.put(0x216C, new int[]{0x217C}); - CASE_FOLD.put(0x216D, new int[]{0x217D}); - CASE_FOLD.put(0x216E, new int[]{0x217E}); - CASE_FOLD.put(0x216F, new int[]{0x217F}); - CASE_FOLD.put(0x2183, new int[]{0x2184}); - CASE_FOLD.put(0x24B6, new int[]{0x24D0}); - CASE_FOLD.put(0x24B7, new int[]{0x24D1}); - CASE_FOLD.put(0x24B8, new int[]{0x24D2}); - CASE_FOLD.put(0x24B9, new int[]{0x24D3}); - CASE_FOLD.put(0x24BA, new int[]{0x24D4}); - CASE_FOLD.put(0x24BB, new int[]{0x24D5}); - CASE_FOLD.put(0x24BC, new int[]{0x24D6}); - CASE_FOLD.put(0x24BD, new int[]{0x24D7}); - CASE_FOLD.put(0x24BE, new int[]{0x24D8}); - CASE_FOLD.put(0x24BF, new int[]{0x24D9}); - CASE_FOLD.put(0x24C0, new int[]{0x24DA}); - CASE_FOLD.put(0x24C1, new int[]{0x24DB}); - CASE_FOLD.put(0x24C2, new int[]{0x24DC}); - CASE_FOLD.put(0x24C3, new int[]{0x24DD}); - CASE_FOLD.put(0x24C4, new int[]{0x24DE}); - CASE_FOLD.put(0x24C5, new int[]{0x24DF}); - CASE_FOLD.put(0x24C6, new int[]{0x24E0}); - CASE_FOLD.put(0x24C7, new int[]{0x24E1}); - CASE_FOLD.put(0x24C8, new int[]{0x24E2}); - CASE_FOLD.put(0x24C9, new int[]{0x24E3}); - CASE_FOLD.put(0x24CA, new int[]{0x24E4}); - CASE_FOLD.put(0x24CB, new int[]{0x24E5}); - CASE_FOLD.put(0x24CC, new int[]{0x24E6}); - CASE_FOLD.put(0x24CD, new int[]{0x24E7}); - CASE_FOLD.put(0x24CE, new int[]{0x24E8}); - CASE_FOLD.put(0x24CF, new int[]{0x24E9}); - CASE_FOLD.put(0x2C00, new int[]{0x2C30}); - CASE_FOLD.put(0x2C01, new int[]{0x2C31}); - CASE_FOLD.put(0x2C02, new int[]{0x2C32}); - CASE_FOLD.put(0x2C03, new int[]{0x2C33}); - CASE_FOLD.put(0x2C04, new int[]{0x2C34}); - CASE_FOLD.put(0x2C05, new int[]{0x2C35}); - CASE_FOLD.put(0x2C06, new int[]{0x2C36}); - CASE_FOLD.put(0x2C07, new int[]{0x2C37}); - CASE_FOLD.put(0x2C08, new int[]{0x2C38}); - CASE_FOLD.put(0x2C09, new int[]{0x2C39}); - CASE_FOLD.put(0x2C0A, new int[]{0x2C3A}); - 
CASE_FOLD.put(0x2C0B, new int[]{0x2C3B}); - CASE_FOLD.put(0x2C0C, new int[]{0x2C3C}); - CASE_FOLD.put(0x2C0D, new int[]{0x2C3D}); - CASE_FOLD.put(0x2C0E, new int[]{0x2C3E}); - CASE_FOLD.put(0x2C0F, new int[]{0x2C3F}); - CASE_FOLD.put(0x2C10, new int[]{0x2C40}); - CASE_FOLD.put(0x2C11, new int[]{0x2C41}); - CASE_FOLD.put(0x2C12, new int[]{0x2C42}); - CASE_FOLD.put(0x2C13, new int[]{0x2C43}); - CASE_FOLD.put(0x2C14, new int[]{0x2C44}); - CASE_FOLD.put(0x2C15, new int[]{0x2C45}); - CASE_FOLD.put(0x2C16, new int[]{0x2C46}); - CASE_FOLD.put(0x2C17, new int[]{0x2C47}); - CASE_FOLD.put(0x2C18, new int[]{0x2C48}); - CASE_FOLD.put(0x2C19, new int[]{0x2C49}); - CASE_FOLD.put(0x2C1A, new int[]{0x2C4A}); - CASE_FOLD.put(0x2C1B, new int[]{0x2C4B}); - CASE_FOLD.put(0x2C1C, new int[]{0x2C4C}); - CASE_FOLD.put(0x2C1D, new int[]{0x2C4D}); - CASE_FOLD.put(0x2C1E, new int[]{0x2C4E}); - CASE_FOLD.put(0x2C1F, new int[]{0x2C4F}); - CASE_FOLD.put(0x2C20, new int[]{0x2C50}); - CASE_FOLD.put(0x2C21, new int[]{0x2C51}); - CASE_FOLD.put(0x2C22, new int[]{0x2C52}); - CASE_FOLD.put(0x2C23, new int[]{0x2C53}); - CASE_FOLD.put(0x2C24, new int[]{0x2C54}); - CASE_FOLD.put(0x2C25, new int[]{0x2C55}); - CASE_FOLD.put(0x2C26, new int[]{0x2C56}); - CASE_FOLD.put(0x2C27, new int[]{0x2C57}); - CASE_FOLD.put(0x2C28, new int[]{0x2C58}); - CASE_FOLD.put(0x2C29, new int[]{0x2C59}); - CASE_FOLD.put(0x2C2A, new int[]{0x2C5A}); - CASE_FOLD.put(0x2C2B, new int[]{0x2C5B}); - CASE_FOLD.put(0x2C2C, new int[]{0x2C5C}); - CASE_FOLD.put(0x2C2D, new int[]{0x2C5D}); - CASE_FOLD.put(0x2C2E, new int[]{0x2C5E}); - CASE_FOLD.put(0x2C2F, new int[]{0x2C5F}); - CASE_FOLD.put(0x2C60, new int[]{0x2C61}); - CASE_FOLD.put(0x2C62, new int[]{0x026B}); - CASE_FOLD.put(0x2C63, new int[]{0x1D7D}); - CASE_FOLD.put(0x2C64, new int[]{0x027D}); - CASE_FOLD.put(0x2C67, new int[]{0x2C68}); - CASE_FOLD.put(0x2C69, new int[]{0x2C6A}); - CASE_FOLD.put(0x2C6B, new int[]{0x2C6C}); - CASE_FOLD.put(0x2C6D, new int[]{0x0251}); - 
CASE_FOLD.put(0x2C6E, new int[]{0x0271}); - CASE_FOLD.put(0x2C6F, new int[]{0x0250}); - CASE_FOLD.put(0x2C70, new int[]{0x0252}); - CASE_FOLD.put(0x2C72, new int[]{0x2C73}); - CASE_FOLD.put(0x2C75, new int[]{0x2C76}); - CASE_FOLD.put(0x2C7E, new int[]{0x023F}); - CASE_FOLD.put(0x2C7F, new int[]{0x0240}); - CASE_FOLD.put(0x2C80, new int[]{0x2C81}); - CASE_FOLD.put(0x2C82, new int[]{0x2C83}); - CASE_FOLD.put(0x2C84, new int[]{0x2C85}); - CASE_FOLD.put(0x2C86, new int[]{0x2C87}); - CASE_FOLD.put(0x2C88, new int[]{0x2C89}); - CASE_FOLD.put(0x2C8A, new int[]{0x2C8B}); - CASE_FOLD.put(0x2C8C, new int[]{0x2C8D}); - CASE_FOLD.put(0x2C8E, new int[]{0x2C8F}); - CASE_FOLD.put(0x2C90, new int[]{0x2C91}); - CASE_FOLD.put(0x2C92, new int[]{0x2C93}); - CASE_FOLD.put(0x2C94, new int[]{0x2C95}); - CASE_FOLD.put(0x2C96, new int[]{0x2C97}); - CASE_FOLD.put(0x2C98, new int[]{0x2C99}); - CASE_FOLD.put(0x2C9A, new int[]{0x2C9B}); - CASE_FOLD.put(0x2C9C, new int[]{0x2C9D}); - CASE_FOLD.put(0x2C9E, new int[]{0x2C9F}); - CASE_FOLD.put(0x2CA0, new int[]{0x2CA1}); - CASE_FOLD.put(0x2CA2, new int[]{0x2CA3}); - CASE_FOLD.put(0x2CA4, new int[]{0x2CA5}); - CASE_FOLD.put(0x2CA6, new int[]{0x2CA7}); - CASE_FOLD.put(0x2CA8, new int[]{0x2CA9}); - CASE_FOLD.put(0x2CAA, new int[]{0x2CAB}); - CASE_FOLD.put(0x2CAC, new int[]{0x2CAD}); - CASE_FOLD.put(0x2CAE, new int[]{0x2CAF}); - CASE_FOLD.put(0x2CB0, new int[]{0x2CB1}); - CASE_FOLD.put(0x2CB2, new int[]{0x2CB3}); - CASE_FOLD.put(0x2CB4, new int[]{0x2CB5}); - CASE_FOLD.put(0x2CB6, new int[]{0x2CB7}); - CASE_FOLD.put(0x2CB8, new int[]{0x2CB9}); - CASE_FOLD.put(0x2CBA, new int[]{0x2CBB}); - CASE_FOLD.put(0x2CBC, new int[]{0x2CBD}); - CASE_FOLD.put(0x2CBE, new int[]{0x2CBF}); - CASE_FOLD.put(0x2CC0, new int[]{0x2CC1}); - CASE_FOLD.put(0x2CC2, new int[]{0x2CC3}); - CASE_FOLD.put(0x2CC4, new int[]{0x2CC5}); - CASE_FOLD.put(0x2CC6, new int[]{0x2CC7}); - CASE_FOLD.put(0x2CC8, new int[]{0x2CC9}); - CASE_FOLD.put(0x2CCA, new int[]{0x2CCB}); - 
CASE_FOLD.put(0x2CCC, new int[]{0x2CCD}); - CASE_FOLD.put(0x2CCE, new int[]{0x2CCF}); - CASE_FOLD.put(0x2CD0, new int[]{0x2CD1}); - CASE_FOLD.put(0x2CD2, new int[]{0x2CD3}); - CASE_FOLD.put(0x2CD4, new int[]{0x2CD5}); - CASE_FOLD.put(0x2CD6, new int[]{0x2CD7}); - CASE_FOLD.put(0x2CD8, new int[]{0x2CD9}); - CASE_FOLD.put(0x2CDA, new int[]{0x2CDB}); - CASE_FOLD.put(0x2CDC, new int[]{0x2CDD}); - CASE_FOLD.put(0x2CDE, new int[]{0x2CDF}); - CASE_FOLD.put(0x2CE0, new int[]{0x2CE1}); - CASE_FOLD.put(0x2CE2, new int[]{0x2CE3}); - CASE_FOLD.put(0x2CEB, new int[]{0x2CEC}); - CASE_FOLD.put(0x2CED, new int[]{0x2CEE}); - CASE_FOLD.put(0x2CF2, new int[]{0x2CF3}); - CASE_FOLD.put(0xA640, new int[]{0xA641}); - CASE_FOLD.put(0xA642, new int[]{0xA643}); - CASE_FOLD.put(0xA644, new int[]{0xA645}); - CASE_FOLD.put(0xA646, new int[]{0xA647}); - CASE_FOLD.put(0xA648, new int[]{0xA649}); - CASE_FOLD.put(0xA64A, new int[]{0xA64B}); - CASE_FOLD.put(0xA64C, new int[]{0xA64D}); - CASE_FOLD.put(0xA64E, new int[]{0xA64F}); - CASE_FOLD.put(0xA650, new int[]{0xA651}); - CASE_FOLD.put(0xA652, new int[]{0xA653}); - CASE_FOLD.put(0xA654, new int[]{0xA655}); - CASE_FOLD.put(0xA656, new int[]{0xA657}); - CASE_FOLD.put(0xA658, new int[]{0xA659}); - CASE_FOLD.put(0xA65A, new int[]{0xA65B}); - CASE_FOLD.put(0xA65C, new int[]{0xA65D}); - CASE_FOLD.put(0xA65E, new int[]{0xA65F}); - CASE_FOLD.put(0xA660, new int[]{0xA661}); - CASE_FOLD.put(0xA662, new int[]{0xA663}); - CASE_FOLD.put(0xA664, new int[]{0xA665}); - CASE_FOLD.put(0xA666, new int[]{0xA667}); - CASE_FOLD.put(0xA668, new int[]{0xA669}); - CASE_FOLD.put(0xA66A, new int[]{0xA66B}); - CASE_FOLD.put(0xA66C, new int[]{0xA66D}); - CASE_FOLD.put(0xA680, new int[]{0xA681}); - CASE_FOLD.put(0xA682, new int[]{0xA683}); - CASE_FOLD.put(0xA684, new int[]{0xA685}); - CASE_FOLD.put(0xA686, new int[]{0xA687}); - CASE_FOLD.put(0xA688, new int[]{0xA689}); - CASE_FOLD.put(0xA68A, new int[]{0xA68B}); - CASE_FOLD.put(0xA68C, new int[]{0xA68D}); - 
CASE_FOLD.put(0xA68E, new int[]{0xA68F}); - CASE_FOLD.put(0xA690, new int[]{0xA691}); - CASE_FOLD.put(0xA692, new int[]{0xA693}); - CASE_FOLD.put(0xA694, new int[]{0xA695}); - CASE_FOLD.put(0xA696, new int[]{0xA697}); - CASE_FOLD.put(0xA698, new int[]{0xA699}); - CASE_FOLD.put(0xA69A, new int[]{0xA69B}); - CASE_FOLD.put(0xA722, new int[]{0xA723}); - CASE_FOLD.put(0xA724, new int[]{0xA725}); - CASE_FOLD.put(0xA726, new int[]{0xA727}); - CASE_FOLD.put(0xA728, new int[]{0xA729}); - CASE_FOLD.put(0xA72A, new int[]{0xA72B}); - CASE_FOLD.put(0xA72C, new int[]{0xA72D}); - CASE_FOLD.put(0xA72E, new int[]{0xA72F}); - CASE_FOLD.put(0xA732, new int[]{0xA733}); - CASE_FOLD.put(0xA734, new int[]{0xA735}); - CASE_FOLD.put(0xA736, new int[]{0xA737}); - CASE_FOLD.put(0xA738, new int[]{0xA739}); - CASE_FOLD.put(0xA73A, new int[]{0xA73B}); - CASE_FOLD.put(0xA73C, new int[]{0xA73D}); - CASE_FOLD.put(0xA73E, new int[]{0xA73F}); - CASE_FOLD.put(0xA740, new int[]{0xA741}); - CASE_FOLD.put(0xA742, new int[]{0xA743}); - CASE_FOLD.put(0xA744, new int[]{0xA745}); - CASE_FOLD.put(0xA746, new int[]{0xA747}); - CASE_FOLD.put(0xA748, new int[]{0xA749}); - CASE_FOLD.put(0xA74A, new int[]{0xA74B}); - CASE_FOLD.put(0xA74C, new int[]{0xA74D}); - CASE_FOLD.put(0xA74E, new int[]{0xA74F}); - CASE_FOLD.put(0xA750, new int[]{0xA751}); - CASE_FOLD.put(0xA752, new int[]{0xA753}); - CASE_FOLD.put(0xA754, new int[]{0xA755}); - CASE_FOLD.put(0xA756, new int[]{0xA757}); - CASE_FOLD.put(0xA758, new int[]{0xA759}); - CASE_FOLD.put(0xA75A, new int[]{0xA75B}); - CASE_FOLD.put(0xA75C, new int[]{0xA75D}); - CASE_FOLD.put(0xA75E, new int[]{0xA75F}); - CASE_FOLD.put(0xA760, new int[]{0xA761}); - CASE_FOLD.put(0xA762, new int[]{0xA763}); - CASE_FOLD.put(0xA764, new int[]{0xA765}); - CASE_FOLD.put(0xA766, new int[]{0xA767}); - CASE_FOLD.put(0xA768, new int[]{0xA769}); - CASE_FOLD.put(0xA76A, new int[]{0xA76B}); - CASE_FOLD.put(0xA76C, new int[]{0xA76D}); - CASE_FOLD.put(0xA76E, new int[]{0xA76F}); - 
CASE_FOLD.put(0xA779, new int[]{0xA77A}); - CASE_FOLD.put(0xA77B, new int[]{0xA77C}); - CASE_FOLD.put(0xA77D, new int[]{0x1D79}); - CASE_FOLD.put(0xA77E, new int[]{0xA77F}); - CASE_FOLD.put(0xA780, new int[]{0xA781}); - CASE_FOLD.put(0xA782, new int[]{0xA783}); - CASE_FOLD.put(0xA784, new int[]{0xA785}); - CASE_FOLD.put(0xA786, new int[]{0xA787}); - CASE_FOLD.put(0xA78B, new int[]{0xA78C}); - CASE_FOLD.put(0xA78D, new int[]{0x0265}); - CASE_FOLD.put(0xA790, new int[]{0xA791}); - CASE_FOLD.put(0xA792, new int[]{0xA793}); - CASE_FOLD.put(0xA796, new int[]{0xA797}); - CASE_FOLD.put(0xA798, new int[]{0xA799}); - CASE_FOLD.put(0xA79A, new int[]{0xA79B}); - CASE_FOLD.put(0xA79C, new int[]{0xA79D}); - CASE_FOLD.put(0xA79E, new int[]{0xA79F}); - CASE_FOLD.put(0xA7A0, new int[]{0xA7A1}); - CASE_FOLD.put(0xA7A2, new int[]{0xA7A3}); - CASE_FOLD.put(0xA7A4, new int[]{0xA7A5}); - CASE_FOLD.put(0xA7A6, new int[]{0xA7A7}); - CASE_FOLD.put(0xA7A8, new int[]{0xA7A9}); - CASE_FOLD.put(0xA7AA, new int[]{0x0266}); - CASE_FOLD.put(0xA7AB, new int[]{0x025C}); - CASE_FOLD.put(0xA7AC, new int[]{0x0261}); - CASE_FOLD.put(0xA7AD, new int[]{0x026C}); - CASE_FOLD.put(0xA7AE, new int[]{0x026A}); - CASE_FOLD.put(0xA7B0, new int[]{0x029E}); - CASE_FOLD.put(0xA7B1, new int[]{0x0287}); - CASE_FOLD.put(0xA7B2, new int[]{0x029D}); - CASE_FOLD.put(0xA7B3, new int[]{0xAB53}); - CASE_FOLD.put(0xA7B4, new int[]{0xA7B5}); - CASE_FOLD.put(0xA7B6, new int[]{0xA7B7}); - CASE_FOLD.put(0xA7B8, new int[]{0xA7B9}); - CASE_FOLD.put(0xA7BA, new int[]{0xA7BB}); - CASE_FOLD.put(0xA7BC, new int[]{0xA7BD}); - CASE_FOLD.put(0xA7BE, new int[]{0xA7BF}); - CASE_FOLD.put(0xA7C0, new int[]{0xA7C1}); - CASE_FOLD.put(0xA7C2, new int[]{0xA7C3}); - CASE_FOLD.put(0xA7C4, new int[]{0xA794}); - CASE_FOLD.put(0xA7C5, new int[]{0x0282}); - CASE_FOLD.put(0xA7C6, new int[]{0x1D8E}); - CASE_FOLD.put(0xA7C7, new int[]{0xA7C8}); - CASE_FOLD.put(0xA7C9, new int[]{0xA7CA}); - CASE_FOLD.put(0xA7D0, new int[]{0xA7D1}); - 
CASE_FOLD.put(0xA7D6, new int[]{0xA7D7}); - CASE_FOLD.put(0xA7D8, new int[]{0xA7D9}); - CASE_FOLD.put(0xA7F5, new int[]{0xA7F6}); - CASE_FOLD.put(0xAB70, new int[]{0x13A0}); - CASE_FOLD.put(0xAB71, new int[]{0x13A1}); - CASE_FOLD.put(0xAB72, new int[]{0x13A2}); - CASE_FOLD.put(0xAB73, new int[]{0x13A3}); - CASE_FOLD.put(0xAB74, new int[]{0x13A4}); - CASE_FOLD.put(0xAB75, new int[]{0x13A5}); - CASE_FOLD.put(0xAB76, new int[]{0x13A6}); - CASE_FOLD.put(0xAB77, new int[]{0x13A7}); - CASE_FOLD.put(0xAB78, new int[]{0x13A8}); - CASE_FOLD.put(0xAB79, new int[]{0x13A9}); - CASE_FOLD.put(0xAB7A, new int[]{0x13AA}); - CASE_FOLD.put(0xAB7B, new int[]{0x13AB}); - CASE_FOLD.put(0xAB7C, new int[]{0x13AC}); - CASE_FOLD.put(0xAB7D, new int[]{0x13AD}); - CASE_FOLD.put(0xAB7E, new int[]{0x13AE}); - CASE_FOLD.put(0xAB7F, new int[]{0x13AF}); - CASE_FOLD.put(0xAB80, new int[]{0x13B0}); - CASE_FOLD.put(0xAB81, new int[]{0x13B1}); - CASE_FOLD.put(0xAB82, new int[]{0x13B2}); - CASE_FOLD.put(0xAB83, new int[]{0x13B3}); - CASE_FOLD.put(0xAB84, new int[]{0x13B4}); - CASE_FOLD.put(0xAB85, new int[]{0x13B5}); - CASE_FOLD.put(0xAB86, new int[]{0x13B6}); - CASE_FOLD.put(0xAB87, new int[]{0x13B7}); - CASE_FOLD.put(0xAB88, new int[]{0x13B8}); - CASE_FOLD.put(0xAB89, new int[]{0x13B9}); - CASE_FOLD.put(0xAB8A, new int[]{0x13BA}); - CASE_FOLD.put(0xAB8B, new int[]{0x13BB}); - CASE_FOLD.put(0xAB8C, new int[]{0x13BC}); - CASE_FOLD.put(0xAB8D, new int[]{0x13BD}); - CASE_FOLD.put(0xAB8E, new int[]{0x13BE}); - CASE_FOLD.put(0xAB8F, new int[]{0x13BF}); - CASE_FOLD.put(0xAB90, new int[]{0x13C0}); - CASE_FOLD.put(0xAB91, new int[]{0x13C1}); - CASE_FOLD.put(0xAB92, new int[]{0x13C2}); - CASE_FOLD.put(0xAB93, new int[]{0x13C3}); - CASE_FOLD.put(0xAB94, new int[]{0x13C4}); - CASE_FOLD.put(0xAB95, new int[]{0x13C5}); - CASE_FOLD.put(0xAB96, new int[]{0x13C6}); - CASE_FOLD.put(0xAB97, new int[]{0x13C7}); - CASE_FOLD.put(0xAB98, new int[]{0x13C8}); - CASE_FOLD.put(0xAB99, new int[]{0x13C9}); - 
CASE_FOLD.put(0xAB9A, new int[]{0x13CA}); - CASE_FOLD.put(0xAB9B, new int[]{0x13CB}); - CASE_FOLD.put(0xAB9C, new int[]{0x13CC}); - CASE_FOLD.put(0xAB9D, new int[]{0x13CD}); - CASE_FOLD.put(0xAB9E, new int[]{0x13CE}); - CASE_FOLD.put(0xAB9F, new int[]{0x13CF}); - CASE_FOLD.put(0xABA0, new int[]{0x13D0}); - CASE_FOLD.put(0xABA1, new int[]{0x13D1}); - CASE_FOLD.put(0xABA2, new int[]{0x13D2}); - CASE_FOLD.put(0xABA3, new int[]{0x13D3}); - CASE_FOLD.put(0xABA4, new int[]{0x13D4}); - CASE_FOLD.put(0xABA5, new int[]{0x13D5}); - CASE_FOLD.put(0xABA6, new int[]{0x13D6}); - CASE_FOLD.put(0xABA7, new int[]{0x13D7}); - CASE_FOLD.put(0xABA8, new int[]{0x13D8}); - CASE_FOLD.put(0xABA9, new int[]{0x13D9}); - CASE_FOLD.put(0xABAA, new int[]{0x13DA}); - CASE_FOLD.put(0xABAB, new int[]{0x13DB}); - CASE_FOLD.put(0xABAC, new int[]{0x13DC}); - CASE_FOLD.put(0xABAD, new int[]{0x13DD}); - CASE_FOLD.put(0xABAE, new int[]{0x13DE}); - CASE_FOLD.put(0xABAF, new int[]{0x13DF}); - CASE_FOLD.put(0xABB0, new int[]{0x13E0}); - CASE_FOLD.put(0xABB1, new int[]{0x13E1}); - CASE_FOLD.put(0xABB2, new int[]{0x13E2}); - CASE_FOLD.put(0xABB3, new int[]{0x13E3}); - CASE_FOLD.put(0xABB4, new int[]{0x13E4}); - CASE_FOLD.put(0xABB5, new int[]{0x13E5}); - CASE_FOLD.put(0xABB6, new int[]{0x13E6}); - CASE_FOLD.put(0xABB7, new int[]{0x13E7}); - CASE_FOLD.put(0xABB8, new int[]{0x13E8}); - CASE_FOLD.put(0xABB9, new int[]{0x13E9}); - CASE_FOLD.put(0xABBA, new int[]{0x13EA}); - CASE_FOLD.put(0xABBB, new int[]{0x13EB}); - CASE_FOLD.put(0xABBC, new int[]{0x13EC}); - CASE_FOLD.put(0xABBD, new int[]{0x13ED}); - CASE_FOLD.put(0xABBE, new int[]{0x13EE}); - CASE_FOLD.put(0xABBF, new int[]{0x13EF}); - CASE_FOLD.put(0xFB00, new int[]{0x0066, 0x0066}); - CASE_FOLD.put(0xFB01, new int[]{0x0066, 0x0069}); - CASE_FOLD.put(0xFB02, new int[]{0x0066, 0x006C}); - CASE_FOLD.put(0xFB03, new int[]{0x0066, 0x0066, 0x0069}); - CASE_FOLD.put(0xFB04, new int[]{0x0066, 0x0066, 0x006C}); - CASE_FOLD.put(0xFB05, new int[]{0x0073, 0x0074}); - 
CASE_FOLD.put(0xFB06, new int[]{0x0073, 0x0074}); - CASE_FOLD.put(0xFB13, new int[]{0x0574, 0x0576}); - CASE_FOLD.put(0xFB14, new int[]{0x0574, 0x0565}); - CASE_FOLD.put(0xFB15, new int[]{0x0574, 0x056B}); - CASE_FOLD.put(0xFB16, new int[]{0x057E, 0x0576}); - CASE_FOLD.put(0xFB17, new int[]{0x0574, 0x056D}); - CASE_FOLD.put(0xFF21, new int[]{0xFF41}); - CASE_FOLD.put(0xFF22, new int[]{0xFF42}); - CASE_FOLD.put(0xFF23, new int[]{0xFF43}); - CASE_FOLD.put(0xFF24, new int[]{0xFF44}); - CASE_FOLD.put(0xFF25, new int[]{0xFF45}); - CASE_FOLD.put(0xFF26, new int[]{0xFF46}); - CASE_FOLD.put(0xFF27, new int[]{0xFF47}); - CASE_FOLD.put(0xFF28, new int[]{0xFF48}); - CASE_FOLD.put(0xFF29, new int[]{0xFF49}); - CASE_FOLD.put(0xFF2A, new int[]{0xFF4A}); - CASE_FOLD.put(0xFF2B, new int[]{0xFF4B}); - CASE_FOLD.put(0xFF2C, new int[]{0xFF4C}); - CASE_FOLD.put(0xFF2D, new int[]{0xFF4D}); - CASE_FOLD.put(0xFF2E, new int[]{0xFF4E}); - CASE_FOLD.put(0xFF2F, new int[]{0xFF4F}); - CASE_FOLD.put(0xFF30, new int[]{0xFF50}); - CASE_FOLD.put(0xFF31, new int[]{0xFF51}); - CASE_FOLD.put(0xFF32, new int[]{0xFF52}); - CASE_FOLD.put(0xFF33, new int[]{0xFF53}); - CASE_FOLD.put(0xFF34, new int[]{0xFF54}); - CASE_FOLD.put(0xFF35, new int[]{0xFF55}); - CASE_FOLD.put(0xFF36, new int[]{0xFF56}); - CASE_FOLD.put(0xFF37, new int[]{0xFF57}); - CASE_FOLD.put(0xFF38, new int[]{0xFF58}); - CASE_FOLD.put(0xFF39, new int[]{0xFF59}); - CASE_FOLD.put(0xFF3A, new int[]{0xFF5A}); - CASE_FOLD.put(0x10400, new int[]{0x10428}); - CASE_FOLD.put(0x10401, new int[]{0x10429}); - CASE_FOLD.put(0x10402, new int[]{0x1042A}); - CASE_FOLD.put(0x10403, new int[]{0x1042B}); - CASE_FOLD.put(0x10404, new int[]{0x1042C}); - CASE_FOLD.put(0x10405, new int[]{0x1042D}); - CASE_FOLD.put(0x10406, new int[]{0x1042E}); - CASE_FOLD.put(0x10407, new int[]{0x1042F}); - CASE_FOLD.put(0x10408, new int[]{0x10430}); - CASE_FOLD.put(0x10409, new int[]{0x10431}); - CASE_FOLD.put(0x1040A, new int[]{0x10432}); - CASE_FOLD.put(0x1040B, new 
int[]{0x10433}); - CASE_FOLD.put(0x1040C, new int[]{0x10434}); - CASE_FOLD.put(0x1040D, new int[]{0x10435}); - CASE_FOLD.put(0x1040E, new int[]{0x10436}); - CASE_FOLD.put(0x1040F, new int[]{0x10437}); - CASE_FOLD.put(0x10410, new int[]{0x10438}); - CASE_FOLD.put(0x10411, new int[]{0x10439}); - CASE_FOLD.put(0x10412, new int[]{0x1043A}); - CASE_FOLD.put(0x10413, new int[]{0x1043B}); - CASE_FOLD.put(0x10414, new int[]{0x1043C}); - CASE_FOLD.put(0x10415, new int[]{0x1043D}); - CASE_FOLD.put(0x10416, new int[]{0x1043E}); - CASE_FOLD.put(0x10417, new int[]{0x1043F}); - CASE_FOLD.put(0x10418, new int[]{0x10440}); - CASE_FOLD.put(0x10419, new int[]{0x10441}); - CASE_FOLD.put(0x1041A, new int[]{0x10442}); - CASE_FOLD.put(0x1041B, new int[]{0x10443}); - CASE_FOLD.put(0x1041C, new int[]{0x10444}); - CASE_FOLD.put(0x1041D, new int[]{0x10445}); - CASE_FOLD.put(0x1041E, new int[]{0x10446}); - CASE_FOLD.put(0x1041F, new int[]{0x10447}); - CASE_FOLD.put(0x10420, new int[]{0x10448}); - CASE_FOLD.put(0x10421, new int[]{0x10449}); - CASE_FOLD.put(0x10422, new int[]{0x1044A}); - CASE_FOLD.put(0x10423, new int[]{0x1044B}); - CASE_FOLD.put(0x10424, new int[]{0x1044C}); - CASE_FOLD.put(0x10425, new int[]{0x1044D}); - CASE_FOLD.put(0x10426, new int[]{0x1044E}); - CASE_FOLD.put(0x10427, new int[]{0x1044F}); - CASE_FOLD.put(0x104B0, new int[]{0x104D8}); - CASE_FOLD.put(0x104B1, new int[]{0x104D9}); - CASE_FOLD.put(0x104B2, new int[]{0x104DA}); - CASE_FOLD.put(0x104B3, new int[]{0x104DB}); - CASE_FOLD.put(0x104B4, new int[]{0x104DC}); - CASE_FOLD.put(0x104B5, new int[]{0x104DD}); - CASE_FOLD.put(0x104B6, new int[]{0x104DE}); - CASE_FOLD.put(0x104B7, new int[]{0x104DF}); - CASE_FOLD.put(0x104B8, new int[]{0x104E0}); - CASE_FOLD.put(0x104B9, new int[]{0x104E1}); - CASE_FOLD.put(0x104BA, new int[]{0x104E2}); - CASE_FOLD.put(0x104BB, new int[]{0x104E3}); - CASE_FOLD.put(0x104BC, new int[]{0x104E4}); - CASE_FOLD.put(0x104BD, new int[]{0x104E5}); - CASE_FOLD.put(0x104BE, new int[]{0x104E6}); - 
CASE_FOLD.put(0x104BF, new int[]{0x104E7}); - CASE_FOLD.put(0x104C0, new int[]{0x104E8}); - CASE_FOLD.put(0x104C1, new int[]{0x104E9}); - CASE_FOLD.put(0x104C2, new int[]{0x104EA}); - CASE_FOLD.put(0x104C3, new int[]{0x104EB}); - CASE_FOLD.put(0x104C4, new int[]{0x104EC}); - CASE_FOLD.put(0x104C5, new int[]{0x104ED}); - CASE_FOLD.put(0x104C6, new int[]{0x104EE}); - CASE_FOLD.put(0x104C7, new int[]{0x104EF}); - CASE_FOLD.put(0x104C8, new int[]{0x104F0}); - CASE_FOLD.put(0x104C9, new int[]{0x104F1}); - CASE_FOLD.put(0x104CA, new int[]{0x104F2}); - CASE_FOLD.put(0x104CB, new int[]{0x104F3}); - CASE_FOLD.put(0x104CC, new int[]{0x104F4}); - CASE_FOLD.put(0x104CD, new int[]{0x104F5}); - CASE_FOLD.put(0x104CE, new int[]{0x104F6}); - CASE_FOLD.put(0x104CF, new int[]{0x104F7}); - CASE_FOLD.put(0x104D0, new int[]{0x104F8}); - CASE_FOLD.put(0x104D1, new int[]{0x104F9}); - CASE_FOLD.put(0x104D2, new int[]{0x104FA}); - CASE_FOLD.put(0x104D3, new int[]{0x104FB}); - CASE_FOLD.put(0x10570, new int[]{0x10597}); - CASE_FOLD.put(0x10571, new int[]{0x10598}); - CASE_FOLD.put(0x10572, new int[]{0x10599}); - CASE_FOLD.put(0x10573, new int[]{0x1059A}); - CASE_FOLD.put(0x10574, new int[]{0x1059B}); - CASE_FOLD.put(0x10575, new int[]{0x1059C}); - CASE_FOLD.put(0x10576, new int[]{0x1059D}); - CASE_FOLD.put(0x10577, new int[]{0x1059E}); - CASE_FOLD.put(0x10578, new int[]{0x1059F}); - CASE_FOLD.put(0x10579, new int[]{0x105A0}); - CASE_FOLD.put(0x1057A, new int[]{0x105A1}); - CASE_FOLD.put(0x1057C, new int[]{0x105A3}); - CASE_FOLD.put(0x1057D, new int[]{0x105A4}); - CASE_FOLD.put(0x1057E, new int[]{0x105A5}); - CASE_FOLD.put(0x1057F, new int[]{0x105A6}); - CASE_FOLD.put(0x10580, new int[]{0x105A7}); - CASE_FOLD.put(0x10581, new int[]{0x105A8}); - CASE_FOLD.put(0x10582, new int[]{0x105A9}); - CASE_FOLD.put(0x10583, new int[]{0x105AA}); - CASE_FOLD.put(0x10584, new int[]{0x105AB}); - CASE_FOLD.put(0x10585, new int[]{0x105AC}); - CASE_FOLD.put(0x10586, new int[]{0x105AD}); - 
CASE_FOLD.put(0x10587, new int[]{0x105AE}); - CASE_FOLD.put(0x10588, new int[]{0x105AF}); - CASE_FOLD.put(0x10589, new int[]{0x105B0}); - CASE_FOLD.put(0x1058A, new int[]{0x105B1}); - CASE_FOLD.put(0x1058C, new int[]{0x105B3}); - CASE_FOLD.put(0x1058D, new int[]{0x105B4}); - CASE_FOLD.put(0x1058E, new int[]{0x105B5}); - CASE_FOLD.put(0x1058F, new int[]{0x105B6}); - CASE_FOLD.put(0x10590, new int[]{0x105B7}); - CASE_FOLD.put(0x10591, new int[]{0x105B8}); - CASE_FOLD.put(0x10592, new int[]{0x105B9}); - CASE_FOLD.put(0x10594, new int[]{0x105BB}); - CASE_FOLD.put(0x10595, new int[]{0x105BC}); - CASE_FOLD.put(0x10C80, new int[]{0x10CC0}); - CASE_FOLD.put(0x10C81, new int[]{0x10CC1}); - CASE_FOLD.put(0x10C82, new int[]{0x10CC2}); - CASE_FOLD.put(0x10C83, new int[]{0x10CC3}); - CASE_FOLD.put(0x10C84, new int[]{0x10CC4}); - CASE_FOLD.put(0x10C85, new int[]{0x10CC5}); - CASE_FOLD.put(0x10C86, new int[]{0x10CC6}); - CASE_FOLD.put(0x10C87, new int[]{0x10CC7}); - CASE_FOLD.put(0x10C88, new int[]{0x10CC8}); - CASE_FOLD.put(0x10C89, new int[]{0x10CC9}); - CASE_FOLD.put(0x10C8A, new int[]{0x10CCA}); - CASE_FOLD.put(0x10C8B, new int[]{0x10CCB}); - CASE_FOLD.put(0x10C8C, new int[]{0x10CCC}); - CASE_FOLD.put(0x10C8D, new int[]{0x10CCD}); - CASE_FOLD.put(0x10C8E, new int[]{0x10CCE}); - CASE_FOLD.put(0x10C8F, new int[]{0x10CCF}); - CASE_FOLD.put(0x10C90, new int[]{0x10CD0}); - CASE_FOLD.put(0x10C91, new int[]{0x10CD1}); - CASE_FOLD.put(0x10C92, new int[]{0x10CD2}); - CASE_FOLD.put(0x10C93, new int[]{0x10CD3}); - CASE_FOLD.put(0x10C94, new int[]{0x10CD4}); - CASE_FOLD.put(0x10C95, new int[]{0x10CD5}); - CASE_FOLD.put(0x10C96, new int[]{0x10CD6}); - CASE_FOLD.put(0x10C97, new int[]{0x10CD7}); - CASE_FOLD.put(0x10C98, new int[]{0x10CD8}); - CASE_FOLD.put(0x10C99, new int[]{0x10CD9}); - CASE_FOLD.put(0x10C9A, new int[]{0x10CDA}); - CASE_FOLD.put(0x10C9B, new int[]{0x10CDB}); - CASE_FOLD.put(0x10C9C, new int[]{0x10CDC}); - CASE_FOLD.put(0x10C9D, new int[]{0x10CDD}); - 
CASE_FOLD.put(0x10C9E, new int[]{0x10CDE}); - CASE_FOLD.put(0x10C9F, new int[]{0x10CDF}); - CASE_FOLD.put(0x10CA0, new int[]{0x10CE0}); - CASE_FOLD.put(0x10CA1, new int[]{0x10CE1}); - CASE_FOLD.put(0x10CA2, new int[]{0x10CE2}); - CASE_FOLD.put(0x10CA3, new int[]{0x10CE3}); - CASE_FOLD.put(0x10CA4, new int[]{0x10CE4}); - CASE_FOLD.put(0x10CA5, new int[]{0x10CE5}); - CASE_FOLD.put(0x10CA6, new int[]{0x10CE6}); - CASE_FOLD.put(0x10CA7, new int[]{0x10CE7}); - CASE_FOLD.put(0x10CA8, new int[]{0x10CE8}); - CASE_FOLD.put(0x10CA9, new int[]{0x10CE9}); - CASE_FOLD.put(0x10CAA, new int[]{0x10CEA}); - CASE_FOLD.put(0x10CAB, new int[]{0x10CEB}); - CASE_FOLD.put(0x10CAC, new int[]{0x10CEC}); - CASE_FOLD.put(0x10CAD, new int[]{0x10CED}); - CASE_FOLD.put(0x10CAE, new int[]{0x10CEE}); - CASE_FOLD.put(0x10CAF, new int[]{0x10CEF}); - CASE_FOLD.put(0x10CB0, new int[]{0x10CF0}); - CASE_FOLD.put(0x10CB1, new int[]{0x10CF1}); - CASE_FOLD.put(0x10CB2, new int[]{0x10CF2}); - CASE_FOLD.put(0x118A0, new int[]{0x118C0}); - CASE_FOLD.put(0x118A1, new int[]{0x118C1}); - CASE_FOLD.put(0x118A2, new int[]{0x118C2}); - CASE_FOLD.put(0x118A3, new int[]{0x118C3}); - CASE_FOLD.put(0x118A4, new int[]{0x118C4}); - CASE_FOLD.put(0x118A5, new int[]{0x118C5}); - CASE_FOLD.put(0x118A6, new int[]{0x118C6}); - CASE_FOLD.put(0x118A7, new int[]{0x118C7}); - CASE_FOLD.put(0x118A8, new int[]{0x118C8}); - CASE_FOLD.put(0x118A9, new int[]{0x118C9}); - CASE_FOLD.put(0x118AA, new int[]{0x118CA}); - CASE_FOLD.put(0x118AB, new int[]{0x118CB}); - CASE_FOLD.put(0x118AC, new int[]{0x118CC}); - CASE_FOLD.put(0x118AD, new int[]{0x118CD}); - CASE_FOLD.put(0x118AE, new int[]{0x118CE}); - CASE_FOLD.put(0x118AF, new int[]{0x118CF}); - CASE_FOLD.put(0x118B0, new int[]{0x118D0}); - CASE_FOLD.put(0x118B1, new int[]{0x118D1}); - CASE_FOLD.put(0x118B2, new int[]{0x118D2}); - CASE_FOLD.put(0x118B3, new int[]{0x118D3}); - CASE_FOLD.put(0x118B4, new int[]{0x118D4}); - CASE_FOLD.put(0x118B5, new int[]{0x118D5}); - 
CASE_FOLD.put(0x118B6, new int[]{0x118D6}); - CASE_FOLD.put(0x118B7, new int[]{0x118D7}); - CASE_FOLD.put(0x118B8, new int[]{0x118D8}); - CASE_FOLD.put(0x118B9, new int[]{0x118D9}); - CASE_FOLD.put(0x118BA, new int[]{0x118DA}); - CASE_FOLD.put(0x118BB, new int[]{0x118DB}); - CASE_FOLD.put(0x118BC, new int[]{0x118DC}); - CASE_FOLD.put(0x118BD, new int[]{0x118DD}); - CASE_FOLD.put(0x118BE, new int[]{0x118DE}); - CASE_FOLD.put(0x118BF, new int[]{0x118DF}); - CASE_FOLD.put(0x16E40, new int[]{0x16E60}); - CASE_FOLD.put(0x16E41, new int[]{0x16E61}); - CASE_FOLD.put(0x16E42, new int[]{0x16E62}); - CASE_FOLD.put(0x16E43, new int[]{0x16E63}); - CASE_FOLD.put(0x16E44, new int[]{0x16E64}); - CASE_FOLD.put(0x16E45, new int[]{0x16E65}); - CASE_FOLD.put(0x16E46, new int[]{0x16E66}); - CASE_FOLD.put(0x16E47, new int[]{0x16E67}); - CASE_FOLD.put(0x16E48, new int[]{0x16E68}); - CASE_FOLD.put(0x16E49, new int[]{0x16E69}); - CASE_FOLD.put(0x16E4A, new int[]{0x16E6A}); - CASE_FOLD.put(0x16E4B, new int[]{0x16E6B}); - CASE_FOLD.put(0x16E4C, new int[]{0x16E6C}); - CASE_FOLD.put(0x16E4D, new int[]{0x16E6D}); - CASE_FOLD.put(0x16E4E, new int[]{0x16E6E}); - CASE_FOLD.put(0x16E4F, new int[]{0x16E6F}); - CASE_FOLD.put(0x16E50, new int[]{0x16E70}); - CASE_FOLD.put(0x16E51, new int[]{0x16E71}); - CASE_FOLD.put(0x16E52, new int[]{0x16E72}); - CASE_FOLD.put(0x16E53, new int[]{0x16E73}); - CASE_FOLD.put(0x16E54, new int[]{0x16E74}); - CASE_FOLD.put(0x16E55, new int[]{0x16E75}); - CASE_FOLD.put(0x16E56, new int[]{0x16E76}); - CASE_FOLD.put(0x16E57, new int[]{0x16E77}); - CASE_FOLD.put(0x16E58, new int[]{0x16E78}); - CASE_FOLD.put(0x16E59, new int[]{0x16E79}); - CASE_FOLD.put(0x16E5A, new int[]{0x16E7A}); - CASE_FOLD.put(0x16E5B, new int[]{0x16E7B}); - CASE_FOLD.put(0x16E5C, new int[]{0x16E7C}); - CASE_FOLD.put(0x16E5D, new int[]{0x16E7D}); - CASE_FOLD.put(0x16E5E, new int[]{0x16E7E}); - CASE_FOLD.put(0x16E5F, new int[]{0x16E7F}); - CASE_FOLD.put(0x1E900, new int[]{0x1E922}); - 
CASE_FOLD.put(0x1E901, new int[]{0x1E923}); - CASE_FOLD.put(0x1E902, new int[]{0x1E924}); - CASE_FOLD.put(0x1E903, new int[]{0x1E925}); - CASE_FOLD.put(0x1E904, new int[]{0x1E926}); - CASE_FOLD.put(0x1E905, new int[]{0x1E927}); - CASE_FOLD.put(0x1E906, new int[]{0x1E928}); - CASE_FOLD.put(0x1E907, new int[]{0x1E929}); - CASE_FOLD.put(0x1E908, new int[]{0x1E92A}); - CASE_FOLD.put(0x1E909, new int[]{0x1E92B}); - CASE_FOLD.put(0x1E90A, new int[]{0x1E92C}); - CASE_FOLD.put(0x1E90B, new int[]{0x1E92D}); - CASE_FOLD.put(0x1E90C, new int[]{0x1E92E}); - CASE_FOLD.put(0x1E90D, new int[]{0x1E92F}); - CASE_FOLD.put(0x1E90E, new int[]{0x1E930}); - CASE_FOLD.put(0x1E90F, new int[]{0x1E931}); - CASE_FOLD.put(0x1E910, new int[]{0x1E932}); - CASE_FOLD.put(0x1E911, new int[]{0x1E933}); - CASE_FOLD.put(0x1E912, new int[]{0x1E934}); - CASE_FOLD.put(0x1E913, new int[]{0x1E935}); - CASE_FOLD.put(0x1E914, new int[]{0x1E936}); - CASE_FOLD.put(0x1E915, new int[]{0x1E937}); - CASE_FOLD.put(0x1E916, new int[]{0x1E938}); - CASE_FOLD.put(0x1E917, new int[]{0x1E939}); - CASE_FOLD.put(0x1E918, new int[]{0x1E93A}); - CASE_FOLD.put(0x1E919, new int[]{0x1E93B}); - CASE_FOLD.put(0x1E91A, new int[]{0x1E93C}); - CASE_FOLD.put(0x1E91B, new int[]{0x1E93D}); - CASE_FOLD.put(0x1E91C, new int[]{0x1E93E}); - CASE_FOLD.put(0x1E91D, new int[]{0x1E93F}); - CASE_FOLD.put(0x1E91E, new int[]{0x1E940}); - CASE_FOLD.put(0x1E91F, new int[]{0x1E941}); - CASE_FOLD.put(0x1E920, new int[]{0x1E942}); - CASE_FOLD.put(0x1E921, new int[]{0x1E943}); - } -} diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/RubyFlavor.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/RubyFlavor.java index 229263bcbad7..f42add383e20 100644 --- a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/RubyFlavor.java +++ b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/RubyFlavor.java 
@@ -40,20 +40,21 @@ */ package com.oracle.truffle.regex.tregex.parser.flavors; +import java.util.function.BiPredicate; + import com.oracle.truffle.regex.RegexLanguage; import com.oracle.truffle.regex.RegexSource; import com.oracle.truffle.regex.tregex.buffer.CompilationBuffer; import com.oracle.truffle.regex.tregex.nfa.QuantifierGuard; import com.oracle.truffle.regex.tregex.nodes.nfa.TRegexBacktrackingNFAExecutorNode; +import com.oracle.truffle.regex.tregex.parser.CaseFoldData; import com.oracle.truffle.regex.tregex.parser.JSRegexParser; +import com.oracle.truffle.regex.tregex.parser.MultiCharacterCaseFolding; import com.oracle.truffle.regex.tregex.parser.RegexParser; import com.oracle.truffle.regex.tregex.parser.RegexValidator; import com.oracle.truffle.regex.tregex.parser.ast.RegexAST; import com.oracle.truffle.regex.tregex.parser.ast.visitors.NFATraversalRegexASTVisitor; -import java.util.Arrays; -import java.util.function.BiPredicate; - /** * An implementation of the Ruby regex flavor. 
* @@ -244,16 +245,6 @@ public BiPredicate getEqualsIgnoreCasePredicate(RegexAST ast) } private static boolean equalsIgnoreCase(int codePointA, int codePointB) { - int[] foldedA = RubyCaseFolding.caseFold(codePointA); - int[] foldedB = RubyCaseFolding.caseFold(codePointB); - if (foldedA == null && foldedB == null) { - return codePointA == codePointB; - } else if (foldedA == null) { - return foldedB.length == 1 && codePointA == foldedB[0]; - } else if (foldedB == null) { - return foldedA.length == 1 && foldedA[0] == codePointB; - } else { - return Arrays.equals(foldedA, foldedB); - } + return MultiCharacterCaseFolding.equalsIgnoreCase(CaseFoldData.CaseFoldAlgorithm.Ruby, codePointA, codePointB); } } diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/RubyRegexParser.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/RubyRegexParser.java index 9465267b7782..6a5582aeebfe 100644 --- a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/RubyRegexParser.java +++ b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/RubyRegexParser.java @@ -40,6 +40,8 @@ */ package com.oracle.truffle.regex.tregex.parser.flavors; +import static com.oracle.truffle.regex.tregex.parser.RegexLexer.isAscii; + import java.math.BigInteger; import java.util.ArrayDeque; import java.util.ArrayList; @@ -50,7 +52,6 @@ import java.util.List; import java.util.Map; import java.util.Optional; -import java.util.function.BiConsumer; import java.util.function.Predicate; import org.graalvm.collections.Pair; @@ -64,12 +65,14 @@ import com.oracle.truffle.regex.UnsupportedRegexException; import com.oracle.truffle.regex.charset.CodePointSet; import com.oracle.truffle.regex.charset.CodePointSetAccumulator; -import com.oracle.truffle.regex.charset.Range; import com.oracle.truffle.regex.charset.UnicodeProperties; import 
com.oracle.truffle.regex.errors.RbErrorMessages; import com.oracle.truffle.regex.tregex.buffer.CompilationBuffer; import com.oracle.truffle.regex.tregex.buffer.IntArrayBuffer; +import com.oracle.truffle.regex.tregex.parser.CaseFoldData; +import com.oracle.truffle.regex.tregex.parser.MultiCharacterCaseFolding; import com.oracle.truffle.regex.tregex.parser.RegexASTBuilder; +import com.oracle.truffle.regex.tregex.parser.RegexLexer; import com.oracle.truffle.regex.tregex.parser.RegexParser; import com.oracle.truffle.regex.tregex.parser.RegexValidator; import com.oracle.truffle.regex.tregex.parser.Token; @@ -620,24 +623,6 @@ private RegexSyntaxException syntaxErrorAt(String message, int pos) { return RegexSyntaxException.createPattern(inSource, message, pos); } - // Character predicates - - private static boolean isOctDigit(int c) { - return c >= '0' && c <= '7'; - } - - private static boolean isDecDigit(int c) { - return c >= '0' && c <= '9'; - } - - private static boolean isHexDigit(int c) { - return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'); - } - - static boolean isAscii(int c) { - return c < 128; - } - // First pass - identifying capture groups private void scanForCaptureGroups() { @@ -701,7 +686,7 @@ private void scanForCaptureGroups() { parseGroupReference('>', true, true, true, false); } else if (match("'")) { parseGroupReference('\'', true, true, true, false); - } else if (isDecDigit(curChar())) { + } else if (RegexLexer.isDecimalDigit(curChar())) { parseGroupReference(')', true, false, true, false); } } @@ -968,7 +953,7 @@ private void string(int firstCodepoint) { } if (getLocalFlags().isIgnoreCase()) { - RubyCaseFolding.caseFoldUnfoldString(codepointsBuffer.toArray(), inSource.getEncoding().getFullSet(), astBuilder); + MultiCharacterCaseFolding.caseFoldUnfoldString(CaseFoldData.CaseFoldAlgorithm.Ruby, codepointsBuffer.toArray(), inSource.getEncoding().getFullSet(), astBuilder); } else { for (int i = 0; i < 
codepointsBuffer.length(); i++) { addChar(codepointsBuffer.get(i)); @@ -993,7 +978,7 @@ private void string(int firstCodepoint) { private void buildChar(int codepoint) { if (!silent) { if (getLocalFlags().isIgnoreCase()) { - RubyCaseFolding.caseFoldUnfoldString(new int[]{codepoint}, inSource.getEncoding().getFullSet(), astBuilder); + MultiCharacterCaseFolding.caseFoldUnfoldString(CaseFoldData.CaseFoldAlgorithm.Ruby, new int[]{codepoint}, inSource.getEncoding().getFullSet(), astBuilder); } else { addChar(codepoint); } @@ -1038,10 +1023,10 @@ public boolean isQuantifierNext() { return false; } else { // lower bound - getMany(RubyRegexParser::isDecDigit); + getMany(RegexLexer::isDecimalDigit); // upper bound if (match(",")) { - getMany(RubyRegexParser::isDecDigit); + getMany(RegexLexer::isDecimalDigit); } if (!match("}")) { return false; @@ -1316,7 +1301,7 @@ private boolean backreference() { int restorePosition = position; if (curChar() >= '1' && curChar() <= '9') { // Joni only considers backreferences numbered <= 1000. 
- String number = getUpTo(4, RubyRegexParser::isDecDigit); + String number = getUpTo(4, RegexLexer::isDecimalDigit); int groupNumber = Integer.parseInt(number); if (groupNumber > 1000) { position = restorePosition; @@ -1354,7 +1339,7 @@ private boolean namedBackreference() { List groupNumbers = parseGroupReference('>', true, true, true, true); int nameEnd = position - 1; // named references cannot point forward, so filter out reference > groupIndex - buildNamedBackreference(groupNumbers.stream().filter(groupNumber -> groupNumber <= groupIndex).toArray(n -> new Integer[n]), inPattern.substring(nameStart, nameEnd)); + buildNamedBackreference(groupNumbers.stream().filter(groupNumber -> groupNumber <= groupIndex).toArray(Integer[]::new), inPattern.substring(nameStart, nameEnd)); return true; } else { return false; @@ -1365,12 +1350,12 @@ private List parseGroupReference(char terminator, boolean allowNumeric, String groupName; List groupNumbers = null; int beginPos = position; - if (curChar() == '-' || isDecDigit(curChar())) { + if (curChar() == '-' || RegexLexer.isDecimalDigit(curChar())) { if (!allowNumeric) { throw syntaxErrorHere(RbErrorMessages.INVALID_GROUP_NAME); } int sign = match("-") ? 
-1 : 1; - groupName = getMany(RubyRegexParser::isDecDigit); + groupName = getMany(RegexLexer::isDecimalDigit); int groupNumber; try { groupNumber = sign * Integer.parseInt(groupName); @@ -1413,7 +1398,7 @@ private List parseGroupReference(char terminator, boolean allowNumeric, } if (allowLevels && (curChar() == '+' || curChar() == '-')) { advance(); // consume sign - String level = getMany(RubyRegexParser::isDecDigit); + String level = getMany(RegexLexer::isDecimalDigit); if (level.isEmpty()) { throw syntaxErrorAt(RbErrorMessages.INVALID_GROUP_NAME, beginPos); } @@ -1560,7 +1545,7 @@ private boolean stringEscape() { if (match("u{")) { getMany(c -> ASCII_POSIX_CHAR_CLASSES.get("space").contains(c)); while (!match("}")) { - String code = getMany(RubyRegexParser::isHexDigit); + String code = getMany(RegexLexer::isHexDigit); try { int codePoint = Integer.parseInt(code, 16); if (codePoint > 0x10FFFF) { @@ -1570,7 +1555,7 @@ private boolean stringEscape() { } catch (NumberFormatException e) { throw syntaxErrorAt(RbErrorMessages.badEscape(code), beginPos); } - getMany(c -> WHITESPACE.get(c)); + getMany(WHITESPACE::get); } return true; } else { @@ -1666,7 +1651,7 @@ private Optional characterEscape() { switch (curChar()) { case 'x': { advance(); - String code = getUpTo(2, RubyRegexParser::isHexDigit); + String code = getUpTo(2, RegexLexer::isHexDigit); int byteValue = Integer.parseInt(code, 16); if (byteValue > 0x7F) { // This is a non-ASCII byte escape. 
The escaped character might be part of a @@ -1685,10 +1670,10 @@ private Optional characterEscape() { advance(); String code; if (match("{")) { - code = getMany(RubyRegexParser::isHexDigit); + code = getMany(RegexLexer::isHexDigit); mustMatch("}"); } else { - code = getUpTo(4, RubyRegexParser::isHexDigit); + code = getUpTo(4, RegexLexer::isHexDigit); if (code.length() < 4) { throw syntaxErrorAt(RbErrorMessages.incompleteEscape(code), beginPos); } @@ -1711,7 +1696,7 @@ private Optional characterEscape() { case '5': case '6': case '7': { - String code = getUpTo(3, RubyRegexParser::isOctDigit); + String code = getUpTo(3, c -> RegexLexer.isOctalDigit(c)); int codePoint = Integer.parseInt(code, 8); if (codePoint > 0xFF) { throw syntaxErrorAt(RbErrorMessages.TOO_BIG_NUMBER, beginPos); @@ -1741,7 +1726,7 @@ private void characterClass() { private void buildCharClass() { if (!silent) { if (getLocalFlags().isIgnoreCase()) { - List> multiCodePointExpansions = caseClosureMultiCodePoint(); + List> multiCodePointExpansions = MultiCharacterCaseFolding.caseClosureMultiCodePoint(CaseFoldData.CaseFoldAlgorithm.Ruby, curCharClass); if (multiCodePointExpansions.size() > 0) { pushGroup(); addCharClass(curCharClass.toCodePointSet()); @@ -1750,7 +1735,7 @@ private void buildCharClass() { int from = pair.getLeft(); int[] to = pair.getRight(); boolean dropAsciiOnStart = !fullyFoldableCharacters.get().contains(from); - RubyCaseFolding.caseFoldUnfoldString(to, inSource.getEncoding().getFullSet(), dropAsciiOnStart, astBuilder); + MultiCharacterCaseFolding.caseFoldUnfoldString(CaseFoldData.CaseFoldAlgorithm.Ruby, to, inSource.getEncoding().getFullSet(), dropAsciiOnStart, astBuilder); } popGroup(); } else { @@ -1976,87 +1961,14 @@ private PosixClassParseResult collectPosixCharClass() { } } - /** - * Calls the argument on any element of the character class which has a case-folding. 
- */ - private void caseFoldCharClass(BiConsumer caseFoldItem) { - if (curCharClass.get().size() < RubyCaseFoldingData.CASE_FOLD.size()) { - for (Range r : curCharClass) { - RubyCaseFoldingData.CASE_FOLD.subMap(r.lo, r.hi + 1).forEach((Integer from, int[] to) -> { - caseFoldItem.accept(from, to); - }); - } - } else { - RubyCaseFoldingData.CASE_FOLD.forEach((Integer from, int[] to) -> { - if (curCharClass.get().contains(from)) { - caseFoldItem.accept(from, to); - } - }); - } - } - private boolean acceptableCaseFold(int from, int to) { // Characters which are not "fully case-foldable" are only treated as equivalent if the // relation doesn't cross the ASCII boundary. return fullyFoldableCharacters.get().contains(from) || isAscii(from) == isAscii(to); } - /** - * This method modifies {@code curCharClass} to contains its closure on case mapping. - */ private void caseClosure() { - charClassTmp.clear(); - - caseFoldCharClass((from, to) -> { - if (to.length == 1) { - // Add the case-folded version to the character class... - if (acceptableCaseFold(from, to[0])) { - charClassTmp.addCodePoint(to[0]); - } - } - // ... and also any characters which case-fold to the same. - for (int unfolding : RubyCaseUnfoldingTrie.findSingleCharUnfoldings(to)) { - if (unfolding != from && acceptableCaseFold(from, unfolding)) { - charClassTmp.addCodePoint(unfolding); - } - } - }); - - // We also handle all the characters which might have no case-folding, i.e. they case-fold - // to themselves. - for (Range r : curCharClass) { - for (int codepoint = r.lo; codepoint <= r.hi; codepoint++) { - for (int unfolding : RubyCaseUnfoldingTrie.findSingleCharUnfoldings(codepoint)) { - if (acceptableCaseFold(codepoint, unfolding)) { - charClassTmp.addCodePoint(unfolding); - } - } - } - } - - // Only include characters that are admissible in the given encoding. 
- charClassTmp.intersectWith(inSource.getEncoding().getFullSet()); - - curCharClass.addSet(charClassTmp.get()); - } - - /** - * Finds any characters in {@link #curCharClass} that have multi-codepoint expansions. - * - * @return a list of pairs, with the first element being the expanded codepoint and the second - * element the expansion - */ - private List> caseClosureMultiCodePoint() { - List> multiCodePointExpansions = new ArrayList<>(); - - caseFoldCharClass((from, to) -> { - if (to.length > 1) { - assert !isAscii(from); - multiCodePointExpansions.add(Pair.create(from, to)); - } - }); - - return multiCodePointExpansions; + MultiCharacterCaseFolding.caseClosure(CaseFoldData.CaseFoldAlgorithm.Ruby, curCharClass, charClassTmp, this::acceptableCaseFold, inSource.getEncoding().getFullSet()); } /** @@ -2110,12 +2022,12 @@ private Quantifier parseQuantifier(int ch) { Optional lowerBound = Optional.empty(); Optional upperBound = Optional.empty(); boolean canBeNonGreedy = true; - String lower = getMany(RubyRegexParser::isDecDigit); + String lower = getMany(RegexLexer::isDecimalDigit); if (!lower.isEmpty()) { lowerBound = Optional.of(new BigInteger(lower)); } if (match(",")) { - String upper = getMany(RubyRegexParser::isDecDigit); + String upper = getMany(RegexLexer::isDecimalDigit); if (!upper.isEmpty()) { upperBound = Optional.of(new BigInteger(upper)); } @@ -2380,14 +2292,14 @@ private void conditionalBackReference() { List groupNumbers; boolean namedReference; if (match("<")) { - namedReference = curChar() != '-' && !isDecDigit(curChar()); + namedReference = curChar() != '-' && !RegexLexer.isDecimalDigit(curChar()); groupNumbers = parseGroupReference('>', true, true, true, true); mustMatch(")"); } else if (match("'")) { - namedReference = curChar() != '-' && !isDecDigit(curChar()); + namedReference = curChar() != '-' && !RegexLexer.isDecimalDigit(curChar()); groupNumbers = parseGroupReference('\'', true, true, true, true); mustMatch(")"); - } else if 
(isDecDigit(curChar())) { + } else if (RegexLexer.isDecimalDigit(curChar())) { namedReference = false; groupNumbers = parseGroupReference(')', true, false, true, true); } else { diff --git a/regex/src/com.oracle.truffle.regex/tools/casefolding/.gitignore b/regex/src/com.oracle.truffle.regex/tools/casefolding/.gitignore new file mode 100644 index 000000000000..d81f12ed1b1c --- /dev/null +++ b/regex/src/com.oracle.truffle.regex/tools/casefolding/.gitignore @@ -0,0 +1,2 @@ +/target +/.idea diff --git a/regex/src/com.oracle.truffle.regex/tools/casefolding/Cargo.lock b/regex/src/com.oracle.truffle.regex/tools/casefolding/Cargo.lock new file mode 100644 index 000000000000..8e5668087923 --- /dev/null +++ b/regex/src/com.oracle.truffle.regex/tools/casefolding/Cargo.lock @@ -0,0 +1,1661 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "addr2line" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + +[[package]] +name = "async-compression" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb42b2197bf15ccb092b62c74515dbd8b86d0effd934795f6687c93b6e679a2c" +dependencies = [ + "flate2", + "futures-core", + "memchr", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "backtrace" +version = "0.3.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"2089b7e3f35b9dd2d0ed921ead4f6d318c27680d4a5bd167b3ee120edb105837" +dependencies = [ + "addr2line", + "cc", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", +] + +[[package]] +name = "base64" +version = "0.21.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ba43ea6f343b788c8764558649e08df62f86c6ef251fdaeb1ffd010a9ae50a2" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitflags" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4682ae6287fcf752ecaabbfcc7b6f9b72aa33933dc23a554d853aea8eea8635" + +[[package]] +name = "bumpalo" +version = "3.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec" + +[[package]] +name = "bytes" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223" + +[[package]] +name = "cc" +version = "1.0.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" +dependencies = [ + "libc", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "console" +version = "0.15.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c926e00cc70edefdc64d3a5ff31cc65bb97a3460097762bd23afb4d8145fccf8" +dependencies = [ + "encode_unicode", + "lazy_static", + "libc", + "unicode-width", + "windows-sys 0.45.0", +] + +[[package]] +name = "core-foundation" +version = "0.9.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "194a7a9e6de53fa55116934067c844d9d749312f75c6f6d0980e8c252f8c2146" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" + +[[package]] +name = "crc32fast" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "csv" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac574ff4d437a7b5ad237ef331c17ccca63c46479e5b5453eb8e10bb99a759fe" +dependencies = [ + "csv-core", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "csv-core" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70" +dependencies = [ + "memchr", +] + +[[package]] +name = "darling" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a01d95850c592940db9b8194bc39f4bc0e89dee5c4265e4b1807c34a9aba453c" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "859d65a907b6852c9361e3185c862aae7fafd2887876799fa55f5f99dc40d610" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn 1.0.109", +] + +[[package]] +name = "darling_macro" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c972679f83bdf9c42bd905396b6c3588a843a17f0f16dfcfa3e2c5d57441835" +dependencies = [ + "darling_core", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "displaydoc" +version 
= "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "487585f4d0c6655fe74905e2504d8ad6908e4db67f744eb140876906c2f3175d" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.38", +] + +[[package]] +name = "encode_unicode" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" + +[[package]] +name = "encoding_rs" +version = "0.8.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7268b386296a025e474d5140678f75d6de9493ae55a5d709eeb9dd08149945e1" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "errno" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3e13f66a2f95e32a39eaa81f6b95d42878ca0e1db0c7543723dfe12557e860" +dependencies = [ + "libc", + "windows-sys 0.48.0", +] + +[[package]] +name = "error-chain" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d2f06b9cac1506ece98fe3231e3cc9c4410ec3d5b1f24ae1c8946f0742cdefc" +dependencies = [ + "backtrace", + "version_check", +] + +[[package]] +name = "fastrand" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5" + +[[package]] +name = "flate2" +version = "1.0.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6c98ee8095e9d1dcbf2fcc6d95acccb90d1c81db1e44725c6a984b1dbdfb010" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "foreign-types" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" +dependencies = [ + "foreign-types-shared", +] + +[[package]] +name = "foreign-types-shared" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" + +[[package]] +name = "form_urlencoded" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a62bc1cf6f830c2ec14a513a9fb124d0a213a629668a4186f329db21fe045652" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "futures-channel" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "955518d47e09b25bbebc7a18df10b81f0c766eaf4c4f1cccef2fca5f2a4fb5f2" +dependencies = [ + "futures-core", +] + +[[package]] +name = "futures-core" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4bca583b7e26f571124fe5b7561d49cb2868d79116cfa0eefce955557c6fee8c" + +[[package]] +name = "futures-io" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fff74096e71ed47f8e023204cfd0aa1289cd54ae5430a9523be060cdb849964" + +[[package]] +name = "futures-sink" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f43be4fe21a13b9781a69afa4985b0f6ee0e1afab2c6f454a8cf30e2b2237b6e" + +[[package]] +name = "futures-task" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76d3d132be6c0e6aa1534069c705a74a5997a356c0dc2f86a47765e5617c5b65" + +[[package]] +name = "futures-util" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26b01e40b772d54cf6c6d721c1d1abd0647a0106a12ecaa1c186273392a69533" +dependencies = [ + "futures-core", + "futures-io", + "futures-task", + "memchr", + "pin-project-lite", + "pin-utils", + "slab", +] + +[[package]] +name = "gimli" 
+version = "0.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fb8d784f27acf97159b40fc4db5ecd8aa23b9ad5ef69cdd136d3bc80665f0c0" + +[[package]] +name = "h2" +version = "0.3.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91fc23aa11be92976ef4729127f1a74adf36d8436f7816b185d18df956790833" +dependencies = [ + "bytes", + "fnv", + "futures-core", + "futures-sink", + "futures-util", + "http", + "indexmap", + "slab", + "tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" + +[[package]] +name = "hermit-abi" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d77f7ec81a6d05a3abb01ab6eb7590f6083d08449fe5a1c8b1e620283546ccb7" + +[[package]] +name = "http" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd6effc99afb63425aff9b05836f029929e345a6148a14b7ecd5ab67af944482" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + +[[package]] +name = "http-body" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5f38f16d184e36f2408a55281cd658ecbd3ca05cce6d6510a176eca393e26d1" +dependencies = [ + "bytes", + "http", + "pin-project-lite", +] + +[[package]] +name = "httparse" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904" + +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + +[[package]] +name = "hyper" +version = "0.14.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"ffb1cfd654a8219eaef89881fdb3bb3b1cdc5fa75ded05d6933b2b382e395468" +dependencies = [ + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "h2", + "http", + "http-body", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "socket2 0.4.9", + "tokio", + "tower-service", + "tracing", + "want", +] + +[[package]] +name = "hyper-tls" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905" +dependencies = [ + "bytes", + "hyper", + "native-tls", + "tokio", + "tokio-native-tls", +] + +[[package]] +name = "icu_collator" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2223603c703f1f6395206b2c0196b4cacc70f3ac4560f92c9386c1a416f92ef6" +dependencies = [ + "displaydoc", + "icu_collator_data", + "icu_collections", + "icu_locid", + "icu_locid_transform", + "icu_normalizer", + "icu_properties", + "icu_provider", + "smallvec", + "utf16_iter", + "utf8_iter", + "zerovec", +] + +[[package]] +name = "icu_collator_data" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ec0a6848b88d80435d8b5f960ff6310715ad7cfcf3e042b874532c3b6af11a8" + +[[package]] +name = "icu_collections" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3907b2246e8dd5a29ead8a965e7c0c8a90e9b928e614a4279257d45c5e553e91" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locid" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f284eb342dc49d3e9d9f3b188489d76b5d22dfb1d1a5e0d1941811253bac625c" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_locid_transform" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"6551daf80882d8e68eee186cc19e132d8bde1b1f059a79b93384a5ca0e8fc5e7" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_locid_transform_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_locid_transform_data" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a741eba5431f75eb2f1f9022d3cffabcadda6771e54fb4e77c8ba8653e4da44" + +[[package]] +name = "icu_normalizer" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "080fc33a720d50a7342b0c58df010fbcfb842d6f78ef81555f8b1ac6bba57d3c" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "utf16_iter", + "utf8_iter", + "write16", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f8d22f74066c2e6442db2a9aa14950278e86719e811e304e48bae03094b369d" + +[[package]] +name = "icu_properties" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3477ae70f8ca8dc08ff7574b5398ed0a2f2e4e6b66bdff2558a92ed67e262be1" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_locid_transform", + "icu_properties_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c8bb3b67a8347e94d580434369e5c7ee89999b9309d04b7cfc88dfaa0f31b59" + +[[package]] +name = "icu_provider" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68acdef80034b5e35d8524e9817479d389a4f9774f3f0cbe1bf3884d80fd5934" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_provider_macros", + "stable_deref_trait", + "tinystr", + "writeable", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_provider_macros" +version 
= "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2060258edfcfe32ca7058849bf0f146cb5c59aadbedf480333c0d0002f97bc99" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.38", +] + +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + +[[package]] +name = "idna" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d20d6b07bfbc108882d88ed8e37d39636dcc260e15e30c45e6ba089610b917c" +dependencies = [ + "unicode-bidi", + "unicode-normalization", +] + +[[package]] +name = "indexmap" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" +dependencies = [ + "autocfg", + "hashbrown", +] + +[[package]] +name = "indicatif" +version = "0.17.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb28741c9db9a713d93deb3bb9515c20788cef5815265bee4980e87bde7e0f25" +dependencies = [ + "console", + "instant", + "number_prefix", + "portable-atomic", + "unicode-width", +] + +[[package]] +name = "instant" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "ipnet" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28b29a3cd74f0f4598934efe3aeba42bae0eb4680554128851ebbecb02af14e6" + +[[package]] +name = "itoa" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" + +[[package]] +name = "js-sys" +version = "0.3.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"c5f195fe497f702db0f318b07fdd68edb16955aed830df8363d837542f8f935a" +dependencies = [ + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "libc" +version = "0.2.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a08173bc88b7955d1b3145aa561539096c421ac8debde8cbc3612ec635fee29b" + +[[package]] +name = "linux-raw-sys" +version = "0.4.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da2479e8c062e40bf0066ffa0bc823de0a9368974af99c9f6df941d2c231e03f" + +[[package]] +name = "litemap" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a1a2647d5b7134127971a6de0d533c49de2159167e7f259c427195f87168a1" + +[[package]] +name = "log" +version = "0.4.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" + +[[package]] +name = "memchr" +version = "2.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167" + +[[package]] +name = "mime" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" + +[[package]] +name = "miniz_oxide" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7" +dependencies = [ + "adler", +] + +[[package]] +name = "mio" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "927a765cd3fc26206e66b296465fa9d3e5ab003e651c1b3c060e7956d96b19d2" +dependencies = [ + "libc", + "wasi", + "windows-sys 0.48.0", +] + +[[package]] 
+name = "native-tls" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07226173c32f2926027b63cce4bcd8076c3552846cbe7925f3aaffeac0a3b92e" +dependencies = [ + "lazy_static", + "libc", + "log", + "openssl", + "openssl-probe", + "openssl-sys", + "schannel", + "security-framework", + "security-framework-sys", + "tempfile", +] + +[[package]] +name = "num_cpus" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" +dependencies = [ + "hermit-abi", + "libc", +] + +[[package]] +name = "number_prefix" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" + +[[package]] +name = "object" +version = "0.32.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9cf5f9dd3933bd50a9e1f149ec995f39ae2c496d31fd772c1fd45ebc27e902b0" +dependencies = [ + "memchr", +] + +[[package]] +name = "once_cell" +version = "1.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" + +[[package]] +name = "openssl" +version = "0.10.57" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bac25ee399abb46215765b1cb35bc0212377e58a061560d8b29b024fd0430e7c" +dependencies = [ + "bitflags 2.4.0", + "cfg-if", + "foreign-types", + "libc", + "once_cell", + "openssl-macros", + "openssl-sys", +] + +[[package]] +name = "openssl-macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.38", +] + +[[package]] +name = "openssl-probe" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" + +[[package]] +name = "openssl-sys" +version = "0.9.93" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db4d56a4c0478783083cfafcc42493dd4a981d41669da64b4572a2a089b51b1d" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "oracle" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfe80334af1fbaea016fbef0af77f5fa32452362e29a039389b8c93737585003" +dependencies = [ + "cc", + "lazy_static", + "oracle_procmacro", + "paste", +] + +[[package]] +name = "oracle_procmacro" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad247f3421d57de56a0d0408d3249d4b1048a522be2013656d92f022c3d8af27" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "paste" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c" + +[[package]] +name = "percent-encoding" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94" + +[[package]] +name = "pin-project-lite" +version = "0.2.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8afb450f006bf6385ca15ef45d71d2288452bc3683ce2e2cacc0d18e4be60b58" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "pkg-config" +version = "0.3.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964" + +[[package]] +name = "portable-atomic" +version = "1.4.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "31114a898e107c51bb1609ffaf55a0e011cf6a4d7f1170d0015a165082c0338b" + +[[package]] +name = "proc-macro2" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "134c189feb4956b20f6f547d2cf727d4c0fe06722b20a0eec87ed445a97f92da" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "redox_syscall" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" +dependencies = [ + "bitflags 1.3.2", +] + +[[package]] +name = "reqwest" +version = "0.11.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "046cd98826c46c2ac8ddecae268eb5c2e58628688a5fc7a2643704a73faba95b" +dependencies = [ + "async-compression", + "base64", + "bytes", + "encoding_rs", + "futures-core", + "futures-util", + "h2", + "http", + "http-body", + "hyper", + "hyper-tls", + "ipnet", + "js-sys", + "log", + "mime", + "native-tls", + "once_cell", + "percent-encoding", + "pin-project-lite", + "serde", + "serde_json", + "serde_urlencoded", + "system-configuration", + "tokio", + "tokio-native-tls", + "tokio-util", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", + "winreg", +] + +[[package]] +name = "rustc-demangle" +version = "0.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" + +[[package]] +name = "rustix" +version = "0.38.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a74ee2d7c2581cd139b42447d7d9389b889bdaad3a73f1ebb16f2a3237bb19c" +dependencies = [ + 
"bitflags 2.4.0", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.48.0", +] + +[[package]] +name = "ryu" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" + +[[package]] +name = "schannel" +version = "0.1.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c3733bf4cf7ea0880754e19cb5a462007c4a8c1914bff372ccc95b464f1df88" +dependencies = [ + "windows-sys 0.48.0", +] + +[[package]] +name = "security-framework" +version = "2.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05b64fb303737d99b81884b2c63433e9ae28abebe5eb5045dcdd175dc2ecf4de" +dependencies = [ + "bitflags 1.3.2", + "core-foundation", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework-sys" +version = "2.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e932934257d3b408ed8f30db49d85ea163bfe74961f017f405b025af298f0c7a" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "serde" +version = "1.0.188" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf9e0fcba69a370eed61bcf2b728575f726b50b55cba78064753d708ddc7549e" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.188" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.38", +] + +[[package]] +name = "serde_json" +version = "1.0.107" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b420ce6e3d8bd882e9b243c6eed35dbc9a6110c9769e74b584e0d68d1f20c65" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "serde_urlencoded" +version = "0.7.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" +dependencies = [ + "form_urlencoded", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "slab" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" +dependencies = [ + "autocfg", +] + +[[package]] +name = "smallvec" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "942b4a808e05215192e39f4ab80813e599068285906cc91aa64f923db842bd5a" + +[[package]] +name = "socket2" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64a4a911eed85daf18834cfaa86a79b7d266ff93ff5ba14005426219480ed662" +dependencies = [ + "libc", + "winapi", +] + +[[package]] +name = "socket2" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4031e820eb552adee9295814c0ced9e5cf38ddf1e8b7d566d6de8e2538ea989e" +dependencies = [ + "libc", + "windows-sys 0.48.0", +] + +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + +[[package]] +name = "strsim" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e96b79aaa137db8f61e26363a0c9b47d8b4ec75da28b7d1d614c2303e232408b" 
+dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "synstructure" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "285ba80e733fac80aa4270fbcdf83772a79b80aa35c97075320abfee4a915b06" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.38", + "unicode-xid", +] + +[[package]] +name = "system-configuration" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7" +dependencies = [ + "bitflags 1.3.2", + "core-foundation", + "system-configuration-sys", +] + +[[package]] +name = "system-configuration-sys" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75fb188eb626b924683e3b95e3a48e63551fcfb51949de2f06a9d91dbee93c9" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "tempfile" +version = "3.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb94d2f3cc536af71caac6b6fcebf65860b347e7ce0cc9ebe8f70d3e521054ef" +dependencies = [ + "cfg-if", + "fastrand", + "redox_syscall", + "rustix", + "windows-sys 0.48.0", +] + +[[package]] +name = "tinystr" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5d0e245e80bdc9b4e5356fc45a72184abbc3861992603f515270e9340f5a219" +dependencies = [ + "displaydoc", + "zerovec", +] + +[[package]] +name = "tinyvec" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + +[[package]] +name = "tokio" +version = "1.33.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f38200e3ef7995e5ef13baec2f432a6da0aa9ac495b2c0e8f3b7eec2c92d653" +dependencies = [ + "backtrace", + "bytes", + "libc", + "mio", + "num_cpus", + "pin-project-lite", + "socket2 0.5.4", + "windows-sys 0.48.0", +] + +[[package]] +name = "tokio-native-tls" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2" +dependencies = [ + "native-tls", + "tokio", +] + +[[package]] +name = "tokio-util" +version = "0.7.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d68074620f57a0b21594d9735eb2e98ab38b17f80d3fcb189fca266771ca60d" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", + "tracing", +] + +[[package]] +name = "tower-service" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" + +[[package]] +name = "tracing" +version = "0.1.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ce8c33a8d48bd45d624a6e523445fd21ec13d3653cd51f681abf67418f54eb8" +dependencies = [ + "cfg-if", + "pin-project-lite", + "tracing-core", +] + +[[package]] +name = "tracing-core" +version = "0.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0955b8137a1df6f1a2e9a37d8a6656291ff0297c1a97c24e0d8425fe2312f79a" +dependencies = [ + "once_cell", +] + +[[package]] +name = "tregex-casefolding" +version = "0.1.0" +dependencies = [ + "csv", + "error-chain", + "icu_collator", + "icu_locid", + "indicatif", + "oracle", + "reqwest", +] + +[[package]] +name = "try-lock" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3528ecfd12c466c6f163363caf2d02a71161dd5e1cc6ae7b34207ea2d42d81ed" + +[[package]] +name = "unicode-bidi" +version = "0.3.13" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460" + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "unicode-normalization" +version = "0.1.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921" +dependencies = [ + "tinyvec", +] + +[[package]] +name = "unicode-width" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" + +[[package]] +name = "unicode-xid" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c" + +[[package]] +name = "url" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "143b538f18257fac9cad154828a57c6bf5157e1aa604d4816b5995bf6de87ae5" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", +] + +[[package]] +name = "utf16_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52df8b7fb78e7910d776fccf2e42ceaf3604d55e8e7eb2dbd183cb1441d8a692" + +[[package]] +name = "utf8_iter" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64a8922555b9500e3d865caed19330172cd67cbf82203f1a3311d8c305cc9f33" + +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + +[[package]] +name = "want" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" +dependencies = [ + "try-lock", +] + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "wasm-bindgen" +version = "0.2.87" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7706a72ab36d8cb1f80ffbf0e071533974a60d0a308d01a5d0375bf60499a342" +dependencies = [ + "cfg-if", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.87" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ef2b6d3c510e9625e5fe6f509ab07d66a760f0885d858736483c32ed7809abd" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", + "syn 2.0.38", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c02dbc21516f9f1f04f187958890d7e6026df8d16540b7ad9492bc34a67cea03" +dependencies = [ + "cfg-if", + "js-sys", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.87" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dee495e55982a3bd48105a7b947fd2a9b4a8ae3010041b9e0faab3f9cd028f1d" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.87" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.38", + "wasm-bindgen-backend", + 
"wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.87" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1" + +[[package]] +name = "web-sys" +version = "0.3.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b85cbef8c220a6abc02aefd892dfc0fc23afb1c6a426316ec33253a3877249b" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-sys" +version = "0.45.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" +dependencies = [ + "windows-targets 0.42.2", +] + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.5", +] + +[[package]] +name = "windows-targets" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" +dependencies = [ + "windows_aarch64_gnullvm 0.42.2", + "windows_aarch64_msvc 0.42.2", + 
"windows_i686_gnu 0.42.2", + "windows_i686_msvc 0.42.2", + "windows_x86_64_gnu 0.42.2", + "windows_x86_64_gnullvm 0.42.2", + "windows_x86_64_msvc 0.42.2", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_i686_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_msvc" +version = "0.42.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + +[[package]] +name = "winreg" +version = "0.50.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1" +dependencies = [ + "cfg-if", + "windows-sys 0.48.0", +] + +[[package]] +name = "write16" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" + +[[package]] +name = "writeable" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0af0c3d13faebf8dda0b5256fa7096a2d5ccb662f7b9f54a40fe201077ab1c2" + +[[package]] +name = "yoke" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61e38c508604d6bbbd292dadb3c02559aa7fff6b654a078a36217cad871636e4" +dependencies = [ + "serde", + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5e19fb6ed40002bab5403ffa37e53e0e56f914a4450c8765f533018db1db35f" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.38", + "synstructure", +] + +[[package]] +name = "zerofrom" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "655b0814c5c0b19ade497851070c640773304939a6c0fd5f5fb43da0696d05b7" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6a647510471d372f2e6c2e6b7219e44d8c574d24fdc11c610a61455782f18c3" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.38", + "synstructure", +] + +[[package]] +name = "zerovec" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1194130c5b155bf8ae50ab16c86ab758cd695cf9ad176d2f870b744cbdbb572e" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acabf549809064225ff8878baedc4ce3732ac3b07e7c7ce6e5c2ccdbc485c324" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.38", +] diff --git a/regex/src/com.oracle.truffle.regex/tools/casefolding/Cargo.toml 
b/regex/src/com.oracle.truffle.regex/tools/casefolding/Cargo.toml new file mode 100644 index 000000000000..36709645e557 --- /dev/null +++ b/regex/src/com.oracle.truffle.regex/tools/casefolding/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "tregex-casefolding" +version = "0.1.0" +edition = "2021" + +[dependencies] +csv = "1.3.0" +error-chain = "0.12.4" +icu_collator = "1.3.2" +icu_locid = "1.3.2" +indicatif = "0.17.7" +oracle = "0.5.7" +reqwest = { version = "0.11.22", features = ["blocking", "gzip"] } diff --git a/regex/src/com.oracle.truffle.regex/tools/casefolding/src/main.rs b/regex/src/com.oracle.truffle.regex/tools/casefolding/src/main.rs new file mode 100644 index 000000000000..87278dc1250e --- /dev/null +++ b/regex/src/com.oracle.truffle.regex/tools/casefolding/src/main.rs @@ -0,0 +1,1987 @@ +/* + * Copyright (c) 2023, 2023, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * The Universal Permissive License (UPL), Version 1.0 + * + * Subject to the condition set forth below, permission is hereby granted to any + * person obtaining a copy of this software, associated documentation and/or + * data (collectively the "Software"), free of charge and under any and all + * copyright rights in the Software, and any and all patent rights owned or + * freely licensable by each licensor hereunder covering either (i) the + * unmodified Software as contributed to or provided by such licensor, or (ii) + * the Larger Works (as defined below), to deal in both + * + * (a) the Software, and + * + * (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if + * one is included with the Software each a "Larger Work" to which the Software + * is contributed by such licensors), + * + * without restriction, including without limitation the rights to copy, create + * derivative works of, display, perform, and distribute the Software and make, + * use, sell, offer for sale, import, export, 
have made, and have sold the + * Software and the Larger Work(s), and to sublicense the foregoing rights on + * either these or other terms. + * + * This license is subject to the following condition: + * + * The above copyright notice and either this complete permission notice or at a + * minimum a reference to the UPL must be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +use core::cmp::Ordering; +use std::cmp::{max, min}; +use std::collections::{HashMap, HashSet}; +use std::fmt::{Debug, Display, Formatter}; +use std::fs; +use std::fs::File; +use std::io::Write; +use std::path::Path; +use std::process::Command; +use std::time::Instant; + +use csv::{Reader, StringRecord, Trim}; +use error_chain::error_chain; +use icu_collator::{CaseLevel, Collator, CollatorOptions, Strength}; +use icu_locid::Locale; +use indicatif::ProgressIterator; +use oracle::{Connection, Connector, Privilege}; +use oracle::sql_type::OracleType; +use reqwest::Url; + +use crate::OrderMapping::{IntegerOffset, LUT}; +use crate::UnicodeCaseFoldingVariant::{Full, Simple}; + +error_chain! 
{ + foreign_links { + Io(std::io::Error); + HttpRequest(reqwest::Error); + CSV(csv::Error); + OracleDB(oracle::Error); + } +} + +const FILE_FORMAT_VERSION: u16 = 0; +const OUTPUT_FOLDER: &str = "./out"; +const PATH_GRAAL_REPO: &str = "../../../../../"; +const PATH_CASE_FOLD_DATA: &str = "regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/CaseFoldData.java"; +const PATH_ORACLE_DB_CONSTANTS: &str = "regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/OracleDBConstants.java"; +const PATH_ORACLE_DB_TESTS: &str = "regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/test/OracleDBTests.java"; +const GENERATED_CODE_MARKER_BEGIN: &str = " /* GENERATED CODE BEGIN - KEEP THIS MARKER FOR AUTOMATIC UPDATES */"; +const GENERATED_CODE_MARKER_END: &str = " /* GENERATED CODE END - KEEP THIS MARKER FOR AUTOMATIC UPDATES */"; + +#[derive(Debug, Clone)] +struct CollationElement { + string: String, +} + +#[derive(Debug, Clone)] +struct CollationElementIndex { + index_base: usize, + index_src: usize, + index_dst: usize, + element: CollationElement, +} + +#[derive(Debug, Clone, Eq, PartialEq)] +enum EqMapping { + IntegerOffset(i32), + Set(usize), + AlternatingAL, + AlternatingUL, + Single(usize), +} + +impl EqMapping { + fn from_single_mapping(src: usize, dst: usize) -> EqMapping { + let offset = (dst as i32) - (src as i32); + if offset == 1 { + if src & 1 == 0 { EqMapping::AlternatingAL } else { EqMapping::AlternatingUL } + } else { + EqMapping::IntegerOffset(offset) + } + } +} + +#[derive(Debug)] +enum OrderMapping { + IntegerOffset(i32), + LUT(Vec), +} + +trait RangeMapping { + fn lo(&self) -> usize; + fn hi(&self) -> usize; + fn mapping(&self) -> &T; +} + +#[derive(Debug)] +struct OrderTableEntry { + lo: usize, + hi: usize, + mapping: OrderMapping, +} + +impl RangeMapping for OrderTableEntry { + fn lo(&self) -> usize { + self.lo + } + + fn hi(&self) -> usize { + self.hi + } + + fn mapping(&self) -> 
&OrderMapping { + &self.mapping + } +} + +#[derive(Debug, Clone, Eq, PartialEq)] +struct EqTableEntry { + lo: usize, + hi: usize, + mapping: EqMapping, +} + +impl EqTableEntry { + fn as_dummy(&self) -> EqTableEntry { + EqTableEntry { + lo: self.lo, + hi: self.hi, + mapping: EqMapping::IntegerOffset(0), + } + } + + fn with_hi(&self, hi: usize) -> EqTableEntry { + EqTableEntry { + lo: self.lo, + hi, + mapping: self.mapping.clone(), + } + } + + fn with_lo(&self, lo: usize) -> EqTableEntry { + EqTableEntry { + lo, + hi: self.hi, + mapping: self.mapping.clone(), + } + } + + fn with_mapping(&self, mapping: EqMapping) -> EqTableEntry { + EqTableEntry { + lo: self.lo, + hi: self.hi, + mapping, + } + } +} + +impl RangeMapping for EqTableEntry { + fn lo(&self) -> usize { + self.lo + } + + fn hi(&self) -> usize { + self.hi + } + + fn mapping(&self) -> &EqMapping { + &self.mapping + } +} + +trait RangeMappingTable> { + fn table(&self) -> &Vec; + + fn binary_search(&self, key: usize) -> Option<&M> { + let table = self.table(); + let mut lo: i32 = 0; + let mut hi: i32 = (table.len() as i32) - 1; + while lo <= hi { + let mid = (lo + hi) >> 1; + let mid_val = table[mid as usize].lo(); + if mid_val < key { + lo = mid + 1; + } else if mid_val > key { + hi = mid - 1; + } else { + assert!(table[mid as usize].lo() <= key && key <= table[mid as usize].hi(), "{:?}, key: {}", table[mid as usize], key); + return Some(&table[mid as usize]); + } + } + if lo > 0 && table[(lo - 1) as usize].lo() <= key && key <= table[(lo - 1) as usize].hi() { + return Some(&table[(lo - 1) as usize]); + } + return None; + } +} + +struct OrderTable { + table: Vec, +} + +impl RangeMappingTable for OrderTable { + fn table(&self) -> &Vec { + &self.table + } +} + +struct EqTable { + table: Vec, + sets: Vec>, +} + +impl RangeMappingTable for EqTable { + fn table(&self) -> &Vec { + &self.table + } +} + +impl OrderTable { + /// Creates a new compressed table from an exhaustive list of collation elements `full_map` 
mapping `index_src` to `index_dst`. + /// `full_map` must be sorted by `index_src`. + /// + fn create(full_map: &Vec, index_src: fn(&CollationElementIndex) -> usize, index_dst: fn(&CollationElementIndex) -> usize) -> OrderTable { + fn push_entry(full_map: &Vec, + index_src: fn(&CollationElementIndex) -> usize, + index_dst: fn(&CollationElementIndex) -> usize, + table: &mut Vec, last_range_end: usize, prev: usize, cur_index_src: usize) { + let last_index_src = index_src(&full_map[last_range_end]); + let prev_index_src = index_src(&full_map[prev]); + // if range size is 1, use a lookup table + if (cur_index_src - last_index_src) == 1 { + // if the last entry in the mapping is already a lookup table, append to it + if table.last_mut().map(|e| { + match &mut e.mapping { + LUT(lut) => { + assert_eq!(e.hi, prev_index_src - 1, "lookup table must be adjacent to current element"); + e.hi = prev_index_src; + lut.push(index_dst(&full_map[prev])); + false + } + _ => true + } + }).unwrap_or(true) { + // otherwise, create a new lookup table + table.push(OrderTableEntry { + lo: last_index_src, + hi: last_index_src, + mapping: LUT(vec![index_dst(&full_map[prev])]), + }); + } + } else { + // range size is greater than one, create an integer offset mapping + table.push(OrderTableEntry { + lo: last_index_src, + hi: prev_index_src, + mapping: IntegerOffset((index_dst(&full_map[last_range_end]) as i32) - (last_index_src as i32)), + }); + } + } + + let mut table: Vec = Vec::new(); + let mut last_range_end = 0; + // try to find consecutive ranges in the mapping that can be expressed with integer offsets, e.g. 
[1..4] -> [3..6] + for i in 1..full_map.len() { + let prev = i - 1; + if index_src(&full_map[prev]) != index_src(&full_map[i]) - 1 || index_dst(&full_map[prev]) != index_dst(&full_map[i]) - 1 { + push_entry(&full_map, index_src, index_dst, &mut table, last_range_end, prev, index_src(&full_map[i])); + last_range_end = i; + } + } + push_entry(&full_map, index_src, index_dst, &mut table, last_range_end, full_map.len() - 1, index_src(&full_map[full_map.len() - 1]) + 1); + OrderTable { table } + } + + /// Returns the `dst_index` for a given `src_index` + fn lookup(&self, key: usize) -> usize { + self.binary_search(key).map(|e| { + return match &e.mapping { + IntegerOffset(offset) => { + ((key as i32) + offset) as usize + } + LUT(lut) => { + lut[key - e.lo] + } + }; + }).unwrap_or(key) + } + + #[allow(dead_code)] + fn print_size(&self, name: &str) { + let size = self.table.iter().map(|e| { + match &e.mapping { + IntegerOffset(_) => { 12 } + LUT(lut) => { 8 + (lut.len() * 4) } + } + }).reduce(|a, b| a + b).unwrap_or(0); + println!("{:>25} size: {:>6} bytes", name, size); + } +} + +impl EqTable { + /// Creates a table mapping all equivalent collation elements in the given exhaustive list `full_map` to each other. + /// `full_map` must be sorted by the collator, so that equivalent elements are next to each other. 
+ /// + fn create Ordering>(collator: F, full_map: &Vec) -> EqTable { + let mut eq_map_0: Vec = Vec::with_capacity(full_map.len()); + let mut eq_sets: Vec> = Vec::new(); + let mut buf: Vec = Vec::new(); + // first pass: find equivalent elements and create mappings + for i in 1..full_map.len() { + if collator(&full_map[i - 1].element.string, &full_map[i].element.string) == Ordering::Equal { + if buf.is_empty() { + buf.push(full_map[i - 1].index_base); + } + buf.push(full_map[i].index_base); + } else { + if !buf.is_empty() { + EqTable::eq_map_push_first_pass(&mut eq_map_0, &mut eq_sets, &buf); + buf.clear(); + } + } + } + if !buf.is_empty() { + EqTable::eq_map_push_first_pass(&mut eq_map_0, &mut eq_sets, &buf); + } + for eq_table in &mut eq_sets { + eq_table.sort(); + } + EqTable { table: EqTable::eq_map_merge_adjacent(&mut eq_map_0), sets: eq_sets } + } + + fn from_vec<'a>(mut equivalences: Vec>) -> EqTable { + for vec in equivalences.iter_mut() { + vec.sort(); + } + equivalences.sort(); + let mut eq_map_0: Vec = Vec::with_capacity(equivalences.len()); + let mut eq_sets: Vec> = Vec::new(); + // first pass: find equivalent elements and create mappings + for buf in equivalences { + if buf.len() > 1 { + EqTable::eq_map_push_first_pass(&mut eq_map_0, &mut eq_sets, &buf); + } + } + for eq_table in &mut eq_sets { + eq_table.sort(); + } + EqTable { table: EqTable::eq_map_merge_adjacent(&mut eq_map_0), sets: eq_sets } + } + + fn eq_map_push_first_pass(eq_map_0: &mut Vec, eq_tables: &mut Vec>, buf: &Vec) { + if buf.len() == 2 { + let offset = (buf[0] as i32) - (buf[1] as i32); + if offset.abs() == 1 { + // elements indices are adjacent, we can map them with AlternatingAL/UL + let min = min(buf[0], buf[1]); + let max = max(buf[0], buf[1]); + eq_map_0.push(EqTableEntry { + lo: min, + hi: max, + mapping: if min & 1 == 0 { EqMapping::AlternatingAL } else { EqMapping::AlternatingUL }, + }); + } else { + // indices are not adjacent, map both with integer offset + 
eq_map_0.push(EqTableEntry { + lo: buf[0], + hi: buf[0], + mapping: EqMapping::IntegerOffset((buf[1] as i32) - (buf[0] as i32)), + }); + eq_map_0.push(EqTableEntry { + lo: buf[1], + hi: buf[1], + mapping: EqMapping::IntegerOffset(offset), + }); + } + } else { + // more than two equivalent elements, we need a set + for i in buf { + eq_map_0.push(EqTableEntry { + lo: *i, + hi: *i, + mapping: EqMapping::Set(eq_tables.len()), + }); + } + eq_tables.push(buf.to_vec()); + } + } + + fn eq_map_merge_adjacent(eq_map_0: &mut Vec) -> Vec { + // merge adjacent mappings into range-based entries, e.g. `1 -> offset(10), 2 -> offset(10) becomes [1-2] -> offset(10) + eq_map_0.sort_by_key(|x| x.lo); + let mut eq_map: Vec = Vec::new(); + eq_map.push(eq_map_0[0].clone()); + for e in &eq_map_0[1..] { + let last = eq_map.last_mut().unwrap(); + if last.hi == e.lo - 1 && last.mapping == e.mapping { + last.hi = e.hi; + } else { + eq_map.push(e.clone()); + } + } + eq_map + } + + fn create_one_way_mapping(mappings: Vec<(usize, usize)>) -> EqTable { + fn can_use_single_mapping(last: &EqTableEntry, dst: usize) -> bool { + match last.mapping { + EqMapping::IntegerOffset(offset) => { + last.lo == last.hi && last.lo as i32 + offset == dst as i32 + } + EqMapping::Single(last_dst) => { + last_dst == dst + } + _ => false + } + } + + assert!(mappings.len() > 0); + let mut table: Vec = vec![]; + let (src_0, dst_0) = mappings[0]; + table.push(EqTableEntry { + lo: src_0, + hi: src_0, + mapping: EqMapping::from_single_mapping(src_0, dst_0), + }); + for (src, dst) in mappings[1..].iter().cloned() { + let last = table.last().unwrap(); + assert!(src > last.hi); + let mapping = EqMapping::from_single_mapping(src, dst); + if can_use_single_mapping(last, dst) { + table.last_mut().unwrap().mapping = EqMapping::Single(dst); + table.last_mut().unwrap().hi = src; + } else if mapping == last.mapping && src == last.hi + (match mapping { + EqMapping::IntegerOffset(_) => { 1 } + EqMapping::Set(_) => { 1 } + 
EqMapping::AlternatingAL => { 2 } + EqMapping::AlternatingUL => { 2 } + EqMapping::Single(_) => { 1 } + }) { + table.last_mut().unwrap().hi = src; + } else { + table.push(EqTableEntry { + lo: src, + hi: src, + mapping, + }); + } + } + return EqTable { table, sets: vec![] }; + } + + /// Creates a diff-based equivalence table from a given full table `child` and parent mapping `parent`, + /// such that mappings that are equal in both `parent` and `child` are removed from the new table. + /// + fn create_diff(parent: &EqTable, child: &EqTable) -> EqTable { + fn mapping_eq(parent: &EqTable, child: &EqTable, cur_parent: &EqTableEntry, cur_child: &EqTableEntry) -> bool { + match (&cur_parent.mapping, &cur_child.mapping) { + (EqMapping::Set(lut_parent), EqMapping::Set(lut_child)) => { + parent.sets[*lut_parent].eq(&child.sets[*lut_child]) + } + (a, b) => { a.eq(b) } + } + } + fn mapping_clone(child: &EqTable, cur_child: &EqTableEntry, eq_table_diff: &mut Vec, sets_diff: &mut Vec>, sets_map: &mut Vec>) { + if eq_table_diff.last().map(|last| last.hi == cur_child.hi).unwrap_or(false) { + return; + } + match cur_child.mapping { + EqMapping::Set(set_index) => { + match &sets_map[set_index] { + Some(mapped_index) => { + eq_table_diff.push(cur_child.with_mapping(EqMapping::Set(*mapped_index))); + } + None => { + eq_table_diff.push(cur_child.with_mapping(EqMapping::Set(sets_diff.len()))); + sets_map[set_index] = Some(sets_diff.len()); + sets_diff.push(child.sets[set_index].clone()); + } + } + } + _ => { + eq_table_diff.push(cur_child.clone()); + } + } + } + + let mut eq_table_diff: Vec = Vec::with_capacity(child.table.len()); + let mut lut_diff: Vec> = Vec::with_capacity(child.sets.len()); + let mut lut_map: Vec> = vec![None; child.sets.len()]; + let mut i_parent = parent.table.iter(); + let mut i_child = child.table.iter(); + let mut next_parent = i_parent.next(); + let mut next_child = i_child.next(); + let mut tmp; + loop { + match (next_parent, next_child) { + 
(Some(cur_parent), Some(cur_child)) => { + if cur_parent.hi < cur_child.lo { + // parent mapping not present in child - overwrite with dummy + eq_table_diff.push(cur_parent.as_dummy()); + next_parent = i_parent.next(); + } else if cur_child.hi < cur_parent.lo { + // child mapping not present in parent - keep + mapping_clone(child, cur_child, &mut eq_table_diff, &mut lut_diff, &mut lut_map); + next_child = i_child.next(); + } else { + // ranges intersect + if cur_parent.lo < cur_child.lo { + // parent mapping partially not present in child, overwrite non-intersecting lower range with dummy + assert!(cur_parent.hi >= cur_child.lo, "{:?}, {:?}", cur_parent, cur_child); + eq_table_diff.push(cur_parent.with_hi(cur_child.lo - 1).as_dummy()); + } + if cur_child.lo < cur_parent.lo || !mapping_eq(&parent, &child, &cur_parent, &cur_child) { + // child mapping partially not present in parent, or not equal, keep + mapping_clone(child, cur_child, &mut eq_table_diff, &mut lut_diff, &mut lut_map); + } + if cur_parent.hi > cur_child.hi { + // remove intersecting part of parent range + tmp = cur_parent.with_lo(cur_child.hi + 1); + next_parent = Some(&tmp); + next_child = i_child.next(); + } else if cur_child.hi > cur_parent.hi { + // remove intersecting part of child range + tmp = cur_child.with_lo(cur_parent.hi + 1); + next_child = Some(&tmp); + next_parent = i_parent.next(); + } else { + next_child = i_child.next(); + next_parent = i_parent.next(); + } + } + } + (Some(cur_parent), None) => { + // parent mapping not present in child - overwrite with dummy + eq_table_diff.push(cur_parent.as_dummy()); + next_parent = i_parent.next(); + } + (None, Some(cur_child)) => { + // child mapping not present in parent - keep + mapping_clone(child, cur_child, &mut eq_table_diff, &mut lut_diff, &mut lut_map); + next_child = i_child.next(); + } + (None, None) => { + break; + } + } + } + let diff = EqTable { table: eq_table_diff, sets: lut_diff }; + for e in &child.table { + for i in e.lo..e.hi { 
+ let vec1: Vec = child.lookup(i).unwrap(); + let vec2: Vec = diff.lookup(i).unwrap_or_else(|| { parent.lookup(i).unwrap() }); + assert_eq!(HashSet::::from_iter(vec1), HashSet::::from_iter(vec2), ""); + } + } + diff + } + + fn lookup(&self, key: usize) -> Option> { + self.binary_search(key).map(|e| { + return match &e.mapping { + EqMapping::IntegerOffset(o) => { + vec![key, (o + (key as i32)) as usize] + } + EqMapping::Set(i) => { + self.sets[*i].clone() + } + EqMapping::AlternatingAL => { + vec![key, key ^ 1] + } + EqMapping::AlternatingUL => { + vec![key, ((key - 1) ^ 1) + 1] + } + EqMapping::Single(value) => { + vec![key, *value] + } + }; + }) + } + + #[allow(dead_code)] + fn print_size(&self) { + println!("{:>25} size: {:>6} bytes", "equivalence table", (self.table.len() * 3 + + self.sets.len() + + self.sets.iter().map(|x| x.len()).reduce(|a, b| a + b).unwrap_or(0)) * 4); + } + + fn dump_java(&self, out: &mut Vec, name: &str, parent: Option<&str>) -> Result<()> { + writeln!(out, "private static final CaseFoldEquivalenceTable {} = new CaseFoldEquivalenceTable({}, new CodePointSet[] {{", name, parent.unwrap_or("null"))?; + for set in &self.sets { + writeln!(out, "rangeSet({}),", list_to_ranges_str(set))?; + } + write!(out, "}},")?; + self.dump_java_table(out)?; + Ok(()) + } + + fn dump_java_one_way(&self, out: &mut Vec, name: &str, parent: Option<&str>) -> Result<()> { + write!(out, "private static final CaseFoldTable {} = new CaseFoldTable({}, ", name, parent.unwrap_or("null"))?; + assert!(self.sets.is_empty()); + self.dump_java_table(out)?; + Ok(()) + } + + fn dump_java_table(&self, out: &mut Vec) -> Result<()> { + writeln!(out, "new int[] {{")?; + for e in &self.table { + write!(out, "{:#08x}, {:#08x}, ", e.lo, e.hi)?; + match &e.mapping { + EqMapping::IntegerOffset(o) => { + writeln!(out, "INTEGER_OFFSET, {},", *o)?; + } + EqMapping::Set(i) => { + writeln!(out, "DIRECT_MAPPING, {},", *i)?; + } + EqMapping::AlternatingAL => { + writeln!(out, "ALTERNATING_AL, 
0,")?; + } + EqMapping::AlternatingUL => { + writeln!(out, "ALTERNATING_UL, 0,")?; + } + EqMapping::Single(v) => { + writeln!(out, "DIRECT_SINGLE, {},", *v)?; + } + } + } + writeln!(out, "}});")?; + Ok(()) + } +} + +fn list_to_ranges(set: &Vec) -> Vec { + let mut ranges: Vec = vec![]; + if set.len() > 0 { + ranges.push(set[0]); + let mut last = set[0]; + for v in set[1..].iter().cloned() { + assert!(v >= last); + if v != last + 1 { + ranges.push(last); + ranges.push(v); + } + last = v; + } + ranges.push(last); + } + return ranges; +} + +fn list_to_ranges_str(set: &Vec) -> String { + list_to_ranges(set).iter().map(|v| format!("{:#08x}", v)).collect::>().join(", ") +} + +struct CollationMap<'a> { + full_map: Vec, + name: &'a str, + equality: EqTable, + equality_diff: Option, + order: OrderTable, + order_reverse: OrderTable, + parent: Option<&'a CollationMap<'a>>, +} + +impl CollationMap<'_> { + /// Sorts a given list of collation elements with a given collator and creates lookup tables that allow + /// - looking up the sorting index of a given element (table `order`) + /// - looking up the element corresponding to a given sorting index (table `order_reverse`) + /// - looking up the set of elements that are considered equivalent to a given element (table `equality`) + /// + fn create<'a, F: Fn(&str, &str) -> Ordering>(collator: F, name: &'a str, + base_map: &'a Vec, + collation_elements: &Vec, + parent: Option<&'a CollationMap<'a>>) -> CollationMap<'a> { + let mut full_map: Vec = base_map.to_vec(); + // sort by initial index first, to keep the order of equal elements stable + full_map.sort_by_key(|a| a.index_base); + full_map.sort_by(|a, b| collator(&a.element.string, &b.element.string)); + + let eq_table = EqTable::create(&collator, &full_map); + let eq_diff = parent.map(|p| { EqTable::create_diff(&p.equality, &eq_table) }); + + for i in 0..full_map.len() { + full_map[i].index_src = full_map[i].index_dst; + full_map[i].index_dst = i; + } + + let table_dst_src = 
OrderTable::create(&full_map, |e| e.index_dst, |e| e.index_src); + full_map.sort_by_key(|e| e.index_src); + + let table_src_dst = OrderTable::create(&full_map, |e| e.index_src, |e| e.index_dst); + + let map = CollationMap { name, full_map, equality: eq_table, equality_diff: eq_diff, order: table_src_dst, order_reverse: table_dst_src, parent }; + map.verify(&collator, collation_elements); + map + } + + fn equality_diff(&self) -> &EqTable { + match &self.equality_diff { + None => { &self.equality } + Some(diff) => { diff } + } + } + + fn verify Ordering>(&self, collator: F, collation_elements: &Vec) { + for e in &self.full_map { + assert_eq!(self.order.lookup(e.index_src), e.index_dst, "elem: {:?}, table entry: {:?}", e, self.order.binary_search(e.index_src)); + assert_eq!(self.order_reverse.lookup(e.index_dst), e.index_src, "elem: {:?}, table entry: {:?}", e, self.order_reverse.binary_search(e.index_dst)); + self.equality_diff().lookup(e.index_base).map(|x| { + for pair in x.windows(2) { + assert_eq!(collator(&collation_elements[pair[0]].string, &collation_elements[pair[1]].string), Ordering::Equal, "{:?} <=> {:?}", + collation_elements[pair[0]].string, collation_elements[pair[1]].string); + } + }); + } + } + + fn dump(&self, path: &Path) -> std::io::Result { + let mut file = File::create(path).expect("File open failed"); + file.write("TRGX".as_bytes())?; + file.write(&FILE_FORMAT_VERSION.to_le_bytes())?; + match self.parent { + Some(parent) => { + write_str(&mut file, parent.name)?; + } + None => { + write_usize(&mut file, 0)?; + } + } + write_usize(&mut file, self.equality_diff().sets.len())?; + for e in &self.equality_diff().sets { + write_usize(&mut file, e.len())?; + for v in e { + write_usize(&mut file, *v)?; + } + } + write_usize(&mut file, self.equality_diff().table.len())?; + for e in &self.equality_diff().table { + write_usize(&mut file, e.lo)?; + write_usize(&mut file, e.hi)?; + match &e.mapping { + EqMapping::IntegerOffset(o) => { + file.write(&[0])?; + 
write_i32(&mut file, *o)?; + } + EqMapping::Set(i) => { + file.write(&[1])?; + write_usize(&mut file, *i)?; + } + EqMapping::AlternatingAL => { + file.write(&[2])?; + } + EqMapping::AlternatingUL => { + file.write(&[3])?; + } + EqMapping::Single(v) => { + file.write(&[4])?; + write_usize(&mut file, *v)?; + } + } + } + Self::dump_order_table(&mut file, &self.order)?; + Self::dump_order_table(&mut file, &self.order_reverse) + } + + fn dump_order_table(mut file: &mut File, order_table: &OrderTable) -> std::io::Result { + write_usize(&mut file, order_table.table.len())?; + for e in &order_table.table { + write_usize(&mut file, e.lo)?; + write_usize(&mut file, e.hi)?; + match &e.mapping { + IntegerOffset(o) => { + file.write(&[0])?; + write_i32(&mut file, *o)?; + } + LUT(tbl) => { + file.write(&[1])?; + write_usize(&mut file, tbl.len())?; + for v in tbl { + write_usize(&mut file, *v)?; + } + } + } + } + Ok(0) + } + + #[allow(dead_code)] + fn print_size(&self) { + println!("collation \"{}\":", self.name); + self.equality_diff().print_size(); + self.order.print_size("order mapping"); + self.order_reverse.print_size("reverse order mapping"); + println!(); + } +} + +fn write_str(file: &mut File, string: &str) -> std::io::Result { + write_usize(file, string.len())?; + file.write(string.as_bytes()) +} + +fn write_i32(file: &mut File, i: i32) -> std::io::Result { + let bytes = &i.to_le_bytes(); + assert!(bytes[3] == 0 || bytes[3] == 0xff, "assumption broken: {:?} is larger than 0xff_ffff", i); + file.write(&bytes[0..3]) +} + +fn write_usize(file: &mut File, i: usize) -> std::io::Result { + assert!(i <= 0x7f_ffff, "assumption broken: {:?} is larger than 0x7f_ffff", i); + file.write(&i.to_le_bytes()[0..3]) +} + +enum CollatorSetting { + Default, + CI, + AI, + CIAI, +} + +impl Display for CollatorSetting { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", match self { + CollatorSetting::Default => { "DEFAULT" } + CollatorSetting::CI => { "CI" } + 
CollatorSetting::AI => { "AI" } + CollatorSetting::CIAI => { "CI_AI" } + }) + } +} + +fn get_collator_from_locale(locale: &Locale, collator_setting: &CollatorSetting) -> impl Fn(&str, &str) -> Ordering { + let mut options = CollatorOptions::new(); + match collator_setting { + CollatorSetting::Default => { + options.strength = Some(Strength::Secondary); + options.case_level = Some(CaseLevel::On); + } + CollatorSetting::CI => { + options.strength = Some(Strength::Secondary); + options.case_level = Some(CaseLevel::Off); + } + CollatorSetting::AI => { + options.strength = Some(Strength::Primary); + options.case_level = Some(CaseLevel::On); + } + CollatorSetting::CIAI => { + options.strength = Some(Strength::Primary); + options.case_level = Some(CaseLevel::Off); + } + } + let collator: Collator = Collator::try_new( + &locale.into(), + options, + ).unwrap(); + move |a: &str, b: &str| { + collator.compare(a, b) + } +} + +#[allow(dead_code)] +fn dump_collation<'a, F: Fn(&str, &str) -> Ordering>(collator: F, folder: &Path, name: &'a str, collation_elements: &Vec, parent_map: &'a CollationMap<'a>) -> CollationMap<'a> { + let map = CollationMap::create(collator, name, &parent_map.full_map, &collation_elements, Some(&parent_map)); + map.dump(folder.join(format!("{}.trtbl", name).as_str()).as_path()).expect("file dump failed"); + map +} + +#[allow(dead_code)] +fn dump_collations<'a>(base_map: &Vec, collation_elements: &Vec) -> std::io::Result<()> { + let time = Instant::now(); + + for collator_setting in [CollatorSetting::Default, CollatorSetting::CI, CollatorSetting::AI, CollatorSetting::CIAI] { + let folder = Path::new(OUTPUT_FOLDER).join(collator_setting.to_string()); + if !folder.exists() { + std::fs::create_dir(folder.as_path())?; + } + let map_ducet = CollationMap::create(get_collator_from_locale(&Locale::default(), &collator_setting), "ducet", &base_map, &collation_elements, None); + map_ducet.dump(folder.join("ducet.trtbl").as_path())?; + println!("A -> {:?}", 
map_ducet.equality_diff().lookup(0x41).map(|m| m.iter().map(|x| char::from_u32(*x as u32).unwrap()).collect::>()).unwrap_or(Vec::new())); + } + + println!("done, took {:?}", time.elapsed()); + Ok(()) +} + +fn main() -> Result<()> { + oracledb_start_docker_container(); + generate_case_fold_data()?; + oracledb_generate_posix_char_classes()?; + oracledb_generate_tests()?; + Ok(()) +} + +fn generate_case_fold_data() -> Result<()> { + let mut multi_character_strings: HashMap = HashMap::new(); + + let unicode_version = "15.0.0"; + let unicode_version_oracle_db = "12.1.0"; + let unicode_data_txt = fetch(format!("https://www.unicode.org/Public/{}/ucd/UnicodeData.txt", unicode_version))?; + let unicode_case_folding_txt = fetch(format!("https://www.unicode.org/Public/{}/ucd/CaseFolding.txt", unicode_version))?; + let unicode_case_folding_txt_oracle = fetch(format!("https://www.unicode.org/Public/{}/ucd/CaseFolding.txt", unicode_version_oracle_db))?; + let unicode_special_casing = fetch(format!("https://www.unicode.org/Public/{}/ucd/SpecialCasing.txt", unicode_version))?; + + let eq_unicode_simple = unicode_case_folding(&unicode_case_folding_txt, &mut multi_character_strings, Simple)?; + let eq_js_nu = js_non_unicode_case_folding(&unicode_data_txt, &unicode_special_casing, &mut multi_character_strings)?; + let eq_python = python_unicode_case_folding(&unicode_data_txt, &unicode_special_casing, &mut multi_character_strings)?; + let eq_ruby = unicode_case_folding_one_way(&unicode_case_folding_txt, &mut multi_character_strings, Full)?; + let eq_oracle = unicode_case_folding_one_way(&unicode_case_folding_txt_oracle, &mut multi_character_strings, Full)?; + let eq_oracle_ai = oracledb_extract_ai_case_fold_table(&mut multi_character_strings)?; + let foldable_chars: Vec = parse_case_folding_txt(&unicode_case_folding_txt, Simple)?.iter().map(|(src, _)| src.chars().next().unwrap() as usize).collect(); + + let mut out = vec![]; + writeln!(out)?; + writeln!(out)?; + + writeln!(out, 
"public static final String[] MULTI_CHAR_SEQUENCES = {{")?; + let mut strings_ordered = vec![""; multi_character_strings.len()]; + for (s, i) in multi_character_strings.iter() { + strings_ordered[*i - 0x11_0000] = s.as_str(); + } + for s in strings_ordered { + writeln!(out, "\"{}\",", java_string_escape(s))?; + } + writeln!(out, "}};")?; + let unicode_simple_name = format!("UNICODE_{}_SIMPLE", unicode_version.replace(".", "_")); + let unicode_full_name = format!("UNICODE_{}_FULL", unicode_version.replace(".", "_")); + eq_unicode_simple.dump_java(&mut out, unicode_simple_name.as_str(), None)?; + EqTable::create_diff(&eq_unicode_simple, &eq_js_nu).dump_java(&mut out, "JS_NON_UNICODE", Some(unicode_simple_name.as_str()))?; + EqTable::create_diff(&eq_unicode_simple, &eq_python).dump_java(&mut out, "PYTHON_UNICODE", Some(unicode_simple_name.as_str()))?; + eq_ruby.dump_java_one_way(&mut out, unicode_full_name.as_str(), None)?; + EqTable::create_diff(&eq_ruby, &eq_oracle).dump_java_one_way(&mut out, "ORACLE_DB", Some(unicode_full_name.as_str()))?; + eq_oracle_ai.dump_java_one_way(&mut out, "ORACLE_DB_AI", None)?; + writeln!(out, "public static final CodePointSet FOLDABLE_CHARACTERS = rangeSet({});", list_to_ranges_str(&foldable_chars))?; + + writeln!(out)?; + insert_generated_code(Path::new(PATH_GRAAL_REPO).join(PATH_CASE_FOLD_DATA).as_path(), &out)?; + Ok(()) +} + +fn java_string_escape(s: &str) -> String { + s.chars().map(|c| { + if c == '\\' { + return "\\\\".to_string(); + } + if ' ' <= c && c <= '~' { + return c.to_string(); + } + let mut buf = [0; 2]; + return c.encode_utf16(&mut buf).iter().map(|v| format!("\\u{:04x}", v)).collect::(); + }).collect::() +} + +fn insert_generated_code(path: &Path, code: &Vec) -> Result<()> { + let file_content = fs::read_to_string(path)?; + let pos_begin = file_content.find(GENERATED_CODE_MARKER_BEGIN).expect(format!("generated code begin marker not found in {}", path.to_str().unwrap()).as_str()); + let pos_end = 
file_content.find(GENERATED_CODE_MARKER_END).expect(format!("generated code end marker not found in {}", path.to_str().unwrap()).as_str()); + let mut f = File::create(path)?; + f.write(file_content[0..pos_begin + GENERATED_CODE_MARKER_BEGIN.len()].as_bytes())?; + f.write(code)?; + f.write(file_content[pos_end..file_content.len()].as_bytes())?; + Ok(()) +} + +fn fetch(url: String) -> Result { + println!("fetching {}", url); + let path = Path::new("tmp").join(Path::new(&Url::parse(url.as_str()).unwrap().path()[1..])); + fs::create_dir_all(path.parent().unwrap()).expect("mkdir failed"); + if path.exists() { + return Ok(fs::read_to_string(path)?); + } + let body = reqwest::blocking::get(url)?.text()?; + fs::write(path, &body).expect("write to download cache failed"); + Ok(body) +} + +fn unicode_table(file: &String) -> Result> { + Ok(csv::ReaderBuilder::new().has_headers(false).delimiter(b';').comment(Some(b'#')).trim(Trim::All).flexible(true).from_reader(file.as_bytes())) +} + +fn unicode_table_cell(record: &StringRecord, i: usize) -> String { + parse_hex_chars(record.get(i).unwrap()) +} + +fn parse_hex_chars(s: &str) -> String { + s.split(' ').map(|c| { + char::from_u32(u32::from_str_radix(c, 16).unwrap()).unwrap() + }).collect::() +} + +enum UnicodeCaseFoldingVariant { + Simple, + Full, +} + +impl UnicodeCaseFoldingVariant { + fn type_name(&self) -> &'static str { + match self { + Simple => { "S" } + Full => { "F" } + } + } +} + +fn parse_case_folding_txt(unicode_case_folding: &String, variant: UnicodeCaseFoldingVariant) -> Result> { + Ok(Vec::from_iter(unicode_table(unicode_case_folding)?.records().flat_map(|result| { + let record = result.ok()?; + let t = record.get(1).unwrap(); + if t == "C" || t == variant.type_name() { + return Some((unicode_table_cell(&record, 0), unicode_table_cell(&record, 2))); + } + None + }))) +} + +fn unicode_case_folding(unicode_case_folding: &String, multi_character_strings: &mut HashMap, variant: UnicodeCaseFoldingVariant) -> Result { 
+ let mut eq_builder = EquivalenceBuilder::new(multi_character_strings); + for (src, dst) in parse_case_folding_txt(unicode_case_folding, variant)? { + eq_builder.add_equivalence(src.as_str(), dst.as_str()); + } + Ok(eq_builder.create_eq_table()) +} + +fn unicode_case_folding_one_way(unicode_case_folding: &String, multi_character_strings: &mut HashMap, variant: UnicodeCaseFoldingVariant) -> Result { + let mut eq_builder = EquivalenceBuilder::new(multi_character_strings); + let mut mappings: Vec<(usize, usize)> = vec![]; + for (src, dst) in parse_case_folding_txt(unicode_case_folding, variant)? { + mappings.push((eq_builder.index(src.as_str()), eq_builder.index(dst.as_str()))); + } + Ok(EqTable::create_one_way_mapping(mappings)) +} + +fn js_non_unicode_case_folding(unicode_data: &String, unicode_special_casing: &String, multi_character_strings: &mut HashMap) -> Result { + let mut upper_map: HashMap = HashMap::new(); + for result in unicode_table(unicode_data)?.records() { + let record = result?; + if record.get(12).unwrap() == "" { + // Drop entries without toUppercase mapping + continue; + } + upper_map.insert(unicode_table_cell(&record, 0), unicode_table_cell(&record, 12)); + } + for result in unicode_table(unicode_special_casing)?.records() { + let record = result?; + if record.len() > 5 { + // Drop entries with conditions + continue; + } + upper_map.insert(unicode_table_cell(&record, 0), unicode_table_cell(&record, 3)); + } + let mut eq_builder = EquivalenceBuilder::new(multi_character_strings); + for (chr, upper) in upper_map { + let c = chr.chars().next().unwrap(); + let u = upper.chars().next().unwrap(); + if upper.chars().count() > 1 || u >= '\u{10000}' { + // Only follow rules which give map to a single UTF-16 code unit + continue; + } + if c > '\u{7f}' && u <= '\u{7f}' { + // Do not allow non-ASCII characters to cross into ASCII. 
+ continue; + } + if c == u { + // Drop trivial mappings + continue; + } + eq_builder.add_equivalence(chr.as_str(), upper.as_str()); + } + Ok(eq_builder.create_eq_table()) +} + +fn python_unicode_case_folding(unicode_data: &String, unicode_special_casing: &String, multi_character_strings: &mut HashMap) -> Result { + fn read_data_file_mapping(unicode_data_file: &String, multi_character_strings: &mut HashMap, cell_src: usize, cell_dst: usize) -> Result>> { + let mut eq_builder = EquivalenceBuilder::new(multi_character_strings); + for result in unicode_table(unicode_data_file)?.records() { + let record = result?; + let dst = record.get(cell_dst).unwrap(); + if dst != "" { + eq_builder.add_equivalence(unicode_table_cell(&record, cell_src).as_str(), parse_hex_chars(dst).as_str()); + } + } + Ok(eq_builder.equivalences) + } + + fn read_special_casing_mapping(unicode_special_casing: &String, multi_character_strings: &mut HashMap, cell_src: usize, cell_dst: usize) -> Result>> { + let mut eq_builder = EquivalenceBuilder::new(multi_character_strings); + for result in unicode_table(unicode_special_casing)?.records() { + let record = result?; + if record.len() > 5 { + // Drop entries with conditions + continue; + } + let c = unicode_table_cell(&record, cell_src); + let dst = unicode_table_cell(&record, cell_dst); + if dst.chars().count() > 1 { + eq_builder.add_equivalence_src_only(c.as_str(), dst.as_str()); + } + } + Ok(eq_builder.equivalences) + } + + let eq_lower = read_data_file_mapping(unicode_data, multi_character_strings, 0, 12)?; + let eq_upper = read_data_file_mapping(unicode_data, multi_character_strings, 0, 13)?; + let eq_special_lower = read_special_casing_mapping(unicode_special_casing, multi_character_strings, 0, 1)?; + let eq_special_upper = read_special_casing_mapping(unicode_special_casing, multi_character_strings, 0, 3)?; + let merged = Vec::from_iter(merge_sets(merge_sets(eq_lower.values(), eq_special_lower.values()).iter(), merge_sets(eq_upper.values(), 
eq_special_upper.values()).iter()).iter().map(|set| Vec::from_iter(set.iter().cloned()))); + Ok(EqTable::from_vec(merged)) +} + +struct EquivalenceBuilder<'a> { + multi_character_strings: &'a mut HashMap, + equivalences: HashMap>, +} + +impl EquivalenceBuilder<'_> { + fn new(multi_character_strings: &mut HashMap) -> EquivalenceBuilder { + EquivalenceBuilder { multi_character_strings, equivalences: Default::default() } + } + + fn index(&mut self, s: &str) -> usize { + if s.chars().count() == 1 { + return s.chars().next().unwrap() as usize; + } + let next_id = self.multi_character_strings.len() + 0x11_0000; + return *self.multi_character_strings.entry(s.to_string()).or_insert(next_id); + } + + fn add_equivalence(&mut self, a: &str, b: &str) { + let i = self.index(a); + let j = self.index(b); + let buf = self.equivalences.entry(j).or_default(); + if buf.len() == 0 { + buf.push(j); + } + buf.push(i); + } + + fn add_equivalence_src_only(&mut self, a: &str, b: &str) { + let i = self.index(a); + let j = self.index(b); + self.equivalences.entry(j).or_default().push(i); + } + + fn create_eq_table(&mut self) -> EqTable { + EqTable::from_vec(Vec::from_iter(self.equivalences.values().cloned())) + } +} + +fn merge_sets<'a, I, Inner>(a: I, b: I) -> Vec> where I: Iterator, Inner: IntoIterator + Copy + Debug { + let sets: Vec> = Vec::from_iter(a.map(|x| HashSet::from_iter(x.into_iter().cloned()))); + let m: HashMap = HashMap::from_iter(sets.iter().enumerate().flat_map(|(i, set)| { + set.iter().cloned().map(move |v| (v, i)) + })); + let mut to_copy = vec![true; sets.len()]; + let mut ret: Vec> = b.map(|vec| { + HashSet::from_iter(vec.into_iter().flat_map(|v: &usize| m.get(v)).flat_map(|i| { + to_copy[*i] = false; + sets.get(*i) + }).flatten().cloned().chain(vec.into_iter().cloned())) + }).collect(); + for (i, copy) in to_copy.iter().enumerate() { + if *copy { + ret.push(sets.get(i).unwrap().clone()); + } + } + ret +} + +fn oracledb_start_docker_container() { + if 
String::from_utf8(Command::new("docker").args(["container", "ls", "--filter", "name=oracle-db", "--format", "{{json .Names}}"]).output().expect("docker ls failed").stdout).expect("could not decode output of 'docker ls'").trim() == "\"oracle-db\"" { + return; + } + if String::from_utf8(Command::new("docker").args(["container", "ls", "-a", "--filter", "name=oracle-db", "--format", "{{json .Names}}"]).output().expect("docker ls failed").stdout).expect("could not decode output of 'docker ls'").trim() != "\"oracle-db\"" { + Command::new("docker").args(["run", "-d", "--name", "oracle-db", "-p", "1521:1521", "-p", "5500:5500", "-e", "ORACLE_PWD=passwd", "container-registry.oracle.com/database/express:21.3.0-xe"]).output().expect("docker run failed"); + } + let docker_start = Command::new("docker").args(["start", "oracle-db"]).output().expect("docker start failed"); + if docker_start.status.code().unwrap() != 0 { + println!("{}", String::from_utf8(docker_start.stderr).unwrap()); + panic!("docker start failed"); + } + // wait for db startup + std::thread::sleep(std::time::Duration::from_secs(8)); +} + +fn oracledb_connect() -> std::result::Result { + Connector::new("sys", "passwd", "//localhost/XE").privilege(Privilege::Sysdba).connect().map_err(|error| { + match &error { + oracle::Error::OciError(db_error) => { + if db_error.code() == 12637 { + println!("Could not connect to docker container, you may have to add {{ \"userland-proxy\": false }} to /etc/docker/daemon.json"); + println!("see https://franckpachot.medium.com/19c-instant-client-and-docker-1566630ab20e"); + } + error + } + _ => error + } + }) +} + +fn oracledb_extract_ai_case_fold_table<'a>(multi_character_strings: &mut HashMap) -> Result { + let conn = oracledb_connect()?; + + let mut eq_builder = EquivalenceBuilder::new(multi_character_strings); + let mut mappings: Vec<(usize, usize)> = vec![]; + + let query = "select nlssort(:c, 'nls_sort = binary_ai') from dual"; + println!("extracting accent insensitive 
mappings from OracleDB"); + let mut statement = conn.statement(query).build()?; + for s in (0u32..0xd800).chain(0xe000..0x110000).map(|i| String::from(char::from_u32(i).unwrap())).progress_count(0xd800 + (0x110000 - 0xe000)) { + assert_eq!(s.chars().count(), 1); + let base_chars_bytes = statement.query_row_as::>(&[&s]).unwrap(); + let base_chars_u16: Vec = base_chars_bytes.chunks_exact(2).into_iter().map(|a| u16::from_le_bytes([a[1], a[0]])).collect(); + let base_chars = String::from_utf16(base_chars_u16.as_slice()).unwrap(); + if base_chars != s { + mappings.push((eq_builder.index(s.as_str()), eq_builder.index(base_chars.as_str()))); + } + } + Ok(EqTable::create_one_way_mapping(mappings)) +} + +fn oracledb_create_chars_table(conn: &Connection) -> Result<()> { + match conn.query("select * from chars where v = 0", &[]) { + Ok(_) => { + Ok(()) + } + Err(oracle::Error::OciError(db_error)) if db_error.code() == 942 => { + // table does not exist + conn.execute("create table chars(v int, c varchar2(32))", &[])?; + let query = "insert into chars(v, c) values (:v, :c)"; + let mut statement = conn.batch(query, 0x1000).build()?; + for i in (0u32..0xd800).chain(0xe000..0x110000) { + statement.append_row(&[&i, &String::from(char::from_u32(i).unwrap())])?; + } + statement.execute()?; + conn.commit()?; + Ok(()) + } + Err(e) => Err(e.into()) + } +} + +fn oracledb_generate_posix_char_classes() -> Result<()> { + let conn = oracledb_connect()?; + oracledb_create_chars_table(&conn)?; + let query = "SELECT v from chars WHERE REGEXP_LIKE(c, :r, '') ORDER BY v"; + let mut statement = conn.statement(query).build()?; + let mut out = vec![]; + for name in ["alpha", "blank", "cntrl", "digit", "graph", "lower", "print", "punct", "space", "upper", "xdigit"] { + let mut chars: Vec = vec![]; + for row_result in statement.query_as::(&[&format!("[[:{}:]]", name).as_str()])? 
{ + chars.push(row_result?); + } + writeln!(out, "\n\nPOSIX_CHAR_CLASSES.put(\"{}\", CodePointSet.createNoDedup(", name)?; + writeln!(out, "{}));\n", list_to_ranges_str(&chars))?; + } + insert_generated_code(Path::new(PATH_GRAAL_REPO).join(PATH_ORACLE_DB_CONSTANTS).as_path(), &out)?; + Ok(()) +} + +fn oracledb_generate_tests() -> Result<()> { + fn count_groups(pattern: &str) -> i32 { + let mut par_open = 0; + let mut escaped = false; + let mut n = 1; + for c in pattern.chars() { + if !escaped { + if c == '(' { + par_open += 1; + } else if c == ')' { + if par_open > 0 { + par_open -= 1; + n += 1; + } + } + } + escaped = c == '\\'; + } + return min(n, 10); + } + + let conn = oracledb_connect()?; + let query = "SELECT REGEXP_INSTR(:input, :pattern, :fromIndex, :occurrence, :startOrEnd, :flags, :iGroup) from dual"; + let mut statement = conn.statement(query).build()?; + let mut out = vec![]; + writeln!(out)?; + for (pattern, flags, input) in [ + ("abracadabra$", "", "abracadabracadabra"), + ("a...b", "", "abababbb"), + ("XXXXXX", "", "..XXXXXX"), + ("\\)", "", "()"), + ("a]", "", "a]a"), + ("}", "", "}"), + ("\\}", "", "}"), + ("\\]", "", "]"), + ("]", "", "]"), + ("]", "", "]"), + ("{", "", "{"), + ("}", "", "}"), + ("^a", "", "ax"), + ("\\^a", "", "a^a"), + ("a\\^", "", "a^"), + ("a$", "", "aa"), + ("a\\$", "", "a$"), + ("a($)", "", "aa"), + ("a*(^a)", "", "aa"), + ("(..)*(...)*", "", "a"), + ("(..)*(...)*", "", "abcd"), + ("(ab|a)(bc|c)", "", "abc"), + ("(ab)c|abc", "", "abc"), + ("a{0}b", "", "ab"), + ("(a*)(b?)(b+)b{3}", "", "aaabbbbbbb"), + ("(a*)(b{0,1})(b{1,})b{3}", "", "aaabbbbbbb"), + ("a{9876543210}", "", "a"), + ("((a|a)|a)", "", "a"), + ("(a*)(a|aa)", "", "aaaa"), + ("a*(a.|aa)", "", "aaaa"), + ("a(b)|c(d)|a(e)f", "", "aef"), + ("(a|b)?.*", "", "b"), + ("(a|b)c|a(b|c)", "", "ac"), + ("(a|b)c|a(b|c)", "", "ab"), + ("(a|b)*c|(a|ab)*c", "", "abc"), + ("(a|b)*c|(a|ab)*c", "", "xc"), + ("(.a|.b).*|.*(.a|.b)", "", "xa"), + ("a?(ab|ba)ab", "", "abab"), + 
("a?(ac{0}b|ba)ab", "", "abab"), + ("ab|abab", "", "abbabab"), + ("aba|bab|bba", "", "baaabbbaba"), + ("aba|bab", "", "baaabbbaba"), + ("(aa|aaa)*|(a|aaaaa)", "", "aa"), + ("(a.|.a.)*|(a|.a...)", "", "aa"), + ("ab|a", "", "xabc"), + ("ab|a", "", "xxabc"), + ("(Ab|cD)*", "", "aBcD"), + ("[^-]", "", "--a"), + ("[a-]*", "", "--a"), + ("[a-m-]*", "", "--amoma--"), + (":::1:::0:|:::1:1:0:", "", ":::0:::1:::1:::0:"), + (":::1:::0:|:::1:1:1:", "", ":::0:::1:::1:::0:"), + ("[[:upper:]]", "", "A"), + ("[[:lower:]]+", "", "`az{"), + ("[[:upper:]]+", "", "@AZ["), + ("[[-]]", "", "[[-]]"), + ("\\n", "", "\\n"), + ("\\n", "", "\\n"), + ("[^a]", "", "\\n"), + ("\\na", "", "\\na"), + ("(a)(b)(c)", "", "abc"), + ("xxx", "", "xxx"), + ("(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\\* */?)0*[6-7]))([^0-9]|$)", "", "feb 6,"), + ("(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\\* */?)0*[6-7]))([^0-9]|$)", "", "2/7"), + ("(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\\* */?)0*[6-7]))([^0-9]|$)", "", "feb 1,Feb 6"), + ("((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))", "", "x"), + ("((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))*", "", "xx"), + ("a?(ab|ba)*", "", "ababababababababababababababababababababababababababababababababababababababababa"), + ("abaa|abbaa|abbbaa|abbbbaa", "", "ababbabbbabbbabbbbabbbbaa"), + ("abaa|abbaa|abbbaa|abbbbaa", "", "ababbabbbabbbabbbbabaa"), + ("aaac|aabc|abac|abbc|baac|babc|bbac|bbbc", "", "baaabbbabac"), + (".*", "", "\\x01\\xff"), + ("aaaa|bbbb|cccc|ddddd|eeeeee|fffffff|gggg|hhhh|iiiii|jjjjj|kkkkk|llll", "", "XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa"), + ("aaaa\\nbbbb\\ncccc\\nddddd\\neeeeee\\nfffffff\\ngggg\\nhhhh\\niiiii\\njjjjj\\nkkkkk\\nllll", "", "XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa"), + ("a*a*a*a*a*b", "", "aaaaaaaaab"), + ("^", "", "a"), + ("$", "", "a"), + ("^$", "", "a"), + ("^a$", "", "a"), + ("abc", "", "abc"), + ("abc", "", "xabcy"), + ("abc", "", "ababc"), + ("ab*c", "", "abc"), + ("ab*bc", "", "abc"), + 
("ab*bc", "", "abbc"), + ("ab*bc", "", "abbbbc"), + ("ab+bc", "", "abbc"), + ("ab+bc", "", "abbbbc"), + ("ab?bc", "", "abbc"), + ("ab?bc", "", "abc"), + ("ab?c", "", "abc"), + ("^abc$", "", "abc"), + ("^abc", "", "abcc"), + ("abc$", "", "aabc"), + ("^", "", "abc"), + ("$", "", "abc"), + ("a.c", "", "abc"), + ("a.c", "", "axc"), + ("a.*c", "", "axyzc"), + ("a[bc]d", "", "abd"), + ("a[b-d]e", "", "ace"), + ("a[b-d]", "", "aac"), + ("a[-b]", "", "a-"), + ("a[b-]", "", "a-"), + ("a]", "", "a]"), + ("a[]]b", "", "a]b"), + ("a[^bc]d", "", "aed"), + ("a[^-b]c", "", "adc"), + ("a[^]b]c", "", "adc"), + ("ab|cd", "", "abc"), + ("ab|cd", "", "abcd"), + ("a\\(b", "", "a(b"), + ("a\\(*b", "", "ab"), + ("a\\(*b", "", "a((b"), + ("((a))", "", "abc"), + ("(a)b(c)", "", "abc"), + ("a+b+c", "", "aabbabc"), + ("a*", "", "aaa"), + ("(a*)*", "", "-"), + ("(a*)+", "", "-"), + ("(a*|b)*", "", "-"), + ("(a+|b)*", "", "ab"), + ("(a+|b)+", "", "ab"), + ("(a+|b)?", "", "ab"), + ("[^ab]*", "", "cde"), + ("(^)*", "", "-"), + ("a*", "", "a"), + ("([abc])*d", "", "abbbcd"), + ("([abc])*bcd", "", "abcd"), + ("a|b|c|d|e", "", "e"), + ("(a|b|c|d|e)f", "", "ef"), + ("((a*|b))*", "", "-"), + ("abcd*efg", "", "abcdefg"), + ("ab*", "", "xabyabbbz"), + ("ab*", "", "xayabbbz"), + ("(ab|cd)e", "", "abcde"), + ("[abhgefdc]ij", "", "hij"), + ("(a|b)c*d", "", "abcd"), + ("(ab|ab*)bc", "", "abc"), + ("a([bc]*)c*", "", "abc"), + ("a([bc]*)(c*d)", "", "abcd"), + ("a([bc]+)(c*d)", "", "abcd"), + ("a([bc]*)(c+d)", "", "abcd"), + ("a[bcd]*dcdcde", "", "adcdcde"), + ("(ab|a)b*c", "", "abc"), + ("((a)(b)c)(d)", "", "abcd"), + ("[A-Za-z_][A-Za-z0-9_]*", "", "alpha"), + ("^a(bc+|b[eh])g|.h$", "", "abh"), + ("(bc+d$|ef*g.|h?i(j|k))", "", "effgz"), + ("(bc+d$|ef*g.|h?i(j|k))", "", "ij"), + ("(bc+d$|ef*g.|h?i(j|k))", "", "reffgz"), + ("(((((((((a)))))))))", "", "a"), + ("multiple words", "", "multiple words yeah"), + ("(.*)c(.*)", "", "abcde"), + ("abcd", "", "abcd"), + ("a(bc)d", "", "abcd"), + 
("a[\u{0001}-\u{0003}]?c", "", "a\u{0002}c"), + ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Muammar Qaddafi"), + ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Mo'ammar Gadhafi"), + ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Muammar Kaddafi"), + ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Muammar Qadhafi"), + ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Muammar Gadafi"), + ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Mu'ammar Qadafi"), + ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Moamar Gaddafi"), + ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Mu'ammar Qadhdhafi"), + ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Muammar Khaddafi"), + ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Muammar Ghaddafy"), + ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Muammar Ghadafi"), + ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Muammar Ghaddafi"), + ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Muamar Kaddafi"), + ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Muammar Quathafi"), + ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Muammar Gheddafi"), + ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Moammar Khadafy"), + ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Moammar Qudhafi"), + ("a+(b|c)*d+", "", "aabcdd"), + ("^.+$", "", "vivi"), + ("^(.+)$", "", "vivi"), + ("^([^!.]+).att.com!(.+)$", "", "gryphon.att.com!eby"), + ("^([^!]+!)?([^!]+)$", "", "bas"), + ("^([^!]+!)?([^!]+)$", "", "bar!bas"), + ("^([^!]+!)?([^!]+)$", "", "foo!bas"), + ("^.+!([^!]+!)([^!]+)$", "", "foo!bar!bas"), + 
("((foo)|(bar))!bas", "", "bar!bas"), + ("((foo)|(bar))!bas", "", "foo!bar!bas"), + ("((foo)|(bar))!bas", "", "foo!bas"), + ("((foo)|bar)!bas", "", "bar!bas"), + ("((foo)|bar)!bas", "", "foo!bar!bas"), + ("((foo)|bar)!bas", "", "foo!bas"), + ("(foo|(bar))!bas", "", "bar!bas"), + ("(foo|(bar))!bas", "", "foo!bar!bas"), + ("(foo|(bar))!bas", "", "foo!bas"), + ("(foo|bar)!bas", "", "bar!bas"), + ("(foo|bar)!bas", "", "foo!bar!bas"), + ("(foo|bar)!bas", "", "foo!bas"), + ("^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", "", "foo!bar!bas"), + ("^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$", "", "bas"), + ("^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$", "", "bar!bas"), + ("^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$", "", "foo!bar!bas"), + ("^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$", "", "foo!bas"), + ("^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", "", "bas"), + ("^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", "", "bar!bas"), + ("^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", "", "foo!bar!bas"), + ("^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", "", "foo!bas"), + (".*(/XXX).*", "", "/XXX"), + (".*(\\\\XXX).*", "", "\\XXX"), + ("\\\\XXX", "", "\\XXX"), + (".*(/000).*", "", "/000"), + (".*(\\\\000).*", "", "\\000"), + ("\\\\000", "", "\\000"), + ("aa*", "", "xaxaax"), + ("(a*)(ab)*(b*)", "", "abc"), + ("(a*)(ab)*(b*)", "", "abc"), + ("((a*)(ab)*)((b*)(a*))", "", "aba"), + ("((a*)(ab)*)((b*)(a*))", "", "aba"), + ("(...?.?)*", "", "xxxxxx"), + ("(...?.?)*", "", "xxxxxx"), + ("(...?.?)*", "", "xxxxxx"), + ("(a|ab)(bc|c)", "", "abcabc"), + ("(a|ab)(bc|c)", "", "abcabc"), + ("(aba|a*b)(aba|a*b)", "", "ababa"), + ("(aba|a*b)(aba|a*b)", "", "ababa"), + ("a(b)*\\1", "", "a"), + ("a(b)*\\1", "", "a"), + ("a(b)*\\1", "", "abab"), + ("(a*){2}", "", "xxxxx"), + ("(a*){2}", "", "xxxxx"), + ("a(b)*\\1", "", "abab"), + ("a(b)*\\1", "", "abab"), + ("a(b)*\\1", "", "abab"), + ("(a*)*", "", "a"), + ("(a*)*", "", "ax"), + ("(a*)*", "", "a"), + ("(aba|a*b)*", "", "ababa"), + ("(aba|a*b)*", "", "ababa"), + ("(aba|a*b)*", "", 
"ababa"), + ("(a(b)?)+", "", "aba"), + ("(a(b)?)+", "", "aba"), + ("(a(b)*)*\\2", "", "abab"), + ("(a(b)*)*\\2", "", "abab"), + ("(a?)((ab)?)(b?)a?(ab)?b?", "", "abab"), + (".*(.*)", "", "ab"), + (".*(.*)", "", "ab"), + ("(a|ab)(c|bcd)", "", "abcd"), + ("(a|ab)(bcd|c)", "", "abcd"), + ("(ab|a)(c|bcd)", "", "abcd"), + ("(ab|a)(bcd|c)", "", "abcd"), + ("((a|ab)(c|bcd))(d*)", "", "abcd"), + ("((a|ab)(bcd|c))(d*)", "", "abcd"), + ("((ab|a)(c|bcd))(d*)", "", "abcd"), + ("((ab|a)(bcd|c))(d*)", "", "abcd"), + ("(a|ab)((c|bcd)(d*))", "", "abcd"), + ("(a|ab)((bcd|c)(d*))", "", "abcd"), + ("(ab|a)((c|bcd)(d*))", "", "abcd"), + ("(ab|a)((bcd|c)(d*))", "", "abcd"), + ("(a*)(b|abc)", "", "abc"), + ("(a*)(abc|b)", "", "abc"), + ("((a*)(b|abc))(c*)", "", "abc"), + ("((a*)(abc|b))(c*)", "", "abc"), + ("(a*)((b|abc)(c*))", "", "abc"), + ("(a*)((abc|b)(c*))", "", "abc"), + ("(a*)(b|abc)", "", "abc"), + ("(a*)(abc|b)", "", "abc"), + ("((a*)(b|abc))(c*)", "", "abc"), + ("((a*)(abc|b))(c*)", "", "abc"), + ("(a*)((b|abc)(c*))", "", "abc"), + ("(a*)((abc|b)(c*))", "", "abc"), + ("(a|ab)", "", "ab"), + ("(ab|a)", "", "ab"), + ("(a|ab)(b*)", "", "ab"), + ("(ab|a)(b*)", "", "ab"), + ("a+", "", "xaax"), + (".(a*).", "", "xaax"), + ("(a?)((ab)?)", "", "ab"), + ("(a?)((ab)?)(b?)", "", "ab"), + ("((a?)((ab)?))(b?)", "", "ab"), + ("(a?)(((ab)?)(b?))", "", "ab"), + ("(.?)", "", "x"), + ("(.?){1}", "", "x"), + ("(.?)(.?)", "", "x"), + ("(.?){2}", "", "x"), + ("(.?)*", "", "x"), + ("(.?.?)", "", "xxx"), + ("(.?.?){1}", "", "xxx"), + ("(.?.?)(.?.?)", "", "xxx"), + ("(.?.?){2}", "", "xxx"), + ("(.?.?)(.?.?)(.?.?)", "", "xxx"), + ("(.?.?){3}", "", "xxx"), + ("(.?.?)*", "", "xxx"), + ("a?((ab)?)(b?)", "", "ab"), + ("(a?)((ab)?)b?", "", "ab"), + ("a?((ab)?)b?", "", "ab"), + ("(a*){2}", "", "xxxxx"), + ("(ab?)(b?a)", "", "aba"), + ("(a|ab)(ba|a)", "", "aba"), + ("(a|ab|ba)", "", "aba"), + ("(a|ab|ba)(a|ab|ba)", "", "aba"), + ("(a|ab|ba)*", "", "aba"), + ("(aba|a*b)", "", "ababa"), + 
("(aba|a*b)(aba|a*b)", "", "ababa"), + ("(aba|a*b)*", "", "ababa"), + ("(aba|ab|a)", "", "ababa"), + ("(aba|ab|a)(aba|ab|a)", "", "ababa"), + ("(aba|ab|a)*", "", "ababa"), + ("(a(b)?)", "", "aba"), + ("(a(b)?)(a(b)?)", "", "aba"), + ("(a(b)?)+", "", "aba"), + ("(.*)(.*)", "", "xx"), + (".*(.*)", "", "xx"), + ("(a.*z|b.*y)", "", "azbazby"), + ("(a.*z|b.*y)(a.*z|b.*y)", "", "azbazby"), + ("(a.*z|b.*y)*", "", "azbazby"), + ("(.|..)(.*)", "", "ab"), + ("((..)*(...)*)", "", "xxx"), + ("((..)*(...)*)((..)*(...)*)", "", "xxx"), + ("((..)*(...)*)*", "", "xxx"), + ("(a{0,1})*b\\1", "", "ab"), + ("(a*)*b\\1", "", "ab"), + ("(a*)b\\1*", "", "ab"), + ("(a*)*b\\1*", "", "ab"), + ("(a{0,1})*b(\\1)", "", "ab"), + ("(a*)*b(\\1)", "", "ab"), + ("(a*)b(\\1)*", "", "ab"), + ("(a*)*b(\\1)*", "", "ab"), + ("(a{0,1})*b\\1", "", "aba"), + ("(a*)*b\\1", "", "aba"), + ("(a*)b\\1*", "", "aba"), + ("(a*)*b\\1*", "", "aba"), + ("(a*)*b(\\1)*", "", "aba"), + ("(a{0,1})*b\\1", "", "abaa"), + ("(a*)*b\\1", "", "abaa"), + ("(a*)b\\1*", "", "abaa"), + ("(a*)*b\\1*", "", "abaa"), + ("(a*)*b(\\1)*", "", "abaa"), + // ("(a{0,1})*b\\1", "", "aab"), LXR bug + ("(a*)*b\\1", "", "aab"), + ("(a*)b\\1*", "", "aab"), + ("(a*)*b\\1*", "", "aab"), + ("(a*)*b(\\1)*", "", "aab"), + // ("(a{0,1})*b\\1", "", "aaba"), LXR bug + ("(a*)*b\\1", "", "aaba"), + ("(a*)b\\1*", "", "aaba"), + ("(a*)*b\\1*", "", "aaba"), + ("(a*)*b(\\1)*", "", "aaba"), + // ("(a{0,1})*b\\1", "", "aabaa"), LXR bug + ("(a*)*b\\1", "", "aabaa"), + ("(a*)b\\1*", "", "aabaa"), + ("(a*)*b\\1*", "", "aabaa"), + ("(a*)*b(\\1)*", "", "aabaa"), + ("(x)*a\\1", "", "a"), + ("(x)*a\\1*", "", "a"), + ("(x)*a(\\1)", "", "a"), + ("(x)*a(\\1)*", "", "a"), + ("(aa(b(b))?)+", "", "aabbaa"), + ("(a(b)?)+", "", "aba"), + ("([ab]+)([bc]+)([cd]*)", "", "abcd"), + ("([ab]*)([bc]*)([cd]*)\\1", "", "abcdaa"), + ("([ab]*)([bc]*)([cd]*)\\1", "", "abcdab"), + ("([ab]*)([bc]*)([cd]*)\\1*", "", "abcdaa"), + ("([ab]*)([bc]*)([cd]*)\\1*", "", "abcdab"), + 
("^(A([^B]*))?(B(.*))?", "", "Aa"), + ("^(A([^B]*))?(B(.*))?", "", "Bb"), + (".*([AB]).*\\1", "", "ABA"), + ("[^A]*A", "", "\\nA"), + ("(a|ab)(c|bcd)(d*)", "", "abcd"), + ("(a|ab)(bcd|c)(d*)", "", "abcd"), + ("(ab|a)(c|bcd)(d*)", "", "abcd"), + ("(ab|a)(bcd|c)(d*)", "", "abcd"), + ("(a*)(b|abc)(c*)", "", "abc"), + ("(a*)(abc|b)(c*)", "", "abc"), + ("(a*)(b|abc)(c*)", "", "abc"), + ("(a*)(abc|b)(c*)", "", "abc"), + ("(a|ab)(c|bcd)(d|.*)", "", "abcd"), + ("(a|ab)(bcd|c)(d|.*)", "", "abcd"), + ("(ab|a)(c|bcd)(d|.*)", "", "abcd"), + ("(ab|a)(bcd|c)(d|.*)", "", "abcd"), + ("(a*)*", "", "a"), + ("(a*)*", "", "x"), + ("(a*)*", "", "aaaaaa"), + ("(a*)*", "", "aaaaaax"), + ("(a*)+", "", "a"), + ("(a*)+", "", "x"), + ("(a*)+", "", "aaaaaa"), + ("(a*)+", "", "aaaaaax"), + ("(a+)*", "", "a"), + ("(a+)*", "", "x"), + ("(a+)*", "", "aaaaaa"), + ("(a+)*", "", "aaaaaax"), + ("(a+)+", "", "a"), + ("(a+)+", "", "x"), + ("(a+)+", "", "aaaaaa"), + ("(a+)+", "", "aaaaaax"), + ("([a]*)*", "", "a"), + ("([a]*)*", "", "x"), + ("([a]*)*", "", "aaaaaa"), + ("([a]*)*", "", "aaaaaax"), + ("([a]*)+", "", "a"), + ("([a]*)+", "", "x"), + ("([a]*)+", "", "aaaaaa"), + ("([a]*)+", "", "aaaaaax"), + ("([^b]*)*", "", "a"), + ("([^b]*)*", "", "b"), + ("([^b]*)*", "", "aaaaaa"), + ("([^b]*)*", "", "aaaaaab"), + ("([ab]*)*", "", "a"), + ("([ab]*)*", "", "aaaaaa"), + ("([ab]*)*", "", "ababab"), + ("([ab]*)*", "", "bababa"), + ("([ab]*)*", "", "b"), + ("([ab]*)*", "", "bbbbbb"), + ("([ab]*)*", "", "aaaabcde"), + ("([^a]*)*", "", "b"), + ("([^a]*)*", "", "bbbbbb"), + ("([^a]*)*", "", "aaaaaa"), + ("([^ab]*)*", "", "ccccxx"), + ("([^ab]*)*", "", "ababab"), + ("((z)+|a)*", "", "zabcde"), + ("a+?", "", "aaaaaa"), + ("(a)", "", "aaa"), + ("(a*?)", "", "aaa"), + ("(a)*?", "", "aaa"), + ("(a*?)*?", "", "aaa"), + ("(a*)*(x)", "", "x"), + ("(a*)*(x)", "", "ax"), + ("(a*)*(x)", "", "axa"), + ("(a*)*(x)(\\1)", "", "x"), + ("(a*)*(x)(\\1)", "", "ax"), + ("(a*)*(x)(\\1)", "", "axa"), + ("(a*)*(x)(\\1)(x)", "", 
"axax"), + ("(a*)*(x)(\\1)(x)", "", "axxa"), + ("(a*)*(x)", "", "x"), + ("(a*)*(x)", "", "ax"), + ("(a*)*(x)", "", "axa"), + ("(a*)+(x)", "", "x"), + ("(a*)+(x)", "", "ax"), + ("(a*)+(x)", "", "axa"), + ("(a*){2}(x)", "", "x"), + ("(a*){2}(x)", "", "ax"), + ("(a*){2}(x)", "", "axa"), + ("((..)|(.))", "", "a"), + ("((..)|(.))((..)|(.))", "", "a"), + ("((..)|(.))((..)|(.))((..)|(.))", "", "a"), + ("((..)|(.)){1}", "", "a"), + ("((..)|(.)){2}", "", "a"), + ("((..)|(.)){3}", "", "a"), + ("((..)|(.))*", "", "a"), + ("((..)|(.))", "", "aa"), + ("((..)|(.))((..)|(.))", "", "aa"), + ("((..)|(.))((..)|(.))((..)|(.))", "", "aa"), + ("((..)|(.)){1}", "", "aa"), + ("((..)|(.)){2}", "", "aa"), + ("((..)|(.)){3}", "", "aa"), + ("((..)|(.))*", "", "aa"), + ("((..)|(.))", "", "aaa"), + ("((..)|(.))((..)|(.))", "", "aaa"), + ("((..)|(.))((..)|(.))((..)|(.))", "", "aaa"), + ("((..)|(.)){1}", "", "aaa"), + ("((..)|(.)){2}", "", "aaa"), + ("((..)|(.)){3}", "", "aaa"), + ("((..)|(.))*", "", "aaa"), + ("((..)|(.))", "", "aaaa"), + ("((..)|(.))((..)|(.))", "", "aaaa"), + ("((..)|(.))((..)|(.))((..)|(.))", "", "aaaa"), + ("((..)|(.)){1}", "", "aaaa"), + ("((..)|(.)){2}", "", "aaaa"), + ("((..)|(.)){3}", "", "aaaa"), + ("((..)|(.))*", "", "aaaa"), + ("((..)|(.))", "", "aaaaa"), + ("((..)|(.))((..)|(.))", "", "aaaaa"), + ("((..)|(.))((..)|(.))((..)|(.))", "", "aaaaa"), + ("((..)|(.)){1}", "", "aaaaa"), + ("((..)|(.)){2}", "", "aaaaa"), + ("((..)|(.)){3}", "", "aaaaa"), + ("((..)|(.))*", "", "aaaaa"), + ("((..)|(.))", "", "aaaaaa"), + ("((..)|(.))((..)|(.))", "", "aaaaaa"), + ("((..)|(.))((..)|(.))((..)|(.))", "", "aaaaaa"), + ("((..)|(.)){1}", "", "aaaaaa"), + ("((..)|(.)){2}", "", "aaaaaa"), + ("((..)|(.)){3}", "", "aaaaaa"), + ("((..)|(.))*", "", "aaaaaa"), + ("X(.?){0,}Y", "", "X1234567Y"), + ("X(.?){1,}Y", "", "X1234567Y"), + ("X(.?){2,}Y", "", "X1234567Y"), + ("X(.?){3,}Y", "", "X1234567Y"), + ("X(.?){4,}Y", "", "X1234567Y"), + ("X(.?){5,}Y", "", "X1234567Y"), + ("X(.?){6,}Y", "", 
"X1234567Y"), + ("X(.?){7,}Y", "", "X1234567Y"), + ("X(.?){8,}Y", "", "X1234567Y"), + ("X(.?){0,8}Y", "", "X1234567Y"), + ("X(.?){1,8}Y", "", "X1234567Y"), + ("X(.?){2,8}Y", "", "X1234567Y"), + ("X(.?){3,8}Y", "", "X1234567Y"), + ("X(.?){4,8}Y", "", "X1234567Y"), + ("X(.?){5,8}Y", "", "X1234567Y"), + ("X(.?){6,8}Y", "", "X1234567Y"), + ("X(.?){7,8}Y", "", "X1234567Y"), + ("X(.?){8,8}Y", "", "X1234567Y"), + ("(a|ab|c|bcd){0,}(d*)", "", "ababcd"), + ("(a|ab|c|bcd){1,}(d*)", "", "ababcd"), + ("(a|ab|c|bcd){2,}(d*)", "", "ababcd"), + ("(a|ab|c|bcd){3,}(d*)", "", "ababcd"), + ("(a|ab|c|bcd){4,}(d*)", "", "ababcd"), + ("(a|ab|c|bcd){0,10}(d*)", "", "ababcd"), + ("(a|ab|c|bcd){1,10}(d*)", "", "ababcd"), + ("(a|ab|c|bcd){2,10}(d*)", "", "ababcd"), + ("(a|ab|c|bcd){3,10}(d*)", "", "ababcd"), + ("(a|ab|c|bcd){4,10}(d*)", "", "ababcd"), + ("(a|ab|c|bcd)*(d*)", "", "ababcd"), + ("(a|ab|c|bcd)+(d*)", "", "ababcd"), + ("(ab|a|c|bcd){0,}(d*)", "", "ababcd"), + ("(ab|a|c|bcd){1,}(d*)", "", "ababcd"), + ("(ab|a|c|bcd){2,}(d*)", "", "ababcd"), + ("(ab|a|c|bcd){3,}(d*)", "", "ababcd"), + ("(ab|a|c|bcd){4,}(d*)", "", "ababcd"), + ("(ab|a|c|bcd){0,10}(d*)", "", "ababcd"), + ("(ab|a|c|bcd){1,10}(d*)", "", "ababcd"), + ("(ab|a|c|bcd){2,10}(d*)", "", "ababcd"), + ("(ab|a|c|bcd){3,10}(d*)", "", "ababcd"), + ("(ab|a|c|bcd){4,10}(d*)", "", "ababcd"), + ("(ab|a|c|bcd)*(d*)", "", "ababcd"), + ("(ab|a|c|bcd)+(d*)", "", "ababcd"), + ("(a|ab)(c|bcd)(d*)", "", "abcd"), + ("(a|ab)(bcd|c)(d*)", "", "abcd"), + ("(ab|a)(c|bcd)(d*)", "", "abcd"), + ("(ab|a)(bcd|c)(d*)", "", "abcd"), + ("(a*)(b|abc)(c*)", "", "abc"), + ("(a*)(abc|b)(c*)", "", "abc"), + ("(a*)(b|abc)(c*)", "", "abc"), + ("(a*)(abc|b)(c*)", "", "abc"), + ("(a|ab)(c|bcd)(d|.*)", "", "abcd"), + ("(a|ab)(bcd|c)(d|.*)", "", "abcd"), + ("(ab|a)(c|bcd)(d|.*)", "", "abcd"), + ("(ab|a)(bcd|c)(d|.*)", "", "abcd"), + ("(a|ab)(c|bcd)(d*)", "", "abcd"), + ("(a|ab)(bcd|c)(d*)", "", "abcd"), + ("(ab|a)(c|bcd)(d*)", "", "abcd"), + ("(ab|a)(bcd|c)(d*)", 
"", "abcd"), + ("(a*)(b|abc)(c*)", "", "abc"), + ("(a*)(abc|b)(c*)", "", "abc"), + ("(a*)(b|abc)(c*)", "", "abc"), + ("(a*)(abc|b)(c*)", "", "abc"), + ("(a|ab)(c|bcd)(d|.*)", "", "abcd"), + ("(a|ab)(bcd|c)(d|.*)", "", "abcd"), + ("(ab|a)(c|bcd)(d|.*)", "", "abcd"), + ("(ab|a)(bcd|c)(d|.*)", "", "abcd"), + ("\u{fb00}", "i", "FF"), + ("(\u{fb00})\\1", "i", "FFFF"), + ("(\u{fb00})\\1", "i", "FF\u{fb00}"), + ("(\u{fb00})\\1", "i", "\u{fb00}FF"), + ("\u{fb01}", "i", "FI"), + ("(\u{fb01})\\1", "i", "FIFI"), + ("\u{fb02}", "i", "FL"), + ("\u{fb03}", "i", "FFI"), + ("\u{fb04}", "i", "FFL"), + ("\u{fb00}I", "i", "\u{fb03}"), + ("\u{fb03}", "i", "\u{fb00}I"), + ("F\u{fb01}", "i", "\u{fb03}"), + ("\u{fb03}", "i", "F\u{fb01}"), + ("\u{fb00}L", "i", "\u{fb04}"), + ("\u{fb04}", "i", "\u{fb00}L"), + ("F\u{fb02}", "i", "\u{fb04}"), + ("\u{fb04}", "i", "F\u{fb02}"), + ("[\u{fb04}[=a=]o]+", "i", "F\u{fb02}aÄö"), + ("\u{1f50}", "i", "\u{03c5}\u{0313}"), + ("\u{1f52}", "i", "\u{03c5}\u{0313}\u{0300}"), + ("\u{1f54}", "i", "\u{03c5}\u{0313}\u{0301}"), + ("\u{1f56}", "i", "\u{03c5}\u{0313}\u{0342}"), + ("\u{1f50}\u{0300}", "i", "\u{1f52}"), + ("\u{1f52}", "i", "\u{1f50}\u{0300}"), + ("\u{1f50}\u{0301}", "i", "\u{1f54}"), + ("\u{1f54}", "i", "\u{1f50}\u{0301}"), + ("\u{1f50}\u{0342}", "i", "\u{1f56}"), + ("\u{1f56}", "i", "\u{1f50}\u{0342}"), + ("\u{1fb6}", "i", "\u{03b1}\u{0342}"), + ("\u{1fb7}", "i", "\u{03b1}\u{0342}\u{03b9}"), + ("\u{1fb6}\u{03b9}", "i", "\u{1fb7}"), + ("\u{1fb7}", "i", "\u{1fb6}\u{03b9}"), + ("\u{1fc6}", "i", "\u{03b7}\u{0342}"), + ("\u{1fc7}", "i", "\u{03b7}\u{0342}\u{03b9}"), + ("\u{1fc6}\u{03b9}", "i", "\u{1fc7}"), + ("\u{1fc7}", "i", "\u{1fc6}\u{03b9}"), + ("\u{1ff6}", "i", "\u{03c9}\u{0342}"), + ("\u{1ff7}", "i", "\u{03c9}\u{0342}\u{03b9}"), + ("\u{1ff6}\u{03b9}", "i", "\u{1ff7}"), + ("\u{1ff7}", "i", "\u{1ff6}\u{03b9}"), + ("f*", "i", "ff"), + ("f*", "i", "\u{fb00}"), + ("f+", "i", "ff"), + ("f+", "i", "\u{fb00}"), + ("f{1,}", "i", "ff"), + ("f{1,}", "i", 
"\u{fb00}"), + ("f{1,2}", "i", "ff"), + ("f{1,2}", "i", "\u{fb00}"), + ("f{,2}", "i", "ff"), + ("f{,2}", "i", "\u{fb00}"), + ("ff?", "i", "ff"), + ("ff?", "i", "\u{fb00}"), + ("f{2}", "i", "ff"), + ("f{2}", "i", "\u{fb00}"), + ("f{2,2}", "i", "ff"), + ("f{2,2}", "i", "\u{fb00}"), + ("K", "i", "\u{212a}"), + ("k", "i", "\u{212a}"), + ("\\w", "i", "\u{212a}"), + ("\\W", "i", "\u{212a}"), + ("[\\w]", "i", "\u{212a}"), + ("[\\w]+", "i", "a\\wWc"), + ("[\\W]+", "i", "a\\wWc"), + ("[\\d]+", "i", "0\\dD9"), + ("[\\D]+", "i", "a\\dDc"), + ("[\\s]+", "i", " \\sS\t"), + ("[\\S]+", "i", " \\sS\t"), + ("[kx]", "i", "\u{212a}"), + ("ff", "i", "\u{fb00}"), + ("[f]f", "i", "\u{fb00}"), + ("f[f]", "i", "\u{fb00}"), + ("[f][f]", "i", "\u{fb00}"), + ("(?:f)f", "i", "\u{fb00}"), + ("f(?:f)", "i", "\u{fb00}"), + ("(?:f)(?:f)", "i", "\u{fb00}"), + ("\\A[\u{fb00}]\\z", "i", "\u{fb00}"), + ("\\A[\u{fb00}]\\z", "i", "ff"), + ("\\A[^\u{fb00}]\\z", "i", "\u{fb00}"), + ("\\A[^\u{fb00}]\\z", "i", "ff"), + ("\\A[^[^\u{fb00}]]\\z", "i", "\u{fb00}"), + ("\\A[^[^\u{fb00}]]\\z", "i", "ff"), + ("\\A[[^[^\u{fb00}]]]\\z", "i", "\u{fb00}"), + ("\\A[[^[^\u{fb00}]]]\\z", "i", "ff"), + ("[^a-c]", "i", "A"), + ("[[^a-c]]", "i", "A"), + ("[^a]", "i", "a"), + ("[[^a]]", "i", "a"), + ("\\A\\W\\z", "i", "\u{fb00}"), + ("\\A\\W\\z", "i", "ff"), + ("\\A[\\p{L}]\\z", "i", "\u{fb00}"), + ("\\A[\\p{L}]\\z", "i", "ff"), + ("\\A\\W\\z", "i", "\u{fb03}"), + ("\\A\\W\\z", "i", "ffi"), + ("\\A\\W\\z", "i", "\u{fb00}i"), + ("\\A[\\p{L}]\\z", "i", "\u{fb03}"), + ("\\A[\\p{L}]\\z", "i", "ffi"), + ("\\A[\\p{L}]\\z", "i", "\u{fb00}i"), + ("([[=a=]])\\1", "i", "aA"), + ("([[=a=]])\\1", "i", "Aa"), + ("([[=a=]])\\1", "i", "a\u{00e4}"), + ("([[=a=]])\\1", "i", "a\u{00c4}"), + ("([[=a=]])\\1", "i", "\u{00e4}a"), + ("([[=a=]])\\1", "i", "\u{00c4}a"), + ("([[=a=]])\\1", "i", "\u{00c4}A"), + ] { + let from_index = 1; + let occurrence = 1; + let n_groups = count_groups(pattern); + let mut groups = vec![]; + for i_group in 
0..n_groups { + for start_or_end in [0, 1] { + // explicit type for flags string: the client library will set the data type of strings to NVARCHAR2, but REGEXP_INSTR only accepts VARCHAR or CHAR on the flags parameter + let i = statement.query_row_as::(&[&input, &pattern, &from_index, &occurrence, &start_or_end, &(&flags, &OracleType::Char(10)), &i_group])?; + groups.push(i - 1); + } + } + let is_match = *groups.get(0).unwrap() >= 0; + write!(out, "test(\"{}\", \"{}\", \"{}\", {}, {}", java_string_escape(pattern), flags, java_string_escape(input), from_index - 1, is_match)?; + if is_match { + write!(out, ", {}", groups.iter().map(|v| format!("{}", v)).collect::>().join(", "))?; + } + writeln!(out, ");")?; + } + insert_generated_code(Path::new(PATH_GRAAL_REPO).join(PATH_ORACLE_DB_TESTS).as_path(), &out)?; + Ok(()) +} diff --git a/regex/src/com.oracle.truffle.regex/tools/generate_case_fold_table.clj b/regex/src/com.oracle.truffle.regex/tools/generate_case_fold_table.clj deleted file mode 100755 index 97f2d75dfaf1..000000000000 --- a/regex/src/com.oracle.truffle.regex/tools/generate_case_fold_table.clj +++ /dev/null @@ -1,406 +0,0 @@ -; ------------------------------------------------------------------------------ -; Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. -; DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
-; -; The Universal Permissive License (UPL), Version 1.0 -; -; Subject to the condition set forth below, permission is hereby granted to any -; person obtaining a copy of this software, associated documentation and/or -; data (collectively the "Software"), free of charge and under any and all -; copyright rights in the Software, and any and all patent rights owned or -; freely licensable by each licensor hereunder covering either (i) the -; unmodified Software as contributed to or provided by such licensor, or (ii) -; the Larger Works (as defined below), to deal in both -; -; (a) the Software, and -; -; (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if -; one is included with the Software each a "Larger Work" to which the Software -; is contributed by such licensors), -; -; without restriction, including without limitation the rights to copy, create -; derivative works of, display, perform, and distribute the Software and make, -; use, sell, offer for sale, import, export, have made, and have sold the -; Software and the Larger Work(s), and to sublicense the foregoing rights on -; either these or other terms. -; -; This license is subject to the following condition: -; -; The above copyright notice and either this complete permission notice or at a -; minimum a reference to the UPL must be included in all copies or substantial -; portions of the Software. -; -; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -; SOFTWARE. 
-; ------------------------------------------------------------------------------ - -;; In order to run this script, install Boot as described in -;; https://github.com/boot-clj/boot#install or simply evaluate the code below in -;; any Clojure REPL and then call the `-main` function. - -;; This script assumes that the current working directory contains a folder "dat" -;; with the files NonUnicodeFoldTable.txt, UnicodeFoldTable.txt, -;; PythonFoldTable.txt and CaseFolding.txt. - -(ns generate-case-fold-table - (:require [clojure.set :as set] - [clojure.string :as str])) - -(defn pairwise - "Given a sequence `x_1`, `x_2`, `x_3`..., returns the sequence of pairs `[x_1 x_2]`, `[x_2 x_3]`..." - [xs] - (map vector xs (rest xs))) - -(defn parse-hex - [hex-string] - (Long/parseLong hex-string 16)) - -(defn parse-relation-file - "Parses a binary relation from the file at `path` and returns it as a sorted - set." - [path] - (into (sorted-set) - (apply concat - (for [line (str/split-lines (slurp path))] - (let [codepoints-str (str/split line #";") - codepoints (map parse-hex codepoints-str)] - (pairwise codepoints)))))) - -(defn parse-case-folding-file - "Parses Unicode's CaseFolding.txt from the file at `path` and returns it as a - sorted set." - [path] - (into (sorted-set) - (for [line (str/split-lines (slurp path)) - :when (not (or (str/blank? line) (str/starts-with? line "#"))) - :let [[code status mapping] (str/split line #"\s*;\s*")] - :when (#{"C" "S"} status)] - [(parse-hex code) (parse-hex mapping)]))) - -(defn maps-to - "Given a binary relation `rel`, represented as a sorted set, finds the set of - elements Y such that X `rel` Y." - [rel elem] - (map second (subseq rel > [elem 0] < [(inc elem) 0]))) - -(defn swap - "Swaps the elements in a pair." - [[a b]] - [b a]) - -(defn remove-reflexive-entries - "Remove pairs of the form [x x] from a given set of pairs." 
- [rel] - (set/select #(not= (first %) (second %)) rel)) - -(defn symmetric-closure - "Calculates the symmetric closure of the binary relation `rel`." - [rel] - (let [symmetric-rel (into (sorted-set) (map swap rel))] - (set/union rel symmetric-rel))) - -(defn load-relation - "Loads an equivalence relation from a file and makes it symmetric. - - We do not want a reflexive closure because we want the entries in the relation - to correspond to equivalent pairs that still need to be encoded in the case - fold table (and we do not want to include reflexive entries in the case fold - table). We do not need transitivity because we handle all equivalence classes - of size larger than 3 in the first step, `extract-large-classes`, and in that - step we traverse the relation graph recursively." - [path] - (-> path - parse-relation-file - remove-reflexive-entries - symmetric-closure)) - -(def python-ascii-relation - "The case-folding equivalence relation for Python ascii regular expressions." - (->> (map vector (range (int \a) (inc (int \z))) - (range (int \A) (inc (int \Z)))) - (into (sorted-set)) - symmetric-closure)) - -(defn collect-eq-classes - "Given some equivalence relation `rel`, finds the equivalence classes. - - NB: This function assumes that `rel` is only symmetric. Transitive pairs need - not be included since the graph is being searched." - [rel] - (let [find-class (fn [rel start-elem] - (let [visited? (atom (sorted-set))] - (letfn [(traverse [elem] - (when-not (@visited? elem) - (swap! visited? conj elem) - (doseq [eq-elem (maps-to rel elem)] - (traverse eq-elem))))] - (do (traverse start-elem) - @visited?))))] - (loop [rel rel - from [0 0] - classes []] - (if-let [next-pair (first (subseq rel > from))] - (let [class (find-class rel (first next-pair))] - (recur (set/select #(not-any? 
class %) rel) - next-pair - (conj classes class))) - classes)))) - -(defn encode-classes - "Given a list of equivalence classes, generates case fold table entries that - encode them. For classes of size 2, we encode them as two entries, - deltaPositive and deltaNegative (:kind :delta). For classes of larger size, we - use directMapping (:kind :class)." - [classes] - (let [;; `class-as-ranges` represents a `class` as a union of closed - ;; intervals (ranges). This representation is then used inside the - ;; CHARACTER_SET_TABLE generated by `show-classes`.` - class-as-ranges (fn [class] - (loop [class class - cur-range nil - ranges []] - (if-let [elem (first class)] - (if cur-range - (if (= (inc (:hi cur-range)) elem) - (recur (rest class) (update cur-range :hi inc) ranges) - (recur (rest class) {:lo elem, :hi elem} (conj ranges cur-range))) - (recur (rest class) {:lo elem, :hi elem} ranges)) - (if cur-range - (conj ranges cur-range) - ranges)))) - encode-class (fn [class] - (cond - (<= (count class) 1) - [] - (= (count class) 2) - (let [lower (first class) - higher (second class)] - [{:lo lower - :hi lower - :delta (- higher lower) - :kind :delta} - {:lo higher - :hi higher - :delta (- lower higher) - :kind :delta}]) - :otherwise - (let [class-ranges (class-as-ranges class)] - (for [range class-ranges] - {:lo (:lo range) - :hi (:hi range) - :class class-ranges - :kind :class}))))] - (mapcat encode-class classes))) - -(defn extract-large-classes - "This is the first step in encoding the equivalence relation `rel` into a list - of case fold table entries. This step finds any equivalence classes of size >= - 3 and encodes them using directMapping (:kind :class) because the other - heuristics only deal well with equivalence classes of size 2. - - NB: directMapping is a case fold table entry which assigns to a range of code - points a specific set of equivalent code points." 
- [rel] - (let [large-classes (filter #(>= (count %) 3) (collect-eq-classes rel)) - processed-elems (apply set/union large-classes) - entries (encode-classes large-classes)] - {:todo-rel (set/select #(not-any? processed-elems %) rel) - :entries entries})) - -(defn extract-runs - "This is a helper function for `extract-delta-runs` and - `extract-alternating-runs`." - [rel find-run encode-run allow-singletons] - (loop [todo-rel rel - from [0 0] - entries []] - (if-let [next-pair (first (subseq todo-rel > from))] - (let [run (find-run rel next-pair)] - (if (or allow-singletons (> (count run) 1)) - (recur (set/difference todo-rel run) - next-pair - (conj entries (encode-run run))) - (recur todo-rel next-pair entries))) - {:todo-rel todo-rel - :entries entries}))) - -(defn extract-delta-runs - "This is the second step in encoding the equivalence relation `rel` into a - list of code table entries. This step finds ranges of characters which are - case-equivalent, character by character, to other ranges of characters, e.g. - the ASCII ranges [a-z] and [A-Z]. These are then encoded via the entries - deltaPositive and deltaNegative (:kind :delta)." - [allow-singletons rel] - (letfn [(find-delta-run [rel start-pair] - (let [next-pair [(inc (first start-pair)) (inc (second start-pair))]] - (cons start-pair (when (rel next-pair) - (find-delta-run rel next-pair))))) - (encode-delta-run [run] - {:lo (first (first run)) - :hi (first (last run)) - :delta (- (second (first run)) (first (first run))) - :kind :delta})] - (extract-runs rel find-delta-run encode-delta-run allow-singletons))) - -(defn extract-alternating-runs - "This is the third step in encoding the equivalence relation `rel` into a list - of code table entries. This step finds ranges of characters in which - lower-case and upper-case variants are alternated, e.g., as in the Latin - Extended-A range from 0x0100 to 0x012f. These are encoded using the entries - alternatingAL and alternatingUL (:kind :alternating)." 
- [rel] - (letfn [(find-alternating-run [rel start-pair] - (when (= (inc (first start-pair)) (second start-pair)) - (let [next-pair [(+ 2 (first start-pair)) (+ 2 (second start-pair))]] - (cons start-pair (cons (swap start-pair) (when (rel next-pair) - (find-alternating-run rel next-pair))))))) - (encode-alternating-run [run] - {:lo (first (first run)) - :hi (first (last run)) - :aligned (even? (first (first run))) - :kind :alternating})] - (extract-runs rel find-alternating-run encode-alternating-run false))) - -(defn generate-entries-for-eq-relation - "Given an equivalence relation, calculates its encoding in terms of case fold - table entries." - [rel] - (let [{rel :todo-rel, large-class-entries :entries} (extract-large-classes rel) - {rel :todo-rel, delta-entries :entries} (extract-delta-runs false rel) - {rel :todo-rel, alternating-entries :entries} (extract-alternating-runs rel) - remaining-classes (collect-eq-classes rel) - remaining-class-entries (encode-classes remaining-classes)] - (sort-by (fn [e] [(:lo e) (:hi e)]) (concat large-class-entries delta-entries alternating-entries remaining-class-entries)))) - -(defn generate-entries-for-function - "Given a functional relation, calculates its encoding in terms of case fold - table entries." - [rel] - (let [{rel :todo-rel, delta-entries :entries} (extract-delta-runs true rel)] - (sort-by (fn [e] [(:lo e) (:hi e)]) delta-entries))) - -(defn identify-classes - "Replaces the references to equivalence classes (:class field) in - directMapping case fold table entries (:kind :class) with numeric - identifiers (:class-id field). The numeric identifiers are being allocated - starting from the value of `num-classes-ref` and the mapping from classes to - identifiers is being stored in `class-ids-ref`." 
- [entries num-classes-ref class-ids-ref] - (doall (for [entry entries] - (if (= :class (:kind entry)) - (let [class (:class entry)] - (if-let [class-id (@class-ids-ref class)] - (assoc entry :class-id class-id) - (let [class-id @num-classes-ref] - (do (swap! class-ids-ref assoc class class-id) - (swap! num-classes-ref inc) - (assoc entry :class-id class-id))))) - entry)))) - -(defn show-hex - "Prints a number in hexadecimal format. Hexadecimal is the conventional base - in which to write down values of Unicode code points. Also, it is the same - base as was used in the original case fold table, meaning we can keep the diff - after updating the table minimal." - [n] - (format "0x%04x" n)) - -(defn show-hex6 - "Prints a number in hexadecimal format. Hexadecimal is the conventional base - in which to write down values of Unicode code points. Also, it is the same - base as was used in the original case fold table, meaning we can keep the diff - after updating the table minimal." - [n] - (format "0x%06x" n)) - -(defn show-classes - "Renders the CHARACTER_SET_TABLE in Java code. The CHARACTER_SET_TABLE - contains the definitions of codepoint equivalence classes that are used in - directMapping (:kind :class) entries of the case fold table." - [classes] - (let [header " private static final CodePointSet[] CHARACTER_SET_TABLE = new CodePointSet[]{\n" - item-prefix " " - item-sep ",\n" - footer "};\n" - show-class (fn [class] - (let [range-sep ", " - show-range (fn [range] - (str (show-hex6 (:lo range)) ", " (show-hex6 (:hi range)) ))] - (str "rangeSet(" (apply str (interpose ", " (map show-range class))) ")"))) - body (apply str (interpose item-sep (map #(str item-prefix (show-class %)) classes)))] - (str header body footer))) - -(defn show-entries - "Renders a case fold table with name `table-name`. This is the main product of - this script." 
- [entries table-name] - (let [header (str " public static final CaseFoldTableImpl " table-name " = new CaseFoldTableImpl(new int[]{\n") - item-prefix " " - item-sep ",\n" - footer "\n });\n" - method-name-and-args (fn [entry] - (case (:kind entry) - :delta {:lo (:lo entry) - :hi (:hi entry) - :method-name "INTEGER_OFFSET" - :arg (:delta entry)} - :alternating {:lo (:lo entry) - :hi (:hi entry) - :method-name (if (:aligned entry) - "ALTERNATING_AL" - "ALTERNATING_UL") - :arg 0 } - :class {:lo (:lo entry) - :hi (:hi entry) - :method-name "DIRECT_MAPPING" - :arg (:class-id entry)})) - show-entry (fn [entry] - (let [{:keys [lo hi method-name arg]} (method-name-and-args entry) - arg-sep ", "] - (str (show-hex6 lo) ", " (show-hex6 hi) ", " method-name ", " arg))) - body (apply str (interpose item-sep (map #(str item-prefix (show-entry %)) entries)))] - (str header body footer))) - -(defn do-the-job - "The main function of the script. It loads the definitions of the - equivalence relations for the cases when the RegExp flag 'u' is not set (file - NonUnicodeFoldTable.txt) and when it is set (file UnicodeFoldTable.txt). It - then generates the case fold table entries to be used in the CaseFoldTable - Java class in TRegex. - - NB: The CHARACTER_SET_TABLE is shared among the two case fold tables because - there is significant overlap between the two." 
- [] - (let [non-unicode-relation (load-relation "dat/NonUnicodeFoldTable.txt") - unicode-relation (load-relation "dat/UnicodeFoldTable.txt") - python-unicode-relation (load-relation "dat/PythonFoldTable.txt") - simple-case-folding (parse-case-folding-file "dat/CaseFolding.txt") - num-classes (atom 0) - class-ids (atom {}) - non-unicode-entries (identify-classes (generate-entries-for-eq-relation non-unicode-relation) num-classes class-ids) - unicode-entries (identify-classes (generate-entries-for-eq-relation unicode-relation) num-classes class-ids) - python-ascii-entries (identify-classes (generate-entries-for-eq-relation python-ascii-relation) num-classes class-ids) - python-unicode-entries (identify-classes (generate-entries-for-eq-relation python-unicode-relation) num-classes class-ids) - case-folding-entries (generate-entries-for-function simple-case-folding) - classes (map second (sort (map swap @class-ids)))] - (str (show-classes classes) - "\n" - (show-entries non-unicode-entries "NON_UNICODE_TABLE_ENTRIES") - "\n" - (show-entries unicode-entries "UNICODE_TABLE_ENTRIES") - "\n" - (show-entries python-ascii-entries "PYTHON_ASCII_TABLE_ENTRIES") - "\n" - (show-entries python-unicode-entries "PYTHON_UNICODE_TABLE_ENTRIES") - "\n" - (show-entries case-folding-entries "SIMPLE_CASE_FOLDING_ENTRIES")))) - -(defn -main - "This gets evaluated when we run the script." - [& args] - (print (do-the-job))) diff --git a/regex/src/com.oracle.truffle.regex/tools/generate_nonunicode_fold_table.py b/regex/src/com.oracle.truffle.regex/tools/generate_nonunicode_fold_table.py deleted file mode 100755 index ccddc6456e7a..000000000000 --- a/regex/src/com.oracle.truffle.regex/tools/generate_nonunicode_fold_table.py +++ /dev/null @@ -1,79 +0,0 @@ -#!/usr/bin/env python3 -# -# Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. -# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
-# -# The Universal Permissive License (UPL), Version 1.0 -# -# Subject to the condition set forth below, permission is hereby granted to any -# person obtaining a copy of this software, associated documentation and/or -# data (collectively the "Software"), free of charge and under any and all -# copyright rights in the Software, and any and all patent rights owned or -# freely licensable by each licensor hereunder covering either (i) the -# unmodified Software as contributed to or provided by such licensor, or (ii) -# the Larger Works (as defined below), to deal in both -# -# (a) the Software, and -# -# (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if -# one is included with the Software each a "Larger Work" to which the Software -# is contributed by such licensors), -# -# without restriction, including without limitation the rights to copy, create -# derivative works of, display, perform, and distribute the Software and make, -# use, sell, offer for sale, import, export, have made, and have sold the -# Software and the Larger Work(s), and to sublicense the foregoing rights on -# either these or other terms. -# -# This license is subject to the following condition: -# -# The above copyright notice and either this complete permission notice or at a -# minimum a reference to the UPL must be included in all copies or substantial -# portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. 
- - -# This reads dat/UnicodeData.txt and dat/SpecialCasing.txt and produces a file -# that gives all the non-trivial pairs of inputs-outputs of the ECMAScript -# Canonicalize when Unicode is false and IgnoreCase is true. - -upper_map = {} -for line in open("dat/UnicodeData.txt"): - tokens = line.split(";") - # Drop entries without toUppercase mapping - if tokens[12].strip() == "": - continue - char = int(tokens[0].strip(), 16) - upper = int(tokens[12].strip(), 16) - upper_map[char] = [upper] - -for line in open("dat/SpecialCasing.txt"): - # Drop comments and empty lines - if line.startswith("#") or line.strip() == "": - continue - tokens = line.split(";") - # Drop entries with conditions - if len(tokens) > 5: - continue - char = int(tokens[0].strip(), 16) - upper = [int(c, 16) for c in tokens[3].split()] - upper_map[char] = upper - -for (char, upper) in upper_map.items(): - # Only follow rules which give map to a single code unit - if len(upper) > 1 or upper[0] >= 0x10000: - continue - # Do not allow non-ASCII characters to cross into ASCII. - if char >= 128 and upper[0] < 128: - continue - # Drop trivial mappings - if (char == upper[0]): - continue - print("%X;%X" % (char, upper[0])) diff --git a/regex/src/com.oracle.truffle.regex/tools/generate_ruby_case_folding.py b/regex/src/com.oracle.truffle.regex/tools/generate_ruby_case_folding.py deleted file mode 100755 index 36443b2e4ca7..000000000000 --- a/regex/src/com.oracle.truffle.regex/tools/generate_ruby_case_folding.py +++ /dev/null @@ -1,115 +0,0 @@ -#!/usr/bin/env python3 -# -# Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. -# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
-# -# The Universal Permissive License (UPL), Version 1.0 -# -# Subject to the condition set forth below, permission is hereby granted to any -# person obtaining a copy of this software, associated documentation and/or -# data (collectively the "Software"), free of charge and under any and all -# copyright rights in the Software, and any and all patent rights owned or -# freely licensable by each licensor hereunder covering either (i) the -# unmodified Software as contributed to or provided by such licensor, or (ii) -# the Larger Works (as defined below), to deal in both -# -# (a) the Software, and -# -# (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if -# one is included with the Software each a "Larger Work" to which the Software -# is contributed by such licensors), -# -# without restriction, including without limitation the rights to copy, create -# derivative works of, display, perform, and distribute the Software and make, -# use, sell, offer for sale, import, export, have made, and have sold the -# Software and the Larger Work(s), and to sublicense the foregoing rights on -# either these or other terms. -# -# This license is subject to the following condition: -# -# The above copyright notice and either this complete permission notice or at a -# minimum a reference to the UPL must be included in all copies or substantial -# portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. 
- -import re -import os - -os.chdir('dat') - -regex = re.compile(r'^([0-9A-Z]+);\s*[CF];((?:\s*[0-9A-Z])+)') - -data_file = open("CaseFolding.txt", "r") - -entries = [] - -for line in data_file: - m = regex.match(line) - if m is not None: - key = "0x{0}".format(m.group(1)) - cp_list = ", ".join([ "0x{0}".format(cp) for cp in m.group(2).strip().split() ]) - value = "new int[]{{{0}}}".format(cp_list) - entries.append(" CASE_FOLD.put({key}, {value});".format(key = key, value = value)) - -print(r"""/* - * Copyright (c) 2021, 2021, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * The Universal Permissive License (UPL), Version 1.0 - * - * Subject to the condition set forth below, permission is hereby granted to any - * person obtaining a copy of this software, associated documentation and/or - * data (collectively the "Software"), free of charge and under any and all - * copyright rights in the Software, and any and all patent rights owned or - * freely licensable by each licensor hereunder covering either (i) the - * unmodified Software as contributed to or provided by such licensor, or (ii) - * the Larger Works (as defined below), to deal in both - * - * (a) the Software, and - * - * (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if - * one is included with the Software each a "Larger Work" to which the Software - * is contributed by such licensors), - * - * without restriction, including without limitation the rights to copy, create - * derivative works of, display, perform, and distribute the Software and make, - * use, sell, offer for sale, import, export, have made, and have sold the - * Software and the Larger Work(s), and to sublicense the foregoing rights on - * either these or other terms. 
- * - * This license is subject to the following condition: - * - * The above copyright notice and either this complete permission notice or at a - * minimum a reference to the UPL must be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -package com.oracle.truffle.regex.tregex.parser.flavors; - -import java.util.SortedMap; -import java.util.TreeMap; - -public class RubyCaseFoldingData {{ - - public static final SortedMap CASE_FOLD; - - static {{ - CASE_FOLD = new TreeMap<>(); - -{} - }} -}}""".format("\n".join(entries))) diff --git a/regex/src/com.oracle.truffle.regex/tools/generate_special_casing_equivalences.py b/regex/src/com.oracle.truffle.regex/tools/generate_special_casing_equivalences.py deleted file mode 100755 index 57159136754d..000000000000 --- a/regex/src/com.oracle.truffle.regex/tools/generate_special_casing_equivalences.py +++ /dev/null @@ -1,70 +0,0 @@ -#!/usr/bin/env python3 -# -# Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. -# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
-# -# The Universal Permissive License (UPL), Version 1.0 -# -# Subject to the condition set forth below, permission is hereby granted to any -# person obtaining a copy of this software, associated documentation and/or -# data (collectively the "Software"), free of charge and under any and all -# copyright rights in the Software, and any and all patent rights owned or -# freely licensable by each licensor hereunder covering either (i) the -# unmodified Software as contributed to or provided by such licensor, or (ii) -# the Larger Works (as defined below), to deal in both -# -# (a) the Software, and -# -# (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if -# one is included with the Software each a "Larger Work" to which the Software -# is contributed by such licensors), -# -# without restriction, including without limitation the rights to copy, create -# derivative works of, display, perform, and distribute the Software and make, -# use, sell, offer for sale, import, export, have made, and have sold the -# Software and the Larger Work(s), and to sublicense the foregoing rights on -# either these or other terms. -# -# This license is subject to the following condition: -# -# The above copyright notice and either this complete permission notice or at a -# minimum a reference to the UPL must be included in all copies or substantial -# portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. 
- -# This script generates a list of case-equivalent Unicode code points using the -# SpecialCasing.txt file from Unicode. It expects this file to be in a folder -# called "dat". Two codepoints are considered equivalent if they map to the same -# sequence of codepoints using either the Lowercase or Uppercase function. Such -# cases are handled by including a special list of exceptions in sre_compile.py. - -inv_map = {} - -def add_mapping(codepoint, mapping): - if mapping not in inv_map: - inv_map[mapping] = [] - inv_map[mapping].append(codepoint) - -for line in open('dat/SpecialCasing.txt'): - if line.strip() == '' or line.startswith('#'): - continue - codepoint, lower, title, upper, *tail = [field.strip() for field in line.split(';')] - if len(tail) > 1: - # skip conditional mapping - continue - if ' ' in lower: - add_mapping(codepoint, lower) - if ' ' in upper: - add_mapping(codepoint, upper) - -for eq_class in inv_map.values(): - rep = eq_class[0] - for elem in eq_class[1:]: - print('{};{}'.format(rep, elem)) diff --git a/regex/src/com.oracle.truffle.regex/tools/run_scripts.sh b/regex/src/com.oracle.truffle.regex/tools/run_scripts.sh index 592a40647f84..eace11f75fd4 100755 --- a/regex/src/com.oracle.truffle.regex/tools/run_scripts.sh +++ b/regex/src/com.oracle.truffle.regex/tools/run_scripts.sh @@ -53,9 +53,6 @@ EMOJI_VERSION=15.0 mkdir -p ./dat -wget https://www.unicode.org/Public/${UNICODE_VERSION}/ucd/UnicodeData.txt -O dat/UnicodeData.txt -wget https://www.unicode.org/Public/${UNICODE_VERSION}/ucd/CaseFolding.txt -O dat/CaseFolding.txt -wget https://www.unicode.org/Public/${UNICODE_VERSION}/ucd/SpecialCasing.txt -O dat/SpecialCasing.txt wget https://www.unicode.org/Public/${UNICODE_VERSION}/ucd/PropertyAliases.txt -O dat/PropertyAliases.txt wget https://www.unicode.org/Public/${UNICODE_VERSION}/ucd/PropertyValueAliases.txt -O dat/PropertyValueAliases.txt wget https://www.unicode.org/Public/${UNICODE_VERSION}/ucd/NameAliases.txt -O dat/NameAliases.txt @@ 
-68,19 +65,16 @@ unzip -d dat dat/ucd.nounihan.flat.zip ./generate_unicode_properties.py > ../src/com/oracle/truffle/regex/charset/UnicodePropertyData.java -./unicode-script.sh - -clojure -Sdeps '{:paths ["."]}' -M --main generate-case-fold-table > dat/case-fold-table.txt - -./update_case_fold_table.py - -./generate_ruby_case_folding.py > ../src/com/oracle/truffle/regex/tregex/parser/flavors/RubyCaseFoldingData.java - ./generate_name_alias_table.py > ../src/com/oracle/truffle/regex/chardata/UnicodeCharacterAliases.java rm -r ./dat +pushd casefolding +cargo build --release && ./target/release/tregex-casefolding +rm -r ./tmp +popd + mx build -mx java -cp `mx paths regex:TREGEX`:`mx paths truffle:TRUFFLE_API`:`mx paths sdk:GRAAL_SDK` com.oracle.truffle.regex.charset.UnicodeGeneralCategoriesGenerator > ../src/com/oracle/truffle/regex/charset/UnicodeGeneralCategories.java +mx java -cp `mx paths regex:TREGEX`:`mx paths truffle:TRUFFLE_API`:`mx paths sdk:COLLECTIONS` com.oracle.truffle.regex.charset.UnicodeGeneralCategoriesGenerator > ../src/com/oracle/truffle/regex/charset/UnicodeGeneralCategories.java mx eclipseformat --primary || true diff --git a/regex/src/com.oracle.truffle.regex/tools/unicode-script.sh b/regex/src/com.oracle.truffle.regex/tools/unicode-script.sh deleted file mode 100755 index a0f39f340664..000000000000 --- a/regex/src/com.oracle.truffle.regex/tools/unicode-script.sh +++ /dev/null @@ -1,108 +0,0 @@ -#!/usr/bin/env bash -# -# Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. -# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
-# -# The Universal Permissive License (UPL), Version 1.0 -# -# Subject to the condition set forth below, permission is hereby granted to any -# person obtaining a copy of this software, associated documentation and/or -# data (collectively the "Software"), free of charge and under any and all -# copyright rights in the Software, and any and all patent rights owned or -# freely licensable by each licensor hereunder covering either (i) the -# unmodified Software as contributed to or provided by such licensor, or (ii) -# the Larger Works (as defined below), to deal in both -# -# (a) the Software, and -# -# (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if -# one is included with the Software each a "Larger Work" to which the Software -# is contributed by such licensors), -# -# without restriction, including without limitation the rights to copy, create -# derivative works of, display, perform, and distribute the Software and make, -# use, sell, offer for sale, import, export, have made, and have sold the -# Software and the Larger Work(s), and to sublicense the foregoing rights on -# either these or other terms. -# -# This license is subject to the following condition: -# -# The above copyright notice and either this complete permission notice or at a -# minimum a reference to the UPL must be included in all copies or substantial -# portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. 
-# - -set -e - - -# This script takes the CaseFolding.txt and UnicodeData.txt files of the Unicode -# character database and extracts from them the files UnicodeFoldTable.txt and -# NonUnicodeFoldTable.txt. These files contain definitions of the Canonicalize -# abstract function used in the ECMAScript spec to define case folding in -# regular expressions. UnicodeFoldTable.txt contains the definition of case -# folding for when the Unicode ('u') flag is present and NonUnicodeFoldTable.txt -# contains the definition of case folding for when the Unicode flag is missing. -# These two files are then picked up by the generate_case_fold_table.clj script -# which produces Java code that can be put into the CaseFoldTable class in -# TRegex. - -# We produce the table for the Canonicalize abstract function when the Unicode -# flag is present. The function is based on the contents of CodeFolding.txt. We -# remove any comments and empty lines from the file. We also remove items -# belonging from the full (F) and Turkic (T) mapping and only keep the simple -# (S) and common (C) ones. -cat dat/CaseFolding.txt \ - | sed -e '/^#/d' \ - -e '/^$/d' \ - -e '/; [FT]; /d' \ - -e 's/; /;/g' \ - | cut -d\; -f1,3 \ - > dat/UnicodeFoldTable.txt - -# We produce the table for the Canonicalize abstract function when the Unicode -# flag is not present. We extract the Unicode Case Conversion table from the -# UnicodeData.txt and SpecialCasing.txt files. We remove entries which map from -# non-ASCII code points (>= 128) to ASCII code points (< 128), as per the -# ECMAScript spec. We also drop the special entries which produce strings of more -# than one UTF-16 code unit. -./generate_nonunicode_fold_table.py > dat/NonUnicodeFoldTable.txt - - -# In Python's case insensitive regular expressions, characters are considered -# equivalent if they have the same Lowercase mapping. 
However, in some cases -# concerning character classes with non-BMP characters, Python also tries to -# match characters by considering their Uppercase mapping. In recent revisions of -# CPython 3, this is supplemented by an explicit list of equivalence classes of -# lowercase characters which are to be considered equal since they have the same -# Uppercase mapping. - -# Instead of relying on a list of exceptions, we generate the equivalence -# by considering any two characters equivalent if they map to each other or to -# some common target using either the Lowercase or Uppercase mapping (including -# complex cases from SpecialCasing.txt). - -# We make characters equivalent to their simple Uppercase and Lowercase -# mappings. We filter out the codepoint and the two character mappings, remove -# any empty fields by collapsing neighboring or terminating semicolons and -# finally removing any lines consisting of a single codepoint (the case when a -# character has no cased mappings). -cat dat/UnicodeData.txt \ - | cut -d\; -f1,13,14 \ - | sed -e 's/;\+/;/g' \ - -e 's/;$//' \ - -e '/^[^;]*$/d' \ - > dat/PythonSimpleCasing.txt - -./generate_special_casing_equivalences.py > dat/PythonExtendedCasing.txt - -# We produce the Python case fold table by merging the equivalences due to both -# the simple case mappings and the extended case mappings. -cat dat/PythonSimpleCasing.txt dat/PythonExtendedCasing.txt > dat/PythonFoldTable.txt diff --git a/regex/src/com.oracle.truffle.regex/tools/update_case_fold_table.py b/regex/src/com.oracle.truffle.regex/tools/update_case_fold_table.py deleted file mode 100755 index b827597f8f46..000000000000 --- a/regex/src/com.oracle.truffle.regex/tools/update_case_fold_table.py +++ /dev/null @@ -1,80 +0,0 @@ -#!/usr/bin/env python3 -# -# Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. -# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
-# -# The Universal Permissive License (UPL), Version 1.0 -# -# Subject to the condition set forth below, permission is hereby granted to any -# person obtaining a copy of this software, associated documentation and/or -# data (collectively the "Software"), free of charge and under any and all -# copyright rights in the Software, and any and all patent rights owned or -# freely licensable by each licensor hereunder covering either (i) the -# unmodified Software as contributed to or provided by such licensor, or (ii) -# the Larger Works (as defined below), to deal in both -# -# (a) the Software, and -# -# (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if -# one is included with the Software each a "Larger Work" to which the Software -# is contributed by such licensors), -# -# without restriction, including without limitation the rights to copy, create -# derivative works of, display, perform, and distribute the Software and make, -# use, sell, offer for sale, import, export, have made, and have sold the -# Software and the Larger Work(s), and to sublicense the foregoing rights on -# either these or other terms. -# -# This license is subject to the following condition: -# -# The above copyright notice and either this complete permission notice or at a -# minimum a reference to the UPL must be included in all copies or substantial -# portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. 
- -import sys -import os.path - - -def check_file_exists(path): - if not os.path.exists(path): - error(f'file "${path}" not found') - - -def error(msg): - print('ERROR: ' + msg) - sys.exit(1) - - -def main(): - file_name = 'CaseFoldTable.java' - file_path = '../src/com/oracle/truffle/regex/tregex/parser/' + file_name - replacement_file = './dat/case-fold-table.txt' - marker_begin = 'GENERATED CODE BEGIN' - marker_end = 'GENERATED CODE END' - - check_file_exists(file_path) - check_file_exists(replacement_file) - - with open(file_path, 'r') as f, open(replacement_file, 'r') as rf: - content = f.read() - i_begin = content.find(marker_begin) - i_end = content.find(marker_end) - if i_begin < 0: - error(f'could not find insertion marker "${marker_begin}" in ${file_name}') - if i_end < 0: - error(f'could not find end of insertion marker "${marker_begin}" in ${file_name}') - replacement = content[0:content.find('\n', i_begin) + 1] + '\n' + rf.read() + content[content.rfind('\n', i_begin, i_end):] - - with open(file_path, 'w') as f: - f.write(replacement) - - -main() From f5ee095109cbbe798df8e9a1eda0543e5ce31642 Mon Sep 17 00:00:00 2001 From: Josef Haider Date: Tue, 17 Oct 2023 11:03:10 +0200 Subject: [PATCH 04/17] TRegex: OracleDB flavor cleanup and bugfixes --- .../regex/tregex/test/OracleDBTests.java | 13 +++ .../truffle/regex/charset/Constants.java | 7 +- .../regex/tregex/parser/CaseFoldData.java | 9 +- .../regex/tregex/parser/JSRegexLexer.java | 2 +- .../regex/tregex/parser/JSRegexParser.java | 5 +- .../regex/tregex/parser/RegexLexer.java | 27 ++--- .../truffle/regex/tregex/parser/Token.java | 16 ++- .../parser/flavors/OracleDBRegexLexer.java | 14 +-- .../parser/flavors/OracleDBRegexParser.java | 16 +-- .../parser/flavors/PythonRegexLexer.java | 2 +- .../parser/flavors/PythonRegexParser.java | 9 +- .../tools/casefolding/.gitignore | 1 - .../tools/casefolding/src/main.rs | 101 ++++++++++++------ 13 files changed, 137 insertions(+), 85 deletions(-) diff --git 
a/regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/test/OracleDBTests.java b/regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/test/OracleDBTests.java index b57c4b8fdda0..63bb7d8d6cf4 100644 --- a/regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/test/OracleDBTests.java +++ b/regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/test/OracleDBTests.java @@ -879,6 +879,19 @@ public void generatedTests() { test("([[=a=]])\\1", "i", "\u00e4a", 0, false); test("([[=a=]])\\1", "i", "\u00c4a", 0, false); test("([[=a=]])\\1", "i", "\u00c4A", 0, false); + test("[[=a=]o]+", "i", "\u00e4O\u00f6", 0, true, 0, 2); + test("[[=a=]o]+", "i", "\u00e4O\u00f6", 0, true, 0, 2); + test("[[=\u00df=]o]+", "i", "s", 0, false); + test("[[=\u00df=]o]+", "i", "ss", 0, true, 0, 2); + test("[[=\u00df=]o]+", "", "s", 0, false); + test("[[=\u00df=]o]+", "", "ss", 0, true, 0, 2); + test("[\u0132]+", "", "ij", 0, false); + test("[\u0132]+", "i", "ij", 0, false); + test("[[=\u0132=]]+", "", "ij", 0, true, 0, 2); + test("[[=\u0132=]o]+", "", "ij", 0, true, 0, 2); + test("[[=\u0132=]o]+", "i", "ij", 0, true, 0, 2); + expectSyntaxError("[\\s-r]+", "", "invalid range in regular expression"); + test("[\\s-v]+", "", "\\stu", 0, true, 0, 4); /* GENERATED CODE END - KEEP THIS MARKER FOR AUTOMATIC UPDATES */ } } diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/charset/Constants.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/charset/Constants.java index b927b17ebd8f..488884dec276 100644 --- a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/charset/Constants.java +++ b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/charset/Constants.java @@ -253,12 +253,7 @@ public final class Constants { HEX_CHARS }; - public static final CodePointSet WORD_CHARS_UNICODE_SETS_IGNORE_CASE; - - static { - CodePointSetAccumulator tmp = new 
CodePointSetAccumulator(); - WORD_CHARS_UNICODE_SETS_IGNORE_CASE = CaseFoldData.simpleCaseFold(WORD_CHARS, tmp); - } + public static final CodePointSet WORD_CHARS_UNICODE_SETS_IGNORE_CASE = CaseFoldData.simpleCaseFold(WORD_CHARS, new CodePointSetAccumulator()); public static final CodePointSet NON_WORD_CHARS_UNICODE_SETS_IGNORE_CASE = WORD_CHARS_UNICODE_SETS_IGNORE_CASE.createInverse(CaseFoldData.FOLDABLE_CHARACTERS, new CompilationBuffer(Encodings.UTF_16)); diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/CaseFoldData.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/CaseFoldData.java index 1eaddb621203..ad1c3800154b 100644 --- a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/CaseFoldData.java +++ b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/CaseFoldData.java @@ -51,7 +51,6 @@ import com.oracle.truffle.regex.charset.Range; import com.oracle.truffle.regex.charset.RangesBuffer; import com.oracle.truffle.regex.charset.SortedListOfRanges; -import com.oracle.truffle.regex.tregex.buffer.IntRangesBuffer; import com.oracle.truffle.regex.tregex.string.Encodings; public class CaseFoldData { @@ -207,7 +206,8 @@ private void apply(CodePointSetAccumulator codePointSet, int tblEntryIndex, int break; case DIRECT_MAPPING: CodePointSet set = directMappings[ranges[tblEntryIndex * 4 + 3]]; - codePointSet.addSet(set.createIntersection(Encodings.UTF_8.getFullSet(), new IntRangesBuffer())); + assert set.getMax() <= Character.MAX_CODE_POINT : "CaseFoldEquivalenceTable is currently used for single-character mappings only"; + codePointSet.addSet(set); break; case ALTERNATING_UL: int loUL = Math.min(((intersectionLo - 1) ^ 1) + 1, ((intersectionHi - 1) ^ 1) + 1); @@ -229,9 +229,8 @@ private void apply(CodePointSetAccumulator codePointSet, int tblEntryIndex, int } private static void addRange(CodePointSetAccumulator codePointSet, int lo, int hi) { - 
if (lo < 0x11_0000) { - codePointSet.addRange(lo, Math.min(hi, 0x10_ffff)); - } + assert lo <= Character.MAX_CODE_POINT : "CaseFoldEquivalenceTable is currently used for single-character mappings only"; + codePointSet.addRange(lo, hi); } boolean equalsIgnoreCase(int codePointA, int codePointB) { diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/JSRegexLexer.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/JSRegexLexer.java index 232c1a6422ba..aaa2644a5c51 100644 --- a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/JSRegexLexer.java +++ b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/JSRegexLexer.java @@ -202,7 +202,7 @@ protected int getMaxBackReferenceDigits() { } @Override - protected CodePointSet getPredefinedCharClass(char c, boolean inCharClass) { + protected CodePointSet getPredefinedCharClass(char c) { switch (c) { case 's': if (source.getOptions().isU180EWhitespace()) { diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/JSRegexParser.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/JSRegexParser.java index 4e6c03426264..5a405667b800 100644 --- a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/JSRegexParser.java +++ b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/JSRegexParser.java @@ -55,7 +55,6 @@ import com.oracle.truffle.regex.RegexOptions; import com.oracle.truffle.regex.RegexSource; import com.oracle.truffle.regex.RegexSyntaxException; -import com.oracle.truffle.regex.charset.ClassSetContents; import com.oracle.truffle.regex.charset.CodePointSet; import com.oracle.truffle.regex.charset.CodePointSetAccumulator; import com.oracle.truffle.regex.errors.JsErrorMessages; @@ -219,9 +218,7 @@ private RegexAST parse(boolean rootCapture) throws RegexSyntaxException { curCharClass.clear(); 
break; case charClassAtom: - ClassSetContents contents = ((Token.CharacterClassAtom) token).getContents(); - assert contents.isCodePointSetOnly(); - curCharClass.addSet(contents.getCodePointSet()); + curCharClass.addSet(((Token.CharacterClassAtom) token).getContents()); break; case charClassEnd: boolean wasSingleChar = !lexer.isCurCharClassInverted() && curCharClass.matchesSingleChar(); diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/RegexLexer.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/RegexLexer.java index 7b739fdf95b5..e16ddb9d45d3 100644 --- a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/RegexLexer.java +++ b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/RegexLexer.java @@ -66,7 +66,7 @@ public abstract class RegexLexer { - private static final TBitSet PREDEFINED_CHAR_CLASSES = TBitSet.valueOf('D', 'S', 'W', 'd', 's', 'w'); + protected static final TBitSet PREDEFINED_CHAR_CLASSES = TBitSet.valueOf('D', 'S', 'W', 'd', 's', 'w'); protected static final TBitSet DEFAULT_WHITESPACE = TBitSet.valueOf('\t', '\n', '\u000b', '\f', '\r', ' '); public final RegexSource source; /** @@ -246,6 +246,14 @@ public RegexLexer(RegexSource source, CompilationBuffer compilationBuffer) { */ protected abstract int getMaxBackReferenceDigits(); + /** + * Returns {@code true} iff the given character is a predefined character class when preceded + * with a backslash (e.g. \d). + */ + protected boolean isPredefCharClass(char c) { + return PREDEFINED_CHAR_CLASSES.get(c); + } + /** * Returns the CodePointSet associated with the given predefined character class (e.g. * {@code \d}). @@ -253,7 +261,7 @@ public RegexLexer(RegexSource source, CompilationBuffer compilationBuffer) { * Note that the CodePointSet returned by this function has already been case-folded and * negated. 
*/ - protected abstract CodePointSet getPredefinedCharClass(char c, boolean inCharClass); + protected abstract CodePointSet getPredefinedCharClass(char c); /** * The maximum value allowed while parsing bounded quantifiers. Larger values will cause a call @@ -750,9 +758,7 @@ private Token charClass(CodePointSet codePointSet) { curCharClass.clear(); curCharClass.addSet(codePointSet); boolean wasSingleChar = curCharClass.matchesSingleChar(); - if (featureEnabledIgnoreCase()) { - caseFoldUnfold(curCharClass); - } + caseFoldUnfold(curCharClass); return Token.createCharClass(curCharClass.toCodePointSet(), wasSingleChar); } else { return Token.createCharClass(codePointSet); @@ -768,7 +774,8 @@ private Token getNext() throws RegexSyntaxException { curCharClassStartIndex = -1; return Token.createCharacterClassEnd(); } - return Token.createCharacterClassAtom(parseCharClassAtom(c)); + ClassSetContents atom = parseCharClassAtom(c); + return Token.createCharacterClassAtom(atom.getCodePointSet(), atom.isPosixCollationEquivalenceClass()); } switch (c) { case '.': @@ -849,7 +856,7 @@ private Token parseEscape() throws RegexSyntaxException { // the case-folding step in the `charClass` method and call `Token::createCharClass` // directly. 
if (isPredefCharClass(c)) { - return Token.createCharClass(getPredefinedCharClass(c, false)); + return Token.createCharClass(getPredefinedCharClass(c)); } else if (featureEnabledUnicodePropertyEscapes() && (c == 'p' || c == 'P')) { ClassSetContents unicodePropertyContents = parseUnicodeCharacterProperty(c == 'P'); if (featureEnabledClassSetExpressions()) { @@ -1185,7 +1192,7 @@ private ClassSetContents parseCharClassAtom(char c) throws RegexSyntaxException private ClassSetContents parseEscapeCharClass(char c) throws RegexSyntaxException { if (isPredefCharClass(c)) { - return ClassSetContents.createCharacterClass(getPredefinedCharClass(c, true)); + return ClassSetContents.createCharacterClass(getPredefinedCharClass(c)); } else if (featureEnabledUnicodePropertyEscapes() && (c == 'p' || c == 'P')) { return parseUnicodeCharacterProperty(c == 'P'); } else { @@ -1468,10 +1475,6 @@ public RegexSyntaxException syntaxError(String msg) { return RegexSyntaxException.createPattern(source, msg, getLastAtomPosition()); } - private static boolean isPredefCharClass(char c) { - return PREDEFINED_CHAR_CLASSES.get(c); - } - public static boolean isDecimalDigit(int c) { return '0' <= c && c <= '9'; } diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/Token.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/Token.java index afa570e820d4..2b698594f469 100644 --- a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/Token.java +++ b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/Token.java @@ -183,8 +183,8 @@ public static Token createCharacterClassBegin() { return CHAR_CLASS_BEGIN; } - public static Token createCharacterClassAtom(ClassSetContents contents) { - return new CharacterClassAtom(contents); + public static Token createCharacterClassAtom(CodePointSet contents, boolean isPosixCollationEquivalenceClass) { + return new CharacterClassAtom(contents, 
isPosixCollationEquivalenceClass); } public static Token createCharacterClassEnd() { @@ -403,11 +403,13 @@ public int getCodePoint() { public static final class CharacterClassAtom extends Token { - private final ClassSetContents contents; + private final CodePointSet contents; + private final boolean isPosixCollationEquivalenceClass; - public CharacterClassAtom(ClassSetContents contents) { + public CharacterClassAtom(CodePointSet contents, boolean isPosixCollationEquivalenceClass) { super(Kind.charClassAtom); this.contents = contents; + this.isPosixCollationEquivalenceClass = isPosixCollationEquivalenceClass; } @TruffleBoundary @@ -416,9 +418,13 @@ public JsonObject toJson() { return super.toJson().append(Json.prop("contents", contents)); } - public ClassSetContents getContents() { + public CodePointSet getContents() { return contents; } + + public boolean isPosixCollationEquivalenceClass() { + return isPosixCollationEquivalenceClass; + } } public static final class CharacterClass extends Token { diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/OracleDBRegexLexer.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/OracleDBRegexLexer.java index 04beda84ec91..dfe1d5001db6 100644 --- a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/OracleDBRegexLexer.java +++ b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/OracleDBRegexLexer.java @@ -227,12 +227,14 @@ protected int getMaxBackReferenceDigits() { } @Override - protected CodePointSet getPredefinedCharClass(char c, boolean inCharClass) { - if (inCharClass) { - // OracleDB ignores \s \d \w inside character classes, and interprets them as literal - // characters instead - return '\\' < c ? 
CodePointSet.create('\\', '\\', c, c) : CodePointSet.create(c, c, '\\', '\\'); - } + protected boolean isPredefCharClass(char c) { + // OracleDB ignores \s \d \w inside character classes, and interprets them as literal + // characters instead + return !inCharacterClass() && PREDEFINED_CHAR_CLASSES.get(c); + } + + @Override + protected CodePointSet getPredefinedCharClass(char c) { CodePointSet cps = getPOSIXCharClass(c); if (isLowerCase(c)) { return cps; diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/OracleDBRegexParser.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/OracleDBRegexParser.java index d82d5aa3fe3d..86f36981a785 100644 --- a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/OracleDBRegexParser.java +++ b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/OracleDBRegexParser.java @@ -51,7 +51,6 @@ import com.oracle.truffle.regex.RegexLanguage; import com.oracle.truffle.regex.RegexSource; import com.oracle.truffle.regex.RegexSyntaxException; -import com.oracle.truffle.regex.charset.ClassSetContents; import com.oracle.truffle.regex.charset.CodePointSet; import com.oracle.truffle.regex.charset.CodePointSetAccumulator; import com.oracle.truffle.regex.errors.OracleDBErrorMessages; @@ -213,12 +212,11 @@ public RegexAST parse() throws RegexSyntaxException { curCharClassPosixEquivalenceClasses.clear(); break; case charClassAtom: - ClassSetContents contents = ((Token.CharacterClassAtom) token).getContents(); - assert contents.isCodePointSetOnly(); - if (contents.isPosixCollationEquivalenceClass()) { - curCharClassPosixEquivalenceClasses.addSet(contents.getCodePointSet()); + CodePointSet contents = ((Token.CharacterClassAtom) token).getContents(); + if (((Token.CharacterClassAtom) token).isPosixCollationEquivalenceClass()) { + curCharClassPosixEquivalenceClasses.addSet(contents); } else { - 
curCharClass.addSet(contents.getCodePointSet()); + curCharClass.addSet(contents); } break; case charClassEnd: @@ -249,7 +247,8 @@ private void addCharClass() { } if (flags.isIgnoreCase()) { List> multiCodePointExpansions = MultiCharacterCaseFolding.caseClosureMultiCodePoint(CaseFoldData.CaseFoldAlgorithm.OracleDB, curCharClass); - List> multiCodePointExpansionsPEC = MultiCharacterCaseFolding.caseClosureMultiCodePoint(CaseFoldData.CaseFoldAlgorithm.OracleDB, curCharClassPosixEquivalenceClasses); + List> multiCodePointExpansionsPEC = MultiCharacterCaseFolding.caseClosureMultiCodePoint(CaseFoldData.CaseFoldAlgorithm.OracleDBAI, + curCharClassPosixEquivalenceClasses); if (!multiCodePointExpansions.isEmpty() || !multiCodePointExpansionsPEC.isEmpty()) { astBuilder.pushGroup(); astBuilder.addCharClass(curCharClass.toCodePointSet()); @@ -260,7 +259,8 @@ private void addCharClass() { astBuilder.addCharClass(curCharClass.toCodePointSet(), wasSingleChar); } } else if (!curCharClassPosixEquivalenceClasses.isEmpty()) { - List> multiCodePointExpansionsPEC = MultiCharacterCaseFolding.caseClosureMultiCodePoint(CaseFoldData.CaseFoldAlgorithm.OracleDB, curCharClassPosixEquivalenceClasses); + List> multiCodePointExpansionsPEC = MultiCharacterCaseFolding.caseClosureMultiCodePoint(CaseFoldData.CaseFoldAlgorithm.OracleDBAI, + curCharClassPosixEquivalenceClasses); if (!multiCodePointExpansionsPEC.isEmpty()) { astBuilder.pushGroup(); astBuilder.addCharClass(curCharClass.toCodePointSet()); diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/PythonRegexLexer.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/PythonRegexLexer.java index ff477414b300..a00c86e0f79e 100644 --- a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/PythonRegexLexer.java +++ b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/PythonRegexLexer.java @@ -430,7 
+430,7 @@ protected ClassSetContents caseFoldClassSetAtom(ClassSetContents classSetContent } @Override - protected CodePointSet getPredefinedCharClass(char c, boolean inCharClass) { + protected CodePointSet getPredefinedCharClass(char c) { if (getLocalFlags().isUnicode(mode)) { return UNICODE_CHAR_CLASS_SETS.get(c); } diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/PythonRegexParser.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/PythonRegexParser.java index 33ecdefbda5d..ce905237f6dd 100644 --- a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/PythonRegexParser.java +++ b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/PythonRegexParser.java @@ -50,7 +50,6 @@ import com.oracle.truffle.regex.RegexLanguage; import com.oracle.truffle.regex.RegexSource; import com.oracle.truffle.regex.RegexSyntaxException; -import com.oracle.truffle.regex.charset.ClassSetContents; import com.oracle.truffle.regex.charset.CodePointSet; import com.oracle.truffle.regex.charset.CodePointSetAccumulator; import com.oracle.truffle.regex.charset.Constants; @@ -163,7 +162,7 @@ public RegexAST parse() throws RegexSyntaxException { break; } if (getLocalFlags().isUnicode(mode)) { - astBuilder.addWordBoundaryAssertion(lexer.getPredefinedCharClass('w', false), lexer.getPredefinedCharClass('W', false)); + astBuilder.addWordBoundaryAssertion(lexer.getPredefinedCharClass('w'), lexer.getPredefinedCharClass('W')); } else if (getLocalFlags().isLocale()) { astBuilder.addWordBoundaryAssertion(lexer.getLocaleData().getWordCharacters(), lexer.getLocaleData().getNonWordCharacters()); } else { @@ -179,7 +178,7 @@ public RegexAST parse() throws RegexSyntaxException { break; } if (getLocalFlags().isUnicode(mode)) { - astBuilder.addWordNonBoundaryAssertionPython(lexer.getPredefinedCharClass('w', false), lexer.getPredefinedCharClass('W', false)); 
+ astBuilder.addWordNonBoundaryAssertionPython(lexer.getPredefinedCharClass('w'), lexer.getPredefinedCharClass('W')); } else if (getLocalFlags().isLocale()) { astBuilder.addWordNonBoundaryAssertionPython(lexer.getLocaleData().getWordCharacters(), lexer.getLocaleData().getNonWordCharacters()); } else { @@ -241,9 +240,7 @@ public RegexAST parse() throws RegexSyntaxException { curCharClass.clear(); break; case charClassAtom: - ClassSetContents contents = ((Token.CharacterClassAtom) token).getContents(); - assert contents.isCodePointSetOnly(); - curCharClass.addSet(contents.getCodePointSet()); + curCharClass.addSet(((Token.CharacterClassAtom) token).getContents()); break; case charClassEnd: boolean wasSingleChar = !lexer.isCurCharClassInverted() && curCharClass.matchesSingleChar(); diff --git a/regex/src/com.oracle.truffle.regex/tools/casefolding/.gitignore b/regex/src/com.oracle.truffle.regex/tools/casefolding/.gitignore index d81f12ed1b1c..ea8c4bf7f35f 100644 --- a/regex/src/com.oracle.truffle.regex/tools/casefolding/.gitignore +++ b/regex/src/com.oracle.truffle.regex/tools/casefolding/.gitignore @@ -1,2 +1 @@ /target -/.idea diff --git a/regex/src/com.oracle.truffle.regex/tools/casefolding/src/main.rs b/regex/src/com.oracle.truffle.regex/tools/casefolding/src/main.rs index 87278dc1250e..5823893d722f 100644 --- a/regex/src/com.oracle.truffle.regex/tools/casefolding/src/main.rs +++ b/regex/src/com.oracle.truffle.regex/tools/casefolding/src/main.rs @@ -50,11 +50,11 @@ use std::process::Command; use std::time::Instant; use csv::{Reader, StringRecord, Trim}; -use error_chain::error_chain; +use error_chain::{bail, error_chain}; use icu_collator::{CaseLevel, Collator, CollatorOptions, Strength}; use icu_locid::Locale; use indicatif::ProgressIterator; -use oracle::{Connection, Connector, Privilege}; +use oracle::{Connection, Connector, Privilege, Statement}; use oracle::sql_type::OracleType; use reqwest::Url; @@ -1287,24 +1287,55 @@ fn oracledb_generate_posix_char_classes() 
-> Result<()> { } fn oracledb_generate_tests() -> Result<()> { - fn count_groups(pattern: &str) -> i32 { - let mut par_open = 0; - let mut escaped = false; - let mut n = 1; - for c in pattern.chars() { - if !escaped { - if c == '(' { - par_open += 1; - } else if c == ')' { - if par_open > 0 { - par_open -= 1; - n += 1; + enum TestResult { + Match(Vec), + NoMatch, + SyntaxError(String), + } + + fn run_test(statement: &mut Statement, pattern: &str, flags: &str, input: &str, from_index: i32) -> Result { + fn count_groups(pattern: &str) -> i32 { + let mut par_open = 0; + let mut escaped = false; + let mut n = 1; + for c in pattern.chars() { + if !escaped { + if c == '(' { + par_open += 1; + } else if c == ')' { + if par_open > 0 { + par_open -= 1; + n += 1; + } } } + escaped = c == '\\'; } - escaped = c == '\\'; + return min(n, 10); } - return min(n, 10); + let occurrence = 1; + let n_groups = count_groups(pattern); + let mut groups = vec![]; + for i_group in 0..n_groups { + for start_or_end in [0, 1] { + // explicit type for flags string: the client library will set the data type of strings to NVARCHAR2, but REGEXP_INSTR only accepts VARCHAR or CHAR on the flags parameter + match statement.query_row_as::(&[&input, &pattern, &from_index, &occurrence, &start_or_end, &(&flags, &OracleType::Char(10)), &i_group]) { + Ok(i) => { + if i_group == 0 && i == 0 { + return Ok(TestResult::NoMatch); + } + groups.push(i - 1); + } + Err(oracle::Error::OciError(e)) => { + return Ok(TestResult::SyntaxError(e.message()[(e.message().find(": ").unwrap() + 2)..].to_string())); + } + Err(e) => { + bail!(e); + } + } + } + } + return Ok(TestResult::Match(groups)); } let conn = oracledb_connect()?; @@ -1963,24 +1994,34 @@ fn oracledb_generate_tests() -> Result<()> { ("([[=a=]])\\1", "i", "\u{00e4}a"), ("([[=a=]])\\1", "i", "\u{00c4}a"), ("([[=a=]])\\1", "i", "\u{00c4}A"), + ("[[=a=]o]+", "i", "\u{00e4}O\u{00f6}"), + ("[[=a=]o]+", "i", "\u{00e4}O\u{00f6}"), + ("[[=\u{00df}=]o]+", "i", "s"), + 
("[[=\u{00df}=]o]+", "i", "ss"), + ("[[=\u{00df}=]o]+", "", "s"), + ("[[=\u{00df}=]o]+", "", "ss"), + ("[\u{0132}]+", "", "ij"), + ("[\u{0132}]+", "i", "ij"), + ("[[=\u{0132}=]]+", "", "ij"), + ("[[=\u{0132}=]o]+", "", "ij"), + ("[[=\u{0132}=]o]+", "i", "ij"), + ("[\\s-r]+", "", "\\stu"), + ("[\\s-v]+", "", "\\stu"), ] { let from_index = 1; - let occurrence = 1; - let n_groups = count_groups(pattern); - let mut groups = vec![]; - for i_group in 0..n_groups { - for start_or_end in [0, 1] { - // explicit type for flags string: the client library will set the data type of strings to NVARCHAR2, but REGEXP_INSTR only accepts VARCHAR or CHAR on the flags parameter - let i = statement.query_row_as::(&[&input, &pattern, &from_index, &occurrence, &start_or_end, &(&flags, &OracleType::Char(10)), &i_group])?; - groups.push(i - 1); + let e_pattern = java_string_escape(pattern); + let e_input = java_string_escape(input); + match run_test(&mut statement, &pattern, &flags, &input, from_index)? { + TestResult::Match(groups) => { + writeln!(out, "test(\"{}\", \"{}\", \"{}\", {}, true, {});", e_pattern, flags, e_input, from_index - 1, groups.iter().map(|v| format!("{}", v)).collect::>().join(", "))?; + } + TestResult::NoMatch => { + writeln!(out, "test(\"{}\", \"{}\", \"{}\", {}, false);", e_pattern, flags, e_input, from_index - 1)?; + } + TestResult::SyntaxError(message) => { + writeln!(out, "expectSyntaxError(\"{}\", \"{}\", \"{}\");", e_pattern, flags, java_string_escape(message.as_str()))?; } } - let is_match = *groups.get(0).unwrap() >= 0; - write!(out, "test(\"{}\", \"{}\", \"{}\", {}, {}", java_string_escape(pattern), flags, java_string_escape(input), from_index - 1, is_match)?; - if is_match { - write!(out, ", {}", groups.iter().map(|v| format!("{}", v)).collect::>().join(", "))?; - } - writeln!(out, ");")?; } insert_generated_code(Path::new(PATH_GRAAL_REPO).join(PATH_ORACLE_DB_TESTS).as_path(), &out)?; Ok(()) From bf9b6b232614ef597c7f86735d582ca2713c2f88 Mon Sep 17 00:00:00 
2001 From: Christian Haeubl Date: Tue, 17 Oct 2023 15:14:28 +0200 Subject: [PATCH 05/17] Ignore crashed threads. --- .../svm/core/stack/JavaStackWalker.java | 6 ++++ .../com/oracle/svm/core/thread/Safepoint.java | 35 ------------------- .../com/oracle/svm/core/thread/VMThreads.java | 9 +++-- 3 files changed, 13 insertions(+), 37 deletions(-) diff --git a/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/stack/JavaStackWalker.java b/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/stack/JavaStackWalker.java index 84ab46d13600..a8923223c386 100644 --- a/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/stack/JavaStackWalker.java +++ b/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/stack/JavaStackWalker.java @@ -45,6 +45,7 @@ import com.oracle.svm.core.heap.RestrictHeapAccess; import com.oracle.svm.core.log.Log; import com.oracle.svm.core.thread.VMOperation; +import com.oracle.svm.core.thread.VMThreads.SafepointBehavior; import com.oracle.svm.core.util.VMError; /** @@ -134,6 +135,11 @@ public static boolean initWalk(JavaStackWalk walk, IsolateThread thread) { assert thread.notEqual(CurrentIsolate.getCurrentThread()) : "Cannot walk the current stack with this method, it would miss all frames after the last frame anchor"; assert VMOperation.isInProgressAtSafepoint() : "Walking the stack of another thread is only safe when that thread is stopped at a safepoint"; + if (SafepointBehavior.isCrashedThread(thread)) { + /* Skip crashed threads because they may no longer have a stack. 
*/ + return false; + } + JavaFrameAnchor anchor = JavaFrameAnchors.getFrameAnchor(thread); boolean result = anchor.isNonNull(); Pointer sp = WordFactory.nullPointer(); diff --git a/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/thread/Safepoint.java b/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/thread/Safepoint.java index 154ede835acb..49ed1d06b494 100644 --- a/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/thread/Safepoint.java +++ b/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/thread/Safepoint.java @@ -876,42 +876,7 @@ public UnsignedWord getSafepointId() { return safepointId; } - /** A sample method to execute in a VMOperation. */ public static class TestingBackdoor { - - public static int countingVMOperation() { - final Log trace = Log.log().string("[Safepoint.Master.TestingBackdoor.countingVMOperation:").newline(); - int atSafepoint = 0; - int ignoreSafepoints = 0; - int notAtSafepoint = 0; - - for (IsolateThread vmThread = VMThreads.firstThread(); vmThread.isNonNull(); vmThread = VMThreads.nextThread(vmThread)) { - int safepointBehavior = SafepointBehavior.getSafepointBehaviorVolatile(vmThread); - int status = StatusSupport.getStatusVolatile(vmThread); - if (safepointBehavior == SafepointBehavior.PREVENT_VM_FROM_REACHING_SAFEPOINT) { - notAtSafepoint++; - } else if (safepointBehavior == SafepointBehavior.THREAD_CRASHED) { - ignoreSafepoints += 1; - } else { - assert safepointBehavior == SafepointBehavior.ALLOW_SAFEPOINT; - // Check if the thread is at a safepoint or in native code. 
- switch (status) { - case StatusSupport.STATUS_IN_SAFEPOINT: - atSafepoint += 1; - break; - default: - notAtSafepoint += 1; - break; - } - } - } - trace.string(" atSafepoint: ").signed(atSafepoint) - .string(" ignoreSafepoints: ").signed(ignoreSafepoints) - .string(" notAtSafepoint: ").signed(notAtSafepoint); - trace.string("]").newline(); - return atSafepoint; - } - @Uninterruptible(reason = "Called from uninterruptible code.", mayBeInlined = true) public static int getCurrentThreadSafepointRequestedCount() { return getSafepointRequested(CurrentIsolate.getCurrentThread()); diff --git a/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/thread/VMThreads.java b/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/thread/VMThreads.java index dc9b724b2fc9..f7557013a637 100644 --- a/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/thread/VMThreads.java +++ b/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/thread/VMThreads.java @@ -888,13 +888,13 @@ public static class SafepointBehavior { * The thread won't freeze at a safepoint, and will actively prevent the VM from reaching a * safepoint (regardless of the thread status). */ - static final int PREVENT_VM_FROM_REACHING_SAFEPOINT = 1; + public static final int PREVENT_VM_FROM_REACHING_SAFEPOINT = 1; /** * The thread won't freeze at a safepoint and the safepoint handling will ignore the thread. * So, the VM will be able to reach a safepoint regardless of the status of this thread. 
*/ - static final int THREAD_CRASHED = 2; + public static final int THREAD_CRASHED = 2; @Uninterruptible(reason = "Called from uninterruptible code.", mayBeInlined = true) public static boolean ignoresSafepoints() { @@ -945,6 +945,11 @@ public static void markThreadAsCrashed() { safepointBehaviorTL.setVolatile(THREAD_CRASHED); } + @Uninterruptible(reason = "Called from uninterruptible code.", mayBeInlined = true) + public static boolean isCrashedThread(IsolateThread thread) { + return safepointBehaviorTL.getVolatile(thread) == THREAD_CRASHED; + } + @Uninterruptible(reason = "Called from uninterruptible code.", mayBeInlined = true) public static String toString(int safepointBehavior) { switch (safepointBehavior) { From ad2a494da02cddf752f75f143d8a9f3e4cfb2ea5 Mon Sep 17 00:00:00 2001 From: Carlo Refice Date: Wed, 25 Oct 2023 12:37:34 +0200 Subject: [PATCH 06/17] Improve canonicalization of InstanceOfNode When the intersection between an InstanceOfNode's input stamp and checked stamp is null, the check will only succeed if the input is null, and can thus be simplified to a null check. 
--- compiler/mx.compiler/mx_compiler.py | 12 ++++----- .../compiler/nodes/java/InstanceOfNode.java | 25 +++++++++---------- 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/compiler/mx.compiler/mx_compiler.py b/compiler/mx.compiler/mx_compiler.py index c313b6351669..87d204f0d6f9 100644 --- a/compiler/mx.compiler/mx_compiler.py +++ b/compiler/mx.compiler/mx_compiler.py @@ -566,14 +566,14 @@ def compiler_gate_benchmark_runner(tasks, extraVMarguments=None, prefix='', task if t: for name in dacapo_suite.benchmarkList(bmSuiteArgs): iterations = int(dacapo_suite.daCapoIterations().get(name, -1) * default_iterations_reduction) - for i in range(default_iterations * scala_daily_scaling_factor): + for _ in range(default_iterations * scala_daily_scaling_factor): _gate_dacapo(name, iterations, benchVmArgs + ['-Dgraal.TrackNodeSourcePosition=true'] + dacapo_esa) with mx_gate.Task('Dacapo benchmark weekly workload', tasks, tags=['dacapo_weekly'], report=task_report_component) as t: if t: for name in dacapo_suite.benchmarkList(bmSuiteArgs): iterations = int(dacapo_suite.daCapoIterations().get(name, -1) * default_iterations_reduction) - for i in range(default_iterations * scala_weekly_scaling_factor): + for _ in range(default_iterations * scala_weekly_scaling_factor): _gate_dacapo(name, iterations, benchVmArgs + ['-Dgraal.TrackNodeSourcePosition=true'] + dacapo_esa) # ensure we can also run on C2 @@ -603,14 +603,14 @@ def compiler_gate_benchmark_runner(tasks, extraVMarguments=None, prefix='', task if t: for name in scala_dacapo_suite.benchmarkList(bmSuiteArgs): iterations = int(scala_dacapo_suite.daCapoIterations().get(name, -1) * default_iterations_reduction) - for i in range(default_iterations * scala_dacapo_daily_scaling_factor): + for _ in range(default_iterations * scala_dacapo_daily_scaling_factor): _gate_scala_dacapo(name, iterations, benchVmArgs + ['-Dgraal.TrackNodeSourcePosition=true'] + dacapo_esa) with mx_gate.Task('ScalaDacapo benchmark weekly 
workload', tasks, tags=['scala_dacapo_weekly'], report=task_report_component) as t: if t: for name in scala_dacapo_suite.benchmarkList(bmSuiteArgs): iterations = int(scala_dacapo_suite.daCapoIterations().get(name, -1) * default_iterations_reduction) - for i in range(default_iterations * scala_dacapo_weekly_scaling_factor): + for _ in range(default_iterations * scala_dacapo_weekly_scaling_factor): _gate_scala_dacapo(name, iterations, benchVmArgs + ['-Dgraal.TrackNodeSourcePosition=true'] + dacapo_esa) # run Renaissance benchmarks # @@ -630,14 +630,14 @@ def compiler_gate_benchmark_runner(tasks, extraVMarguments=None, prefix='', task if t: for name in renaissance_suite.benchmarkList(bmSuiteArgs): iterations = int(renaissance_suite.renaissanceIterations().get(name, -1) * default_iterations_reduction) - for i in range(default_iterations): + for _ in range(default_iterations): _gate_renaissance(name, iterations, benchVmArgs + ['-Dgraal.TrackNodeSourcePosition=true'] + enable_assertions) with mx_gate.Task('Renaissance benchmark weekly workload', tasks, tags=['renaissance_weekly'], report=task_report_component) as t: if t: for name in renaissance_suite.benchmarkList(bmSuiteArgs): iterations = int(renaissance_suite.renaissanceIterations().get(name, -1) * default_iterations_reduction) - for i in range(default_iterations * daily_weekly_jobs_ratio): + for _ in range(default_iterations * daily_weekly_jobs_ratio): _gate_renaissance(name, iterations, benchVmArgs + ['-Dgraal.TrackNodeSourcePosition=true'] + enable_assertions) # run benchmark with non default setup # diff --git a/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/nodes/java/InstanceOfNode.java b/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/nodes/java/InstanceOfNode.java index 395ba8fdb791..28d6e9e5cce6 100644 --- a/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/nodes/java/InstanceOfNode.java +++ b/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/nodes/java/InstanceOfNode.java @@ -50,7 
+50,6 @@ import jdk.graal.compiler.nodes.spi.CanonicalizerTool; import jdk.graal.compiler.nodes.spi.Lowerable; import jdk.graal.compiler.nodes.type.StampTool; - import jdk.vm.ci.meta.JavaKind; import jdk.vm.ci.meta.JavaTypeProfile; import jdk.vm.ci.meta.ResolvedJavaType; @@ -138,17 +137,19 @@ public static LogicNode findSynonym(ObjectStamp checkedStamp, ValueNode object, if (joinedStamp.isEmpty()) { // The check can never succeed, the intersection of the two stamps is empty. return LogicConstantNode.contradiction(); + } else if (joinedStamp.equals(inputStamp)) { + // The check will always succeed, the intersection of the two stamps is equal to the + // input stamp. + return LogicConstantNode.tautology(); + } else if (joinedStamp.alwaysNull()) { + // The intersection of the two stamps is always null => simplify the check. + return IsNullNode.create(object); } else { ObjectStamp meetStamp = (ObjectStamp) checkedStamp.meet(inputStamp); - if (checkedStamp.equals(meetStamp)) { - // The check will always succeed, the union of the two stamps is equal to the - // checked stamp. - return LogicConstantNode.tautology(); - } else if (checkedStamp.alwaysNull()) { - return IsNullNode.create(object); - } else if (Objects.equals(checkedStamp.type(), meetStamp.type()) && checkedStamp.isExactType() == meetStamp.isExactType() && checkedStamp.alwaysNull() == meetStamp.alwaysNull()) { + if (Objects.equals(checkedStamp.type(), meetStamp.type()) && checkedStamp.isExactType() == meetStamp.isExactType() && checkedStamp.alwaysNull() == meetStamp.alwaysNull()) { assert checkedStamp.nonNull() != inputStamp.nonNull(); - // The only difference makes the null-ness of the value => simplify the check. + // The only difference between the two stamps is their null-ness => simplify the + // check. 
if (checkedStamp.nonNull()) { return LogicNegationNode.create(IsNullNode.create(object)); } else { @@ -182,8 +183,7 @@ public Stamp getSucceedingStampForValue(boolean negated) { @Override public TriState tryFold(Stamp valueStamp) { - if (valueStamp instanceof ObjectStamp) { - ObjectStamp inputStamp = (ObjectStamp) valueStamp; + if (valueStamp instanceof ObjectStamp inputStamp) { ObjectStamp joinedStamp = (ObjectStamp) checkedStamp.join(inputStamp); if (joinedStamp.isEmpty()) { @@ -232,8 +232,7 @@ public static boolean intrinsify(GraphBuilderContext b, ResolvedJavaType type, V @Override public TriState implies(boolean thisNegated, LogicNode other) { - if (other instanceof InstanceOfNode) { - InstanceOfNode instanceOfNode = (InstanceOfNode) other; + if (other instanceof InstanceOfNode instanceOfNode) { if (instanceOfNode.getValue() == getValue()) { if (thisNegated) { // !X => Y From 0edabf4fb094819fce960a2058bfc99e1f8c6f4a Mon Sep 17 00:00:00 2001 From: Christian Haeubl Date: Wed, 25 Oct 2023 17:41:11 +0200 Subject: [PATCH 07/17] Improve crash log so that we print the instructions of the caller if a null function pointer is called. 
--- .../oracle/svm/core/SubstrateDiagnostics.java | 105 +++++++++++------- 1 file changed, 66 insertions(+), 39 deletions(-) diff --git a/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/SubstrateDiagnostics.java b/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/SubstrateDiagnostics.java index 0498fcbf22d5..f359c4b72437 100644 --- a/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/SubstrateDiagnostics.java +++ b/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/SubstrateDiagnostics.java @@ -30,16 +30,6 @@ import java.util.Arrays; import org.graalvm.collections.EconomicMap; -import jdk.graal.compiler.api.replacements.Fold; -import jdk.graal.compiler.core.common.NumUtil; -import jdk.graal.compiler.core.common.SuppressFBWarnings; -import jdk.graal.compiler.nodes.PauseNode; -import jdk.graal.compiler.nodes.java.ArrayLengthNode; -import jdk.graal.compiler.options.Option; -import jdk.graal.compiler.options.OptionKey; -import jdk.graal.compiler.options.OptionType; -import jdk.graal.compiler.word.ObjectAccess; -import jdk.graal.compiler.word.Word; import org.graalvm.nativeimage.CurrentIsolate; import org.graalvm.nativeimage.ImageSingletons; import org.graalvm.nativeimage.IsolateThread; @@ -101,6 +91,17 @@ import com.oracle.svm.core.util.TimeUtils; import com.oracle.svm.core.util.VMError; +import jdk.graal.compiler.api.replacements.Fold; +import jdk.graal.compiler.core.common.NumUtil; +import jdk.graal.compiler.core.common.SuppressFBWarnings; +import jdk.graal.compiler.nodes.PauseNode; +import jdk.graal.compiler.nodes.java.ArrayLengthNode; +import jdk.graal.compiler.options.Option; +import jdk.graal.compiler.options.OptionKey; +import jdk.graal.compiler.options.OptionType; +import jdk.graal.compiler.word.ObjectAccess; +import jdk.graal.compiler.word.Word; + public class SubstrateDiagnostics { private static final int MAX_THREADS_TO_PRINT = 100_000; private static final int MAX_FRAME_ANCHORS_TO_PRINT_PER_THREAD = 1000; @@ -506,6 
+507,31 @@ private static boolean matches(String text, int t, String pattern, int p) { return patternPos == pattern.length(); } + /* Scan the stack until we find a valid return address. We may encounter false-positives. */ + private static Pointer findPotentialReturnAddressPosition(Pointer originalSp) { + UnsignedWord stackBase = VMThreads.StackBase.get(); + if (stackBase.equal(0)) { + /* We don't know the stack boundaries, so only search within 32 bytes. */ + stackBase = originalSp.add(32); + } + + int wordSize = ConfigurationValues.getTarget().wordSize; + Pointer pos = originalSp; + while (pos.belowThan(stackBase)) { + CodePointer possibleIp = pos.readWord(0); + if (pointsIntoNativeImageCode(possibleIp)) { + return pos; + } + pos = pos.add(wordSize); + } + return WordFactory.nullPointer(); + } + + @Uninterruptible(reason = "Prevent the GC from freeing the CodeInfo.") + private static boolean pointsIntoNativeImageCode(CodePointer possibleIp) { + return CodeInfoTable.lookupCodeInfo(possibleIp).isNonNull(); + } + public static class FatalErrorState { AtomicWord diagnosticThread; volatile int diagnosticThunkIndex; @@ -593,15 +619,31 @@ public int maxInvocationCount() { @RestrictHeapAccess(access = RestrictHeapAccess.Access.NO_ALLOCATION, reason = "Must not allocate while printing diagnostics.") public void printDiagnostics(Log log, ErrorContext context, int maxDiagnosticLevel, int invocationCount) { CodePointer ip = context.getInstructionPointer(); - log.string("Printing instructions (ip=").zhex(ip).string("):").indent(true); - if (ip.isNull()) { - // can't print any instructions - } else if (invocationCount < 4) { - // print 512, 128, or 32 instruction bytes. + log.string("Printing instructions (ip=").zhex(ip).string("):"); + + if (((Pointer) ip).belowThan(VirtualMemoryProvider.get().getGranularity())) { + /* IP points into the first page of the virtual address space. 
*/ + Pointer originalSp = context.getStackPointer(); + log.string(" IP is invalid"); + + Pointer returnAddressPos = findPotentialReturnAddressPosition(originalSp); + if (returnAddressPos.isNull()) { + log.string(", instructions cannot be printed.").newline(); + return; + } + + ip = returnAddressPos.readWord(0); + Pointer sp = returnAddressPos.add(FrameAccess.returnAddressSize()); + log.string(", printing instructions (ip=").zhex(ip).string(") of the most likely caller (sp + ").unsigned(sp.subtract(originalSp)).string(") instead"); + } + + log.indent(true); + if (invocationCount < 4) { + /* Print 512, 128, or 32 instruction bytes. */ int bytesToPrint = 1024 >> (invocationCount * 2); hexDump(log, ip, bytesToPrint, bytesToPrint); } else if (invocationCount == 4) { - // just print one word starting at the ip + /* Just print one word starting at the ip. */ hexDump(log, ip, 0, ConfigurationValues.getTarget().wordSize); } log.indent(false).newline(); @@ -992,31 +1034,16 @@ public void printDiagnostics(Log log, ErrorContext context, int maxDiagnosticLev } private static void startStackWalkInMostLikelyCaller(Log log, int invocationCount, Pointer originalSp) { - UnsignedWord stackBase = VMThreads.StackBase.get(); - if (stackBase.equal(0)) { - /* We don't know the stack boundaries, so only search within 32 bytes. */ - stackBase = originalSp.add(32); - } - - /* Search until we find a valid return address. We may encounter false-positives. 
*/ - int wordSize = ConfigurationValues.getTarget().wordSize; - Pointer pos = originalSp; - while (pos.belowThan(stackBase)) { - CodePointer possibleIp = pos.readWord(0); - if (pointsIntoNativeImageCode(possibleIp)) { - Pointer sp = pos.add(wordSize); - log.newline(); - log.string("Starting the stack walk in a possible caller:").newline(); - ThreadStackPrinter.printStacktrace(sp, possibleIp, printVisitors[invocationCount - 1].reset(), log); - break; - } - pos = pos.add(wordSize); + Pointer returnAddressPos = findPotentialReturnAddressPosition(originalSp); + if (returnAddressPos.isNull()) { + return; } - } - @Uninterruptible(reason = "Prevent the GC from freeing the CodeInfo.") - private static boolean pointsIntoNativeImageCode(CodePointer possibleIp) { - return CodeInfoTable.lookupCodeInfo(possibleIp).isNonNull(); + CodePointer possibleIp = returnAddressPos.readWord(0); + Pointer sp = returnAddressPos.add(FrameAccess.returnAddressSize()); + log.newline(); + log.string("Starting the stack walk in a possible caller (sp + ").unsigned(sp.subtract(originalSp)).string("):").newline(); + ThreadStackPrinter.printStacktrace(sp, possibleIp, printVisitors[invocationCount - 1].reset(), log); } } From 65977d2b04a0372a0731adb1326192a79020bdc0 Mon Sep 17 00:00:00 2001 From: Carlo Refice Date: Thu, 26 Oct 2023 11:09:53 +0200 Subject: [PATCH 08/17] Add test for improved InstanceOfNode canonicalization --- .../test/InstanceOfCanonicalizationTest.java | 77 +++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 compiler/src/jdk.graal.compiler.test/src/jdk/graal/compiler/core/test/InstanceOfCanonicalizationTest.java diff --git a/compiler/src/jdk.graal.compiler.test/src/jdk/graal/compiler/core/test/InstanceOfCanonicalizationTest.java b/compiler/src/jdk.graal.compiler.test/src/jdk/graal/compiler/core/test/InstanceOfCanonicalizationTest.java new file mode 100644 index 000000000000..ea222bdefd64 --- /dev/null +++ 
b/compiler/src/jdk.graal.compiler.test/src/jdk/graal/compiler/core/test/InstanceOfCanonicalizationTest.java @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ +package jdk.graal.compiler.core.test; + +import org.junit.Assert; +import org.junit.Test; + +import jdk.graal.compiler.nodes.StructuredGraph; +import jdk.graal.compiler.nodes.StructuredGraph.AllowAssumptions; +import jdk.graal.compiler.nodes.calc.IsNullNode; +import jdk.graal.compiler.nodes.java.InstanceOfNode; + +public class InstanceOfCanonicalizationTest extends GraalCompilerTest { + + public static boolean checkCastIncompatibleTypes(Object arr) { + // Cast first to a byte array, then to a boolean array. This only succeeds if arr is null. 
+ byte[] barr = (byte[]) arr; + boolean[] bbarr = (boolean[]) (Object) barr; + return true; + } + + public static int unsatisfiableInstanceOf(byte[] barr) { + // Plain instanceof does not allow null, so this will never succeed. + if ((Object) barr instanceof boolean[]) { + return -1; + } + return 1; + } + + @Test + public void testCheckCastIncompatibleTypes() { + StructuredGraph g = parseEager("checkCastIncompatibleTypes", AllowAssumptions.NO, getInitialOptions()); + createCanonicalizerPhase().apply(g, getDefaultHighTierContext()); + + // The second check-cast against boolean[] should canonicalize to a null check + Assert.assertEquals(1, g.getNodes().filter(InstanceOfNode.class).count()); + Assert.assertEquals(1, g.getNodes().filter(IsNullNode.class).count()); + + testAgainstExpected(g.method(), new Result(checkCastIncompatibleTypes(null), null), null, new Object[]{null}); + testAgainstExpected(g.method(), new Result(null, new ClassCastException()), null, new Object[]{new byte[10]}); + testAgainstExpected(g.method(), new Result(null, new ClassCastException()), null, new Object[]{new boolean[10]}); + } + + @Test + public void testUnsatisfiableInstanceOf() { + StructuredGraph g = parseEager("unsatisfiableInstanceOf", AllowAssumptions.NO, getInitialOptions()); + createCanonicalizerPhase().apply(g, getDefaultHighTierContext()); + + // Tested condition can never be true, so it should canonicalize to a constant. 
+ Assert.assertEquals(0, g.getNodes().filter(InstanceOfNode.class).count()); + + testAgainstExpected(g.method(), new Result(unsatisfiableInstanceOf(null), null), null, new Object[]{null}); + testAgainstExpected(g.method(), new Result(unsatisfiableInstanceOf(new byte[10]), null), null, new Object[]{new byte[10]}); + } +} From f4d19beb949db71e10dc1605a43bed4cec07fa2e Mon Sep 17 00:00:00 2001 From: David Kozak Date: Wed, 11 Oct 2023 10:09:21 +0200 Subject: [PATCH 09/17] fix CEntryPointCallStubMethod signature --- .../code/CEntryPointCallStubMethod.java | 82 +++++++++++++------ 1 file changed, 55 insertions(+), 27 deletions(-) diff --git a/substratevm/src/com.oracle.svm.hosted/src/com/oracle/svm/hosted/code/CEntryPointCallStubMethod.java b/substratevm/src/com.oracle.svm.hosted/src/com/oracle/svm/hosted/code/CEntryPointCallStubMethod.java index 1a0f857332a1..e1e51480dac9 100644 --- a/substratevm/src/com.oracle.svm.hosted/src/com/oracle/svm/hosted/code/CEntryPointCallStubMethod.java +++ b/substratevm/src/com.oracle.svm.hosted/src/com/oracle/svm/hosted/code/CEntryPointCallStubMethod.java @@ -28,24 +28,6 @@ import java.util.Arrays; import java.util.Iterator; -import jdk.graal.compiler.core.common.calc.FloatConvert; -import jdk.graal.compiler.core.common.type.StampFactory; -import jdk.graal.compiler.debug.DebugContext; -import jdk.graal.compiler.graph.NodeSourcePosition; -import jdk.graal.compiler.nodes.CallTargetNode.InvokeKind; -import jdk.graal.compiler.nodes.ConstantNode; -import jdk.graal.compiler.nodes.DeadEndNode; -import jdk.graal.compiler.nodes.FrameState; -import jdk.graal.compiler.nodes.InvokeWithExceptionNode; -import jdk.graal.compiler.nodes.ParameterNode; -import jdk.graal.compiler.nodes.StructuredGraph; -import jdk.graal.compiler.nodes.ValueNode; -import jdk.graal.compiler.nodes.calc.FloatConvertNode; -import jdk.graal.compiler.nodes.calc.IntegerEqualsNode; -import jdk.graal.compiler.nodes.calc.SignExtendNode; -import 
jdk.graal.compiler.nodes.calc.ZeroExtendNode; -import jdk.graal.compiler.nodes.extended.BranchProbabilityNode; -import jdk.graal.compiler.nodes.java.ExceptionObjectNode; import org.graalvm.nativeimage.Isolate; import org.graalvm.nativeimage.IsolateThread; import org.graalvm.nativeimage.c.constant.CEnum; @@ -58,6 +40,7 @@ import com.oracle.graal.pointsto.infrastructure.WrappedJavaMethod; import com.oracle.graal.pointsto.meta.AnalysisMetaAccess; import com.oracle.graal.pointsto.meta.AnalysisMethod; +import com.oracle.graal.pointsto.meta.AnalysisType; import com.oracle.graal.pointsto.meta.HostedProviders; import com.oracle.svm.core.SubstrateUtil; import com.oracle.svm.core.Uninterruptible; @@ -81,6 +64,24 @@ import com.oracle.svm.hosted.phases.CInterfaceEnumTool; import com.oracle.svm.hosted.phases.HostedGraphKit; +import jdk.graal.compiler.core.common.calc.FloatConvert; +import jdk.graal.compiler.core.common.type.StampFactory; +import jdk.graal.compiler.debug.DebugContext; +import jdk.graal.compiler.graph.NodeSourcePosition; +import jdk.graal.compiler.nodes.ConstantNode; +import jdk.graal.compiler.nodes.DeadEndNode; +import jdk.graal.compiler.nodes.FrameState; +import jdk.graal.compiler.nodes.InvokeWithExceptionNode; +import jdk.graal.compiler.nodes.ParameterNode; +import jdk.graal.compiler.nodes.StructuredGraph; +import jdk.graal.compiler.nodes.ValueNode; +import jdk.graal.compiler.nodes.CallTargetNode.InvokeKind; +import jdk.graal.compiler.nodes.calc.FloatConvertNode; +import jdk.graal.compiler.nodes.calc.IntegerEqualsNode; +import jdk.graal.compiler.nodes.calc.SignExtendNode; +import jdk.graal.compiler.nodes.calc.ZeroExtendNode; +import jdk.graal.compiler.nodes.extended.BranchProbabilityNode; +import jdk.graal.compiler.nodes.java.ExceptionObjectNode; import jdk.vm.ci.code.BytecodeFrame; import jdk.vm.ci.meta.ConstantPool; import jdk.vm.ci.meta.JavaKind; @@ -88,25 +89,52 @@ import jdk.vm.ci.meta.MetaAccessProvider; import jdk.vm.ci.meta.ResolvedJavaMethod; import 
jdk.vm.ci.meta.ResolvedJavaType; +import jdk.vm.ci.meta.Signature; public final class CEntryPointCallStubMethod extends EntryPointCallStubMethod { static CEntryPointCallStubMethod create(AnalysisMethod targetMethod, CEntryPointData entryPointData, AnalysisMetaAccess metaAccess) { - ResolvedJavaMethod unwrappedMethod = targetMethod.getWrapped(); MetaAccessProvider unwrappedMetaAccess = metaAccess.getWrapped(); ResolvedJavaType declaringClass = unwrappedMetaAccess.lookupJavaType(IsolateEnterStub.class); ConstantPool constantPool = IsolateEnterStub.getConstantPool(unwrappedMetaAccess); - return new CEntryPointCallStubMethod(entryPointData, unwrappedMethod, declaringClass, constantPool); + return new CEntryPointCallStubMethod(entryPointData, targetMethod, declaringClass, constantPool, metaAccess.getUniverse().getWordKind(), unwrappedMetaAccess); } private static final JavaKind cEnumParameterKind = JavaKind.Int; private final CEntryPointData entryPointData; private final ResolvedJavaMethod targetMethod; + private final Signature targetSignature; - private CEntryPointCallStubMethod(CEntryPointData entryPointData, ResolvedJavaMethod targetMethod, ResolvedJavaType holderClass, ConstantPool holderConstantPool) { - super(SubstrateUtil.uniqueStubName(targetMethod), holderClass, targetMethod.getSignature(), holderConstantPool); + private CEntryPointCallStubMethod(CEntryPointData entryPointData, AnalysisMethod targetMethod, ResolvedJavaType holderClass, ConstantPool holderConstantPool, JavaKind wordKind, + MetaAccessProvider metaAccess) { + super(SubstrateUtil.uniqueStubName(targetMethod.getWrapped()), holderClass, createSignature(targetMethod, wordKind, metaAccess), holderConstantPool); this.entryPointData = entryPointData; - this.targetMethod = targetMethod; + this.targetMethod = targetMethod.getWrapped(); + this.targetSignature = targetMethod.getSignature(); + } + + /** + * This method creates a new signature for the stub in which all @CEnum values are converted + * into 
their corresponding primitive type. In correspondence to how the @CEnum values are + * actually handled, parameters are transformed to the type specified by cEnumParameterKind and + * return type is transformed into the word type. + * + * @see CEnum + * @see CEntryPointCallStubMethod#adaptParameterTypes(HostedProviders, NativeLibraries, + * HostedGraphKit, JavaType[], JavaType[]) + * @see CEntryPointCallStubMethod#adaptReturnValue(ResolvedJavaMethod, HostedProviders, Purpose, + * HostedGraphKit, ValueNode) + */ + private static SimpleSignature createSignature(AnalysisMethod targetMethod, JavaKind wordKind, MetaAccessProvider metaAccess) { + JavaType[] paramTypes = Arrays.stream(targetMethod.toParameterTypes()) + .map(it -> ((AnalysisType) it)) + .map(type -> type.getAnnotation(CEnum.class) != null ? metaAccess.lookupJavaType(cEnumParameterKind.toJavaClass()) : type.getWrapped()) + .toArray(JavaType[]::new); + ResolvedJavaType returnType = ((AnalysisType) targetMethod.getSignature().getReturnType(null)).getWrapped(); + if (returnType.getAnnotation(CEnum.class) != null) { + returnType = metaAccess.lookupJavaType(wordKind.toJavaClass()); + } + return new SimpleSignature(paramTypes, returnType); } @Override @@ -145,7 +173,7 @@ public StructuredGraph buildGraph(DebugContext debug, ResolvedJavaMethod method, NativeLibraries nativeLibraries = CEntryPointCallStubSupport.singleton().getNativeLibraries(); HostedGraphKit kit = new HostedGraphKit(debug, providers, method, purpose); - JavaType[] parameterTypes = method.toParameterTypes(); + JavaType[] parameterTypes = targetSignature.toParameterTypes(null); JavaType[] parameterLoadTypes = Arrays.copyOf(parameterTypes, parameterTypes.length); EnumInfo[] parameterEnumInfos; @@ -336,8 +364,8 @@ private EnumInfo[] adaptParameterTypes(HostedProviders providers, NativeLibrarie assert !matchingNodes.hasNext() && parameterNode.usages().filter(n -> n != initialState).isEmpty(); 
parameterNode.setStamp(StampFactory.forKind(cEnumParameterKind)); } else { - throw UserError.abort("Entry point method parameter types are restricted to primitive types, word types and enumerations (@%s): %s", - CEnum.class.getSimpleName(), targetMethod); + throw UserError.abort("Entry point method parameter types are restricted to primitive types, word types and enumerations (@%s): %s, given type was %s", + CEnum.class.getSimpleName(), targetMethod, parameterTypes[i]); } } } @@ -550,7 +578,7 @@ private ValueNode adaptReturnValue(ResolvedJavaMethod method, HostedProviders pr if (returnValue.getStackKind().isPrimitive()) { return returnValue; } - JavaType returnType = method.getSignature().getReturnType(null); + JavaType returnType = targetSignature.getReturnType(null); NativeLibraries nativeLibraries = CEntryPointCallStubSupport.singleton().getNativeLibraries(); ElementInfo typeInfo = nativeLibraries.findElementInfo((ResolvedJavaType) returnType); if (typeInfo instanceof EnumInfo) { From c52269ddb2904281e9dd0fd3614c2fe25d916f6c Mon Sep 17 00:00:00 2001 From: Tom Rodriguez Date: Thu, 21 Sep 2023 18:09:56 +0000 Subject: [PATCH 10/17] [GR-47361] Properly handle negative instance sizes on HotSpot PullRequest: graal/15010 --- compiler/mx.compiler/suite.py | 1 + .../test/HumongousReferenceObjectTest.java | 386 ++++++++++++++++++ .../HotSpotAllocationSnippets.java | 16 +- .../replacements/AllocationSnippets.java | 5 +- .../snippets/SubstrateAllocationSnippets.java | 4 +- .../hotspot/HotSpotTruffleRuntime.java | 2 +- 6 files changed, 400 insertions(+), 14 deletions(-) create mode 100644 compiler/src/jdk.graal.compiler.test/src/jdk/graal/compiler/hotspot/test/HumongousReferenceObjectTest.java diff --git a/compiler/mx.compiler/suite.py b/compiler/mx.compiler/suite.py index bfa72c513957..f747096154ba 100644 --- a/compiler/mx.compiler/suite.py +++ b/compiler/mx.compiler/suite.py @@ -256,6 +256,7 @@ "jdk.internal.module", "jdk.internal.misc", "jdk.internal.util", + 
"jdk.internal.vm.annotation", ], "java.instrument" : [ "sun.instrument", diff --git a/compiler/src/jdk.graal.compiler.test/src/jdk/graal/compiler/hotspot/test/HumongousReferenceObjectTest.java b/compiler/src/jdk.graal.compiler.test/src/jdk/graal/compiler/hotspot/test/HumongousReferenceObjectTest.java new file mode 100644 index 000000000000..49b4368c330c --- /dev/null +++ b/compiler/src/jdk.graal.compiler.test/src/jdk/graal/compiler/hotspot/test/HumongousReferenceObjectTest.java @@ -0,0 +1,386 @@ +/* + * Copyright (c) 2016, 2023, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ +package jdk.graal.compiler.hotspot.test; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import jdk.graal.compiler.core.test.SubprocessTest; +import org.junit.Test; + +import jdk.internal.vm.annotation.Contended; + +@SuppressWarnings("unused") +public class HumongousReferenceObjectTest extends SubprocessTest { + /* + * Due to 300 fields with 8K @Contended padding around each field, it takes 2.4M bytes per + * instance. With small G1 regions, it is bound to cross regions. G1 should properly (card) mark + * the object nevertheless. With 128M heap, it is enough to allocate ~100 of these objects to + * provoke at least one GC. + */ + + static volatile Object instance; + + public static void testSnippet() { + for (int c = 0; c < 100; c++) { + instance = new HumongousReferenceObjectTest(); + } + } + + public void runSubprocessTest(String... args) throws IOException, InterruptedException { + List<String> newArgs = new ArrayList<>(); + Collections.addAll(newArgs, args); + // Filter out any explicitly selected GC + newArgs.remove("-XX:+UseZGC"); + newArgs.remove("-XX:+UseG1GC"); + newArgs.remove("-XX:+UseParallelGC"); + + launchSubprocess(() -> { + test("testSnippet"); + }, newArgs.toArray(new String[0])); + + // Test without assertions as well + newArgs.add("-da"); + launchSubprocess(() -> { + test("testSnippet"); + }, newArgs.toArray(new String[0])); + } + + @Test + public void testG1() throws IOException, InterruptedException { + String[] sizes = {"-XX:G1HeapRegionSize=1M", "-XX:G1HeapRegionSize=2M", "-XX:G1HeapRegionSize=4M", "-XX:G1HeapRegionSize=8M"}; + for (String size : sizes) { + runSubprocessTest("-XX:+UseG1GC", "-XX:+EnableContended", "-XX:-RestrictContended", "-Xmx128m", "-XX:ContendedPaddingWidth=8192", size); + } + } + + @Test + public void testParallel() throws IOException, InterruptedException { + runSubprocessTest("-XX:+UseParallelGC", "-XX:+EnableContended", "-XX:-RestrictContended",
"-Xmx128m", "-XX:ContendedPaddingWidth=8192"); + } + + @Contended Integer int1 = 1; + @Contended Integer int2 = 2; + @Contended Integer int3 = 3; + @Contended Integer int4 = 4; + @Contended Integer int5 = 5; + @Contended Integer int6 = 6; + @Contended Integer int7 = 7; + @Contended Integer int8 = 8; + @Contended Integer int9 = 9; + @Contended Integer int10 = 10; + @Contended Integer int11 = 11; + @Contended Integer int12 = 12; + @Contended Integer int13 = 13; + @Contended Integer int14 = 14; + @Contended Integer int15 = 15; + @Contended Integer int16 = 16; + @Contended Integer int17 = 17; + @Contended Integer int18 = 18; + @Contended Integer int19 = 19; + @Contended Integer int20 = 20; + @Contended Integer int21 = 21; + @Contended Integer int22 = 22; + @Contended Integer int23 = 23; + @Contended Integer int24 = 24; + @Contended Integer int25 = 25; + @Contended Integer int26 = 26; + @Contended Integer int27 = 27; + @Contended Integer int28 = 28; + @Contended Integer int29 = 29; + @Contended Integer int30 = 30; + @Contended Integer int31 = 31; + @Contended Integer int32 = 32; + @Contended Integer int33 = 33; + @Contended Integer int34 = 34; + @Contended Integer int35 = 35; + @Contended Integer int36 = 36; + @Contended Integer int37 = 37; + @Contended Integer int38 = 38; + @Contended Integer int39 = 39; + @Contended Integer int40 = 40; + @Contended Integer int41 = 41; + @Contended Integer int42 = 42; + @Contended Integer int43 = 43; + @Contended Integer int44 = 44; + @Contended Integer int45 = 45; + @Contended Integer int46 = 46; + @Contended Integer int47 = 47; + @Contended Integer int48 = 48; + @Contended Integer int49 = 49; + @Contended Integer int50 = 50; + @Contended Integer int51 = 51; + @Contended Integer int52 = 52; + @Contended Integer int53 = 53; + @Contended Integer int54 = 54; + @Contended Integer int55 = 55; + @Contended Integer int56 = 56; + @Contended Integer int57 = 57; + @Contended Integer int58 = 58; + @Contended Integer int59 = 59; + @Contended 
Integer int60 = 60; + @Contended Integer int61 = 61; + @Contended Integer int62 = 62; + @Contended Integer int63 = 63; + @Contended Integer int64 = 64; + @Contended Integer int65 = 65; + @Contended Integer int66 = 66; + @Contended Integer int67 = 67; + @Contended Integer int68 = 68; + @Contended Integer int69 = 69; + @Contended Integer int70 = 70; + @Contended Integer int71 = 71; + @Contended Integer int72 = 72; + @Contended Integer int73 = 73; + @Contended Integer int74 = 74; + @Contended Integer int75 = 75; + @Contended Integer int76 = 76; + @Contended Integer int77 = 77; + @Contended Integer int78 = 78; + @Contended Integer int79 = 79; + @Contended Integer int80 = 80; + @Contended Integer int81 = 81; + @Contended Integer int82 = 82; + @Contended Integer int83 = 83; + @Contended Integer int84 = 84; + @Contended Integer int85 = 85; + @Contended Integer int86 = 86; + @Contended Integer int87 = 87; + @Contended Integer int88 = 88; + @Contended Integer int89 = 89; + @Contended Integer int90 = 90; + @Contended Integer int91 = 91; + @Contended Integer int92 = 92; + @Contended Integer int93 = 93; + @Contended Integer int94 = 94; + @Contended Integer int95 = 95; + @Contended Integer int96 = 96; + @Contended Integer int97 = 97; + @Contended Integer int98 = 98; + @Contended Integer int99 = 99; + @Contended Integer int100 = 100; + @Contended Integer int101 = 101; + @Contended Integer int102 = 102; + @Contended Integer int103 = 103; + @Contended Integer int104 = 104; + @Contended Integer int105 = 105; + @Contended Integer int106 = 106; + @Contended Integer int107 = 107; + @Contended Integer int108 = 108; + @Contended Integer int109 = 109; + @Contended Integer int110 = 110; + @Contended Integer int111 = 111; + @Contended Integer int112 = 112; + @Contended Integer int113 = 113; + @Contended Integer int114 = 114; + @Contended Integer int115 = 115; + @Contended Integer int116 = 116; + @Contended Integer int117 = 117; + @Contended Integer int118 = 118; + @Contended Integer int119 
= 119; + @Contended Integer int120 = 120; + @Contended Integer int121 = 121; + @Contended Integer int122 = 122; + @Contended Integer int123 = 123; + @Contended Integer int124 = 124; + @Contended Integer int125 = 125; + @Contended Integer int126 = 126; + @Contended Integer int127 = 127; + @Contended Integer int128 = 128; + @Contended Integer int129 = 129; + @Contended Integer int130 = 130; + @Contended Integer int131 = 131; + @Contended Integer int132 = 132; + @Contended Integer int133 = 133; + @Contended Integer int134 = 134; + @Contended Integer int135 = 135; + @Contended Integer int136 = 136; + @Contended Integer int137 = 137; + @Contended Integer int138 = 138; + @Contended Integer int139 = 139; + @Contended Integer int140 = 140; + @Contended Integer int141 = 141; + @Contended Integer int142 = 142; + @Contended Integer int143 = 143; + @Contended Integer int144 = 144; + @Contended Integer int145 = 145; + @Contended Integer int146 = 146; + @Contended Integer int147 = 147; + @Contended Integer int148 = 148; + @Contended Integer int149 = 149; + @Contended Integer int150 = 150; + @Contended Integer int151 = 151; + @Contended Integer int152 = 152; + @Contended Integer int153 = 153; + @Contended Integer int154 = 154; + @Contended Integer int155 = 155; + @Contended Integer int156 = 156; + @Contended Integer int157 = 157; + @Contended Integer int158 = 158; + @Contended Integer int159 = 159; + @Contended Integer int160 = 160; + @Contended Integer int161 = 161; + @Contended Integer int162 = 162; + @Contended Integer int163 = 163; + @Contended Integer int164 = 164; + @Contended Integer int165 = 165; + @Contended Integer int166 = 166; + @Contended Integer int167 = 167; + @Contended Integer int168 = 168; + @Contended Integer int169 = 169; + @Contended Integer int170 = 170; + @Contended Integer int171 = 171; + @Contended Integer int172 = 172; + @Contended Integer int173 = 173; + @Contended Integer int174 = 174; + @Contended Integer int175 = 175; + @Contended Integer int176 = 
176; + @Contended Integer int177 = 177; + @Contended Integer int178 = 178; + @Contended Integer int179 = 179; + @Contended Integer int180 = 180; + @Contended Integer int181 = 181; + @Contended Integer int182 = 182; + @Contended Integer int183 = 183; + @Contended Integer int184 = 184; + @Contended Integer int185 = 185; + @Contended Integer int186 = 186; + @Contended Integer int187 = 187; + @Contended Integer int188 = 188; + @Contended Integer int189 = 189; + @Contended Integer int190 = 190; + @Contended Integer int191 = 191; + @Contended Integer int192 = 192; + @Contended Integer int193 = 193; + @Contended Integer int194 = 194; + @Contended Integer int195 = 195; + @Contended Integer int196 = 196; + @Contended Integer int197 = 197; + @Contended Integer int198 = 198; + @Contended Integer int199 = 199; + @Contended Integer int200 = 200; + @Contended Integer int201 = 201; + @Contended Integer int202 = 202; + @Contended Integer int203 = 203; + @Contended Integer int204 = 204; + @Contended Integer int205 = 205; + @Contended Integer int206 = 206; + @Contended Integer int207 = 207; + @Contended Integer int208 = 208; + @Contended Integer int209 = 209; + @Contended Integer int210 = 210; + @Contended Integer int211 = 211; + @Contended Integer int212 = 212; + @Contended Integer int213 = 213; + @Contended Integer int214 = 214; + @Contended Integer int215 = 215; + @Contended Integer int216 = 216; + @Contended Integer int217 = 217; + @Contended Integer int218 = 218; + @Contended Integer int219 = 219; + @Contended Integer int220 = 220; + @Contended Integer int221 = 221; + @Contended Integer int222 = 222; + @Contended Integer int223 = 223; + @Contended Integer int224 = 224; + @Contended Integer int225 = 225; + @Contended Integer int226 = 226; + @Contended Integer int227 = 227; + @Contended Integer int228 = 228; + @Contended Integer int229 = 229; + @Contended Integer int230 = 230; + @Contended Integer int231 = 231; + @Contended Integer int232 = 232; + @Contended Integer int233 = 233; 
+ @Contended Integer int234 = 234; + @Contended Integer int235 = 235; + @Contended Integer int236 = 236; + @Contended Integer int237 = 237; + @Contended Integer int238 = 238; + @Contended Integer int239 = 239; + @Contended Integer int240 = 240; + @Contended Integer int241 = 241; + @Contended Integer int242 = 242; + @Contended Integer int243 = 243; + @Contended Integer int244 = 244; + @Contended Integer int245 = 245; + @Contended Integer int246 = 246; + @Contended Integer int247 = 247; + @Contended Integer int248 = 248; + @Contended Integer int249 = 249; + @Contended Integer int250 = 250; + @Contended Integer int251 = 251; + @Contended Integer int252 = 252; + @Contended Integer int253 = 253; + @Contended Integer int254 = 254; + @Contended Integer int255 = 255; + @Contended Integer int256 = 256; + @Contended Integer int257 = 257; + @Contended Integer int258 = 258; + @Contended Integer int259 = 259; + @Contended Integer int260 = 260; + @Contended Integer int261 = 261; + @Contended Integer int262 = 262; + @Contended Integer int263 = 263; + @Contended Integer int264 = 264; + @Contended Integer int265 = 265; + @Contended Integer int266 = 266; + @Contended Integer int267 = 267; + @Contended Integer int268 = 268; + @Contended Integer int269 = 269; + @Contended Integer int270 = 270; + @Contended Integer int271 = 271; + @Contended Integer int272 = 272; + @Contended Integer int273 = 273; + @Contended Integer int274 = 274; + @Contended Integer int275 = 275; + @Contended Integer int276 = 276; + @Contended Integer int277 = 277; + @Contended Integer int278 = 278; + @Contended Integer int279 = 279; + @Contended Integer int280 = 280; + @Contended Integer int281 = 281; + @Contended Integer int282 = 282; + @Contended Integer int283 = 283; + @Contended Integer int284 = 284; + @Contended Integer int285 = 285; + @Contended Integer int286 = 286; + @Contended Integer int287 = 287; + @Contended Integer int288 = 288; + @Contended Integer int289 = 289; + @Contended Integer int290 = 290; + 
@Contended Integer int291 = 291; + @Contended Integer int292 = 292; + @Contended Integer int293 = 293; + @Contended Integer int294 = 294; + @Contended Integer int295 = 295; + @Contended Integer int296 = 296; + @Contended Integer int297 = 297; + @Contended Integer int298 = 298; + @Contended Integer int299 = 299; + @Contended Integer int300 = 300; +} diff --git a/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/hotspot/replacements/HotSpotAllocationSnippets.java b/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/hotspot/replacements/HotSpotAllocationSnippets.java index f5c579b542be..d1c39cadb83d 100644 --- a/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/hotspot/replacements/HotSpotAllocationSnippets.java +++ b/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/hotspot/replacements/HotSpotAllocationSnippets.java @@ -146,10 +146,11 @@ public HotSpotAllocationSnippets(GraalHotSpotVMConfig config, HotSpotRegistersPr @Snippet protected Object allocateInstance(KlassPointer hub, @ConstantParameter long size, + @ConstantParameter boolean forceSlowPath, @ConstantParameter FillContent fillContents, @ConstantParameter boolean emitMemoryBarrier, @ConstantParameter HotSpotAllocationProfilingData profilingData) { - Object result = allocateInstanceImpl(hub.asWord(), WordFactory.unsigned(size), fillContents, emitMemoryBarrier, true, profilingData); + Object result = allocateInstanceImpl(hub.asWord(), WordFactory.unsigned(size), forceSlowPath, fillContents, emitMemoryBarrier, true, profilingData); return piCastToSnippetReplaceeStamp(result); } @@ -194,7 +195,7 @@ public Object allocateInstanceDynamic(@NonNullParameter Class type, * binding of parameters is not yet supported by the GraphBuilderPlugin system. 
*/ UnsignedWord size = WordFactory.unsigned(layoutHelper); - return allocateInstanceImpl(nonNullHub.asWord(), size, fillContents, emitMemoryBarrier, false, profilingData); + return allocateInstanceImpl(nonNullHub.asWord(), size, false, fillContents, emitMemoryBarrier, false, profilingData); } } else { DeoptimizeNode.deopt(None, RuntimeConstraint); @@ -653,12 +654,14 @@ public void lower(NewInstanceNode node, LoweringTool tool) { HotSpotResolvedObjectType type = (HotSpotResolvedObjectType) node.instanceClass(); assert !type.isArray(); ConstantNode hub = ConstantNode.forConstant(KlassPointerStamp.klassNonNull(), type.klass(), tool.getMetaAccess(), graph); - long size = instanceSize(type); + long size = type.instanceSize(); OptionValues localOptions = graph.getOptions(); Arguments args = new Arguments(allocateInstance, graph.getGuardsStage(), tool.getLoweringStage()); args.add("hub", hub); - args.addConst("size", size); + // instanceSize returns a negative number for types which should be slow path allocated + args.addConst("size", Math.abs(size)); + args.addConst("forceSlowPath", size < 0); args.addConst("fillContents", FillContent.fromBoolean(node.fillContents())); args.addConst("emitMemoryBarrier", node.emitMemoryBarrier()); args.addConst("profilingData", getProfilingData(localOptions, "instance", type)); @@ -790,11 +793,6 @@ private static HotSpotResolvedObjectType lookupArrayClass(LoweringTool tool, Jav return HotSpotAllocationSnippets.lookupArrayClass(tool.getMetaAccess(), kind); } - private static long instanceSize(HotSpotResolvedObjectType type) { - long size = type.instanceSize(); - assert size >= 0; - return size; - } } private static class HotSpotAllocationProfilingData extends AllocationProfilingData { diff --git a/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/replacements/AllocationSnippets.java b/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/replacements/AllocationSnippets.java index e55351118461..12154f7ebb33 100644 --- 
a/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/replacements/AllocationSnippets.java +++ b/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/replacements/AllocationSnippets.java @@ -48,6 +48,7 @@ public abstract class AllocationSnippets implements Snippets { protected Object allocateInstanceImpl(Word hub, UnsignedWord size, + boolean forceSlowPath, FillContent fillContents, boolean emitMemoryBarrier, boolean constantSize, @@ -57,7 +58,7 @@ protected Object allocateInstanceImpl(Word hub, Word top = readTlabTop(tlabInfo); Word end = readTlabEnd(tlabInfo); Word newTop = top.add(size); - if (useTLAB() && probability(FAST_PATH_PROBABILITY, shouldAllocateInTLAB(size, false)) && probability(FAST_PATH_PROBABILITY, newTop.belowOrEqual(end))) { + if (!forceSlowPath && useTLAB() && probability(FAST_PATH_PROBABILITY, shouldAllocateInTLAB(size, false)) && probability(FAST_PATH_PROBABILITY, newTop.belowOrEqual(end))) { writeTlabTop(tlabInfo, newTop); emitPrefetchAllocate(newTop, false); result = formatObject(hub, size, top, fillContents, emitMemoryBarrier, constantSize, profilingData.snippetCounters); @@ -122,7 +123,7 @@ protected UnsignedWord arrayAllocationSize(int length, int arrayBaseOffset, int * We do an unsigned multiplication so that a negative array length will result in an array size * greater than Integer.MAX_VALUE. 
*/ - public static long arrayAllocationSize(int length, int arrayBaseOffset, int log2ElementSize, int alignment) { + public static long arrayAllocationSize(long length, int arrayBaseOffset, int log2ElementSize, int alignment) { long size = ((length & 0xFFFFFFFFL) << log2ElementSize) + arrayBaseOffset + (alignment - 1); long mask = ~(alignment - 1); return size & mask; diff --git a/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/graal/snippets/SubstrateAllocationSnippets.java b/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/graal/snippets/SubstrateAllocationSnippets.java index 541938993712..02a08a275d22 100644 --- a/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/graal/snippets/SubstrateAllocationSnippets.java +++ b/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/graal/snippets/SubstrateAllocationSnippets.java @@ -130,7 +130,7 @@ protected Object allocateInstance(@NonNullParameter DynamicHub hub, @ConstantParameter FillContent fillContents, @ConstantParameter boolean emitMemoryBarrier, @ConstantParameter AllocationProfilingData profilingData) { - Object result = allocateInstanceImpl(encodeAsTLABObjectHeader(hub), WordFactory.unsigned(size), fillContents, emitMemoryBarrier, true, profilingData); + Object result = allocateInstanceImpl(encodeAsTLABObjectHeader(hub), WordFactory.unsigned(size), false, fillContents, emitMemoryBarrier, true, profilingData); return piCastToSnippetReplaceeStamp(result); } @@ -229,7 +229,7 @@ protected Object allocateInstanceDynamicImpl(DynamicHub hub, FillContent fillCon @SuppressWarnings("unused") boolean supportsOptimizedFilling, AllocationProfilingData profilingData) { // The hub was already verified by a ValidateNewInstanceClassNode. 
UnsignedWord size = LayoutEncoding.getPureInstanceAllocationSize(hub.getLayoutEncoding()); - Object result = allocateInstanceImpl(encodeAsTLABObjectHeader(hub), size, fillContents, emitMemoryBarrier, false, profilingData); + Object result = allocateInstanceImpl(encodeAsTLABObjectHeader(hub), size, false, fillContents, emitMemoryBarrier, false, profilingData); return piCastToSnippetReplaceeStamp(result); } diff --git a/truffle/src/com.oracle.truffle.runtime/src/com/oracle/truffle/runtime/hotspot/HotSpotTruffleRuntime.java b/truffle/src/com.oracle.truffle.runtime/src/com/oracle/truffle/runtime/hotspot/HotSpotTruffleRuntime.java index ff7132f32eac..3fffe943679f 100644 --- a/truffle/src/com.oracle.truffle.runtime/src/com/oracle/truffle/runtime/hotspot/HotSpotTruffleRuntime.java +++ b/truffle/src/com.oracle.truffle.runtime/src/com/oracle/truffle/runtime/hotspot/HotSpotTruffleRuntime.java @@ -596,7 +596,7 @@ protected int getBaseInstanceSize(Class type) { HotSpotMetaAccessProvider meta = (HotSpotMetaAccessProvider) getMetaAccess(); HotSpotResolvedObjectType resolvedType = (HotSpotResolvedObjectType) meta.lookupJavaType(type); - return resolvedType.instanceSize(); + return Math.abs(resolvedType.instanceSize()); } private static boolean fieldIsNotEligible(Class clazz, ResolvedJavaField f) { From e676225d71c09a4f75f355ce3c7cd2657fd0c704 Mon Sep 17 00:00:00 2001 From: Christian Haeubl Date: Wed, 25 Oct 2023 14:12:53 +0200 Subject: [PATCH 11/17] Remove static field and other minor changes. 
--- .../genscavenge/AbstractMemoryPoolMXBean.java | 14 ++-- .../oracle/svm/core/genscavenge/GCImpl.java | 7 +- .../GenScavengeMemoryPoolMXBeans.java | 41 ++++------- .../graal/GenScavengeGCFeature.java | 14 ++-- .../substitutions/GraalSubstitutions.java | 72 +++++++++---------- 5 files changed, 71 insertions(+), 77 deletions(-) diff --git a/substratevm/src/com.oracle.svm.core.genscavenge/src/com/oracle/svm/core/genscavenge/AbstractMemoryPoolMXBean.java b/substratevm/src/com.oracle.svm.core.genscavenge/src/com/oracle/svm/core/genscavenge/AbstractMemoryPoolMXBean.java index 1f2eb8ede7b3..d0724fd7b597 100644 --- a/substratevm/src/com.oracle.svm.core.genscavenge/src/com/oracle/svm/core/genscavenge/AbstractMemoryPoolMXBean.java +++ b/substratevm/src/com.oracle.svm.core.genscavenge/src/com/oracle/svm/core/genscavenge/AbstractMemoryPoolMXBean.java @@ -45,14 +45,13 @@ public abstract class AbstractMemoryPoolMXBean extends AbstractMXBean implements MemoryPoolMXBean { - protected static final UnsignedWord UNDEFINED = WordFactory.signed(UNDEFINED_MEMORY_USAGE); - private static final UnsignedWord UNINITIALIZED = WordFactory.zero(); + protected static final UnsignedWord UNDEFINED = WordFactory.unsigned(UNDEFINED_MEMORY_USAGE); private final String name; private final String[] managerNames; protected final UninterruptibleUtils.AtomicUnsigned peakUsage = new UninterruptibleUtils.AtomicUnsigned(); - protected UnsignedWord initialValue = UNINITIALIZED; + private UnsignedWord initialValue = UNDEFINED; @Platforms(Platform.HOSTED_ONLY.class) protected AbstractMemoryPoolMXBean(String name, String... managerNames) { @@ -61,7 +60,7 @@ protected AbstractMemoryPoolMXBean(String name, String... 
managerNames) { } UnsignedWord getInitialValue() { - if (initialValue.equal(UNINITIALIZED)) { + if (initialValue.equal(UNDEFINED)) { initialValue = computeInitialValue(); } return initialValue; @@ -69,8 +68,6 @@ UnsignedWord getInitialValue() { abstract UnsignedWord computeInitialValue(); - abstract UnsignedWord getMaximumValue(); - abstract void beforeCollection(); abstract void afterCollection(); @@ -169,4 +166,9 @@ void updatePeakUsage(UnsignedWord value) { current = peakUsage.get(); } while (value.aboveThan(current) && !peakUsage.compareAndSet(current, value)); } + + protected UnsignedWord getMaximumValue() { + /* Actual usage may temporarily exceed the maximum, so we need to return UNDEFINED. */ + return UNDEFINED; + } } diff --git a/substratevm/src/com.oracle.svm.core.genscavenge/src/com/oracle/svm/core/genscavenge/GCImpl.java b/substratevm/src/com.oracle.svm.core.genscavenge/src/com/oracle/svm/core/genscavenge/GCImpl.java index 3ff352baf538..2ccf9cdea9d0 100644 --- a/substratevm/src/com.oracle.svm.core.genscavenge/src/com/oracle/svm/core/genscavenge/GCImpl.java +++ b/substratevm/src/com.oracle.svm.core.genscavenge/src/com/oracle/svm/core/genscavenge/GCImpl.java @@ -29,7 +29,6 @@ import java.lang.ref.Reference; -import jdk.graal.compiler.api.replacements.Fold; import org.graalvm.nativeimage.CurrentIsolate; import org.graalvm.nativeimage.IsolateThread; import org.graalvm.nativeimage.Platform; @@ -97,6 +96,8 @@ import com.oracle.svm.core.util.TimeUtils; import com.oracle.svm.core.util.VMError; +import jdk.graal.compiler.api.replacements.Fold; + /** * Garbage collector (incremental or complete) for {@link HeapImpl}. 
*/ @@ -211,14 +212,14 @@ assert getCollectionEpoch().equal(data.getRequestingEpoch()) || printGCBefore(cause); ThreadLocalAllocation.disableAndFlushForAllThreads(); - GenScavengeMemoryPoolMXBeans.notifyBeforeCollection(); + GenScavengeMemoryPoolMXBeans.singleton().notifyBeforeCollection(); HeapImpl.getAccounting().notifyBeforeCollection(); boolean outOfMemory = collectImpl(cause, data.getRequestingNanoTime(), data.getForceFullGC()); data.setOutOfMemory(outOfMemory); HeapImpl.getAccounting().notifyAfterCollection(); - GenScavengeMemoryPoolMXBeans.notifyAfterCollection(); + GenScavengeMemoryPoolMXBeans.singleton().notifyAfterCollection(); printGCAfter(cause); JfrGCHeapSummaryEvent.emit(JfrGCWhen.AFTER_GC); diff --git a/substratevm/src/com.oracle.svm.core.genscavenge/src/com/oracle/svm/core/genscavenge/GenScavengeMemoryPoolMXBeans.java b/substratevm/src/com.oracle.svm.core.genscavenge/src/com/oracle/svm/core/genscavenge/GenScavengeMemoryPoolMXBeans.java index 80d6b861370e..0eb785bd61db 100644 --- a/substratevm/src/com.oracle.svm.core.genscavenge/src/com/oracle/svm/core/genscavenge/GenScavengeMemoryPoolMXBeans.java +++ b/substratevm/src/com.oracle.svm.core.genscavenge/src/com/oracle/svm/core/genscavenge/GenScavengeMemoryPoolMXBeans.java @@ -25,9 +25,9 @@ */ package com.oracle.svm.core.genscavenge; -import java.lang.management.MemoryPoolMXBean; import java.lang.management.MemoryUsage; +import org.graalvm.nativeimage.ImageSingletons; import org.graalvm.nativeimage.Platform; import org.graalvm.nativeimage.Platforms; import org.graalvm.word.UnsignedWord; @@ -36,8 +36,9 @@ import com.oracle.svm.core.SubstrateOptions; import com.oracle.svm.core.util.VMError; -public class GenScavengeMemoryPoolMXBeans { +import jdk.graal.compiler.api.replacements.Fold; +public class GenScavengeMemoryPoolMXBeans { static final String YOUNG_GEN_SCAVENGER = "young generation scavenger"; static final String COMPLETE_SCAVENGER = "complete scavenger"; static final String EPSILON_SCAVENGER = 
"epsilon scavenger"; @@ -47,10 +48,10 @@ public class GenScavengeMemoryPoolMXBeans { static final String OLD_GEN_SPACE = "old generation space"; static final String EPSILON_HEAP = "epsilon heap"; - private static AbstractMemoryPoolMXBean[] mxBeans; + private final AbstractMemoryPoolMXBean[] mxBeans; @Platforms(Platform.HOSTED_ONLY.class) - public static MemoryPoolMXBean[] createMemoryPoolMXBeans() { + public GenScavengeMemoryPoolMXBeans() { if (SubstrateOptions.UseSerialGC.getValue()) { mxBeans = new AbstractMemoryPoolMXBean[]{ new EdenMemoryPoolMXBean(YOUNG_GEN_SCAVENGER, COMPLETE_SCAVENGER), @@ -63,16 +64,24 @@ public static MemoryPoolMXBean[] createMemoryPoolMXBeans() { new EpsilonMemoryPoolMXBean(EPSILON_SCAVENGER) }; } + } + + @Fold + public static GenScavengeMemoryPoolMXBeans singleton() { + return ImageSingletons.lookup(GenScavengeMemoryPoolMXBeans.class); + } + + public AbstractMemoryPoolMXBean[] getMXBeans() { return mxBeans; } - public static void notifyBeforeCollection() { + public void notifyBeforeCollection() { for (AbstractMemoryPoolMXBean mxBean : mxBeans) { mxBean.beforeCollection(); } } - public static void notifyAfterCollection() { + public void notifyAfterCollection() { for (AbstractMemoryPoolMXBean mxBean : mxBeans) { mxBean.afterCollection(); } @@ -100,11 +109,6 @@ UnsignedWord computeInitialValue() { return GCImpl.getPolicy().getInitialEdenSize(); } - @Override - UnsignedWord getMaximumValue() { - return UNDEFINED; - } - @Override public MemoryUsage getUsage() { return memoryUsage(getCurrentUsage()); @@ -148,11 +152,6 @@ UnsignedWord computeInitialValue() { return GCImpl.getPolicy().getInitialSurvivorSize(); } - @Override - UnsignedWord getMaximumValue() { - return UNDEFINED; - } - @Override public MemoryUsage getUsage() { return getCollectionUsage(); @@ -191,11 +190,6 @@ UnsignedWord computeInitialValue() { return GCImpl.getPolicy().getInitialOldSize(); } - @Override - UnsignedWord getMaximumValue() { - return UNDEFINED; - } - @Override 
public MemoryUsage getUsage() { return getCollectionUsage(); @@ -234,11 +228,6 @@ UnsignedWord computeInitialValue() { return GCImpl.getPolicy().getMinimumHeapSize(); } - @Override - UnsignedWord getMaximumValue() { - return UNDEFINED; - } - @Override public MemoryUsage getUsage() { HeapAccounting accounting = HeapImpl.getAccounting(); diff --git a/substratevm/src/com.oracle.svm.core.genscavenge/src/com/oracle/svm/core/genscavenge/graal/GenScavengeGCFeature.java b/substratevm/src/com.oracle.svm.core.genscavenge/src/com/oracle/svm/core/genscavenge/graal/GenScavengeGCFeature.java index 6d3abab9a557..6727381fad42 100644 --- a/substratevm/src/com.oracle.svm.core.genscavenge/src/com/oracle/svm/core/genscavenge/graal/GenScavengeGCFeature.java +++ b/substratevm/src/com.oracle.svm.core.genscavenge/src/com/oracle/svm/core/genscavenge/graal/GenScavengeGCFeature.java @@ -24,15 +24,11 @@ */ package com.oracle.svm.core.genscavenge.graal; -import java.lang.management.MemoryPoolMXBean; import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.Map; -import jdk.graal.compiler.graph.Node; -import jdk.graal.compiler.options.OptionValues; -import jdk.graal.compiler.phases.util.Providers; import org.graalvm.nativeimage.ImageSingletons; import org.graalvm.nativeimage.hosted.Feature; @@ -70,6 +66,10 @@ import com.oracle.svm.core.jvmstat.PerfManager; import com.sun.management.GarbageCollectorMXBean; +import jdk.graal.compiler.graph.Node; +import jdk.graal.compiler.options.OptionValues; +import jdk.graal.compiler.phases.util.Providers; + @AutomaticallyRegisteredFeature class GenScavengeGCFeature implements InternalFeature { @Override @@ -95,7 +95,9 @@ public void duringSetup(DuringSetupAccess access) { ImageSingletons.add(Heap.class, heap); ImageSingletons.add(GCAllocationSupport.class, new GenScavengeAllocationSupport()); - List memoryPools = Arrays.asList(GenScavengeMemoryPoolMXBeans.createMemoryPoolMXBeans()); + GenScavengeMemoryPoolMXBeans 
memoryPoolMXBeans = new GenScavengeMemoryPoolMXBeans(); + ImageSingletons.add(GenScavengeMemoryPoolMXBeans.class, memoryPoolMXBeans); + List garbageCollectors; if (SubstrateOptions.UseEpsilonGC.getValue()) { garbageCollectors = Arrays.asList(new EpsilonGarbageCollectorMXBean()); @@ -105,7 +107,7 @@ public void duringSetup(DuringSetupAccess access) { ManagementSupport managementSupport = ManagementSupport.getSingleton(); managementSupport.addPlatformManagedObjectSingleton(java.lang.management.MemoryMXBean.class, new HeapImplMemoryMXBean()); - managementSupport.addPlatformManagedObjectList(java.lang.management.MemoryPoolMXBean.class, memoryPools); + managementSupport.addPlatformManagedObjectList(java.lang.management.MemoryPoolMXBean.class, Arrays.asList(memoryPoolMXBeans.getMXBeans())); managementSupport.addPlatformManagedObjectList(com.sun.management.GarbageCollectorMXBean.class, garbageCollectors); /* Not supported yet. */ managementSupport.addPlatformManagedObjectList(java.lang.management.BufferPoolMXBean.class, Collections.emptyList()); diff --git a/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/substitutions/GraalSubstitutions.java b/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/substitutions/GraalSubstitutions.java index 338206c69f9b..29342b33f5a5 100644 --- a/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/substitutions/GraalSubstitutions.java +++ b/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/substitutions/GraalSubstitutions.java @@ -34,44 +34,9 @@ import java.util.Map; import java.util.concurrent.ConcurrentHashMap; -import com.oracle.svm.graal.GraalSupport; -import jdk.graal.compiler.core.match.MatchRuleRegistry; -import jdk.graal.compiler.debug.KeyRegistry; -import jdk.graal.compiler.debug.TTY; -import jdk.graal.compiler.nodes.NamedLocationIdentity; -import jdk.graal.compiler.nodes.graphbuilderconf.InvocationPlugins; -import jdk.graal.compiler.phases.common.inlining.info.elem.InlineableGraph; 
-import jdk.graal.compiler.phases.common.inlining.walker.ComputeInliningRelevance; -import jdk.graal.compiler.replacements.nodes.BinaryMathIntrinsicNode; -import jdk.graal.compiler.replacements.nodes.UnaryMathIntrinsicNode; import org.graalvm.collections.EconomicMap; import org.graalvm.collections.EconomicSet; import org.graalvm.collections.Equivalence; -import jdk.graal.compiler.core.common.CompilationIdentifier; -import jdk.graal.compiler.core.common.SuppressFBWarnings; -import jdk.graal.compiler.core.gen.NodeLIRBuilder; -import jdk.graal.compiler.core.match.MatchStatement; -import jdk.graal.compiler.debug.DebugContext; -import jdk.graal.compiler.debug.DebugHandlersFactory; -import jdk.graal.compiler.debug.MetricKey; -import jdk.graal.compiler.debug.TimeSource; -import jdk.graal.compiler.graph.Node; -import jdk.graal.compiler.graph.NodeClass; -import jdk.graal.compiler.lir.CompositeValue; -import jdk.graal.compiler.lir.CompositeValueClass; -import jdk.graal.compiler.lir.LIRInstruction; -import jdk.graal.compiler.lir.LIRInstructionClass; -import jdk.graal.compiler.lir.gen.ArithmeticLIRGeneratorTool; -import jdk.graal.compiler.lir.phases.LIRPhase; -import jdk.graal.compiler.nodes.Invoke; -import jdk.graal.compiler.nodes.StructuredGraph; -import jdk.graal.compiler.nodes.spi.NodeLIRBuilderTool; -import jdk.graal.compiler.options.OptionValues; -import jdk.graal.compiler.phases.BasePhase; -import jdk.graal.compiler.phases.common.CanonicalizerPhase; -import jdk.graal.compiler.phases.tiers.HighTierContext; -import jdk.graal.compiler.printer.NoDeadCodeVerifyHandler; -import jdk.graal.compiler.serviceprovider.GlobalAtomicLong; import org.graalvm.nativeimage.CurrentIsolate; import org.graalvm.nativeimage.ImageSingletons; import org.graalvm.nativeimage.hosted.FieldValueTransformer; @@ -94,11 +59,46 @@ import com.oracle.svm.core.log.Log; import com.oracle.svm.core.option.HostedOptionValues; import com.oracle.svm.core.util.VMError; +import com.oracle.svm.graal.GraalSupport; 
import com.oracle.svm.graal.hosted.FieldsOffsetsFeature; import com.oracle.svm.graal.hosted.RuntimeCompilationFeature; import com.oracle.svm.graal.meta.SubstrateMethod; import com.oracle.svm.util.ReflectionUtil; +import jdk.graal.compiler.core.common.CompilationIdentifier; +import jdk.graal.compiler.core.common.SuppressFBWarnings; +import jdk.graal.compiler.core.gen.NodeLIRBuilder; +import jdk.graal.compiler.core.match.MatchRuleRegistry; +import jdk.graal.compiler.core.match.MatchStatement; +import jdk.graal.compiler.debug.DebugContext; +import jdk.graal.compiler.debug.DebugHandlersFactory; +import jdk.graal.compiler.debug.KeyRegistry; +import jdk.graal.compiler.debug.MetricKey; +import jdk.graal.compiler.debug.TTY; +import jdk.graal.compiler.debug.TimeSource; +import jdk.graal.compiler.graph.Node; +import jdk.graal.compiler.graph.NodeClass; +import jdk.graal.compiler.lir.CompositeValue; +import jdk.graal.compiler.lir.CompositeValueClass; +import jdk.graal.compiler.lir.LIRInstruction; +import jdk.graal.compiler.lir.LIRInstructionClass; +import jdk.graal.compiler.lir.gen.ArithmeticLIRGeneratorTool; +import jdk.graal.compiler.lir.phases.LIRPhase; +import jdk.graal.compiler.nodes.Invoke; +import jdk.graal.compiler.nodes.NamedLocationIdentity; +import jdk.graal.compiler.nodes.StructuredGraph; +import jdk.graal.compiler.nodes.graphbuilderconf.InvocationPlugins; +import jdk.graal.compiler.nodes.spi.NodeLIRBuilderTool; +import jdk.graal.compiler.options.OptionValues; +import jdk.graal.compiler.phases.BasePhase; +import jdk.graal.compiler.phases.common.CanonicalizerPhase; +import jdk.graal.compiler.phases.common.inlining.info.elem.InlineableGraph; +import jdk.graal.compiler.phases.common.inlining.walker.ComputeInliningRelevance; +import jdk.graal.compiler.phases.tiers.HighTierContext; +import jdk.graal.compiler.printer.NoDeadCodeVerifyHandler; +import jdk.graal.compiler.replacements.nodes.BinaryMathIntrinsicNode; +import 
jdk.graal.compiler.replacements.nodes.UnaryMathIntrinsicNode; +import jdk.graal.compiler.serviceprovider.GlobalAtomicLong; import jdk.vm.ci.code.TargetDescription; import jdk.vm.ci.meta.ResolvedJavaMethod; @@ -235,7 +235,7 @@ class GlobalAtomicLongAddressProvider implements FieldValueTransformer { @Override public Object transform(Object receiver, Object originalValue) { long initialValue = ((GlobalAtomicLong) receiver).getInitialValue(); - return CGlobalDataFactory.createWord((Pointer) WordFactory.unsigned(initialValue), null, true); + return CGlobalDataFactory.createWord(WordFactory.unsigned(initialValue), null, true); } } From 685a18301602a004e7dd3570bcb62508e8a0666e Mon Sep 17 00:00:00 2001 From: Andreas Woess Date: Mon, 30 Oct 2023 15:55:38 +0100 Subject: [PATCH 12/17] Ensure long and double values are 8-byte aligned in the DynamicObject extension array. --- .../oracle/truffle/object/CoreAllocator.java | 34 ++++++++++++++++--- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/truffle/src/com.oracle.truffle.object/src/com/oracle/truffle/object/CoreAllocator.java b/truffle/src/com.oracle.truffle.object/src/com/oracle/truffle/object/CoreAllocator.java index ebefda284276..d20d1c406beb 100644 --- a/truffle/src/com.oracle.truffle.object/src/com/oracle/truffle/object/CoreAllocator.java +++ b/truffle/src/com.oracle.truffle.object/src/com/oracle/truffle/object/CoreAllocator.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2023, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * The Universal Permissive License (UPL), Version 1.0 @@ -60,6 +60,8 @@ import com.oracle.truffle.object.CoreLocations.TypedLocation; import com.oracle.truffle.object.CoreLocations.ValueLocation; +import sun.misc.Unsafe; + @SuppressWarnings("deprecation") class CoreAllocator extends ShapeImpl.BaseAllocator { @@ -130,7 +132,8 @@ protected Location newIntLocation(boolean useFinal) { if (com.oracle.truffle.object.ObjectStorageOptions.InObjectFields && primitiveFieldSize + getLayout().getLongFieldSize() <= getLayout().getPrimitiveFieldCount()) { return advance(new IntLocationDecorator(getLayout().getPrimitiveFieldLocation(primitiveFieldSize))); } else if (getLayout().hasPrimitiveExtensionArray()) { - return advance(new IntLocationDecorator(new LongArrayLocation(primitiveArraySize))); + int alignedIndex = alignArrayIndex(primitiveArraySize, Long.BYTES); + return advance(new IntLocationDecorator(new LongArrayLocation(alignedIndex))); } } return newObjectLocation(useFinal, true); @@ -146,7 +149,8 @@ Location newDoubleLocation(boolean useFinal, boolean allowedIntToDouble) { if (com.oracle.truffle.object.ObjectStorageOptions.InObjectFields && primitiveFieldSize + getLayout().getLongFieldSize() <= getLayout().getPrimitiveFieldCount()) { return advance(new DoubleLocationDecorator(getLayout().getPrimitiveFieldLocation(primitiveFieldSize), allowedIntToDouble)); } else if (getLayout().hasPrimitiveExtensionArray()) { - return advance(new DoubleLocationDecorator(new LongArrayLocation(primitiveArraySize), allowedIntToDouble)); + int alignedIndex = alignArrayIndex(primitiveArraySize, Long.BYTES); + return advance(new DoubleLocationDecorator(new LongArrayLocation(alignedIndex), allowedIntToDouble)); } } return newObjectLocation(useFinal, true); @@ -162,7 +166,8 @@ Location newLongLocation(boolean useFinal, boolean allowedIntToLong) { if (com.oracle.truffle.object.ObjectStorageOptions.InObjectFields && primitiveFieldSize + getLayout().getLongFieldSize() <= 
getLayout().getPrimitiveFieldCount()) { return advance((Location) CoreLocations.createLongLocation(getLayout().getPrimitiveFieldLocation(primitiveFieldSize), allowedIntToLong)); } else if (getLayout().hasPrimitiveExtensionArray()) { - return advance(new LongArrayLocation(primitiveArraySize, allowedIntToLong)); + int alignedIndex = alignArrayIndex(primitiveArraySize, Long.BYTES); + return advance(new LongArrayLocation(alignedIndex, allowedIntToLong)); } } return newObjectLocation(useFinal, true); @@ -243,4 +248,25 @@ protected Location locationForValueUpcast(Object value, Location oldLocation, lo } return locationForValue(value, false, value != null); } + + /** + * Adjust index to ensure alignment for slots larger than the array element size, e.g. long and + * double slots in an int[] array. Note that array element 0 is not necessarily 8-byte aligned. + */ + private static int alignArrayIndex(int index, int bytes) { + assert bytes > 0 && (bytes & (bytes - 1)) == 0; + final int baseOffset = Unsafe.ARRAY_INT_BASE_OFFSET; + final int indexScale = Unsafe.ARRAY_INT_INDEX_SCALE; + if (bytes <= indexScale) { + // Always aligned. 
+ return index; + } else { + int misalignment = (baseOffset + indexScale * index) & (bytes - 1); + if (misalignment == 0) { + return index; + } else { + return index + ((bytes - misalignment) / indexScale); + } + } + } } From 8f948c22da7838b20c2e832e1f20d78a12d572d8 Mon Sep 17 00:00:00 2001 From: Josef Haider Date: Mon, 30 Oct 2023 15:42:33 +0100 Subject: [PATCH 13/17] TRegex: cleanup in casefolding script --- .../tools/casefolding/src/main.rs | 127 +++++++++--------- 1 file changed, 65 insertions(+), 62 deletions(-) diff --git a/regex/src/com.oracle.truffle.regex/tools/casefolding/src/main.rs b/regex/src/com.oracle.truffle.regex/tools/casefolding/src/main.rs index 5823893d722f..ad5bf1b0da98 100644 --- a/regex/src/com.oracle.truffle.regex/tools/casefolding/src/main.rs +++ b/regex/src/com.oracle.truffle.regex/tools/casefolding/src/main.rs @@ -70,6 +70,9 @@ error_chain! { } } +/// refers to the index of a codepoint or string in a global index +type IElement = usize; + const FILE_FORMAT_VERSION: u16 = 0; const OUTPUT_FOLDER: &str = "./out"; const PATH_GRAAL_REPO: &str = "../../../../../"; @@ -98,11 +101,11 @@ enum EqMapping { Set(usize), AlternatingAL, AlternatingUL, - Single(usize), + Single(IElement), } impl EqMapping { - fn from_single_mapping(src: usize, dst: usize) -> EqMapping { + fn from_single_mapping(src: IElement, dst: IElement) -> EqMapping { let offset = (dst as i32) - (src as i32); if offset == 1 { if src & 1 == 0 { EqMapping::AlternatingAL } else { EqMapping::AlternatingUL } @@ -119,8 +122,8 @@ enum OrderMapping { } trait RangeMapping { - fn lo(&self) -> usize; - fn hi(&self) -> usize; + fn lo(&self) -> IElement; + fn hi(&self) -> IElement; fn mapping(&self) -> &T; } @@ -132,11 +135,11 @@ struct OrderTableEntry { } impl RangeMapping for OrderTableEntry { - fn lo(&self) -> usize { + fn lo(&self) -> IElement { self.lo } - fn hi(&self) -> usize { + fn hi(&self) -> IElement { self.hi } @@ -147,8 +150,8 @@ impl RangeMapping for OrderTableEntry { 
#[derive(Debug, Clone, Eq, PartialEq)] struct EqTableEntry { - lo: usize, - hi: usize, + lo: IElement, + hi: IElement, mapping: EqMapping, } @@ -161,7 +164,7 @@ impl EqTableEntry { } } - fn with_hi(&self, hi: usize) -> EqTableEntry { + fn with_hi(&self, hi: IElement) -> EqTableEntry { EqTableEntry { lo: self.lo, hi, @@ -169,7 +172,7 @@ impl EqTableEntry { } } - fn with_lo(&self, lo: usize) -> EqTableEntry { + fn with_lo(&self, lo: IElement) -> EqTableEntry { EqTableEntry { lo, hi: self.hi, @@ -187,11 +190,11 @@ impl EqTableEntry { } impl RangeMapping for EqTableEntry { - fn lo(&self) -> usize { + fn lo(&self) -> IElement { self.lo } - fn hi(&self) -> usize { + fn hi(&self) -> IElement { self.hi } @@ -203,7 +206,7 @@ impl RangeMapping for EqTableEntry { trait RangeMappingTable> { fn table(&self) -> &Vec; - fn binary_search(&self, key: usize) -> Option<&M> { + fn binary_search(&self, key: IElement) -> Option<&M> { let table = self.table(); let mut lo: i32 = 0; let mut hi: i32 = (table.len() as i32) - 1; @@ -238,7 +241,7 @@ impl RangeMappingTable for OrderTable { struct EqTable { table: Vec, - sets: Vec>, + sets: Vec>, } impl RangeMappingTable for EqTable { @@ -335,8 +338,8 @@ impl EqTable { /// fn create Ordering>(collator: F, full_map: &Vec) -> EqTable { let mut eq_map_0: Vec = Vec::with_capacity(full_map.len()); - let mut eq_sets: Vec> = Vec::new(); - let mut buf: Vec = Vec::new(); + let mut eq_sets: Vec> = Vec::new(); + let mut buf: Vec = Vec::new(); // first pass: find equivalent elements and create mappings for i in 1..full_map.len() { if collator(&full_map[i - 1].element.string, &full_map[i].element.string) == Ordering::Equal { @@ -360,13 +363,13 @@ impl EqTable { EqTable { table: EqTable::eq_map_merge_adjacent(&mut eq_map_0), sets: eq_sets } } - fn from_vec<'a>(mut equivalences: Vec>) -> EqTable { + fn from_vec<'a>(mut equivalences: Vec>) -> EqTable { for vec in equivalences.iter_mut() { vec.sort(); } equivalences.sort(); let mut eq_map_0: Vec = 
Vec::with_capacity(equivalences.len()); - let mut eq_sets: Vec> = Vec::new(); + let mut eq_sets: Vec> = Vec::new(); // first pass: find equivalent elements and create mappings for buf in equivalences { if buf.len() > 1 { @@ -379,7 +382,7 @@ impl EqTable { EqTable { table: EqTable::eq_map_merge_adjacent(&mut eq_map_0), sets: eq_sets } } - fn eq_map_push_first_pass(eq_map_0: &mut Vec, eq_tables: &mut Vec>, buf: &Vec) { + fn eq_map_push_first_pass(eq_map_0: &mut Vec, eq_tables: &mut Vec>, buf: &Vec) { if buf.len() == 2 { let offset = (buf[0] as i32) - (buf[1] as i32); if offset.abs() == 1 { @@ -433,8 +436,8 @@ impl EqTable { eq_map } - fn create_one_way_mapping(mappings: Vec<(usize, usize)>) -> EqTable { - fn can_use_single_mapping(last: &EqTableEntry, dst: usize) -> bool { + fn create_one_way_mapping(mappings: Vec<(IElement, IElement)>) -> EqTable { + fn can_use_single_mapping(last: &EqTableEntry, dst: IElement) -> bool { match last.mapping { EqMapping::IntegerOffset(offset) => { last.lo == last.hi && last.lo as i32 + offset == dst as i32 @@ -492,7 +495,7 @@ impl EqTable { (a, b) => { a.eq(b) } } } - fn mapping_clone(child: &EqTable, cur_child: &EqTableEntry, eq_table_diff: &mut Vec, sets_diff: &mut Vec>, sets_map: &mut Vec>) { + fn mapping_clone(child: &EqTable, cur_child: &EqTableEntry, eq_table_diff: &mut Vec, sets_diff: &mut Vec>, sets_map: &mut Vec>) { if eq_table_diff.last().map(|last| last.hi == cur_child.hi).unwrap_or(false) { return; } @@ -516,7 +519,7 @@ impl EqTable { } let mut eq_table_diff: Vec = Vec::with_capacity(child.table.len()); - let mut lut_diff: Vec> = Vec::with_capacity(child.sets.len()); + let mut lut_diff: Vec> = Vec::with_capacity(child.sets.len()); let mut lut_map: Vec> = vec![None; child.sets.len()]; let mut i_parent = parent.table.iter(); let mut i_child = child.table.iter(); @@ -579,19 +582,19 @@ impl EqTable { let diff = EqTable { table: eq_table_diff, sets: lut_diff }; for e in &child.table { for i in e.lo..e.hi { - let vec1: Vec = 
child.lookup(i).unwrap(); - let vec2: Vec = diff.lookup(i).unwrap_or_else(|| { parent.lookup(i).unwrap() }); - assert_eq!(HashSet::::from_iter(vec1), HashSet::::from_iter(vec2), ""); + let vec1: Vec = child.lookup(i).unwrap(); + let vec2: Vec = diff.lookup(i).unwrap_or_else(|| { parent.lookup(i).unwrap() }); + assert_eq!(HashSet::::from_iter(vec1), HashSet::::from_iter(vec2), ""); } } diff } - fn lookup(&self, key: usize) -> Option> { + fn lookup(&self, key: IElement) -> Option> { self.binary_search(key).map(|e| { return match &e.mapping { EqMapping::IntegerOffset(o) => { - vec![key, (o + (key as i32)) as usize] + vec![key, (o + (key as i32)) as IElement] } EqMapping::Set(i) => { self.sets[*i].clone() @@ -660,8 +663,8 @@ impl EqTable { } } -fn list_to_ranges(set: &Vec) -> Vec { - let mut ranges: Vec = vec![]; +fn list_to_ranges(set: &Vec) -> Vec { + let mut ranges: Vec = vec![]; if set.len() > 0 { ranges.push(set[0]); let mut last = set[0]; @@ -678,7 +681,7 @@ fn list_to_ranges(set: &Vec) -> Vec { return ranges; } -fn list_to_ranges_str(set: &Vec) -> String { +fn list_to_ranges_str(set: &Vec) -> String { list_to_ranges(set).iter().map(|v| format!("{:#08x}", v)).collect::>().join(", ") } @@ -922,7 +925,7 @@ fn main() -> Result<()> { } fn generate_case_fold_data() -> Result<()> { - let mut multi_character_strings: HashMap = HashMap::new(); + let mut multi_character_strings: HashMap = HashMap::new(); let unicode_version = "15.0.0"; let unicode_version_oracle_db = "12.1.0"; @@ -937,7 +940,7 @@ fn generate_case_fold_data() -> Result<()> { let eq_ruby = unicode_case_folding_one_way(&unicode_case_folding_txt, &mut multi_character_strings, Full)?; let eq_oracle = unicode_case_folding_one_way(&unicode_case_folding_txt_oracle, &mut multi_character_strings, Full)?; let eq_oracle_ai = oracledb_extract_ai_case_fold_table(&mut multi_character_strings)?; - let foldable_chars: Vec = parse_case_folding_txt(&unicode_case_folding_txt, Simple)?.iter().map(|(src, _)| 
src.chars().next().unwrap() as usize).collect(); + let foldable_chars: Vec = parse_case_folding_txt(&unicode_case_folding_txt, Simple)?.iter().map(|(src, _)| src.chars().next().unwrap() as IElement).collect(); let mut out = vec![]; writeln!(out)?; @@ -1042,7 +1045,7 @@ fn parse_case_folding_txt(unicode_case_folding: &String, variant: UnicodeCaseFol }))) } -fn unicode_case_folding(unicode_case_folding: &String, multi_character_strings: &mut HashMap, variant: UnicodeCaseFoldingVariant) -> Result { +fn unicode_case_folding(unicode_case_folding: &String, multi_character_strings: &mut HashMap, variant: UnicodeCaseFoldingVariant) -> Result { let mut eq_builder = EquivalenceBuilder::new(multi_character_strings); for (src, dst) in parse_case_folding_txt(unicode_case_folding, variant)? { eq_builder.add_equivalence(src.as_str(), dst.as_str()); @@ -1050,16 +1053,16 @@ fn unicode_case_folding(unicode_case_folding: &String, multi_character_strings: Ok(eq_builder.create_eq_table()) } -fn unicode_case_folding_one_way(unicode_case_folding: &String, multi_character_strings: &mut HashMap, variant: UnicodeCaseFoldingVariant) -> Result { +fn unicode_case_folding_one_way(unicode_case_folding: &String, multi_character_strings: &mut HashMap, variant: UnicodeCaseFoldingVariant) -> Result { let mut eq_builder = EquivalenceBuilder::new(multi_character_strings); - let mut mappings: Vec<(usize, usize)> = vec![]; + let mut mappings: Vec<(IElement, IElement)> = vec![]; for (src, dst) in parse_case_folding_txt(unicode_case_folding, variant)? 
{ mappings.push((eq_builder.index(src.as_str()), eq_builder.index(dst.as_str()))); } Ok(EqTable::create_one_way_mapping(mappings)) } -fn js_non_unicode_case_folding(unicode_data: &String, unicode_special_casing: &String, multi_character_strings: &mut HashMap) -> Result { +fn js_non_unicode_case_folding(unicode_data: &String, unicode_special_casing: &String, multi_character_strings: &mut HashMap) -> Result { let mut upper_map: HashMap = HashMap::new(); for result in unicode_table(unicode_data)?.records() { let record = result?; @@ -1098,8 +1101,8 @@ fn js_non_unicode_case_folding(unicode_data: &String, unicode_special_casing: &S Ok(eq_builder.create_eq_table()) } -fn python_unicode_case_folding(unicode_data: &String, unicode_special_casing: &String, multi_character_strings: &mut HashMap) -> Result { - fn read_data_file_mapping(unicode_data_file: &String, multi_character_strings: &mut HashMap, cell_src: usize, cell_dst: usize) -> Result>> { +fn python_unicode_case_folding(unicode_data: &String, unicode_special_casing: &String, multi_character_strings: &mut HashMap) -> Result { + fn read_data_file_mapping(unicode_data_file: &String, multi_character_strings: &mut HashMap, cell_src: usize, cell_dst: usize) -> Result>> { let mut eq_builder = EquivalenceBuilder::new(multi_character_strings); for result in unicode_table(unicode_data_file)?.records() { let record = result?; @@ -1111,7 +1114,7 @@ fn python_unicode_case_folding(unicode_data: &String, unicode_special_casing: &S Ok(eq_builder.equivalences) } - fn read_special_casing_mapping(unicode_special_casing: &String, multi_character_strings: &mut HashMap, cell_src: usize, cell_dst: usize) -> Result>> { + fn read_special_casing_mapping(unicode_special_casing: &String, multi_character_strings: &mut HashMap, cell_src: usize, cell_dst: usize) -> Result>> { let mut eq_builder = EquivalenceBuilder::new(multi_character_strings); for result in unicode_table(unicode_special_casing)?.records() { let record = result?; @@ -1132,23 
+1135,23 @@ fn python_unicode_case_folding(unicode_data: &String, unicode_special_casing: &S let eq_upper = read_data_file_mapping(unicode_data, multi_character_strings, 0, 13)?; let eq_special_lower = read_special_casing_mapping(unicode_special_casing, multi_character_strings, 0, 1)?; let eq_special_upper = read_special_casing_mapping(unicode_special_casing, multi_character_strings, 0, 3)?; - let merged = Vec::from_iter(merge_sets(merge_sets(eq_lower.values(), eq_special_lower.values()).iter(), merge_sets(eq_upper.values(), eq_special_upper.values()).iter()).iter().map(|set| Vec::from_iter(set.iter().cloned()))); + let merged = Vec::from_iter(merge_eq_classes(merge_eq_classes(eq_lower.values(), eq_special_lower.values()).iter(), merge_eq_classes(eq_upper.values(), eq_special_upper.values()).iter()).iter().map(|set| Vec::from_iter(set.iter().cloned()))); Ok(EqTable::from_vec(merged)) } struct EquivalenceBuilder<'a> { - multi_character_strings: &'a mut HashMap, - equivalences: HashMap>, + multi_character_strings: &'a mut HashMap, + equivalences: HashMap>, } impl EquivalenceBuilder<'_> { - fn new(multi_character_strings: &mut HashMap) -> EquivalenceBuilder { + fn new(multi_character_strings: &mut HashMap) -> EquivalenceBuilder { EquivalenceBuilder { multi_character_strings, equivalences: Default::default() } } - fn index(&mut self, s: &str) -> usize { + fn index(&mut self, s: &str) -> IElement { if s.chars().count() == 1 { - return s.chars().next().unwrap() as usize; + return s.chars().next().unwrap() as IElement; } let next_id = self.multi_character_strings.len() + 0x11_0000; return *self.multi_character_strings.entry(s.to_string()).or_insert(next_id); @@ -1175,24 +1178,24 @@ impl EquivalenceBuilder<'_> { } } -fn merge_sets<'a, I, Inner>(a: I, b: I) -> Vec> where I: Iterator, Inner: IntoIterator + Copy + Debug { - let sets: Vec> = Vec::from_iter(a.map(|x| HashSet::from_iter(x.into_iter().cloned()))); - let m: HashMap = 
HashMap::from_iter(sets.iter().enumerate().flat_map(|(i, set)| { +fn merge_eq_classes<'a, I, Inner>(a: I, b: I) -> Vec> where I: Iterator, Inner: IntoIterator + Copy + Debug { + let eq_classes_a: Vec> = Vec::from_iter(a.map(|eq_class_a| HashSet::from_iter(eq_class_a.into_iter().cloned()))); + let chars_a_mapped_to_class_index: HashMap = HashMap::from_iter(eq_classes_a.iter().enumerate().flat_map(|(i, set)| { set.iter().cloned().map(move |v| (v, i)) })); - let mut to_copy = vec![true; sets.len()]; - let mut ret: Vec> = b.map(|vec| { - HashSet::from_iter(vec.into_iter().flat_map(|v: &usize| m.get(v)).flat_map(|i| { - to_copy[*i] = false; - sets.get(*i) - }).flatten().cloned().chain(vec.into_iter().cloned())) + let mut eq_class_a_copy = vec![true; eq_classes_a.len()]; + let mut merged_classes: Vec> = b.map(|eq_class_b| { + HashSet::from_iter(eq_class_b.into_iter().flat_map(|char_b: &IElement| chars_a_mapped_to_class_index.get(char_b)).flat_map(|i| { + eq_class_a_copy[*i] = false; + eq_classes_a.get(*i) + }).flatten().cloned().chain(eq_class_b.into_iter().cloned())) }).collect(); - for (i, copy) in to_copy.iter().enumerate() { + for (i, copy) in eq_class_a_copy.iter().enumerate() { if *copy { - ret.push(sets.get(i).unwrap().clone()); + merged_classes.push(eq_classes_a.get(i).unwrap().clone()); } } - ret + merged_classes } fn oracledb_start_docker_container() { @@ -1226,11 +1229,11 @@ fn oracledb_connect() -> std::result::Result { }) } -fn oracledb_extract_ai_case_fold_table<'a>(multi_character_strings: &mut HashMap) -> Result { +fn oracledb_extract_ai_case_fold_table<'a>(multi_character_strings: &mut HashMap) -> Result { let conn = oracledb_connect()?; let mut eq_builder = EquivalenceBuilder::new(multi_character_strings); - let mut mappings: Vec<(usize, usize)> = vec![]; + let mut mappings: Vec<(IElement, IElement)> = vec![]; let query = "select nlssort(:c, 'nls_sort = binary_ai') from dual"; println!("extracting accent insensitive mappings from OracleDB"); @@ -1275,8 
+1278,8 @@ fn oracledb_generate_posix_char_classes() -> Result<()> { let mut statement = conn.statement(query).build()?; let mut out = vec![]; for name in ["alpha", "blank", "cntrl", "digit", "graph", "lower", "print", "punct", "space", "upper", "xdigit"] { - let mut chars: Vec = vec![]; - for row_result in statement.query_as::(&[&format!("[[:{}:]]", name).as_str()])? { + let mut chars: Vec = vec![]; + for row_result in statement.query_as::(&[&format!("[[:{}:]]", name).as_str()])? { chars.push(row_result?); } writeln!(out, "\n\nPOSIX_CHAR_CLASSES.put(\"{}\", CodePointSet.createNoDedup(", name)?; From 16f4c0e9eeaff2ef82c2334d155013a3b2623b95 Mon Sep 17 00:00:00 2001 From: Tom Shull Date: Thu, 14 Sep 2023 22:31:40 +0200 Subject: [PATCH 14/17] change handling of returning conservative typeflow. --- .../src/com/oracle/graal/pointsto/api/HostVM.java | 12 ++++++++++++ .../graal/pointsto/flow/InvokeTypeFlow.java | 13 ++++++++++--- .../graal/hosted/ParseOnceDeoptTestFeature.java | 5 +++++ .../ParseOnceRuntimeCompilationFeature.java | 15 +++++++++++++++ 4 files changed, 42 insertions(+), 3 deletions(-) diff --git a/substratevm/src/com.oracle.graal.pointsto/src/com/oracle/graal/pointsto/api/HostVM.java b/substratevm/src/com.oracle.graal.pointsto/src/com/oracle/graal/pointsto/api/HostVM.java index f13a3a6af331..e324e1bca9b6 100644 --- a/substratevm/src/com.oracle.graal.pointsto/src/com/oracle/graal/pointsto/api/HostVM.java +++ b/substratevm/src/com.oracle.graal.pointsto/src/com/oracle/graal/pointsto/api/HostVM.java @@ -354,6 +354,13 @@ public interface MultiMethodAnalysisPolicy { * return values. */ boolean insertPlaceholderParamAndReturnFlows(MultiMethod.MultiMethodKey multiMethodKey); + + /** + * Some methods can be transformed after analysis; in these cases we do not know what the + * returned value will be. 
+ */ + boolean unknownReturnValue(BigBang bb, MultiMethod.MultiMethodKey callerMultiMethodKey, AnalysisMethod target); + } /** @@ -385,6 +392,11 @@ public boolean canComputeReturnedParameterIndex(MultiMethod.MultiMethodKey multi public boolean insertPlaceholderParamAndReturnFlows(MultiMethod.MultiMethodKey multiMethodKey) { return false; } + + @Override + public boolean unknownReturnValue(BigBang bb, MultiMethod.MultiMethodKey callerMultiMethodKey, AnalysisMethod target) { + return false; + } }; public MultiMethodAnalysisPolicy getMultiMethodAnalysisPolicy() { diff --git a/substratevm/src/com.oracle.graal.pointsto/src/com/oracle/graal/pointsto/flow/InvokeTypeFlow.java b/substratevm/src/com.oracle.graal.pointsto/src/com/oracle/graal/pointsto/flow/InvokeTypeFlow.java index dbb3e7caf21a..ece8f7c168fe 100644 --- a/substratevm/src/com.oracle.graal.pointsto/src/com/oracle/graal/pointsto/flow/InvokeTypeFlow.java +++ b/substratevm/src/com.oracle.graal.pointsto/src/com/oracle/graal/pointsto/flow/InvokeTypeFlow.java @@ -203,14 +203,16 @@ protected void updateReceiver(PointsToAnalysis bb, MethodFlowsGraphInfo calleeFl } protected void updateReceiver(PointsToAnalysis bb, MethodFlowsGraphInfo calleeFlows, TypeState receiverTypeState) { - if (bb.getHostVM().getMultiMethodAnalysisPolicy().performParameterLinking(callerMultiMethodKey, calleeFlows.getMethod().getMultiMethodKey())) { + var analysisPolicy = bb.getHostVM().getMultiMethodAnalysisPolicy(); + var calleeKey = calleeFlows.getMethod().getMultiMethodKey(); + if (analysisPolicy.performParameterLinking(callerMultiMethodKey, calleeKey)) { FormalReceiverTypeFlow formalReceiverFlow = calleeFlows.getFormalReceiver(); if (formalReceiverFlow != null) { formalReceiverFlow.addReceiverState(bb, receiverTypeState); } } - if (bb.getHostVM().getMultiMethodAnalysisPolicy().performReturnLinking(callerMultiMethodKey, calleeFlows.getMethod().getMultiMethodKey())) { + if (analysisPolicy.performReturnLinking(callerMultiMethodKey, calleeKey) && 
!analysisPolicy.unknownReturnValue(bb, callerMultiMethodKey, calleeFlows.getMethod())) { if (bb.optimizeReturnedParameter()) { int paramIndex = calleeFlows.getMethod().getTypeFlow().getReturnedParameterIndex(); if (actualReturn != null && paramIndex == 0) { @@ -279,7 +281,12 @@ public void linkReturn(PointsToAnalysis bb, boolean isStatic, MethodFlowsGraphIn * created for the return, then {@code setActualReturn} will perform all necessary linking. */ if (actualReturn != null && bb.getHostVM().getMultiMethodAnalysisPolicy().performReturnLinking(callerMultiMethodKey, calleeFlows.getMethod().getMultiMethodKey())) { - if (bb.optimizeReturnedParameter()) { + if (bb.getHostVM().getMultiMethodAnalysisPolicy().unknownReturnValue(bb, callerMultiMethodKey, calleeFlows.getMethod())) { + /* + * When there is an unknown return value we must be conservative. + */ + actualReturn.declaredType.getTypeFlow(bb, true).addUse(bb, actualReturn); + } else if (bb.optimizeReturnedParameter()) { int paramNodeIndex = calleeFlows.getMethod().getTypeFlow().getReturnedParameterIndex(); if (paramNodeIndex != -1) { if (isStatic || paramNodeIndex != 0) { diff --git a/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/hosted/ParseOnceDeoptTestFeature.java b/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/hosted/ParseOnceDeoptTestFeature.java index 14329990aca0..b1602572999d 100644 --- a/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/hosted/ParseOnceDeoptTestFeature.java +++ b/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/hosted/ParseOnceDeoptTestFeature.java @@ -244,5 +244,10 @@ public boolean insertPlaceholderParamAndReturnFlows(MultiMethod.MultiMethodKey m */ return multiMethodKey == DEOPT_TARGET_METHOD; } + + @Override + public boolean unknownReturnValue(BigBang bb, MultiMethod.MultiMethodKey callerMultiMethodKey, AnalysisMethod target) { + return false; + } } } diff --git 
a/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/hosted/ParseOnceRuntimeCompilationFeature.java b/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/hosted/ParseOnceRuntimeCompilationFeature.java index 5409eb73a922..492916e93329 100644 --- a/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/hosted/ParseOnceRuntimeCompilationFeature.java +++ b/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/hosted/ParseOnceRuntimeCompilationFeature.java @@ -1253,6 +1253,21 @@ public boolean canComputeReturnedParameterIndex(MultiMethod.MultiMethodKey multi public boolean insertPlaceholderParamAndReturnFlows(MultiMethod.MultiMethodKey multiMethodKey) { return multiMethodKey == DEOPT_TARGET_METHOD || multiMethodKey == RUNTIME_COMPILED_METHOD; } + + @Override + public boolean unknownReturnValue(BigBang bb, MultiMethod.MultiMethodKey callerMultiMethodKey, AnalysisMethod target) { + if (callerMultiMethodKey == RUNTIME_COMPILED_METHOD) { + /* + * If the method may be intrinsified later, the implementation can change. 
+ */ + var originalTarget = target.getMultiMethod(ORIGINAL_METHOD); + var options = bb.getOptions(); + return (hostedProviders.getGraphBuilderPlugins().getInvocationPlugins().lookupInvocation(originalTarget, options) != null) || + hostedProviders.getReplacements().hasSubstitution(originalTarget, options); + + } + return false; + } } /** From 891bfe2d26bde6da9225c0d2d92cb1109ecb5790 Mon Sep 17 00:00:00 2001 From: David Kozak Date: Wed, 20 Sep 2023 15:03:47 +0200 Subject: [PATCH 15/17] check unknownReturnValue for all non-original methods --- .../svm/graal/hosted/ParseOnceRuntimeCompilationFeature.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/hosted/ParseOnceRuntimeCompilationFeature.java b/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/hosted/ParseOnceRuntimeCompilationFeature.java index 492916e93329..740ddbd0ed8e 100644 --- a/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/hosted/ParseOnceRuntimeCompilationFeature.java +++ b/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/hosted/ParseOnceRuntimeCompilationFeature.java @@ -1256,7 +1256,7 @@ public boolean insertPlaceholderParamAndReturnFlows(MultiMethod.MultiMethodKey m @Override public boolean unknownReturnValue(BigBang bb, MultiMethod.MultiMethodKey callerMultiMethodKey, AnalysisMethod target) { - if (callerMultiMethodKey == RUNTIME_COMPILED_METHOD) { + if (callerMultiMethodKey != ORIGINAL_METHOD) { /* * If the method may be intrinsified later, the implementation can change. */ From 0c95dc0fa14b750e015eb07696645bcaddd3edf6 Mon Sep 17 00:00:00 2001 From: Tom Shull Date: Thu, 28 Sep 2023 15:21:40 +0200 Subject: [PATCH 16/17] fix criteria for unknown return value. 
--- .../src/com/oracle/graal/pointsto/api/HostVM.java | 4 ++-- .../graal/hosted/ParseOnceDeoptTestFeature.java | 2 +- .../hosted/ParseOnceRuntimeCompilationFeature.java | 14 ++++++++------ 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/substratevm/src/com.oracle.graal.pointsto/src/com/oracle/graal/pointsto/api/HostVM.java b/substratevm/src/com.oracle.graal.pointsto/src/com/oracle/graal/pointsto/api/HostVM.java index e324e1bca9b6..5fb82aaff105 100644 --- a/substratevm/src/com.oracle.graal.pointsto/src/com/oracle/graal/pointsto/api/HostVM.java +++ b/substratevm/src/com.oracle.graal.pointsto/src/com/oracle/graal/pointsto/api/HostVM.java @@ -359,7 +359,7 @@ public interface MultiMethodAnalysisPolicy { * Some methods can be transformed after analysis; in these cases we do not know what the * returned value will be. */ - boolean unknownReturnValue(BigBang bb, MultiMethod.MultiMethodKey callerMultiMethodKey, AnalysisMethod target); + boolean unknownReturnValue(BigBang bb, MultiMethod.MultiMethodKey callerMultiMethodKey, AnalysisMethod implementation); } @@ -394,7 +394,7 @@ public boolean insertPlaceholderParamAndReturnFlows(MultiMethod.MultiMethodKey m } @Override - public boolean unknownReturnValue(BigBang bb, MultiMethod.MultiMethodKey callerMultiMethodKey, AnalysisMethod target) { + public boolean unknownReturnValue(BigBang bb, MultiMethod.MultiMethodKey callerMultiMethodKey, AnalysisMethod implementation) { return false; } }; diff --git a/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/hosted/ParseOnceDeoptTestFeature.java b/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/hosted/ParseOnceDeoptTestFeature.java index b1602572999d..aa9d43fe94f8 100644 --- a/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/hosted/ParseOnceDeoptTestFeature.java +++ b/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/hosted/ParseOnceDeoptTestFeature.java @@ -246,7 +246,7 @@ public boolean 
insertPlaceholderParamAndReturnFlows(MultiMethod.MultiMethodKey m } @Override - public boolean unknownReturnValue(BigBang bb, MultiMethod.MultiMethodKey callerMultiMethodKey, AnalysisMethod target) { + public boolean unknownReturnValue(BigBang bb, MultiMethod.MultiMethodKey callerMultiMethodKey, AnalysisMethod implementation) { return false; } } diff --git a/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/hosted/ParseOnceRuntimeCompilationFeature.java b/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/hosted/ParseOnceRuntimeCompilationFeature.java index 740ddbd0ed8e..21d7e16abdd9 100644 --- a/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/hosted/ParseOnceRuntimeCompilationFeature.java +++ b/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/hosted/ParseOnceRuntimeCompilationFeature.java @@ -1255,16 +1255,18 @@ public boolean insertPlaceholderParamAndReturnFlows(MultiMethod.MultiMethodKey m } @Override - public boolean unknownReturnValue(BigBang bb, MultiMethod.MultiMethodKey callerMultiMethodKey, AnalysisMethod target) { - if (callerMultiMethodKey != ORIGINAL_METHOD) { + public boolean unknownReturnValue(BigBang bb, MultiMethod.MultiMethodKey callerMultiMethodKey, AnalysisMethod implementation) { + if (callerMultiMethodKey == RUNTIME_COMPILED_METHOD || implementation.isDeoptTarget()) { /* * If the method may be intrinsified later, the implementation can change. + * + * We also must ensure deopt methods always return a superset of the original + * method. 
*/ - var originalTarget = target.getMultiMethod(ORIGINAL_METHOD); + var origImpl = implementation.getMultiMethod(ORIGINAL_METHOD); var options = bb.getOptions(); - return (hostedProviders.getGraphBuilderPlugins().getInvocationPlugins().lookupInvocation(originalTarget, options) != null) || - hostedProviders.getReplacements().hasSubstitution(originalTarget, options); - + return (hostedProviders.getGraphBuilderPlugins().getInvocationPlugins().lookupInvocation(origImpl, options) != null) || + hostedProviders.getReplacements().hasSubstitution(origImpl, options); } return false; } From 074503f4e2e86bbcfd50f88aa462a29e6aca4f33 Mon Sep 17 00:00:00 2001 From: Tomas Zezula Date: Tue, 10 Oct 2023 14:24:49 +0200 Subject: [PATCH 17/17] [GR-49334] Regression in spawning Espresso context on platforms where it runs on Sulong. --- sdk/CHANGELOG.md | 3 + sdk/src/org.graalvm.polyglot/snapshot.sigtest | 2 + .../src/org/graalvm/polyglot/Engine.java | 2 +- .../polyglot/impl/AbstractPolyglotImpl.java | 4 +- .../org/graalvm/polyglot/io/FileSystem.java | 25 +- .../svm/truffle/TruffleBaseFeature.java | 53 +- .../instrumentation/TruffleInstrument.java | 2 + .../ContextPreInitializationTest.java | 18 +- .../api/test/polyglot/FileSystemsTest.java | 4 +- .../test/polyglot/InternalResourceTest.java | 316 ++++++++---- .../com/oracle/truffle/api/TruffleFile.java | 3 +- .../oracle/truffle/api/TruffleLanguage.java | 2 +- .../com/oracle/truffle/api/impl/Accessor.java | 6 +- .../com/oracle/truffle/api/source/Source.java | 18 +- .../truffle/polyglot/EngineAccessor.java | 64 +-- .../oracle/truffle/polyglot/FileSystems.java | 467 +++++------------- .../truffle/polyglot/InstrumentCache.java | 4 + .../polyglot/InternalResourceCache.java | 397 +++++---------- .../polyglot/InternalResourceRoots.java | 304 ++++++++++++ .../truffle/polyglot/LanguageCache.java | 4 + .../truffle/polyglot/PolyglotEngineImpl.java | 2 +- .../oracle/truffle/polyglot/PolyglotImpl.java | 6 +- 
.../truffle/polyglot/PolyglotInstrument.java | 4 - .../truffle/polyglot/PolyglotLanguage.java | 4 - 24 files changed, 899 insertions(+), 815 deletions(-) create mode 100644 truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/InternalResourceRoots.java diff --git a/sdk/CHANGELOG.md b/sdk/CHANGELOG.md index dd964eff5f47..ca9337076abf 100644 --- a/sdk/CHANGELOG.md +++ b/sdk/CHANGELOG.md @@ -2,6 +2,9 @@ This changelog summarizes major changes between GraalVM SDK versions. The main focus is on APIs exported by GraalVM SDK. +## Version 24.0.0 +* (GR-49334) Deprecated the `FileSystem#allowLanguageHomeAccess()` method and introduced `FileSystem#allowInternalResourceAccess()` as a replacement. To ensure compatibility, both methods now provide support for language homes and internal resources. + ## Version 23.1.0 * (GR-43819) The GraalVM SDK was split into several more fine-grained modules. The use of the graalvm-sdk module is now deprecated. Please update your Maven and module dependencies accordingly. Note that all APIs remain compatible. The following new modules are available: * `org.graalvm.nativeimage` A framework that allows to customize native image generation. 
diff --git a/sdk/src/org.graalvm.polyglot/snapshot.sigtest b/sdk/src/org.graalvm.polyglot/snapshot.sigtest index b3b6f5a5e5e1..1714553a080a 100644 --- a/sdk/src/org.graalvm.polyglot/snapshot.sigtest +++ b/sdk/src/org.graalvm.polyglot/snapshot.sigtest @@ -673,7 +673,9 @@ meth public java.lang.String getSeparator() meth public java.nio.charset.Charset getEncoding(java.nio.file.Path) meth public java.nio.file.Path getTempDirectory() meth public java.nio.file.Path readSymbolicLink(java.nio.file.Path) throws java.io.IOException +meth public static org.graalvm.polyglot.io.FileSystem allowInternalResourceAccess(org.graalvm.polyglot.io.FileSystem) meth public static org.graalvm.polyglot.io.FileSystem allowLanguageHomeAccess(org.graalvm.polyglot.io.FileSystem) + anno 0 java.lang.Deprecated(boolean forRemoval=false, java.lang.String since="") meth public static org.graalvm.polyglot.io.FileSystem newDefaultFileSystem() meth public static org.graalvm.polyglot.io.FileSystem newFileSystem(java.nio.file.FileSystem) meth public static org.graalvm.polyglot.io.FileSystem newReadOnlyFileSystem(org.graalvm.polyglot.io.FileSystem) diff --git a/sdk/src/org.graalvm.polyglot/src/org/graalvm/polyglot/Engine.java b/sdk/src/org.graalvm.polyglot/src/org/graalvm/polyglot/Engine.java index 0b3e4bd3727d..28fab9e930e8 100644 --- a/sdk/src/org.graalvm.polyglot/src/org/graalvm/polyglot/Engine.java +++ b/sdk/src/org.graalvm.polyglot/src/org/graalvm/polyglot/Engine.java @@ -2101,7 +2101,7 @@ public FileSystem newDefaultFileSystem(String hostTmpDir) { } @Override - public FileSystem allowLanguageHomeAccess(FileSystem fileSystem) { + public FileSystem allowInternalResourceAccess(FileSystem fileSystem) { throw noPolyglotImplementationFound(); } diff --git a/sdk/src/org.graalvm.polyglot/src/org/graalvm/polyglot/impl/AbstractPolyglotImpl.java b/sdk/src/org.graalvm.polyglot/src/org/graalvm/polyglot/impl/AbstractPolyglotImpl.java index 79038185b3b5..fab8c9d0933e 100644 --- 
a/sdk/src/org.graalvm.polyglot/src/org/graalvm/polyglot/impl/AbstractPolyglotImpl.java +++ b/sdk/src/org.graalvm.polyglot/src/org/graalvm/polyglot/impl/AbstractPolyglotImpl.java @@ -1389,8 +1389,8 @@ public FileSystem newDefaultFileSystem(String hostTmpDir) { return getNext().newDefaultFileSystem(hostTmpDir); } - public FileSystem allowLanguageHomeAccess(FileSystem fileSystem) { - return getNext().allowLanguageHomeAccess(fileSystem); + public FileSystem allowInternalResourceAccess(FileSystem fileSystem) { + return getNext().allowInternalResourceAccess(fileSystem); } public FileSystem newReadOnlyFileSystem(FileSystem fileSystem) { diff --git a/sdk/src/org.graalvm.polyglot/src/org/graalvm/polyglot/io/FileSystem.java b/sdk/src/org.graalvm.polyglot/src/org/graalvm/polyglot/io/FileSystem.java index 9960324819b3..2d3e9040d153 100644 --- a/sdk/src/org.graalvm.polyglot/src/org/graalvm/polyglot/io/FileSystem.java +++ b/sdk/src/org.graalvm.polyglot/src/org/graalvm/polyglot/io/FileSystem.java @@ -67,6 +67,7 @@ import java.util.Objects; import java.util.Set; import org.graalvm.polyglot.Context; +import org.graalvm.polyglot.Engine; import org.graalvm.polyglot.io.IOAccess.Builder; /** @@ -512,9 +513,31 @@ static FileSystem newDefaultFileSystem() { * {@link #getPathSeparator() path separator} as the {@link #newDefaultFileSystem() * default file system}. * @since 22.2 + * @deprecated Use {@link #allowInternalResourceAccess(FileSystem)}. */ + @Deprecated static FileSystem allowLanguageHomeAccess(FileSystem fileSystem) { - return IOHelper.ImplHolder.IMPL.allowLanguageHomeAccess(fileSystem); + return allowInternalResourceAccess(fileSystem); + } + + /** + * Decorates the given {@code fileSystem} by an implementation that forwards access to the + * internal resources to the default file system. The method is intended to be used by custom + * filesystem implementations with non default storage to allow guest languages to access + * internal resources. 
As the returned filesystem uses a default file system to access internal + * resources, the {@code fileSystem} has to use the same {@link Path} type, + * {@link #getSeparator() separator} and {@link #getPathSeparator() path separator} as the + * {@link #newDefaultFileSystem() default filesystem}. + * + * @throws IllegalArgumentException when the {@code fileSystem} does not use the same + * {@link Path} type or has a different {@link #getSeparator() separator} or + * {@link #getPathSeparator() path separator} as the {@link #newDefaultFileSystem() + * default file system}. + * @see Engine#copyResources(Path, String...) + * @since 24.0 + */ + static FileSystem allowInternalResourceAccess(FileSystem fileSystem) { + return IOHelper.ImplHolder.IMPL.allowInternalResourceAccess(fileSystem); } /** diff --git a/substratevm/src/com.oracle.svm.truffle/src/com/oracle/svm/truffle/TruffleBaseFeature.java b/substratevm/src/com.oracle.svm.truffle/src/com/oracle/svm/truffle/TruffleBaseFeature.java index 5bc549275818..97cf84abcff3 100644 --- a/substratevm/src/com.oracle.svm.truffle/src/com/oracle/svm/truffle/TruffleBaseFeature.java +++ b/substratevm/src/com.oracle.svm.truffle/src/com/oracle/svm/truffle/TruffleBaseFeature.java @@ -488,6 +488,39 @@ public void beforeAnalysis(BeforeAnalysisAccess access) { Class frameClass = config.findClassByName("com.oracle.truffle.api.impl.FrameWithoutBoxing"); config.registerFieldValueTransformer(config.findField(frameClass, "ASSERTIONS_ENABLED"), new AssertionStatusFieldTransformer(frameClass)); + registerInternalResourceFieldValueTransformers(config); + } + + private static void registerInternalResourceFieldValueTransformers(BeforeAnalysisAccessImpl config) { + Class internalResourceCacheClass = config.findClassByName("com.oracle.truffle.polyglot.InternalResourceCache"); + Class internalResourceRootsClass = config.findClassByName("com.oracle.truffle.polyglot.InternalResourceRoots"); + Class resetableCacheRootClass = 
config.findClassByName("com.oracle.truffle.polyglot.InternalResourceCache$ResettableCachedRoot"); + Field cacheRootField = ReflectionUtil.lookupField(true, internalResourceCacheClass, "cacheRoot"); + if (cacheRootField != null) { + // graalvm-23.1.0 + assert internalResourceRootsClass == null; + config.registerFieldValueTransformer(cacheRootField, ResetFieldValueTransformer.INSTANCE); + config.registerFieldValueTransformer(ReflectionUtil.lookupField(false, resetableCacheRootClass, "resourceCacheRoot"), ResetFieldValueTransformer.INSTANCE); + } else { + // graalvm-24.0 + assert resetableCacheRootClass == null; + config.registerFieldValueTransformer(ReflectionUtil.lookupField(false, internalResourceCacheClass, "owningRoot"), ResetFieldValueTransformer.INSTANCE); + config.registerFieldValueTransformer(ReflectionUtil.lookupField(false, internalResourceCacheClass, "path"), ResetFieldValueTransformer.INSTANCE); + config.registerFieldValueTransformer(ReflectionUtil.lookupField(false, internalResourceRootsClass, "roots"), ResetFieldValueTransformer.INSTANCE); + } + } + + private static final class ResetFieldValueTransformer implements FieldValueTransformer { + + private static final FieldValueTransformer INSTANCE = new ResetFieldValueTransformer(); + + private ResetFieldValueTransformer() { + } + + @Override + public Object transform(Object receiver, Object originalValue) { + return null; + } } private static class AssertionStatusFieldTransformer implements FieldValueTransformer { @@ -1371,14 +1404,6 @@ final class Target_com_oracle_truffle_polyglot_LanguageCache { @TargetClass(className = "com.oracle.truffle.polyglot.InternalResourceCache", onlyWith = TruffleBaseFeature.IsEnabled.class) final class Target_com_oracle_truffle_polyglot_InternalResourceCache { - /* - * The field cannot be reset from the #afterAnalysis(). 
The reset comes too late for the - * String-must-not-contain-the-home-directory verification in DisallowedImageHeapObjectFeature, - * so we do the implicit reset using a substitution. - */ - @Alias @RecomputeFieldValue(kind = Kind.Reset) // - private static volatile Pair cacheRoot; - @Alias @RecomputeFieldValue(kind = Kind.Custom, declClass = UseInternalResourcesComputer.class, isFinal = true) // private static boolean useInternalResources; @@ -1395,18 +1420,6 @@ public Object transform(Object receiver, Object originalValue) { } } -@TargetClass(className = "com.oracle.truffle.polyglot.InternalResourceCache$ResettableCachedRoot", onlyWith = TruffleBaseFeature.IsEnabled.class) -final class Target_com_oracle_truffle_polyglot_InternalResourceCache_ResettableCachedRoot { - - /* - * The field cannot be reset from the #afterAnalysis(). The reset comes too late for the - * String-must-not-contain-the-home-directory verification in DisallowedImageHeapObjectFeature, - * so we do the implicit reset using a substitution. 
- */ - @Alias @RecomputeFieldValue(kind = Kind.Reset) // - private volatile Path resourceCacheRoot; -} - @TargetClass(className = "com.oracle.truffle.object.CoreLocations$DynamicObjectFieldLocation", onlyWith = TruffleBaseFeature.IsEnabled.class) final class Target_com_oracle_truffle_object_CoreLocations_DynamicObjectFieldLocation { @Alias @RecomputeFieldValue(kind = Kind.AtomicFieldUpdaterOffset) // diff --git a/truffle/src/com.oracle.truffle.api.instrumentation/src/com/oracle/truffle/api/instrumentation/TruffleInstrument.java b/truffle/src/com.oracle.truffle.api.instrumentation/src/com/oracle/truffle/api/instrumentation/TruffleInstrument.java index 5dd1a0121266..d4e98ef846c2 100644 --- a/truffle/src/com.oracle.truffle.api.instrumentation/src/com/oracle/truffle/api/instrumentation/TruffleInstrument.java +++ b/truffle/src/com.oracle.truffle.api.instrumentation/src/com/oracle/truffle/api/instrumentation/TruffleInstrument.java @@ -1076,6 +1076,8 @@ public Object getScope(LanguageInfo language) { * unpacking would be repeated once per operating system user. When the language was * compiled using native-image internal resources are unpacked at native-image compile time * and stored relative to the native-image. + *
<p>
+ * The caller thread must be entered in a context. * * @param resource the resource class to load * @throws IllegalArgumentException if {@code resource} is not associated with this diff --git a/truffle/src/com.oracle.truffle.api.test/src/com/oracle/truffle/api/test/polyglot/ContextPreInitializationTest.java b/truffle/src/com.oracle.truffle.api.test/src/com/oracle/truffle/api/test/polyglot/ContextPreInitializationTest.java index e566880d5e45..c70818ba6abd 100644 --- a/truffle/src/com.oracle.truffle.api.test/src/com/oracle/truffle/api/test/polyglot/ContextPreInitializationTest.java +++ b/truffle/src/com.oracle.truffle.api.test/src/com/oracle/truffle/api/test/polyglot/ContextPreInitializationTest.java @@ -2612,10 +2612,10 @@ private static Consumer newResourceBuildTimeVerifier(List file try { TruffleFile root = env.getInternalResource(ContextPreInitializationResource.class); assertNotNull(root); - assertFalse(root.isAbsolute()); + assertTrue(root.isAbsolute()); TruffleFile resource = root.resolve(ContextPreInitializationResource.FILE_NAME); assertNotNull(resource); - assertFalse(resource.isAbsolute()); + assertTrue(resource.isAbsolute()); assertEquals(ContextPreInitializationResource.FILE_CONTENT, new String(resource.readAllBytes(), StandardCharsets.UTF_8)); files.add(resource); } catch (IOException ioe) { @@ -2628,16 +2628,16 @@ private static Consumer newResourceExecutionTimeVerifier(List return (env) -> { try { TruffleFile file1 = files.get(0); - assertFalse(file1.isAbsolute()); + assertTrue(file1.isAbsolute()); assertEquals(ContextPreInitializationResource.FILE_CONTENT, new String(file1.readAllBytes(), StandardCharsets.UTF_8)); ContextPreInitializationResource.unpackCount = 0; TruffleFile root = env.getInternalResource(ContextPreInitializationResource.class); assertNotNull(root); - assertFalse(root.isAbsolute()); + assertTrue(root.isAbsolute()); assertEquals(0, ContextPreInitializationResource.unpackCount); TruffleFile file2 = 
root.resolve(ContextPreInitializationResource.FILE_NAME); assertNotNull(file2); - assertFalse(file2.isAbsolute()); + assertTrue(file2.isAbsolute()); assertEquals(ContextPreInitializationResource.FILE_CONTENT, new String(file2.readAllBytes(), StandardCharsets.UTF_8)); assertEquals(file1, file2); assertEquals(file1.getAbsoluteFile(), file2.getAbsoluteFile()); @@ -2696,14 +2696,14 @@ public void testInstrumentInternalResources() throws Exception { Assume.assumeFalse("Cannot run as native unittest", ImageInfo.inImageRuntimeCode()); setPatchable(FIRST); AtomicReference rootRef = new AtomicReference<>(); - ContextPreInitializationFirstInstrument.actions = Collections.singletonMap("onCreate", (e) -> { + ContextPreInitializationFirstInstrument.actions = Collections.singletonMap("onContextCreated", (e) -> { try { TruffleFile root = e.env.getInternalResource(ContextPreInitializationResource.class); assertNotNull(root); - assertFalse(root.isAbsolute()); + assertTrue(root.isAbsolute()); TruffleFile resource = root.resolve(ContextPreInitializationResource.FILE_NAME); assertNotNull(resource); - assertFalse(resource.isAbsolute()); + assertTrue(resource.isAbsolute()); assertEquals(ContextPreInitializationResource.FILE_CONTENT, new String(resource.readAllBytes(), StandardCharsets.UTF_8)); rootRef.set(root); } catch (IOException ioe) { @@ -2739,7 +2739,7 @@ public void testOverriddenCacheRoot() throws Exception { Path overriddenCacheRoot = Files.createTempDirectory(null).toRealPath(); Engine.copyResources(overriddenCacheRoot, FIRST); System.setProperty("polyglot.engine.resourcePath", overriddenCacheRoot.toRealPath().toString()); - TemporaryResourceCacheRoot.reset(false); + TemporaryResourceCacheRoot.reset(true); try { BaseLanguage.registerAction(ContextPreInitializationTestFirstLanguage.class, ActionKind.ON_PATCH_CONTEXT, newResourceExecutionTimeVerifier(files, overriddenCacheRoot.toString())); diff --git 
a/truffle/src/com.oracle.truffle.api.test/src/com/oracle/truffle/api/test/polyglot/FileSystemsTest.java b/truffle/src/com.oracle.truffle.api.test/src/com/oracle/truffle/api/test/polyglot/FileSystemsTest.java index a71a3e1065ca..32adfe0011b4 100644 --- a/truffle/src/com.oracle.truffle.api.test/src/com/oracle/truffle/api/test/polyglot/FileSystemsTest.java +++ b/truffle/src/com.oracle.truffle.api.test/src/com/oracle/truffle/api/test/polyglot/FileSystemsTest.java @@ -276,7 +276,7 @@ public static void createConfigurations() throws IOException, ReflectiveOperatio cfgs.put(MEMORY_FILE_SYSTEM, new Configuration(MEMORY_FILE_SYSTEM, ctx, memDir, fileSystem, false, true, true, true)); // Memory with language home - fileSystem = FileSystem.allowLanguageHomeAccess(new MemoryFileSystem()); + fileSystem = FileSystem.allowInternalResourceAccess(new MemoryFileSystem()); memDir = mkdirs(fileSystem.toAbsolutePath(fileSystem.parsePath("work")), fileSystem); fileSystem.setCurrentWorkingDirectory(memDir); createContent(memDir, fileSystem); @@ -286,7 +286,7 @@ public static void createConfigurations() throws IOException, ReflectiveOperatio if (TruffleTestAssumptions.isNoClassLoaderEncapsulation()) { // setCwd not supported // Memory with language home - in language home - fileSystem = FileSystem.allowLanguageHomeAccess(new MemoryFileSystem()); + fileSystem = FileSystem.allowInternalResourceAccess(new MemoryFileSystem()); memDir = mkdirs(fileSystem.toAbsolutePath(fileSystem.parsePath("work")), fileSystem); fileSystem.setCurrentWorkingDirectory(memDir); privateDir = createContent(memDir, fileSystem); diff --git a/truffle/src/com.oracle.truffle.api.test/src/com/oracle/truffle/api/test/polyglot/InternalResourceTest.java b/truffle/src/com.oracle.truffle.api.test/src/com/oracle/truffle/api/test/polyglot/InternalResourceTest.java index 09724bcd9d1e..e442baec8844 100644 --- a/truffle/src/com.oracle.truffle.api.test/src/com/oracle/truffle/api/test/polyglot/InternalResourceTest.java +++ 
b/truffle/src/com.oracle.truffle.api.test/src/com/oracle/truffle/api/test/polyglot/InternalResourceTest.java @@ -70,7 +70,6 @@ import org.graalvm.nativeimage.ImageInfo; import org.graalvm.polyglot.Context; import org.graalvm.polyglot.Engine; -import org.graalvm.polyglot.io.IOAccess; import org.junit.Assert; import org.junit.Assume; import org.junit.BeforeClass; @@ -85,7 +84,6 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import static com.oracle.truffle.api.test.polyglot.AbstractPolyglotTest.assertFails; @@ -279,103 +277,122 @@ public static class TestAccessFileOutsideOfResourceRoot extends AbstractExecutab @TruffleBoundary @SuppressWarnings("try") protected Object execute(RootNode node, Env env, Object[] contextArguments, Object[] frameArguments) throws Exception { - TruffleFile hostFolder = env.createTempDirectory(null, getClass().getSimpleName()); + TruffleFile hostFolder = env.getInternalTruffleFile((String) contextArguments[0]); try (TemporaryResourceCacheRoot cache = new TemporaryResourceCacheRoot()) { - // Relative paths - TruffleFile lib = env.getInternalResource(LibraryResource.class); - assertNull(lib.getParent()); - assertNoFileAccess(lib.resolve(".."), hostFolder); // Absolute paths - TruffleFile absoluteLibParent = lib.getAbsoluteFile().getParent(); - assertNotNull(absoluteLibParent); - assertNoFileAccess(absoluteLibParent, hostFolder); + TruffleFile lib = env.getInternalResource(LibraryResource.class); + assertTrue(lib.isAbsolute()); + TruffleFile outsideCacheFolder = getParentTransitive(lib, 4); + assertNotNull(outsideCacheFolder); + assertNoFileAccess(outsideCacheFolder, hostFolder); // Combine absolute paths with relative paths to escape from internal resource root - absoluteLibParent = lib.getAbsoluteFile().resolve(".."); - assertNoFileAccess(absoluteLibParent, 
hostFolder); - absoluteLibParent = lib.getAbsoluteFile().resolve("prefix").resolve("..").resolve(".."); - assertNoFileAccess(absoluteLibParent, hostFolder); + outsideCacheFolder = resolveParentTransitive(lib, 4); + assertNoFileAccess(outsideCacheFolder, hostFolder); + outsideCacheFolder = resolveParentTransitive(lib.resolve("prefix"), 5); + assertNoFileAccess(outsideCacheFolder, hostFolder); // Try to access other resource files TruffleFile src = env.getInternalResource(SourcesResource.class); - TruffleFile srcResolvedUsingLib = lib.resolve(src.getAbsoluteFile().toString()); - assertNoFileAccess(srcResolvedUsingLib, hostFolder); + assertTrue(src.isAbsolute()); + TruffleFile srcResolvedUsingLib = lib.resolve(src.toString()); + // With the shared filesystem the access to other resource cache dir is allowed. + assertTrue(srcResolvedUsingLib.isDirectory()); return null; - } finally { - delete(hostFolder); } } - private static void assertNoFileAccess(TruffleFile file, TruffleFile hostFolder) { - assertSecurityException(() -> file.resolve("fooDir").createDirectory()); - assertSecurityException(() -> file.resolve("fooDir").createDirectories()); - assertSecurityException(() -> file.resolve("fooFile").createFile()); - assertSecurityException(file::exists); - assertSecurityException(file::isDirectory); - assertSecurityException(file::isRegularFile); - assertSecurityException(file::isSymbolicLink); - assertSecurityException(file::isReadable); - assertSecurityException(file::isExecutable); - assertSecurityException(file::size); - assertFalse(file.isWritable()); - assertSecurityException(() -> file.isSameFile(file.resolveSibling("other"))); - assertSecurityException(() -> file.getAttribute(TruffleFile.CREATION_TIME)); - assertSecurityException(() -> file.getAttributes(List.of(TruffleFile.CREATION_TIME))); - assertSecurityException(file::getCreationTime); - assertSecurityException(file::getLastAccessTime); - assertSecurityException(file::getLastModifiedTime); - 
assertSecurityException(() -> file.setAttribute(TruffleFile.CREATION_TIME, FileTime.from(Instant.now()))); - assertSecurityException(() -> file.setCreationTime(FileTime.from(Instant.now()))); - assertSecurityException(() -> file.setLastAccessTime(FileTime.from(Instant.now()))); - assertSecurityException(() -> file.setLastModifiedTime(FileTime.from(Instant.now()))); - assertSecurityException(file::list); - assertSecurityException(() -> file.visit(new FileVisitor<>() { - @Override - public FileVisitResult preVisitDirectory(TruffleFile dir, BasicFileAttributes attrs) { - return FileVisitResult.CONTINUE; + private static TruffleFile getParentTransitive(TruffleFile file, int times) { + TruffleFile res = file; + for (int i = 0; i < times; i++) { + res = res.getParent(); + if (res == null) { + throw new IllegalArgumentException("File " + file.getAbsoluteFile() + " has not enough path components to go up " + times + " times."); } + } + return res; + } - @Override - public FileVisitResult visitFile(TruffleFile f, BasicFileAttributes attrs) { - return FileVisitResult.CONTINUE; - } + private static TruffleFile resolveParentTransitive(TruffleFile file, int times) { + TruffleFile res = file; + for (int i = 0; i < times; i++) { + res = res.resolve(".."); + } + return res; + } + } + + private static void assertNoFileAccess(TruffleFile file, TruffleFile hostFolder) { + assertSecurityException(() -> file.resolve("fooDir").createDirectory()); + assertSecurityException(() -> file.resolve("fooDir").createDirectories()); + assertSecurityException(() -> file.resolve("fooFile").createFile()); + assertSecurityException(file::exists); + assertSecurityException(file::isDirectory); + assertSecurityException(file::isRegularFile); + assertSecurityException(file::isSymbolicLink); + assertSecurityException(file::isReadable); + assertSecurityException(file::isExecutable); + assertSecurityException(file::size); + assertSecurityException(file::isWritable); + assertSecurityException(() -> 
file.getAttribute(TruffleFile.CREATION_TIME)); + assertSecurityException(() -> file.getAttributes(List.of(TruffleFile.CREATION_TIME))); + assertSecurityException(file::getCreationTime); + assertSecurityException(file::getLastAccessTime); + assertSecurityException(file::getLastModifiedTime); + assertSecurityException(() -> file.setAttribute(TruffleFile.CREATION_TIME, FileTime.from(Instant.now()))); + assertSecurityException(() -> file.setCreationTime(FileTime.from(Instant.now()))); + assertSecurityException(() -> file.setLastAccessTime(FileTime.from(Instant.now()))); + assertSecurityException(() -> file.setLastModifiedTime(FileTime.from(Instant.now()))); + assertSecurityException(file::list); + assertSecurityException(() -> file.visit(new FileVisitor<>() { + @Override + public FileVisitResult preVisitDirectory(TruffleFile dir, BasicFileAttributes attrs) { + return FileVisitResult.CONTINUE; + } - @Override - public FileVisitResult visitFileFailed(TruffleFile f, IOException exc) { - return FileVisitResult.CONTINUE; - } + @Override + public FileVisitResult visitFile(TruffleFile f, BasicFileAttributes attrs) { + return FileVisitResult.CONTINUE; + } - @Override - public FileVisitResult postVisitDirectory(TruffleFile dir, IOException exc) { - return FileVisitResult.CONTINUE; - } - }, 1)); - assertSecurityException(file::newBufferedReader); - assertSecurityException(file::newBufferedWriter); - assertSecurityException(file::newInputStream); - assertSecurityException(file::newOutputStream); - assertSecurityException(() -> file.newByteChannel(Set.of(StandardOpenOption.WRITE, StandardOpenOption.CREATE))); - assertSecurityException(() -> file.newByteChannel(Set.of(StandardOpenOption.READ))); - assertSecurityException(file::readAllBytes); - assertSecurityException(file::newDirectoryStream); - assertSecurityException(file::delete); - assertSecurityException(() -> file.copy(hostFolder.resolve("cp"))); - assertSecurityException(() -> file.move(hostFolder.resolve("mv"))); - if 
(OSUtils.isUnix()) { - assertSecurityException(file::getOwner); - assertSecurityException(file::getGroup); - assertSecurityException(file::getPosixPermissions); - assertSecurityException(() -> file.setPosixPermissions(Set.of())); - assertSecurityException(() -> file.createLink(file.resolveSibling("ln"))); - assertSecurityException(() -> file.createSymbolicLink(file.resolveSibling("lns"))); - assertSecurityException(file::readSymbolicLink); + @Override + public FileVisitResult visitFileFailed(TruffleFile f, IOException exc) { + return FileVisitResult.CONTINUE; + } + + @Override + public FileVisitResult postVisitDirectory(TruffleFile dir, IOException exc) { + return FileVisitResult.CONTINUE; } + }, 1)); + assertSecurityException(file::newBufferedReader); + assertSecurityException(file::newBufferedWriter); + assertSecurityException(file::newInputStream); + assertSecurityException(file::newOutputStream); + assertSecurityException(() -> file.newByteChannel(Set.of(StandardOpenOption.WRITE, StandardOpenOption.CREATE))); + assertSecurityException(() -> file.newByteChannel(Set.of(StandardOpenOption.READ))); + assertSecurityException(file::readAllBytes); + assertSecurityException(file::newDirectoryStream); + assertSecurityException(file::delete); + assertSecurityException(() -> file.copy(hostFolder.resolve("cp"))); + assertSecurityException(() -> file.move(hostFolder.resolve("mv"))); + if (OSUtils.isUnix()) { + assertSecurityException(file::getOwner); + assertSecurityException(file::getGroup); + assertSecurityException(file::getPosixPermissions); + assertSecurityException(() -> file.setPosixPermissions(Set.of())); + assertSecurityException(() -> file.createLink(file.normalize().resolveSibling("ln"))); + assertSecurityException(() -> file.createSymbolicLink(file.normalize().resolveSibling("lns"))); + assertSecurityException(file::readSymbolicLink); } } @Test - public void testAccessFileOutsideOfResourceRoot() { + public void testAccessFileOutsideOfResourceRoot() throws 
IOException { Assume.assumeFalse("Cannot run as native unittest", ImageInfo.inImageRuntimeCode()); - try (Context context = Context.newBuilder().allowIO(IOAccess.ALL).build()) { - AbstractExecutableTestLanguage.execute(context, TestAccessFileOutsideOfResourceRoot.class); + Path hostFolder = Files.createTempDirectory("test").toAbsolutePath(); + try (Context context = Context.create()) { + AbstractExecutableTestLanguage.execute(context, TestAccessFileOutsideOfResourceRoot.class, hostFolder.toString()); + } finally { + delete(hostFolder); } } @@ -386,9 +403,10 @@ public static class TestAccessFileInResourceRoot extends AbstractExecutableTestL @TruffleBoundary @SuppressWarnings("try") protected Object execute(RootNode node, Env env, Object[] contextArguments, Object[] frameArguments) throws Exception { - TruffleFile hostFolder = env.createTempDirectory(null, getClass().getSimpleName()); + TruffleFile hostFolder = env.getInternalTruffleFile((String) contextArguments[0]); try (TemporaryResourceCacheRoot cache = new TemporaryResourceCacheRoot()) { TruffleFile root = env.getInternalResource(FileAccessCheckResource.class); + assertTrue(root.isAbsolute()); TruffleFile file = root.resolve(FileAccessCheckResource.fileName); TruffleFile folder = root.resolve(FileAccessCheckResource.folderName); TruffleFile linkTarget = root.resolve(FileAccessCheckResource.linkTargetName); @@ -457,7 +475,7 @@ public FileVisitResult postVisitDirectory(TruffleFile dir, IOException exc) { assertTrue(link.isSymbolicLink()); assertSecurityException(() -> file.createLink(file.resolveSibling("ln"))); assertSecurityException(() -> file.createSymbolicLink(file.resolveSibling("lns"))); - assertEquals(linkTarget, link.readSymbolicLink()); + assertEquals(linkTarget.getName(), link.readSymbolicLink().getPath()); } assertSecurityException(file::delete); if (OSUtils.isUnix()) { @@ -470,8 +488,6 @@ public FileVisitResult postVisitDirectory(TruffleFile dir, IOException exc) { assertSecurityException(() -> 
file.move(file.resolveSibling("mv"))); assertSecurityException(() -> file.move(hostFolder.resolve("mv"))); return null; - } finally { - delete(hostFolder); } } @@ -491,10 +507,13 @@ private static String readContent(BufferedReader r) throws IOException { } @Test - public void testAccessFileInResourceRoot() { + public void testAccessFileInResourceRoot() throws IOException { Assume.assumeFalse("Cannot run as native unittest", ImageInfo.inImageRuntimeCode()); - try (Context context = Context.newBuilder().allowIO(IOAccess.ALL).build()) { - AbstractExecutableTestLanguage.execute(context, TestAccessFileInResourceRoot.class); + Path hostFolder = Files.createTempDirectory("test").toAbsolutePath(); + try (Context context = Context.create()) { + AbstractExecutableTestLanguage.execute(context, TestAccessFileInResourceRoot.class, hostFolder.toString()); + } finally { + delete(hostFolder); } } @@ -532,7 +551,7 @@ public void testOverriddenResourceRoot() throws Exception { Path cacheRoot3 = Files.createTempDirectory(null); Engine.copyResources(cacheRoot3, TestUtils.getDefaultLanguageId(TestOverriddenResourceRoot.class)); // Reset cached resource root - TemporaryResourceCacheRoot.setTestCacheRoot(null, true); + TemporaryResourceCacheRoot.setTestCacheRoot(null, false); try { // Set explicit resource cache root @@ -543,7 +562,7 @@ public void testOverriddenResourceRoot() throws Exception { AbstractExecutableTestLanguage.execute(context, TestOverriddenResourceRoot.class, libPath, strPath); } finally { // Reset cached resource root - TemporaryResourceCacheRoot.setTestCacheRoot(null, true); + TemporaryResourceCacheRoot.setTestCacheRoot(null, false); } // Set explicit component (language, instrument) cache root @@ -554,7 +573,7 @@ public void testOverriddenResourceRoot() throws Exception { AbstractExecutableTestLanguage.execute(context, TestOverriddenResourceRoot.class, libPath, strPath); } finally { // Reset cached resource root - TemporaryResourceCacheRoot.setTestCacheRoot(null, 
true); + TemporaryResourceCacheRoot.setTestCacheRoot(null, false); } // Set explicit component resource cache root @@ -566,7 +585,7 @@ public void testOverriddenResourceRoot() throws Exception { AbstractExecutableTestLanguage.execute(context, TestOverriddenResourceRoot.class, libPath, strPath); } finally { // Reset cached resource root - TemporaryResourceCacheRoot.setTestCacheRoot(null, true); + TemporaryResourceCacheRoot.setTestCacheRoot(null, false); } } finally { // Clean explicit resource root @@ -819,6 +838,85 @@ public void testOptionalResources() { } } + @Registration(/* ... */internalResources = SourcesResource.class) + public static class TestGetInternalTruffleFile extends AbstractExecutableTestLanguage { + + @Override + @TruffleBoundary + @SuppressWarnings("try") + protected Object execute(RootNode node, Env env, Object[] contextArguments, Object[] frameArguments) throws Exception { + try (TemporaryResourceCacheRoot cache = new TemporaryResourceCacheRoot()) { + TruffleFile srcRoot = env.getInternalResource(SourcesResource.ID); + verifyResources(srcRoot, SourcesResource.RESOURCES); + TruffleFile srcRootAsInternalTruffleFile = env.getInternalTruffleFile(srcRoot.getPath()); + verifyResources(srcRootAsInternalTruffleFile, SourcesResource.RESOURCES); + return ""; + } + } + } + + @Test + public void testGetInternalTruffleFile() { + Assume.assumeFalse("Cannot run as native unittest", ImageInfo.inImageRuntimeCode()); + try (Context context = Context.create()) { + AbstractExecutableTestLanguage.execute(context, TestGetInternalTruffleFile.class); + } + } + + @Registration(/* ... 
*/internalResources = SourcesResource.class) + public static class TestGetPublicTruffleFile extends AbstractExecutableTestLanguage { + + @Override + @TruffleBoundary + @SuppressWarnings("try") + protected Object execute(RootNode node, Env env, Object[] contextArguments, Object[] frameArguments) throws Exception { + try (TemporaryResourceCacheRoot cache = new TemporaryResourceCacheRoot()) { + TruffleFile srcRoot = env.getInternalResource(SourcesResource.ID); + verifyResources(srcRoot, SourcesResource.RESOURCES); + TruffleFile srcRootAsPublicTruffleFile = env.getPublicTruffleFile(srcRoot.getPath()); + assertNoFileAccess(srcRootAsPublicTruffleFile, srcRootAsPublicTruffleFile.resolveSibling("other")); + for (String resource : SourcesResource.RESOURCES) { + assertFails(() -> srcRootAsPublicTruffleFile.resolve(resource).readAllBytes(), SecurityException.class); + assertFails(() -> env.getPublicTruffleFile(srcRoot.resolve(resource).getPath()).readAllBytes(), SecurityException.class); + } + return ""; + } + } + } + + @Test + public void testGetPublicTruffleFile() { + Assume.assumeFalse("Cannot run as native unittest", ImageInfo.inImageRuntimeCode()); + try (Context context = Context.create()) { + AbstractExecutableTestLanguage.execute(context, TestGetPublicTruffleFile.class); + } + } + + @Registration(/* ... 
*/internalResources = SourcesResource.class) + public static class TestGetTruffleFileInternal extends AbstractExecutableTestLanguage { + + @Override + @TruffleBoundary + @SuppressWarnings("try") + protected Object execute(RootNode node, Env env, Object[] contextArguments, Object[] frameArguments) throws Exception { + try (TemporaryResourceCacheRoot cache = new TemporaryResourceCacheRoot()) { + TruffleFile srcRoot = env.getInternalResource(SourcesResource.ID); + verifyResources(srcRoot, SourcesResource.RESOURCES); + TruffleFile srcRootAsInternalTruffleFile = env.getTruffleFileInternal(srcRoot.getPath(), (f) -> true); + verifyResources(srcRootAsInternalTruffleFile, SourcesResource.RESOURCES); + return ""; + } + } + } + + @Test + public void testGetTruffleFileInternal() { + Assume.assumeFalse("Cannot run as native unittest", ImageInfo.inImageRuntimeCode()); + try (Context context = Context.create()) { + AbstractExecutableTestLanguage.execute(context, TestGetTruffleFileInternal.class); + } + } + private static boolean hasResource(Path folder, Class language, Class resource) { return hasResource(folder, TestUtils.getDefaultLanguageId(language), resource); } @@ -836,13 +934,15 @@ private static void assertSecurityException(TruffleFileAction action) { }, SecurityException.class); } - private static void delete(TruffleFile file) throws IOException { - if (file.isDirectory()) { - for (TruffleFile child : file.list()) { - delete(child); + private static void delete(Path file) throws IOException { + if (Files.isDirectory(file)) { + try (DirectoryStream children = Files.newDirectoryStream(file)) { + for (Path child : children) { + delete(child); + } } } - file.delete(); + Files.delete(file); } @FunctionalInterface @@ -853,21 +953,19 @@ interface TruffleFileAction { static final class TemporaryResourceCacheRoot implements AutoCloseable { private final Path root; - private final boolean disposeResourceFileSystemOnClose; TemporaryResourceCacheRoot() throws IOException { - 
this(true); + this(false); } - TemporaryResourceCacheRoot(boolean disposeResourceFileSystemOnClose) throws IOException { - this(Files.createTempDirectory(null), disposeResourceFileSystemOnClose); + TemporaryResourceCacheRoot(boolean nativeImageRuntime) throws IOException { + this(Files.createTempDirectory(null), nativeImageRuntime); } - TemporaryResourceCacheRoot(Path cacheRoot, boolean disposeResourceFileSystemOnClose) throws IOException { + TemporaryResourceCacheRoot(Path cacheRoot, boolean nativeImageRuntime) throws IOException { try { root = cacheRoot.toRealPath(); - this.disposeResourceFileSystemOnClose = disposeResourceFileSystemOnClose; - setTestCacheRoot(root, false); + setTestCacheRoot(root, nativeImageRuntime); } catch (ClassNotFoundException e) { throw new AssertionError("Failed to set cache root.", e); } @@ -880,7 +978,7 @@ Path getRoot() { @Override public void close() { try { - setTestCacheRoot(null, disposeResourceFileSystemOnClose); + setTestCacheRoot(null, false); delete(root); } catch (IOException | ClassNotFoundException e) { throw new AssertionError("Failed to reset cache root.", e); @@ -898,13 +996,13 @@ private static void delete(Path path) throws IOException { Files.delete(path); } - static void reset(boolean disposeResourceFileSystem) throws ClassNotFoundException { - setTestCacheRoot(null, disposeResourceFileSystem); + static void reset(boolean nativeImageRuntime) throws ClassNotFoundException { + setTestCacheRoot(null, nativeImageRuntime); } - private static void setTestCacheRoot(Path root, boolean disposeResourceFileSystem) throws ClassNotFoundException { - Class internalResourceCacheClass = Class.forName("com.oracle.truffle.polyglot.InternalResourceCache"); - ReflectionUtils.invokeStatic(internalResourceCacheClass, "setTestCacheRoot", new Class[]{Path.class, boolean.class}, root, disposeResourceFileSystem); + private static void setTestCacheRoot(Path root, boolean nativeImageRuntime) throws ClassNotFoundException { + Class 
internalResourceCacheClass = Class.forName("com.oracle.truffle.polyglot.InternalResourceRoots"); + ReflectionUtils.invokeStatic(internalResourceCacheClass, "setTestCacheRoot", new Class[]{Path.class, boolean.class}, root, nativeImageRuntime); } } } diff --git a/truffle/src/com.oracle.truffle.api/src/com/oracle/truffle/api/TruffleFile.java b/truffle/src/com.oracle.truffle.api/src/com/oracle/truffle/api/TruffleFile.java index 32713f74b9f3..e8cf7fc5c357 100644 --- a/truffle/src/com.oracle.truffle.api/src/com/oracle/truffle/api/TruffleFile.java +++ b/truffle/src/com.oracle.truffle.api/src/com/oracle/truffle/api/TruffleFile.java @@ -2068,8 +2068,7 @@ public interface FileTypeDetector { static final class FileSystemContext { - // instance of PolyglotLanguageContext, PolyglotEngineImpl or - // PolyglotImpl.EmbedderFileSystemContext + // Instance of PolyglotLanguageContext or PolyglotImpl.EmbedderFileSystemContext final Object engineObject; private volatile Map> fileTypeDetectors; diff --git a/truffle/src/com.oracle.truffle.api/src/com/oracle/truffle/api/TruffleLanguage.java b/truffle/src/com.oracle.truffle.api/src/com/oracle/truffle/api/TruffleLanguage.java index 46f197a74b74..0f90bcf606f4 100644 --- a/truffle/src/com.oracle.truffle.api/src/com/oracle/truffle/api/TruffleLanguage.java +++ b/truffle/src/com.oracle.truffle.api/src/com/oracle/truffle/api/TruffleLanguage.java @@ -3056,7 +3056,7 @@ private

TruffleFile getTruffleFileInternalImpl(P path, Predicate optionKey); - public abstract String getRelativePathInLanguageHome(TruffleFile truffleFile); - - public abstract TruffleFile relativizeToInternalResourceCache(TruffleFile truffleFile); + public abstract String getRelativePathInResourceRoot(TruffleFile truffleFile); public abstract void onSourceCreated(Source source); @@ -760,6 +758,8 @@ public abstract Iterator mergeHostGuestFrames(Object polyglotEngine, S public abstract TruffleFile getInternalResource(Object owner, String resourceId) throws IOException; + public abstract Path getEngineResource(Object polyglotEngine, String resourceId) throws IOException; + public abstract Collection getResourceIds(String componentId); public abstract void setIsolatePolyglot(AbstractPolyglotImpl instance); diff --git a/truffle/src/com.oracle.truffle.api/src/com/oracle/truffle/api/source/Source.java b/truffle/src/com.oracle.truffle.api/src/com/oracle/truffle/api/source/Source.java index 09914686b560..5361cb9472e7 100644 --- a/truffle/src/com.oracle.truffle.api/src/com/oracle/truffle/api/source/Source.java +++ b/truffle/src/com.oracle.truffle.api/src/com/oracle/truffle/api/source/Source.java @@ -1088,18 +1088,12 @@ static Source buildSource(String language, Object origin, String name, String pa useContent = enforceInterfaceContracts(useContent); String relativePathInLanguageHome = null; if (useTruffleFile != null) { - TruffleFile relativeFileInResourceCache = SourceAccessor.ACCESSOR.engineSupport().relativizeToInternalResourceCache(useTruffleFile); - if (relativeFileInResourceCache != null) { - useTruffleFile = relativeFileInResourceCache; - relativePathInLanguageHome = relativeFileInResourceCache.getPath(); - } else { - /* - * The relativePathInLanguageHome has to be calculated also for Sources created in - * the image execution time. They have to have the same hash code as sources created - * during the context pre-initialization. 
- */ - relativePathInLanguageHome = SourceAccessor.ACCESSOR.engineSupport().getRelativePathInLanguageHome(useTruffleFile); - } + /* + * The relativePathInLanguageHome has to be calculated also for Sources created in the + * image execution time. They have to have the same hash code as sources created during + * the context pre-initialization. + */ + relativePathInLanguageHome = SourceAccessor.ACCESSOR.engineSupport().getRelativePathInResourceRoot(useTruffleFile); if (relativePathInLanguageHome != null) { Object fsEngineObject = SourceAccessor.ACCESSOR.languageSupport().getFileSystemEngineObject(SourceAccessor.ACCESSOR.languageSupport().getFileSystemContext(useTruffleFile)); if (SourceAccessor.ACCESSOR.engineSupport().inContextPreInitialization(fsEngineObject)) { diff --git a/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/EngineAccessor.java b/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/EngineAccessor.java index c39a020bfe65..bef1cf3482ce 100644 --- a/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/EngineAccessor.java +++ b/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/EngineAccessor.java @@ -715,8 +715,6 @@ public boolean inContextPreInitialization(Object polyglotObject) { if (polyglotObject instanceof PolyglotLanguageContext languageContext) { PolyglotContextImpl polyglotContext = languageContext.context; return polyglotContext.getEngine().inEnginePreInitialization && polyglotContext.parent == null; - } else if (polyglotObject instanceof PolyglotEngineImpl polyglotEngine) { - return polyglotEngine.inEnginePreInitialization; } else if (polyglotObject instanceof EmbedderFileSystemContext) { return false; } else { @@ -1006,7 +1004,7 @@ public TruffleContext createInternalContext(Object sourcePolyglotLanguageContext fileSystemConfig = creatorConfig.fileSystemConfig; } else { FileSystem publicFileSystem = FileSystems.newNoIOFileSystem(); - FileSystem 
internalFileSystem = PolyglotEngineImpl.ALLOW_IO ? FileSystems.newLanguageHomeFileSystem() : publicFileSystem; + FileSystem internalFileSystem = PolyglotEngineImpl.ALLOW_IO ? FileSystems.newResourcesFileSystem() : publicFileSystem; fileSystemConfig = new FileSystemConfig(api.getIOAccessNone(), publicFileSystem, internalFileSystem); } @@ -1270,8 +1268,6 @@ public boolean isInternal(Object engineObject, FileSystem fs) { public boolean isSocketIOAllowed(Object engineFileSystemContext) { if (engineFileSystemContext instanceof PolyglotLanguageContext languageContext) { return languageContext.getImpl().getIO().hasHostSocketAccess(languageContext.context.config.fileSystemConfig.ioAccess); - } else if (engineFileSystemContext instanceof PolyglotEngineImpl) { - return false; } else if (engineFileSystemContext instanceof EmbedderFileSystemContext) { return true; } else { @@ -1532,25 +1528,8 @@ public String getUnparsedOptionValue(OptionValues optionValues, OptionKey opt } @Override - public String getRelativePathInLanguageHome(TruffleFile truffleFile) { - return FileSystems.getRelativePathInLanguageHome(truffleFile); - } - - @Override - public TruffleFile relativizeToInternalResourceCache(TruffleFile truffleFile) { - FileSystem fs = LANGUAGE.getFileSystem(truffleFile); - if (FileSystems.isInternalResourceFileSystem(fs)) { - if (truffleFile.isAbsolute()) { - Path root = fs.parsePath(FileSystems.getInternalResourceFileSystemRoot(fs).get().toString()); - Path path = LANGUAGE.getPath(truffleFile); - if (path.startsWith(root)) { - return LANGUAGE.getTruffleFile(root.relativize(path), LANGUAGE.getFileSystemContext(truffleFile)); - } - } else { - return truffleFile; - } - } - return null; + public String getRelativePathInResourceRoot(TruffleFile truffleFile) { + return FileSystems.getRelativePathInResourceRoot(truffleFile); } @Override @@ -2077,28 +2056,22 @@ public TruffleFile getInternalResource(Object owner, String resourceId) throws I } private static TruffleFile 
getInternalResource(Object owner, String resourceId, boolean failIfMissing) throws IOException { - Map cachedRoots; InternalResourceCache resourceCache; String componentId; Supplier> supportedResourceIds; - if (owner instanceof PolyglotLanguageContext languageContext) { - PolyglotLanguage polyglotLanguage = languageContext.language; - cachedRoots = polyglotLanguage.internalResources; - LanguageCache cache = polyglotLanguage.cache; + PolyglotLanguageContext languageContext; + if (owner instanceof PolyglotLanguageContext) { + languageContext = (PolyglotLanguageContext) owner; + LanguageCache cache = languageContext.language.cache; resourceCache = cache.getResourceCache(resourceId); componentId = cache.getId(); supportedResourceIds = cache::getResourceIds; } else if (owner instanceof PolyglotInstrument polyglotInstrument) { - cachedRoots = polyglotInstrument.internalResources; InstrumentCache cache = polyglotInstrument.cache; resourceCache = cache.getResourceCache(resourceId); componentId = cache.getId(); supportedResourceIds = cache::getResourceIds; - } else if (owner instanceof PolyglotEngineImpl) { - cachedRoots = null; - resourceCache = InternalResourceCache.getEngineResource(resourceId); - componentId = PolyglotEngineImpl.ENGINE_ID; - supportedResourceIds = InternalResourceCache::getEngineResourceIds; + languageContext = getPolyglotContext(null).getHostContext(); } else { throw CompilerDirectives.shouldNotReachHere("Unsupported owner " + owner); } @@ -2110,17 +2083,18 @@ private static TruffleFile getInternalResource(Object owner, String resourceId, return null; } } - TruffleFile root = cachedRoots != null ? 
cachedRoots.get(resourceId) : null; - if (root == null) { - PolyglotEngineImpl polyglotEngine = ((VMObject) owner).getEngine(); - Object fsContext = EngineAccessor.LANGUAGE.createFileSystemContext(polyglotEngine, resourceCache.getResourceFileSystem(polyglotEngine)); - root = EngineAccessor.LANGUAGE.getTruffleFile(".", fsContext); - if (cachedRoots != null) { - var prevValue = cachedRoots.putIfAbsent(resourceId, root); - root = prevValue != null ? prevValue : root; - } + Path rootPath = resourceCache.getPath(languageContext.getEngine()); + return EngineAccessor.LANGUAGE.getTruffleFile(rootPath.toString(), languageContext.getInternalFileSystemContext()); + } + + @Override + public Path getEngineResource(Object polyglotEngine, String resourceId) throws IOException { + InternalResourceCache resourceCache = InternalResourceCache.getEngineResource(resourceId); + if (resourceCache != null) { + return resourceCache.getPath((PolyglotEngineImpl) polyglotEngine); + } else { + return null; } - return root; } @Override diff --git a/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/FileSystems.java b/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/FileSystems.java index aeaaab66ba15..ae70ea767f62 100644 --- a/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/FileSystems.java +++ b/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/FileSystems.java @@ -104,8 +104,8 @@ static FileSystem newNIOFileSystem(java.nio.file.FileSystem fileSystem) { return new NIOFileSystem(fileSystem, null, false); } - static FileSystem allowLanguageHomeAccess(FileSystem fileSystem) { - return new LanguageHomeFileSystem(newDefaultFileSystem(null), fileSystem); + static FileSystem allowInternalResourceAccess(FileSystem fileSystem) { + return new ResourcesFileSystem(newDefaultFileSystem(null), fileSystem); } static FileSystem newReadOnlyFileSystem(FileSystem fileSystem) { @@ -116,9 +116,9 @@ static FileSystem 
newNoIOFileSystem() { return new DeniedIOFileSystem(); } - static FileSystem newLanguageHomeFileSystem() { + static FileSystem newResourcesFileSystem() { FileSystem defaultFS = newDefaultFileSystem(null); - return new LanguageHomeFileSystem(new ReadOnlyFileSystem(defaultFS), new PathOperationsOnlyFileSystem(defaultFS)); + return new ResourcesFileSystem(new ReadOnlyFileSystem(defaultFS), new PathOperationsOnlyFileSystem(defaultFS)); } static boolean hasNoAccess(FileSystem fileSystem) { @@ -137,11 +137,18 @@ static Supplier>> return new FileTypeDetectorsSupplier(languageCaches); } - static String getRelativePathInLanguageHome(TruffleFile file) { + static String getRelativePathInResourceRoot(TruffleFile file) { Object engineObject = EngineAccessor.LANGUAGE.getFileSystemEngineObject(EngineAccessor.LANGUAGE.getFileSystemContext(file)); if (engineObject instanceof PolyglotLanguageContext languageContext) { - FileSystem fs = EngineAccessor.LANGUAGE.getFileSystem(file); Path path = EngineAccessor.LANGUAGE.getPath(file); + if (InternalResourceCache.usesInternalResources()) { + Path hostPath = toHostPath(path); + InternalResourceCache cache = InternalResourceRoots.findInternalResource(hostPath); + if (cache != null) { + return cache.getPathOrNull().relativize(hostPath).toString(); + } + } + FileSystem fs = EngineAccessor.LANGUAGE.getFileSystem(file); String result = relativizeToLanguageHome(fs, path, languageContext.language); if (result != null) { return result; @@ -149,7 +156,7 @@ static String getRelativePathInLanguageHome(TruffleFile file) { Map accessibleLanguages = languageContext.getAccessibleLanguages(true); /* * The accessibleLanguages is null for a closed context. The - * getRelativePathInLanguageHome may be called even for closed context by the compiler + * getRelativePathInResourceRoot may be called even for closed context by the compiler * thread. 
*/ if (accessibleLanguages != null) { @@ -162,9 +169,6 @@ static String getRelativePathInLanguageHome(TruffleFile file) { } } return null; - } else if (engineObject instanceof PolyglotEngineImpl) { - // instrument internal resources are never relative to language homes - return null; } else if (engineObject instanceof EmbedderFileSystemContext) { // embedding sources are never relative to language homes return null; @@ -173,19 +177,11 @@ static String getRelativePathInLanguageHome(TruffleFile file) { } } - static FileSystem newInternalResourceFileSystem(Supplier rootSupplier) { - return newReadOnlyFileSystem(new InternalResourceFileSystem(rootSupplier)); - } - - static boolean isInternalResourceFileSystem(FileSystem fileSystem) { - return (fileSystem instanceof ReadOnlyFileSystem readOnlyFileSystem) && readOnlyFileSystem.delegateFileSystem instanceof InternalResourceFileSystem; - } - - static Supplier getInternalResourceFileSystemRoot(FileSystem fileSystem) { - if (isInternalResourceFileSystem(fileSystem)) { - return ((InternalResourceFileSystem) ((ReadOnlyFileSystem) fileSystem).delegateFileSystem).rootSupplier; + private static Path toHostPath(Path path) { + if (path.getClass() != Path.of("").getClass()) { + return Paths.get(path.toString()); } else { - throw new IllegalArgumentException(Objects.toString(fileSystem)); + return path; } } @@ -678,17 +674,7 @@ private Path resolve(FileSystem fs) { if (current instanceof Path) { return (Path) current; } else if (current instanceof ImageHeapPath) { - ImageHeapPath imageHeapPath = (ImageHeapPath) current; - String languageId = imageHeapPath.languageId; - String path = imageHeapPath.path; - Path result; - String newLanguageHome; - if (languageId != null && (newLanguageHome = LanguageCache.languages().get(languageId).getLanguageHome()) != null) { - result = fs.parsePath(newLanguageHome).resolve(path); - } else { - result = fs.parsePath(path); - } - return result; + return ((ImageHeapPath) current).resolve(fs); } else { 
throw new IllegalStateException("Unknown delegate " + current); } @@ -696,15 +682,25 @@ private Path resolve(FileSystem fs) { void onPreInitializeContextEnd(Map languageHomes) { Path internalPath = (Path) delegatePath; - String languageId = null; - for (Map.Entry e : languageHomes.entrySet()) { - if (internalPath.startsWith(e.getValue())) { - internalPath = e.getValue().relativize(internalPath); - languageId = e.getKey(); - break; + ImageHeapPath result = null; + InternalResourceCache owner = InternalResourceRoots.findInternalResource(internalPath); + if (owner != null) { + String relativePath = owner.getPathOrNull().relativize(internalPath).toString(); + result = new InternalResourceImageHeapPath(owner, relativePath); + } + if (result == null) { + for (Map.Entry e : languageHomes.entrySet()) { + if (internalPath.startsWith(e.getValue())) { + String languageId = e.getKey(); + String relativePath = e.getValue().relativize(internalPath).toString(); + result = new LanguageHomeImageHeapPath(languageId, relativePath); + } } } - delegatePath = new ImageHeapPath(languageId, internalPath.toString(), internalPath.isAbsolute()); + if (result == null) { + result = new PathImageHeapPath(internalPath.toString(), internalPath.isAbsolute()); + } + delegatePath = result; } @Override @@ -726,9 +722,7 @@ public String toString() { // TruffleFiles created during context pre-initialization. 
if (delegate == INVALID_FILESYSTEM) { ImageHeapPath imageHeapPath = (ImageHeapPath) delegatePath; - if (imageHeapPath.languageId != null) { - throw new UnsupportedOperationException("ToString in the image heap form is supported only for files outside language homes."); - } + assert imageHeapPath instanceof PathImageHeapPath : "ToString can be called only for non internal resource files located outside of language homes."; return imageHeapPath.path; } else { return super.toString(); @@ -758,10 +752,61 @@ public URI getReinitializedURI() { } } - private record ImageHeapPath(String languageId, String path, boolean absolute) { + private abstract static class ImageHeapPath { + + final String path; + final boolean absolute; + + ImageHeapPath(String path, boolean absolute) { + this.path = Objects.requireNonNull(path, "Path must be non-null"); + this.absolute = absolute; + } + + abstract Path resolve(FileSystem fileSystem); + + } + + private static final class LanguageHomeImageHeapPath extends ImageHeapPath { + + private final String languageId; + + LanguageHomeImageHeapPath(String languageId, String path) { + super(path, false); + this.languageId = Objects.requireNonNull(languageId, "LanguageId must be non-null"); + } + + @Override + Path resolve(FileSystem fileSystem) { + String newLanguageHome = LanguageCache.languages().get(languageId).getLanguageHome(); + assert newLanguageHome != null : "Pre-initialized language " + languageId + " must exist in the image execution time."; + return fileSystem.parsePath(newLanguageHome).resolve(path); + } + } + + private static final class InternalResourceImageHeapPath extends ImageHeapPath { + + private final InternalResourceCache cache; - private ImageHeapPath { - assert path != null; + InternalResourceImageHeapPath(InternalResourceCache cache, String path) { + super(path, false); + this.cache = cache; + } + + @Override + Path resolve(FileSystem fileSystem) { + return 
fileSystem.parsePath(cache.getPathOrNull().toString()).resolve(path); + } + } + + private static final class PathImageHeapPath extends ImageHeapPath { + + PathImageHeapPath(String path, boolean absolute) { + super(path, absolute); + } + + @Override + Path resolve(FileSystem fileSystem) { + return fileSystem.parsePath(path); } } } @@ -1183,24 +1228,24 @@ public boolean isSameFile(Path path1, Path path2, LinkOption... options) throws } } - private static final class LanguageHomeFileSystem implements PolyglotFileSystem { + private static final class ResourcesFileSystem implements PolyglotFileSystem { - private final FileSystem languageHomeFileSystem; + private final FileSystem resourcesFileSystem; private final FileSystem delegateFileSystem; private volatile Set languageHomes; - LanguageHomeFileSystem(FileSystem languageHomeFileSystem, FileSystem delegateFileSystem) { - this.languageHomeFileSystem = languageHomeFileSystem; + ResourcesFileSystem(FileSystem resourcesFileSystem, FileSystem delegateFileSystem) { + this.resourcesFileSystem = resourcesFileSystem; this.delegateFileSystem = delegateFileSystem; - Class languageHomeFileSystemPathType = this.languageHomeFileSystem.parsePath("").getClass(); + Class resourcesFileSystemPathType = this.resourcesFileSystem.parsePath("").getClass(); Class customFileSystemPathType = delegateFileSystem.parsePath("").getClass(); - if (languageHomeFileSystemPathType != customFileSystemPathType) { + if (resourcesFileSystemPathType != customFileSystemPathType) { throw new IllegalArgumentException("Given FileSystem must have the same Path type as the default FileSystem."); } - if (!languageHomeFileSystem.getSeparator().equals(delegateFileSystem.getSeparator())) { + if (!resourcesFileSystem.getSeparator().equals(delegateFileSystem.getSeparator())) { throw new IllegalArgumentException("Given FileSystem must use the same separator character as the default FileSystem."); } - if 
(!languageHomeFileSystem.getPathSeparator().equals(delegateFileSystem.getPathSeparator())) { + if (!resourcesFileSystem.getPathSeparator().equals(delegateFileSystem.getPathSeparator())) { throw new IllegalArgumentException("Given FileSystem must use the same path separator character as the default FileSystem."); } } @@ -1233,8 +1278,8 @@ public Path parsePath(String path) { @Override public void checkAccess(Path path, Set modes, LinkOption... linkOptions) throws IOException { Path absolutePath = toNormalizedAbsolutePath(path); - if (inLanguageHome(absolutePath)) { - languageHomeFileSystem.checkAccess(absolutePath, modes, linkOptions); + if (inResourceRoot(absolutePath)) { + resourcesFileSystem.checkAccess(absolutePath, modes, linkOptions); } else { delegateFileSystem.checkAccess(path, modes, linkOptions); } @@ -1243,8 +1288,8 @@ public void checkAccess(Path path, Set modes, LinkOption.. @Override public void createDirectory(Path dir, FileAttribute... attrs) throws IOException { Path absolutePath = toNormalizedAbsolutePath(dir); - if (inLanguageHome(absolutePath)) { - languageHomeFileSystem.createDirectory(absolutePath, attrs); + if (inResourceRoot(absolutePath)) { + resourcesFileSystem.createDirectory(absolutePath, attrs); } else { delegateFileSystem.createDirectory(dir, attrs); } @@ -1253,8 +1298,8 @@ public void createDirectory(Path dir, FileAttribute... attrs) throws IOExcept @Override public void delete(Path path) throws IOException { Path absolutePath = toNormalizedAbsolutePath(path); - if (inLanguageHome(absolutePath)) { - languageHomeFileSystem.delete(absolutePath); + if (inResourceRoot(absolutePath)) { + resourcesFileSystem.delete(absolutePath); } else { delegateFileSystem.delete(path); } @@ -1263,8 +1308,8 @@ public void delete(Path path) throws IOException { @Override public SeekableByteChannel newByteChannel(Path path, Set options, FileAttribute... 
attrs) throws IOException { Path absolutePath = toNormalizedAbsolutePath(path); - if (inLanguageHome(absolutePath)) { - return languageHomeFileSystem.newByteChannel(absolutePath, options, attrs); + if (inResourceRoot(absolutePath)) { + return resourcesFileSystem.newByteChannel(absolutePath, options, attrs); } else { return delegateFileSystem.newByteChannel(path, options, attrs); } @@ -1273,8 +1318,8 @@ public SeekableByteChannel newByteChannel(Path path, Set o @Override public DirectoryStream newDirectoryStream(Path dir, DirectoryStream.Filter filter) throws IOException { Path absolutePath = toNormalizedAbsolutePath(dir); - if (inLanguageHome(absolutePath)) { - return languageHomeFileSystem.newDirectoryStream(absolutePath, filter); + if (inResourceRoot(absolutePath)) { + return resourcesFileSystem.newDirectoryStream(absolutePath, filter); } else { return delegateFileSystem.newDirectoryStream(dir, filter); } @@ -1288,8 +1333,8 @@ public Path toAbsolutePath(Path path) { @Override public Path toRealPath(Path path, LinkOption... linkOptions) throws IOException { Path absolutePath = toNormalizedAbsolutePath(path); - if (inLanguageHome(absolutePath)) { - return languageHomeFileSystem.toRealPath(path); + if (inResourceRoot(absolutePath)) { + return resourcesFileSystem.toRealPath(path); } else { return delegateFileSystem.toRealPath(path); } @@ -1298,8 +1343,8 @@ public Path toRealPath(Path path, LinkOption... linkOptions) throws IOException @Override public Map readAttributes(Path path, String attributes, LinkOption... 
options) throws IOException { Path absolutePath = toNormalizedAbsolutePath(path); - if (inLanguageHome(absolutePath)) { - return languageHomeFileSystem.readAttributes(absolutePath, attributes, options); + if (inResourceRoot(absolutePath)) { + return resourcesFileSystem.readAttributes(absolutePath, attributes, options); } else { return delegateFileSystem.readAttributes(path, attributes, options); } @@ -1308,8 +1353,8 @@ public Map readAttributes(Path path, String attributes, LinkOpti @Override public void setAttribute(Path path, String attribute, Object value, LinkOption... options) throws IOException { Path absolutePath = toNormalizedAbsolutePath(path); - if (inLanguageHome(absolutePath)) { - languageHomeFileSystem.setAttribute(absolutePath, attribute, value, options); + if (inResourceRoot(absolutePath)) { + resourcesFileSystem.setAttribute(absolutePath, attribute, value, options); } else { delegateFileSystem.setAttribute(path, attribute, value, options); } @@ -1319,10 +1364,10 @@ public void setAttribute(Path path, String attribute, Object value, LinkOption.. public void createLink(Path link, Path existing) throws IOException { Path absoluteLink = toNormalizedAbsolutePath(link); Path absoluteExisting = toNormalizedAbsolutePath(existing); - boolean linkInHome = inLanguageHome(absoluteLink); - boolean existingInHome = inLanguageHome(absoluteExisting); + boolean linkInHome = inResourceRoot(absoluteLink); + boolean existingInHome = inResourceRoot(absoluteExisting); if (linkInHome && existingInHome) { - languageHomeFileSystem.createLink(absoluteLink, absoluteExisting); + resourcesFileSystem.createLink(absoluteLink, absoluteExisting); } else if (!linkInHome && !existingInHome) { delegateFileSystem.createLink(link, existing); } else { @@ -1334,10 +1379,10 @@ public void createLink(Path link, Path existing) throws IOException { public void createSymbolicLink(Path link, Path target, FileAttribute... 
attrs) throws IOException { Path absoluteLink = toNormalizedAbsolutePath(link); Path absoluteTarget = toNormalizedAbsolutePath(target); - boolean linkInHome = inLanguageHome(absoluteLink); - boolean targetInHome = inLanguageHome(absoluteTarget); + boolean linkInHome = inResourceRoot(absoluteLink); + boolean targetInHome = inResourceRoot(absoluteTarget); if (linkInHome && targetInHome) { - languageHomeFileSystem.createSymbolicLink(absoluteLink, target); + resourcesFileSystem.createSymbolicLink(absoluteLink, target); } else if (!linkInHome && !targetInHome) { delegateFileSystem.createSymbolicLink(link, target); } else { @@ -1348,8 +1393,8 @@ public void createSymbolicLink(Path link, Path target, FileAttribute... attrs @Override public Path readSymbolicLink(Path link) throws IOException { Path absolutePath = toNormalizedAbsolutePath(link); - if (inLanguageHome(absolutePath)) { - return languageHomeFileSystem.readSymbolicLink(absolutePath); + if (inResourceRoot(absolutePath)) { + return resourcesFileSystem.readSymbolicLink(absolutePath); } else { return delegateFileSystem.readSymbolicLink(link); } @@ -1357,7 +1402,7 @@ public Path readSymbolicLink(Path link) throws IOException { @Override public void setCurrentWorkingDirectory(Path currentWorkingDirectory) { - languageHomeFileSystem.setCurrentWorkingDirectory(currentWorkingDirectory); + resourcesFileSystem.setCurrentWorkingDirectory(currentWorkingDirectory); delegateFileSystem.setCurrentWorkingDirectory(currentWorkingDirectory); } @@ -1374,8 +1419,8 @@ public String getPathSeparator() { @Override public String getMimeType(Path path) { Path absolutePath = toNormalizedAbsolutePath(path); - if (inLanguageHome(absolutePath)) { - return languageHomeFileSystem.getMimeType(absolutePath); + if (inResourceRoot(absolutePath)) { + return resourcesFileSystem.getMimeType(absolutePath); } else { return delegateFileSystem.getMimeType(path); } @@ -1384,8 +1429,8 @@ public String getMimeType(Path path) { @Override public Charset 
getEncoding(Path path) { Path absolutePath = toNormalizedAbsolutePath(path); - if (inLanguageHome(absolutePath)) { - return languageHomeFileSystem.getEncoding(absolutePath); + if (inResourceRoot(absolutePath)) { + return resourcesFileSystem.getEncoding(absolutePath); } else { return delegateFileSystem.getEncoding(path); } @@ -1400,10 +1445,10 @@ public Path getTempDirectory() { public boolean isSameFile(Path path1, Path path2, LinkOption... options) throws IOException { Path absolutePath1 = toNormalizedAbsolutePath(path1); Path absolutePath2 = toNormalizedAbsolutePath(path2); - boolean path1InHome = inLanguageHome(absolutePath1); - boolean path2InHome = inLanguageHome(absolutePath2); + boolean path1InHome = inResourceRoot(absolutePath1); + boolean path2InHome = inResourceRoot(absolutePath2); if (path1InHome && path2InHome) { - return languageHomeFileSystem.isSameFile(absolutePath1, absolutePath2, options); + return resourcesFileSystem.isSameFile(absolutePath1, absolutePath2, options); } else if (!path1InHome && !path2InHome) { return delegateFileSystem.isSameFile(path1, path2); } else { @@ -1415,7 +1460,7 @@ private Path toNormalizedAbsolutePath(Path path) { if (path.isAbsolute()) { return path; } - Path absolutePath = languageHomeFileSystem.toAbsolutePath(path); + Path absolutePath = resourcesFileSystem.toAbsolutePath(path); if (isNormalized(path)) { return absolutePath; } else { @@ -1441,10 +1486,13 @@ private static boolean isNormalized(Path path) { return true; } - private boolean inLanguageHome(final Path path) { + private boolean inResourceRoot(final Path path) { if (!(path.isAbsolute() && isNormalized(path))) { throw new IllegalArgumentException("The path must be normalized absolute path."); } + if (InternalResourceRoots.findRoot(path) != null) { + return true; + } for (Path home : getLanguageHomes()) { if (path.startsWith(home)) { return true; @@ -1776,249 +1824,6 @@ public Map> get() { } } - private static final class InternalResourceFileSystem implements 
PolyglotFileSystem { - - private final FileSystem delegate; - private final Supplier rootSupplier; - - InternalResourceFileSystem(Supplier rootSupplier) { - Objects.requireNonNull(rootSupplier, "The rootSupplier must be non-null."); - this.delegate = newDefaultFileSystem(null); - this.rootSupplier = rootSupplier; - } - - @Override - public Path parsePath(URI uri) { - throw new UnsupportedOperationException(); - } - - @Override - public Path parsePath(String path) { - return new InternalResourcePath(delegate.parsePath(path)); - } - - @Override - public void checkAccess(Path path, Set modes, LinkOption... linkOptions) throws IOException { - Path normalized = InternalResourcePath.as(path).resolveDelegateAbsolutePath(); - delegate.checkAccess(normalized, modes, linkOptions); - } - - @Override - public void createDirectory(Path dir, FileAttribute... attrs) throws IOException { - Path normalized = InternalResourcePath.as(dir).resolveDelegateAbsolutePath(); - delegate.createDirectory(normalized, attrs); - } - - @Override - public void delete(Path path) throws IOException { - Path normalized = InternalResourcePath.as(path).resolveDelegateAbsolutePath(); - delegate.delete(normalized); - } - - @Override - public SeekableByteChannel newByteChannel(Path path, Set options, FileAttribute... 
attrs) throws IOException { - Path normalized = InternalResourcePath.as(path).resolveDelegateAbsolutePath(); - return delegate.newByteChannel(normalized, options, attrs); - } - - @Override - public DirectoryStream newDirectoryStream(Path dir, DirectoryStream.Filter filter) throws IOException { - InternalResourcePath castedPath = InternalResourcePath.as(dir); - Path normalized = castedPath.resolveDelegateAbsolutePath(); - DirectoryStream delegateStream = delegate.newDirectoryStream(normalized, filter); - return new DirectoryStream<>() { - @Override - public Iterator iterator() { - return new ForwardingPath.ForwardingPathIterator<>(delegateStream.iterator(), castedPath::wrap); - } - - @Override - public void close() throws IOException { - delegateStream.close(); - } - }; - } - - @Override - public Path toAbsolutePath(Path path) { - return path.toAbsolutePath(); - } - - @Override - public Path toRealPath(Path path, LinkOption... linkOptions) throws IOException { - return path.toRealPath(linkOptions); - } - - @Override - public Map readAttributes(Path path, String attributes, LinkOption... options) throws IOException { - Path normalized = InternalResourcePath.as(path).resolveDelegateAbsolutePath(); - return delegate.readAttributes(normalized, attributes, options); - } - - @Override - public void setAttribute(Path path, String attribute, Object value, LinkOption... options) throws IOException { - Path normalized = InternalResourcePath.as(path).resolveDelegateAbsolutePath(); - delegate.setAttribute(normalized, attribute, value, options); - } - - @Override - public void createLink(Path link, Path existing) throws IOException { - Path normalizedLink = InternalResourcePath.as(link).resolveDelegateAbsolutePath(); - Path normalizedExisting = InternalResourcePath.as(existing).resolveDelegateAbsolutePath(); - delegate.createLink(normalizedLink, normalizedExisting); - } - - @Override - public void createSymbolicLink(Path link, Path target, FileAttribute... 
attrs) throws IOException { - Path normalizedLink = InternalResourcePath.as(link).resolveDelegateAbsolutePath(); - Path normalizedTarget = InternalResourcePath.as(target).resolveDelegateAbsolutePath(); - delegate.createSymbolicLink(normalizedLink, normalizedTarget, attrs); - } - - @Override - public Path readSymbolicLink(Path link) throws IOException { - InternalResourcePath castedPath = InternalResourcePath.as(link); - Path normalizedLink = castedPath.resolveDelegateAbsolutePath(); - InternalResourcePath result = castedPath.wrap(delegate.readSymbolicLink(normalizedLink)); - // Ensure that the link does not point outside the internal resource root. - result.resolveDelegateAbsolutePath(); - return result; - } - - @Override - public String getSeparator() { - return delegate.getSeparator(); - } - - @Override - public String getPathSeparator() { - return delegate.getPathSeparator(); - } - - @Override - public boolean isSameFile(Path path1, Path path2, LinkOption... options) { - Path normalized1 = InternalResourcePath.as(path1).resolveDelegateAbsolutePath(); - Path normalized2 = InternalResourcePath.as(path2).resolveDelegateAbsolutePath(); - return normalized1.equals(normalized2); - } - - @Override - public boolean isInternal(AbstractPolyglotImpl polyglot) { - return true; - } - - @Override - public boolean hasNoAccess() { - return false; - } - - @Override - public boolean isHost() { - return false; - } - - private final class InternalResourcePath extends ForwardingPath implements ResetablePath { - - private final Path delegate; - - private InternalResourcePath(Path delegate) { - this.delegate = delegate; - } - - @Override - InternalResourcePath wrap(Path path) { - return path == null ? 
null : new InternalResourcePath(path); - } - - @Override - Path unwrap() { - return delegate; - } - - static InternalResourcePath as(Path path) { - return (InternalResourcePath) path; - } - - @Override - public Path resolve(Path other) { - if (isRelativeResourceRoot()) { - return other; - } else { - return super.resolve(other); - } - } - - @Override - public Path resolve(String other) { - if (isRelativeResourceRoot()) { - return wrap(delegate.getFileSystem().getPath(other)); - } else { - return super.resolve(other); - } - } - - @Override - public Path toAbsolutePath() { - if (isAbsolute()) { - return this; - } else { - Path root = rootSupplier.get(); - Path resolvedAbsolute = isRelativeResourceRoot() ? root : root.resolve(delegate); - return wrap(resolvedAbsolute); - } - } - - @Override - public Path toRealPath(LinkOption... options) throws IOException { - return wrap(resolveDelegateAbsolutePath().toRealPath(options)); - } - - @Override - public URI toUri() { - if (delegate.isAbsolute()) { - return super.toUri(); - } else { - return toAbsolutePath().toUri(); - } - } - - /** - * Returns the absolute normalized default file system path. If the path after - * normalization escaped the internal resource root it throws {@link SecurityException}. - */ - Path resolveDelegateAbsolutePath() { - Path root = rootSupplier.get(); - Path absolutePath = delegate.isAbsolute() ? 
delegate : root.resolve(delegate); - absolutePath = absolutePath.normalize(); - if (!absolutePath.startsWith(root)) { - throw new SecurityException(delegate.toString()); - } - return absolutePath; - } - - boolean isRelativeResourceRoot() { - if (!delegate.isAbsolute() && delegate.getNameCount() == 1) { - Path name = delegate.getFileName(); - if (name == null) { - throw CompilerDirectives.shouldNotReachHere("Path has a name component but has no file name " + delegate); - } - return ".".equals(name.toString()); - } - return false; - } - - @Override - public String getReinitializedPath() { - return toAbsolutePath().toString(); - } - - @Override - public URI getReinitializedURI() { - return toUri(); - } - } - } - private interface PolyglotFileSystem extends FileSystem { boolean isInternal(AbstractPolyglotImpl polyglot); diff --git a/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/InstrumentCache.java b/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/InstrumentCache.java index 2b0f3d8b2e4d..b83588d19099 100644 --- a/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/InstrumentCache.java +++ b/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/InstrumentCache.java @@ -294,6 +294,10 @@ Collection getResourceIds() { return internalResources.keySet(); } + Collection getResources() { + return internalResources.values(); + } + String getWebsite() { return website; } diff --git a/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/InternalResourceCache.java b/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/InternalResourceCache.java index d386ba36be4a..b3a22bbc4cdf 100644 --- a/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/InternalResourceCache.java +++ b/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/InternalResourceCache.java @@ -46,27 +46,21 @@ import com.oracle.truffle.api.TruffleOptions; 
import com.oracle.truffle.api.provider.InternalResourceProvider; import com.oracle.truffle.polyglot.EngineAccessor.AbstractClassLoaderSupplier; -import org.graalvm.collections.Pair; import org.graalvm.nativeimage.ImageInfo; import org.graalvm.nativeimage.ImageSingletons; import org.graalvm.nativeimage.IsolateThread; -import org.graalvm.nativeimage.ProcessProperties; import org.graalvm.nativeimage.c.function.CEntryPoint; import org.graalvm.nativeimage.c.function.CEntryPointLiteral; import org.graalvm.nativeimage.c.function.CFunctionPointer; -import org.graalvm.polyglot.io.FileSystem; -import java.io.IOError; import java.io.IOException; -import java.io.PrintStream; import java.lang.reflect.Constructor; import java.net.URL; import java.nio.file.DirectoryStream; import java.nio.file.FileAlreadyExistsException; +import java.nio.file.FileSystemException; import java.nio.file.Files; -import java.nio.file.InvalidPathException; import java.nio.file.Path; -import java.nio.file.Paths; import java.nio.file.StandardCopyOption; import java.security.CodeSource; import java.util.Collection; @@ -79,8 +73,6 @@ import java.util.ServiceLoader; import java.util.Set; import java.util.TreeSet; -import java.util.concurrent.locks.Lock; -import java.util.concurrent.locks.ReentrantLock; import java.util.function.BooleanSupplier; import java.util.function.Function; import java.util.function.Supplier; @@ -91,20 +83,30 @@ final class InternalResourceCache { private static final char[] FILE_SYSTEM_SPECIAL_CHARACTERS = {'/', '\\', ':'}; - private static final String OVERRIDDEN_CACHE_ROOT = "polyglot.engine.resourcePath"; - private static final String OVERRIDDEN_COMPONENT_ROOT = "polyglot.engine.resourcePath.%s"; - private static final String OVERRIDDEN_RESOURCE_ROOT = "polyglot.engine.resourcePath.%s.%s"; - - private static final Lock unpackLock = new ReentrantLock(); - private static final Map, Map>>> optionalInternalResourcesCaches = new HashMap<>(); private static final Map>> nativeImageCache 
= TruffleOptions.AOT ? new HashMap<>() : null; - private static volatile Pair cacheRoot; + + /** + * Recomputed before the analyses by a substitution in the {@code TruffleBaseFeature} based on + * the {@code CopyLanguageResources} option value. The field must not be declared as + * {@code final} to make the substitution function correctly. + */ + private static boolean useInternalResources = true; private final String id; private final String resourceId; private final Supplier resourceFactory; - private volatile FileSystem resourceFileSystem; + + /** + * This field is reset to {@code null} by the {@code TruffleBaseFeature} before writing the + * native image heap. + */ + private InternalResourceRoots.Root owningRoot; + /** + * This field is reset to {@code null} by the {@code TruffleBaseFeature} before writing the + * native image heap. + */ + private volatile Path path; InternalResourceCache(String languageId, String resourceId, Supplier resourceFactory) { this.id = Objects.requireNonNull(languageId); @@ -112,8 +114,50 @@ final class InternalResourceCache { this.resourceFactory = Objects.requireNonNull(resourceFactory); } - FileSystem getResourceFileSystem(PolyglotEngineImpl polyglotEngine) throws IOException { - return getResourceFileSystemImpl((resource) -> EngineAccessor.LANGUAGE.createInternalResourceEnv(resource, () -> polyglotEngine.inEnginePreInitialization)); + String getResourceId() { + return resourceId; + } + + Path getPathOrNull() { + return path; + } + + Path getPath(PolyglotEngineImpl polyglotEngine) throws IOException { + if (usesInternalResources()) { + Path result = path; + if (result == null) { + synchronized (this) { + result = path; + if (result == null) { + result = installResource((resource) -> EngineAccessor.LANGUAGE.createInternalResourceEnv(resource, () -> polyglotEngine.inEnginePreInitialization)); + path = result; + } + } + } + return result; + } else { + throw new IllegalArgumentException("Internal resources are restricted. 
To enable them, use '-H:+CopyLanguageResources' during the native image build."); + } + } + + void initializeOwningRoot(InternalResourceRoots.Root root) { + assert owningRoot == null; + assert path == null; + owningRoot = root; + path = switch (root.kind()) { + case RESOURCE -> root.path(); + case COMPONENT -> root.path().resolve(sanitize(resourceId)); + case UNVERSIONED -> findStandaloneResourceRoot(root.path()); + case VERSIONED -> null; + }; + } + + /** + * Resets state for unit test execution. This method is intended only for testing. + */ + void clearCache() { + owningRoot = null; + path = null; } /** @@ -125,13 +169,17 @@ FileSystem getResourceFileSystem(PolyglotEngineImpl polyglotEngine) throws IOExc */ static Path installRuntimeResource(InternalResource resource) throws IOException { InternalResourceCache cache = createRuntimeResourceCache(resource); - return cache.getResourceFileSystemImpl(InternalResourceCache::createInternalResourceEnvReflectively).parsePath("").toAbsolutePath(); + synchronized (cache) { + return cache.installResource(InternalResourceCache::createInternalResourceEnvReflectively); + } } private static InternalResourceCache createRuntimeResourceCache(InternalResource resource) { InternalResource.Id id = resource.getClass().getAnnotation(InternalResource.Id.class); assert id != null : resource.getClass() + " must be annotated by @InternalResource.Id"; - return new InternalResourceCache(PolyglotEngineImpl.ENGINE_ID, id.value(), () -> resource); + InternalResourceCache cache = new InternalResourceCache(PolyglotEngineImpl.ENGINE_ID, id.value(), () -> resource); + InternalResourceRoots.initializeRuntimeResource(cache); + return cache; } private static InternalResource.Env createInternalResourceEnvReflectively(InternalResource resource) { @@ -144,62 +192,43 @@ private static InternalResource.Env createInternalResourceEnvReflectively(Intern } } - private FileSystem getResourceFileSystemImpl(Function createEnv) throws IOException { - FileSystem 
result = resourceFileSystem; - if (result == null) { - synchronized (this) { - result = resourceFileSystem; - if (result == null) { - Path root = findOverriddenResourceRoot(); - if (root == null) { - if (hasExplicitCacheRoot()) { - root = findStandaloneResourceRoot(getExplicitCacheRoot()); - } else if (ImageInfo.inImageRuntimeCode()) { - root = findStandaloneResourceRoot(findCacheRootOnNativeImage()); - } else { - InternalResource resource = resourceFactory.get(); - InternalResource.Env env = createEnv.apply(resource); - String versionHash = resource.versionHash(env); - if (versionHash.getBytes().length > 128) { - throw new IOException("The version hash length is restricted to a maximum of 128 bytes."); - } - root = findCacheRootOnHotSpot().resolve(Path.of(sanitize(id), sanitize(resourceId), sanitize(versionHash))); - unpackResourceFiles(root, resource, env); - } - } - ResettableCachedRoot rootSupplier = new ResettableCachedRoot(root); - result = FileSystems.newInternalResourceFileSystem(rootSupplier); - resourceFileSystem = result; - } + private Path installResource(Function resourceEnvProvider) throws IOException { + Objects.requireNonNull(resourceEnvProvider, "ResourceEnvProvider must be non-null."); + assert Thread.holdsLock(this) : "Unpacking must be called under lock"; + assert owningRoot.kind() == InternalResourceRoots.Root.Kind.VERSIONED; + assert !ImageInfo.inImageRuntimeCode() : "Must not be called in the image execution time."; + InternalResource resource = resourceFactory.get(); + InternalResource.Env env = resourceEnvProvider.apply(resource); + String versionHash = resource.versionHash(env); + if (versionHash.getBytes().length > 128) { + throw new IOException("The version hash length is restricted to a maximum of 128 bytes."); + } + Path target = owningRoot.path().resolve(Path.of(sanitize(id), sanitize(resourceId), sanitize(versionHash))); + if (!Files.exists(target)) { + Path parent = target.getParent(); + if (parent == null) { + throw 
CompilerDirectives.shouldNotReachHere("Target must have a parent directory but was " + target); } - } - return result; - } - - private static void unpackResourceFiles(Path target, InternalResource resource, InternalResource.Env env) throws IOException { - unpackLock.lock(); - try { - if (!Files.exists(target)) { - Path parent = target.getParent(); - if (parent == null) { - throw CompilerDirectives.shouldNotReachHere("Target must have a parent directory but was " + target); - } - Path owner = Files.createDirectories(Objects.requireNonNull(parent)); - Path tmpDir = Files.createTempDirectory(owner, null); - resource.unpackFiles(env, tmpDir); - try { - Files.move(tmpDir, target, StandardCopyOption.ATOMIC_MOVE); - } catch (FileAlreadyExistsException existsException) { - // race with other process that already moved the folder just unlink the tmp - // directory + Path owner = Files.createDirectories(Objects.requireNonNull(parent)); + Path tmpDir = Files.createTempDirectory(owner, null); + resource.unpackFiles(env, tmpDir); + try { + Files.move(tmpDir, target, StandardCopyOption.ATOMIC_MOVE); + } catch (FileAlreadyExistsException existsException) { + // race with other process that already moved the folder just unlink the tmp + // directory + unlink(tmpDir); + } catch (FileSystemException fsException) { + // On some filesystem implementations, the generic FileSystemException is thrown + // instead of FileAlreadyExistsException. We need to check if this is the case. 
+ if (Files.isDirectory(target)) { unlink(tmpDir); } - } else { - verifyResourceRoot(target); } - } finally { - unpackLock.unlock(); + } else { + verifyResourceRoot(target); } + return target; } private static void verifyResourceRoot(Path resourceRoot) throws IOException { @@ -215,18 +244,6 @@ private Path findStandaloneResourceRoot(Path root) { return root.resolve(Path.of(sanitize(id), sanitize(resourceId))); } - private Path findOverriddenResourceRoot() throws IOException { - String value = System.getProperty(String.format(OVERRIDDEN_RESOURCE_ROOT, id, resourceId)); - if (value != null) { - return Paths.get(value).toRealPath(); - } - value = System.getProperty(String.format(OVERRIDDEN_COMPONENT_ROOT, id)); - if (value != null) { - return Paths.get(value).resolve(sanitize(resourceId)).toRealPath(); - } - return null; - } - private static String sanitize(String pathElement) { String result = pathElement; for (char fileSystemsSpecialChar : FILE_SYSTEM_SPECIAL_CHARACTERS) { @@ -235,108 +252,13 @@ private static String sanitize(String pathElement) { return result; } - private static boolean hasExplicitCacheRoot() throws IOException { - Pair res = cacheRoot; - if (res == null) { - String resourcesFolder = System.getProperty(OVERRIDDEN_CACHE_ROOT); - if (resourcesFolder != null) { - Path cache = Paths.get(resourcesFolder).toRealPath(); - res = Pair.create(cache, true); - cacheRoot = res; - } - } - return res != null && res.getRight(); - } - - private static Path getExplicitCacheRoot() { - Pair res = cacheRoot; - if (res == null || !res.getRight()) { - throw CompilerDirectives.shouldNotReachHere("Can be only called when hasExplicitCacheRoot() returned true"); - } - return res.getLeft(); - } - - private static Path findCacheRootOnHotSpot() throws IOException { - Pair res = cacheRoot; - if (res == null) { - String userHomeValue = System.getProperty("user.home"); - if (userHomeValue == null) { - throw CompilerDirectives.shouldNotReachHere("The 'user.home' system property is 
not set."); - } - Path userHome = Paths.get(userHomeValue); - Path container = switch (InternalResource.OS.getCurrent()) { - case DARWIN -> userHome.resolve(Path.of("Library", "Caches")); - case LINUX -> { - Path userCacheDir = null; - String xdgCacheValue = System.getenv("XDG_CACHE_HOME"); - if (xdgCacheValue != null) { - try { - Path xdgCacheDir = Path.of(xdgCacheValue); - // Do not fail when XDG_CACHE_HOME value is invalid. Fall back to - // $HOME/.cache. - if (xdgCacheDir.isAbsolute()) { - userCacheDir = xdgCacheDir; - } else { - emitWarning("The value of the environment variable 'XDG_CACHE_HOME' is not an absolute path. Using the default cache folder '%s'.", userHome.resolve(".cache")); - } - } catch (InvalidPathException notPath) { - emitWarning("The value of the environment variable 'XDG_CACHE_HOME' is not a valid path. Using the default cache folder '%s'.", userHome.resolve(".cache")); - } - } - if (userCacheDir == null) { - userCacheDir = userHome.resolve(".cache"); - } - yield userCacheDir; - } - case WINDOWS -> userHome.resolve(Path.of("AppData", "Local")); - }; - Path cache = container.resolve("org.graalvm.polyglot"); - cache = Files.createDirectories(cache).toRealPath(); - res = Pair.create(cache, false); - cacheRoot = res; - } - return res.getLeft(); - } - - private static void emitWarning(String message, Object... 
args) { - PrintStream out = System.err; - out.printf(message + "%n", args); - } - - private static Path findCacheRootOnNativeImage() { - Pair res = cacheRoot; - if (res == null) { - assert ImageInfo.inImageRuntimeCode() : "Can be called only in the native-image execution time."; - Path executable = getExecutablePath(); - Path cache = executable.resolveSibling("resources"); - res = Pair.create(cache, false); - cacheRoot = res; - } - return res.getLeft(); - } - - private static Path getExecutablePath() { - assert ImageInfo.inImageRuntimeCode(); - if (useInternalResources) { - if (ImageInfo.isExecutable()) { - return Path.of(ProcessProperties.getExecutableName()); - } else if (ImageInfo.isSharedLibrary()) { - return Path.of(ProcessProperties.getObjectFile(InternalResourceCacheSymbol.SYMBOL)); - } else { - throw CompilerDirectives.shouldNotReachHere("Should only be invoked within native image runtime code."); - } - } else { - throw new IllegalArgumentException("Lookup an executable name is restricted. " + - "To enable it, use '-H:+CopyLanguageResources' during the native image build."); - } - } - /** - * Recomputed before the analyses by a substitution in the {@code TruffleBaseFeature} based on - * the {@code CopyLanguageResources} option value. The field must not be declared as - * {@code final} to make the substitution function correctly. + * Returns true if internal resources are enabled. Internal resources can be disabled in the + * native image using {-H:-CopyLanguageResources} option. */ - private static boolean useInternalResources = true; + public static boolean usesInternalResources() { + return useInternalResources; + } /** * Collects optional internal resources for native-image build. 
This method is called @@ -355,13 +277,6 @@ static void resetNativeImageState() { nativeImageCache.clear(); } - private void resetFileSystemNativeImageState() { - FileSystem fs = resourceFileSystem; - if (fs != null) { - ((ResettableCachedRoot) FileSystems.getInternalResourceFileSystemRoot(fs)).resourceCacheRoot = null; - } - } - /** * Unpacks internal resources after native-image write. This method is called reflectively by * the {@code TruffleBaseFeature#afterAnalysis}. @@ -399,34 +314,31 @@ static boolean copyResourcesForNativeImage(Path target, String... components) th instruments = requiredInstruments; } for (LanguageCache language : languages) { - for (String resourceId : language.getResourceIds()) { - InternalResourceCache cache = language.getResourceCache(resourceId); + for (InternalResourceCache cache : language.getResources()) { result |= cache.copyResourcesForNativeImage(target); } } for (InstrumentCache instrument : instruments) { - for (String resourceId : instrument.getResourceIds()) { - InternalResourceCache cache = instrument.getResourceCache(resourceId); + for (InternalResourceCache cache : instrument.getResources()) { result |= cache.copyResourcesForNativeImage(target); } } // Always install engine resources - for (String resourceId : getEngineResourceIds()) { - InternalResourceCache cache = getEngineResource(resourceId); + for (InternalResourceCache cache : getEngineResources()) { result |= cache.copyResourcesForNativeImage(target); } return result; } private boolean copyResourcesForNativeImage(Path target) throws IOException { - Path resourceRoot = findStandaloneResourceRoot(target); - unlink(resourceRoot); - Files.createDirectories(resourceRoot); + Path root = findStandaloneResourceRoot(target); + unlink(root); + Files.createDirectories(root); InternalResource resource = resourceFactory.get(); InternalResource.Env env = EngineAccessor.LANGUAGE.createInternalResourceEnv(resource, () -> false); - resource.unpackFiles(env, resourceRoot); - if 
(isEmpty(resourceRoot)) { - Files.deleteIfExists(resourceRoot); + resource.unpackFiles(env, root); + if (isEmpty(root)) { + Files.deleteIfExists(root); return false; } else { return true; @@ -438,6 +350,15 @@ static Collection getEngineResourceIds() { return engineResources != null ? engineResources.keySet() : List.of(); } + static Collection getEngineResources() { + Map> engineResources = loadOptionalInternalResources(EngineAccessor.locatorOrDefaultLoaders()).get(PolyglotEngineImpl.ENGINE_ID); + if (engineResources != null) { + return engineResources.values().stream().map(Supplier::get).collect(Collectors.toList()); + } else { + return List.of(); + } + } + static InternalResourceCache getEngineResource(String resourceId) { Map> engineResources = loadOptionalInternalResources(EngineAccessor.locatorOrDefaultLoaders()).get(PolyglotEngineImpl.ENGINE_ID); Supplier resourceSupplier = engineResources != null ? engineResources.get(resourceId) : null; @@ -531,69 +452,13 @@ private static void unlink(Path path) throws IOException { Files.deleteIfExists(path); } - /** - * Sets the {@link #cacheRoot} in unit tests. This method is called reflectively by the - * {@code InternalResourceTest}. - */ - @SuppressWarnings("unused") - private static void setTestCacheRoot(Path root, boolean disposeResourceFileSystem) { - cacheRoot = root == null ? 
null : Pair.create(root, false); - for (LanguageCache language : LanguageCache.languages().values()) { - for (String resourceId : language.getResourceIds()) { - InternalResourceCache cache = language.getResourceCache(resourceId); - if (disposeResourceFileSystem) { - cache.resourceFileSystem = null; - } else { - cache.resetFileSystemNativeImageState(); - } - } - } - for (InstrumentCache instrument : InstrumentCache.load()) { - for (String resourceId : instrument.getResourceIds()) { - InternalResourceCache cache = instrument.getResourceCache(resourceId); - if (disposeResourceFileSystem) { - cache.resourceFileSystem = null; - } else { - cache.resetFileSystemNativeImageState(); - } - } - } - } - - private final class ResettableCachedRoot implements Supplier { - - private volatile Path resourceCacheRoot; - - ResettableCachedRoot(Path resourceCacheRoot) { - Objects.requireNonNull(resourceCacheRoot, "ResourceCacheRoot must be non-null."); - this.resourceCacheRoot = resourceCacheRoot; - } - - @Override - public Path get() { - Path res = resourceCacheRoot; - if (res == null) { - if (ImageInfo.inImageBuildtimeCode()) { - throw CompilerDirectives.shouldNotReachHere("Reintroducing internal resource cache path into an image heap."); - } - try { - res = findOverriddenResourceRoot(); - if (res == null) { - Path cache; - if (hasExplicitCacheRoot()) { - cache = getExplicitCacheRoot(); - } else { - cache = findCacheRootOnNativeImage(); - } - res = findStandaloneResourceRoot(cache); - } - resourceCacheRoot = res; - } catch (IOException ioe) { - throw new IOError(ioe); - } - } - return res; - } + @Override + public String toString() { + return "InternalResourceCache[" + + "componentId='" + id + '\'' + + ", resourceId='" + resourceId + '\'' + + ", resourceRoot=" + path + + '}'; } private static final class OptionalResourceSupplier implements Supplier { diff --git a/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/InternalResourceRoots.java 
b/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/InternalResourceRoots.java new file mode 100644 index 000000000000..46d91ea31340 --- /dev/null +++ b/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/InternalResourceRoots.java @@ -0,0 +1,304 @@ +/* + * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * The Universal Permissive License (UPL), Version 1.0 + * + * Subject to the condition set forth below, permission is hereby granted to any + * person obtaining a copy of this software, associated documentation and/or + * data (collectively the "Software"), free of charge and under any and all + * copyright rights in the Software, and any and all patent rights owned or + * freely licensable by each licensor hereunder covering either (i) the + * unmodified Software as contributed to or provided by such licensor, or (ii) + * the Larger Works (as defined below), to deal in both + * + * (a) the Software, and + * + * (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if + * one is included with the Software each a "Larger Work" to which the Software + * is contributed by such licensors), + * + * without restriction, including without limitation the rights to copy, create + * derivative works of, display, perform, and distribute the Software and make, + * use, sell, offer for sale, import, export, have made, and have sold the + * Software and the Larger Work(s), and to sublicense the foregoing rights on + * either these or other terms. + * + * This license is subject to the following condition: + * + * The above copyright notice and either this complete permission notice or at a + * minimum a reference to the UPL must be included in all copies or substantial + * portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +package com.oracle.truffle.polyglot; + +import com.oracle.truffle.api.CompilerDirectives; +import com.oracle.truffle.api.InternalResource; +import org.graalvm.collections.Pair; +import org.graalvm.nativeimage.ImageInfo; +import org.graalvm.nativeimage.ProcessProperties; + +import java.io.PrintStream; +import java.nio.file.InvalidPathException; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +final class InternalResourceRoots { + + private static final String OVERRIDDEN_CACHE_ROOT = "polyglot.engine.resourcePath"; + private static final String OVERRIDDEN_COMPONENT_ROOT = "polyglot.engine.resourcePath."; + private static final String OVERRIDDEN_RESOURCE_ROOT = "polyglot.engine.resourcePath."; + + /** + * This field is reset to {@code null} by the {@code TruffleBaseFeature} before writing the + * native image heap. + */ + private static volatile Set roots; + + private InternalResourceRoots() { + } + + /** + * Initializes the internal resource roots. This method is called from entry-points in the + * polyglot during engine construction to ensure that internal resource roots are initialized + * before the engine is used. 
+ */ + static synchronized void ensureInitialized() { + if (roots == null) { + if (InternalResourceCache.usesInternalResources()) { + roots = computeRoots(findDefaultRoot()); + } else { + roots = Set.of(); + } + } + } + + static Root findRoot(Path hostPath) { + for (Root root : roots) { + if (hostPath.startsWith(root.path)) { + return root; + } + } + return null; + } + + static InternalResourceCache findInternalResource(Path hostPath) { + Root root = findRoot(hostPath); + if (root != null) { + for (InternalResourceCache cache : root.caches) { + Path resourceRoot = cache.getPathOrNull(); + // Used InternalResourceCache instances always have non-null root. + if (resourceRoot != null && hostPath.startsWith(resourceRoot)) { + return cache; + } + } + } + return null; + } + + /** + * The unpacking of the Truffle attach library is called reflectively in a boot time when + * accessors, LanguageCache and InstrumentCache cannot be used. We are creating a temporary + * {@link InternalResourceCache} just to unpack the library. + * + */ + static void initializeRuntimeResource(InternalResourceCache truffleRuntimeResource) { + Pair defaultRoot = findDefaultRoot(); + Map, List> collector = new HashMap<>(); + collectRoots(PolyglotEngineImpl.ENGINE_ID, defaultRoot.getLeft(), defaultRoot.getRight(), + List.of(truffleRuntimeResource), collector); + var entry = collector.entrySet().iterator().next(); + var key = entry.getKey(); + truffleRuntimeResource.initializeOwningRoot(new Root(key.getLeft(), key.getRight(), entry.getValue())); + } + + /** + * Sets the {@code #roots} in unit tests. This method is called reflectively by the + * {@code InternalResourceTest}. + * + * @param newRoot the new enforced cache root used by unit tests. + * @param nativeImageRuntime simulates the native image runtime behavior on hotspot. Needed by + * the {@code ContextPreInitializationTest}. 
+ * + */ + @SuppressWarnings("unused") + private static synchronized void setTestCacheRoot(Path newRoot, boolean nativeImageRuntime) { + if (roots != null) { + for (Root root : roots) { + for (InternalResourceCache cache : root.caches()) { + cache.clearCache(); + } + } + } + if (newRoot != null) { + roots = computeRoots(Pair.create(newRoot, nativeImageRuntime ? Root.Kind.UNVERSIONED : Root.Kind.VERSIONED)); + } else if (nativeImageRuntime) { + var defaultRoots = findDefaultRoot(); + roots = computeRoots(Pair.create(defaultRoots.getLeft(), Root.Kind.UNVERSIONED)); + } else { + roots = null; + } + } + + /** + * Computes the internal resource roots. + */ + private static Set computeRoots(Pair defaultRoot) { + Map, List> collector = new HashMap<>(); + for (LanguageCache language : LanguageCache.languages().values()) { + Collection resources = language.getResources(); + if (!resources.isEmpty()) { + collectRoots(language.getId(), defaultRoot.getLeft(), defaultRoot.getRight(), resources, collector); + } + } + for (InstrumentCache instrument : InstrumentCache.load()) { + Collection resources = instrument.getResources(); + if (!resources.isEmpty()) { + collectRoots(instrument.getId(), defaultRoot.getLeft(), defaultRoot.getRight(), resources, collector); + } + } + Collection engineResources = InternalResourceCache.getEngineResources(); + if (!engineResources.isEmpty()) { + collectRoots(PolyglotEngineImpl.ENGINE_ID, defaultRoot.getLeft(), defaultRoot.getRight(), engineResources, collector); + } + // Build a set of immutable Roots. 
+ Set result = new HashSet<>(); + for (var entry : collector.entrySet()) { + var key = entry.getKey(); + var resources = entry.getValue(); + Root internalResourceRoot = new Root(key.getLeft(), key.getRight(), resources); + for (InternalResourceCache resource : resources) { + resource.initializeOwningRoot(internalResourceRoot); + } + result.add(internalResourceRoot); + } + return Collections.unmodifiableSet(result); + } + + private static Pair findDefaultRoot() { + Path root; + Root.Kind kind; + String overriddenRoot = System.getProperty(OVERRIDDEN_CACHE_ROOT); + if (overriddenRoot != null) { + root = Path.of(overriddenRoot); + kind = Root.Kind.UNVERSIONED; + } else if (ImageInfo.inImageRuntimeCode()) { + root = findCacheRootOnNativeImage(); + kind = Root.Kind.UNVERSIONED; + } else { + root = findCacheRootOnHotSpot(); + kind = Root.Kind.VERSIONED; + } + return Pair.create(root, kind); + } + + private static void collectRoots(String componentId, Path componentRoot, Root.Kind componentKind, Collection resources, + Map, List> collector) { + Path useRoot = componentRoot; + Root.Kind useKind = componentKind; + StringBuilder builder = new StringBuilder(OVERRIDDEN_COMPONENT_ROOT); + builder.append(componentId); + String overriddenRoot = System.getProperty(builder.toString()); + if (overriddenRoot != null) { + useRoot = Path.of(overriddenRoot); + useKind = Root.Kind.COMPONENT; + } + for (InternalResourceCache resource : resources) { + Path resourceRoot = useRoot; + Root.Kind resourceKind = useKind; + builder = new StringBuilder(OVERRIDDEN_RESOURCE_ROOT); + builder.append(componentId); + builder.append('.'); + builder.append(resource.getResourceId()); + overriddenRoot = System.getProperty(builder.toString()); + if (overriddenRoot != null) { + resourceRoot = Path.of(overriddenRoot); + resourceKind = Root.Kind.RESOURCE; + } + collector.computeIfAbsent(Pair.create(resourceRoot, resourceKind), (k) -> new ArrayList<>()).add(resource); + } + } + + private static Path 
findCacheRootOnNativeImage() { + assert ImageInfo.inImageRuntimeCode() : "Can be called only in the native-image execution time."; + Path executable = getExecutablePath(); + return executable.resolveSibling("resources"); + } + + private static Path getExecutablePath() { + assert ImageInfo.inImageRuntimeCode(); + if (ImageInfo.isExecutable()) { + return Path.of(ProcessProperties.getExecutableName()); + } else if (ImageInfo.isSharedLibrary()) { + return Path.of(ProcessProperties.getObjectFile(InternalResourceCacheSymbol.SYMBOL)); + } else { + throw CompilerDirectives.shouldNotReachHere("Should only be invoked within native image runtime code."); + } + } + + private static Path findCacheRootOnHotSpot() { + String userHomeValue = System.getProperty("user.home"); + if (userHomeValue == null) { + throw CompilerDirectives.shouldNotReachHere("The 'user.home' system property is not set."); + } + Path userHome = Paths.get(userHomeValue); + Path container = switch (InternalResource.OS.getCurrent()) { + case DARWIN -> userHome.resolve(Path.of("Library", "Caches")); + case LINUX -> { + Path userCacheDir = null; + String xdgCacheValue = System.getenv("XDG_CACHE_HOME"); + if (xdgCacheValue != null) { + try { + Path xdgCacheDir = Path.of(xdgCacheValue); + // Do not fail when XDG_CACHE_HOME value is invalid. Fall back to + // $HOME/.cache. + if (xdgCacheDir.isAbsolute()) { + userCacheDir = xdgCacheDir; + } else { + emitWarning("The value of the environment variable 'XDG_CACHE_HOME' is not an absolute path. Using the default cache folder '%s'.", userHome.resolve(".cache")); + } + } catch (InvalidPathException notPath) { + emitWarning("The value of the environment variable 'XDG_CACHE_HOME' is not a valid path. 
Using the default cache folder '%s'.", userHome.resolve(".cache")); + } + } + if (userCacheDir == null) { + userCacheDir = userHome.resolve(".cache"); + } + yield userCacheDir; + } + case WINDOWS -> userHome.resolve(Path.of("AppData", "Local")); + }; + return container.resolve("org.graalvm.polyglot"); + } + + private static void emitWarning(String message, Object... args) { + PrintStream out = System.err; + out.printf(message + "%n", args); + } + + record Root(Path path, Kind kind, List caches) { + + enum Kind { + COMPONENT, + RESOURCE, + UNVERSIONED, + VERSIONED, + } + } +} diff --git a/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/LanguageCache.java b/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/LanguageCache.java index e613a16ac659..5c93cb469c37 100644 --- a/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/LanguageCache.java +++ b/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/LanguageCache.java @@ -675,6 +675,10 @@ Collection getResourceIds() { return internalResources.keySet(); } + Collection getResources() { + return internalResources.values(); + } + @Override public String toString() { return "LanguageCache [id=" + id + ", name=" + name + ", implementationName=" + implementationName + ", version=" + version + ", className=" + className + ", services=" + services + "]"; diff --git a/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/PolyglotEngineImpl.java b/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/PolyglotEngineImpl.java index c3dd1ec34ac5..13d7b5f75ecd 100644 --- a/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/PolyglotEngineImpl.java +++ b/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/PolyglotEngineImpl.java @@ -1751,7 +1751,7 @@ public PolyglotContextImpl createContext(SandboxPolicy contextSandboxPolicy, Out } else if (customFileSystem != null) { 
fileSystemConfig = new FileSystemConfig(ioAccess, customFileSystem, customFileSystem); } else { - fileSystemConfig = new FileSystemConfig(ioAccess, FileSystems.newNoIOFileSystem(), FileSystems.newLanguageHomeFileSystem()); + fileSystemConfig = new FileSystemConfig(ioAccess, FileSystems.newNoIOFileSystem(), FileSystems.newResourcesFileSystem()); } if (currentWorkingDirectory != null) { Path publicFsCwd; diff --git a/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/PolyglotImpl.java b/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/PolyglotImpl.java index 16c61b7e35bf..59586e5a6147 100644 --- a/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/PolyglotImpl.java +++ b/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/PolyglotImpl.java @@ -272,6 +272,7 @@ public Object buildEngine(String[] permittedLanguages, SandboxPolicy sandboxPoli PolyglotEngineImpl impl = null; try { validateSandbox(sandboxPolicy); + InternalResourceRoots.ensureInitialized(); if (TruffleOptions.AOT) { EngineAccessor.ACCESSOR.initializeNativeImageTruffleLocator(); } @@ -435,6 +436,7 @@ public void preInitializeEngine() { * Used for preinitialized contexts and fallback engine. 
*/ PolyglotEngineImpl createDefaultEngine(TruffleLanguage hostLanguage) { + InternalResourceRoots.ensureInitialized(); Map options = getAPIAccess().readOptionsFromSystemProperties(); LogConfig logConfig = new LogConfig(); SandboxPolicy sandboxPolicy = SandboxPolicy.TRUSTED; @@ -545,8 +547,8 @@ public FileSystem newDefaultFileSystem(String hostTmpDir) { } @Override - public FileSystem allowLanguageHomeAccess(FileSystem fileSystem) { - return FileSystems.allowLanguageHomeAccess(fileSystem); + public FileSystem allowInternalResourceAccess(FileSystem fileSystem) { + return FileSystems.allowInternalResourceAccess(fileSystem); } @Override diff --git a/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/PolyglotInstrument.java b/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/PolyglotInstrument.java index 742a1a2039db..ea9f38c68da8 100644 --- a/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/PolyglotInstrument.java +++ b/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/PolyglotInstrument.java @@ -43,8 +43,6 @@ import static com.oracle.truffle.polyglot.EngineAccessor.INSTRUMENT; import static com.oracle.truffle.polyglot.EngineAccessor.LANGUAGE; -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; import java.util.function.Supplier; import org.graalvm.options.OptionDescriptor; @@ -53,7 +51,6 @@ import org.graalvm.polyglot.impl.AbstractPolyglotImpl.APIAccess; import com.oracle.truffle.api.InstrumentInfo; -import com.oracle.truffle.api.TruffleFile; import com.oracle.truffle.api.instrumentation.TruffleInstrument; import com.oracle.truffle.polyglot.PolyglotLocals.LocalLocation; @@ -65,7 +62,6 @@ class PolyglotInstrument implements com.oracle.truffle.polyglot.PolyglotImpl.VMO final PolyglotEngineImpl engine; private final Object instrumentLock = new Object(); - final Map internalResources = new ConcurrentHashMap<>(); private volatile OptionDescriptors engineOptions; 
private volatile OptionDescriptors contextOptions; private volatile OptionDescriptors allOptions; diff --git a/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/PolyglotLanguage.java b/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/PolyglotLanguage.java index 886c0a306011..ee9c4a5e55de 100644 --- a/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/PolyglotLanguage.java +++ b/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/PolyglotLanguage.java @@ -44,9 +44,7 @@ import static com.oracle.truffle.polyglot.EngineAccessor.LANGUAGE; import static com.oracle.truffle.polyglot.EngineAccessor.NODES; -import java.util.Map; import java.util.Set; -import java.util.concurrent.ConcurrentHashMap; import org.graalvm.home.Version; import org.graalvm.options.OptionDescriptors; @@ -56,7 +54,6 @@ import com.oracle.truffle.api.CompilerAsserts; import com.oracle.truffle.api.CompilerDirectives; import com.oracle.truffle.api.CompilerDirectives.CompilationFinal; -import com.oracle.truffle.api.TruffleFile; import com.oracle.truffle.api.nodes.LanguageInfo; import com.oracle.truffle.polyglot.PolyglotLocals.LocalLocation; @@ -69,7 +66,6 @@ final class PolyglotLanguage implements com.oracle.truffle.polyglot.PolyglotImpl Object api; // effectively final final int engineIndex; final RuntimeException initError; - final Map internalResources = new ConcurrentHashMap<>(); private volatile OptionDescriptors options; private volatile OptionValuesImpl optionValues;