diff --git a/compiler/mx.compiler/mx_compiler.py b/compiler/mx.compiler/mx_compiler.py index c313b6351669..87d204f0d6f9 100644 --- a/compiler/mx.compiler/mx_compiler.py +++ b/compiler/mx.compiler/mx_compiler.py @@ -566,14 +566,14 @@ def compiler_gate_benchmark_runner(tasks, extraVMarguments=None, prefix='', task if t: for name in dacapo_suite.benchmarkList(bmSuiteArgs): iterations = int(dacapo_suite.daCapoIterations().get(name, -1) * default_iterations_reduction) - for i in range(default_iterations * scala_daily_scaling_factor): + for _ in range(default_iterations * scala_daily_scaling_factor): _gate_dacapo(name, iterations, benchVmArgs + ['-Dgraal.TrackNodeSourcePosition=true'] + dacapo_esa) with mx_gate.Task('Dacapo benchmark weekly workload', tasks, tags=['dacapo_weekly'], report=task_report_component) as t: if t: for name in dacapo_suite.benchmarkList(bmSuiteArgs): iterations = int(dacapo_suite.daCapoIterations().get(name, -1) * default_iterations_reduction) - for i in range(default_iterations * scala_weekly_scaling_factor): + for _ in range(default_iterations * scala_weekly_scaling_factor): _gate_dacapo(name, iterations, benchVmArgs + ['-Dgraal.TrackNodeSourcePosition=true'] + dacapo_esa) # ensure we can also run on C2 @@ -603,14 +603,14 @@ def compiler_gate_benchmark_runner(tasks, extraVMarguments=None, prefix='', task if t: for name in scala_dacapo_suite.benchmarkList(bmSuiteArgs): iterations = int(scala_dacapo_suite.daCapoIterations().get(name, -1) * default_iterations_reduction) - for i in range(default_iterations * scala_dacapo_daily_scaling_factor): + for _ in range(default_iterations * scala_dacapo_daily_scaling_factor): _gate_scala_dacapo(name, iterations, benchVmArgs + ['-Dgraal.TrackNodeSourcePosition=true'] + dacapo_esa) with mx_gate.Task('ScalaDacapo benchmark weekly workload', tasks, tags=['scala_dacapo_weekly'], report=task_report_component) as t: if t: for name in scala_dacapo_suite.benchmarkList(bmSuiteArgs): iterations = 
int(scala_dacapo_suite.daCapoIterations().get(name, -1) * default_iterations_reduction) - for i in range(default_iterations * scala_dacapo_weekly_scaling_factor): + for _ in range(default_iterations * scala_dacapo_weekly_scaling_factor): _gate_scala_dacapo(name, iterations, benchVmArgs + ['-Dgraal.TrackNodeSourcePosition=true'] + dacapo_esa) # run Renaissance benchmarks # @@ -630,14 +630,14 @@ def compiler_gate_benchmark_runner(tasks, extraVMarguments=None, prefix='', task if t: for name in renaissance_suite.benchmarkList(bmSuiteArgs): iterations = int(renaissance_suite.renaissanceIterations().get(name, -1) * default_iterations_reduction) - for i in range(default_iterations): + for _ in range(default_iterations): _gate_renaissance(name, iterations, benchVmArgs + ['-Dgraal.TrackNodeSourcePosition=true'] + enable_assertions) with mx_gate.Task('Renaissance benchmark weekly workload', tasks, tags=['renaissance_weekly'], report=task_report_component) as t: if t: for name in renaissance_suite.benchmarkList(bmSuiteArgs): iterations = int(renaissance_suite.renaissanceIterations().get(name, -1) * default_iterations_reduction) - for i in range(default_iterations * daily_weekly_jobs_ratio): + for _ in range(default_iterations * daily_weekly_jobs_ratio): _gate_renaissance(name, iterations, benchVmArgs + ['-Dgraal.TrackNodeSourcePosition=true'] + enable_assertions) # run benchmark with non default setup # diff --git a/compiler/mx.compiler/suite.py b/compiler/mx.compiler/suite.py index bfa72c513957..f747096154ba 100644 --- a/compiler/mx.compiler/suite.py +++ b/compiler/mx.compiler/suite.py @@ -256,6 +256,7 @@ "jdk.internal.module", "jdk.internal.misc", "jdk.internal.util", + "jdk.internal.vm.annotation", ], "java.instrument" : [ "sun.instrument", diff --git a/compiler/src/jdk.graal.compiler.test/src/jdk/graal/compiler/core/test/InstanceOfCanonicalizationTest.java b/compiler/src/jdk.graal.compiler.test/src/jdk/graal/compiler/core/test/InstanceOfCanonicalizationTest.java new file 
mode 100644 index 000000000000..ea222bdefd64 --- /dev/null +++ b/compiler/src/jdk.graal.compiler.test/src/jdk/graal/compiler/core/test/InstanceOfCanonicalizationTest.java @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ +package jdk.graal.compiler.core.test; + +import org.junit.Assert; +import org.junit.Test; + +import jdk.graal.compiler.nodes.StructuredGraph; +import jdk.graal.compiler.nodes.StructuredGraph.AllowAssumptions; +import jdk.graal.compiler.nodes.calc.IsNullNode; +import jdk.graal.compiler.nodes.java.InstanceOfNode; + +public class InstanceOfCanonicalizationTest extends GraalCompilerTest { + + public static boolean checkCastIncompatibleTypes(Object arr) { + // Cast first to a byte array, then to a boolean array. This only succeeds if arr is null. 
+ byte[] barr = (byte[]) arr; + boolean[] bbarr = (boolean[]) (Object) barr; + return true; + } + + public static int unsatisfiableInstanceOf(byte[] barr) { + // Plain instanceof does not allow null, so this will never succeed. + if ((Object) barr instanceof boolean[]) { + return -1; + } + return 1; + } + + @Test + public void testCheckCastIncompatibleTypes() { + StructuredGraph g = parseEager("checkCastIncompatibleTypes", AllowAssumptions.NO, getInitialOptions()); + createCanonicalizerPhase().apply(g, getDefaultHighTierContext()); + + // The second check-cast against boolean[] should canonicalize to a null check + Assert.assertEquals(1, g.getNodes().filter(InstanceOfNode.class).count()); + Assert.assertEquals(1, g.getNodes().filter(IsNullNode.class).count()); + + testAgainstExpected(g.method(), new Result(checkCastIncompatibleTypes(null), null), null, new Object[]{null}); + testAgainstExpected(g.method(), new Result(null, new ClassCastException()), null, new Object[]{new byte[10]}); + testAgainstExpected(g.method(), new Result(null, new ClassCastException()), null, new Object[]{new boolean[10]}); + } + + @Test + public void testUnsatisfiableInstanceOf() { + StructuredGraph g = parseEager("unsatisfiableInstanceOf", AllowAssumptions.NO, getInitialOptions()); + createCanonicalizerPhase().apply(g, getDefaultHighTierContext()); + + // Tested condition can never be true, so it should canonicalize to a constant. 
+ Assert.assertEquals(0, g.getNodes().filter(InstanceOfNode.class).count()); + + testAgainstExpected(g.method(), new Result(unsatisfiableInstanceOf(null), null), null, new Object[]{null}); + testAgainstExpected(g.method(), new Result(unsatisfiableInstanceOf(new byte[10]), null), null, new Object[]{new byte[10]}); + } +} diff --git a/compiler/src/jdk.graal.compiler.test/src/jdk/graal/compiler/hotspot/test/HumongousReferenceObjectTest.java b/compiler/src/jdk.graal.compiler.test/src/jdk/graal/compiler/hotspot/test/HumongousReferenceObjectTest.java new file mode 100644 index 000000000000..49b4368c330c --- /dev/null +++ b/compiler/src/jdk.graal.compiler.test/src/jdk/graal/compiler/hotspot/test/HumongousReferenceObjectTest.java @@ -0,0 +1,386 @@ +/* + * Copyright (c) 2016, 2023, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ +package jdk.graal.compiler.hotspot.test; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import jdk.graal.compiler.core.test.SubprocessTest; +import org.junit.Test; + +import jdk.internal.vm.annotation.Contended; + +@SuppressWarnings("unused") +public class HumongousReferenceObjectTest extends SubprocessTest { + /* + * Due to 300 fields with 8K @Contended padding around each field, it takes 2.4M bytes per + * instance. With small G1 regions, it is bound to cross regions. G1 should properly (card) mark + * the object nevertheless. With 128M heap, it is enough to allocate ~100 of these objects to + * provoke at least one GC. + */ + + static volatile Object instance; + + public static void testSnippet() { + for (int c = 0; c < 100; c++) { + instance = new HumongousReferenceObjectTest(); + } + } + + public void runSubprocessTest(String... args) throws IOException, InterruptedException { + List newArgs = new ArrayList<>(); + Collections.addAll(newArgs, args); + // Filter out any explicitly selected GC + newArgs.remove("-XX:+UseZGC"); + newArgs.remove("-XX:+UseG1GC"); + newArgs.remove("-XX:+UseParallelGC"); + + launchSubprocess(() -> { + test("testSnippet"); + }, newArgs.toArray(new String[0])); + + // Test without assertions as well + newArgs.add("-da"); + launchSubprocess(() -> { + test("testSnippet"); + }, newArgs.toArray(new String[0])); + } + + @Test + public void testG1() throws IOException, InterruptedException { + String[] sizes = {"-XX:G1HeapRegionSize=1M", "-XX:G1HeapRegionSize=2M", "-XX:G1HeapRegionSize=4M", "-XX:G1HeapRegionSize=8M"}; + for (String size : sizes) { + runSubprocessTest("-XX:+UseG1GC", "-XX:+EnableContended", "-XX:-RestrictContended", "-Xmx128m", "-XX:ContendedPaddingWidth=8192", size); + } + } + + @Test + public void testParallel() throws IOException, InterruptedException { + runSubprocessTest("-XX:+UseParallelGC", "-XX:+EnableContended", "-XX:-RestrictContended", 
"-Xmx128m", "-XX:ContendedPaddingWidth=8192"); + } + + @Contended Integer int1 = 1; + @Contended Integer int2 = 2; + @Contended Integer int3 = 3; + @Contended Integer int4 = 4; + @Contended Integer int5 = 5; + @Contended Integer int6 = 6; + @Contended Integer int7 = 7; + @Contended Integer int8 = 8; + @Contended Integer int9 = 9; + @Contended Integer int10 = 10; + @Contended Integer int11 = 11; + @Contended Integer int12 = 12; + @Contended Integer int13 = 13; + @Contended Integer int14 = 14; + @Contended Integer int15 = 15; + @Contended Integer int16 = 16; + @Contended Integer int17 = 17; + @Contended Integer int18 = 18; + @Contended Integer int19 = 19; + @Contended Integer int20 = 20; + @Contended Integer int21 = 21; + @Contended Integer int22 = 22; + @Contended Integer int23 = 23; + @Contended Integer int24 = 24; + @Contended Integer int25 = 25; + @Contended Integer int26 = 26; + @Contended Integer int27 = 27; + @Contended Integer int28 = 28; + @Contended Integer int29 = 29; + @Contended Integer int30 = 30; + @Contended Integer int31 = 31; + @Contended Integer int32 = 32; + @Contended Integer int33 = 33; + @Contended Integer int34 = 34; + @Contended Integer int35 = 35; + @Contended Integer int36 = 36; + @Contended Integer int37 = 37; + @Contended Integer int38 = 38; + @Contended Integer int39 = 39; + @Contended Integer int40 = 40; + @Contended Integer int41 = 41; + @Contended Integer int42 = 42; + @Contended Integer int43 = 43; + @Contended Integer int44 = 44; + @Contended Integer int45 = 45; + @Contended Integer int46 = 46; + @Contended Integer int47 = 47; + @Contended Integer int48 = 48; + @Contended Integer int49 = 49; + @Contended Integer int50 = 50; + @Contended Integer int51 = 51; + @Contended Integer int52 = 52; + @Contended Integer int53 = 53; + @Contended Integer int54 = 54; + @Contended Integer int55 = 55; + @Contended Integer int56 = 56; + @Contended Integer int57 = 57; + @Contended Integer int58 = 58; + @Contended Integer int59 = 59; + @Contended 
Integer int60 = 60; + @Contended Integer int61 = 61; + @Contended Integer int62 = 62; + @Contended Integer int63 = 63; + @Contended Integer int64 = 64; + @Contended Integer int65 = 65; + @Contended Integer int66 = 66; + @Contended Integer int67 = 67; + @Contended Integer int68 = 68; + @Contended Integer int69 = 69; + @Contended Integer int70 = 70; + @Contended Integer int71 = 71; + @Contended Integer int72 = 72; + @Contended Integer int73 = 73; + @Contended Integer int74 = 74; + @Contended Integer int75 = 75; + @Contended Integer int76 = 76; + @Contended Integer int77 = 77; + @Contended Integer int78 = 78; + @Contended Integer int79 = 79; + @Contended Integer int80 = 80; + @Contended Integer int81 = 81; + @Contended Integer int82 = 82; + @Contended Integer int83 = 83; + @Contended Integer int84 = 84; + @Contended Integer int85 = 85; + @Contended Integer int86 = 86; + @Contended Integer int87 = 87; + @Contended Integer int88 = 88; + @Contended Integer int89 = 89; + @Contended Integer int90 = 90; + @Contended Integer int91 = 91; + @Contended Integer int92 = 92; + @Contended Integer int93 = 93; + @Contended Integer int94 = 94; + @Contended Integer int95 = 95; + @Contended Integer int96 = 96; + @Contended Integer int97 = 97; + @Contended Integer int98 = 98; + @Contended Integer int99 = 99; + @Contended Integer int100 = 100; + @Contended Integer int101 = 101; + @Contended Integer int102 = 102; + @Contended Integer int103 = 103; + @Contended Integer int104 = 104; + @Contended Integer int105 = 105; + @Contended Integer int106 = 106; + @Contended Integer int107 = 107; + @Contended Integer int108 = 108; + @Contended Integer int109 = 109; + @Contended Integer int110 = 110; + @Contended Integer int111 = 111; + @Contended Integer int112 = 112; + @Contended Integer int113 = 113; + @Contended Integer int114 = 114; + @Contended Integer int115 = 115; + @Contended Integer int116 = 116; + @Contended Integer int117 = 117; + @Contended Integer int118 = 118; + @Contended Integer int119 
= 119; + @Contended Integer int120 = 120; + @Contended Integer int121 = 121; + @Contended Integer int122 = 122; + @Contended Integer int123 = 123; + @Contended Integer int124 = 124; + @Contended Integer int125 = 125; + @Contended Integer int126 = 126; + @Contended Integer int127 = 127; + @Contended Integer int128 = 128; + @Contended Integer int129 = 129; + @Contended Integer int130 = 130; + @Contended Integer int131 = 131; + @Contended Integer int132 = 132; + @Contended Integer int133 = 133; + @Contended Integer int134 = 134; + @Contended Integer int135 = 135; + @Contended Integer int136 = 136; + @Contended Integer int137 = 137; + @Contended Integer int138 = 138; + @Contended Integer int139 = 139; + @Contended Integer int140 = 140; + @Contended Integer int141 = 141; + @Contended Integer int142 = 142; + @Contended Integer int143 = 143; + @Contended Integer int144 = 144; + @Contended Integer int145 = 145; + @Contended Integer int146 = 146; + @Contended Integer int147 = 147; + @Contended Integer int148 = 148; + @Contended Integer int149 = 149; + @Contended Integer int150 = 150; + @Contended Integer int151 = 151; + @Contended Integer int152 = 152; + @Contended Integer int153 = 153; + @Contended Integer int154 = 154; + @Contended Integer int155 = 155; + @Contended Integer int156 = 156; + @Contended Integer int157 = 157; + @Contended Integer int158 = 158; + @Contended Integer int159 = 159; + @Contended Integer int160 = 160; + @Contended Integer int161 = 161; + @Contended Integer int162 = 162; + @Contended Integer int163 = 163; + @Contended Integer int164 = 164; + @Contended Integer int165 = 165; + @Contended Integer int166 = 166; + @Contended Integer int167 = 167; + @Contended Integer int168 = 168; + @Contended Integer int169 = 169; + @Contended Integer int170 = 170; + @Contended Integer int171 = 171; + @Contended Integer int172 = 172; + @Contended Integer int173 = 173; + @Contended Integer int174 = 174; + @Contended Integer int175 = 175; + @Contended Integer int176 = 
176; + @Contended Integer int177 = 177; + @Contended Integer int178 = 178; + @Contended Integer int179 = 179; + @Contended Integer int180 = 180; + @Contended Integer int181 = 181; + @Contended Integer int182 = 182; + @Contended Integer int183 = 183; + @Contended Integer int184 = 184; + @Contended Integer int185 = 185; + @Contended Integer int186 = 186; + @Contended Integer int187 = 187; + @Contended Integer int188 = 188; + @Contended Integer int189 = 189; + @Contended Integer int190 = 190; + @Contended Integer int191 = 191; + @Contended Integer int192 = 192; + @Contended Integer int193 = 193; + @Contended Integer int194 = 194; + @Contended Integer int195 = 195; + @Contended Integer int196 = 196; + @Contended Integer int197 = 197; + @Contended Integer int198 = 198; + @Contended Integer int199 = 199; + @Contended Integer int200 = 200; + @Contended Integer int201 = 201; + @Contended Integer int202 = 202; + @Contended Integer int203 = 203; + @Contended Integer int204 = 204; + @Contended Integer int205 = 205; + @Contended Integer int206 = 206; + @Contended Integer int207 = 207; + @Contended Integer int208 = 208; + @Contended Integer int209 = 209; + @Contended Integer int210 = 210; + @Contended Integer int211 = 211; + @Contended Integer int212 = 212; + @Contended Integer int213 = 213; + @Contended Integer int214 = 214; + @Contended Integer int215 = 215; + @Contended Integer int216 = 216; + @Contended Integer int217 = 217; + @Contended Integer int218 = 218; + @Contended Integer int219 = 219; + @Contended Integer int220 = 220; + @Contended Integer int221 = 221; + @Contended Integer int222 = 222; + @Contended Integer int223 = 223; + @Contended Integer int224 = 224; + @Contended Integer int225 = 225; + @Contended Integer int226 = 226; + @Contended Integer int227 = 227; + @Contended Integer int228 = 228; + @Contended Integer int229 = 229; + @Contended Integer int230 = 230; + @Contended Integer int231 = 231; + @Contended Integer int232 = 232; + @Contended Integer int233 = 233; 
+ @Contended Integer int234 = 234; + @Contended Integer int235 = 235; + @Contended Integer int236 = 236; + @Contended Integer int237 = 237; + @Contended Integer int238 = 238; + @Contended Integer int239 = 239; + @Contended Integer int240 = 240; + @Contended Integer int241 = 241; + @Contended Integer int242 = 242; + @Contended Integer int243 = 243; + @Contended Integer int244 = 244; + @Contended Integer int245 = 245; + @Contended Integer int246 = 246; + @Contended Integer int247 = 247; + @Contended Integer int248 = 248; + @Contended Integer int249 = 249; + @Contended Integer int250 = 250; + @Contended Integer int251 = 251; + @Contended Integer int252 = 252; + @Contended Integer int253 = 253; + @Contended Integer int254 = 254; + @Contended Integer int255 = 255; + @Contended Integer int256 = 256; + @Contended Integer int257 = 257; + @Contended Integer int258 = 258; + @Contended Integer int259 = 259; + @Contended Integer int260 = 260; + @Contended Integer int261 = 261; + @Contended Integer int262 = 262; + @Contended Integer int263 = 263; + @Contended Integer int264 = 264; + @Contended Integer int265 = 265; + @Contended Integer int266 = 266; + @Contended Integer int267 = 267; + @Contended Integer int268 = 268; + @Contended Integer int269 = 269; + @Contended Integer int270 = 270; + @Contended Integer int271 = 271; + @Contended Integer int272 = 272; + @Contended Integer int273 = 273; + @Contended Integer int274 = 274; + @Contended Integer int275 = 275; + @Contended Integer int276 = 276; + @Contended Integer int277 = 277; + @Contended Integer int278 = 278; + @Contended Integer int279 = 279; + @Contended Integer int280 = 280; + @Contended Integer int281 = 281; + @Contended Integer int282 = 282; + @Contended Integer int283 = 283; + @Contended Integer int284 = 284; + @Contended Integer int285 = 285; + @Contended Integer int286 = 286; + @Contended Integer int287 = 287; + @Contended Integer int288 = 288; + @Contended Integer int289 = 289; + @Contended Integer int290 = 290; + 
@Contended Integer int291 = 291; + @Contended Integer int292 = 292; + @Contended Integer int293 = 293; + @Contended Integer int294 = 294; + @Contended Integer int295 = 295; + @Contended Integer int296 = 296; + @Contended Integer int297 = 297; + @Contended Integer int298 = 298; + @Contended Integer int299 = 299; + @Contended Integer int300 = 300; +} diff --git a/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/hotspot/replacements/HotSpotAllocationSnippets.java b/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/hotspot/replacements/HotSpotAllocationSnippets.java index f5c579b542be..d1c39cadb83d 100644 --- a/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/hotspot/replacements/HotSpotAllocationSnippets.java +++ b/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/hotspot/replacements/HotSpotAllocationSnippets.java @@ -146,10 +146,11 @@ public HotSpotAllocationSnippets(GraalHotSpotVMConfig config, HotSpotRegistersPr @Snippet protected Object allocateInstance(KlassPointer hub, @ConstantParameter long size, + @ConstantParameter boolean forceSlowPath, @ConstantParameter FillContent fillContents, @ConstantParameter boolean emitMemoryBarrier, @ConstantParameter HotSpotAllocationProfilingData profilingData) { - Object result = allocateInstanceImpl(hub.asWord(), WordFactory.unsigned(size), fillContents, emitMemoryBarrier, true, profilingData); + Object result = allocateInstanceImpl(hub.asWord(), WordFactory.unsigned(size), forceSlowPath, fillContents, emitMemoryBarrier, true, profilingData); return piCastToSnippetReplaceeStamp(result); } @@ -194,7 +195,7 @@ public Object allocateInstanceDynamic(@NonNullParameter Class type, * binding of parameters is not yet supported by the GraphBuilderPlugin system. 
*/ UnsignedWord size = WordFactory.unsigned(layoutHelper); - return allocateInstanceImpl(nonNullHub.asWord(), size, fillContents, emitMemoryBarrier, false, profilingData); + return allocateInstanceImpl(nonNullHub.asWord(), size, false, fillContents, emitMemoryBarrier, false, profilingData); } } else { DeoptimizeNode.deopt(None, RuntimeConstraint); @@ -653,12 +654,14 @@ public void lower(NewInstanceNode node, LoweringTool tool) { HotSpotResolvedObjectType type = (HotSpotResolvedObjectType) node.instanceClass(); assert !type.isArray(); ConstantNode hub = ConstantNode.forConstant(KlassPointerStamp.klassNonNull(), type.klass(), tool.getMetaAccess(), graph); - long size = instanceSize(type); + long size = type.instanceSize(); OptionValues localOptions = graph.getOptions(); Arguments args = new Arguments(allocateInstance, graph.getGuardsStage(), tool.getLoweringStage()); args.add("hub", hub); - args.addConst("size", size); + // instanceSize returns a negative number for types which should be slow path allocated + args.addConst("size", Math.abs(size)); + args.addConst("forceSlowPath", size < 0); args.addConst("fillContents", FillContent.fromBoolean(node.fillContents())); args.addConst("emitMemoryBarrier", node.emitMemoryBarrier()); args.addConst("profilingData", getProfilingData(localOptions, "instance", type)); @@ -790,11 +793,6 @@ private static HotSpotResolvedObjectType lookupArrayClass(LoweringTool tool, Jav return HotSpotAllocationSnippets.lookupArrayClass(tool.getMetaAccess(), kind); } - private static long instanceSize(HotSpotResolvedObjectType type) { - long size = type.instanceSize(); - assert size >= 0; - return size; - } } private static class HotSpotAllocationProfilingData extends AllocationProfilingData { diff --git a/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/nodes/java/InstanceOfNode.java b/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/nodes/java/InstanceOfNode.java index 395ba8fdb791..28d6e9e5cce6 100644 --- 
a/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/nodes/java/InstanceOfNode.java +++ b/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/nodes/java/InstanceOfNode.java @@ -50,7 +50,6 @@ import jdk.graal.compiler.nodes.spi.CanonicalizerTool; import jdk.graal.compiler.nodes.spi.Lowerable; import jdk.graal.compiler.nodes.type.StampTool; - import jdk.vm.ci.meta.JavaKind; import jdk.vm.ci.meta.JavaTypeProfile; import jdk.vm.ci.meta.ResolvedJavaType; @@ -138,17 +137,19 @@ public static LogicNode findSynonym(ObjectStamp checkedStamp, ValueNode object, if (joinedStamp.isEmpty()) { // The check can never succeed, the intersection of the two stamps is empty. return LogicConstantNode.contradiction(); + } else if (joinedStamp.equals(inputStamp)) { + // The check will always succeed, the intersection of the two stamps is equal to the + // input stamp. + return LogicConstantNode.tautology(); + } else if (joinedStamp.alwaysNull()) { + // The intersection of the two stamps is always null => simplify the check. + return IsNullNode.create(object); } else { ObjectStamp meetStamp = (ObjectStamp) checkedStamp.meet(inputStamp); - if (checkedStamp.equals(meetStamp)) { - // The check will always succeed, the union of the two stamps is equal to the - // checked stamp. - return LogicConstantNode.tautology(); - } else if (checkedStamp.alwaysNull()) { - return IsNullNode.create(object); - } else if (Objects.equals(checkedStamp.type(), meetStamp.type()) && checkedStamp.isExactType() == meetStamp.isExactType() && checkedStamp.alwaysNull() == meetStamp.alwaysNull()) { + if (Objects.equals(checkedStamp.type(), meetStamp.type()) && checkedStamp.isExactType() == meetStamp.isExactType() && checkedStamp.alwaysNull() == meetStamp.alwaysNull()) { assert checkedStamp.nonNull() != inputStamp.nonNull(); - // The only difference makes the null-ness of the value => simplify the check. + // The only difference between the two stamps is their null-ness => simplify the + // check. 
if (checkedStamp.nonNull()) { return LogicNegationNode.create(IsNullNode.create(object)); } else { @@ -182,8 +183,7 @@ public Stamp getSucceedingStampForValue(boolean negated) { @Override public TriState tryFold(Stamp valueStamp) { - if (valueStamp instanceof ObjectStamp) { - ObjectStamp inputStamp = (ObjectStamp) valueStamp; + if (valueStamp instanceof ObjectStamp inputStamp) { ObjectStamp joinedStamp = (ObjectStamp) checkedStamp.join(inputStamp); if (joinedStamp.isEmpty()) { @@ -232,8 +232,7 @@ public static boolean intrinsify(GraphBuilderContext b, ResolvedJavaType type, V @Override public TriState implies(boolean thisNegated, LogicNode other) { - if (other instanceof InstanceOfNode) { - InstanceOfNode instanceOfNode = (InstanceOfNode) other; + if (other instanceof InstanceOfNode instanceOfNode) { if (instanceOfNode.getValue() == getValue()) { if (thisNegated) { // !X => Y diff --git a/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/replacements/AllocationSnippets.java b/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/replacements/AllocationSnippets.java index e55351118461..12154f7ebb33 100644 --- a/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/replacements/AllocationSnippets.java +++ b/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/replacements/AllocationSnippets.java @@ -48,6 +48,7 @@ public abstract class AllocationSnippets implements Snippets { protected Object allocateInstanceImpl(Word hub, UnsignedWord size, + boolean forceSlowPath, FillContent fillContents, boolean emitMemoryBarrier, boolean constantSize, @@ -57,7 +58,7 @@ protected Object allocateInstanceImpl(Word hub, Word top = readTlabTop(tlabInfo); Word end = readTlabEnd(tlabInfo); Word newTop = top.add(size); - if (useTLAB() && probability(FAST_PATH_PROBABILITY, shouldAllocateInTLAB(size, false)) && probability(FAST_PATH_PROBABILITY, newTop.belowOrEqual(end))) { + if (!forceSlowPath && useTLAB() && probability(FAST_PATH_PROBABILITY, shouldAllocateInTLAB(size, false)) 
&& probability(FAST_PATH_PROBABILITY, newTop.belowOrEqual(end))) { writeTlabTop(tlabInfo, newTop); emitPrefetchAllocate(newTop, false); result = formatObject(hub, size, top, fillContents, emitMemoryBarrier, constantSize, profilingData.snippetCounters); @@ -122,7 +123,7 @@ protected UnsignedWord arrayAllocationSize(int length, int arrayBaseOffset, int * We do an unsigned multiplication so that a negative array length will result in an array size * greater than Integer.MAX_VALUE. */ - public static long arrayAllocationSize(int length, int arrayBaseOffset, int log2ElementSize, int alignment) { + public static long arrayAllocationSize(long length, int arrayBaseOffset, int log2ElementSize, int alignment) { long size = ((length & 0xFFFFFFFFL) << log2ElementSize) + arrayBaseOffset + (alignment - 1); long mask = ~(alignment - 1); return size & mask; diff --git a/regex/ci/ci.jsonnet b/regex/ci/ci.jsonnet index a94b9707d80f..af51df34ca95 100644 --- a/regex/ci/ci.jsonnet +++ b/regex/ci/ci.jsonnet @@ -14,6 +14,15 @@ targets: ["gate"], }, + local regex_gate_jdkLatest = regex_common + common.deps.eclipse + common.deps.jdt + { + name: 'gate-regex-jdk' + self.jdk_version, + run: [ + ["mx", "build"], + ["mx", "unittest", "com.oracle.truffle.regex"], + ], + targets: ["gate"], + }, + local regex_gate_lite = regex_common + { name: 'gate-regex-mac-lite-jdk' + self.jdk_version, run: [ @@ -40,5 +49,7 @@ ] for jdk in [ common.labsjdk21, ] - ]), + ]) + [ + common.linux_amd64 + common.labsjdkLatest + regex_gate_jdkLatest, + ], } diff --git a/regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/test/OracleDBTests.java b/regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/test/OracleDBTests.java index 14e843ffc635..63bb7d8d6cf4 100644 --- a/regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/test/OracleDBTests.java +++ b/regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/test/OracleDBTests.java @@ 
-107,18 +107,36 @@ public void testQuantifiers() { test("x{4294967296}", "", "x{4294967296}", 0, true, 0, 13); test("x{4294967297}", "", "x{4294967297}", 0, true, 0, 13); test("x??", "", "x", 0, true, 0, 0); - test("x???", "", "x", 0, true, 0, 1); test("x{2}+", "", "x", 0, false); test("x{2}+", "", "xx", 0, true, 0, 2); test("x{2}+", "", "xxx", 0, true, 0, 2); test("x{2}+", "", "xxxx", 0, true, 0, 4); test("x{2}*", "", "xxxx", 0, true, 0, 4); test("x{2}*?", "", "xxxx", 0, true, 0, 0); - test("x{2}*??", "", "xxxx", 0, true, 0, 2); test("x{2}*???", "", "xxxx", 0, true, 0, 0); test("\\A*x\\Z+", "", "x", 0, true, 0, 1); test("\\A*x\\Z+", "", "xx", 0, true, 1, 2); test("\\A+x\\Z+", "", "xx", 0, false); + test("x????", "", "x?", 0, true, 0, 0); + test("x????", "", "xx?", 0, true, 0, 0); + test("x??????", "", "x?", 0, true, 0, 0); + test("x??????", "", "xx?", 0, true, 0, 0); + test("x{2}?", "", "xxxxx", 0, true, 0, 2); + test("x{2}??", "", "xxxxx", 0, true, 0, 2); + test("x{2}+", "", "xxxxx", 0, true, 0, 4); + test("x{2}*", "", "xxxxx", 0, true, 0, 4); + + // known to fail, suspected to be caused by LXR bug 35718208 + + // test("x???", "", "x", 0, true, 0, 1); + // test("x{2}*??", "", "xxxx", 0, true, 0, 2); + // test("x???", "", "x?", 0, true, 0, 1); + // test("x???", "", "xx?", 0, true, 0, 1); + // test("x?????", "", "x?", 0, true, 0, 1); + // test("x?????", "", "xx?", 0, true, 0, 1); + // test("(a{0,1})*b\\1", "", "aab", 0, true, 1, 3, 2, 2); + // test("(a{0,1})*b\\1", "", "aaba", 0, true, 1, 3, 2, 2); + // test("(a{0,1})*b\\1", "", "aabaa", 0, true, 1, 3, 2, 2); } @Test @@ -154,15 +172,8 @@ public void testCharClasses() { expectSyntaxError("[[.a.]-[:lower:]]+", "", "invalid range in regular expression", 7); expectSyntaxError("[[=a=]-[:lower:]]+", "", "invalid range in regular expression", 7); test("[[:upper:]-[.a.]]+", "", "a-A", 0, true, 0, 3); - test("[[=a=]]", "", "\u00e4", 0, false); - test("[[=c=]]", "", "\u010D", 0, false); test("[[=c=]-c]", "", "\u010d-=c", 0, 
true, 3, 4); - test("[[=c=]-]+", "", "\u010d-=c", 0, true, 1, 2); - // TODO: collator support - // test("[[=a=]]", "", "\u00e4", 0, true, 0, 1); - // test("[[=c=]]", "", "\u010D", 0, true, 0, 1); - // test("[[=c=]-c]", "", "\u010d-=c", 0, true, 3, 4); - // test("[[=c=]-]+", "", "\u010d-=c", 0, true, 0, 2); + test("[[=c=]-]+", "", "\u010d-=c", 0, true, 0, 2); } @Test @@ -170,7 +181,7 @@ public void testBackReferences() { expectSyntaxError("(\\2())", "", "invalid back reference in regular expression", 1); test("(\\1a)", "", "aa", 0, false); test("(\\1a|){2}", "", "aa", 0, true, 0, 0, 0, 0); - test("(\\1a|)*", "", "aa", 0, true, 0, 0, -1, -1); + test("(\\1a|)*", "", "aa", 0, true, 0, 0, 0, 0); test("(()b|\\2a){2}", "", "ba", 0, true, 0, 2, 1, 2, 0, 0); test("(a\\1)", "", "aa", 0, false); test("(a|b\\1){2}", "", "aba", 0, true, 0, 3, 1, 3); @@ -217,4 +228,670 @@ public void testSpecialGroups() { test(String.format("(?%s)", s), "", "?" + s, 0, true, 1, s.length() + 1, 1, s.length() + 1); } } + + @Test + public void generatedTests() { + /* GENERATED CODE BEGIN - KEEP THIS MARKER FOR AUTOMATIC UPDATES */ + test("abracadabra$", "", "abracadabracadabra", 0, true, 7, 18); + test("a...b", "", "abababbb", 0, true, 2, 7); + test("XXXXXX", "", "..XXXXXX", 0, true, 2, 8); + test("\\)", "", "()", 0, true, 1, 2); + test("a]", "", "a]a", 0, true, 0, 2); + test("}", "", "}", 0, true, 0, 1); + test("\\}", "", "}", 0, true, 0, 1); + test("\\]", "", "]", 0, true, 0, 1); + test("]", "", "]", 0, true, 0, 1); + test("]", "", "]", 0, true, 0, 1); + test("{", "", "{", 0, true, 0, 1); + test("}", "", "}", 0, true, 0, 1); + test("^a", "", "ax", 0, true, 0, 1); + test("\\^a", "", "a^a", 0, true, 1, 3); + test("a\\^", "", "a^", 0, true, 0, 2); + test("a$", "", "aa", 0, true, 1, 2); + test("a\\$", "", "a$", 0, true, 0, 2); + test("a($)", "", "aa", 0, true, 1, 2, 2, 2); + test("a*(^a)", "", "aa", 0, true, 0, 1, 0, 1); + test("(..)*(...)*", "", "a", 0, true, 0, 0, -1, -1, -1, -1); + 
test("(..)*(...)*", "", "abcd", 0, true, 0, 4, 2, 4, -1, -1); + test("(ab|a)(bc|c)", "", "abc", 0, true, 0, 3, 0, 2, 2, 3); + test("(ab)c|abc", "", "abc", 0, true, 0, 3, 0, 2); + test("a{0}b", "", "ab", 0, true, 1, 2); + test("(a*)(b?)(b+)b{3}", "", "aaabbbbbbb", 0, true, 0, 10, 0, 3, 3, 4, 4, 7); + test("(a*)(b{0,1})(b{1,})b{3}", "", "aaabbbbbbb", 0, true, 0, 10, 0, 3, 3, 4, 4, 7); + test("a{9876543210}", "", "a", 0, false); + test("((a|a)|a)", "", "a", 0, true, 0, 1, 0, 1, 0, 1); + test("(a*)(a|aa)", "", "aaaa", 0, true, 0, 4, 0, 3, 3, 4); + test("a*(a.|aa)", "", "aaaa", 0, true, 0, 4, 2, 4); + test("a(b)|c(d)|a(e)f", "", "aef", 0, true, 0, 3, -1, -1, -1, -1, 1, 2); + test("(a|b)?.*", "", "b", 0, true, 0, 1, 0, 1); + test("(a|b)c|a(b|c)", "", "ac", 0, true, 0, 2, 0, 1, -1, -1); + test("(a|b)c|a(b|c)", "", "ab", 0, true, 0, 2, -1, -1, 1, 2); + test("(a|b)*c|(a|ab)*c", "", "abc", 0, true, 0, 3, 1, 2, -1, -1); + test("(a|b)*c|(a|ab)*c", "", "xc", 0, true, 1, 2, -1, -1, -1, -1); + test("(.a|.b).*|.*(.a|.b)", "", "xa", 0, true, 0, 2, 0, 2, -1, -1); + test("a?(ab|ba)ab", "", "abab", 0, true, 0, 4, 0, 2); + test("a?(ac{0}b|ba)ab", "", "abab", 0, true, 0, 4, 0, 2); + test("ab|abab", "", "abbabab", 0, true, 0, 2); + test("aba|bab|bba", "", "baaabbbaba", 0, true, 5, 8); + test("aba|bab", "", "baaabbbaba", 0, true, 6, 9); + test("(aa|aaa)*|(a|aaaaa)", "", "aa", 0, true, 0, 2, 0, 2, -1, -1); + test("(a.|.a.)*|(a|.a...)", "", "aa", 0, true, 0, 2, 0, 2, -1, -1); + test("ab|a", "", "xabc", 0, true, 1, 3); + test("ab|a", "", "xxabc", 0, true, 2, 4); + test("(Ab|cD)*", "", "aBcD", 0, true, 0, 0, -1, -1); + test("[^-]", "", "--a", 0, true, 2, 3); + test("[a-]*", "", "--a", 0, true, 0, 3); + test("[a-m-]*", "", "--amoma--", 0, true, 0, 4); + test(":::1:::0:|:::1:1:0:", "", ":::0:::1:::1:::0:", 0, true, 8, 17); + test(":::1:::0:|:::1:1:1:", "", ":::0:::1:::1:::0:", 0, true, 8, 17); + test("[[:upper:]]", "", "A", 0, true, 0, 1); + test("[[:lower:]]+", "", "`az{", 0, true, 1, 3); + 
test("[[:upper:]]+", "", "@AZ[", 0, true, 1, 3); + test("[[-]]", "", "[[-]]", 0, true, 2, 4); + test("\\n", "", "\\n", 0, true, 1, 2); + test("\\n", "", "\\n", 0, true, 1, 2); + test("[^a]", "", "\\n", 0, true, 0, 1); + test("\\na", "", "\\na", 0, true, 1, 3); + test("(a)(b)(c)", "", "abc", 0, true, 0, 3, 0, 1, 1, 2, 2, 3); + test("xxx", "", "xxx", 0, true, 0, 3); + test("(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\\* */?)0*[6-7]))([^0-9]|$)", "", "feb 6,", 0, true, 0, 6, 0, 0, 0, 5, 0, 5, 0, 4, 5, 6); + test("(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\\* */?)0*[6-7]))([^0-9]|$)", "", "2/7", 0, true, 0, 3, 0, 0, 0, 3, 0, 3, 0, 2, 3, 3); + test("(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\\* */?)0*[6-7]))([^0-9]|$)", "", "feb 1,Feb 6", 0, true, 5, 11, 5, 6, 6, 11, 6, 11, 6, 10, 11, 11); + test("((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))", "", "x", 0, true, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1); + test("((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))*", "", "xx", 0, true, 0, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2); + test("a?(ab|ba)*", "", "ababababababababababababababababababababababababababababababababababababababababa", 0, true, 0, 81, 79, 81); + test("abaa|abbaa|abbbaa|abbbbaa", "", "ababbabbbabbbabbbbabbbbaa", 0, true, 18, 25); + test("abaa|abbaa|abbbaa|abbbbaa", "", "ababbabbbabbbabbbbabaa", 0, true, 18, 22); + test("aaac|aabc|abac|abbc|baac|babc|bbac|bbbc", "", "baaabbbabac", 0, true, 7, 11); + test(".*", "", "\\x01\\xff", 0, true, 0, 8); + test("aaaa|bbbb|cccc|ddddd|eeeeee|fffffff|gggg|hhhh|iiiii|jjjjj|kkkkk|llll", "", "XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa", 0, true, 53, 57); + test("aaaa\\nbbbb\\ncccc\\nddddd\\neeeeee\\nfffffff\\ngggg\\nhhhh\\niiiii\\njjjjj\\nkkkkk\\nllll", "", "XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa", 0, false); + test("a*a*a*a*a*b", "", "aaaaaaaaab", 0, true, 0, 10); + test("^", "", "a", 0, true, 0, 0); + test("$", "", "a", 0, true, 1, 1); + test("^$", "", "a", 
0, false); + test("^a$", "", "a", 0, true, 0, 1); + test("abc", "", "abc", 0, true, 0, 3); + test("abc", "", "xabcy", 0, true, 1, 4); + test("abc", "", "ababc", 0, true, 2, 5); + test("ab*c", "", "abc", 0, true, 0, 3); + test("ab*bc", "", "abc", 0, true, 0, 3); + test("ab*bc", "", "abbc", 0, true, 0, 4); + test("ab*bc", "", "abbbbc", 0, true, 0, 6); + test("ab+bc", "", "abbc", 0, true, 0, 4); + test("ab+bc", "", "abbbbc", 0, true, 0, 6); + test("ab?bc", "", "abbc", 0, true, 0, 4); + test("ab?bc", "", "abc", 0, true, 0, 3); + test("ab?c", "", "abc", 0, true, 0, 3); + test("^abc$", "", "abc", 0, true, 0, 3); + test("^abc", "", "abcc", 0, true, 0, 3); + test("abc$", "", "aabc", 0, true, 1, 4); + test("^", "", "abc", 0, true, 0, 0); + test("$", "", "abc", 0, true, 3, 3); + test("a.c", "", "abc", 0, true, 0, 3); + test("a.c", "", "axc", 0, true, 0, 3); + test("a.*c", "", "axyzc", 0, true, 0, 5); + test("a[bc]d", "", "abd", 0, true, 0, 3); + test("a[b-d]e", "", "ace", 0, true, 0, 3); + test("a[b-d]", "", "aac", 0, true, 1, 3); + test("a[-b]", "", "a-", 0, true, 0, 2); + test("a[b-]", "", "a-", 0, true, 0, 2); + test("a]", "", "a]", 0, true, 0, 2); + test("a[]]b", "", "a]b", 0, true, 0, 3); + test("a[^bc]d", "", "aed", 0, true, 0, 3); + test("a[^-b]c", "", "adc", 0, true, 0, 3); + test("a[^]b]c", "", "adc", 0, true, 0, 3); + test("ab|cd", "", "abc", 0, true, 0, 2); + test("ab|cd", "", "abcd", 0, true, 0, 2); + test("a\\(b", "", "a(b", 0, true, 0, 3); + test("a\\(*b", "", "ab", 0, true, 0, 2); + test("a\\(*b", "", "a((b", 0, true, 0, 4); + test("((a))", "", "abc", 0, true, 0, 1, 0, 1, 0, 1); + test("(a)b(c)", "", "abc", 0, true, 0, 3, 0, 1, 2, 3); + test("a+b+c", "", "aabbabc", 0, true, 4, 7); + test("a*", "", "aaa", 0, true, 0, 3); + test("(a*)*", "", "-", 0, true, 0, 0, 0, 0); + test("(a*)+", "", "-", 0, true, 0, 0, 0, 0); + test("(a*|b)*", "", "-", 0, true, 0, 0, 0, 0); + test("(a+|b)*", "", "ab", 0, true, 0, 2, 1, 2); + test("(a+|b)+", "", "ab", 0, true, 0, 2, 1, 2); + 
test("(a+|b)?", "", "ab", 0, true, 0, 1, 0, 1); + test("[^ab]*", "", "cde", 0, true, 0, 3); + test("(^)*", "", "-", 0, true, 0, 0, 0, 0); + test("a*", "", "a", 0, true, 0, 1); + test("([abc])*d", "", "abbbcd", 0, true, 0, 6, 4, 5); + test("([abc])*bcd", "", "abcd", 0, true, 0, 4, 0, 1); + test("a|b|c|d|e", "", "e", 0, true, 0, 1); + test("(a|b|c|d|e)f", "", "ef", 0, true, 0, 2, 0, 1); + test("((a*|b))*", "", "-", 0, true, 0, 0, 0, 0, 0, 0); + test("abcd*efg", "", "abcdefg", 0, true, 0, 7); + test("ab*", "", "xabyabbbz", 0, true, 1, 3); + test("ab*", "", "xayabbbz", 0, true, 1, 2); + test("(ab|cd)e", "", "abcde", 0, true, 2, 5, 2, 4); + test("[abhgefdc]ij", "", "hij", 0, true, 0, 3); + test("(a|b)c*d", "", "abcd", 0, true, 1, 4, 1, 2); + test("(ab|ab*)bc", "", "abc", 0, true, 0, 3, 0, 1); + test("a([bc]*)c*", "", "abc", 0, true, 0, 3, 1, 3); + test("a([bc]*)(c*d)", "", "abcd", 0, true, 0, 4, 1, 3, 3, 4); + test("a([bc]+)(c*d)", "", "abcd", 0, true, 0, 4, 1, 3, 3, 4); + test("a([bc]*)(c+d)", "", "abcd", 0, true, 0, 4, 1, 2, 2, 4); + test("a[bcd]*dcdcde", "", "adcdcde", 0, true, 0, 7); + test("(ab|a)b*c", "", "abc", 0, true, 0, 3, 0, 2); + test("((a)(b)c)(d)", "", "abcd", 0, true, 0, 4, 0, 3, 0, 1, 1, 2, 3, 4); + test("[A-Za-z_][A-Za-z0-9_]*", "", "alpha", 0, true, 0, 5); + test("^a(bc+|b[eh])g|.h$", "", "abh", 0, true, 1, 3, -1, -1); + test("(bc+d$|ef*g.|h?i(j|k))", "", "effgz", 0, true, 0, 5, 0, 5, -1, -1); + test("(bc+d$|ef*g.|h?i(j|k))", "", "ij", 0, true, 0, 2, 0, 2, 1, 2); + test("(bc+d$|ef*g.|h?i(j|k))", "", "reffgz", 0, true, 1, 6, 1, 6, -1, -1); + test("(((((((((a)))))))))", "", "a", 0, true, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1); + test("multiple words", "", "multiple words yeah", 0, true, 0, 14); + test("(.*)c(.*)", "", "abcde", 0, true, 0, 5, 0, 2, 3, 5); + test("abcd", "", "abcd", 0, true, 0, 4); + test("a(bc)d", "", "abcd", 0, true, 0, 4, 1, 3); + test("a[\u0001-\u0003]?c", "", "a\u0002c", 0, true, 0, 3); + test("M[ou]'?am+[ae]r 
.*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Muammar Qaddafi", 0, true, 0, 15, -1, -1, 10, 12); + test("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Mo'ammar Gadhafi", 0, true, 0, 16, -1, -1, 11, 13); + test("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Muammar Kaddafi", 0, true, 0, 15, -1, -1, 10, 12); + test("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Muammar Qadhafi", 0, true, 0, 15, -1, -1, 10, 12); + test("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Muammar Gadafi", 0, true, 0, 14, -1, -1, 10, 11); + test("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Mu'ammar Qadafi", 0, true, 0, 15, -1, -1, 11, 12); + test("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Moamar Gaddafi", 0, true, 0, 14, -1, -1, 9, 11); + test("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Mu'ammar Qadhdhafi", 0, true, 0, 18, -1, -1, 13, 15); + test("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Muammar Khaddafi", 0, true, 0, 16, -1, -1, 11, 13); + test("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Muammar Ghaddafy", 0, true, 0, 16, -1, -1, 11, 13); + test("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Muammar Ghadafi", 0, true, 0, 15, -1, -1, 11, 12); + test("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Muammar Ghaddafi", 0, true, 0, 16, -1, -1, 11, 13); + test("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Muamar Kaddafi", 0, true, 0, 14, -1, -1, 9, 11); + test("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Muammar Quathafi", 0, true, 0, 16, -1, -1, 11, 13); + test("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Muammar Gheddafi", 0, true, 0, 16, -1, -1, 11, 13); + test("M[ou]'?am+[ae]r .*([AEae]l[- 
])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Moammar Khadafy", 0, true, 0, 15, -1, -1, 11, 12); + test("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Moammar Qudhafi", 0, true, 0, 15, -1, -1, 10, 12); + test("a+(b|c)*d+", "", "aabcdd", 0, true, 0, 6, 3, 4); + test("^.+$", "", "vivi", 0, true, 0, 4); + test("^(.+)$", "", "vivi", 0, true, 0, 4, 0, 4); + test("^([^!.]+).att.com!(.+)$", "", "gryphon.att.com!eby", 0, true, 0, 19, 0, 7, 16, 19); + test("^([^!]+!)?([^!]+)$", "", "bas", 0, true, 0, 3, -1, -1, 0, 3); + test("^([^!]+!)?([^!]+)$", "", "bar!bas", 0, true, 0, 7, 0, 4, 4, 7); + test("^([^!]+!)?([^!]+)$", "", "foo!bas", 0, true, 0, 7, 0, 4, 4, 7); + test("^.+!([^!]+!)([^!]+)$", "", "foo!bar!bas", 0, true, 0, 11, 4, 8, 8, 11); + test("((foo)|(bar))!bas", "", "bar!bas", 0, true, 0, 7, 0, 3, -1, -1, 0, 3); + test("((foo)|(bar))!bas", "", "foo!bar!bas", 0, true, 4, 11, 4, 7, -1, -1, 4, 7); + test("((foo)|(bar))!bas", "", "foo!bas", 0, true, 0, 7, 0, 3, 0, 3, -1, -1); + test("((foo)|bar)!bas", "", "bar!bas", 0, true, 0, 7, 0, 3, -1, -1); + test("((foo)|bar)!bas", "", "foo!bar!bas", 0, true, 4, 11, 4, 7, -1, -1); + test("((foo)|bar)!bas", "", "foo!bas", 0, true, 0, 7, 0, 3, 0, 3); + test("(foo|(bar))!bas", "", "bar!bas", 0, true, 0, 7, 0, 3, 0, 3); + test("(foo|(bar))!bas", "", "foo!bar!bas", 0, true, 4, 11, 4, 7, 4, 7); + test("(foo|(bar))!bas", "", "foo!bas", 0, true, 0, 7, 0, 3, -1, -1); + test("(foo|bar)!bas", "", "bar!bas", 0, true, 0, 7, 0, 3); + test("(foo|bar)!bas", "", "foo!bar!bas", 0, true, 4, 11, 4, 7); + test("(foo|bar)!bas", "", "foo!bas", 0, true, 0, 7, 0, 3); + test("^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", "", "foo!bar!bas", 0, true, 0, 11, 0, 11, -1, -1, -1, -1, 4, 8, 8, 11); + test("^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$", "", "bas", 0, true, 0, 3, -1, -1, 0, 3, -1, -1, -1, -1); + test("^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$", "", "bar!bas", 0, true, 0, 7, 0, 4, 4, 7, -1, -1, -1, -1); + 
test("^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$", "", "foo!bar!bas", 0, true, 0, 11, -1, -1, -1, -1, 4, 8, 8, 11); + test("^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$", "", "foo!bas", 0, true, 0, 7, 0, 4, 4, 7, -1, -1, -1, -1); + test("^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", "", "bas", 0, true, 0, 3, 0, 3, -1, -1, 0, 3, -1, -1, -1, -1); + test("^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", "", "bar!bas", 0, true, 0, 7, 0, 7, 0, 4, 4, 7, -1, -1, -1, -1); + test("^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", "", "foo!bar!bas", 0, true, 0, 11, 0, 11, -1, -1, -1, -1, 4, 8, 8, 11); + test("^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", "", "foo!bas", 0, true, 0, 7, 0, 7, 0, 4, 4, 7, -1, -1, -1, -1); + test(".*(/XXX).*", "", "/XXX", 0, true, 0, 4, 0, 4); + test(".*(\\\\XXX).*", "", "\\XXX", 0, true, 0, 4, 0, 4); + test("\\\\XXX", "", "\\XXX", 0, true, 0, 4); + test(".*(/000).*", "", "/000", 0, true, 0, 4, 0, 4); + test(".*(\\\\000).*", "", "\\000", 0, true, 0, 4, 0, 4); + test("\\\\000", "", "\\000", 0, true, 0, 4); + test("aa*", "", "xaxaax", 0, true, 1, 2); + test("(a*)(ab)*(b*)", "", "abc", 0, true, 0, 2, 0, 1, -1, -1, 1, 2); + test("(a*)(ab)*(b*)", "", "abc", 0, true, 0, 2, 0, 1, -1, -1, 1, 2); + test("((a*)(ab)*)((b*)(a*))", "", "aba", 0, true, 0, 3, 0, 1, 0, 1, -1, -1, 1, 3, 1, 2, 2, 3); + test("((a*)(ab)*)((b*)(a*))", "", "aba", 0, true, 0, 3, 0, 1, 0, 1, -1, -1, 1, 3, 1, 2, 2, 3); + test("(...?.?)*", "", "xxxxxx", 0, true, 0, 6, 4, 6); + test("(...?.?)*", "", "xxxxxx", 0, true, 0, 6, 4, 6); + test("(...?.?)*", "", "xxxxxx", 0, true, 0, 6, 4, 6); + test("(a|ab)(bc|c)", "", "abcabc", 0, true, 0, 3, 0, 1, 1, 3); + test("(a|ab)(bc|c)", "", "abcabc", 0, true, 0, 3, 0, 1, 1, 3); + test("(aba|a*b)(aba|a*b)", "", "ababa", 0, true, 0, 4, 0, 3, 3, 4); + test("(aba|a*b)(aba|a*b)", "", "ababa", 0, true, 0, 4, 0, 3, 3, 4); + test("a(b)*\\1", "", "a", 0, false); + test("a(b)*\\1", "", "a", 0, false); + test("a(b)*\\1", "", "abab", 0, false); + test("(a*){2}", "", "xxxxx", 0, true, 0, 0, 0, 0); 
+ test("(a*){2}", "", "xxxxx", 0, true, 0, 0, 0, 0); + test("a(b)*\\1", "", "abab", 0, false); + test("a(b)*\\1", "", "abab", 0, false); + test("a(b)*\\1", "", "abab", 0, false); + test("(a*)*", "", "a", 0, true, 0, 1, 1, 1); + test("(a*)*", "", "ax", 0, true, 0, 1, 1, 1); + test("(a*)*", "", "a", 0, true, 0, 1, 1, 1); + test("(aba|a*b)*", "", "ababa", 0, true, 0, 4, 3, 4); + test("(aba|a*b)*", "", "ababa", 0, true, 0, 4, 3, 4); + test("(aba|a*b)*", "", "ababa", 0, true, 0, 4, 3, 4); + test("(a(b)?)+", "", "aba", 0, true, 0, 3, 2, 3, 1, 2); + test("(a(b)?)+", "", "aba", 0, true, 0, 3, 2, 3, 1, 2); + test("(a(b)*)*\\2", "", "abab", 0, true, 0, 4, 2, 3, 1, 2); + test("(a(b)*)*\\2", "", "abab", 0, true, 0, 4, 2, 3, 1, 2); + test("(a?)((ab)?)(b?)a?(ab)?b?", "", "abab", 0, true, 0, 4, 0, 1, 1, 1, -1, -1, 1, 2, -1, -1); + test(".*(.*)", "", "ab", 0, true, 0, 2, 2, 2); + test(".*(.*)", "", "ab", 0, true, 0, 2, 2, 2); + test("(a|ab)(c|bcd)", "", "abcd", 0, true, 0, 4, 0, 1, 1, 4); + test("(a|ab)(bcd|c)", "", "abcd", 0, true, 0, 4, 0, 1, 1, 4); + test("(ab|a)(c|bcd)", "", "abcd", 0, true, 0, 3, 0, 2, 2, 3); + test("(ab|a)(bcd|c)", "", "abcd", 0, true, 0, 3, 0, 2, 2, 3); + test("((a|ab)(c|bcd))(d*)", "", "abcd", 0, true, 0, 4, 0, 4, 0, 1, 1, 4, 4, 4); + test("((a|ab)(bcd|c))(d*)", "", "abcd", 0, true, 0, 4, 0, 4, 0, 1, 1, 4, 4, 4); + test("((ab|a)(c|bcd))(d*)", "", "abcd", 0, true, 0, 4, 0, 3, 0, 2, 2, 3, 3, 4); + test("((ab|a)(bcd|c))(d*)", "", "abcd", 0, true, 0, 4, 0, 3, 0, 2, 2, 3, 3, 4); + test("(a|ab)((c|bcd)(d*))", "", "abcd", 0, true, 0, 4, 0, 1, 1, 4, 1, 4, 4, 4); + test("(a|ab)((bcd|c)(d*))", "", "abcd", 0, true, 0, 4, 0, 1, 1, 4, 1, 4, 4, 4); + test("(ab|a)((c|bcd)(d*))", "", "abcd", 0, true, 0, 4, 0, 2, 2, 4, 2, 3, 3, 4); + test("(ab|a)((bcd|c)(d*))", "", "abcd", 0, true, 0, 4, 0, 2, 2, 4, 2, 3, 3, 4); + test("(a*)(b|abc)", "", "abc", 0, true, 0, 2, 0, 1, 1, 2); + test("(a*)(abc|b)", "", "abc", 0, true, 0, 2, 0, 1, 1, 2); + test("((a*)(b|abc))(c*)", "", "abc", 0, 
true, 0, 3, 0, 2, 0, 1, 1, 2, 2, 3); + test("((a*)(abc|b))(c*)", "", "abc", 0, true, 0, 3, 0, 2, 0, 1, 1, 2, 2, 3); + test("(a*)((b|abc)(c*))", "", "abc", 0, true, 0, 3, 0, 1, 1, 3, 1, 2, 2, 3); + test("(a*)((abc|b)(c*))", "", "abc", 0, true, 0, 3, 0, 1, 1, 3, 1, 2, 2, 3); + test("(a*)(b|abc)", "", "abc", 0, true, 0, 2, 0, 1, 1, 2); + test("(a*)(abc|b)", "", "abc", 0, true, 0, 2, 0, 1, 1, 2); + test("((a*)(b|abc))(c*)", "", "abc", 0, true, 0, 3, 0, 2, 0, 1, 1, 2, 2, 3); + test("((a*)(abc|b))(c*)", "", "abc", 0, true, 0, 3, 0, 2, 0, 1, 1, 2, 2, 3); + test("(a*)((b|abc)(c*))", "", "abc", 0, true, 0, 3, 0, 1, 1, 3, 1, 2, 2, 3); + test("(a*)((abc|b)(c*))", "", "abc", 0, true, 0, 3, 0, 1, 1, 3, 1, 2, 2, 3); + test("(a|ab)", "", "ab", 0, true, 0, 1, 0, 1); + test("(ab|a)", "", "ab", 0, true, 0, 2, 0, 2); + test("(a|ab)(b*)", "", "ab", 0, true, 0, 2, 0, 1, 1, 2); + test("(ab|a)(b*)", "", "ab", 0, true, 0, 2, 0, 2, 2, 2); + test("a+", "", "xaax", 0, true, 1, 3); + test(".(a*).", "", "xaax", 0, true, 0, 4, 1, 3); + test("(a?)((ab)?)", "", "ab", 0, true, 0, 1, 0, 1, 1, 1, -1, -1); + test("(a?)((ab)?)(b?)", "", "ab", 0, true, 0, 2, 0, 1, 1, 1, -1, -1, 1, 2); + test("((a?)((ab)?))(b?)", "", "ab", 0, true, 0, 2, 0, 1, 0, 1, 1, 1, -1, -1, 1, 2); + test("(a?)(((ab)?)(b?))", "", "ab", 0, true, 0, 2, 0, 1, 1, 2, 1, 1, -1, -1, 1, 2); + test("(.?)", "", "x", 0, true, 0, 1, 0, 1); + test("(.?){1}", "", "x", 0, true, 0, 1, 0, 1); + test("(.?)(.?)", "", "x", 0, true, 0, 1, 0, 1, 1, 1); + test("(.?){2}", "", "x", 0, true, 0, 1, 1, 1); + test("(.?)*", "", "x", 0, true, 0, 1, 1, 1); + test("(.?.?)", "", "xxx", 0, true, 0, 2, 0, 2); + test("(.?.?){1}", "", "xxx", 0, true, 0, 2, 0, 2); + test("(.?.?)(.?.?)", "", "xxx", 0, true, 0, 3, 0, 2, 2, 3); + test("(.?.?){2}", "", "xxx", 0, true, 0, 3, 2, 3); + test("(.?.?)(.?.?)(.?.?)", "", "xxx", 0, true, 0, 3, 0, 2, 2, 3, 3, 3); + test("(.?.?){3}", "", "xxx", 0, true, 0, 3, 3, 3); + test("(.?.?)*", "", "xxx", 0, true, 0, 3, 3, 3); + 
test("a?((ab)?)(b?)", "", "ab", 0, true, 0, 2, 1, 1, -1, -1, 1, 2); + test("(a?)((ab)?)b?", "", "ab", 0, true, 0, 2, 0, 1, 1, 1, -1, -1); + test("a?((ab)?)b?", "", "ab", 0, true, 0, 2, 1, 1, -1, -1); + test("(a*){2}", "", "xxxxx", 0, true, 0, 0, 0, 0); + test("(ab?)(b?a)", "", "aba", 0, true, 0, 3, 0, 2, 2, 3); + test("(a|ab)(ba|a)", "", "aba", 0, true, 0, 3, 0, 1, 1, 3); + test("(a|ab|ba)", "", "aba", 0, true, 0, 1, 0, 1); + test("(a|ab|ba)(a|ab|ba)", "", "aba", 0, true, 0, 3, 0, 1, 1, 3); + test("(a|ab|ba)*", "", "aba", 0, true, 0, 3, 1, 3); + test("(aba|a*b)", "", "ababa", 0, true, 0, 3, 0, 3); + test("(aba|a*b)(aba|a*b)", "", "ababa", 0, true, 0, 4, 0, 3, 3, 4); + test("(aba|a*b)*", "", "ababa", 0, true, 0, 4, 3, 4); + test("(aba|ab|a)", "", "ababa", 0, true, 0, 3, 0, 3); + test("(aba|ab|a)(aba|ab|a)", "", "ababa", 0, true, 0, 5, 0, 2, 2, 5); + test("(aba|ab|a)*", "", "ababa", 0, true, 0, 3, 0, 3); + test("(a(b)?)", "", "aba", 0, true, 0, 2, 0, 2, 1, 2); + test("(a(b)?)(a(b)?)", "", "aba", 0, true, 0, 3, 0, 2, 1, 2, 2, 3, -1, -1); + test("(a(b)?)+", "", "aba", 0, true, 0, 3, 2, 3, 1, 2); + test("(.*)(.*)", "", "xx", 0, true, 0, 2, 0, 2, 2, 2); + test(".*(.*)", "", "xx", 0, true, 0, 2, 2, 2); + test("(a.*z|b.*y)", "", "azbazby", 0, true, 0, 5, 0, 5); + test("(a.*z|b.*y)(a.*z|b.*y)", "", "azbazby", 0, true, 0, 7, 0, 5, 5, 7); + test("(a.*z|b.*y)*", "", "azbazby", 0, true, 0, 7, 5, 7); + test("(.|..)(.*)", "", "ab", 0, true, 0, 2, 0, 1, 1, 2); + test("((..)*(...)*)", "", "xxx", 0, true, 0, 2, 0, 2, 0, 2, -1, -1); + test("((..)*(...)*)((..)*(...)*)", "", "xxx", 0, true, 0, 2, 0, 2, 0, 2, -1, -1, 2, 2, -1, -1, -1, -1); + test("((..)*(...)*)*", "", "xxx", 0, true, 0, 2, 2, 2, 0, 2, -1, -1); + test("(a{0,1})*b\\1", "", "ab", 0, true, 0, 2, 1, 1); + test("(a*)*b\\1", "", "ab", 0, true, 0, 2, 1, 1); + test("(a*)b\\1*", "", "ab", 0, true, 0, 2, 0, 1); + test("(a*)*b\\1*", "", "ab", 0, true, 0, 2, 1, 1); + test("(a{0,1})*b(\\1)", "", "ab", 0, true, 0, 2, 1, 1, 2, 2); + 
test("(a*)*b(\\1)", "", "ab", 0, true, 0, 2, 1, 1, 2, 2); + test("(a*)b(\\1)*", "", "ab", 0, true, 0, 2, 0, 1, -1, -1); + test("(a*)*b(\\1)*", "", "ab", 0, true, 0, 2, 1, 1, 2, 2); + test("(a{0,1})*b\\1", "", "aba", 0, true, 0, 2, 1, 1); + test("(a*)*b\\1", "", "aba", 0, true, 0, 2, 1, 1); + test("(a*)b\\1*", "", "aba", 0, true, 0, 3, 0, 1); + test("(a*)*b\\1*", "", "aba", 0, true, 0, 2, 1, 1); + test("(a*)*b(\\1)*", "", "aba", 0, true, 0, 2, 1, 1, 2, 2); + test("(a{0,1})*b\\1", "", "abaa", 0, true, 0, 2, 1, 1); + test("(a*)*b\\1", "", "abaa", 0, true, 0, 2, 1, 1); + test("(a*)b\\1*", "", "abaa", 0, true, 0, 4, 0, 1); + test("(a*)*b\\1*", "", "abaa", 0, true, 0, 2, 1, 1); + test("(a*)*b(\\1)*", "", "abaa", 0, true, 0, 2, 1, 1, 2, 2); + test("(a*)*b\\1", "", "aab", 0, true, 0, 3, 2, 2); + test("(a*)b\\1*", "", "aab", 0, true, 0, 3, 0, 2); + test("(a*)*b\\1*", "", "aab", 0, true, 0, 3, 2, 2); + test("(a*)*b(\\1)*", "", "aab", 0, true, 0, 3, 2, 2, 3, 3); + test("(a*)*b\\1", "", "aaba", 0, true, 0, 3, 2, 2); + test("(a*)b\\1*", "", "aaba", 0, true, 0, 3, 0, 2); + test("(a*)*b\\1*", "", "aaba", 0, true, 0, 3, 2, 2); + test("(a*)*b(\\1)*", "", "aaba", 0, true, 0, 3, 2, 2, 3, 3); + test("(a*)*b\\1", "", "aabaa", 0, true, 0, 3, 2, 2); + test("(a*)b\\1*", "", "aabaa", 0, true, 0, 5, 0, 2); + test("(a*)*b\\1*", "", "aabaa", 0, true, 0, 3, 2, 2); + test("(a*)*b(\\1)*", "", "aabaa", 0, true, 0, 3, 2, 2, 3, 3); + test("(x)*a\\1", "", "a", 0, false); + test("(x)*a\\1*", "", "a", 0, true, 0, 1, -1, -1); + test("(x)*a(\\1)", "", "a", 0, false); + test("(x)*a(\\1)*", "", "a", 0, true, 0, 1, -1, -1, -1, -1); + test("(aa(b(b))?)+", "", "aabbaa", 0, true, 0, 6, 4, 6, 2, 4, 3, 4); + test("(a(b)?)+", "", "aba", 0, true, 0, 3, 2, 3, 1, 2); + test("([ab]+)([bc]+)([cd]*)", "", "abcd", 0, true, 0, 4, 0, 2, 2, 3, 3, 4); + test("([ab]*)([bc]*)([cd]*)\\1", "", "abcdaa", 0, true, 0, 5, 0, 1, 1, 3, 3, 4); + test("([ab]*)([bc]*)([cd]*)\\1", "", "abcdab", 0, true, 0, 6, 0, 2, 2, 3, 3, 4); + 
test("([ab]*)([bc]*)([cd]*)\\1*", "", "abcdaa", 0, true, 0, 4, 0, 2, 2, 3, 3, 4); + test("([ab]*)([bc]*)([cd]*)\\1*", "", "abcdab", 0, true, 0, 6, 0, 2, 2, 3, 3, 4); + test("^(A([^B]*))?(B(.*))?", "", "Aa", 0, true, 0, 2, 0, 2, 1, 2, -1, -1, -1, -1); + test("^(A([^B]*))?(B(.*))?", "", "Bb", 0, true, 0, 2, -1, -1, -1, -1, 0, 2, 1, 2); + test(".*([AB]).*\\1", "", "ABA", 0, true, 0, 3, 0, 1); + test("[^A]*A", "", "\\nA", 0, true, 0, 3); + test("(a|ab)(c|bcd)(d*)", "", "abcd", 0, true, 0, 4, 0, 1, 1, 4, 4, 4); + test("(a|ab)(bcd|c)(d*)", "", "abcd", 0, true, 0, 4, 0, 1, 1, 4, 4, 4); + test("(ab|a)(c|bcd)(d*)", "", "abcd", 0, true, 0, 4, 0, 2, 2, 3, 3, 4); + test("(ab|a)(bcd|c)(d*)", "", "abcd", 0, true, 0, 4, 0, 2, 2, 3, 3, 4); + test("(a*)(b|abc)(c*)", "", "abc", 0, true, 0, 3, 0, 1, 1, 2, 2, 3); + test("(a*)(abc|b)(c*)", "", "abc", 0, true, 0, 3, 0, 1, 1, 2, 2, 3); + test("(a*)(b|abc)(c*)", "", "abc", 0, true, 0, 3, 0, 1, 1, 2, 2, 3); + test("(a*)(abc|b)(c*)", "", "abc", 0, true, 0, 3, 0, 1, 1, 2, 2, 3); + test("(a|ab)(c|bcd)(d|.*)", "", "abcd", 0, true, 0, 4, 0, 1, 1, 4, 4, 4); + test("(a|ab)(bcd|c)(d|.*)", "", "abcd", 0, true, 0, 4, 0, 1, 1, 4, 4, 4); + test("(ab|a)(c|bcd)(d|.*)", "", "abcd", 0, true, 0, 4, 0, 2, 2, 3, 3, 4); + test("(ab|a)(bcd|c)(d|.*)", "", "abcd", 0, true, 0, 4, 0, 2, 2, 3, 3, 4); + test("(a*)*", "", "a", 0, true, 0, 1, 1, 1); + test("(a*)*", "", "x", 0, true, 0, 0, 0, 0); + test("(a*)*", "", "aaaaaa", 0, true, 0, 6, 6, 6); + test("(a*)*", "", "aaaaaax", 0, true, 0, 6, 6, 6); + test("(a*)+", "", "a", 0, true, 0, 1, 1, 1); + test("(a*)+", "", "x", 0, true, 0, 0, 0, 0); + test("(a*)+", "", "aaaaaa", 0, true, 0, 6, 6, 6); + test("(a*)+", "", "aaaaaax", 0, true, 0, 6, 6, 6); + test("(a+)*", "", "a", 0, true, 0, 1, 0, 1); + test("(a+)*", "", "x", 0, true, 0, 0, -1, -1); + test("(a+)*", "", "aaaaaa", 0, true, 0, 6, 0, 6); + test("(a+)*", "", "aaaaaax", 0, true, 0, 6, 0, 6); + test("(a+)+", "", "a", 0, true, 0, 1, 0, 1); + test("(a+)+", "", "x", 0, 
false); + test("(a+)+", "", "aaaaaa", 0, true, 0, 6, 0, 6); + test("(a+)+", "", "aaaaaax", 0, true, 0, 6, 0, 6); + test("([a]*)*", "", "a", 0, true, 0, 1, 1, 1); + test("([a]*)*", "", "x", 0, true, 0, 0, 0, 0); + test("([a]*)*", "", "aaaaaa", 0, true, 0, 6, 6, 6); + test("([a]*)*", "", "aaaaaax", 0, true, 0, 6, 6, 6); + test("([a]*)+", "", "a", 0, true, 0, 1, 1, 1); + test("([a]*)+", "", "x", 0, true, 0, 0, 0, 0); + test("([a]*)+", "", "aaaaaa", 0, true, 0, 6, 6, 6); + test("([a]*)+", "", "aaaaaax", 0, true, 0, 6, 6, 6); + test("([^b]*)*", "", "a", 0, true, 0, 1, 1, 1); + test("([^b]*)*", "", "b", 0, true, 0, 0, 0, 0); + test("([^b]*)*", "", "aaaaaa", 0, true, 0, 6, 6, 6); + test("([^b]*)*", "", "aaaaaab", 0, true, 0, 6, 6, 6); + test("([ab]*)*", "", "a", 0, true, 0, 1, 1, 1); + test("([ab]*)*", "", "aaaaaa", 0, true, 0, 6, 6, 6); + test("([ab]*)*", "", "ababab", 0, true, 0, 6, 6, 6); + test("([ab]*)*", "", "bababa", 0, true, 0, 6, 6, 6); + test("([ab]*)*", "", "b", 0, true, 0, 1, 1, 1); + test("([ab]*)*", "", "bbbbbb", 0, true, 0, 6, 6, 6); + test("([ab]*)*", "", "aaaabcde", 0, true, 0, 5, 5, 5); + test("([^a]*)*", "", "b", 0, true, 0, 1, 1, 1); + test("([^a]*)*", "", "bbbbbb", 0, true, 0, 6, 6, 6); + test("([^a]*)*", "", "aaaaaa", 0, true, 0, 0, 0, 0); + test("([^ab]*)*", "", "ccccxx", 0, true, 0, 6, 6, 6); + test("([^ab]*)*", "", "ababab", 0, true, 0, 0, 0, 0); + test("((z)+|a)*", "", "zabcde", 0, true, 0, 2, 1, 2, 0, 1); + test("a+?", "", "aaaaaa", 0, true, 0, 1); + test("(a)", "", "aaa", 0, true, 0, 1, 0, 1); + test("(a*?)", "", "aaa", 0, true, 0, 0, 0, 0); + test("(a)*?", "", "aaa", 0, true, 0, 0, -1, -1); + test("(a*?)*?", "", "aaa", 0, true, 0, 0, -1, -1); + test("(a*)*(x)", "", "x", 0, true, 0, 1, 0, 0, 0, 1); + test("(a*)*(x)", "", "ax", 0, true, 0, 2, 1, 1, 1, 2); + test("(a*)*(x)", "", "axa", 0, true, 0, 2, 1, 1, 1, 2); + test("(a*)*(x)(\\1)", "", "x", 0, true, 0, 1, 0, 0, 0, 1, 1, 1); + test("(a*)*(x)(\\1)", "", "ax", 0, true, 0, 2, 1, 1, 1, 2, 2, 2); 
+ test("(a*)*(x)(\\1)", "", "axa", 0, true, 0, 2, 1, 1, 1, 2, 2, 2); + test("(a*)*(x)(\\1)(x)", "", "axax", 0, true, 0, 4, 0, 1, 1, 2, 2, 3, 3, 4); + test("(a*)*(x)(\\1)(x)", "", "axxa", 0, true, 0, 3, 1, 1, 1, 2, 2, 2, 2, 3); + test("(a*)*(x)", "", "x", 0, true, 0, 1, 0, 0, 0, 1); + test("(a*)*(x)", "", "ax", 0, true, 0, 2, 1, 1, 1, 2); + test("(a*)*(x)", "", "axa", 0, true, 0, 2, 1, 1, 1, 2); + test("(a*)+(x)", "", "x", 0, true, 0, 1, 0, 0, 0, 1); + test("(a*)+(x)", "", "ax", 0, true, 0, 2, 1, 1, 1, 2); + test("(a*)+(x)", "", "axa", 0, true, 0, 2, 1, 1, 1, 2); + test("(a*){2}(x)", "", "x", 0, true, 0, 1, 0, 0, 0, 1); + test("(a*){2}(x)", "", "ax", 0, true, 0, 2, 1, 1, 1, 2); + test("(a*){2}(x)", "", "axa", 0, true, 0, 2, 1, 1, 1, 2); + test("((..)|(.))", "", "a", 0, true, 0, 1, 0, 1, -1, -1, 0, 1); + test("((..)|(.))((..)|(.))", "", "a", 0, false); + test("((..)|(.))((..)|(.))((..)|(.))", "", "a", 0, false); + test("((..)|(.)){1}", "", "a", 0, true, 0, 1, 0, 1, -1, -1, 0, 1); + test("((..)|(.)){2}", "", "a", 0, false); + test("((..)|(.)){3}", "", "a", 0, false); + test("((..)|(.))*", "", "a", 0, true, 0, 1, 0, 1, -1, -1, 0, 1); + test("((..)|(.))", "", "aa", 0, true, 0, 2, 0, 2, 0, 2, -1, -1); + test("((..)|(.))((..)|(.))", "", "aa", 0, true, 0, 2, 0, 1, -1, -1, 0, 1, 1, 2, -1, -1, 1, 2); + test("((..)|(.))((..)|(.))((..)|(.))", "", "aa", 0, false); + test("((..)|(.)){1}", "", "aa", 0, true, 0, 2, 0, 2, 0, 2, -1, -1); + test("((..)|(.)){2}", "", "aa", 0, true, 0, 2, 1, 2, -1, -1, 1, 2); + test("((..)|(.)){3}", "", "aa", 0, false); + test("((..)|(.))*", "", "aa", 0, true, 0, 2, 0, 2, 0, 2, -1, -1); + test("((..)|(.))", "", "aaa", 0, true, 0, 2, 0, 2, 0, 2, -1, -1); + test("((..)|(.))((..)|(.))", "", "aaa", 0, true, 0, 3, 0, 2, 0, 2, -1, -1, 2, 3, -1, -1, 2, 3); + test("((..)|(.))((..)|(.))((..)|(.))", "", "aaa", 0, true, 0, 3, 0, 1, -1, -1, 0, 1, 1, 2, -1, -1, 1, 2, 2, 3, -1, -1, 2, 3); + test("((..)|(.)){1}", "", "aaa", 0, true, 0, 2, 0, 2, 0, 2, -1, -1); + 
test("((..)|(.)){2}", "", "aaa", 0, true, 0, 3, 2, 3, 0, 2, 2, 3); + test("((..)|(.)){3}", "", "aaa", 0, true, 0, 3, 2, 3, -1, -1, 2, 3); + test("((..)|(.))*", "", "aaa", 0, true, 0, 3, 2, 3, 0, 2, 2, 3); + test("((..)|(.))", "", "aaaa", 0, true, 0, 2, 0, 2, 0, 2, -1, -1); + test("((..)|(.))((..)|(.))", "", "aaaa", 0, true, 0, 4, 0, 2, 0, 2, -1, -1, 2, 4, 2, 4, -1, -1); + test("((..)|(.))((..)|(.))((..)|(.))", "", "aaaa", 0, true, 0, 4, 0, 2, 0, 2, -1, -1, 2, 3, -1, -1, 2, 3, 3, 4, -1, -1, 3, 4); + test("((..)|(.)){1}", "", "aaaa", 0, true, 0, 2, 0, 2, 0, 2, -1, -1); + test("((..)|(.)){2}", "", "aaaa", 0, true, 0, 4, 2, 4, 2, 4, -1, -1); + test("((..)|(.)){3}", "", "aaaa", 0, true, 0, 4, 3, 4, 0, 2, 3, 4); + test("((..)|(.))*", "", "aaaa", 0, true, 0, 4, 2, 4, 2, 4, -1, -1); + test("((..)|(.))", "", "aaaaa", 0, true, 0, 2, 0, 2, 0, 2, -1, -1); + test("((..)|(.))((..)|(.))", "", "aaaaa", 0, true, 0, 4, 0, 2, 0, 2, -1, -1, 2, 4, 2, 4, -1, -1); + test("((..)|(.))((..)|(.))((..)|(.))", "", "aaaaa", 0, true, 0, 5, 0, 2, 0, 2, -1, -1, 2, 4, 2, 4, -1, -1, 4, 5, -1, -1, 4, 5); + test("((..)|(.)){1}", "", "aaaaa", 0, true, 0, 2, 0, 2, 0, 2, -1, -1); + test("((..)|(.)){2}", "", "aaaaa", 0, true, 0, 4, 2, 4, 2, 4, -1, -1); + test("((..)|(.)){3}", "", "aaaaa", 0, true, 0, 5, 4, 5, 2, 4, 4, 5); + test("((..)|(.))*", "", "aaaaa", 0, true, 0, 5, 4, 5, 2, 4, 4, 5); + test("((..)|(.))", "", "aaaaaa", 0, true, 0, 2, 0, 2, 0, 2, -1, -1); + test("((..)|(.))((..)|(.))", "", "aaaaaa", 0, true, 0, 4, 0, 2, 0, 2, -1, -1, 2, 4, 2, 4, -1, -1); + test("((..)|(.))((..)|(.))((..)|(.))", "", "aaaaaa", 0, true, 0, 6, 0, 2, 0, 2, -1, -1, 2, 4, 2, 4, -1, -1, 4, 6, 4, 6, -1, -1); + test("((..)|(.)){1}", "", "aaaaaa", 0, true, 0, 2, 0, 2, 0, 2, -1, -1); + test("((..)|(.)){2}", "", "aaaaaa", 0, true, 0, 4, 2, 4, 2, 4, -1, -1); + test("((..)|(.)){3}", "", "aaaaaa", 0, true, 0, 6, 4, 6, 4, 6, -1, -1); + test("((..)|(.))*", "", "aaaaaa", 0, true, 0, 6, 4, 6, 4, 6, -1, -1); + test("X(.?){0,}Y", "", 
"X1234567Y", 0, true, 0, 9, 8, 8); + test("X(.?){1,}Y", "", "X1234567Y", 0, true, 0, 9, 8, 8); + test("X(.?){2,}Y", "", "X1234567Y", 0, true, 0, 9, 8, 8); + test("X(.?){3,}Y", "", "X1234567Y", 0, true, 0, 9, 8, 8); + test("X(.?){4,}Y", "", "X1234567Y", 0, true, 0, 9, 8, 8); + test("X(.?){5,}Y", "", "X1234567Y", 0, true, 0, 9, 8, 8); + test("X(.?){6,}Y", "", "X1234567Y", 0, true, 0, 9, 8, 8); + test("X(.?){7,}Y", "", "X1234567Y", 0, true, 0, 9, 8, 8); + test("X(.?){8,}Y", "", "X1234567Y", 0, true, 0, 9, 8, 8); + test("X(.?){0,8}Y", "", "X1234567Y", 0, true, 0, 9, 8, 8); + test("X(.?){1,8}Y", "", "X1234567Y", 0, true, 0, 9, 8, 8); + test("X(.?){2,8}Y", "", "X1234567Y", 0, true, 0, 9, 8, 8); + test("X(.?){3,8}Y", "", "X1234567Y", 0, true, 0, 9, 8, 8); + test("X(.?){4,8}Y", "", "X1234567Y", 0, true, 0, 9, 8, 8); + test("X(.?){5,8}Y", "", "X1234567Y", 0, true, 0, 9, 8, 8); + test("X(.?){6,8}Y", "", "X1234567Y", 0, true, 0, 9, 8, 8); + test("X(.?){7,8}Y", "", "X1234567Y", 0, true, 0, 9, 8, 8); + test("X(.?){8,8}Y", "", "X1234567Y", 0, true, 0, 9, 8, 8); + test("(a|ab|c|bcd){0,}(d*)", "", "ababcd", 0, true, 0, 1, 0, 1, 1, 1); + test("(a|ab|c|bcd){1,}(d*)", "", "ababcd", 0, true, 0, 1, 0, 1, 1, 1); + test("(a|ab|c|bcd){2,}(d*)", "", "ababcd", 0, true, 0, 6, 3, 6, 6, 6); + test("(a|ab|c|bcd){3,}(d*)", "", "ababcd", 0, true, 0, 6, 3, 6, 6, 6); + test("(a|ab|c|bcd){4,}(d*)", "", "ababcd", 0, false); + test("(a|ab|c|bcd){0,10}(d*)", "", "ababcd", 0, true, 0, 1, 0, 1, 1, 1); + test("(a|ab|c|bcd){1,10}(d*)", "", "ababcd", 0, true, 0, 1, 0, 1, 1, 1); + test("(a|ab|c|bcd){2,10}(d*)", "", "ababcd", 0, true, 0, 6, 3, 6, 6, 6); + test("(a|ab|c|bcd){3,10}(d*)", "", "ababcd", 0, true, 0, 6, 3, 6, 6, 6); + test("(a|ab|c|bcd){4,10}(d*)", "", "ababcd", 0, false); + test("(a|ab|c|bcd)*(d*)", "", "ababcd", 0, true, 0, 1, 0, 1, 1, 1); + test("(a|ab|c|bcd)+(d*)", "", "ababcd", 0, true, 0, 1, 0, 1, 1, 1); + test("(ab|a|c|bcd){0,}(d*)", "", "ababcd", 0, true, 0, 6, 4, 5, 5, 6); + 
test("(ab|a|c|bcd){1,}(d*)", "", "ababcd", 0, true, 0, 6, 4, 5, 5, 6); + test("(ab|a|c|bcd){2,}(d*)", "", "ababcd", 0, true, 0, 6, 4, 5, 5, 6); + test("(ab|a|c|bcd){3,}(d*)", "", "ababcd", 0, true, 0, 6, 4, 5, 5, 6); + test("(ab|a|c|bcd){4,}(d*)", "", "ababcd", 0, false); + test("(ab|a|c|bcd){0,10}(d*)", "", "ababcd", 0, true, 0, 6, 4, 5, 5, 6); + test("(ab|a|c|bcd){1,10}(d*)", "", "ababcd", 0, true, 0, 6, 4, 5, 5, 6); + test("(ab|a|c|bcd){2,10}(d*)", "", "ababcd", 0, true, 0, 6, 4, 5, 5, 6); + test("(ab|a|c|bcd){3,10}(d*)", "", "ababcd", 0, true, 0, 6, 4, 5, 5, 6); + test("(ab|a|c|bcd){4,10}(d*)", "", "ababcd", 0, false); + test("(ab|a|c|bcd)*(d*)", "", "ababcd", 0, true, 0, 6, 4, 5, 5, 6); + test("(ab|a|c|bcd)+(d*)", "", "ababcd", 0, true, 0, 6, 4, 5, 5, 6); + test("(a|ab)(c|bcd)(d*)", "", "abcd", 0, true, 0, 4, 0, 1, 1, 4, 4, 4); + test("(a|ab)(bcd|c)(d*)", "", "abcd", 0, true, 0, 4, 0, 1, 1, 4, 4, 4); + test("(ab|a)(c|bcd)(d*)", "", "abcd", 0, true, 0, 4, 0, 2, 2, 3, 3, 4); + test("(ab|a)(bcd|c)(d*)", "", "abcd", 0, true, 0, 4, 0, 2, 2, 3, 3, 4); + test("(a*)(b|abc)(c*)", "", "abc", 0, true, 0, 3, 0, 1, 1, 2, 2, 3); + test("(a*)(abc|b)(c*)", "", "abc", 0, true, 0, 3, 0, 1, 1, 2, 2, 3); + test("(a*)(b|abc)(c*)", "", "abc", 0, true, 0, 3, 0, 1, 1, 2, 2, 3); + test("(a*)(abc|b)(c*)", "", "abc", 0, true, 0, 3, 0, 1, 1, 2, 2, 3); + test("(a|ab)(c|bcd)(d|.*)", "", "abcd", 0, true, 0, 4, 0, 1, 1, 4, 4, 4); + test("(a|ab)(bcd|c)(d|.*)", "", "abcd", 0, true, 0, 4, 0, 1, 1, 4, 4, 4); + test("(ab|a)(c|bcd)(d|.*)", "", "abcd", 0, true, 0, 4, 0, 2, 2, 3, 3, 4); + test("(ab|a)(bcd|c)(d|.*)", "", "abcd", 0, true, 0, 4, 0, 2, 2, 3, 3, 4); + test("(a|ab)(c|bcd)(d*)", "", "abcd", 0, true, 0, 4, 0, 1, 1, 4, 4, 4); + test("(a|ab)(bcd|c)(d*)", "", "abcd", 0, true, 0, 4, 0, 1, 1, 4, 4, 4); + test("(ab|a)(c|bcd)(d*)", "", "abcd", 0, true, 0, 4, 0, 2, 2, 3, 3, 4); + test("(ab|a)(bcd|c)(d*)", "", "abcd", 0, true, 0, 4, 0, 2, 2, 3, 3, 4); + test("(a*)(b|abc)(c*)", "", "abc", 0, true, 0, 
3, 0, 1, 1, 2, 2, 3); + test("(a*)(abc|b)(c*)", "", "abc", 0, true, 0, 3, 0, 1, 1, 2, 2, 3); + test("(a*)(b|abc)(c*)", "", "abc", 0, true, 0, 3, 0, 1, 1, 2, 2, 3); + test("(a*)(abc|b)(c*)", "", "abc", 0, true, 0, 3, 0, 1, 1, 2, 2, 3); + test("(a|ab)(c|bcd)(d|.*)", "", "abcd", 0, true, 0, 4, 0, 1, 1, 4, 4, 4); + test("(a|ab)(bcd|c)(d|.*)", "", "abcd", 0, true, 0, 4, 0, 1, 1, 4, 4, 4); + test("(ab|a)(c|bcd)(d|.*)", "", "abcd", 0, true, 0, 4, 0, 2, 2, 3, 3, 4); + test("(ab|a)(bcd|c)(d|.*)", "", "abcd", 0, true, 0, 4, 0, 2, 2, 3, 3, 4); + test("\ufb00", "i", "FF", 0, true, 0, 2); + test("(\ufb00)\\1", "i", "FFFF", 0, true, 0, 4, 0, 2); + test("(\ufb00)\\1", "i", "FF\ufb00", 0, false); + test("(\ufb00)\\1", "i", "\ufb00FF", 0, false); + test("\ufb01", "i", "FI", 0, true, 0, 2); + test("(\ufb01)\\1", "i", "FIFI", 0, true, 0, 4, 0, 2); + test("\ufb02", "i", "FL", 0, true, 0, 2); + test("\ufb03", "i", "FFI", 0, true, 0, 3); + test("\ufb04", "i", "FFL", 0, true, 0, 3); + test("\ufb00I", "i", "\ufb03", 0, true, 0, 1); + test("\ufb03", "i", "\ufb00I", 0, true, 0, 2); + test("F\ufb01", "i", "\ufb03", 0, true, 0, 1); + test("\ufb03", "i", "F\ufb01", 0, true, 0, 2); + test("\ufb00L", "i", "\ufb04", 0, true, 0, 1); + test("\ufb04", "i", "\ufb00L", 0, true, 0, 2); + test("F\ufb02", "i", "\ufb04", 0, true, 0, 1); + test("\ufb04", "i", "F\ufb02", 0, true, 0, 2); + test("[\ufb04[=a=]o]+", "i", "F\ufb02a\u00c4\u00f6", 0, true, 0, 4); + test("\u1f50", "i", "\u03c5\u0313", 0, true, 0, 2); + test("\u1f52", "i", "\u03c5\u0313\u0300", 0, true, 0, 3); + test("\u1f54", "i", "\u03c5\u0313\u0301", 0, true, 0, 3); + test("\u1f56", "i", "\u03c5\u0313\u0342", 0, true, 0, 3); + test("\u1f50\u0300", "i", "\u1f52", 0, true, 0, 1); + test("\u1f52", "i", "\u1f50\u0300", 0, true, 0, 2); + test("\u1f50\u0301", "i", "\u1f54", 0, true, 0, 1); + test("\u1f54", "i", "\u1f50\u0301", 0, true, 0, 2); + test("\u1f50\u0342", "i", "\u1f56", 0, true, 0, 1); + test("\u1f56", "i", "\u1f50\u0342", 0, true, 0, 2); + 
test("\u1fb6", "i", "\u03b1\u0342", 0, true, 0, 2); + test("\u1fb7", "i", "\u03b1\u0342\u03b9", 0, true, 0, 3); + test("\u1fb6\u03b9", "i", "\u1fb7", 0, true, 0, 1); + test("\u1fb7", "i", "\u1fb6\u03b9", 0, true, 0, 2); + test("\u1fc6", "i", "\u03b7\u0342", 0, true, 0, 2); + test("\u1fc7", "i", "\u03b7\u0342\u03b9", 0, true, 0, 3); + test("\u1fc6\u03b9", "i", "\u1fc7", 0, true, 0, 1); + test("\u1fc7", "i", "\u1fc6\u03b9", 0, true, 0, 2); + test("\u1ff6", "i", "\u03c9\u0342", 0, true, 0, 2); + test("\u1ff7", "i", "\u03c9\u0342\u03b9", 0, true, 0, 3); + test("\u1ff6\u03b9", "i", "\u1ff7", 0, true, 0, 1); + test("\u1ff7", "i", "\u1ff6\u03b9", 0, true, 0, 2); + test("f*", "i", "ff", 0, true, 0, 2); + test("f*", "i", "\ufb00", 0, true, 0, 0); + test("f+", "i", "ff", 0, true, 0, 2); + test("f+", "i", "\ufb00", 0, false); + test("f{1,}", "i", "ff", 0, true, 0, 2); + test("f{1,}", "i", "\ufb00", 0, false); + test("f{1,2}", "i", "ff", 0, true, 0, 2); + test("f{1,2}", "i", "\ufb00", 0, false); + test("f{,2}", "i", "ff", 0, false); + test("f{,2}", "i", "\ufb00", 0, false); + test("ff?", "i", "ff", 0, true, 0, 2); + test("ff?", "i", "\ufb00", 0, false); + test("f{2}", "i", "ff", 0, true, 0, 2); + test("f{2}", "i", "\ufb00", 0, false); + test("f{2,2}", "i", "ff", 0, true, 0, 2); + test("f{2,2}", "i", "\ufb00", 0, false); + test("K", "i", "\u212a", 0, true, 0, 1); + test("k", "i", "\u212a", 0, true, 0, 1); + test("\\w", "i", "\u212a", 0, true, 0, 1); + test("\\W", "i", "\u212a", 0, false); + test("[\\w]", "i", "\u212a", 0, false); + test("[\\w]+", "i", "a\\wWc", 0, true, 1, 4); + test("[\\W]+", "i", "a\\wWc", 0, true, 1, 4); + test("[\\d]+", "i", "0\\dD9", 0, true, 1, 4); + test("[\\D]+", "i", "a\\dDc", 0, true, 1, 4); + test("[\\s]+", "i", " \\sS\u0009", 0, true, 1, 4); + test("[\\S]+", "i", " \\sS\u0009", 0, true, 1, 4); + test("[kx]", "i", "\u212a", 0, true, 0, 1); + test("ff", "i", "\ufb00", 0, true, 0, 1); + test("[f]f", "i", "\ufb00", 0, false); + test("f[f]", "i", 
"\ufb00", 0, false); + test("[f][f]", "i", "\ufb00", 0, false); + test("(?:f)f", "i", "\ufb00", 0, false); + test("f(?:f)", "i", "\ufb00", 0, false); + test("(?:f)(?:f)", "i", "\ufb00", 0, false); + test("\\A[\ufb00]\\z", "i", "\ufb00", 0, true, 0, 1); + test("\\A[\ufb00]\\z", "i", "ff", 0, true, 0, 2); + test("\\A[^\ufb00]\\z", "i", "\ufb00", 0, false); + test("\\A[^\ufb00]\\z", "i", "ff", 0, false); + test("\\A[^[^\ufb00]]\\z", "i", "\ufb00", 0, false); + test("\\A[^[^\ufb00]]\\z", "i", "ff", 0, false); + test("\\A[[^[^\ufb00]]]\\z", "i", "\ufb00", 0, false); + test("\\A[[^[^\ufb00]]]\\z", "i", "ff", 0, false); + test("[^a-c]", "i", "A", 0, false); + test("[[^a-c]]", "i", "A", 0, false); + test("[^a]", "i", "a", 0, false); + test("[[^a]]", "i", "a", 0, false); + test("\\A\\W\\z", "i", "\ufb00", 0, false); + test("\\A\\W\\z", "i", "ff", 0, false); + test("\\A[\\p{L}]\\z", "i", "\ufb00", 0, false); + test("\\A[\\p{L}]\\z", "i", "ff", 0, false); + test("\\A\\W\\z", "i", "\ufb03", 0, false); + test("\\A\\W\\z", "i", "ffi", 0, false); + test("\\A\\W\\z", "i", "\ufb00i", 0, false); + test("\\A[\\p{L}]\\z", "i", "\ufb03", 0, false); + test("\\A[\\p{L}]\\z", "i", "ffi", 0, false); + test("\\A[\\p{L}]\\z", "i", "\ufb00i", 0, false); + test("([[=a=]])\\1", "i", "aA", 0, true, 0, 2, 0, 1); + test("([[=a=]])\\1", "i", "Aa", 0, true, 0, 2, 0, 1); + test("([[=a=]])\\1", "i", "a\u00e4", 0, false); + test("([[=a=]])\\1", "i", "a\u00c4", 0, false); + test("([[=a=]])\\1", "i", "\u00e4a", 0, false); + test("([[=a=]])\\1", "i", "\u00c4a", 0, false); + test("([[=a=]])\\1", "i", "\u00c4A", 0, false); + test("[[=a=]o]+", "i", "\u00e4O\u00f6", 0, true, 0, 2); + test("[[=a=]o]+", "i", "\u00e4O\u00f6", 0, true, 0, 2); + test("[[=\u00df=]o]+", "i", "s", 0, false); + test("[[=\u00df=]o]+", "i", "ss", 0, true, 0, 2); + test("[[=\u00df=]o]+", "", "s", 0, false); + test("[[=\u00df=]o]+", "", "ss", 0, true, 0, 2); + test("[\u0132]+", "", "ij", 0, false); + test("[\u0132]+", "i", "ij", 0, 
false); + test("[[=\u0132=]]+", "", "ij", 0, true, 0, 2); + test("[[=\u0132=]o]+", "", "ij", 0, true, 0, 2); + test("[[=\u0132=]o]+", "i", "ij", 0, true, 0, 2); + expectSyntaxError("[\\s-r]+", "", "invalid range in regular expression"); + test("[\\s-v]+", "", "\\stu", 0, true, 0, 4); + /* GENERATED CODE END - KEEP THIS MARKER FOR AUTOMATIC UPDATES */ + } } diff --git a/regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/test/PythonTests.java b/regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/test/PythonTests.java index 98246584b233..b3e450b79284 100644 --- a/regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/test/PythonTests.java +++ b/regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/test/PythonTests.java @@ -40,14 +40,14 @@ */ package com.oracle.truffle.regex.tregex.test; -import com.oracle.truffle.regex.tregex.TRegexOptions; -import com.oracle.truffle.regex.tregex.string.Encodings; import org.graalvm.polyglot.PolyglotException; import org.graalvm.polyglot.Value; import org.junit.Assert; import org.junit.Test; import com.oracle.truffle.regex.errors.PyErrorMessages; +import com.oracle.truffle.regex.tregex.TRegexOptions; +import com.oracle.truffle.regex.tregex.string.Encodings; public class PythonTests extends RegexTestBase { diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/analysis/RegexUnifier.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/analysis/RegexUnifier.java index e983fa1ebe99..97e9bad06d40 100644 --- a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/analysis/RegexUnifier.java +++ b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/analysis/RegexUnifier.java @@ -127,6 +127,9 @@ public String getUnifiedPattern() throws RegexSyntaxException { case groupEnd: dump.append(")"); break; + case literalChar: + dump.append("x"); + break; case charClass: if (((Token.CharacterClass) 
token).getCodePointSet().matchesSingleChar()) { dump.append("x"); @@ -134,6 +137,9 @@ public String getUnifiedPattern() throws RegexSyntaxException { dump.append("[c]"); } break; + case charClassEnd: + dump.append("[c]"); + break; } } dump.append("/"); diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/charset/ClassSetContents.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/charset/ClassSetContents.java index e8977c3a4543..6754fec3e688 100644 --- a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/charset/ClassSetContents.java +++ b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/charset/ClassSetContents.java @@ -40,7 +40,7 @@ */ package com.oracle.truffle.regex.charset; -import com.oracle.truffle.regex.tregex.parser.CaseFoldTable; +import com.oracle.truffle.regex.tregex.parser.CaseFoldData; import com.oracle.truffle.regex.tregex.util.json.JsonConvertible; import com.oracle.truffle.regex.tregex.util.json.JsonValue; import org.graalvm.collections.EconomicSet; @@ -93,7 +93,7 @@ public static ClassSetContents createRange(int lo, int hi) { } public static ClassSetContents createPOSIXCollationElement(int codePoint) { - return new ClassSetContents(Kind.POSIXCollationElement, CodePointSet.create(codePoint), EconomicSet.create(), true); + return new ClassSetContents(Kind.POSIXCollationElement, CodePointSet.create(codePoint), EconomicSet.create(), false); } public static ClassSetContents createPOSIXCollationElement(String string) { @@ -103,7 +103,7 @@ public static ClassSetContents createPOSIXCollationElement(String string) { } public static ClassSetContents createPOSIXCollationEquivalenceClass(int codePoint) { - return new ClassSetContents(Kind.POSIXCollationEquivalenceClass, CodePointSet.create(codePoint), EconomicSet.create(), true); + return new ClassSetContents(Kind.POSIXCollationEquivalenceClass, CodePointSet.create(codePoint), EconomicSet.create(), false); } public static ClassSetContents 
createPOSIXCollationEquivalenceClass(String string) { @@ -115,9 +115,9 @@ public static ClassSetContents createPOSIXCollationEquivalenceClass(String strin public ClassSetContents caseFold(CodePointSetAccumulator tmp) { EconomicSet foldedStrings = EconomicSet.create(strings.size()); for (String string : strings) { - foldedStrings.add(CaseFoldTable.simpleCaseFold(string)); + foldedStrings.add(CaseFoldData.icuSimpleCaseFold(string)); } - return new ClassSetContents(kind, CaseFoldTable.simpleCaseFold(codePointSet, tmp), foldedStrings, mayContainStrings); + return new ClassSetContents(kind, CaseFoldData.simpleCaseFold(codePointSet, tmp), foldedStrings, mayContainStrings); } public EconomicSet getStrings() { @@ -136,6 +136,10 @@ public boolean isRange() { return kind == Kind.Range; } + public boolean isPosixCollationEquivalenceClass() { + return kind == Kind.POSIXCollationEquivalenceClass; + } + public boolean isAllowedInRange() { return kind == Kind.Character || kind == Kind.POSIXCollationElement || kind == Kind.POSIXCollationEquivalenceClass; } diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/charset/Constants.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/charset/Constants.java index f8157658d8b4..488884dec276 100644 --- a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/charset/Constants.java +++ b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/charset/Constants.java @@ -41,7 +41,7 @@ package com.oracle.truffle.regex.charset; import com.oracle.truffle.regex.tregex.buffer.CompilationBuffer; -import com.oracle.truffle.regex.tregex.parser.CaseFoldTable; +import com.oracle.truffle.regex.tregex.parser.CaseFoldData; import com.oracle.truffle.regex.tregex.string.Encodings; public final class Constants { @@ -253,13 +253,9 @@ public final class Constants { HEX_CHARS }; - public static final CodePointSet FOLDABLE_CHARACTERS = 
CodePointSet.createNoDedup(CaseFoldTable.SIMPLE_CASE_FOLDING_ENTRIES); + public static final CodePointSet WORD_CHARS_UNICODE_SETS_IGNORE_CASE = CaseFoldData.simpleCaseFold(WORD_CHARS, new CodePointSetAccumulator()); - public static final CodePointSet FOLDED_CHARACTERS = FOLDABLE_CHARACTERS.createInverse(Encodings.UTF_16); - - public static final CodePointSet WORD_CHARS_UNICODE_SETS_IGNORE_CASE = CaseFoldTable.simpleCaseFold(WORD_CHARS, new CodePointSetAccumulator()); - - public static final CodePointSet NON_WORD_CHARS_UNICODE_SETS_IGNORE_CASE = WORD_CHARS_UNICODE_SETS_IGNORE_CASE.createInverse(FOLDABLE_CHARACTERS, + public static final CodePointSet NON_WORD_CHARS_UNICODE_SETS_IGNORE_CASE = WORD_CHARS_UNICODE_SETS_IGNORE_CASE.createInverse(CaseFoldData.FOLDABLE_CHARACTERS, new CompilationBuffer(Encodings.UTF_16)); } diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/nodes/nfa/TRegexBacktrackingNFAExecutorNode.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/nodes/nfa/TRegexBacktrackingNFAExecutorNode.java index d5a10da063eb..f74d36b5ab10 100644 --- a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/nodes/nfa/TRegexBacktrackingNFAExecutorNode.java +++ b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/nodes/nfa/TRegexBacktrackingNFAExecutorNode.java @@ -825,7 +825,7 @@ protected void updateState(TRegexBacktrackingNFAExecutorLocals locals, PureNFATr * OracleDBFlavor. 
*/ assert isForward(); - for (int i = 0; i < nGuards; i += 1) { + for (int i = 0; i < nGuards; i++) { QuantifierGuard guard = transition.getQuantifierGuards()[i]; CompilerAsserts.partialEvaluationConstant(guard); if (guard.getKind() == QuantifierGuard.Kind.updateRecursiveBackrefPointer) { @@ -895,7 +895,6 @@ protected boolean tryUpdateState(VirtualFrame frame, TRegexBacktrackingNFAExecut CompilerAsserts.partialEvaluationConstant(transition); PureNFAState target = transition.getTarget(isForward()); CompilerAsserts.partialEvaluationConstant(target); - assert !isRecursiveBackreferences() : "not implemented"; if (transition.hasCaretGuard() && index != 0) { return false; } @@ -965,6 +964,9 @@ protected boolean tryUpdateState(VirtualFrame frame, TRegexBacktrackingNFAExecut locals.setLastGroup(guard.getIndex() / 2); } break; + case updateRecursiveBackrefPointer: + locals.saveRecursiveBackrefGroupStart(guard.getIndex()); + break; case enterZeroWidth: locals.setZeroWidthQuantifierGuardIndex(q); locals.setZeroWidthQuantifierResults(q); diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/CaseFoldData.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/CaseFoldData.java new file mode 100644 index 000000000000..ad1c3800154b --- /dev/null +++ b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/CaseFoldData.java @@ -0,0 +1,1911 @@ +/* + * Copyright (c) 2023, 2023, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * The Universal Permissive License (UPL), Version 1.0 + * + * Subject to the condition set forth below, permission is hereby granted to any + * person obtaining a copy of this software, associated documentation and/or + * data (collectively the "Software"), free of charge and under any and all + * copyright rights in the Software, and any and all patent rights owned or + * freely licensable by each licensor hereunder covering either (i) the + * unmodified Software as contributed to or provided by such licensor, or (ii) + * the Larger Works (as defined below), to deal in both + * + * (a) the Software, and + * + * (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if + * one is included with the Software each a "Larger Work" to which the Software + * is contributed by such licensors), + * + * without restriction, including without limitation the rights to copy, create + * derivative works of, display, perform, and distribute the Software and make, + * use, sell, offer for sale, import, export, have made, and have sold the + * Software and the Larger Work(s), and to sublicense the foregoing rights on + * either these or other terms. + * + * This license is subject to the following condition: + * + * The above copyright notice and either this complete permission notice or at a + * minimum a reference to the UPL must be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +package com.oracle.truffle.regex.tregex.parser; + +import java.util.function.BiConsumer; +import java.util.function.BiPredicate; + +import org.graalvm.shadowed.com.ibm.icu.lang.UCharacter; + +import com.oracle.truffle.api.CompilerDirectives; +import com.oracle.truffle.regex.charset.CodePointSet; +import com.oracle.truffle.regex.charset.CodePointSetAccumulator; +import com.oracle.truffle.regex.charset.Range; +import com.oracle.truffle.regex.charset.RangesBuffer; +import com.oracle.truffle.regex.charset.SortedListOfRanges; +import com.oracle.truffle.regex.tregex.string.Encodings; + +public class CaseFoldData { + + private static final int INTEGER_OFFSET = 1; + private static final int DIRECT_MAPPING = 2; + private static final int ALTERNATING_UL = 3; + private static final int ALTERNATING_AL = 4; + private static final int DIRECT_SINGLE = 5; + + public enum CaseFoldUnfoldAlgorithm { + ECMAScriptNonUnicode, + ECMAScriptUnicode, + PythonAscii, + PythonUnicode; + + public BiPredicate getEqualsPredicate() { + return (codePointA, codePointB) -> getTable(this).equalsIgnoreCase(codePointA, codePointB); + } + } + + public enum CaseFoldAlgorithm { + Ruby, + OracleDB, + OracleDBAI + } + + private static CaseFoldEquivalenceTable getTable(CaseFoldUnfoldAlgorithm algorithm) { + switch (algorithm) { + case ECMAScriptNonUnicode: + return JS_NON_UNICODE; + case ECMAScriptUnicode: + return UNICODE_15_0_0_SIMPLE; + case PythonAscii: + return PYTHON_ASCII; + case PythonUnicode: + return PYTHON_UNICODE; + default: + throw CompilerDirectives.shouldNotReachHere(); + } + } + + public static CaseFoldTable getTable(CaseFoldAlgorithm algorithm) { + switch (algorithm) { + case Ruby: + return UNICODE_15_0_0_FULL; + case OracleDB: + return ORACLE_DB; + case OracleDBAI: + return ORACLE_DB_AI; + default: + throw CompilerDirectives.shouldNotReachHere(); + } + } + + public static CaseUnfoldingTrie getUnfoldingTrie(CaseFoldAlgorithm algorithm) { + switch (algorithm) { + case Ruby: + return 
UNFOLDING_TRIE_RUBY; + case OracleDB: + return UNFOLDING_TRIE_ORACLE_DB; + case OracleDBAI: + return UNFOLDING_TRIE_ORACLE_DB_AI; + default: + throw CompilerDirectives.shouldNotReachHere(); + } + } + + public static String icuSimpleCaseFold(String string) { + int[] folded = string.codePoints().map(CaseFoldData::icuSimpleCaseFold).toArray(); + return new String(folded, 0, folded.length); + } + + public static int icuSimpleCaseFold(int codePoint) { + return UCharacter.foldCase(codePoint, UCharacter.FOLD_CASE_DEFAULT); + } + + static CodePointSet rangeSet(int... ranges) { + return CodePointSet.createNoDedup(ranges); + } + + public static void applyCaseFoldUnfold(CodePointSetAccumulator codePointSet, CodePointSetAccumulator tmp, CaseFoldUnfoldAlgorithm algorithm) { + codePointSet.copyTo(tmp); + getTable(algorithm).applyCaseFold(codePointSet, tmp); + } + + public static CodePointSet simpleCaseFold(CodePointSet codePointSet, CodePointSetAccumulator tmp) { + tmp.addSet(codePointSet); + UNICODE_15_0_0_SIMPLE.applyCaseFold(tmp, codePointSet); + tmp.intersectWith(FOLDED_CHARACTERS); + return tmp.toCodePointSet(); + } + + /** + * Maps characters to their respective set of equivalent characters in case-insensitive context, + * e.g. {@code A -> [Aa]}. 
+ */ + public static final class CaseFoldEquivalenceTable implements SortedListOfRanges { + + private final CaseFoldEquivalenceTable parent; + private final CodePointSet[] directMappings; + private final int[] ranges; + + CaseFoldEquivalenceTable(CaseFoldEquivalenceTable parent, CodePointSet[] directMappings, int[] ranges) { + this.parent = parent; + this.directMappings = directMappings; + this.ranges = ranges; + } + + void applyCaseFold(CodePointSetAccumulator dst, Iterable src) { + for (Range r : src) { + applyCaseFold(dst, r); + } + } + + private void applyCaseFold(CodePointSetAccumulator dst, Range r) { + int search = binarySearch(r.lo); + if (binarySearchExactMatch(search, r.lo, r.hi)) { + apply(dst, search, r.lo, r.hi); + return; + } + int firstIntersection = binarySearchGetFirstIntersecting(search, r.lo, r.hi); + if (binarySearchNoIntersectingFound(firstIntersection)) { + if (parent != null) { + parent.applyCaseFold(dst, r); + } + return; + } + int lastIntersectionHi = r.lo - 1; + for (int j = firstIntersection; j < size(); j++) { + if (rightOf(j, r.lo, r.hi)) { + break; + } + assert intersects(j, r.lo, r.hi); + int intersectionLo = Math.max(getLo(j), r.lo); + int intersectionHi = Math.min(getHi(j), r.hi); + apply(dst, j, intersectionLo, intersectionHi); + if (parent != null && intersectionLo > lastIntersectionHi + 1) { + parent.applyCaseFold(dst, new Range(lastIntersectionHi + 1, intersectionLo - 1)); + } + lastIntersectionHi = intersectionHi; + } + if (parent != null && r.hi > lastIntersectionHi) { + parent.applyCaseFold(dst, new Range(lastIntersectionHi + 1, r.hi)); + } + } + + private void apply(CodePointSetAccumulator codePointSet, int tblEntryIndex, int intersectionLo, int intersectionHi) { + switch (ranges[tblEntryIndex * 4 + 2]) { + case INTEGER_OFFSET: + int delta = ranges[tblEntryIndex * 4 + 3]; + addRange(codePointSet, intersectionLo + delta, intersectionHi + delta); + break; + case DIRECT_MAPPING: + CodePointSet set = 
directMappings[ranges[tblEntryIndex * 4 + 3]]; + assert set.getMax() <= Character.MAX_CODE_POINT : "CaseFoldEquivalenceTable is currently used for single-character mappings only"; + codePointSet.addSet(set); + break; + case ALTERNATING_UL: + int loUL = Math.min(((intersectionLo - 1) ^ 1) + 1, ((intersectionHi - 1) ^ 1) + 1); + int hiUL = Math.max(((intersectionLo - 1) ^ 1) + 1, ((intersectionHi - 1) ^ 1) + 1); + if (!SortedListOfRanges.contains(intersectionLo, intersectionHi, loUL, hiUL)) { + addRange(codePointSet, loUL, hiUL); + } + break; + case ALTERNATING_AL: + int loAL = Math.min(intersectionLo ^ 1, intersectionHi ^ 1); + int hiAL = Math.max(intersectionLo ^ 1, intersectionHi ^ 1); + if (!SortedListOfRanges.contains(intersectionLo, intersectionHi, loAL, hiAL)) { + addRange(codePointSet, loAL, hiAL); + } + break; + default: + throw CompilerDirectives.shouldNotReachHere(); + } + } + + private static void addRange(CodePointSetAccumulator codePointSet, int lo, int hi) { + assert lo <= Character.MAX_CODE_POINT : "CaseFoldEquivalenceTable is currently used for single-character mappings only"; + codePointSet.addRange(lo, hi); + } + + boolean equalsIgnoreCase(int codePointA, int codePointB) { + if (codePointA == codePointB) { + return true; + } + int search = binarySearch(codePointA); + if (binarySearchExactMatch(search, codePointA, codePointA)) { + return equalsIgnoreCase(search, codePointA, codePointB); + } + int firstIntersection = binarySearchGetFirstIntersecting(search, codePointA, codePointA); + if (binarySearchNoIntersectingFound(firstIntersection) || rightOf(firstIntersection, codePointA, codePointA)) { + return parent != null && parent.equalsIgnoreCase(codePointA, codePointB); + } + assert intersects(firstIntersection, codePointA, codePointA); + return equalsIgnoreCase(firstIntersection, codePointA, codePointB); + } + + private boolean equalsIgnoreCase(int tblEntryIndex, int codePointA, int codePointB) { + switch (ranges[tblEntryIndex * 4 + 2]) { + case 
INTEGER_OFFSET: + int delta = ranges[tblEntryIndex * 4 + 3]; + return codePointA + delta == codePointB; + case DIRECT_MAPPING: + CodePointSet set = directMappings[ranges[tblEntryIndex * 4 + 3]]; + return set.contains(codePointB); + case ALTERNATING_UL: + return ((codePointA - 1) ^ 1) + 1 == codePointB; + case ALTERNATING_AL: + return (codePointA ^ 1) == codePointB; + default: + throw CompilerDirectives.shouldNotReachHere(); + } + } + + @Override + public int getLo(int i) { + return ranges[i * 4]; + } + + @Override + public int getHi(int i) { + return ranges[i * 4 + 1]; + } + + @Override + public int size() { + return ranges.length / 4; + } + + @Override + public void appendRangesTo(RangesBuffer buffer, int startIndex, int endIndex) { + throw CompilerDirectives.shouldNotReachHere(); + } + } + + public static final class CaseFoldTable implements SortedListOfRanges { + + private final CaseFoldTable parent; + private final int[] ranges; + + CaseFoldTable(CaseFoldTable parent, int[] ranges) { + this.parent = parent; + this.ranges = ranges; + } + + public int[] caseFold(int codepoint) { + final int[][] ret = new int[1][]; + caseFold(new Range(codepoint, codepoint), (cp, caseFolded) -> ret[0] = caseFolded); + return ret[0]; + } + + public void caseFold(CodePointSetAccumulator cps, BiConsumer caseFoldItem) { + for (Range r : cps) { + caseFold(r, caseFoldItem); + } + } + + private void caseFold(Range r, BiConsumer caseFoldItem) { + int search = binarySearch(r.lo); + if (binarySearchExactMatch(search, r.lo, r.hi)) { + apply(search, r.lo, r.hi, caseFoldItem); + return; + } + int firstIntersection = binarySearchGetFirstIntersecting(search, r.lo, r.hi); + if (binarySearchNoIntersectingFound(firstIntersection)) { + if (parent != null) { + parent.caseFold(r, caseFoldItem); + } + return; + } + int lastIntersectionHi = r.lo - 1; + for (int j = firstIntersection; j < size(); j++) { + if (rightOf(j, r.lo, r.hi)) { + break; + } + assert intersects(j, r.lo, r.hi); + int intersectionLo 
= Math.max(getLo(j), r.lo); + int intersectionHi = Math.min(getHi(j), r.hi); + apply(j, intersectionLo, intersectionHi, caseFoldItem); + if (parent != null && intersectionLo > lastIntersectionHi + 1) { + parent.caseFold(new Range(lastIntersectionHi + 1, intersectionLo - 1), caseFoldItem); + } + lastIntersectionHi = intersectionHi; + } + if (parent != null && r.hi > lastIntersectionHi) { + parent.caseFold(new Range(lastIntersectionHi + 1, r.hi), caseFoldItem); + } + } + + private void apply(int tblEntryIndex, int intersectionLo, int intersectionHi, BiConsumer caseFoldItem) { + int kind = ranges[tblEntryIndex * 4 + 2]; + switch (kind) { + case INTEGER_OFFSET: + int delta = ranges[tblEntryIndex * 4 + 3]; + if (delta != 0) { + for (int i = intersectionLo; i <= intersectionHi; i++) { + applyMapping(i, i + delta, caseFoldItem); + } + } + break; + case ALTERNATING_AL, ALTERNATING_UL: + int loUL = kind == ALTERNATING_UL ? intersectionLo | 1 : intersectionLo + (intersectionLo & 1); + for (int i = loUL; i <= intersectionHi; i += 2) { + applyMapping(i, i + 1, caseFoldItem); + } + break; + case DIRECT_SINGLE: + int dst = ranges[tblEntryIndex * 4 + 3]; + for (int i = intersectionLo; i <= intersectionHi; i++) { + applyMapping(i, dst, caseFoldItem); + } + break; + default: + throw CompilerDirectives.shouldNotReachHere(); + } + } + + private static void applyMapping(int from, int to, BiConsumer caseFoldItem) { + assert from <= 0x10_ffff; + caseFoldItem.accept(from, mappingToCodepoints(to)); + } + + private static int[] mappingToCodepoints(int mapping) { + if (mapping > 0x10_ffff) { + return MULTI_CHAR_SEQUENCES[mapping - 0x11_0000].codePoints().toArray(); + } else { + return new int[]{mapping}; + } + } + + private CaseUnfoldingTrie createCaseUnfoldTrie() { + CaseUnfoldingTrie trie = new CaseUnfoldingTrie(0); + if (parent == null) { + for (int i = 0; i < ranges.length; i += 4) { + switch (ranges[i + 2]) { + case INTEGER_OFFSET -> { + for (int j = ranges[i]; j <= ranges[i + 1]; j++) 
{ + trie.add(j, mappingToCodepoints(j + ranges[i + 3]), 0); + } + } + case ALTERNATING_UL, ALTERNATING_AL -> { + for (int j = ranges[i]; j <= ranges[i + 1]; j += 2) { + trie.add(j, mappingToCodepoints(j + 1), 0); + } + } + case DIRECT_SINGLE -> { + for (int j = ranges[i]; j <= ranges[i + 1]; j++) { + trie.add(j, mappingToCodepoints(ranges[i + 3]), 0); + } + } + default -> throw CompilerDirectives.shouldNotReachHere(); + } + } + } else { + caseFold(new Range(0, 0x10_ffff), (from, to) -> trie.add(from, to, 0)); + } + return trie; + } + + @Override + public int getLo(int i) { + return ranges[i * 4]; + } + + @Override + public int getHi(int i) { + return ranges[i * 4 + 1]; + } + + @Override + public int size() { + return ranges.length / 4; + } + + @Override + public void appendRangesTo(RangesBuffer buffer, int startIndex, int endIndex) { + throw CompilerDirectives.shouldNotReachHere(); + } + } + + public static final CaseFoldEquivalenceTable PYTHON_ASCII = new CaseFoldEquivalenceTable(null, new CodePointSet[0], new int[]{ + 0x000041, 0x00005a, INTEGER_OFFSET, 32, + 0x000061, 0x00007a, INTEGER_OFFSET, -32 + }); + + /* GENERATED CODE BEGIN - KEEP THIS MARKER FOR AUTOMATIC UPDATES */ + + public static final String[] MULTI_CHAR_SEQUENCES = { + "i\u0307", + "SS", + "FF", + "FI", + "FL", + "FFI", + "FFL", + "ST", + "\u0535\u0552", + "\u0544\u0546", + "\u0544\u0535", + "\u0544\u053b", + "\u054e\u0546", + "\u0544\u053d", + "\u02bcN", + "\u0399\u0308\u0301", + "\u03a5\u0308\u0301", + "J\u030c", + "H\u0331", + "T\u0308", + "W\u030a", + "Y\u030a", + "A\u02be", + "\u03a5\u0313", + "\u03a5\u0313\u0300", + "\u03a5\u0313\u0301", + "\u03a5\u0313\u0342", + "\u0391\u0342", + "\u0397\u0342", + "\u0399\u0308\u0300", + "\u0399\u0342", + "\u0399\u0308\u0342", + "\u03a5\u0308\u0300", + "\u03a1\u0313", + "\u03a5\u0342", + "\u03a5\u0308\u0342", + "\u03a9\u0342", + "\u1f08\u0399", + "\u1f09\u0399", + "\u1f0a\u0399", + "\u1f0b\u0399", + "\u1f0c\u0399", + "\u1f0d\u0399", + "\u1f0e\u0399", + 
"\u1f0f\u0399", + "\u1f28\u0399", + "\u1f29\u0399", + "\u1f2a\u0399", + "\u1f2b\u0399", + "\u1f2c\u0399", + "\u1f2d\u0399", + "\u1f2e\u0399", + "\u1f2f\u0399", + "\u1f68\u0399", + "\u1f69\u0399", + "\u1f6a\u0399", + "\u1f6b\u0399", + "\u1f6c\u0399", + "\u1f6d\u0399", + "\u1f6e\u0399", + "\u1f6f\u0399", + "\u0391\u0399", + "\u0397\u0399", + "\u03a9\u0399", + "\u1fba\u0399", + "\u0386\u0399", + "\u1fca\u0399", + "\u0389\u0399", + "\u1ffa\u0399", + "\u038f\u0399", + "\u0391\u0342\u0399", + "\u0397\u0342\u0399", + "\u03a9\u0342\u0399", + "ss", + "\u02bcn", + "j\u030c", + "\u03b9\u0308\u0301", + "\u03c5\u0308\u0301", + "\u0565\u0582", + "h\u0331", + "t\u0308", + "w\u030a", + "y\u030a", + "a\u02be", + "\u03c5\u0313", + "\u03c5\u0313\u0300", + "\u03c5\u0313\u0301", + "\u03c5\u0313\u0342", + "\u1f00\u03b9", + "\u1f01\u03b9", + "\u1f02\u03b9", + "\u1f03\u03b9", + "\u1f04\u03b9", + "\u1f05\u03b9", + "\u1f06\u03b9", + "\u1f07\u03b9", + "\u1f20\u03b9", + "\u1f21\u03b9", + "\u1f22\u03b9", + "\u1f23\u03b9", + "\u1f24\u03b9", + "\u1f25\u03b9", + "\u1f26\u03b9", + "\u1f27\u03b9", + "\u1f60\u03b9", + "\u1f61\u03b9", + "\u1f62\u03b9", + "\u1f63\u03b9", + "\u1f64\u03b9", + "\u1f65\u03b9", + "\u1f66\u03b9", + "\u1f67\u03b9", + "\u1f70\u03b9", + "\u03b1\u03b9", + "\u03ac\u03b9", + "\u03b1\u0342", + "\u03b1\u0342\u03b9", + "\u1f74\u03b9", + "\u03b7\u03b9", + "\u03ae\u03b9", + "\u03b7\u0342", + "\u03b7\u0342\u03b9", + "\u03b9\u0308\u0300", + "\u03b9\u0342", + "\u03b9\u0308\u0342", + "\u03c5\u0308\u0300", + "\u03c1\u0313", + "\u03c5\u0342", + "\u03c5\u0308\u0342", + "\u1f7c\u03b9", + "\u03c9\u03b9", + "\u03ce\u03b9", + "\u03c9\u0342", + "\u03c9\u0342\u03b9", + "ff", + "fi", + "fl", + "ffi", + "ffl", + "st", + "\u0574\u0576", + "\u0574\u0565", + "\u0574\u056b", + "\u057e\u0576", + "\u0574\u056d", + "ij", + "oe", + "lj", + "nj", + "dz", + "d\u0292", + "d\u0291", + "ts", + "t\u0283", + "t\u0255", + "co", + "no", + "sm", + "del", + "tm", + "ii", + "iii", + "iv", + "vi", + "vii", + "ix", + 
"xi", + "xii", + "fo", + }; + private static final CaseFoldEquivalenceTable UNICODE_15_0_0_SIMPLE = new CaseFoldEquivalenceTable(null, new CodePointSet[]{ + rangeSet(0x00004b, 0x00004b, 0x00006b, 0x00006b, 0x00212a, 0x00212a), + rangeSet(0x000053, 0x000053, 0x000073, 0x000073, 0x00017f, 0x00017f), + rangeSet(0x0000b5, 0x0000b5, 0x00039c, 0x00039c, 0x0003bc, 0x0003bc), + rangeSet(0x0000c5, 0x0000c5, 0x0000e5, 0x0000e5, 0x00212b, 0x00212b), + rangeSet(0x0001c4, 0x0001c6), + rangeSet(0x0001c7, 0x0001c9), + rangeSet(0x0001ca, 0x0001cc), + rangeSet(0x0001f1, 0x0001f3), + rangeSet(0x000345, 0x000345, 0x000399, 0x000399, 0x0003b9, 0x0003b9, 0x001fbe, 0x001fbe), + rangeSet(0x000392, 0x000392, 0x0003b2, 0x0003b2, 0x0003d0, 0x0003d0), + rangeSet(0x000395, 0x000395, 0x0003b5, 0x0003b5, 0x0003f5, 0x0003f5), + rangeSet(0x000398, 0x000398, 0x0003b8, 0x0003b8, 0x0003d1, 0x0003d1, 0x0003f4, 0x0003f4), + rangeSet(0x00039a, 0x00039a, 0x0003ba, 0x0003ba, 0x0003f0, 0x0003f0), + rangeSet(0x0003a0, 0x0003a0, 0x0003c0, 0x0003c0, 0x0003d6, 0x0003d6), + rangeSet(0x0003a1, 0x0003a1, 0x0003c1, 0x0003c1, 0x0003f1, 0x0003f1), + rangeSet(0x0003a3, 0x0003a3, 0x0003c2, 0x0003c3), + rangeSet(0x0003a6, 0x0003a6, 0x0003c6, 0x0003c6, 0x0003d5, 0x0003d5), + rangeSet(0x0003a9, 0x0003a9, 0x0003c9, 0x0003c9, 0x002126, 0x002126), + rangeSet(0x000412, 0x000412, 0x000432, 0x000432, 0x001c80, 0x001c80), + rangeSet(0x000414, 0x000414, 0x000434, 0x000434, 0x001c81, 0x001c81), + rangeSet(0x00041e, 0x00041e, 0x00043e, 0x00043e, 0x001c82, 0x001c82), + rangeSet(0x000421, 0x000421, 0x000441, 0x000441, 0x001c83, 0x001c83), + rangeSet(0x000422, 0x000422, 0x000442, 0x000442, 0x001c84, 0x001c85), + rangeSet(0x00042a, 0x00042a, 0x00044a, 0x00044a, 0x001c86, 0x001c86), + rangeSet(0x000462, 0x000463, 0x001c87, 0x001c87), + rangeSet(0x001c88, 0x001c88, 0x00a64a, 0x00a64b), + rangeSet(0x001e60, 0x001e61, 0x001e9b, 0x001e9b), + }, new int[]{ + 0x000041, 0x00004a, INTEGER_OFFSET, 32, + 0x00004b, 0x00004b, DIRECT_MAPPING, 0, + 
0x00004c, 0x000052, INTEGER_OFFSET, 32, + 0x000053, 0x000053, DIRECT_MAPPING, 1, + 0x000054, 0x00005a, INTEGER_OFFSET, 32, + 0x000061, 0x00006a, INTEGER_OFFSET, -32, + 0x00006b, 0x00006b, DIRECT_MAPPING, 0, + 0x00006c, 0x000072, INTEGER_OFFSET, -32, + 0x000073, 0x000073, DIRECT_MAPPING, 1, + 0x000074, 0x00007a, INTEGER_OFFSET, -32, + 0x0000b5, 0x0000b5, DIRECT_MAPPING, 2, + 0x0000c0, 0x0000c4, INTEGER_OFFSET, 32, + 0x0000c5, 0x0000c5, DIRECT_MAPPING, 3, + 0x0000c6, 0x0000d6, INTEGER_OFFSET, 32, + 0x0000d8, 0x0000de, INTEGER_OFFSET, 32, + 0x0000df, 0x0000df, INTEGER_OFFSET, 7615, + 0x0000e0, 0x0000e4, INTEGER_OFFSET, -32, + 0x0000e5, 0x0000e5, DIRECT_MAPPING, 3, + 0x0000e6, 0x0000f6, INTEGER_OFFSET, -32, + 0x0000f8, 0x0000fe, INTEGER_OFFSET, -32, + 0x0000ff, 0x0000ff, INTEGER_OFFSET, 121, + 0x000100, 0x00012f, ALTERNATING_AL, 0, + 0x000132, 0x000137, ALTERNATING_AL, 0, + 0x000139, 0x000148, ALTERNATING_UL, 0, + 0x00014a, 0x000177, ALTERNATING_AL, 0, + 0x000178, 0x000178, INTEGER_OFFSET, -121, + 0x000179, 0x00017e, ALTERNATING_UL, 0, + 0x00017f, 0x00017f, DIRECT_MAPPING, 1, + 0x000180, 0x000180, INTEGER_OFFSET, 195, + 0x000181, 0x000181, INTEGER_OFFSET, 210, + 0x000182, 0x000185, ALTERNATING_AL, 0, + 0x000186, 0x000186, INTEGER_OFFSET, 206, + 0x000187, 0x000188, ALTERNATING_UL, 0, + 0x000189, 0x00018a, INTEGER_OFFSET, 205, + 0x00018b, 0x00018c, ALTERNATING_UL, 0, + 0x00018e, 0x00018e, INTEGER_OFFSET, 79, + 0x00018f, 0x00018f, INTEGER_OFFSET, 202, + 0x000190, 0x000190, INTEGER_OFFSET, 203, + 0x000191, 0x000192, ALTERNATING_UL, 0, + 0x000193, 0x000193, INTEGER_OFFSET, 205, + 0x000194, 0x000194, INTEGER_OFFSET, 207, + 0x000195, 0x000195, INTEGER_OFFSET, 97, + 0x000196, 0x000196, INTEGER_OFFSET, 211, + 0x000197, 0x000197, INTEGER_OFFSET, 209, + 0x000198, 0x000199, ALTERNATING_AL, 0, + 0x00019a, 0x00019a, INTEGER_OFFSET, 163, + 0x00019c, 0x00019c, INTEGER_OFFSET, 211, + 0x00019d, 0x00019d, INTEGER_OFFSET, 213, + 0x00019e, 0x00019e, INTEGER_OFFSET, 130, + 0x00019f, 
0x00019f, INTEGER_OFFSET, 214, + 0x0001a0, 0x0001a5, ALTERNATING_AL, 0, + 0x0001a6, 0x0001a6, INTEGER_OFFSET, 218, + 0x0001a7, 0x0001a8, ALTERNATING_UL, 0, + 0x0001a9, 0x0001a9, INTEGER_OFFSET, 218, + 0x0001ac, 0x0001ad, ALTERNATING_AL, 0, + 0x0001ae, 0x0001ae, INTEGER_OFFSET, 218, + 0x0001af, 0x0001b0, ALTERNATING_UL, 0, + 0x0001b1, 0x0001b2, INTEGER_OFFSET, 217, + 0x0001b3, 0x0001b6, ALTERNATING_UL, 0, + 0x0001b7, 0x0001b7, INTEGER_OFFSET, 219, + 0x0001b8, 0x0001b9, ALTERNATING_AL, 0, + 0x0001bc, 0x0001bd, ALTERNATING_AL, 0, + 0x0001bf, 0x0001bf, INTEGER_OFFSET, 56, + 0x0001c4, 0x0001c6, DIRECT_MAPPING, 4, + 0x0001c7, 0x0001c9, DIRECT_MAPPING, 5, + 0x0001ca, 0x0001cc, DIRECT_MAPPING, 6, + 0x0001cd, 0x0001dc, ALTERNATING_UL, 0, + 0x0001dd, 0x0001dd, INTEGER_OFFSET, -79, + 0x0001de, 0x0001ef, ALTERNATING_AL, 0, + 0x0001f1, 0x0001f3, DIRECT_MAPPING, 7, + 0x0001f4, 0x0001f5, ALTERNATING_AL, 0, + 0x0001f6, 0x0001f6, INTEGER_OFFSET, -97, + 0x0001f7, 0x0001f7, INTEGER_OFFSET, -56, + 0x0001f8, 0x00021f, ALTERNATING_AL, 0, + 0x000220, 0x000220, INTEGER_OFFSET, -130, + 0x000222, 0x000233, ALTERNATING_AL, 0, + 0x00023a, 0x00023a, INTEGER_OFFSET, 10795, + 0x00023b, 0x00023c, ALTERNATING_UL, 0, + 0x00023d, 0x00023d, INTEGER_OFFSET, -163, + 0x00023e, 0x00023e, INTEGER_OFFSET, 10792, + 0x00023f, 0x000240, INTEGER_OFFSET, 10815, + 0x000241, 0x000242, ALTERNATING_UL, 0, + 0x000243, 0x000243, INTEGER_OFFSET, -195, + 0x000244, 0x000244, INTEGER_OFFSET, 69, + 0x000245, 0x000245, INTEGER_OFFSET, 71, + 0x000246, 0x00024f, ALTERNATING_AL, 0, + 0x000250, 0x000250, INTEGER_OFFSET, 10783, + 0x000251, 0x000251, INTEGER_OFFSET, 10780, + 0x000252, 0x000252, INTEGER_OFFSET, 10782, + 0x000253, 0x000253, INTEGER_OFFSET, -210, + 0x000254, 0x000254, INTEGER_OFFSET, -206, + 0x000256, 0x000257, INTEGER_OFFSET, -205, + 0x000259, 0x000259, INTEGER_OFFSET, -202, + 0x00025b, 0x00025b, INTEGER_OFFSET, -203, + 0x00025c, 0x00025c, INTEGER_OFFSET, 42319, + 0x000260, 0x000260, INTEGER_OFFSET, -205, + 
0x000261, 0x000261, INTEGER_OFFSET, 42315, + 0x000263, 0x000263, INTEGER_OFFSET, -207, + 0x000265, 0x000265, INTEGER_OFFSET, 42280, + 0x000266, 0x000266, INTEGER_OFFSET, 42308, + 0x000268, 0x000268, INTEGER_OFFSET, -209, + 0x000269, 0x000269, INTEGER_OFFSET, -211, + 0x00026a, 0x00026a, INTEGER_OFFSET, 42308, + 0x00026b, 0x00026b, INTEGER_OFFSET, 10743, + 0x00026c, 0x00026c, INTEGER_OFFSET, 42305, + 0x00026f, 0x00026f, INTEGER_OFFSET, -211, + 0x000271, 0x000271, INTEGER_OFFSET, 10749, + 0x000272, 0x000272, INTEGER_OFFSET, -213, + 0x000275, 0x000275, INTEGER_OFFSET, -214, + 0x00027d, 0x00027d, INTEGER_OFFSET, 10727, + 0x000280, 0x000280, INTEGER_OFFSET, -218, + 0x000282, 0x000282, INTEGER_OFFSET, 42307, + 0x000283, 0x000283, INTEGER_OFFSET, -218, + 0x000287, 0x000287, INTEGER_OFFSET, 42282, + 0x000288, 0x000288, INTEGER_OFFSET, -218, + 0x000289, 0x000289, INTEGER_OFFSET, -69, + 0x00028a, 0x00028b, INTEGER_OFFSET, -217, + 0x00028c, 0x00028c, INTEGER_OFFSET, -71, + 0x000292, 0x000292, INTEGER_OFFSET, -219, + 0x00029d, 0x00029d, INTEGER_OFFSET, 42261, + 0x00029e, 0x00029e, INTEGER_OFFSET, 42258, + 0x000345, 0x000345, DIRECT_MAPPING, 8, + 0x000370, 0x000373, ALTERNATING_AL, 0, + 0x000376, 0x000377, ALTERNATING_AL, 0, + 0x00037b, 0x00037d, INTEGER_OFFSET, 130, + 0x00037f, 0x00037f, INTEGER_OFFSET, 116, + 0x000386, 0x000386, INTEGER_OFFSET, 38, + 0x000388, 0x00038a, INTEGER_OFFSET, 37, + 0x00038c, 0x00038c, INTEGER_OFFSET, 64, + 0x00038e, 0x00038f, INTEGER_OFFSET, 63, + 0x000391, 0x000391, INTEGER_OFFSET, 32, + 0x000392, 0x000392, DIRECT_MAPPING, 9, + 0x000393, 0x000394, INTEGER_OFFSET, 32, + 0x000395, 0x000395, DIRECT_MAPPING, 10, + 0x000396, 0x000397, INTEGER_OFFSET, 32, + 0x000398, 0x000398, DIRECT_MAPPING, 11, + 0x000399, 0x000399, DIRECT_MAPPING, 8, + 0x00039a, 0x00039a, DIRECT_MAPPING, 12, + 0x00039b, 0x00039b, INTEGER_OFFSET, 32, + 0x00039c, 0x00039c, DIRECT_MAPPING, 2, + 0x00039d, 0x00039f, INTEGER_OFFSET, 32, + 0x0003a0, 0x0003a0, DIRECT_MAPPING, 13, + 0x0003a1, 
0x0003a1, DIRECT_MAPPING, 14, + 0x0003a3, 0x0003a3, DIRECT_MAPPING, 15, + 0x0003a4, 0x0003a5, INTEGER_OFFSET, 32, + 0x0003a6, 0x0003a6, DIRECT_MAPPING, 16, + 0x0003a7, 0x0003a8, INTEGER_OFFSET, 32, + 0x0003a9, 0x0003a9, DIRECT_MAPPING, 17, + 0x0003aa, 0x0003ab, INTEGER_OFFSET, 32, + 0x0003ac, 0x0003ac, INTEGER_OFFSET, -38, + 0x0003ad, 0x0003af, INTEGER_OFFSET, -37, + 0x0003b1, 0x0003b1, INTEGER_OFFSET, -32, + 0x0003b2, 0x0003b2, DIRECT_MAPPING, 9, + 0x0003b3, 0x0003b4, INTEGER_OFFSET, -32, + 0x0003b5, 0x0003b5, DIRECT_MAPPING, 10, + 0x0003b6, 0x0003b7, INTEGER_OFFSET, -32, + 0x0003b8, 0x0003b8, DIRECT_MAPPING, 11, + 0x0003b9, 0x0003b9, DIRECT_MAPPING, 8, + 0x0003ba, 0x0003ba, DIRECT_MAPPING, 12, + 0x0003bb, 0x0003bb, INTEGER_OFFSET, -32, + 0x0003bc, 0x0003bc, DIRECT_MAPPING, 2, + 0x0003bd, 0x0003bf, INTEGER_OFFSET, -32, + 0x0003c0, 0x0003c0, DIRECT_MAPPING, 13, + 0x0003c1, 0x0003c1, DIRECT_MAPPING, 14, + 0x0003c2, 0x0003c3, DIRECT_MAPPING, 15, + 0x0003c4, 0x0003c5, INTEGER_OFFSET, -32, + 0x0003c6, 0x0003c6, DIRECT_MAPPING, 16, + 0x0003c7, 0x0003c8, INTEGER_OFFSET, -32, + 0x0003c9, 0x0003c9, DIRECT_MAPPING, 17, + 0x0003ca, 0x0003cb, INTEGER_OFFSET, -32, + 0x0003cc, 0x0003cc, INTEGER_OFFSET, -64, + 0x0003cd, 0x0003ce, INTEGER_OFFSET, -63, + 0x0003cf, 0x0003cf, INTEGER_OFFSET, 8, + 0x0003d0, 0x0003d0, DIRECT_MAPPING, 9, + 0x0003d1, 0x0003d1, DIRECT_MAPPING, 11, + 0x0003d5, 0x0003d5, DIRECT_MAPPING, 16, + 0x0003d6, 0x0003d6, DIRECT_MAPPING, 13, + 0x0003d7, 0x0003d7, INTEGER_OFFSET, -8, + 0x0003d8, 0x0003ef, ALTERNATING_AL, 0, + 0x0003f0, 0x0003f0, DIRECT_MAPPING, 12, + 0x0003f1, 0x0003f1, DIRECT_MAPPING, 14, + 0x0003f2, 0x0003f2, INTEGER_OFFSET, 7, + 0x0003f3, 0x0003f3, INTEGER_OFFSET, -116, + 0x0003f4, 0x0003f4, DIRECT_MAPPING, 11, + 0x0003f5, 0x0003f5, DIRECT_MAPPING, 10, + 0x0003f7, 0x0003f8, ALTERNATING_UL, 0, + 0x0003f9, 0x0003f9, INTEGER_OFFSET, -7, + 0x0003fa, 0x0003fb, ALTERNATING_AL, 0, + 0x0003fd, 0x0003ff, INTEGER_OFFSET, -130, + 0x000400, 0x00040f, 
INTEGER_OFFSET, 80, + 0x000410, 0x000411, INTEGER_OFFSET, 32, + 0x000412, 0x000412, DIRECT_MAPPING, 18, + 0x000413, 0x000413, INTEGER_OFFSET, 32, + 0x000414, 0x000414, DIRECT_MAPPING, 19, + 0x000415, 0x00041d, INTEGER_OFFSET, 32, + 0x00041e, 0x00041e, DIRECT_MAPPING, 20, + 0x00041f, 0x000420, INTEGER_OFFSET, 32, + 0x000421, 0x000421, DIRECT_MAPPING, 21, + 0x000422, 0x000422, DIRECT_MAPPING, 22, + 0x000423, 0x000429, INTEGER_OFFSET, 32, + 0x00042a, 0x00042a, DIRECT_MAPPING, 23, + 0x00042b, 0x00042f, INTEGER_OFFSET, 32, + 0x000430, 0x000431, INTEGER_OFFSET, -32, + 0x000432, 0x000432, DIRECT_MAPPING, 18, + 0x000433, 0x000433, INTEGER_OFFSET, -32, + 0x000434, 0x000434, DIRECT_MAPPING, 19, + 0x000435, 0x00043d, INTEGER_OFFSET, -32, + 0x00043e, 0x00043e, DIRECT_MAPPING, 20, + 0x00043f, 0x000440, INTEGER_OFFSET, -32, + 0x000441, 0x000441, DIRECT_MAPPING, 21, + 0x000442, 0x000442, DIRECT_MAPPING, 22, + 0x000443, 0x000449, INTEGER_OFFSET, -32, + 0x00044a, 0x00044a, DIRECT_MAPPING, 23, + 0x00044b, 0x00044f, INTEGER_OFFSET, -32, + 0x000450, 0x00045f, INTEGER_OFFSET, -80, + 0x000460, 0x000461, ALTERNATING_AL, 0, + 0x000462, 0x000463, DIRECT_MAPPING, 24, + 0x000464, 0x000481, ALTERNATING_AL, 0, + 0x00048a, 0x0004bf, ALTERNATING_AL, 0, + 0x0004c0, 0x0004c0, INTEGER_OFFSET, 15, + 0x0004c1, 0x0004ce, ALTERNATING_UL, 0, + 0x0004cf, 0x0004cf, INTEGER_OFFSET, -15, + 0x0004d0, 0x00052f, ALTERNATING_AL, 0, + 0x000531, 0x000556, INTEGER_OFFSET, 48, + 0x000561, 0x000586, INTEGER_OFFSET, -48, + 0x0010a0, 0x0010c5, INTEGER_OFFSET, 7264, + 0x0010c7, 0x0010c7, INTEGER_OFFSET, 7264, + 0x0010cd, 0x0010cd, INTEGER_OFFSET, 7264, + 0x0010d0, 0x0010fa, INTEGER_OFFSET, 3008, + 0x0010fd, 0x0010ff, INTEGER_OFFSET, 3008, + 0x0013a0, 0x0013ef, INTEGER_OFFSET, 38864, + 0x0013f0, 0x0013f5, INTEGER_OFFSET, 8, + 0x0013f8, 0x0013fd, INTEGER_OFFSET, -8, + 0x001c80, 0x001c80, DIRECT_MAPPING, 18, + 0x001c81, 0x001c81, DIRECT_MAPPING, 19, + 0x001c82, 0x001c82, DIRECT_MAPPING, 20, + 0x001c83, 0x001c83, 
DIRECT_MAPPING, 21, + 0x001c84, 0x001c85, DIRECT_MAPPING, 22, + 0x001c86, 0x001c86, DIRECT_MAPPING, 23, + 0x001c87, 0x001c87, DIRECT_MAPPING, 24, + 0x001c88, 0x001c88, DIRECT_MAPPING, 25, + 0x001c90, 0x001cba, INTEGER_OFFSET, -3008, + 0x001cbd, 0x001cbf, INTEGER_OFFSET, -3008, + 0x001d79, 0x001d79, INTEGER_OFFSET, 35332, + 0x001d7d, 0x001d7d, INTEGER_OFFSET, 3814, + 0x001d8e, 0x001d8e, INTEGER_OFFSET, 35384, + 0x001e00, 0x001e5f, ALTERNATING_AL, 0, + 0x001e60, 0x001e61, DIRECT_MAPPING, 26, + 0x001e62, 0x001e95, ALTERNATING_AL, 0, + 0x001e9b, 0x001e9b, DIRECT_MAPPING, 26, + 0x001e9e, 0x001e9e, INTEGER_OFFSET, -7615, + 0x001ea0, 0x001eff, ALTERNATING_AL, 0, + 0x001f00, 0x001f07, INTEGER_OFFSET, 8, + 0x001f08, 0x001f0f, INTEGER_OFFSET, -8, + 0x001f10, 0x001f15, INTEGER_OFFSET, 8, + 0x001f18, 0x001f1d, INTEGER_OFFSET, -8, + 0x001f20, 0x001f27, INTEGER_OFFSET, 8, + 0x001f28, 0x001f2f, INTEGER_OFFSET, -8, + 0x001f30, 0x001f37, INTEGER_OFFSET, 8, + 0x001f38, 0x001f3f, INTEGER_OFFSET, -8, + 0x001f40, 0x001f45, INTEGER_OFFSET, 8, + 0x001f48, 0x001f4d, INTEGER_OFFSET, -8, + 0x001f51, 0x001f51, INTEGER_OFFSET, 8, + 0x001f53, 0x001f53, INTEGER_OFFSET, 8, + 0x001f55, 0x001f55, INTEGER_OFFSET, 8, + 0x001f57, 0x001f57, INTEGER_OFFSET, 8, + 0x001f59, 0x001f59, INTEGER_OFFSET, -8, + 0x001f5b, 0x001f5b, INTEGER_OFFSET, -8, + 0x001f5d, 0x001f5d, INTEGER_OFFSET, -8, + 0x001f5f, 0x001f5f, INTEGER_OFFSET, -8, + 0x001f60, 0x001f67, INTEGER_OFFSET, 8, + 0x001f68, 0x001f6f, INTEGER_OFFSET, -8, + 0x001f70, 0x001f71, INTEGER_OFFSET, 74, + 0x001f72, 0x001f75, INTEGER_OFFSET, 86, + 0x001f76, 0x001f77, INTEGER_OFFSET, 100, + 0x001f78, 0x001f79, INTEGER_OFFSET, 128, + 0x001f7a, 0x001f7b, INTEGER_OFFSET, 112, + 0x001f7c, 0x001f7d, INTEGER_OFFSET, 126, + 0x001f80, 0x001f87, INTEGER_OFFSET, 8, + 0x001f88, 0x001f8f, INTEGER_OFFSET, -8, + 0x001f90, 0x001f97, INTEGER_OFFSET, 8, + 0x001f98, 0x001f9f, INTEGER_OFFSET, -8, + 0x001fa0, 0x001fa7, INTEGER_OFFSET, 8, + 0x001fa8, 0x001faf, INTEGER_OFFSET, -8, 
+ 0x001fb0, 0x001fb1, INTEGER_OFFSET, 8, + 0x001fb3, 0x001fb3, INTEGER_OFFSET, 9, + 0x001fb8, 0x001fb9, INTEGER_OFFSET, -8, + 0x001fba, 0x001fbb, INTEGER_OFFSET, -74, + 0x001fbc, 0x001fbc, INTEGER_OFFSET, -9, + 0x001fbe, 0x001fbe, DIRECT_MAPPING, 8, + 0x001fc3, 0x001fc3, INTEGER_OFFSET, 9, + 0x001fc8, 0x001fcb, INTEGER_OFFSET, -86, + 0x001fcc, 0x001fcc, INTEGER_OFFSET, -9, + 0x001fd0, 0x001fd1, INTEGER_OFFSET, 8, + 0x001fd8, 0x001fd9, INTEGER_OFFSET, -8, + 0x001fda, 0x001fdb, INTEGER_OFFSET, -100, + 0x001fe0, 0x001fe1, INTEGER_OFFSET, 8, + 0x001fe5, 0x001fe5, INTEGER_OFFSET, 7, + 0x001fe8, 0x001fe9, INTEGER_OFFSET, -8, + 0x001fea, 0x001feb, INTEGER_OFFSET, -112, + 0x001fec, 0x001fec, INTEGER_OFFSET, -7, + 0x001ff3, 0x001ff3, INTEGER_OFFSET, 9, + 0x001ff8, 0x001ff9, INTEGER_OFFSET, -128, + 0x001ffa, 0x001ffb, INTEGER_OFFSET, -126, + 0x001ffc, 0x001ffc, INTEGER_OFFSET, -9, + 0x002126, 0x002126, DIRECT_MAPPING, 17, + 0x00212a, 0x00212a, DIRECT_MAPPING, 0, + 0x00212b, 0x00212b, DIRECT_MAPPING, 3, + 0x002132, 0x002132, INTEGER_OFFSET, 28, + 0x00214e, 0x00214e, INTEGER_OFFSET, -28, + 0x002160, 0x00216f, INTEGER_OFFSET, 16, + 0x002170, 0x00217f, INTEGER_OFFSET, -16, + 0x002183, 0x002184, ALTERNATING_UL, 0, + 0x0024b6, 0x0024cf, INTEGER_OFFSET, 26, + 0x0024d0, 0x0024e9, INTEGER_OFFSET, -26, + 0x002c00, 0x002c2f, INTEGER_OFFSET, 48, + 0x002c30, 0x002c5f, INTEGER_OFFSET, -48, + 0x002c60, 0x002c61, ALTERNATING_AL, 0, + 0x002c62, 0x002c62, INTEGER_OFFSET, -10743, + 0x002c63, 0x002c63, INTEGER_OFFSET, -3814, + 0x002c64, 0x002c64, INTEGER_OFFSET, -10727, + 0x002c65, 0x002c65, INTEGER_OFFSET, -10795, + 0x002c66, 0x002c66, INTEGER_OFFSET, -10792, + 0x002c67, 0x002c6c, ALTERNATING_UL, 0, + 0x002c6d, 0x002c6d, INTEGER_OFFSET, -10780, + 0x002c6e, 0x002c6e, INTEGER_OFFSET, -10749, + 0x002c6f, 0x002c6f, INTEGER_OFFSET, -10783, + 0x002c70, 0x002c70, INTEGER_OFFSET, -10782, + 0x002c72, 0x002c73, ALTERNATING_AL, 0, + 0x002c75, 0x002c76, ALTERNATING_UL, 0, + 0x002c7e, 0x002c7f, 
INTEGER_OFFSET, -10815, + 0x002c80, 0x002ce3, ALTERNATING_AL, 0, + 0x002ceb, 0x002cee, ALTERNATING_UL, 0, + 0x002cf2, 0x002cf3, ALTERNATING_AL, 0, + 0x002d00, 0x002d25, INTEGER_OFFSET, -7264, + 0x002d27, 0x002d27, INTEGER_OFFSET, -7264, + 0x002d2d, 0x002d2d, INTEGER_OFFSET, -7264, + 0x00a640, 0x00a649, ALTERNATING_AL, 0, + 0x00a64a, 0x00a64b, DIRECT_MAPPING, 25, + 0x00a64c, 0x00a66d, ALTERNATING_AL, 0, + 0x00a680, 0x00a69b, ALTERNATING_AL, 0, + 0x00a722, 0x00a72f, ALTERNATING_AL, 0, + 0x00a732, 0x00a76f, ALTERNATING_AL, 0, + 0x00a779, 0x00a77c, ALTERNATING_UL, 0, + 0x00a77d, 0x00a77d, INTEGER_OFFSET, -35332, + 0x00a77e, 0x00a787, ALTERNATING_AL, 0, + 0x00a78b, 0x00a78c, ALTERNATING_UL, 0, + 0x00a78d, 0x00a78d, INTEGER_OFFSET, -42280, + 0x00a790, 0x00a793, ALTERNATING_AL, 0, + 0x00a794, 0x00a794, INTEGER_OFFSET, 48, + 0x00a796, 0x00a7a9, ALTERNATING_AL, 0, + 0x00a7aa, 0x00a7aa, INTEGER_OFFSET, -42308, + 0x00a7ab, 0x00a7ab, INTEGER_OFFSET, -42319, + 0x00a7ac, 0x00a7ac, INTEGER_OFFSET, -42315, + 0x00a7ad, 0x00a7ad, INTEGER_OFFSET, -42305, + 0x00a7ae, 0x00a7ae, INTEGER_OFFSET, -42308, + 0x00a7b0, 0x00a7b0, INTEGER_OFFSET, -42258, + 0x00a7b1, 0x00a7b1, INTEGER_OFFSET, -42282, + 0x00a7b2, 0x00a7b2, INTEGER_OFFSET, -42261, + 0x00a7b3, 0x00a7b3, INTEGER_OFFSET, 928, + 0x00a7b4, 0x00a7c3, ALTERNATING_AL, 0, + 0x00a7c4, 0x00a7c4, INTEGER_OFFSET, -48, + 0x00a7c5, 0x00a7c5, INTEGER_OFFSET, -42307, + 0x00a7c6, 0x00a7c6, INTEGER_OFFSET, -35384, + 0x00a7c7, 0x00a7ca, ALTERNATING_UL, 0, + 0x00a7d0, 0x00a7d1, ALTERNATING_AL, 0, + 0x00a7d6, 0x00a7d9, ALTERNATING_AL, 0, + 0x00a7f5, 0x00a7f6, ALTERNATING_UL, 0, + 0x00ab53, 0x00ab53, INTEGER_OFFSET, -928, + 0x00ab70, 0x00abbf, INTEGER_OFFSET, -38864, + 0x00ff21, 0x00ff3a, INTEGER_OFFSET, 32, + 0x00ff41, 0x00ff5a, INTEGER_OFFSET, -32, + 0x010400, 0x010427, INTEGER_OFFSET, 40, + 0x010428, 0x01044f, INTEGER_OFFSET, -40, + 0x0104b0, 0x0104d3, INTEGER_OFFSET, 40, + 0x0104d8, 0x0104fb, INTEGER_OFFSET, -40, + 0x010570, 0x01057a, 
INTEGER_OFFSET, 39, + 0x01057c, 0x01058a, INTEGER_OFFSET, 39, + 0x01058c, 0x010592, INTEGER_OFFSET, 39, + 0x010594, 0x010595, INTEGER_OFFSET, 39, + 0x010597, 0x0105a1, INTEGER_OFFSET, -39, + 0x0105a3, 0x0105b1, INTEGER_OFFSET, -39, + 0x0105b3, 0x0105b9, INTEGER_OFFSET, -39, + 0x0105bb, 0x0105bc, INTEGER_OFFSET, -39, + 0x010c80, 0x010cb2, INTEGER_OFFSET, 64, + 0x010cc0, 0x010cf2, INTEGER_OFFSET, -64, + 0x0118a0, 0x0118bf, INTEGER_OFFSET, 32, + 0x0118c0, 0x0118df, INTEGER_OFFSET, -32, + 0x016e40, 0x016e5f, INTEGER_OFFSET, 32, + 0x016e60, 0x016e7f, INTEGER_OFFSET, -32, + 0x01e900, 0x01e921, INTEGER_OFFSET, 34, + 0x01e922, 0x01e943, INTEGER_OFFSET, -34, + }); + private static final CaseFoldEquivalenceTable JS_NON_UNICODE = new CaseFoldEquivalenceTable(UNICODE_15_0_0_SIMPLE, new CodePointSet[]{ + rangeSet(0x000398, 0x000398, 0x0003b8, 0x0003b8, 0x0003d1, 0x0003d1), + }, new int[]{ + 0x00004b, 0x00005a, INTEGER_OFFSET, 32, + 0x00006b, 0x00007a, INTEGER_OFFSET, -32, + 0x0000c5, 0x0000d6, INTEGER_OFFSET, 32, + 0x0000df, 0x0000df, INTEGER_OFFSET, 0, + 0x0000e5, 0x0000f6, INTEGER_OFFSET, -32, + 0x00017f, 0x00017f, INTEGER_OFFSET, 0, + 0x000398, 0x000398, DIRECT_MAPPING, 0, + 0x0003a9, 0x0003ab, INTEGER_OFFSET, 32, + 0x0003b8, 0x0003b8, DIRECT_MAPPING, 0, + 0x0003c9, 0x0003cb, INTEGER_OFFSET, -32, + 0x0003d1, 0x0003d1, DIRECT_MAPPING, 0, + 0x0003f4, 0x0003f4, INTEGER_OFFSET, 0, + 0x001e9e, 0x001e9e, INTEGER_OFFSET, 0, + 0x001f80, 0x001f87, INTEGER_OFFSET, 0, + 0x001f88, 0x001f8f, INTEGER_OFFSET, 0, + 0x001f90, 0x001f97, INTEGER_OFFSET, 0, + 0x001f98, 0x001f9f, INTEGER_OFFSET, 0, + 0x001fa0, 0x001fa7, INTEGER_OFFSET, 0, + 0x001fa8, 0x001faf, INTEGER_OFFSET, 0, + 0x001fb3, 0x001fb3, INTEGER_OFFSET, 0, + 0x001fbc, 0x001fbc, INTEGER_OFFSET, 0, + 0x001fc3, 0x001fc3, INTEGER_OFFSET, 0, + 0x001fcc, 0x001fcc, INTEGER_OFFSET, 0, + 0x001ff3, 0x001ff3, INTEGER_OFFSET, 0, + 0x001ffc, 0x001ffc, INTEGER_OFFSET, 0, + 0x002126, 0x002126, INTEGER_OFFSET, 0, + 0x00212a, 0x00212a, 
INTEGER_OFFSET, 0, + 0x00212b, 0x00212b, INTEGER_OFFSET, 0, + 0x010400, 0x010427, INTEGER_OFFSET, 0, + 0x010428, 0x01044f, INTEGER_OFFSET, 0, + 0x0104b0, 0x0104d3, INTEGER_OFFSET, 0, + 0x0104d8, 0x0104fb, INTEGER_OFFSET, 0, + 0x010570, 0x01057a, INTEGER_OFFSET, 0, + 0x01057c, 0x01058a, INTEGER_OFFSET, 0, + 0x01058c, 0x010592, INTEGER_OFFSET, 0, + 0x010594, 0x010595, INTEGER_OFFSET, 0, + 0x010597, 0x0105a1, INTEGER_OFFSET, 0, + 0x0105a3, 0x0105b1, INTEGER_OFFSET, 0, + 0x0105b3, 0x0105b9, INTEGER_OFFSET, 0, + 0x0105bb, 0x0105bc, INTEGER_OFFSET, 0, + 0x010c80, 0x010cb2, INTEGER_OFFSET, 0, + 0x010cc0, 0x010cf2, INTEGER_OFFSET, 0, + 0x0118a0, 0x0118bf, INTEGER_OFFSET, 0, + 0x0118c0, 0x0118df, INTEGER_OFFSET, 0, + 0x016e40, 0x016e5f, INTEGER_OFFSET, 0, + 0x016e60, 0x016e7f, INTEGER_OFFSET, 0, + 0x01e900, 0x01e921, INTEGER_OFFSET, 0, + 0x01e922, 0x01e943, INTEGER_OFFSET, 0, + }); + private static final CaseFoldEquivalenceTable PYTHON_UNICODE = new CaseFoldEquivalenceTable(UNICODE_15_0_0_SIMPLE, new CodePointSet[]{ + rangeSet(0x000049, 0x000049, 0x000069, 0x000069, 0x000130, 0x000131), + }, new int[]{ + 0x000049, 0x000049, DIRECT_MAPPING, 0, + 0x000069, 0x000069, DIRECT_MAPPING, 0, + 0x000130, 0x000131, DIRECT_MAPPING, 0, + 0x000390, 0x000390, INTEGER_OFFSET, 7235, + 0x0003b0, 0x0003b0, INTEGER_OFFSET, 7219, + 0x001fd3, 0x001fd3, INTEGER_OFFSET, -7235, + 0x001fe3, 0x001fe3, INTEGER_OFFSET, -7219, + 0x00fb05, 0x00fb06, ALTERNATING_UL, 0, + }); + private static final CaseFoldTable UNICODE_15_0_0_FULL = new CaseFoldTable(null, new int[]{ + 0x000041, 0x00005a, INTEGER_OFFSET, 32, + 0x0000b5, 0x0000b5, INTEGER_OFFSET, 775, + 0x0000c0, 0x0000d6, INTEGER_OFFSET, 32, + 0x0000d8, 0x0000de, INTEGER_OFFSET, 32, + 0x0000df, 0x0000df, INTEGER_OFFSET, 1113962, + 0x000100, 0x00012e, ALTERNATING_AL, 0, + 0x000130, 0x000130, INTEGER_OFFSET, 1113808, + 0x000132, 0x000136, ALTERNATING_AL, 0, + 0x000139, 0x000147, ALTERNATING_UL, 0, + 0x000149, 0x000149, INTEGER_OFFSET, 1113857, + 0x00014a, 
0x000176, ALTERNATING_AL, 0, + 0x000178, 0x000178, INTEGER_OFFSET, -121, + 0x000179, 0x00017d, ALTERNATING_UL, 0, + 0x00017f, 0x00017f, INTEGER_OFFSET, -268, + 0x000181, 0x000181, INTEGER_OFFSET, 210, + 0x000182, 0x000184, ALTERNATING_AL, 0, + 0x000186, 0x000186, INTEGER_OFFSET, 206, + 0x000187, 0x000187, ALTERNATING_UL, 0, + 0x000189, 0x00018a, INTEGER_OFFSET, 205, + 0x00018b, 0x00018b, ALTERNATING_UL, 0, + 0x00018e, 0x00018e, INTEGER_OFFSET, 79, + 0x00018f, 0x00018f, INTEGER_OFFSET, 202, + 0x000190, 0x000190, INTEGER_OFFSET, 203, + 0x000191, 0x000191, ALTERNATING_UL, 0, + 0x000193, 0x000193, INTEGER_OFFSET, 205, + 0x000194, 0x000194, INTEGER_OFFSET, 207, + 0x000196, 0x000196, INTEGER_OFFSET, 211, + 0x000197, 0x000197, INTEGER_OFFSET, 209, + 0x000198, 0x000198, ALTERNATING_AL, 0, + 0x00019c, 0x00019c, INTEGER_OFFSET, 211, + 0x00019d, 0x00019d, INTEGER_OFFSET, 213, + 0x00019f, 0x00019f, INTEGER_OFFSET, 214, + 0x0001a0, 0x0001a4, ALTERNATING_AL, 0, + 0x0001a6, 0x0001a6, INTEGER_OFFSET, 218, + 0x0001a7, 0x0001a7, ALTERNATING_UL, 0, + 0x0001a9, 0x0001a9, INTEGER_OFFSET, 218, + 0x0001ac, 0x0001ac, ALTERNATING_AL, 0, + 0x0001ae, 0x0001ae, INTEGER_OFFSET, 218, + 0x0001af, 0x0001af, ALTERNATING_UL, 0, + 0x0001b1, 0x0001b2, INTEGER_OFFSET, 217, + 0x0001b3, 0x0001b5, ALTERNATING_UL, 0, + 0x0001b7, 0x0001b7, INTEGER_OFFSET, 219, + 0x0001b8, 0x0001b8, ALTERNATING_AL, 0, + 0x0001bc, 0x0001bc, ALTERNATING_AL, 0, + 0x0001c4, 0x0001c5, DIRECT_SINGLE, 454, + 0x0001c7, 0x0001c8, DIRECT_SINGLE, 457, + 0x0001ca, 0x0001cb, DIRECT_SINGLE, 460, + 0x0001cd, 0x0001db, ALTERNATING_UL, 0, + 0x0001de, 0x0001ee, ALTERNATING_AL, 0, + 0x0001f0, 0x0001f0, INTEGER_OFFSET, 1113691, + 0x0001f1, 0x0001f2, DIRECT_SINGLE, 499, + 0x0001f4, 0x0001f4, ALTERNATING_AL, 0, + 0x0001f6, 0x0001f6, INTEGER_OFFSET, -97, + 0x0001f7, 0x0001f7, INTEGER_OFFSET, -56, + 0x0001f8, 0x00021e, ALTERNATING_AL, 0, + 0x000220, 0x000220, INTEGER_OFFSET, -130, + 0x000222, 0x000232, ALTERNATING_AL, 0, + 0x00023a, 0x00023a, 
INTEGER_OFFSET, 10795, + 0x00023b, 0x00023b, ALTERNATING_UL, 0, + 0x00023d, 0x00023d, INTEGER_OFFSET, -163, + 0x00023e, 0x00023e, INTEGER_OFFSET, 10792, + 0x000241, 0x000241, ALTERNATING_UL, 0, + 0x000243, 0x000243, INTEGER_OFFSET, -195, + 0x000244, 0x000244, INTEGER_OFFSET, 69, + 0x000245, 0x000245, INTEGER_OFFSET, 71, + 0x000246, 0x00024e, ALTERNATING_AL, 0, + 0x000345, 0x000345, INTEGER_OFFSET, 116, + 0x000370, 0x000372, ALTERNATING_AL, 0, + 0x000376, 0x000376, ALTERNATING_AL, 0, + 0x00037f, 0x00037f, INTEGER_OFFSET, 116, + 0x000386, 0x000386, INTEGER_OFFSET, 38, + 0x000388, 0x00038a, INTEGER_OFFSET, 37, + 0x00038c, 0x00038c, INTEGER_OFFSET, 64, + 0x00038e, 0x00038f, INTEGER_OFFSET, 63, + 0x000390, 0x000390, INTEGER_OFFSET, 1113276, + 0x000391, 0x0003a1, INTEGER_OFFSET, 32, + 0x0003a3, 0x0003ab, INTEGER_OFFSET, 32, + 0x0003b0, 0x0003b0, INTEGER_OFFSET, 1113245, + 0x0003c2, 0x0003c2, ALTERNATING_AL, 0, + 0x0003cf, 0x0003cf, INTEGER_OFFSET, 8, + 0x0003d0, 0x0003d0, INTEGER_OFFSET, -30, + 0x0003d1, 0x0003d1, INTEGER_OFFSET, -25, + 0x0003d5, 0x0003d5, INTEGER_OFFSET, -15, + 0x0003d6, 0x0003d6, INTEGER_OFFSET, -22, + 0x0003d8, 0x0003ee, ALTERNATING_AL, 0, + 0x0003f0, 0x0003f0, INTEGER_OFFSET, -54, + 0x0003f1, 0x0003f1, INTEGER_OFFSET, -48, + 0x0003f4, 0x0003f4, INTEGER_OFFSET, -60, + 0x0003f5, 0x0003f5, INTEGER_OFFSET, -64, + 0x0003f7, 0x0003f7, ALTERNATING_UL, 0, + 0x0003f9, 0x0003f9, INTEGER_OFFSET, -7, + 0x0003fa, 0x0003fa, ALTERNATING_AL, 0, + 0x0003fd, 0x0003ff, INTEGER_OFFSET, -130, + 0x000400, 0x00040f, INTEGER_OFFSET, 80, + 0x000410, 0x00042f, INTEGER_OFFSET, 32, + 0x000460, 0x000480, ALTERNATING_AL, 0, + 0x00048a, 0x0004be, ALTERNATING_AL, 0, + 0x0004c0, 0x0004c0, INTEGER_OFFSET, 15, + 0x0004c1, 0x0004cd, ALTERNATING_UL, 0, + 0x0004d0, 0x00052e, ALTERNATING_AL, 0, + 0x000531, 0x000556, INTEGER_OFFSET, 48, + 0x000587, 0x000587, INTEGER_OFFSET, 1112775, + 0x0010a0, 0x0010c5, INTEGER_OFFSET, 7264, + 0x0010c7, 0x0010c7, INTEGER_OFFSET, 7264, + 0x0010cd, 
0x0010cd, INTEGER_OFFSET, 7264, + 0x0013f8, 0x0013fd, INTEGER_OFFSET, -8, + 0x001c80, 0x001c80, INTEGER_OFFSET, -6222, + 0x001c81, 0x001c81, INTEGER_OFFSET, -6221, + 0x001c82, 0x001c82, INTEGER_OFFSET, -6212, + 0x001c83, 0x001c84, INTEGER_OFFSET, -6210, + 0x001c85, 0x001c85, INTEGER_OFFSET, -6211, + 0x001c86, 0x001c86, INTEGER_OFFSET, -6204, + 0x001c87, 0x001c87, INTEGER_OFFSET, -6180, + 0x001c88, 0x001c88, INTEGER_OFFSET, 35267, + 0x001c90, 0x001cba, INTEGER_OFFSET, -3008, + 0x001cbd, 0x001cbf, INTEGER_OFFSET, -3008, + 0x001e00, 0x001e94, ALTERNATING_AL, 0, + 0x001e96, 0x001e9a, INTEGER_OFFSET, 1106361, + 0x001e9b, 0x001e9b, INTEGER_OFFSET, -58, + 0x001e9e, 0x001e9e, INTEGER_OFFSET, 1106347, + 0x001ea0, 0x001efe, ALTERNATING_AL, 0, + 0x001f08, 0x001f0f, INTEGER_OFFSET, -8, + 0x001f18, 0x001f1d, INTEGER_OFFSET, -8, + 0x001f28, 0x001f2f, INTEGER_OFFSET, -8, + 0x001f38, 0x001f3f, INTEGER_OFFSET, -8, + 0x001f48, 0x001f4d, INTEGER_OFFSET, -8, + 0x001f50, 0x001f50, INTEGER_OFFSET, 1106180, + 0x001f52, 0x001f52, INTEGER_OFFSET, 1106179, + 0x001f54, 0x001f54, INTEGER_OFFSET, 1106178, + 0x001f56, 0x001f56, INTEGER_OFFSET, 1106177, + 0x001f59, 0x001f59, INTEGER_OFFSET, -8, + 0x001f5b, 0x001f5b, INTEGER_OFFSET, -8, + 0x001f5d, 0x001f5d, INTEGER_OFFSET, -8, + 0x001f5f, 0x001f5f, INTEGER_OFFSET, -8, + 0x001f68, 0x001f6f, INTEGER_OFFSET, -8, + 0x001f80, 0x001f87, INTEGER_OFFSET, 1106136, + 0x001f88, 0x001f97, INTEGER_OFFSET, 1106128, + 0x001f98, 0x001fa7, INTEGER_OFFSET, 1106120, + 0x001fa8, 0x001faf, INTEGER_OFFSET, 1106112, + 0x001fb2, 0x001fb4, INTEGER_OFFSET, 1106110, + 0x001fb6, 0x001fb7, INTEGER_OFFSET, 1106109, + 0x001fb8, 0x001fb9, INTEGER_OFFSET, -8, + 0x001fba, 0x001fbb, INTEGER_OFFSET, -74, + 0x001fbc, 0x001fbc, INTEGER_OFFSET, 1106101, + 0x001fbe, 0x001fbe, INTEGER_OFFSET, -7173, + 0x001fc2, 0x001fc4, INTEGER_OFFSET, 1106099, + 0x001fc6, 0x001fc7, INTEGER_OFFSET, 1106098, + 0x001fc8, 0x001fcb, INTEGER_OFFSET, -86, + 0x001fcc, 0x001fcc, INTEGER_OFFSET, 1106090, + 
0x001fd2, 0x001fd2, INTEGER_OFFSET, 1106088, + 0x001fd3, 0x001fd3, INTEGER_OFFSET, 1106041, + 0x001fd6, 0x001fd7, INTEGER_OFFSET, 1106085, + 0x001fd8, 0x001fd9, INTEGER_OFFSET, -8, + 0x001fda, 0x001fdb, INTEGER_OFFSET, -100, + 0x001fe2, 0x001fe2, INTEGER_OFFSET, 1106075, + 0x001fe3, 0x001fe3, INTEGER_OFFSET, 1106026, + 0x001fe4, 0x001fe4, INTEGER_OFFSET, 1106074, + 0x001fe6, 0x001fe7, INTEGER_OFFSET, 1106073, + 0x001fe8, 0x001fe9, INTEGER_OFFSET, -8, + 0x001fea, 0x001feb, INTEGER_OFFSET, -112, + 0x001fec, 0x001fec, INTEGER_OFFSET, -7, + 0x001ff2, 0x001ff4, INTEGER_OFFSET, 1106063, + 0x001ff6, 0x001ff7, INTEGER_OFFSET, 1106062, + 0x001ff8, 0x001ff9, INTEGER_OFFSET, -128, + 0x001ffa, 0x001ffb, INTEGER_OFFSET, -126, + 0x001ffc, 0x001ffc, INTEGER_OFFSET, 1106054, + 0x002126, 0x002126, INTEGER_OFFSET, -7517, + 0x00212a, 0x00212a, INTEGER_OFFSET, -8383, + 0x00212b, 0x00212b, INTEGER_OFFSET, -8262, + 0x002132, 0x002132, INTEGER_OFFSET, 28, + 0x002160, 0x00216f, INTEGER_OFFSET, 16, + 0x002183, 0x002183, ALTERNATING_UL, 0, + 0x0024b6, 0x0024cf, INTEGER_OFFSET, 26, + 0x002c00, 0x002c2f, INTEGER_OFFSET, 48, + 0x002c60, 0x002c60, ALTERNATING_AL, 0, + 0x002c62, 0x002c62, INTEGER_OFFSET, -10743, + 0x002c63, 0x002c63, INTEGER_OFFSET, -3814, + 0x002c64, 0x002c64, INTEGER_OFFSET, -10727, + 0x002c67, 0x002c6b, ALTERNATING_UL, 0, + 0x002c6d, 0x002c6d, INTEGER_OFFSET, -10780, + 0x002c6e, 0x002c6e, INTEGER_OFFSET, -10749, + 0x002c6f, 0x002c6f, INTEGER_OFFSET, -10783, + 0x002c70, 0x002c70, INTEGER_OFFSET, -10782, + 0x002c72, 0x002c72, ALTERNATING_AL, 0, + 0x002c75, 0x002c75, ALTERNATING_UL, 0, + 0x002c7e, 0x002c7f, INTEGER_OFFSET, -10815, + 0x002c80, 0x002ce2, ALTERNATING_AL, 0, + 0x002ceb, 0x002ced, ALTERNATING_UL, 0, + 0x002cf2, 0x002cf2, ALTERNATING_AL, 0, + 0x00a640, 0x00a66c, ALTERNATING_AL, 0, + 0x00a680, 0x00a69a, ALTERNATING_AL, 0, + 0x00a722, 0x00a72e, ALTERNATING_AL, 0, + 0x00a732, 0x00a76e, ALTERNATING_AL, 0, + 0x00a779, 0x00a77b, ALTERNATING_UL, 0, + 0x00a77d, 0x00a77d, 
INTEGER_OFFSET, -35332, + 0x00a77e, 0x00a786, ALTERNATING_AL, 0, + 0x00a78b, 0x00a78b, ALTERNATING_UL, 0, + 0x00a78d, 0x00a78d, INTEGER_OFFSET, -42280, + 0x00a790, 0x00a792, ALTERNATING_AL, 0, + 0x00a796, 0x00a7a8, ALTERNATING_AL, 0, + 0x00a7aa, 0x00a7aa, INTEGER_OFFSET, -42308, + 0x00a7ab, 0x00a7ab, INTEGER_OFFSET, -42319, + 0x00a7ac, 0x00a7ac, INTEGER_OFFSET, -42315, + 0x00a7ad, 0x00a7ad, INTEGER_OFFSET, -42305, + 0x00a7ae, 0x00a7ae, INTEGER_OFFSET, -42308, + 0x00a7b0, 0x00a7b0, INTEGER_OFFSET, -42258, + 0x00a7b1, 0x00a7b1, INTEGER_OFFSET, -42282, + 0x00a7b2, 0x00a7b2, INTEGER_OFFSET, -42261, + 0x00a7b3, 0x00a7b3, INTEGER_OFFSET, 928, + 0x00a7b4, 0x00a7c2, ALTERNATING_AL, 0, + 0x00a7c4, 0x00a7c4, INTEGER_OFFSET, -48, + 0x00a7c5, 0x00a7c5, INTEGER_OFFSET, -42307, + 0x00a7c6, 0x00a7c6, INTEGER_OFFSET, -35384, + 0x00a7c7, 0x00a7c9, ALTERNATING_UL, 0, + 0x00a7d0, 0x00a7d0, ALTERNATING_AL, 0, + 0x00a7d6, 0x00a7d8, ALTERNATING_AL, 0, + 0x00a7f5, 0x00a7f5, ALTERNATING_UL, 0, + 0x00ab70, 0x00abbf, INTEGER_OFFSET, -38864, + 0x00fb00, 0x00fb05, INTEGER_OFFSET, 1049990, + 0x00fb06, 0x00fb06, INTEGER_OFFSET, 1049989, + 0x00fb13, 0x00fb17, INTEGER_OFFSET, 1049977, + 0x00ff21, 0x00ff3a, INTEGER_OFFSET, 32, + 0x010400, 0x010427, INTEGER_OFFSET, 40, + 0x0104b0, 0x0104d3, INTEGER_OFFSET, 40, + 0x010570, 0x01057a, INTEGER_OFFSET, 39, + 0x01057c, 0x01058a, INTEGER_OFFSET, 39, + 0x01058c, 0x010592, INTEGER_OFFSET, 39, + 0x010594, 0x010595, INTEGER_OFFSET, 39, + 0x010c80, 0x010cb2, INTEGER_OFFSET, 64, + 0x0118a0, 0x0118bf, INTEGER_OFFSET, 32, + 0x016e40, 0x016e5f, INTEGER_OFFSET, 32, + 0x01e900, 0x01e921, INTEGER_OFFSET, 34, + }); + private static final CaseFoldTable ORACLE_DB = new CaseFoldTable(UNICODE_15_0_0_FULL, new int[]{ + 0x002c2f, 0x002c2f, INTEGER_OFFSET, 0, + 0x00a7bf, 0x00a7c1, INTEGER_OFFSET, 0, + 0x00a7c7, 0x00a7c9, INTEGER_OFFSET, 0, + 0x00a7d0, 0x00a7d0, INTEGER_OFFSET, 0, + 0x00a7d6, 0x00a7d8, INTEGER_OFFSET, 0, + 0x00a7f5, 0x00a7f5, INTEGER_OFFSET, 0, + 0x010570, 
0x01057a, INTEGER_OFFSET, 0, + 0x01057c, 0x01058a, INTEGER_OFFSET, 0, + 0x01058c, 0x010592, INTEGER_OFFSET, 0, + 0x010594, 0x010595, INTEGER_OFFSET, 0, + }); + private static final CaseFoldTable ORACLE_DB_AI = new CaseFoldTable(null, new int[]{ + 0x000041, 0x00005a, INTEGER_OFFSET, 32, + 0x000084, 0x000084, ALTERNATING_AL, 0, + 0x0000a9, 0x0000a9, INTEGER_OFFSET, -70, + 0x0000aa, 0x0000aa, INTEGER_OFFSET, -73, + 0x0000ae, 0x0000ae, INTEGER_OFFSET, -60, + 0x0000b2, 0x0000b3, INTEGER_OFFSET, -128, + 0x0000b5, 0x0000b5, INTEGER_OFFSET, 775, + 0x0000b9, 0x0000b9, INTEGER_OFFSET, -136, + 0x0000ba, 0x0000ba, INTEGER_OFFSET, -75, + 0x0000c0, 0x0000c5, DIRECT_SINGLE, 97, + 0x0000c6, 0x0000c6, INTEGER_OFFSET, 32, + 0x0000c7, 0x0000c7, INTEGER_OFFSET, -100, + 0x0000c8, 0x0000cb, DIRECT_SINGLE, 101, + 0x0000cc, 0x0000cf, DIRECT_SINGLE, 105, + 0x0000d0, 0x0000d0, INTEGER_OFFSET, 32, + 0x0000d1, 0x0000d2, INTEGER_OFFSET, -99, + 0x0000d3, 0x0000d8, DIRECT_SINGLE, 111, + 0x0000d9, 0x0000dc, DIRECT_SINGLE, 117, + 0x0000dd, 0x0000dd, INTEGER_OFFSET, -100, + 0x0000de, 0x0000de, INTEGER_OFFSET, 32, + 0x0000df, 0x0000df, INTEGER_OFFSET, 1113962, + 0x0000e0, 0x0000e5, DIRECT_SINGLE, 97, + 0x0000e7, 0x0000e7, INTEGER_OFFSET, -132, + 0x0000e8, 0x0000eb, DIRECT_SINGLE, 101, + 0x0000ec, 0x0000ef, DIRECT_SINGLE, 105, + 0x0000f1, 0x0000f2, INTEGER_OFFSET, -131, + 0x0000f3, 0x0000f8, DIRECT_SINGLE, 111, + 0x0000f9, 0x0000fc, DIRECT_SINGLE, 117, + 0x0000fd, 0x0000ff, DIRECT_SINGLE, 121, + 0x000100, 0x000105, DIRECT_SINGLE, 97, + 0x000106, 0x00010d, DIRECT_SINGLE, 99, + 0x00010e, 0x000111, DIRECT_SINGLE, 100, + 0x000112, 0x00011b, DIRECT_SINGLE, 101, + 0x00011c, 0x000123, DIRECT_SINGLE, 103, + 0x000124, 0x000127, DIRECT_SINGLE, 104, + 0x000128, 0x000131, DIRECT_SINGLE, 105, + 0x000132, 0x000133, DIRECT_SINGLE, 1114257, + 0x000134, 0x000135, DIRECT_SINGLE, 106, + 0x000136, 0x000138, DIRECT_SINGLE, 107, + 0x000139, 0x000140, DIRECT_SINGLE, 108, + 0x000141, 0x000141, ALTERNATING_UL, 0, + 0x000142, 
0x000142, INTEGER_OFFSET, -214, + 0x000143, 0x000148, DIRECT_SINGLE, 110, + 0x00014a, 0x00014a, ALTERNATING_AL, 0, + 0x00014c, 0x000151, DIRECT_SINGLE, 111, + 0x000152, 0x000153, DIRECT_SINGLE, 1114258, + 0x000154, 0x000159, DIRECT_SINGLE, 114, + 0x00015a, 0x000161, DIRECT_SINGLE, 115, + 0x000162, 0x000165, DIRECT_SINGLE, 116, + 0x000166, 0x000166, ALTERNATING_AL, 0, + 0x000167, 0x000168, INTEGER_OFFSET, -243, + 0x000169, 0x000173, DIRECT_SINGLE, 117, + 0x000174, 0x000175, DIRECT_SINGLE, 119, + 0x000176, 0x000178, DIRECT_SINGLE, 121, + 0x000179, 0x00017e, DIRECT_SINGLE, 122, + 0x00017f, 0x00017f, INTEGER_OFFSET, -268, + 0x000181, 0x000181, INTEGER_OFFSET, 210, + 0x000182, 0x000184, ALTERNATING_AL, 0, + 0x000186, 0x000186, INTEGER_OFFSET, 206, + 0x000187, 0x000187, ALTERNATING_UL, 0, + 0x000189, 0x00018a, INTEGER_OFFSET, 205, + 0x00018b, 0x00018b, ALTERNATING_UL, 0, + 0x00018e, 0x00018e, INTEGER_OFFSET, 79, + 0x00018f, 0x00018f, INTEGER_OFFSET, 202, + 0x000190, 0x000190, INTEGER_OFFSET, 203, + 0x000191, 0x000191, ALTERNATING_UL, 0, + 0x000193, 0x000193, INTEGER_OFFSET, 205, + 0x000194, 0x000194, INTEGER_OFFSET, 207, + 0x000196, 0x000196, INTEGER_OFFSET, 211, + 0x000197, 0x000197, INTEGER_OFFSET, 209, + 0x000198, 0x000198, ALTERNATING_AL, 0, + 0x00019c, 0x00019c, INTEGER_OFFSET, 211, + 0x00019d, 0x00019d, INTEGER_OFFSET, 213, + 0x00019f, 0x00019f, INTEGER_OFFSET, 214, + 0x0001a0, 0x0001a1, DIRECT_SINGLE, 111, + 0x0001a2, 0x0001a4, ALTERNATING_AL, 0, + 0x0001a6, 0x0001a6, INTEGER_OFFSET, 218, + 0x0001a7, 0x0001a7, ALTERNATING_UL, 0, + 0x0001a9, 0x0001a9, INTEGER_OFFSET, 218, + 0x0001ac, 0x0001ac, ALTERNATING_AL, 0, + 0x0001ae, 0x0001ae, INTEGER_OFFSET, 218, + 0x0001af, 0x0001b0, DIRECT_SINGLE, 117, + 0x0001b1, 0x0001b2, INTEGER_OFFSET, 217, + 0x0001b3, 0x0001b5, ALTERNATING_UL, 0, + 0x0001b7, 0x0001b7, INTEGER_OFFSET, 219, + 0x0001b8, 0x0001b8, ALTERNATING_AL, 0, + 0x0001bc, 0x0001bc, ALTERNATING_AL, 0, + 0x0001c4, 0x0001c6, DIRECT_SINGLE, 499, + 0x0001c7, 0x0001c9, 
DIRECT_SINGLE, 1114259, + 0x0001ca, 0x0001cc, DIRECT_SINGLE, 1114260, + 0x0001cd, 0x0001ce, DIRECT_SINGLE, 97, + 0x0001cf, 0x0001d0, DIRECT_SINGLE, 105, + 0x0001d1, 0x0001d2, DIRECT_SINGLE, 111, + 0x0001d3, 0x0001dc, DIRECT_SINGLE, 117, + 0x0001de, 0x0001e1, DIRECT_SINGLE, 97, + 0x0001e2, 0x0001e3, DIRECT_SINGLE, 230, + 0x0001e4, 0x0001e4, ALTERNATING_AL, 0, + 0x0001e6, 0x0001e7, DIRECT_SINGLE, 103, + 0x0001e8, 0x0001e9, DIRECT_SINGLE, 107, + 0x0001ea, 0x0001ed, DIRECT_SINGLE, 111, + 0x0001ee, 0x0001ee, INTEGER_OFFSET, -55, + 0x0001ef, 0x0001ef, INTEGER_OFFSET, 163, + 0x0001f0, 0x0001f0, INTEGER_OFFSET, -390, + 0x0001f1, 0x0001f3, DIRECT_SINGLE, 1114261, + 0x0001f4, 0x0001f5, DIRECT_SINGLE, 103, + 0x0001f6, 0x0001f6, INTEGER_OFFSET, -97, + 0x0001f7, 0x0001f7, INTEGER_OFFSET, -56, + 0x0001f8, 0x0001f9, DIRECT_SINGLE, 110, + 0x0001fa, 0x0001fb, DIRECT_SINGLE, 97, + 0x0001fc, 0x0001fd, DIRECT_SINGLE, 230, + 0x0001fe, 0x0001ff, DIRECT_SINGLE, 111, + 0x000200, 0x000203, DIRECT_SINGLE, 97, + 0x000204, 0x000207, DIRECT_SINGLE, 101, + 0x000208, 0x00020b, DIRECT_SINGLE, 105, + 0x00020c, 0x00020f, DIRECT_SINGLE, 111, + 0x000210, 0x000213, DIRECT_SINGLE, 114, + 0x000214, 0x000217, DIRECT_SINGLE, 117, + 0x000218, 0x000219, DIRECT_SINGLE, 115, + 0x00021a, 0x00021b, DIRECT_SINGLE, 116, + 0x00021c, 0x00021c, ALTERNATING_AL, 0, + 0x00021e, 0x00021f, DIRECT_SINGLE, 104, + 0x000222, 0x000224, ALTERNATING_AL, 0, + 0x000226, 0x000227, DIRECT_SINGLE, 97, + 0x000228, 0x000229, DIRECT_SINGLE, 101, + 0x00022a, 0x000231, DIRECT_SINGLE, 111, + 0x000232, 0x000233, DIRECT_SINGLE, 121, + 0x0002a3, 0x0002a8, INTEGER_OFFSET, 1113586, + 0x0002b0, 0x0002b0, INTEGER_OFFSET, -584, + 0x0002b1, 0x0002b1, INTEGER_OFFSET, -75, + 0x0002b2, 0x0002b2, INTEGER_OFFSET, -584, + 0x0002b3, 0x0002b3, INTEGER_OFFSET, -577, + 0x0002b4, 0x0002b4, INTEGER_OFFSET, -59, + 0x0002b5, 0x0002b5, INTEGER_OFFSET, -58, + 0x0002b6, 0x0002b6, INTEGER_OFFSET, -53, + 0x0002b7, 0x0002b7, INTEGER_OFFSET, -576, + 0x0002b8, 
0x0002b8, INTEGER_OFFSET, -575, + 0x0002e0, 0x0002e0, INTEGER_OFFSET, -125, + 0x0002e1, 0x0002e1, INTEGER_OFFSET, -629, + 0x0002e2, 0x0002e2, INTEGER_OFFSET, -623, + 0x0002e4, 0x0002e4, INTEGER_OFFSET, -79, + 0x000344, 0x000344, INTEGER_OFFSET, -60, + 0x000385, 0x000385, INTEGER_OFFSET, -733, + 0x000386, 0x000386, INTEGER_OFFSET, 43, + 0x000388, 0x000388, INTEGER_OFFSET, 45, + 0x000389, 0x000389, INTEGER_OFFSET, 46, + 0x00038a, 0x00038a, INTEGER_OFFSET, 47, + 0x00038c, 0x00038c, INTEGER_OFFSET, 51, + 0x00038e, 0x00038e, INTEGER_OFFSET, 55, + 0x00038f, 0x00038f, INTEGER_OFFSET, 58, + 0x000390, 0x000390, INTEGER_OFFSET, 41, + 0x000391, 0x0003a1, INTEGER_OFFSET, 32, + 0x0003a3, 0x0003a9, INTEGER_OFFSET, 32, + 0x0003aa, 0x0003aa, INTEGER_OFFSET, 15, + 0x0003ab, 0x0003ab, INTEGER_OFFSET, 26, + 0x0003ac, 0x0003ac, INTEGER_OFFSET, 5, + 0x0003ad, 0x0003ad, INTEGER_OFFSET, 8, + 0x0003ae, 0x0003ae, INTEGER_OFFSET, 9, + 0x0003af, 0x0003af, INTEGER_OFFSET, 10, + 0x0003b0, 0x0003b0, INTEGER_OFFSET, 21, + 0x0003c2, 0x0003c2, ALTERNATING_AL, 0, + 0x0003ca, 0x0003ca, INTEGER_OFFSET, -17, + 0x0003cb, 0x0003cb, INTEGER_OFFSET, -6, + 0x0003cc, 0x0003cc, INTEGER_OFFSET, -13, + 0x0003cd, 0x0003cd, INTEGER_OFFSET, -8, + 0x0003ce, 0x0003ce, INTEGER_OFFSET, -5, + 0x0003d0, 0x0003d0, INTEGER_OFFSET, -30, + 0x0003d1, 0x0003d1, INTEGER_OFFSET, -25, + 0x0003d2, 0x0003d4, DIRECT_SINGLE, 965, + 0x0003d5, 0x0003d5, INTEGER_OFFSET, -15, + 0x0003d6, 0x0003d6, INTEGER_OFFSET, -22, + 0x0003da, 0x0003ee, ALTERNATING_AL, 0, + 0x0003f0, 0x0003f0, INTEGER_OFFSET, -54, + 0x0003f1, 0x0003f1, INTEGER_OFFSET, -48, + 0x0003f2, 0x0003f2, INTEGER_OFFSET, -47, + 0x0003f4, 0x0003f4, INTEGER_OFFSET, -60, + 0x000400, 0x000401, DIRECT_SINGLE, 1077, + 0x000402, 0x000402, INTEGER_OFFSET, 80, + 0x000403, 0x000403, INTEGER_OFFSET, 48, + 0x000404, 0x000406, INTEGER_OFFSET, 80, + 0x000407, 0x000407, INTEGER_OFFSET, 79, + 0x000408, 0x00040b, INTEGER_OFFSET, 80, + 0x00040c, 0x00040c, INTEGER_OFFSET, 46, + 0x00040d, 
0x00040d, INTEGER_OFFSET, 43, + 0x00040e, 0x00040e, INTEGER_OFFSET, 53, + 0x00040f, 0x00040f, INTEGER_OFFSET, 80, + 0x000410, 0x000418, INTEGER_OFFSET, 32, + 0x000419, 0x000419, INTEGER_OFFSET, 31, + 0x00041a, 0x00042f, INTEGER_OFFSET, 32, + 0x000439, 0x000439, INTEGER_OFFSET, -1, + 0x000450, 0x000451, DIRECT_SINGLE, 1077, + 0x000453, 0x000453, INTEGER_OFFSET, -32, + 0x000457, 0x000457, INTEGER_OFFSET, -1, + 0x00045c, 0x00045c, INTEGER_OFFSET, -34, + 0x00045d, 0x00045d, INTEGER_OFFSET, -37, + 0x00045e, 0x00045e, INTEGER_OFFSET, -27, + 0x000460, 0x000474, ALTERNATING_AL, 0, + 0x000476, 0x000477, DIRECT_SINGLE, 1141, + 0x000478, 0x000480, ALTERNATING_AL, 0, + 0x00048c, 0x00048e, ALTERNATING_AL, 0, + 0x000490, 0x000491, DIRECT_SINGLE, 1075, + 0x000492, 0x0004be, ALTERNATING_AL, 0, + 0x0004c1, 0x0004c2, DIRECT_SINGLE, 1078, + 0x0004c3, 0x0004c3, ALTERNATING_UL, 0, + 0x0004c7, 0x0004c7, ALTERNATING_UL, 0, + 0x0004cb, 0x0004cb, ALTERNATING_UL, 0, + 0x0004d0, 0x0004d3, DIRECT_SINGLE, 1072, + 0x0004d4, 0x0004d4, ALTERNATING_AL, 0, + 0x0004d6, 0x0004d7, DIRECT_SINGLE, 1077, + 0x0004d8, 0x0004d8, ALTERNATING_AL, 0, + 0x0004da, 0x0004db, DIRECT_SINGLE, 1241, + 0x0004dc, 0x0004dd, DIRECT_SINGLE, 1078, + 0x0004de, 0x0004df, DIRECT_SINGLE, 1079, + 0x0004e0, 0x0004e0, ALTERNATING_AL, 0, + 0x0004e2, 0x0004e5, DIRECT_SINGLE, 1080, + 0x0004e6, 0x0004e7, DIRECT_SINGLE, 1086, + 0x0004e8, 0x0004e8, ALTERNATING_AL, 0, + 0x0004ea, 0x0004eb, DIRECT_SINGLE, 1257, + 0x0004ec, 0x0004ed, DIRECT_SINGLE, 1101, + 0x0004ee, 0x0004f3, DIRECT_SINGLE, 1091, + 0x0004f4, 0x0004f5, DIRECT_SINGLE, 1095, + 0x0004f8, 0x0004f9, DIRECT_SINGLE, 1099, + 0x000531, 0x000556, INTEGER_OFFSET, 48, + 0x0005da, 0x0005da, ALTERNATING_AL, 0, + 0x0005dd, 0x0005df, ALTERNATING_UL, 0, + 0x0005e3, 0x0005e5, ALTERNATING_UL, 0, + 0x000622, 0x000623, DIRECT_SINGLE, 1575, + 0x000624, 0x000624, INTEGER_OFFSET, 36, + 0x000625, 0x000625, INTEGER_OFFSET, 2, + 0x000626, 0x000626, INTEGER_OFFSET, 36, + 0x000660, 0x000669, 
INTEGER_OFFSET, -1584, + 0x0006c0, 0x0006c0, INTEGER_OFFSET, 21, + 0x0006c2, 0x0006c2, INTEGER_OFFSET, -1, + 0x0006d3, 0x0006d3, INTEGER_OFFSET, -1, + 0x0006f0, 0x0006f9, INTEGER_OFFSET, -1728, + 0x000929, 0x000929, INTEGER_OFFSET, -1, + 0x000931, 0x000931, INTEGER_OFFSET, -1, + 0x000934, 0x000934, INTEGER_OFFSET, -1, + 0x0009cb, 0x0009cc, DIRECT_SINGLE, 2503, + 0x000b48, 0x000b4c, DIRECT_SINGLE, 2887, + 0x000b94, 0x000b94, INTEGER_OFFSET, -2, + 0x000bca, 0x000bcb, INTEGER_OFFSET, -4, + 0x000bcc, 0x000bcc, INTEGER_OFFSET, -6, + 0x000c48, 0x000c48, INTEGER_OFFSET, -2, + 0x000cc0, 0x000cc0, INTEGER_OFFSET, -1, + 0x000cc7, 0x000ccb, DIRECT_SINGLE, 3270, + 0x000d4a, 0x000d4b, INTEGER_OFFSET, -4, + 0x000d4c, 0x000d4c, INTEGER_OFFSET, -6, + 0x000dda, 0x000dde, DIRECT_SINGLE, 3545, + 0x000f73, 0x000f81, DIRECT_SINGLE, 3953, + 0x001026, 0x001026, INTEGER_OFFSET, -1, + 0x0010a0, 0x0010c5, INTEGER_OFFSET, 48, + 0x001e00, 0x001e01, DIRECT_SINGLE, 97, + 0x001e02, 0x001e07, DIRECT_SINGLE, 98, + 0x001e08, 0x001e09, DIRECT_SINGLE, 99, + 0x001e0a, 0x001e13, DIRECT_SINGLE, 100, + 0x001e14, 0x001e1d, DIRECT_SINGLE, 101, + 0x001e1e, 0x001e1f, DIRECT_SINGLE, 102, + 0x001e20, 0x001e21, DIRECT_SINGLE, 103, + 0x001e22, 0x001e2b, DIRECT_SINGLE, 104, + 0x001e2c, 0x001e2f, DIRECT_SINGLE, 105, + 0x001e30, 0x001e35, DIRECT_SINGLE, 107, + 0x001e36, 0x001e3d, DIRECT_SINGLE, 108, + 0x001e3e, 0x001e43, DIRECT_SINGLE, 109, + 0x001e44, 0x001e4b, DIRECT_SINGLE, 110, + 0x001e4c, 0x001e53, DIRECT_SINGLE, 111, + 0x001e54, 0x001e57, DIRECT_SINGLE, 112, + 0x001e58, 0x001e5f, DIRECT_SINGLE, 114, + 0x001e60, 0x001e69, DIRECT_SINGLE, 115, + 0x001e6a, 0x001e71, DIRECT_SINGLE, 116, + 0x001e72, 0x001e7b, DIRECT_SINGLE, 117, + 0x001e7c, 0x001e7f, DIRECT_SINGLE, 118, + 0x001e80, 0x001e89, DIRECT_SINGLE, 119, + 0x001e8a, 0x001e8d, DIRECT_SINGLE, 120, + 0x001e8e, 0x001e8f, DIRECT_SINGLE, 121, + 0x001e90, 0x001e95, DIRECT_SINGLE, 122, + 0x001e96, 0x001e96, INTEGER_OFFSET, -7726, + 0x001e97, 0x001e97, 
INTEGER_OFFSET, -7715, + 0x001e98, 0x001e98, INTEGER_OFFSET, -7713, + 0x001e99, 0x001e99, INTEGER_OFFSET, -7712, + 0x001e9a, 0x001e9a, INTEGER_OFFSET, 1106361, + 0x001e9b, 0x001e9b, INTEGER_OFFSET, -7720, + 0x001ea0, 0x001eb7, DIRECT_SINGLE, 97, + 0x001eb8, 0x001ec7, DIRECT_SINGLE, 101, + 0x001ec8, 0x001ecb, DIRECT_SINGLE, 105, + 0x001ecc, 0x001ee3, DIRECT_SINGLE, 111, + 0x001ee4, 0x001ef1, DIRECT_SINGLE, 117, + 0x001ef2, 0x001ef9, DIRECT_SINGLE, 121, + 0x001f00, 0x001f0f, DIRECT_SINGLE, 945, + 0x001f10, 0x001f1d, DIRECT_SINGLE, 949, + 0x001f20, 0x001f2f, DIRECT_SINGLE, 951, + 0x001f30, 0x001f3f, DIRECT_SINGLE, 953, + 0x001f40, 0x001f4d, DIRECT_SINGLE, 959, + 0x001f50, 0x001f5f, DIRECT_SINGLE, 965, + 0x001f60, 0x001f6f, DIRECT_SINGLE, 969, + 0x001f70, 0x001f70, INTEGER_OFFSET, -7103, + 0x001f72, 0x001f72, INTEGER_OFFSET, -7101, + 0x001f74, 0x001f74, INTEGER_OFFSET, -7101, + 0x001f76, 0x001f76, INTEGER_OFFSET, -7101, + 0x001f78, 0x001f78, INTEGER_OFFSET, -7097, + 0x001f7a, 0x001f7a, INTEGER_OFFSET, -7093, + 0x001f7c, 0x001f7c, INTEGER_OFFSET, -7091, + 0x001f80, 0x001f8f, DIRECT_SINGLE, 945, + 0x001f90, 0x001f9f, DIRECT_SINGLE, 951, + 0x001fa0, 0x001faf, DIRECT_SINGLE, 969, + 0x001fb0, 0x001fba, DIRECT_SINGLE, 945, + 0x001fbb, 0x001fbb, INTEGER_OFFSET, -74, + 0x001fbc, 0x001fbc, INTEGER_OFFSET, -7179, + 0x001fbe, 0x001fbe, INTEGER_OFFSET, -7173, + 0x001fc1, 0x001fc1, INTEGER_OFFSET, -7961, + 0x001fc2, 0x001fc7, DIRECT_SINGLE, 951, + 0x001fc8, 0x001fc8, INTEGER_OFFSET, -7187, + 0x001fc9, 0x001fc9, INTEGER_OFFSET, -86, + 0x001fca, 0x001fca, INTEGER_OFFSET, -7187, + 0x001fcb, 0x001fcb, INTEGER_OFFSET, -86, + 0x001fcc, 0x001fcc, INTEGER_OFFSET, -7189, + 0x001fcd, 0x001fcf, DIRECT_SINGLE, 8127, + 0x001fd0, 0x001fda, DIRECT_SINGLE, 953, + 0x001fdb, 0x001fdb, INTEGER_OFFSET, -100, + 0x001fdd, 0x001fdf, DIRECT_SINGLE, 8190, + 0x001fe0, 0x001fe2, DIRECT_SINGLE, 965, + 0x001fe4, 0x001fe5, DIRECT_SINGLE, 961, + 0x001fe6, 0x001fea, DIRECT_SINGLE, 965, + 0x001feb, 0x001feb, 
INTEGER_OFFSET, -112, + 0x001fec, 0x001fec, INTEGER_OFFSET, -7211, + 0x001fed, 0x001fed, INTEGER_OFFSET, -8005, + 0x001ff2, 0x001ff7, DIRECT_SINGLE, 969, + 0x001ff8, 0x001ff8, INTEGER_OFFSET, -7225, + 0x001ff9, 0x001ff9, INTEGER_OFFSET, -128, + 0x001ffa, 0x001ffa, INTEGER_OFFSET, -7217, + 0x001ffb, 0x001ffb, INTEGER_OFFSET, -126, + 0x001ffc, 0x001ffc, INTEGER_OFFSET, -7219, + 0x002070, 0x002070, INTEGER_OFFSET, -8256, + 0x002074, 0x002079, INTEGER_OFFSET, -8256, + 0x00207f, 0x00207f, INTEGER_OFFSET, -8209, + 0x002080, 0x002089, INTEGER_OFFSET, -8272, + 0x002102, 0x002103, DIRECT_SINGLE, 99, + 0x002105, 0x002105, INTEGER_OFFSET, 1105814, + 0x002109, 0x00210b, INTEGER_OFFSET, -8355, + 0x00210c, 0x00210f, DIRECT_SINGLE, 104, + 0x002110, 0x002111, DIRECT_SINGLE, 105, + 0x002112, 0x002113, DIRECT_SINGLE, 108, + 0x002115, 0x002115, INTEGER_OFFSET, -8359, + 0x002116, 0x002116, INTEGER_OFFSET, 1105798, + 0x002119, 0x00211b, INTEGER_OFFSET, -8361, + 0x00211c, 0x00211d, DIRECT_SINGLE, 114, + 0x002120, 0x002122, INTEGER_OFFSET, 1105789, + 0x002124, 0x002124, INTEGER_OFFSET, -8362, + 0x002126, 0x002126, INTEGER_OFFSET, -7517, + 0x002128, 0x002128, INTEGER_OFFSET, -8366, + 0x00212a, 0x00212a, INTEGER_OFFSET, -8383, + 0x00212b, 0x00212c, INTEGER_OFFSET, -8394, + 0x00212f, 0x002130, DIRECT_SINGLE, 101, + 0x002131, 0x002131, INTEGER_OFFSET, -8395, + 0x002133, 0x002133, INTEGER_OFFSET, -8390, + 0x002134, 0x002134, INTEGER_OFFSET, -8389, + 0x00215f, 0x00215f, INTEGER_OFFSET, -8494, + 0x002160, 0x002160, INTEGER_OFFSET, -8439, + 0x002161, 0x002163, INTEGER_OFFSET, 1105727, + 0x002164, 0x002164, INTEGER_OFFSET, -8430, + 0x002165, 0x002166, INTEGER_OFFSET, 1105726, + 0x002167, 0x002168, INTEGER_OFFSET, 1105725, + 0x002169, 0x002169, INTEGER_OFFSET, -8433, + 0x00216a, 0x00216b, INTEGER_OFFSET, 1105724, + 0x00216c, 0x00216c, INTEGER_OFFSET, -8448, + 0x00216d, 0x00216e, INTEGER_OFFSET, -8458, + 0x00216f, 0x00216f, INTEGER_OFFSET, -8450, + 0x002170, 0x002170, INTEGER_OFFSET, -8455, + 
0x002171, 0x002173, INTEGER_OFFSET, 1105711, + 0x002174, 0x002174, INTEGER_OFFSET, -8446, + 0x002175, 0x002176, INTEGER_OFFSET, 1105710, + 0x002177, 0x002178, INTEGER_OFFSET, 1105709, + 0x002179, 0x002179, INTEGER_OFFSET, -8449, + 0x00217a, 0x00217b, INTEGER_OFFSET, 1105708, + 0x00217c, 0x00217c, INTEGER_OFFSET, -8464, + 0x00217d, 0x00217e, INTEGER_OFFSET, -8474, + 0x00217f, 0x00217f, INTEGER_OFFSET, -8466, + 0x00219a, 0x00219a, INTEGER_OFFSET, -10, + 0x00219b, 0x00219b, INTEGER_OFFSET, -9, + 0x0021ae, 0x0021ae, INTEGER_OFFSET, -26, + 0x0021cd, 0x0021cd, INTEGER_OFFSET, 3, + 0x0021ce, 0x0021ce, INTEGER_OFFSET, 6, + 0x0021cf, 0x0021cf, INTEGER_OFFSET, 3, + 0x002204, 0x002204, INTEGER_OFFSET, -1, + 0x002209, 0x002209, INTEGER_OFFSET, -1, + 0x00220c, 0x00220c, INTEGER_OFFSET, -1, + 0x002222, 0x002222, ALTERNATING_AL, 0, + 0x002224, 0x002224, INTEGER_OFFSET, -1, + 0x002226, 0x002226, INTEGER_OFFSET, -1, + 0x002241, 0x002241, INTEGER_OFFSET, -5, + 0x002244, 0x002244, INTEGER_OFFSET, -1, + 0x002247, 0x002247, INTEGER_OFFSET, -2, + 0x002249, 0x002249, INTEGER_OFFSET, -1, + 0x002260, 0x002260, INTEGER_OFFSET, -8739, + 0x002262, 0x002262, INTEGER_OFFSET, -1, + 0x00226d, 0x00226d, INTEGER_OFFSET, -32, + 0x00226e, 0x00226e, INTEGER_OFFSET, -8754, + 0x00226f, 0x00226f, INTEGER_OFFSET, -8753, + 0x002270, 0x002271, INTEGER_OFFSET, -12, + 0x002274, 0x002275, INTEGER_OFFSET, -2, + 0x002278, 0x002279, INTEGER_OFFSET, -2, + 0x002280, 0x002281, INTEGER_OFFSET, -6, + 0x002284, 0x002285, INTEGER_OFFSET, -2, + 0x002288, 0x002289, INTEGER_OFFSET, -2, + 0x0022ac, 0x0022ac, INTEGER_OFFSET, -10, + 0x0022ad, 0x0022ae, INTEGER_OFFSET, -5, + 0x0022af, 0x0022af, INTEGER_OFFSET, -4, + 0x0022e0, 0x0022e1, INTEGER_OFFSET, -100, + 0x0022e2, 0x0022e3, INTEGER_OFFSET, -81, + 0x0022ea, 0x0022ed, INTEGER_OFFSET, -56, + 0x002460, 0x002468, INTEGER_OFFSET, -9263, + 0x002474, 0x00247c, INTEGER_OFFSET, -9283, + 0x002488, 0x002490, INTEGER_OFFSET, -9303, + 0x00249c, 0x0024b5, INTEGER_OFFSET, -9275, + 
0x0024b6, 0x0024cf, INTEGER_OFFSET, -9301, + 0x0024d0, 0x0024e9, INTEGER_OFFSET, -9327, + 0x0024ea, 0x0024ea, INTEGER_OFFSET, -9402, + 0x00277d, 0x00277e, INTEGER_OFFSET, -10053, + 0x002787, 0x002788, INTEGER_OFFSET, -10063, + 0x002791, 0x002792, INTEGER_OFFSET, -10073, + 0x003007, 0x003007, INTEGER_OFFSET, -12247, + 0x003021, 0x003029, INTEGER_OFFSET, -12272, + 0x00304c, 0x00304c, INTEGER_OFFSET, -1, + 0x00304e, 0x00304e, INTEGER_OFFSET, -1, + 0x003050, 0x003050, INTEGER_OFFSET, -1, + 0x003052, 0x003052, INTEGER_OFFSET, -1, + 0x003054, 0x003054, INTEGER_OFFSET, -1, + 0x003056, 0x003056, INTEGER_OFFSET, -1, + 0x003058, 0x003058, INTEGER_OFFSET, -1, + 0x00305a, 0x00305a, INTEGER_OFFSET, -1, + 0x00305c, 0x00305c, INTEGER_OFFSET, -1, + 0x00305e, 0x00305e, INTEGER_OFFSET, -1, + 0x003060, 0x003060, INTEGER_OFFSET, -1, + 0x003062, 0x003062, INTEGER_OFFSET, -1, + 0x003065, 0x003065, INTEGER_OFFSET, -1, + 0x003067, 0x003067, INTEGER_OFFSET, -1, + 0x003069, 0x003069, INTEGER_OFFSET, -1, + 0x003070, 0x003071, DIRECT_SINGLE, 12399, + 0x003073, 0x003074, DIRECT_SINGLE, 12402, + 0x003076, 0x003077, DIRECT_SINGLE, 12405, + 0x003079, 0x00307a, DIRECT_SINGLE, 12408, + 0x00307c, 0x00307d, DIRECT_SINGLE, 12411, + 0x003094, 0x003094, INTEGER_OFFSET, -78, + 0x00309e, 0x00309e, INTEGER_OFFSET, -1, + 0x0030ac, 0x0030ac, INTEGER_OFFSET, -1, + 0x0030ae, 0x0030ae, INTEGER_OFFSET, -1, + 0x0030b0, 0x0030b0, INTEGER_OFFSET, -1, + 0x0030b2, 0x0030b2, INTEGER_OFFSET, -1, + 0x0030b4, 0x0030b4, INTEGER_OFFSET, -1, + 0x0030b6, 0x0030b6, INTEGER_OFFSET, -1, + 0x0030b8, 0x0030b8, INTEGER_OFFSET, -1, + 0x0030ba, 0x0030ba, INTEGER_OFFSET, -1, + 0x0030bc, 0x0030bc, INTEGER_OFFSET, -1, + 0x0030be, 0x0030be, INTEGER_OFFSET, -1, + 0x0030c0, 0x0030c0, INTEGER_OFFSET, -1, + 0x0030c2, 0x0030c2, INTEGER_OFFSET, -1, + 0x0030c5, 0x0030c5, INTEGER_OFFSET, -1, + 0x0030c7, 0x0030c7, INTEGER_OFFSET, -1, + 0x0030c9, 0x0030c9, INTEGER_OFFSET, -1, + 0x0030d0, 0x0030d1, DIRECT_SINGLE, 12495, + 0x0030d3, 0x0030d4, 
DIRECT_SINGLE, 12498, + 0x0030d6, 0x0030d7, DIRECT_SINGLE, 12501, + 0x0030d9, 0x0030da, DIRECT_SINGLE, 12504, + 0x0030dc, 0x0030dd, DIRECT_SINGLE, 12507, + 0x0030f4, 0x0030f4, INTEGER_OFFSET, -78, + 0x0030f7, 0x0030fa, INTEGER_OFFSET, -8, + 0x0030fe, 0x0030fe, INTEGER_OFFSET, -1, + 0x00f8e2, 0x00f8e3, DIRECT_SINGLE, 1102, + 0x00f8e4, 0x00f8e5, DIRECT_SINGLE, 1099, + 0x00f8e6, 0x00f8e7, DIRECT_SINGLE, 1098, + 0x00f8e8, 0x00f8e9, DIRECT_SINGLE, 1091, + 0x00f8ea, 0x00f8eb, DIRECT_SINGLE, 1086, + 0x00f8ec, 0x00f8ed, DIRECT_SINGLE, 1080, + 0x00f8ee, 0x00f8ef, DIRECT_SINGLE, 1101, + 0x00f8f0, 0x00f8f1, DIRECT_SINGLE, 1072, + 0x00f8f6, 0x00f8f6, INTEGER_OFFSET, -63615, + 0x00f8f7, 0x00f8f7, INTEGER_OFFSET, -63625, + 0x00f8f8, 0x00f8f8, INTEGER_OFFSET, -63631, + 0x00f8f9, 0x00f8f9, INTEGER_OFFSET, -63618, + 0x00f8fa, 0x00f8fa, INTEGER_OFFSET, -63633, + 0x00fb00, 0x00fb00, INTEGER_OFFSET, 1050024, + 0x00fb01, 0x00fb05, INTEGER_OFFSET, 1049990, + 0x00fb06, 0x00fb06, INTEGER_OFFSET, 1049989, + 0x00ff10, 0x00ff19, INTEGER_OFFSET, -65248, + 0x00ff21, 0x00ff3a, INTEGER_OFFSET, -65216, + 0x00ff41, 0x00ff5a, INTEGER_OFFSET, -65248, + 0x010400, 0x010425, INTEGER_OFFSET, 40, + }); + public static final CodePointSet FOLDABLE_CHARACTERS = rangeSet(0x000041, 0x00005a, 0x0000b5, 0x0000b5, 0x0000c0, 0x0000d6, 0x0000d8, 0x0000de, 0x000100, 0x000100, 0x000102, 0x000102, 0x000104, + 0x000104, 0x000106, 0x000106, 0x000108, 0x000108, 0x00010a, 0x00010a, 0x00010c, 0x00010c, 0x00010e, 0x00010e, 0x000110, 0x000110, 0x000112, 0x000112, 0x000114, 0x000114, 0x000116, + 0x000116, 0x000118, 0x000118, 0x00011a, 0x00011a, 0x00011c, 0x00011c, 0x00011e, 0x00011e, 0x000120, 0x000120, 0x000122, 0x000122, 0x000124, 0x000124, 0x000126, 0x000126, 0x000128, + 0x000128, 0x00012a, 0x00012a, 0x00012c, 0x00012c, 0x00012e, 0x00012e, 0x000132, 0x000132, 0x000134, 0x000134, 0x000136, 0x000136, 0x000139, 0x000139, 0x00013b, 0x00013b, 0x00013d, + 0x00013d, 0x00013f, 0x00013f, 0x000141, 0x000141, 0x000143, 0x000143, 
0x000145, 0x000145, 0x000147, 0x000147, 0x00014a, 0x00014a, 0x00014c, 0x00014c, 0x00014e, 0x00014e, 0x000150, + 0x000150, 0x000152, 0x000152, 0x000154, 0x000154, 0x000156, 0x000156, 0x000158, 0x000158, 0x00015a, 0x00015a, 0x00015c, 0x00015c, 0x00015e, 0x00015e, 0x000160, 0x000160, 0x000162, + 0x000162, 0x000164, 0x000164, 0x000166, 0x000166, 0x000168, 0x000168, 0x00016a, 0x00016a, 0x00016c, 0x00016c, 0x00016e, 0x00016e, 0x000170, 0x000170, 0x000172, 0x000172, 0x000174, + 0x000174, 0x000176, 0x000176, 0x000178, 0x000179, 0x00017b, 0x00017b, 0x00017d, 0x00017d, 0x00017f, 0x00017f, 0x000181, 0x000182, 0x000184, 0x000184, 0x000186, 0x000187, 0x000189, + 0x00018b, 0x00018e, 0x000191, 0x000193, 0x000194, 0x000196, 0x000198, 0x00019c, 0x00019d, 0x00019f, 0x0001a0, 0x0001a2, 0x0001a2, 0x0001a4, 0x0001a4, 0x0001a6, 0x0001a7, 0x0001a9, + 0x0001a9, 0x0001ac, 0x0001ac, 0x0001ae, 0x0001af, 0x0001b1, 0x0001b3, 0x0001b5, 0x0001b5, 0x0001b7, 0x0001b8, 0x0001bc, 0x0001bc, 0x0001c4, 0x0001c5, 0x0001c7, 0x0001c8, 0x0001ca, + 0x0001cb, 0x0001cd, 0x0001cd, 0x0001cf, 0x0001cf, 0x0001d1, 0x0001d1, 0x0001d3, 0x0001d3, 0x0001d5, 0x0001d5, 0x0001d7, 0x0001d7, 0x0001d9, 0x0001d9, 0x0001db, 0x0001db, 0x0001de, + 0x0001de, 0x0001e0, 0x0001e0, 0x0001e2, 0x0001e2, 0x0001e4, 0x0001e4, 0x0001e6, 0x0001e6, 0x0001e8, 0x0001e8, 0x0001ea, 0x0001ea, 0x0001ec, 0x0001ec, 0x0001ee, 0x0001ee, 0x0001f1, + 0x0001f2, 0x0001f4, 0x0001f4, 0x0001f6, 0x0001f8, 0x0001fa, 0x0001fa, 0x0001fc, 0x0001fc, 0x0001fe, 0x0001fe, 0x000200, 0x000200, 0x000202, 0x000202, 0x000204, 0x000204, 0x000206, + 0x000206, 0x000208, 0x000208, 0x00020a, 0x00020a, 0x00020c, 0x00020c, 0x00020e, 0x00020e, 0x000210, 0x000210, 0x000212, 0x000212, 0x000214, 0x000214, 0x000216, 0x000216, 0x000218, + 0x000218, 0x00021a, 0x00021a, 0x00021c, 0x00021c, 0x00021e, 0x00021e, 0x000220, 0x000220, 0x000222, 0x000222, 0x000224, 0x000224, 0x000226, 0x000226, 0x000228, 0x000228, 0x00022a, + 0x00022a, 0x00022c, 0x00022c, 0x00022e, 0x00022e, 0x000230, 
0x000230, 0x000232, 0x000232, 0x00023a, 0x00023b, 0x00023d, 0x00023e, 0x000241, 0x000241, 0x000243, 0x000246, 0x000248, + 0x000248, 0x00024a, 0x00024a, 0x00024c, 0x00024c, 0x00024e, 0x00024e, 0x000345, 0x000345, 0x000370, 0x000370, 0x000372, 0x000372, 0x000376, 0x000376, 0x00037f, 0x00037f, 0x000386, + 0x000386, 0x000388, 0x00038a, 0x00038c, 0x00038c, 0x00038e, 0x00038f, 0x000391, 0x0003a1, 0x0003a3, 0x0003ab, 0x0003c2, 0x0003c2, 0x0003cf, 0x0003d1, 0x0003d5, 0x0003d6, 0x0003d8, + 0x0003d8, 0x0003da, 0x0003da, 0x0003dc, 0x0003dc, 0x0003de, 0x0003de, 0x0003e0, 0x0003e0, 0x0003e2, 0x0003e2, 0x0003e4, 0x0003e4, 0x0003e6, 0x0003e6, 0x0003e8, 0x0003e8, 0x0003ea, + 0x0003ea, 0x0003ec, 0x0003ec, 0x0003ee, 0x0003ee, 0x0003f0, 0x0003f1, 0x0003f4, 0x0003f5, 0x0003f7, 0x0003f7, 0x0003f9, 0x0003fa, 0x0003fd, 0x00042f, 0x000460, 0x000460, 0x000462, + 0x000462, 0x000464, 0x000464, 0x000466, 0x000466, 0x000468, 0x000468, 0x00046a, 0x00046a, 0x00046c, 0x00046c, 0x00046e, 0x00046e, 0x000470, 0x000470, 0x000472, 0x000472, 0x000474, + 0x000474, 0x000476, 0x000476, 0x000478, 0x000478, 0x00047a, 0x00047a, 0x00047c, 0x00047c, 0x00047e, 0x00047e, 0x000480, 0x000480, 0x00048a, 0x00048a, 0x00048c, 0x00048c, 0x00048e, + 0x00048e, 0x000490, 0x000490, 0x000492, 0x000492, 0x000494, 0x000494, 0x000496, 0x000496, 0x000498, 0x000498, 0x00049a, 0x00049a, 0x00049c, 0x00049c, 0x00049e, 0x00049e, 0x0004a0, + 0x0004a0, 0x0004a2, 0x0004a2, 0x0004a4, 0x0004a4, 0x0004a6, 0x0004a6, 0x0004a8, 0x0004a8, 0x0004aa, 0x0004aa, 0x0004ac, 0x0004ac, 0x0004ae, 0x0004ae, 0x0004b0, 0x0004b0, 0x0004b2, + 0x0004b2, 0x0004b4, 0x0004b4, 0x0004b6, 0x0004b6, 0x0004b8, 0x0004b8, 0x0004ba, 0x0004ba, 0x0004bc, 0x0004bc, 0x0004be, 0x0004be, 0x0004c0, 0x0004c1, 0x0004c3, 0x0004c3, 0x0004c5, + 0x0004c5, 0x0004c7, 0x0004c7, 0x0004c9, 0x0004c9, 0x0004cb, 0x0004cb, 0x0004cd, 0x0004cd, 0x0004d0, 0x0004d0, 0x0004d2, 0x0004d2, 0x0004d4, 0x0004d4, 0x0004d6, 0x0004d6, 0x0004d8, + 0x0004d8, 0x0004da, 0x0004da, 0x0004dc, 0x0004dc, 
0x0004de, 0x0004de, 0x0004e0, 0x0004e0, 0x0004e2, 0x0004e2, 0x0004e4, 0x0004e4, 0x0004e6, 0x0004e6, 0x0004e8, 0x0004e8, 0x0004ea, + 0x0004ea, 0x0004ec, 0x0004ec, 0x0004ee, 0x0004ee, 0x0004f0, 0x0004f0, 0x0004f2, 0x0004f2, 0x0004f4, 0x0004f4, 0x0004f6, 0x0004f6, 0x0004f8, 0x0004f8, 0x0004fa, 0x0004fa, 0x0004fc, + 0x0004fc, 0x0004fe, 0x0004fe, 0x000500, 0x000500, 0x000502, 0x000502, 0x000504, 0x000504, 0x000506, 0x000506, 0x000508, 0x000508, 0x00050a, 0x00050a, 0x00050c, 0x00050c, 0x00050e, + 0x00050e, 0x000510, 0x000510, 0x000512, 0x000512, 0x000514, 0x000514, 0x000516, 0x000516, 0x000518, 0x000518, 0x00051a, 0x00051a, 0x00051c, 0x00051c, 0x00051e, 0x00051e, 0x000520, + 0x000520, 0x000522, 0x000522, 0x000524, 0x000524, 0x000526, 0x000526, 0x000528, 0x000528, 0x00052a, 0x00052a, 0x00052c, 0x00052c, 0x00052e, 0x00052e, 0x000531, 0x000556, 0x0010a0, + 0x0010c5, 0x0010c7, 0x0010c7, 0x0010cd, 0x0010cd, 0x0013f8, 0x0013fd, 0x001c80, 0x001c88, 0x001c90, 0x001cba, 0x001cbd, 0x001cbf, 0x001e00, 0x001e00, 0x001e02, 0x001e02, 0x001e04, + 0x001e04, 0x001e06, 0x001e06, 0x001e08, 0x001e08, 0x001e0a, 0x001e0a, 0x001e0c, 0x001e0c, 0x001e0e, 0x001e0e, 0x001e10, 0x001e10, 0x001e12, 0x001e12, 0x001e14, 0x001e14, 0x001e16, + 0x001e16, 0x001e18, 0x001e18, 0x001e1a, 0x001e1a, 0x001e1c, 0x001e1c, 0x001e1e, 0x001e1e, 0x001e20, 0x001e20, 0x001e22, 0x001e22, 0x001e24, 0x001e24, 0x001e26, 0x001e26, 0x001e28, + 0x001e28, 0x001e2a, 0x001e2a, 0x001e2c, 0x001e2c, 0x001e2e, 0x001e2e, 0x001e30, 0x001e30, 0x001e32, 0x001e32, 0x001e34, 0x001e34, 0x001e36, 0x001e36, 0x001e38, 0x001e38, 0x001e3a, + 0x001e3a, 0x001e3c, 0x001e3c, 0x001e3e, 0x001e3e, 0x001e40, 0x001e40, 0x001e42, 0x001e42, 0x001e44, 0x001e44, 0x001e46, 0x001e46, 0x001e48, 0x001e48, 0x001e4a, 0x001e4a, 0x001e4c, + 0x001e4c, 0x001e4e, 0x001e4e, 0x001e50, 0x001e50, 0x001e52, 0x001e52, 0x001e54, 0x001e54, 0x001e56, 0x001e56, 0x001e58, 0x001e58, 0x001e5a, 0x001e5a, 0x001e5c, 0x001e5c, 0x001e5e, + 0x001e5e, 0x001e60, 0x001e60, 0x001e62, 
0x001e62, 0x001e64, 0x001e64, 0x001e66, 0x001e66, 0x001e68, 0x001e68, 0x001e6a, 0x001e6a, 0x001e6c, 0x001e6c, 0x001e6e, 0x001e6e, 0x001e70, + 0x001e70, 0x001e72, 0x001e72, 0x001e74, 0x001e74, 0x001e76, 0x001e76, 0x001e78, 0x001e78, 0x001e7a, 0x001e7a, 0x001e7c, 0x001e7c, 0x001e7e, 0x001e7e, 0x001e80, 0x001e80, 0x001e82, + 0x001e82, 0x001e84, 0x001e84, 0x001e86, 0x001e86, 0x001e88, 0x001e88, 0x001e8a, 0x001e8a, 0x001e8c, 0x001e8c, 0x001e8e, 0x001e8e, 0x001e90, 0x001e90, 0x001e92, 0x001e92, 0x001e94, + 0x001e94, 0x001e9b, 0x001e9b, 0x001e9e, 0x001e9e, 0x001ea0, 0x001ea0, 0x001ea2, 0x001ea2, 0x001ea4, 0x001ea4, 0x001ea6, 0x001ea6, 0x001ea8, 0x001ea8, 0x001eaa, 0x001eaa, 0x001eac, + 0x001eac, 0x001eae, 0x001eae, 0x001eb0, 0x001eb0, 0x001eb2, 0x001eb2, 0x001eb4, 0x001eb4, 0x001eb6, 0x001eb6, 0x001eb8, 0x001eb8, 0x001eba, 0x001eba, 0x001ebc, 0x001ebc, 0x001ebe, + 0x001ebe, 0x001ec0, 0x001ec0, 0x001ec2, 0x001ec2, 0x001ec4, 0x001ec4, 0x001ec6, 0x001ec6, 0x001ec8, 0x001ec8, 0x001eca, 0x001eca, 0x001ecc, 0x001ecc, 0x001ece, 0x001ece, 0x001ed0, + 0x001ed0, 0x001ed2, 0x001ed2, 0x001ed4, 0x001ed4, 0x001ed6, 0x001ed6, 0x001ed8, 0x001ed8, 0x001eda, 0x001eda, 0x001edc, 0x001edc, 0x001ede, 0x001ede, 0x001ee0, 0x001ee0, 0x001ee2, + 0x001ee2, 0x001ee4, 0x001ee4, 0x001ee6, 0x001ee6, 0x001ee8, 0x001ee8, 0x001eea, 0x001eea, 0x001eec, 0x001eec, 0x001eee, 0x001eee, 0x001ef0, 0x001ef0, 0x001ef2, 0x001ef2, 0x001ef4, + 0x001ef4, 0x001ef6, 0x001ef6, 0x001ef8, 0x001ef8, 0x001efa, 0x001efa, 0x001efc, 0x001efc, 0x001efe, 0x001efe, 0x001f08, 0x001f0f, 0x001f18, 0x001f1d, 0x001f28, 0x001f2f, 0x001f38, + 0x001f3f, 0x001f48, 0x001f4d, 0x001f59, 0x001f59, 0x001f5b, 0x001f5b, 0x001f5d, 0x001f5d, 0x001f5f, 0x001f5f, 0x001f68, 0x001f6f, 0x001f88, 0x001f8f, 0x001f98, 0x001f9f, 0x001fa8, + 0x001faf, 0x001fb8, 0x001fbc, 0x001fbe, 0x001fbe, 0x001fc8, 0x001fcc, 0x001fd8, 0x001fdb, 0x001fe8, 0x001fec, 0x001ff8, 0x001ffc, 0x002126, 0x002126, 0x00212a, 0x00212b, 0x002132, + 0x002132, 0x002160, 0x00216f, 
0x002183, 0x002183, 0x0024b6, 0x0024cf, 0x002c00, 0x002c2f, 0x002c60, 0x002c60, 0x002c62, 0x002c64, 0x002c67, 0x002c67, 0x002c69, 0x002c69, 0x002c6b, + 0x002c6b, 0x002c6d, 0x002c70, 0x002c72, 0x002c72, 0x002c75, 0x002c75, 0x002c7e, 0x002c80, 0x002c82, 0x002c82, 0x002c84, 0x002c84, 0x002c86, 0x002c86, 0x002c88, 0x002c88, 0x002c8a, + 0x002c8a, 0x002c8c, 0x002c8c, 0x002c8e, 0x002c8e, 0x002c90, 0x002c90, 0x002c92, 0x002c92, 0x002c94, 0x002c94, 0x002c96, 0x002c96, 0x002c98, 0x002c98, 0x002c9a, 0x002c9a, 0x002c9c, + 0x002c9c, 0x002c9e, 0x002c9e, 0x002ca0, 0x002ca0, 0x002ca2, 0x002ca2, 0x002ca4, 0x002ca4, 0x002ca6, 0x002ca6, 0x002ca8, 0x002ca8, 0x002caa, 0x002caa, 0x002cac, 0x002cac, 0x002cae, + 0x002cae, 0x002cb0, 0x002cb0, 0x002cb2, 0x002cb2, 0x002cb4, 0x002cb4, 0x002cb6, 0x002cb6, 0x002cb8, 0x002cb8, 0x002cba, 0x002cba, 0x002cbc, 0x002cbc, 0x002cbe, 0x002cbe, 0x002cc0, + 0x002cc0, 0x002cc2, 0x002cc2, 0x002cc4, 0x002cc4, 0x002cc6, 0x002cc6, 0x002cc8, 0x002cc8, 0x002cca, 0x002cca, 0x002ccc, 0x002ccc, 0x002cce, 0x002cce, 0x002cd0, 0x002cd0, 0x002cd2, + 0x002cd2, 0x002cd4, 0x002cd4, 0x002cd6, 0x002cd6, 0x002cd8, 0x002cd8, 0x002cda, 0x002cda, 0x002cdc, 0x002cdc, 0x002cde, 0x002cde, 0x002ce0, 0x002ce0, 0x002ce2, 0x002ce2, 0x002ceb, + 0x002ceb, 0x002ced, 0x002ced, 0x002cf2, 0x002cf2, 0x00a640, 0x00a640, 0x00a642, 0x00a642, 0x00a644, 0x00a644, 0x00a646, 0x00a646, 0x00a648, 0x00a648, 0x00a64a, 0x00a64a, 0x00a64c, + 0x00a64c, 0x00a64e, 0x00a64e, 0x00a650, 0x00a650, 0x00a652, 0x00a652, 0x00a654, 0x00a654, 0x00a656, 0x00a656, 0x00a658, 0x00a658, 0x00a65a, 0x00a65a, 0x00a65c, 0x00a65c, 0x00a65e, + 0x00a65e, 0x00a660, 0x00a660, 0x00a662, 0x00a662, 0x00a664, 0x00a664, 0x00a666, 0x00a666, 0x00a668, 0x00a668, 0x00a66a, 0x00a66a, 0x00a66c, 0x00a66c, 0x00a680, 0x00a680, 0x00a682, + 0x00a682, 0x00a684, 0x00a684, 0x00a686, 0x00a686, 0x00a688, 0x00a688, 0x00a68a, 0x00a68a, 0x00a68c, 0x00a68c, 0x00a68e, 0x00a68e, 0x00a690, 0x00a690, 0x00a692, 0x00a692, 0x00a694, + 0x00a694, 0x00a696, 
0x00a696, 0x00a698, 0x00a698, 0x00a69a, 0x00a69a, 0x00a722, 0x00a722, 0x00a724, 0x00a724, 0x00a726, 0x00a726, 0x00a728, 0x00a728, 0x00a72a, 0x00a72a, 0x00a72c, + 0x00a72c, 0x00a72e, 0x00a72e, 0x00a732, 0x00a732, 0x00a734, 0x00a734, 0x00a736, 0x00a736, 0x00a738, 0x00a738, 0x00a73a, 0x00a73a, 0x00a73c, 0x00a73c, 0x00a73e, 0x00a73e, 0x00a740, + 0x00a740, 0x00a742, 0x00a742, 0x00a744, 0x00a744, 0x00a746, 0x00a746, 0x00a748, 0x00a748, 0x00a74a, 0x00a74a, 0x00a74c, 0x00a74c, 0x00a74e, 0x00a74e, 0x00a750, 0x00a750, 0x00a752, + 0x00a752, 0x00a754, 0x00a754, 0x00a756, 0x00a756, 0x00a758, 0x00a758, 0x00a75a, 0x00a75a, 0x00a75c, 0x00a75c, 0x00a75e, 0x00a75e, 0x00a760, 0x00a760, 0x00a762, 0x00a762, 0x00a764, + 0x00a764, 0x00a766, 0x00a766, 0x00a768, 0x00a768, 0x00a76a, 0x00a76a, 0x00a76c, 0x00a76c, 0x00a76e, 0x00a76e, 0x00a779, 0x00a779, 0x00a77b, 0x00a77b, 0x00a77d, 0x00a77e, 0x00a780, + 0x00a780, 0x00a782, 0x00a782, 0x00a784, 0x00a784, 0x00a786, 0x00a786, 0x00a78b, 0x00a78b, 0x00a78d, 0x00a78d, 0x00a790, 0x00a790, 0x00a792, 0x00a792, 0x00a796, 0x00a796, 0x00a798, + 0x00a798, 0x00a79a, 0x00a79a, 0x00a79c, 0x00a79c, 0x00a79e, 0x00a79e, 0x00a7a0, 0x00a7a0, 0x00a7a2, 0x00a7a2, 0x00a7a4, 0x00a7a4, 0x00a7a6, 0x00a7a6, 0x00a7a8, 0x00a7a8, 0x00a7aa, + 0x00a7ae, 0x00a7b0, 0x00a7b4, 0x00a7b6, 0x00a7b6, 0x00a7b8, 0x00a7b8, 0x00a7ba, 0x00a7ba, 0x00a7bc, 0x00a7bc, 0x00a7be, 0x00a7be, 0x00a7c0, 0x00a7c0, 0x00a7c2, 0x00a7c2, 0x00a7c4, + 0x00a7c7, 0x00a7c9, 0x00a7c9, 0x00a7d0, 0x00a7d0, 0x00a7d6, 0x00a7d6, 0x00a7d8, 0x00a7d8, 0x00a7f5, 0x00a7f5, 0x00ab70, 0x00abbf, 0x00ff21, 0x00ff3a, 0x010400, 0x010427, 0x0104b0, + 0x0104d3, 0x010570, 0x01057a, 0x01057c, 0x01058a, 0x01058c, 0x010592, 0x010594, 0x010595, 0x010c80, 0x010cb2, 0x0118a0, 0x0118bf, 0x016e40, 0x016e5f, 0x01e900, 0x01e921); + + /* GENERATED CODE END - KEEP THIS MARKER FOR AUTOMATIC UPDATES */ + + private static final CaseUnfoldingTrie UNFOLDING_TRIE_RUBY = UNICODE_15_0_0_FULL.createCaseUnfoldTrie(); + private static final 
CaseUnfoldingTrie UNFOLDING_TRIE_ORACLE_DB = ORACLE_DB.createCaseUnfoldTrie(); + private static final CaseUnfoldingTrie UNFOLDING_TRIE_ORACLE_DB_AI = ORACLE_DB_AI.createCaseUnfoldTrie(); + + public static final CodePointSet FOLDED_CHARACTERS = FOLDABLE_CHARACTERS.createInverse(Encodings.UTF_32); + +} diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/CaseFoldTable.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/CaseFoldTable.java deleted file mode 100644 index aa926064402c..000000000000 --- a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/CaseFoldTable.java +++ /dev/null @@ -1,2098 +0,0 @@ -/* - * Copyright (c) 2018, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * The Universal Permissive License (UPL), Version 1.0 - * - * Subject to the condition set forth below, permission is hereby granted to any - * person obtaining a copy of this software, associated documentation and/or - * data (collectively the "Software"), free of charge and under any and all - * copyright rights in the Software, and any and all patent rights owned or - * freely licensable by each licensor hereunder covering either (i) the - * unmodified Software as contributed to or provided by such licensor, or (ii) - * the Larger Works (as defined below), to deal in both - * - * (a) the Software, and - * - * (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if - * one is included with the Software each a "Larger Work" to which the Software - * is contributed by such licensors), - * - * without restriction, including without limitation the rights to copy, create - * derivative works of, display, perform, and distribute the Software and make, - * use, sell, offer for sale, import, export, have made, and have sold the - * Software and the Larger Work(s), and to sublicense the foregoing rights on 
- * either these or other terms. - * - * This license is subject to the following condition: - * - * The above copyright notice and either this complete permission notice or at a - * minimum a reference to the UPL must be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -package com.oracle.truffle.regex.tregex.parser; - -import java.util.function.BiPredicate; - -import org.graalvm.shadowed.com.ibm.icu.lang.UCharacter; - -import com.oracle.truffle.api.CompilerDirectives; -import com.oracle.truffle.regex.charset.CodePointSet; -import com.oracle.truffle.regex.charset.CodePointSetAccumulator; -import com.oracle.truffle.regex.charset.Constants; -import com.oracle.truffle.regex.charset.Range; -import com.oracle.truffle.regex.charset.RangesBuffer; -import com.oracle.truffle.regex.charset.SortedListOfRanges; - -public class CaseFoldTable { - - public enum CaseFoldingAlgorithm { - ECMAScriptNonUnicode, - ECMAScriptUnicode, - PythonAscii, - PythonUnicode; - - public BiPredicate getEqualsPredicate() { - return (codePointA, codePointB) -> getTable(this).equalsIgnoreCase(codePointA, codePointB); - } - } - - private static CaseFoldTableImpl getTable(CaseFoldingAlgorithm algorithm) { - switch (algorithm) { - case ECMAScriptNonUnicode: - return NON_UNICODE_TABLE_ENTRIES; - case ECMAScriptUnicode: - return UNICODE_TABLE_ENTRIES; - case PythonAscii: - return PYTHON_ASCII_TABLE_ENTRIES; - case PythonUnicode: - return PYTHON_UNICODE_TABLE_ENTRIES; - default: - throw 
CompilerDirectives.shouldNotReachHere(); - } - } - - public static void applyCaseFoldUnfold(CodePointSetAccumulator codePointSet, CodePointSetAccumulator tmp, CaseFoldingAlgorithm algorithm) { - codePointSet.copyTo(tmp); - getTable(algorithm).applyCaseFold(codePointSet, tmp); - } - - public static CodePointSet simpleCaseFold(CodePointSet codePointSet, CodePointSetAccumulator tmp) { - tmp.addSet(codePointSet); - SIMPLE_CASE_FOLDING_ENTRIES.applyCaseFold(tmp, codePointSet); - tmp.intersectWith(Constants.FOLDED_CHARACTERS); - return tmp.toCodePointSet(); - } - - public static String simpleCaseFold(String string) { - int[] folded = string.codePoints().map(CaseFoldTable::simpleCaseFold).toArray(); - return new String(folded, 0, folded.length); - } - - public static int simpleCaseFold(int codePoint) { - return UCharacter.foldCase(codePoint, UCharacter.FOLD_CASE_DEFAULT); - } - - private static CodePointSet rangeSet(int... ranges) { - return CodePointSet.createNoDedup(ranges); - } - - private static final int INTEGER_OFFSET = 1; - private static final int DIRECT_MAPPING = 2; - private static final int ALTERNATING_UL = 3; - private static final int ALTERNATING_AL = 4; - - private static final class CaseFoldTableImpl implements SortedListOfRanges { - - private final int[] ranges; - - CaseFoldTableImpl(int[] ranges) { - this.ranges = ranges; - } - - void applyCaseFold(CodePointSetAccumulator dst, Iterable src) { - for (Range r : src) { - int search = binarySearch(r.lo); - if (binarySearchExactMatch(search, r.lo, r.hi)) { - apply(dst, search, r.lo, r.hi); - continue; - } - int firstIntersection = binarySearchGetFirstIntersecting(search, r.lo, r.hi); - if (binarySearchNoIntersectingFound(firstIntersection)) { - continue; - } - for (int j = firstIntersection; j < size(); j++) { - if (rightOf(j, r.lo, r.hi)) { - break; - } - assert intersects(j, r.lo, r.hi); - int intersectionLo = Math.max(getLo(j), r.lo); - int intersectionHi = Math.min(getHi(j), r.hi); - apply(dst, j, 
intersectionLo, intersectionHi); - } - } - } - - private void apply(CodePointSetAccumulator codePointSet, int tblEntryIndex, int intersectionLo, int intersectionHi) { - switch (ranges[tblEntryIndex * 4 + 2]) { - case INTEGER_OFFSET: - int delta = ranges[tblEntryIndex * 4 + 3]; - codePointSet.addRange(intersectionLo + delta, intersectionHi + delta); - break; - case DIRECT_MAPPING: - CodePointSet set = CHARACTER_SET_TABLE[ranges[tblEntryIndex * 4 + 3]]; - codePointSet.addSet(set); - break; - case ALTERNATING_UL: - int loUL = Math.min(((intersectionLo - 1) ^ 1) + 1, ((intersectionHi - 1) ^ 1) + 1); - int hiUL = Math.max(((intersectionLo - 1) ^ 1) + 1, ((intersectionHi - 1) ^ 1) + 1); - if (!SortedListOfRanges.contains(intersectionLo, intersectionHi, loUL, hiUL)) { - codePointSet.addRange(loUL, hiUL); - } - break; - case ALTERNATING_AL: - int loAL = Math.min(intersectionLo ^ 1, intersectionHi ^ 1); - int hiAL = Math.max(intersectionLo ^ 1, intersectionHi ^ 1); - if (!SortedListOfRanges.contains(intersectionLo, intersectionHi, loAL, hiAL)) { - codePointSet.addRange(loAL, hiAL); - } - break; - default: - throw CompilerDirectives.shouldNotReachHere(); - } - } - - boolean equalsIgnoreCase(int codePointA, int codePointB) { - if (codePointA == codePointB) { - return true; - } - int search = binarySearch(codePointA); - if (binarySearchExactMatch(search, codePointA, codePointA)) { - return equalsIgnoreCase(search, codePointA, codePointB); - } - int firstIntersection = binarySearchGetFirstIntersecting(search, codePointA, codePointA); - if (binarySearchNoIntersectingFound(firstIntersection) || rightOf(firstIntersection, codePointA, codePointA)) { - return false; - } - assert intersects(firstIntersection, codePointA, codePointA); - return equalsIgnoreCase(firstIntersection, codePointA, codePointB); - } - - private boolean equalsIgnoreCase(int tblEntryIndex, int codePointA, int codePointB) { - switch (ranges[tblEntryIndex * 4 + 2]) { - case INTEGER_OFFSET: - int delta = 
ranges[tblEntryIndex * 4 + 3]; - return codePointA + delta == codePointB; - case DIRECT_MAPPING: - CodePointSet set = CHARACTER_SET_TABLE[ranges[tblEntryIndex * 4 + 3]]; - return set.contains(codePointB); - case ALTERNATING_UL: - return ((codePointA - 1) ^ 1) + 1 == codePointB; - case ALTERNATING_AL: - return (codePointA ^ 1) == codePointB; - default: - throw CompilerDirectives.shouldNotReachHere(); - } - } - - @Override - public int getLo(int i) { - return ranges[i * 4]; - } - - @Override - public int getHi(int i) { - return ranges[i * 4 + 1]; - } - - @Override - public int size() { - return ranges.length / 4; - } - - @Override - public void appendRangesTo(RangesBuffer buffer, int startIndex, int endIndex) { - throw CompilerDirectives.shouldNotReachHere(); - } - } - - /* GENERATED CODE BEGIN - KEEP THIS MARKER FOR AUTOMATIC UPDATES */ - - private static final CodePointSet[] CHARACTER_SET_TABLE = new CodePointSet[]{ - rangeSet(0x0000b5, 0x0000b5, 0x00039c, 0x00039c, 0x0003bc, 0x0003bc), - rangeSet(0x0001c4, 0x0001c6), - rangeSet(0x0001c7, 0x0001c9), - rangeSet(0x0001ca, 0x0001cc), - rangeSet(0x0001f1, 0x0001f3), - rangeSet(0x000345, 0x000345, 0x000399, 0x000399, 0x0003b9, 0x0003b9, 0x001fbe, 0x001fbe), - rangeSet(0x000392, 0x000392, 0x0003b2, 0x0003b2, 0x0003d0, 0x0003d0), - rangeSet(0x000395, 0x000395, 0x0003b5, 0x0003b5, 0x0003f5, 0x0003f5), - rangeSet(0x000398, 0x000398, 0x0003b8, 0x0003b8, 0x0003d1, 0x0003d1), - rangeSet(0x00039a, 0x00039a, 0x0003ba, 0x0003ba, 0x0003f0, 0x0003f0), - rangeSet(0x0003a0, 0x0003a0, 0x0003c0, 0x0003c0, 0x0003d6, 0x0003d6), - rangeSet(0x0003a1, 0x0003a1, 0x0003c1, 0x0003c1, 0x0003f1, 0x0003f1), - rangeSet(0x0003a3, 0x0003a3, 0x0003c2, 0x0003c3), - rangeSet(0x0003a6, 0x0003a6, 0x0003c6, 0x0003c6, 0x0003d5, 0x0003d5), - rangeSet(0x000412, 0x000412, 0x000432, 0x000432, 0x001c80, 0x001c80), - rangeSet(0x000414, 0x000414, 0x000434, 0x000434, 0x001c81, 0x001c81), - rangeSet(0x00041e, 0x00041e, 0x00043e, 0x00043e, 0x001c82, 0x001c82), - 
rangeSet(0x000421, 0x000421, 0x000441, 0x000441, 0x001c83, 0x001c83), - rangeSet(0x000422, 0x000422, 0x000442, 0x000442, 0x001c84, 0x001c85), - rangeSet(0x00042a, 0x00042a, 0x00044a, 0x00044a, 0x001c86, 0x001c86), - rangeSet(0x000462, 0x000463, 0x001c87, 0x001c87), - rangeSet(0x001c88, 0x001c88, 0x00a64a, 0x00a64b), - rangeSet(0x001e60, 0x001e61, 0x001e9b, 0x001e9b), - rangeSet(0x00004b, 0x00004b, 0x00006b, 0x00006b, 0x00212a, 0x00212a), - rangeSet(0x000053, 0x000053, 0x000073, 0x000073, 0x00017f, 0x00017f), - rangeSet(0x0000c5, 0x0000c5, 0x0000e5, 0x0000e5, 0x00212b, 0x00212b), - rangeSet(0x000398, 0x000398, 0x0003b8, 0x0003b8, 0x0003d1, 0x0003d1, 0x0003f4, 0x0003f4), - rangeSet(0x0003a9, 0x0003a9, 0x0003c9, 0x0003c9, 0x002126, 0x002126), - rangeSet(0x000049, 0x000049, 0x000069, 0x000069, 0x000130, 0x000131)}; - - public static final CaseFoldTableImpl NON_UNICODE_TABLE_ENTRIES = new CaseFoldTableImpl(new int[]{ - 0x000041, 0x00005a, INTEGER_OFFSET, 32, - 0x000061, 0x00007a, INTEGER_OFFSET, -32, - 0x0000b5, 0x0000b5, DIRECT_MAPPING, 0, - 0x0000c0, 0x0000d6, INTEGER_OFFSET, 32, - 0x0000d8, 0x0000de, INTEGER_OFFSET, 32, - 0x0000e0, 0x0000f6, INTEGER_OFFSET, -32, - 0x0000f8, 0x0000fe, INTEGER_OFFSET, -32, - 0x0000ff, 0x0000ff, INTEGER_OFFSET, 121, - 0x000100, 0x00012f, ALTERNATING_AL, 0, - 0x000132, 0x000137, ALTERNATING_AL, 0, - 0x000139, 0x000148, ALTERNATING_UL, 0, - 0x00014a, 0x000177, ALTERNATING_AL, 0, - 0x000178, 0x000178, INTEGER_OFFSET, -121, - 0x000179, 0x00017e, ALTERNATING_UL, 0, - 0x000180, 0x000180, INTEGER_OFFSET, 195, - 0x000181, 0x000181, INTEGER_OFFSET, 210, - 0x000182, 0x000185, ALTERNATING_AL, 0, - 0x000186, 0x000186, INTEGER_OFFSET, 206, - 0x000187, 0x000188, ALTERNATING_UL, 0, - 0x000189, 0x00018a, INTEGER_OFFSET, 205, - 0x00018b, 0x00018c, ALTERNATING_UL, 0, - 0x00018e, 0x00018e, INTEGER_OFFSET, 79, - 0x00018f, 0x00018f, INTEGER_OFFSET, 202, - 0x000190, 0x000190, INTEGER_OFFSET, 203, - 0x000191, 0x000192, ALTERNATING_UL, 0, - 0x000193, 0x000193, 
INTEGER_OFFSET, 205, - 0x000194, 0x000194, INTEGER_OFFSET, 207, - 0x000195, 0x000195, INTEGER_OFFSET, 97, - 0x000196, 0x000196, INTEGER_OFFSET, 211, - 0x000197, 0x000197, INTEGER_OFFSET, 209, - 0x000198, 0x000199, ALTERNATING_AL, 0, - 0x00019a, 0x00019a, INTEGER_OFFSET, 163, - 0x00019c, 0x00019c, INTEGER_OFFSET, 211, - 0x00019d, 0x00019d, INTEGER_OFFSET, 213, - 0x00019e, 0x00019e, INTEGER_OFFSET, 130, - 0x00019f, 0x00019f, INTEGER_OFFSET, 214, - 0x0001a0, 0x0001a5, ALTERNATING_AL, 0, - 0x0001a6, 0x0001a6, INTEGER_OFFSET, 218, - 0x0001a7, 0x0001a8, ALTERNATING_UL, 0, - 0x0001a9, 0x0001a9, INTEGER_OFFSET, 218, - 0x0001ac, 0x0001ad, ALTERNATING_AL, 0, - 0x0001ae, 0x0001ae, INTEGER_OFFSET, 218, - 0x0001af, 0x0001b0, ALTERNATING_UL, 0, - 0x0001b1, 0x0001b2, INTEGER_OFFSET, 217, - 0x0001b3, 0x0001b6, ALTERNATING_UL, 0, - 0x0001b7, 0x0001b7, INTEGER_OFFSET, 219, - 0x0001b8, 0x0001b9, ALTERNATING_AL, 0, - 0x0001bc, 0x0001bd, ALTERNATING_AL, 0, - 0x0001bf, 0x0001bf, INTEGER_OFFSET, 56, - 0x0001c4, 0x0001c6, DIRECT_MAPPING, 1, - 0x0001c7, 0x0001c9, DIRECT_MAPPING, 2, - 0x0001ca, 0x0001cc, DIRECT_MAPPING, 3, - 0x0001cd, 0x0001dc, ALTERNATING_UL, 0, - 0x0001dd, 0x0001dd, INTEGER_OFFSET, -79, - 0x0001de, 0x0001ef, ALTERNATING_AL, 0, - 0x0001f1, 0x0001f3, DIRECT_MAPPING, 4, - 0x0001f4, 0x0001f5, ALTERNATING_AL, 0, - 0x0001f6, 0x0001f6, INTEGER_OFFSET, -97, - 0x0001f7, 0x0001f7, INTEGER_OFFSET, -56, - 0x0001f8, 0x00021f, ALTERNATING_AL, 0, - 0x000220, 0x000220, INTEGER_OFFSET, -130, - 0x000222, 0x000233, ALTERNATING_AL, 0, - 0x00023a, 0x00023a, INTEGER_OFFSET, 10795, - 0x00023b, 0x00023c, ALTERNATING_UL, 0, - 0x00023d, 0x00023d, INTEGER_OFFSET, -163, - 0x00023e, 0x00023e, INTEGER_OFFSET, 10792, - 0x00023f, 0x000240, INTEGER_OFFSET, 10815, - 0x000241, 0x000242, ALTERNATING_UL, 0, - 0x000243, 0x000243, INTEGER_OFFSET, -195, - 0x000244, 0x000244, INTEGER_OFFSET, 69, - 0x000245, 0x000245, INTEGER_OFFSET, 71, - 0x000246, 0x00024f, ALTERNATING_AL, 0, - 0x000250, 0x000250, 
INTEGER_OFFSET, 10783, - 0x000251, 0x000251, INTEGER_OFFSET, 10780, - 0x000252, 0x000252, INTEGER_OFFSET, 10782, - 0x000253, 0x000253, INTEGER_OFFSET, -210, - 0x000254, 0x000254, INTEGER_OFFSET, -206, - 0x000256, 0x000257, INTEGER_OFFSET, -205, - 0x000259, 0x000259, INTEGER_OFFSET, -202, - 0x00025b, 0x00025b, INTEGER_OFFSET, -203, - 0x00025c, 0x00025c, INTEGER_OFFSET, 42319, - 0x000260, 0x000260, INTEGER_OFFSET, -205, - 0x000261, 0x000261, INTEGER_OFFSET, 42315, - 0x000263, 0x000263, INTEGER_OFFSET, -207, - 0x000265, 0x000265, INTEGER_OFFSET, 42280, - 0x000266, 0x000266, INTEGER_OFFSET, 42308, - 0x000268, 0x000268, INTEGER_OFFSET, -209, - 0x000269, 0x000269, INTEGER_OFFSET, -211, - 0x00026a, 0x00026a, INTEGER_OFFSET, 42308, - 0x00026b, 0x00026b, INTEGER_OFFSET, 10743, - 0x00026c, 0x00026c, INTEGER_OFFSET, 42305, - 0x00026f, 0x00026f, INTEGER_OFFSET, -211, - 0x000271, 0x000271, INTEGER_OFFSET, 10749, - 0x000272, 0x000272, INTEGER_OFFSET, -213, - 0x000275, 0x000275, INTEGER_OFFSET, -214, - 0x00027d, 0x00027d, INTEGER_OFFSET, 10727, - 0x000280, 0x000280, INTEGER_OFFSET, -218, - 0x000282, 0x000282, INTEGER_OFFSET, 42307, - 0x000283, 0x000283, INTEGER_OFFSET, -218, - 0x000287, 0x000287, INTEGER_OFFSET, 42282, - 0x000288, 0x000288, INTEGER_OFFSET, -218, - 0x000289, 0x000289, INTEGER_OFFSET, -69, - 0x00028a, 0x00028b, INTEGER_OFFSET, -217, - 0x00028c, 0x00028c, INTEGER_OFFSET, -71, - 0x000292, 0x000292, INTEGER_OFFSET, -219, - 0x00029d, 0x00029d, INTEGER_OFFSET, 42261, - 0x00029e, 0x00029e, INTEGER_OFFSET, 42258, - 0x000345, 0x000345, DIRECT_MAPPING, 5, - 0x000370, 0x000373, ALTERNATING_AL, 0, - 0x000376, 0x000377, ALTERNATING_AL, 0, - 0x00037b, 0x00037d, INTEGER_OFFSET, 130, - 0x00037f, 0x00037f, INTEGER_OFFSET, 116, - 0x000386, 0x000386, INTEGER_OFFSET, 38, - 0x000388, 0x00038a, INTEGER_OFFSET, 37, - 0x00038c, 0x00038c, INTEGER_OFFSET, 64, - 0x00038e, 0x00038f, INTEGER_OFFSET, 63, - 0x000391, 0x000391, INTEGER_OFFSET, 32, - 0x000392, 0x000392, DIRECT_MAPPING, 6, - 
0x000393, 0x000394, INTEGER_OFFSET, 32, - 0x000395, 0x000395, DIRECT_MAPPING, 7, - 0x000396, 0x000397, INTEGER_OFFSET, 32, - 0x000398, 0x000398, DIRECT_MAPPING, 8, - 0x000399, 0x000399, DIRECT_MAPPING, 5, - 0x00039a, 0x00039a, DIRECT_MAPPING, 9, - 0x00039b, 0x00039b, INTEGER_OFFSET, 32, - 0x00039c, 0x00039c, DIRECT_MAPPING, 0, - 0x00039d, 0x00039f, INTEGER_OFFSET, 32, - 0x0003a0, 0x0003a0, DIRECT_MAPPING, 10, - 0x0003a1, 0x0003a1, DIRECT_MAPPING, 11, - 0x0003a3, 0x0003a3, DIRECT_MAPPING, 12, - 0x0003a4, 0x0003a5, INTEGER_OFFSET, 32, - 0x0003a6, 0x0003a6, DIRECT_MAPPING, 13, - 0x0003a7, 0x0003ab, INTEGER_OFFSET, 32, - 0x0003ac, 0x0003ac, INTEGER_OFFSET, -38, - 0x0003ad, 0x0003af, INTEGER_OFFSET, -37, - 0x0003b1, 0x0003b1, INTEGER_OFFSET, -32, - 0x0003b2, 0x0003b2, DIRECT_MAPPING, 6, - 0x0003b3, 0x0003b4, INTEGER_OFFSET, -32, - 0x0003b5, 0x0003b5, DIRECT_MAPPING, 7, - 0x0003b6, 0x0003b7, INTEGER_OFFSET, -32, - 0x0003b8, 0x0003b8, DIRECT_MAPPING, 8, - 0x0003b9, 0x0003b9, DIRECT_MAPPING, 5, - 0x0003ba, 0x0003ba, DIRECT_MAPPING, 9, - 0x0003bb, 0x0003bb, INTEGER_OFFSET, -32, - 0x0003bc, 0x0003bc, DIRECT_MAPPING, 0, - 0x0003bd, 0x0003bf, INTEGER_OFFSET, -32, - 0x0003c0, 0x0003c0, DIRECT_MAPPING, 10, - 0x0003c1, 0x0003c1, DIRECT_MAPPING, 11, - 0x0003c2, 0x0003c3, DIRECT_MAPPING, 12, - 0x0003c4, 0x0003c5, INTEGER_OFFSET, -32, - 0x0003c6, 0x0003c6, DIRECT_MAPPING, 13, - 0x0003c7, 0x0003cb, INTEGER_OFFSET, -32, - 0x0003cc, 0x0003cc, INTEGER_OFFSET, -64, - 0x0003cd, 0x0003ce, INTEGER_OFFSET, -63, - 0x0003cf, 0x0003cf, INTEGER_OFFSET, 8, - 0x0003d0, 0x0003d0, DIRECT_MAPPING, 6, - 0x0003d1, 0x0003d1, DIRECT_MAPPING, 8, - 0x0003d5, 0x0003d5, DIRECT_MAPPING, 13, - 0x0003d6, 0x0003d6, DIRECT_MAPPING, 10, - 0x0003d7, 0x0003d7, INTEGER_OFFSET, -8, - 0x0003d8, 0x0003ef, ALTERNATING_AL, 0, - 0x0003f0, 0x0003f0, DIRECT_MAPPING, 9, - 0x0003f1, 0x0003f1, DIRECT_MAPPING, 11, - 0x0003f2, 0x0003f2, INTEGER_OFFSET, 7, - 0x0003f3, 0x0003f3, INTEGER_OFFSET, -116, - 0x0003f5, 0x0003f5, 
DIRECT_MAPPING, 7, - 0x0003f7, 0x0003f8, ALTERNATING_UL, 0, - 0x0003f9, 0x0003f9, INTEGER_OFFSET, -7, - 0x0003fa, 0x0003fb, ALTERNATING_AL, 0, - 0x0003fd, 0x0003ff, INTEGER_OFFSET, -130, - 0x000400, 0x00040f, INTEGER_OFFSET, 80, - 0x000410, 0x000411, INTEGER_OFFSET, 32, - 0x000412, 0x000412, DIRECT_MAPPING, 14, - 0x000413, 0x000413, INTEGER_OFFSET, 32, - 0x000414, 0x000414, DIRECT_MAPPING, 15, - 0x000415, 0x00041d, INTEGER_OFFSET, 32, - 0x00041e, 0x00041e, DIRECT_MAPPING, 16, - 0x00041f, 0x000420, INTEGER_OFFSET, 32, - 0x000421, 0x000421, DIRECT_MAPPING, 17, - 0x000422, 0x000422, DIRECT_MAPPING, 18, - 0x000423, 0x000429, INTEGER_OFFSET, 32, - 0x00042a, 0x00042a, DIRECT_MAPPING, 19, - 0x00042b, 0x00042f, INTEGER_OFFSET, 32, - 0x000430, 0x000431, INTEGER_OFFSET, -32, - 0x000432, 0x000432, DIRECT_MAPPING, 14, - 0x000433, 0x000433, INTEGER_OFFSET, -32, - 0x000434, 0x000434, DIRECT_MAPPING, 15, - 0x000435, 0x00043d, INTEGER_OFFSET, -32, - 0x00043e, 0x00043e, DIRECT_MAPPING, 16, - 0x00043f, 0x000440, INTEGER_OFFSET, -32, - 0x000441, 0x000441, DIRECT_MAPPING, 17, - 0x000442, 0x000442, DIRECT_MAPPING, 18, - 0x000443, 0x000449, INTEGER_OFFSET, -32, - 0x00044a, 0x00044a, DIRECT_MAPPING, 19, - 0x00044b, 0x00044f, INTEGER_OFFSET, -32, - 0x000450, 0x00045f, INTEGER_OFFSET, -80, - 0x000460, 0x000461, ALTERNATING_AL, 0, - 0x000462, 0x000463, DIRECT_MAPPING, 20, - 0x000464, 0x000481, ALTERNATING_AL, 0, - 0x00048a, 0x0004bf, ALTERNATING_AL, 0, - 0x0004c0, 0x0004c0, INTEGER_OFFSET, 15, - 0x0004c1, 0x0004ce, ALTERNATING_UL, 0, - 0x0004cf, 0x0004cf, INTEGER_OFFSET, -15, - 0x0004d0, 0x00052f, ALTERNATING_AL, 0, - 0x000531, 0x000556, INTEGER_OFFSET, 48, - 0x000561, 0x000586, INTEGER_OFFSET, -48, - 0x0010a0, 0x0010c5, INTEGER_OFFSET, 7264, - 0x0010c7, 0x0010c7, INTEGER_OFFSET, 7264, - 0x0010cd, 0x0010cd, INTEGER_OFFSET, 7264, - 0x0010d0, 0x0010fa, INTEGER_OFFSET, 3008, - 0x0010fd, 0x0010ff, INTEGER_OFFSET, 3008, - 0x0013a0, 0x0013ef, INTEGER_OFFSET, 38864, - 0x0013f0, 0x0013f5, 
INTEGER_OFFSET, 8, - 0x0013f8, 0x0013fd, INTEGER_OFFSET, -8, - 0x001c80, 0x001c80, DIRECT_MAPPING, 14, - 0x001c81, 0x001c81, DIRECT_MAPPING, 15, - 0x001c82, 0x001c82, DIRECT_MAPPING, 16, - 0x001c83, 0x001c83, DIRECT_MAPPING, 17, - 0x001c84, 0x001c85, DIRECT_MAPPING, 18, - 0x001c86, 0x001c86, DIRECT_MAPPING, 19, - 0x001c87, 0x001c87, DIRECT_MAPPING, 20, - 0x001c88, 0x001c88, DIRECT_MAPPING, 21, - 0x001c90, 0x001cba, INTEGER_OFFSET, -3008, - 0x001cbd, 0x001cbf, INTEGER_OFFSET, -3008, - 0x001d79, 0x001d79, INTEGER_OFFSET, 35332, - 0x001d7d, 0x001d7d, INTEGER_OFFSET, 3814, - 0x001d8e, 0x001d8e, INTEGER_OFFSET, 35384, - 0x001e00, 0x001e5f, ALTERNATING_AL, 0, - 0x001e60, 0x001e61, DIRECT_MAPPING, 22, - 0x001e62, 0x001e95, ALTERNATING_AL, 0, - 0x001e9b, 0x001e9b, DIRECT_MAPPING, 22, - 0x001ea0, 0x001eff, ALTERNATING_AL, 0, - 0x001f00, 0x001f07, INTEGER_OFFSET, 8, - 0x001f08, 0x001f0f, INTEGER_OFFSET, -8, - 0x001f10, 0x001f15, INTEGER_OFFSET, 8, - 0x001f18, 0x001f1d, INTEGER_OFFSET, -8, - 0x001f20, 0x001f27, INTEGER_OFFSET, 8, - 0x001f28, 0x001f2f, INTEGER_OFFSET, -8, - 0x001f30, 0x001f37, INTEGER_OFFSET, 8, - 0x001f38, 0x001f3f, INTEGER_OFFSET, -8, - 0x001f40, 0x001f45, INTEGER_OFFSET, 8, - 0x001f48, 0x001f4d, INTEGER_OFFSET, -8, - 0x001f51, 0x001f51, INTEGER_OFFSET, 8, - 0x001f53, 0x001f53, INTEGER_OFFSET, 8, - 0x001f55, 0x001f55, INTEGER_OFFSET, 8, - 0x001f57, 0x001f57, INTEGER_OFFSET, 8, - 0x001f59, 0x001f59, INTEGER_OFFSET, -8, - 0x001f5b, 0x001f5b, INTEGER_OFFSET, -8, - 0x001f5d, 0x001f5d, INTEGER_OFFSET, -8, - 0x001f5f, 0x001f5f, INTEGER_OFFSET, -8, - 0x001f60, 0x001f67, INTEGER_OFFSET, 8, - 0x001f68, 0x001f6f, INTEGER_OFFSET, -8, - 0x001f70, 0x001f71, INTEGER_OFFSET, 74, - 0x001f72, 0x001f75, INTEGER_OFFSET, 86, - 0x001f76, 0x001f77, INTEGER_OFFSET, 100, - 0x001f78, 0x001f79, INTEGER_OFFSET, 128, - 0x001f7a, 0x001f7b, INTEGER_OFFSET, 112, - 0x001f7c, 0x001f7d, INTEGER_OFFSET, 126, - 0x001fb0, 0x001fb1, INTEGER_OFFSET, 8, - 0x001fb8, 0x001fb9, INTEGER_OFFSET, -8, - 
0x001fba, 0x001fbb, INTEGER_OFFSET, -74, - 0x001fbe, 0x001fbe, DIRECT_MAPPING, 5, - 0x001fc8, 0x001fcb, INTEGER_OFFSET, -86, - 0x001fd0, 0x001fd1, INTEGER_OFFSET, 8, - 0x001fd8, 0x001fd9, INTEGER_OFFSET, -8, - 0x001fda, 0x001fdb, INTEGER_OFFSET, -100, - 0x001fe0, 0x001fe1, INTEGER_OFFSET, 8, - 0x001fe5, 0x001fe5, INTEGER_OFFSET, 7, - 0x001fe8, 0x001fe9, INTEGER_OFFSET, -8, - 0x001fea, 0x001feb, INTEGER_OFFSET, -112, - 0x001fec, 0x001fec, INTEGER_OFFSET, -7, - 0x001ff8, 0x001ff9, INTEGER_OFFSET, -128, - 0x001ffa, 0x001ffb, INTEGER_OFFSET, -126, - 0x002132, 0x002132, INTEGER_OFFSET, 28, - 0x00214e, 0x00214e, INTEGER_OFFSET, -28, - 0x002160, 0x00216f, INTEGER_OFFSET, 16, - 0x002170, 0x00217f, INTEGER_OFFSET, -16, - 0x002183, 0x002184, ALTERNATING_UL, 0, - 0x0024b6, 0x0024cf, INTEGER_OFFSET, 26, - 0x0024d0, 0x0024e9, INTEGER_OFFSET, -26, - 0x002c00, 0x002c2f, INTEGER_OFFSET, 48, - 0x002c30, 0x002c5f, INTEGER_OFFSET, -48, - 0x002c60, 0x002c61, ALTERNATING_AL, 0, - 0x002c62, 0x002c62, INTEGER_OFFSET, -10743, - 0x002c63, 0x002c63, INTEGER_OFFSET, -3814, - 0x002c64, 0x002c64, INTEGER_OFFSET, -10727, - 0x002c65, 0x002c65, INTEGER_OFFSET, -10795, - 0x002c66, 0x002c66, INTEGER_OFFSET, -10792, - 0x002c67, 0x002c6c, ALTERNATING_UL, 0, - 0x002c6d, 0x002c6d, INTEGER_OFFSET, -10780, - 0x002c6e, 0x002c6e, INTEGER_OFFSET, -10749, - 0x002c6f, 0x002c6f, INTEGER_OFFSET, -10783, - 0x002c70, 0x002c70, INTEGER_OFFSET, -10782, - 0x002c72, 0x002c73, ALTERNATING_AL, 0, - 0x002c75, 0x002c76, ALTERNATING_UL, 0, - 0x002c7e, 0x002c7f, INTEGER_OFFSET, -10815, - 0x002c80, 0x002ce3, ALTERNATING_AL, 0, - 0x002ceb, 0x002cee, ALTERNATING_UL, 0, - 0x002cf2, 0x002cf3, ALTERNATING_AL, 0, - 0x002d00, 0x002d25, INTEGER_OFFSET, -7264, - 0x002d27, 0x002d27, INTEGER_OFFSET, -7264, - 0x002d2d, 0x002d2d, INTEGER_OFFSET, -7264, - 0x00a640, 0x00a649, ALTERNATING_AL, 0, - 0x00a64a, 0x00a64b, DIRECT_MAPPING, 21, - 0x00a64c, 0x00a66d, ALTERNATING_AL, 0, - 0x00a680, 0x00a69b, ALTERNATING_AL, 0, - 0x00a722, 0x00a72f, 
ALTERNATING_AL, 0, - 0x00a732, 0x00a76f, ALTERNATING_AL, 0, - 0x00a779, 0x00a77c, ALTERNATING_UL, 0, - 0x00a77d, 0x00a77d, INTEGER_OFFSET, -35332, - 0x00a77e, 0x00a787, ALTERNATING_AL, 0, - 0x00a78b, 0x00a78c, ALTERNATING_UL, 0, - 0x00a78d, 0x00a78d, INTEGER_OFFSET, -42280, - 0x00a790, 0x00a793, ALTERNATING_AL, 0, - 0x00a794, 0x00a794, INTEGER_OFFSET, 48, - 0x00a796, 0x00a7a9, ALTERNATING_AL, 0, - 0x00a7aa, 0x00a7aa, INTEGER_OFFSET, -42308, - 0x00a7ab, 0x00a7ab, INTEGER_OFFSET, -42319, - 0x00a7ac, 0x00a7ac, INTEGER_OFFSET, -42315, - 0x00a7ad, 0x00a7ad, INTEGER_OFFSET, -42305, - 0x00a7ae, 0x00a7ae, INTEGER_OFFSET, -42308, - 0x00a7b0, 0x00a7b0, INTEGER_OFFSET, -42258, - 0x00a7b1, 0x00a7b1, INTEGER_OFFSET, -42282, - 0x00a7b2, 0x00a7b2, INTEGER_OFFSET, -42261, - 0x00a7b3, 0x00a7b3, INTEGER_OFFSET, 928, - 0x00a7b4, 0x00a7c3, ALTERNATING_AL, 0, - 0x00a7c4, 0x00a7c4, INTEGER_OFFSET, -48, - 0x00a7c5, 0x00a7c5, INTEGER_OFFSET, -42307, - 0x00a7c6, 0x00a7c6, INTEGER_OFFSET, -35384, - 0x00a7c7, 0x00a7ca, ALTERNATING_UL, 0, - 0x00a7d0, 0x00a7d1, ALTERNATING_AL, 0, - 0x00a7d6, 0x00a7d9, ALTERNATING_AL, 0, - 0x00a7f5, 0x00a7f6, ALTERNATING_UL, 0, - 0x00ab53, 0x00ab53, INTEGER_OFFSET, -928, - 0x00ab70, 0x00abbf, INTEGER_OFFSET, -38864, - 0x00ff21, 0x00ff3a, INTEGER_OFFSET, 32, - 0x00ff41, 0x00ff5a, INTEGER_OFFSET, -32 - }); - - public static final CaseFoldTableImpl UNICODE_TABLE_ENTRIES = new CaseFoldTableImpl(new int[]{ - 0x000041, 0x00004a, INTEGER_OFFSET, 32, - 0x00004b, 0x00004b, DIRECT_MAPPING, 23, - 0x00004c, 0x000052, INTEGER_OFFSET, 32, - 0x000053, 0x000053, DIRECT_MAPPING, 24, - 0x000054, 0x00005a, INTEGER_OFFSET, 32, - 0x000061, 0x00006a, INTEGER_OFFSET, -32, - 0x00006b, 0x00006b, DIRECT_MAPPING, 23, - 0x00006c, 0x000072, INTEGER_OFFSET, -32, - 0x000073, 0x000073, DIRECT_MAPPING, 24, - 0x000074, 0x00007a, INTEGER_OFFSET, -32, - 0x0000b5, 0x0000b5, DIRECT_MAPPING, 0, - 0x0000c0, 0x0000c4, INTEGER_OFFSET, 32, - 0x0000c5, 0x0000c5, DIRECT_MAPPING, 25, - 0x0000c6, 0x0000d6, 
INTEGER_OFFSET, 32, - 0x0000d8, 0x0000de, INTEGER_OFFSET, 32, - 0x0000df, 0x0000df, INTEGER_OFFSET, 7615, - 0x0000e0, 0x0000e4, INTEGER_OFFSET, -32, - 0x0000e5, 0x0000e5, DIRECT_MAPPING, 25, - 0x0000e6, 0x0000f6, INTEGER_OFFSET, -32, - 0x0000f8, 0x0000fe, INTEGER_OFFSET, -32, - 0x0000ff, 0x0000ff, INTEGER_OFFSET, 121, - 0x000100, 0x00012f, ALTERNATING_AL, 0, - 0x000132, 0x000137, ALTERNATING_AL, 0, - 0x000139, 0x000148, ALTERNATING_UL, 0, - 0x00014a, 0x000177, ALTERNATING_AL, 0, - 0x000178, 0x000178, INTEGER_OFFSET, -121, - 0x000179, 0x00017e, ALTERNATING_UL, 0, - 0x00017f, 0x00017f, DIRECT_MAPPING, 24, - 0x000180, 0x000180, INTEGER_OFFSET, 195, - 0x000181, 0x000181, INTEGER_OFFSET, 210, - 0x000182, 0x000185, ALTERNATING_AL, 0, - 0x000186, 0x000186, INTEGER_OFFSET, 206, - 0x000187, 0x000188, ALTERNATING_UL, 0, - 0x000189, 0x00018a, INTEGER_OFFSET, 205, - 0x00018b, 0x00018c, ALTERNATING_UL, 0, - 0x00018e, 0x00018e, INTEGER_OFFSET, 79, - 0x00018f, 0x00018f, INTEGER_OFFSET, 202, - 0x000190, 0x000190, INTEGER_OFFSET, 203, - 0x000191, 0x000192, ALTERNATING_UL, 0, - 0x000193, 0x000193, INTEGER_OFFSET, 205, - 0x000194, 0x000194, INTEGER_OFFSET, 207, - 0x000195, 0x000195, INTEGER_OFFSET, 97, - 0x000196, 0x000196, INTEGER_OFFSET, 211, - 0x000197, 0x000197, INTEGER_OFFSET, 209, - 0x000198, 0x000199, ALTERNATING_AL, 0, - 0x00019a, 0x00019a, INTEGER_OFFSET, 163, - 0x00019c, 0x00019c, INTEGER_OFFSET, 211, - 0x00019d, 0x00019d, INTEGER_OFFSET, 213, - 0x00019e, 0x00019e, INTEGER_OFFSET, 130, - 0x00019f, 0x00019f, INTEGER_OFFSET, 214, - 0x0001a0, 0x0001a5, ALTERNATING_AL, 0, - 0x0001a6, 0x0001a6, INTEGER_OFFSET, 218, - 0x0001a7, 0x0001a8, ALTERNATING_UL, 0, - 0x0001a9, 0x0001a9, INTEGER_OFFSET, 218, - 0x0001ac, 0x0001ad, ALTERNATING_AL, 0, - 0x0001ae, 0x0001ae, INTEGER_OFFSET, 218, - 0x0001af, 0x0001b0, ALTERNATING_UL, 0, - 0x0001b1, 0x0001b2, INTEGER_OFFSET, 217, - 0x0001b3, 0x0001b6, ALTERNATING_UL, 0, - 0x0001b7, 0x0001b7, INTEGER_OFFSET, 219, - 0x0001b8, 0x0001b9, 
ALTERNATING_AL, 0, - 0x0001bc, 0x0001bd, ALTERNATING_AL, 0, - 0x0001bf, 0x0001bf, INTEGER_OFFSET, 56, - 0x0001c4, 0x0001c6, DIRECT_MAPPING, 1, - 0x0001c7, 0x0001c9, DIRECT_MAPPING, 2, - 0x0001ca, 0x0001cc, DIRECT_MAPPING, 3, - 0x0001cd, 0x0001dc, ALTERNATING_UL, 0, - 0x0001dd, 0x0001dd, INTEGER_OFFSET, -79, - 0x0001de, 0x0001ef, ALTERNATING_AL, 0, - 0x0001f1, 0x0001f3, DIRECT_MAPPING, 4, - 0x0001f4, 0x0001f5, ALTERNATING_AL, 0, - 0x0001f6, 0x0001f6, INTEGER_OFFSET, -97, - 0x0001f7, 0x0001f7, INTEGER_OFFSET, -56, - 0x0001f8, 0x00021f, ALTERNATING_AL, 0, - 0x000220, 0x000220, INTEGER_OFFSET, -130, - 0x000222, 0x000233, ALTERNATING_AL, 0, - 0x00023a, 0x00023a, INTEGER_OFFSET, 10795, - 0x00023b, 0x00023c, ALTERNATING_UL, 0, - 0x00023d, 0x00023d, INTEGER_OFFSET, -163, - 0x00023e, 0x00023e, INTEGER_OFFSET, 10792, - 0x00023f, 0x000240, INTEGER_OFFSET, 10815, - 0x000241, 0x000242, ALTERNATING_UL, 0, - 0x000243, 0x000243, INTEGER_OFFSET, -195, - 0x000244, 0x000244, INTEGER_OFFSET, 69, - 0x000245, 0x000245, INTEGER_OFFSET, 71, - 0x000246, 0x00024f, ALTERNATING_AL, 0, - 0x000250, 0x000250, INTEGER_OFFSET, 10783, - 0x000251, 0x000251, INTEGER_OFFSET, 10780, - 0x000252, 0x000252, INTEGER_OFFSET, 10782, - 0x000253, 0x000253, INTEGER_OFFSET, -210, - 0x000254, 0x000254, INTEGER_OFFSET, -206, - 0x000256, 0x000257, INTEGER_OFFSET, -205, - 0x000259, 0x000259, INTEGER_OFFSET, -202, - 0x00025b, 0x00025b, INTEGER_OFFSET, -203, - 0x00025c, 0x00025c, INTEGER_OFFSET, 42319, - 0x000260, 0x000260, INTEGER_OFFSET, -205, - 0x000261, 0x000261, INTEGER_OFFSET, 42315, - 0x000263, 0x000263, INTEGER_OFFSET, -207, - 0x000265, 0x000265, INTEGER_OFFSET, 42280, - 0x000266, 0x000266, INTEGER_OFFSET, 42308, - 0x000268, 0x000268, INTEGER_OFFSET, -209, - 0x000269, 0x000269, INTEGER_OFFSET, -211, - 0x00026a, 0x00026a, INTEGER_OFFSET, 42308, - 0x00026b, 0x00026b, INTEGER_OFFSET, 10743, - 0x00026c, 0x00026c, INTEGER_OFFSET, 42305, - 0x00026f, 0x00026f, INTEGER_OFFSET, -211, - 0x000271, 0x000271, 
INTEGER_OFFSET, 10749, - 0x000272, 0x000272, INTEGER_OFFSET, -213, - 0x000275, 0x000275, INTEGER_OFFSET, -214, - 0x00027d, 0x00027d, INTEGER_OFFSET, 10727, - 0x000280, 0x000280, INTEGER_OFFSET, -218, - 0x000282, 0x000282, INTEGER_OFFSET, 42307, - 0x000283, 0x000283, INTEGER_OFFSET, -218, - 0x000287, 0x000287, INTEGER_OFFSET, 42282, - 0x000288, 0x000288, INTEGER_OFFSET, -218, - 0x000289, 0x000289, INTEGER_OFFSET, -69, - 0x00028a, 0x00028b, INTEGER_OFFSET, -217, - 0x00028c, 0x00028c, INTEGER_OFFSET, -71, - 0x000292, 0x000292, INTEGER_OFFSET, -219, - 0x00029d, 0x00029d, INTEGER_OFFSET, 42261, - 0x00029e, 0x00029e, INTEGER_OFFSET, 42258, - 0x000345, 0x000345, DIRECT_MAPPING, 5, - 0x000370, 0x000373, ALTERNATING_AL, 0, - 0x000376, 0x000377, ALTERNATING_AL, 0, - 0x00037b, 0x00037d, INTEGER_OFFSET, 130, - 0x00037f, 0x00037f, INTEGER_OFFSET, 116, - 0x000386, 0x000386, INTEGER_OFFSET, 38, - 0x000388, 0x00038a, INTEGER_OFFSET, 37, - 0x00038c, 0x00038c, INTEGER_OFFSET, 64, - 0x00038e, 0x00038f, INTEGER_OFFSET, 63, - 0x000391, 0x000391, INTEGER_OFFSET, 32, - 0x000392, 0x000392, DIRECT_MAPPING, 6, - 0x000393, 0x000394, INTEGER_OFFSET, 32, - 0x000395, 0x000395, DIRECT_MAPPING, 7, - 0x000396, 0x000397, INTEGER_OFFSET, 32, - 0x000398, 0x000398, DIRECT_MAPPING, 26, - 0x000399, 0x000399, DIRECT_MAPPING, 5, - 0x00039a, 0x00039a, DIRECT_MAPPING, 9, - 0x00039b, 0x00039b, INTEGER_OFFSET, 32, - 0x00039c, 0x00039c, DIRECT_MAPPING, 0, - 0x00039d, 0x00039f, INTEGER_OFFSET, 32, - 0x0003a0, 0x0003a0, DIRECT_MAPPING, 10, - 0x0003a1, 0x0003a1, DIRECT_MAPPING, 11, - 0x0003a3, 0x0003a3, DIRECT_MAPPING, 12, - 0x0003a4, 0x0003a5, INTEGER_OFFSET, 32, - 0x0003a6, 0x0003a6, DIRECT_MAPPING, 13, - 0x0003a7, 0x0003a8, INTEGER_OFFSET, 32, - 0x0003a9, 0x0003a9, DIRECT_MAPPING, 27, - 0x0003aa, 0x0003ab, INTEGER_OFFSET, 32, - 0x0003ac, 0x0003ac, INTEGER_OFFSET, -38, - 0x0003ad, 0x0003af, INTEGER_OFFSET, -37, - 0x0003b1, 0x0003b1, INTEGER_OFFSET, -32, - 0x0003b2, 0x0003b2, DIRECT_MAPPING, 6, - 0x0003b3, 
0x0003b4, INTEGER_OFFSET, -32, - 0x0003b5, 0x0003b5, DIRECT_MAPPING, 7, - 0x0003b6, 0x0003b7, INTEGER_OFFSET, -32, - 0x0003b8, 0x0003b8, DIRECT_MAPPING, 26, - 0x0003b9, 0x0003b9, DIRECT_MAPPING, 5, - 0x0003ba, 0x0003ba, DIRECT_MAPPING, 9, - 0x0003bb, 0x0003bb, INTEGER_OFFSET, -32, - 0x0003bc, 0x0003bc, DIRECT_MAPPING, 0, - 0x0003bd, 0x0003bf, INTEGER_OFFSET, -32, - 0x0003c0, 0x0003c0, DIRECT_MAPPING, 10, - 0x0003c1, 0x0003c1, DIRECT_MAPPING, 11, - 0x0003c2, 0x0003c3, DIRECT_MAPPING, 12, - 0x0003c4, 0x0003c5, INTEGER_OFFSET, -32, - 0x0003c6, 0x0003c6, DIRECT_MAPPING, 13, - 0x0003c7, 0x0003c8, INTEGER_OFFSET, -32, - 0x0003c9, 0x0003c9, DIRECT_MAPPING, 27, - 0x0003ca, 0x0003cb, INTEGER_OFFSET, -32, - 0x0003cc, 0x0003cc, INTEGER_OFFSET, -64, - 0x0003cd, 0x0003ce, INTEGER_OFFSET, -63, - 0x0003cf, 0x0003cf, INTEGER_OFFSET, 8, - 0x0003d0, 0x0003d0, DIRECT_MAPPING, 6, - 0x0003d1, 0x0003d1, DIRECT_MAPPING, 26, - 0x0003d5, 0x0003d5, DIRECT_MAPPING, 13, - 0x0003d6, 0x0003d6, DIRECT_MAPPING, 10, - 0x0003d7, 0x0003d7, INTEGER_OFFSET, -8, - 0x0003d8, 0x0003ef, ALTERNATING_AL, 0, - 0x0003f0, 0x0003f0, DIRECT_MAPPING, 9, - 0x0003f1, 0x0003f1, DIRECT_MAPPING, 11, - 0x0003f2, 0x0003f2, INTEGER_OFFSET, 7, - 0x0003f3, 0x0003f3, INTEGER_OFFSET, -116, - 0x0003f4, 0x0003f4, DIRECT_MAPPING, 26, - 0x0003f5, 0x0003f5, DIRECT_MAPPING, 7, - 0x0003f7, 0x0003f8, ALTERNATING_UL, 0, - 0x0003f9, 0x0003f9, INTEGER_OFFSET, -7, - 0x0003fa, 0x0003fb, ALTERNATING_AL, 0, - 0x0003fd, 0x0003ff, INTEGER_OFFSET, -130, - 0x000400, 0x00040f, INTEGER_OFFSET, 80, - 0x000410, 0x000411, INTEGER_OFFSET, 32, - 0x000412, 0x000412, DIRECT_MAPPING, 14, - 0x000413, 0x000413, INTEGER_OFFSET, 32, - 0x000414, 0x000414, DIRECT_MAPPING, 15, - 0x000415, 0x00041d, INTEGER_OFFSET, 32, - 0x00041e, 0x00041e, DIRECT_MAPPING, 16, - 0x00041f, 0x000420, INTEGER_OFFSET, 32, - 0x000421, 0x000421, DIRECT_MAPPING, 17, - 0x000422, 0x000422, DIRECT_MAPPING, 18, - 0x000423, 0x000429, INTEGER_OFFSET, 32, - 0x00042a, 0x00042a, 
DIRECT_MAPPING, 19, - 0x00042b, 0x00042f, INTEGER_OFFSET, 32, - 0x000430, 0x000431, INTEGER_OFFSET, -32, - 0x000432, 0x000432, DIRECT_MAPPING, 14, - 0x000433, 0x000433, INTEGER_OFFSET, -32, - 0x000434, 0x000434, DIRECT_MAPPING, 15, - 0x000435, 0x00043d, INTEGER_OFFSET, -32, - 0x00043e, 0x00043e, DIRECT_MAPPING, 16, - 0x00043f, 0x000440, INTEGER_OFFSET, -32, - 0x000441, 0x000441, DIRECT_MAPPING, 17, - 0x000442, 0x000442, DIRECT_MAPPING, 18, - 0x000443, 0x000449, INTEGER_OFFSET, -32, - 0x00044a, 0x00044a, DIRECT_MAPPING, 19, - 0x00044b, 0x00044f, INTEGER_OFFSET, -32, - 0x000450, 0x00045f, INTEGER_OFFSET, -80, - 0x000460, 0x000461, ALTERNATING_AL, 0, - 0x000462, 0x000463, DIRECT_MAPPING, 20, - 0x000464, 0x000481, ALTERNATING_AL, 0, - 0x00048a, 0x0004bf, ALTERNATING_AL, 0, - 0x0004c0, 0x0004c0, INTEGER_OFFSET, 15, - 0x0004c1, 0x0004ce, ALTERNATING_UL, 0, - 0x0004cf, 0x0004cf, INTEGER_OFFSET, -15, - 0x0004d0, 0x00052f, ALTERNATING_AL, 0, - 0x000531, 0x000556, INTEGER_OFFSET, 48, - 0x000561, 0x000586, INTEGER_OFFSET, -48, - 0x0010a0, 0x0010c5, INTEGER_OFFSET, 7264, - 0x0010c7, 0x0010c7, INTEGER_OFFSET, 7264, - 0x0010cd, 0x0010cd, INTEGER_OFFSET, 7264, - 0x0010d0, 0x0010fa, INTEGER_OFFSET, 3008, - 0x0010fd, 0x0010ff, INTEGER_OFFSET, 3008, - 0x0013a0, 0x0013ef, INTEGER_OFFSET, 38864, - 0x0013f0, 0x0013f5, INTEGER_OFFSET, 8, - 0x0013f8, 0x0013fd, INTEGER_OFFSET, -8, - 0x001c80, 0x001c80, DIRECT_MAPPING, 14, - 0x001c81, 0x001c81, DIRECT_MAPPING, 15, - 0x001c82, 0x001c82, DIRECT_MAPPING, 16, - 0x001c83, 0x001c83, DIRECT_MAPPING, 17, - 0x001c84, 0x001c85, DIRECT_MAPPING, 18, - 0x001c86, 0x001c86, DIRECT_MAPPING, 19, - 0x001c87, 0x001c87, DIRECT_MAPPING, 20, - 0x001c88, 0x001c88, DIRECT_MAPPING, 21, - 0x001c90, 0x001cba, INTEGER_OFFSET, -3008, - 0x001cbd, 0x001cbf, INTEGER_OFFSET, -3008, - 0x001d79, 0x001d79, INTEGER_OFFSET, 35332, - 0x001d7d, 0x001d7d, INTEGER_OFFSET, 3814, - 0x001d8e, 0x001d8e, INTEGER_OFFSET, 35384, - 0x001e00, 0x001e5f, ALTERNATING_AL, 0, - 0x001e60, 
0x001e61, DIRECT_MAPPING, 22, - 0x001e62, 0x001e95, ALTERNATING_AL, 0, - 0x001e9b, 0x001e9b, DIRECT_MAPPING, 22, - 0x001e9e, 0x001e9e, INTEGER_OFFSET, -7615, - 0x001ea0, 0x001eff, ALTERNATING_AL, 0, - 0x001f00, 0x001f07, INTEGER_OFFSET, 8, - 0x001f08, 0x001f0f, INTEGER_OFFSET, -8, - 0x001f10, 0x001f15, INTEGER_OFFSET, 8, - 0x001f18, 0x001f1d, INTEGER_OFFSET, -8, - 0x001f20, 0x001f27, INTEGER_OFFSET, 8, - 0x001f28, 0x001f2f, INTEGER_OFFSET, -8, - 0x001f30, 0x001f37, INTEGER_OFFSET, 8, - 0x001f38, 0x001f3f, INTEGER_OFFSET, -8, - 0x001f40, 0x001f45, INTEGER_OFFSET, 8, - 0x001f48, 0x001f4d, INTEGER_OFFSET, -8, - 0x001f51, 0x001f51, INTEGER_OFFSET, 8, - 0x001f53, 0x001f53, INTEGER_OFFSET, 8, - 0x001f55, 0x001f55, INTEGER_OFFSET, 8, - 0x001f57, 0x001f57, INTEGER_OFFSET, 8, - 0x001f59, 0x001f59, INTEGER_OFFSET, -8, - 0x001f5b, 0x001f5b, INTEGER_OFFSET, -8, - 0x001f5d, 0x001f5d, INTEGER_OFFSET, -8, - 0x001f5f, 0x001f5f, INTEGER_OFFSET, -8, - 0x001f60, 0x001f67, INTEGER_OFFSET, 8, - 0x001f68, 0x001f6f, INTEGER_OFFSET, -8, - 0x001f70, 0x001f71, INTEGER_OFFSET, 74, - 0x001f72, 0x001f75, INTEGER_OFFSET, 86, - 0x001f76, 0x001f77, INTEGER_OFFSET, 100, - 0x001f78, 0x001f79, INTEGER_OFFSET, 128, - 0x001f7a, 0x001f7b, INTEGER_OFFSET, 112, - 0x001f7c, 0x001f7d, INTEGER_OFFSET, 126, - 0x001f80, 0x001f87, INTEGER_OFFSET, 8, - 0x001f88, 0x001f8f, INTEGER_OFFSET, -8, - 0x001f90, 0x001f97, INTEGER_OFFSET, 8, - 0x001f98, 0x001f9f, INTEGER_OFFSET, -8, - 0x001fa0, 0x001fa7, INTEGER_OFFSET, 8, - 0x001fa8, 0x001faf, INTEGER_OFFSET, -8, - 0x001fb0, 0x001fb1, INTEGER_OFFSET, 8, - 0x001fb3, 0x001fb3, INTEGER_OFFSET, 9, - 0x001fb8, 0x001fb9, INTEGER_OFFSET, -8, - 0x001fba, 0x001fbb, INTEGER_OFFSET, -74, - 0x001fbc, 0x001fbc, INTEGER_OFFSET, -9, - 0x001fbe, 0x001fbe, DIRECT_MAPPING, 5, - 0x001fc3, 0x001fc3, INTEGER_OFFSET, 9, - 0x001fc8, 0x001fcb, INTEGER_OFFSET, -86, - 0x001fcc, 0x001fcc, INTEGER_OFFSET, -9, - 0x001fd0, 0x001fd1, INTEGER_OFFSET, 8, - 0x001fd8, 0x001fd9, INTEGER_OFFSET, -8, - 
0x001fda, 0x001fdb, INTEGER_OFFSET, -100, - 0x001fe0, 0x001fe1, INTEGER_OFFSET, 8, - 0x001fe5, 0x001fe5, INTEGER_OFFSET, 7, - 0x001fe8, 0x001fe9, INTEGER_OFFSET, -8, - 0x001fea, 0x001feb, INTEGER_OFFSET, -112, - 0x001fec, 0x001fec, INTEGER_OFFSET, -7, - 0x001ff3, 0x001ff3, INTEGER_OFFSET, 9, - 0x001ff8, 0x001ff9, INTEGER_OFFSET, -128, - 0x001ffa, 0x001ffb, INTEGER_OFFSET, -126, - 0x001ffc, 0x001ffc, INTEGER_OFFSET, -9, - 0x002126, 0x002126, DIRECT_MAPPING, 27, - 0x00212a, 0x00212a, DIRECT_MAPPING, 23, - 0x00212b, 0x00212b, DIRECT_MAPPING, 25, - 0x002132, 0x002132, INTEGER_OFFSET, 28, - 0x00214e, 0x00214e, INTEGER_OFFSET, -28, - 0x002160, 0x00216f, INTEGER_OFFSET, 16, - 0x002170, 0x00217f, INTEGER_OFFSET, -16, - 0x002183, 0x002184, ALTERNATING_UL, 0, - 0x0024b6, 0x0024cf, INTEGER_OFFSET, 26, - 0x0024d0, 0x0024e9, INTEGER_OFFSET, -26, - 0x002c00, 0x002c2f, INTEGER_OFFSET, 48, - 0x002c30, 0x002c5f, INTEGER_OFFSET, -48, - 0x002c60, 0x002c61, ALTERNATING_AL, 0, - 0x002c62, 0x002c62, INTEGER_OFFSET, -10743, - 0x002c63, 0x002c63, INTEGER_OFFSET, -3814, - 0x002c64, 0x002c64, INTEGER_OFFSET, -10727, - 0x002c65, 0x002c65, INTEGER_OFFSET, -10795, - 0x002c66, 0x002c66, INTEGER_OFFSET, -10792, - 0x002c67, 0x002c6c, ALTERNATING_UL, 0, - 0x002c6d, 0x002c6d, INTEGER_OFFSET, -10780, - 0x002c6e, 0x002c6e, INTEGER_OFFSET, -10749, - 0x002c6f, 0x002c6f, INTEGER_OFFSET, -10783, - 0x002c70, 0x002c70, INTEGER_OFFSET, -10782, - 0x002c72, 0x002c73, ALTERNATING_AL, 0, - 0x002c75, 0x002c76, ALTERNATING_UL, 0, - 0x002c7e, 0x002c7f, INTEGER_OFFSET, -10815, - 0x002c80, 0x002ce3, ALTERNATING_AL, 0, - 0x002ceb, 0x002cee, ALTERNATING_UL, 0, - 0x002cf2, 0x002cf3, ALTERNATING_AL, 0, - 0x002d00, 0x002d25, INTEGER_OFFSET, -7264, - 0x002d27, 0x002d27, INTEGER_OFFSET, -7264, - 0x002d2d, 0x002d2d, INTEGER_OFFSET, -7264, - 0x00a640, 0x00a649, ALTERNATING_AL, 0, - 0x00a64a, 0x00a64b, DIRECT_MAPPING, 21, - 0x00a64c, 0x00a66d, ALTERNATING_AL, 0, - 0x00a680, 0x00a69b, ALTERNATING_AL, 0, - 0x00a722, 0x00a72f, 
ALTERNATING_AL, 0, - 0x00a732, 0x00a76f, ALTERNATING_AL, 0, - 0x00a779, 0x00a77c, ALTERNATING_UL, 0, - 0x00a77d, 0x00a77d, INTEGER_OFFSET, -35332, - 0x00a77e, 0x00a787, ALTERNATING_AL, 0, - 0x00a78b, 0x00a78c, ALTERNATING_UL, 0, - 0x00a78d, 0x00a78d, INTEGER_OFFSET, -42280, - 0x00a790, 0x00a793, ALTERNATING_AL, 0, - 0x00a794, 0x00a794, INTEGER_OFFSET, 48, - 0x00a796, 0x00a7a9, ALTERNATING_AL, 0, - 0x00a7aa, 0x00a7aa, INTEGER_OFFSET, -42308, - 0x00a7ab, 0x00a7ab, INTEGER_OFFSET, -42319, - 0x00a7ac, 0x00a7ac, INTEGER_OFFSET, -42315, - 0x00a7ad, 0x00a7ad, INTEGER_OFFSET, -42305, - 0x00a7ae, 0x00a7ae, INTEGER_OFFSET, -42308, - 0x00a7b0, 0x00a7b0, INTEGER_OFFSET, -42258, - 0x00a7b1, 0x00a7b1, INTEGER_OFFSET, -42282, - 0x00a7b2, 0x00a7b2, INTEGER_OFFSET, -42261, - 0x00a7b3, 0x00a7b3, INTEGER_OFFSET, 928, - 0x00a7b4, 0x00a7c3, ALTERNATING_AL, 0, - 0x00a7c4, 0x00a7c4, INTEGER_OFFSET, -48, - 0x00a7c5, 0x00a7c5, INTEGER_OFFSET, -42307, - 0x00a7c6, 0x00a7c6, INTEGER_OFFSET, -35384, - 0x00a7c7, 0x00a7ca, ALTERNATING_UL, 0, - 0x00a7d0, 0x00a7d1, ALTERNATING_AL, 0, - 0x00a7d6, 0x00a7d9, ALTERNATING_AL, 0, - 0x00a7f5, 0x00a7f6, ALTERNATING_UL, 0, - 0x00ab53, 0x00ab53, INTEGER_OFFSET, -928, - 0x00ab70, 0x00abbf, INTEGER_OFFSET, -38864, - 0x00ff21, 0x00ff3a, INTEGER_OFFSET, 32, - 0x00ff41, 0x00ff5a, INTEGER_OFFSET, -32, - 0x010400, 0x010427, INTEGER_OFFSET, 40, - 0x010428, 0x01044f, INTEGER_OFFSET, -40, - 0x0104b0, 0x0104d3, INTEGER_OFFSET, 40, - 0x0104d8, 0x0104fb, INTEGER_OFFSET, -40, - 0x010570, 0x01057a, INTEGER_OFFSET, 39, - 0x01057c, 0x01058a, INTEGER_OFFSET, 39, - 0x01058c, 0x010592, INTEGER_OFFSET, 39, - 0x010594, 0x010595, INTEGER_OFFSET, 39, - 0x010597, 0x0105a1, INTEGER_OFFSET, -39, - 0x0105a3, 0x0105b1, INTEGER_OFFSET, -39, - 0x0105b3, 0x0105b9, INTEGER_OFFSET, -39, - 0x0105bb, 0x0105bc, INTEGER_OFFSET, -39, - 0x010c80, 0x010cb2, INTEGER_OFFSET, 64, - 0x010cc0, 0x010cf2, INTEGER_OFFSET, -64, - 0x0118a0, 0x0118bf, INTEGER_OFFSET, 32, - 0x0118c0, 0x0118df, INTEGER_OFFSET, 
-32, - 0x016e40, 0x016e5f, INTEGER_OFFSET, 32, - 0x016e60, 0x016e7f, INTEGER_OFFSET, -32, - 0x01e900, 0x01e921, INTEGER_OFFSET, 34, - 0x01e922, 0x01e943, INTEGER_OFFSET, -34 - }); - - public static final CaseFoldTableImpl PYTHON_ASCII_TABLE_ENTRIES = new CaseFoldTableImpl(new int[]{ - 0x000041, 0x00005a, INTEGER_OFFSET, 32, - 0x000061, 0x00007a, INTEGER_OFFSET, -32 - }); - - public static final CaseFoldTableImpl PYTHON_UNICODE_TABLE_ENTRIES = new CaseFoldTableImpl(new int[]{ - 0x000041, 0x000048, INTEGER_OFFSET, 32, - 0x000049, 0x000049, DIRECT_MAPPING, 28, - 0x00004a, 0x00004a, INTEGER_OFFSET, 32, - 0x00004b, 0x00004b, DIRECT_MAPPING, 23, - 0x00004c, 0x000052, INTEGER_OFFSET, 32, - 0x000053, 0x000053, DIRECT_MAPPING, 24, - 0x000054, 0x00005a, INTEGER_OFFSET, 32, - 0x000061, 0x000068, INTEGER_OFFSET, -32, - 0x000069, 0x000069, DIRECT_MAPPING, 28, - 0x00006a, 0x00006a, INTEGER_OFFSET, -32, - 0x00006b, 0x00006b, DIRECT_MAPPING, 23, - 0x00006c, 0x000072, INTEGER_OFFSET, -32, - 0x000073, 0x000073, DIRECT_MAPPING, 24, - 0x000074, 0x00007a, INTEGER_OFFSET, -32, - 0x0000b5, 0x0000b5, DIRECT_MAPPING, 0, - 0x0000c0, 0x0000c4, INTEGER_OFFSET, 32, - 0x0000c5, 0x0000c5, DIRECT_MAPPING, 25, - 0x0000c6, 0x0000d6, INTEGER_OFFSET, 32, - 0x0000d8, 0x0000de, INTEGER_OFFSET, 32, - 0x0000df, 0x0000df, INTEGER_OFFSET, 7615, - 0x0000e0, 0x0000e4, INTEGER_OFFSET, -32, - 0x0000e5, 0x0000e5, DIRECT_MAPPING, 25, - 0x0000e6, 0x0000f6, INTEGER_OFFSET, -32, - 0x0000f8, 0x0000fe, INTEGER_OFFSET, -32, - 0x0000ff, 0x0000ff, INTEGER_OFFSET, 121, - 0x000100, 0x00012f, ALTERNATING_AL, 0, - 0x000130, 0x000131, DIRECT_MAPPING, 28, - 0x000132, 0x000137, ALTERNATING_AL, 0, - 0x000139, 0x000148, ALTERNATING_UL, 0, - 0x00014a, 0x000177, ALTERNATING_AL, 0, - 0x000178, 0x000178, INTEGER_OFFSET, -121, - 0x000179, 0x00017e, ALTERNATING_UL, 0, - 0x00017f, 0x00017f, DIRECT_MAPPING, 24, - 0x000180, 0x000180, INTEGER_OFFSET, 195, - 0x000181, 0x000181, INTEGER_OFFSET, 210, - 0x000182, 0x000185, ALTERNATING_AL, 0, 
- 0x000186, 0x000186, INTEGER_OFFSET, 206, - 0x000187, 0x000188, ALTERNATING_UL, 0, - 0x000189, 0x00018a, INTEGER_OFFSET, 205, - 0x00018b, 0x00018c, ALTERNATING_UL, 0, - 0x00018e, 0x00018e, INTEGER_OFFSET, 79, - 0x00018f, 0x00018f, INTEGER_OFFSET, 202, - 0x000190, 0x000190, INTEGER_OFFSET, 203, - 0x000191, 0x000192, ALTERNATING_UL, 0, - 0x000193, 0x000193, INTEGER_OFFSET, 205, - 0x000194, 0x000194, INTEGER_OFFSET, 207, - 0x000195, 0x000195, INTEGER_OFFSET, 97, - 0x000196, 0x000196, INTEGER_OFFSET, 211, - 0x000197, 0x000197, INTEGER_OFFSET, 209, - 0x000198, 0x000199, ALTERNATING_AL, 0, - 0x00019a, 0x00019a, INTEGER_OFFSET, 163, - 0x00019c, 0x00019c, INTEGER_OFFSET, 211, - 0x00019d, 0x00019d, INTEGER_OFFSET, 213, - 0x00019e, 0x00019e, INTEGER_OFFSET, 130, - 0x00019f, 0x00019f, INTEGER_OFFSET, 214, - 0x0001a0, 0x0001a5, ALTERNATING_AL, 0, - 0x0001a6, 0x0001a6, INTEGER_OFFSET, 218, - 0x0001a7, 0x0001a8, ALTERNATING_UL, 0, - 0x0001a9, 0x0001a9, INTEGER_OFFSET, 218, - 0x0001ac, 0x0001ad, ALTERNATING_AL, 0, - 0x0001ae, 0x0001ae, INTEGER_OFFSET, 218, - 0x0001af, 0x0001b0, ALTERNATING_UL, 0, - 0x0001b1, 0x0001b2, INTEGER_OFFSET, 217, - 0x0001b3, 0x0001b6, ALTERNATING_UL, 0, - 0x0001b7, 0x0001b7, INTEGER_OFFSET, 219, - 0x0001b8, 0x0001b9, ALTERNATING_AL, 0, - 0x0001bc, 0x0001bd, ALTERNATING_AL, 0, - 0x0001bf, 0x0001bf, INTEGER_OFFSET, 56, - 0x0001c4, 0x0001c6, DIRECT_MAPPING, 1, - 0x0001c7, 0x0001c9, DIRECT_MAPPING, 2, - 0x0001ca, 0x0001cc, DIRECT_MAPPING, 3, - 0x0001cd, 0x0001dc, ALTERNATING_UL, 0, - 0x0001dd, 0x0001dd, INTEGER_OFFSET, -79, - 0x0001de, 0x0001ef, ALTERNATING_AL, 0, - 0x0001f1, 0x0001f3, DIRECT_MAPPING, 4, - 0x0001f4, 0x0001f5, ALTERNATING_AL, 0, - 0x0001f6, 0x0001f6, INTEGER_OFFSET, -97, - 0x0001f7, 0x0001f7, INTEGER_OFFSET, -56, - 0x0001f8, 0x00021f, ALTERNATING_AL, 0, - 0x000220, 0x000220, INTEGER_OFFSET, -130, - 0x000222, 0x000233, ALTERNATING_AL, 0, - 0x00023a, 0x00023a, INTEGER_OFFSET, 10795, - 0x00023b, 0x00023c, ALTERNATING_UL, 0, - 0x00023d, 
0x00023d, INTEGER_OFFSET, -163, - 0x00023e, 0x00023e, INTEGER_OFFSET, 10792, - 0x00023f, 0x000240, INTEGER_OFFSET, 10815, - 0x000241, 0x000242, ALTERNATING_UL, 0, - 0x000243, 0x000243, INTEGER_OFFSET, -195, - 0x000244, 0x000244, INTEGER_OFFSET, 69, - 0x000245, 0x000245, INTEGER_OFFSET, 71, - 0x000246, 0x00024f, ALTERNATING_AL, 0, - 0x000250, 0x000250, INTEGER_OFFSET, 10783, - 0x000251, 0x000251, INTEGER_OFFSET, 10780, - 0x000252, 0x000252, INTEGER_OFFSET, 10782, - 0x000253, 0x000253, INTEGER_OFFSET, -210, - 0x000254, 0x000254, INTEGER_OFFSET, -206, - 0x000256, 0x000257, INTEGER_OFFSET, -205, - 0x000259, 0x000259, INTEGER_OFFSET, -202, - 0x00025b, 0x00025b, INTEGER_OFFSET, -203, - 0x00025c, 0x00025c, INTEGER_OFFSET, 42319, - 0x000260, 0x000260, INTEGER_OFFSET, -205, - 0x000261, 0x000261, INTEGER_OFFSET, 42315, - 0x000263, 0x000263, INTEGER_OFFSET, -207, - 0x000265, 0x000265, INTEGER_OFFSET, 42280, - 0x000266, 0x000266, INTEGER_OFFSET, 42308, - 0x000268, 0x000268, INTEGER_OFFSET, -209, - 0x000269, 0x000269, INTEGER_OFFSET, -211, - 0x00026a, 0x00026a, INTEGER_OFFSET, 42308, - 0x00026b, 0x00026b, INTEGER_OFFSET, 10743, - 0x00026c, 0x00026c, INTEGER_OFFSET, 42305, - 0x00026f, 0x00026f, INTEGER_OFFSET, -211, - 0x000271, 0x000271, INTEGER_OFFSET, 10749, - 0x000272, 0x000272, INTEGER_OFFSET, -213, - 0x000275, 0x000275, INTEGER_OFFSET, -214, - 0x00027d, 0x00027d, INTEGER_OFFSET, 10727, - 0x000280, 0x000280, INTEGER_OFFSET, -218, - 0x000282, 0x000282, INTEGER_OFFSET, 42307, - 0x000283, 0x000283, INTEGER_OFFSET, -218, - 0x000287, 0x000287, INTEGER_OFFSET, 42282, - 0x000288, 0x000288, INTEGER_OFFSET, -218, - 0x000289, 0x000289, INTEGER_OFFSET, -69, - 0x00028a, 0x00028b, INTEGER_OFFSET, -217, - 0x00028c, 0x00028c, INTEGER_OFFSET, -71, - 0x000292, 0x000292, INTEGER_OFFSET, -219, - 0x00029d, 0x00029d, INTEGER_OFFSET, 42261, - 0x00029e, 0x00029e, INTEGER_OFFSET, 42258, - 0x000345, 0x000345, DIRECT_MAPPING, 5, - 0x000370, 0x000373, ALTERNATING_AL, 0, - 0x000376, 0x000377, 
ALTERNATING_AL, 0, - 0x00037b, 0x00037d, INTEGER_OFFSET, 130, - 0x00037f, 0x00037f, INTEGER_OFFSET, 116, - 0x000386, 0x000386, INTEGER_OFFSET, 38, - 0x000388, 0x00038a, INTEGER_OFFSET, 37, - 0x00038c, 0x00038c, INTEGER_OFFSET, 64, - 0x00038e, 0x00038f, INTEGER_OFFSET, 63, - 0x000390, 0x000390, INTEGER_OFFSET, 7235, - 0x000391, 0x000391, INTEGER_OFFSET, 32, - 0x000392, 0x000392, DIRECT_MAPPING, 6, - 0x000393, 0x000394, INTEGER_OFFSET, 32, - 0x000395, 0x000395, DIRECT_MAPPING, 7, - 0x000396, 0x000397, INTEGER_OFFSET, 32, - 0x000398, 0x000398, DIRECT_MAPPING, 26, - 0x000399, 0x000399, DIRECT_MAPPING, 5, - 0x00039a, 0x00039a, DIRECT_MAPPING, 9, - 0x00039b, 0x00039b, INTEGER_OFFSET, 32, - 0x00039c, 0x00039c, DIRECT_MAPPING, 0, - 0x00039d, 0x00039f, INTEGER_OFFSET, 32, - 0x0003a0, 0x0003a0, DIRECT_MAPPING, 10, - 0x0003a1, 0x0003a1, DIRECT_MAPPING, 11, - 0x0003a3, 0x0003a3, DIRECT_MAPPING, 12, - 0x0003a4, 0x0003a5, INTEGER_OFFSET, 32, - 0x0003a6, 0x0003a6, DIRECT_MAPPING, 13, - 0x0003a7, 0x0003a8, INTEGER_OFFSET, 32, - 0x0003a9, 0x0003a9, DIRECT_MAPPING, 27, - 0x0003aa, 0x0003ab, INTEGER_OFFSET, 32, - 0x0003ac, 0x0003ac, INTEGER_OFFSET, -38, - 0x0003ad, 0x0003af, INTEGER_OFFSET, -37, - 0x0003b0, 0x0003b0, INTEGER_OFFSET, 7219, - 0x0003b1, 0x0003b1, INTEGER_OFFSET, -32, - 0x0003b2, 0x0003b2, DIRECT_MAPPING, 6, - 0x0003b3, 0x0003b4, INTEGER_OFFSET, -32, - 0x0003b5, 0x0003b5, DIRECT_MAPPING, 7, - 0x0003b6, 0x0003b7, INTEGER_OFFSET, -32, - 0x0003b8, 0x0003b8, DIRECT_MAPPING, 26, - 0x0003b9, 0x0003b9, DIRECT_MAPPING, 5, - 0x0003ba, 0x0003ba, DIRECT_MAPPING, 9, - 0x0003bb, 0x0003bb, INTEGER_OFFSET, -32, - 0x0003bc, 0x0003bc, DIRECT_MAPPING, 0, - 0x0003bd, 0x0003bf, INTEGER_OFFSET, -32, - 0x0003c0, 0x0003c0, DIRECT_MAPPING, 10, - 0x0003c1, 0x0003c1, DIRECT_MAPPING, 11, - 0x0003c2, 0x0003c3, DIRECT_MAPPING, 12, - 0x0003c4, 0x0003c5, INTEGER_OFFSET, -32, - 0x0003c6, 0x0003c6, DIRECT_MAPPING, 13, - 0x0003c7, 0x0003c8, INTEGER_OFFSET, -32, - 0x0003c9, 0x0003c9, DIRECT_MAPPING, 27, - 
0x0003ca, 0x0003cb, INTEGER_OFFSET, -32, - 0x0003cc, 0x0003cc, INTEGER_OFFSET, -64, - 0x0003cd, 0x0003ce, INTEGER_OFFSET, -63, - 0x0003cf, 0x0003cf, INTEGER_OFFSET, 8, - 0x0003d0, 0x0003d0, DIRECT_MAPPING, 6, - 0x0003d1, 0x0003d1, DIRECT_MAPPING, 26, - 0x0003d5, 0x0003d5, DIRECT_MAPPING, 13, - 0x0003d6, 0x0003d6, DIRECT_MAPPING, 10, - 0x0003d7, 0x0003d7, INTEGER_OFFSET, -8, - 0x0003d8, 0x0003ef, ALTERNATING_AL, 0, - 0x0003f0, 0x0003f0, DIRECT_MAPPING, 9, - 0x0003f1, 0x0003f1, DIRECT_MAPPING, 11, - 0x0003f2, 0x0003f2, INTEGER_OFFSET, 7, - 0x0003f3, 0x0003f3, INTEGER_OFFSET, -116, - 0x0003f4, 0x0003f4, DIRECT_MAPPING, 26, - 0x0003f5, 0x0003f5, DIRECT_MAPPING, 7, - 0x0003f7, 0x0003f8, ALTERNATING_UL, 0, - 0x0003f9, 0x0003f9, INTEGER_OFFSET, -7, - 0x0003fa, 0x0003fb, ALTERNATING_AL, 0, - 0x0003fd, 0x0003ff, INTEGER_OFFSET, -130, - 0x000400, 0x00040f, INTEGER_OFFSET, 80, - 0x000410, 0x000411, INTEGER_OFFSET, 32, - 0x000412, 0x000412, DIRECT_MAPPING, 14, - 0x000413, 0x000413, INTEGER_OFFSET, 32, - 0x000414, 0x000414, DIRECT_MAPPING, 15, - 0x000415, 0x00041d, INTEGER_OFFSET, 32, - 0x00041e, 0x00041e, DIRECT_MAPPING, 16, - 0x00041f, 0x000420, INTEGER_OFFSET, 32, - 0x000421, 0x000421, DIRECT_MAPPING, 17, - 0x000422, 0x000422, DIRECT_MAPPING, 18, - 0x000423, 0x000429, INTEGER_OFFSET, 32, - 0x00042a, 0x00042a, DIRECT_MAPPING, 19, - 0x00042b, 0x00042f, INTEGER_OFFSET, 32, - 0x000430, 0x000431, INTEGER_OFFSET, -32, - 0x000432, 0x000432, DIRECT_MAPPING, 14, - 0x000433, 0x000433, INTEGER_OFFSET, -32, - 0x000434, 0x000434, DIRECT_MAPPING, 15, - 0x000435, 0x00043d, INTEGER_OFFSET, -32, - 0x00043e, 0x00043e, DIRECT_MAPPING, 16, - 0x00043f, 0x000440, INTEGER_OFFSET, -32, - 0x000441, 0x000441, DIRECT_MAPPING, 17, - 0x000442, 0x000442, DIRECT_MAPPING, 18, - 0x000443, 0x000449, INTEGER_OFFSET, -32, - 0x00044a, 0x00044a, DIRECT_MAPPING, 19, - 0x00044b, 0x00044f, INTEGER_OFFSET, -32, - 0x000450, 0x00045f, INTEGER_OFFSET, -80, - 0x000460, 0x000461, ALTERNATING_AL, 0, - 0x000462, 0x000463, 
DIRECT_MAPPING, 20, - 0x000464, 0x000481, ALTERNATING_AL, 0, - 0x00048a, 0x0004bf, ALTERNATING_AL, 0, - 0x0004c0, 0x0004c0, INTEGER_OFFSET, 15, - 0x0004c1, 0x0004ce, ALTERNATING_UL, 0, - 0x0004cf, 0x0004cf, INTEGER_OFFSET, -15, - 0x0004d0, 0x00052f, ALTERNATING_AL, 0, - 0x000531, 0x000556, INTEGER_OFFSET, 48, - 0x000561, 0x000586, INTEGER_OFFSET, -48, - 0x0010a0, 0x0010c5, INTEGER_OFFSET, 7264, - 0x0010c7, 0x0010c7, INTEGER_OFFSET, 7264, - 0x0010cd, 0x0010cd, INTEGER_OFFSET, 7264, - 0x0010d0, 0x0010fa, INTEGER_OFFSET, 3008, - 0x0010fd, 0x0010ff, INTEGER_OFFSET, 3008, - 0x0013a0, 0x0013ef, INTEGER_OFFSET, 38864, - 0x0013f0, 0x0013f5, INTEGER_OFFSET, 8, - 0x0013f8, 0x0013fd, INTEGER_OFFSET, -8, - 0x001c80, 0x001c80, DIRECT_MAPPING, 14, - 0x001c81, 0x001c81, DIRECT_MAPPING, 15, - 0x001c82, 0x001c82, DIRECT_MAPPING, 16, - 0x001c83, 0x001c83, DIRECT_MAPPING, 17, - 0x001c84, 0x001c85, DIRECT_MAPPING, 18, - 0x001c86, 0x001c86, DIRECT_MAPPING, 19, - 0x001c87, 0x001c87, DIRECT_MAPPING, 20, - 0x001c88, 0x001c88, DIRECT_MAPPING, 21, - 0x001c90, 0x001cba, INTEGER_OFFSET, -3008, - 0x001cbd, 0x001cbf, INTEGER_OFFSET, -3008, - 0x001d79, 0x001d79, INTEGER_OFFSET, 35332, - 0x001d7d, 0x001d7d, INTEGER_OFFSET, 3814, - 0x001d8e, 0x001d8e, INTEGER_OFFSET, 35384, - 0x001e00, 0x001e5f, ALTERNATING_AL, 0, - 0x001e60, 0x001e61, DIRECT_MAPPING, 22, - 0x001e62, 0x001e95, ALTERNATING_AL, 0, - 0x001e9b, 0x001e9b, DIRECT_MAPPING, 22, - 0x001e9e, 0x001e9e, INTEGER_OFFSET, -7615, - 0x001ea0, 0x001eff, ALTERNATING_AL, 0, - 0x001f00, 0x001f07, INTEGER_OFFSET, 8, - 0x001f08, 0x001f0f, INTEGER_OFFSET, -8, - 0x001f10, 0x001f15, INTEGER_OFFSET, 8, - 0x001f18, 0x001f1d, INTEGER_OFFSET, -8, - 0x001f20, 0x001f27, INTEGER_OFFSET, 8, - 0x001f28, 0x001f2f, INTEGER_OFFSET, -8, - 0x001f30, 0x001f37, INTEGER_OFFSET, 8, - 0x001f38, 0x001f3f, INTEGER_OFFSET, -8, - 0x001f40, 0x001f45, INTEGER_OFFSET, 8, - 0x001f48, 0x001f4d, INTEGER_OFFSET, -8, - 0x001f51, 0x001f51, INTEGER_OFFSET, 8, - 0x001f53, 0x001f53, 
INTEGER_OFFSET, 8, - 0x001f55, 0x001f55, INTEGER_OFFSET, 8, - 0x001f57, 0x001f57, INTEGER_OFFSET, 8, - 0x001f59, 0x001f59, INTEGER_OFFSET, -8, - 0x001f5b, 0x001f5b, INTEGER_OFFSET, -8, - 0x001f5d, 0x001f5d, INTEGER_OFFSET, -8, - 0x001f5f, 0x001f5f, INTEGER_OFFSET, -8, - 0x001f60, 0x001f67, INTEGER_OFFSET, 8, - 0x001f68, 0x001f6f, INTEGER_OFFSET, -8, - 0x001f70, 0x001f71, INTEGER_OFFSET, 74, - 0x001f72, 0x001f75, INTEGER_OFFSET, 86, - 0x001f76, 0x001f77, INTEGER_OFFSET, 100, - 0x001f78, 0x001f79, INTEGER_OFFSET, 128, - 0x001f7a, 0x001f7b, INTEGER_OFFSET, 112, - 0x001f7c, 0x001f7d, INTEGER_OFFSET, 126, - 0x001f80, 0x001f87, INTEGER_OFFSET, 8, - 0x001f88, 0x001f8f, INTEGER_OFFSET, -8, - 0x001f90, 0x001f97, INTEGER_OFFSET, 8, - 0x001f98, 0x001f9f, INTEGER_OFFSET, -8, - 0x001fa0, 0x001fa7, INTEGER_OFFSET, 8, - 0x001fa8, 0x001faf, INTEGER_OFFSET, -8, - 0x001fb0, 0x001fb1, INTEGER_OFFSET, 8, - 0x001fb3, 0x001fb3, INTEGER_OFFSET, 9, - 0x001fb8, 0x001fb9, INTEGER_OFFSET, -8, - 0x001fba, 0x001fbb, INTEGER_OFFSET, -74, - 0x001fbc, 0x001fbc, INTEGER_OFFSET, -9, - 0x001fbe, 0x001fbe, DIRECT_MAPPING, 5, - 0x001fc3, 0x001fc3, INTEGER_OFFSET, 9, - 0x001fc8, 0x001fcb, INTEGER_OFFSET, -86, - 0x001fcc, 0x001fcc, INTEGER_OFFSET, -9, - 0x001fd0, 0x001fd1, INTEGER_OFFSET, 8, - 0x001fd3, 0x001fd3, INTEGER_OFFSET, -7235, - 0x001fd8, 0x001fd9, INTEGER_OFFSET, -8, - 0x001fda, 0x001fdb, INTEGER_OFFSET, -100, - 0x001fe0, 0x001fe1, INTEGER_OFFSET, 8, - 0x001fe3, 0x001fe3, INTEGER_OFFSET, -7219, - 0x001fe5, 0x001fe5, INTEGER_OFFSET, 7, - 0x001fe8, 0x001fe9, INTEGER_OFFSET, -8, - 0x001fea, 0x001feb, INTEGER_OFFSET, -112, - 0x001fec, 0x001fec, INTEGER_OFFSET, -7, - 0x001ff3, 0x001ff3, INTEGER_OFFSET, 9, - 0x001ff8, 0x001ff9, INTEGER_OFFSET, -128, - 0x001ffa, 0x001ffb, INTEGER_OFFSET, -126, - 0x001ffc, 0x001ffc, INTEGER_OFFSET, -9, - 0x002126, 0x002126, DIRECT_MAPPING, 27, - 0x00212a, 0x00212a, DIRECT_MAPPING, 23, - 0x00212b, 0x00212b, DIRECT_MAPPING, 25, - 0x002132, 0x002132, INTEGER_OFFSET, 28, 
- 0x00214e, 0x00214e, INTEGER_OFFSET, -28, - 0x002160, 0x00216f, INTEGER_OFFSET, 16, - 0x002170, 0x00217f, INTEGER_OFFSET, -16, - 0x002183, 0x002184, ALTERNATING_UL, 0, - 0x0024b6, 0x0024cf, INTEGER_OFFSET, 26, - 0x0024d0, 0x0024e9, INTEGER_OFFSET, -26, - 0x002c00, 0x002c2f, INTEGER_OFFSET, 48, - 0x002c30, 0x002c5f, INTEGER_OFFSET, -48, - 0x002c60, 0x002c61, ALTERNATING_AL, 0, - 0x002c62, 0x002c62, INTEGER_OFFSET, -10743, - 0x002c63, 0x002c63, INTEGER_OFFSET, -3814, - 0x002c64, 0x002c64, INTEGER_OFFSET, -10727, - 0x002c65, 0x002c65, INTEGER_OFFSET, -10795, - 0x002c66, 0x002c66, INTEGER_OFFSET, -10792, - 0x002c67, 0x002c6c, ALTERNATING_UL, 0, - 0x002c6d, 0x002c6d, INTEGER_OFFSET, -10780, - 0x002c6e, 0x002c6e, INTEGER_OFFSET, -10749, - 0x002c6f, 0x002c6f, INTEGER_OFFSET, -10783, - 0x002c70, 0x002c70, INTEGER_OFFSET, -10782, - 0x002c72, 0x002c73, ALTERNATING_AL, 0, - 0x002c75, 0x002c76, ALTERNATING_UL, 0, - 0x002c7e, 0x002c7f, INTEGER_OFFSET, -10815, - 0x002c80, 0x002ce3, ALTERNATING_AL, 0, - 0x002ceb, 0x002cee, ALTERNATING_UL, 0, - 0x002cf2, 0x002cf3, ALTERNATING_AL, 0, - 0x002d00, 0x002d25, INTEGER_OFFSET, -7264, - 0x002d27, 0x002d27, INTEGER_OFFSET, -7264, - 0x002d2d, 0x002d2d, INTEGER_OFFSET, -7264, - 0x00a640, 0x00a649, ALTERNATING_AL, 0, - 0x00a64a, 0x00a64b, DIRECT_MAPPING, 21, - 0x00a64c, 0x00a66d, ALTERNATING_AL, 0, - 0x00a680, 0x00a69b, ALTERNATING_AL, 0, - 0x00a722, 0x00a72f, ALTERNATING_AL, 0, - 0x00a732, 0x00a76f, ALTERNATING_AL, 0, - 0x00a779, 0x00a77c, ALTERNATING_UL, 0, - 0x00a77d, 0x00a77d, INTEGER_OFFSET, -35332, - 0x00a77e, 0x00a787, ALTERNATING_AL, 0, - 0x00a78b, 0x00a78c, ALTERNATING_UL, 0, - 0x00a78d, 0x00a78d, INTEGER_OFFSET, -42280, - 0x00a790, 0x00a793, ALTERNATING_AL, 0, - 0x00a794, 0x00a794, INTEGER_OFFSET, 48, - 0x00a796, 0x00a7a9, ALTERNATING_AL, 0, - 0x00a7aa, 0x00a7aa, INTEGER_OFFSET, -42308, - 0x00a7ab, 0x00a7ab, INTEGER_OFFSET, -42319, - 0x00a7ac, 0x00a7ac, INTEGER_OFFSET, -42315, - 0x00a7ad, 0x00a7ad, INTEGER_OFFSET, -42305, - 
0x00a7ae, 0x00a7ae, INTEGER_OFFSET, -42308, - 0x00a7b0, 0x00a7b0, INTEGER_OFFSET, -42258, - 0x00a7b1, 0x00a7b1, INTEGER_OFFSET, -42282, - 0x00a7b2, 0x00a7b2, INTEGER_OFFSET, -42261, - 0x00a7b3, 0x00a7b3, INTEGER_OFFSET, 928, - 0x00a7b4, 0x00a7c3, ALTERNATING_AL, 0, - 0x00a7c4, 0x00a7c4, INTEGER_OFFSET, -48, - 0x00a7c5, 0x00a7c5, INTEGER_OFFSET, -42307, - 0x00a7c6, 0x00a7c6, INTEGER_OFFSET, -35384, - 0x00a7c7, 0x00a7ca, ALTERNATING_UL, 0, - 0x00a7d0, 0x00a7d1, ALTERNATING_AL, 0, - 0x00a7d6, 0x00a7d9, ALTERNATING_AL, 0, - 0x00a7f5, 0x00a7f6, ALTERNATING_UL, 0, - 0x00ab53, 0x00ab53, INTEGER_OFFSET, -928, - 0x00ab70, 0x00abbf, INTEGER_OFFSET, -38864, - 0x00fb05, 0x00fb06, ALTERNATING_UL, 0, - 0x00ff21, 0x00ff3a, INTEGER_OFFSET, 32, - 0x00ff41, 0x00ff5a, INTEGER_OFFSET, -32, - 0x010400, 0x010427, INTEGER_OFFSET, 40, - 0x010428, 0x01044f, INTEGER_OFFSET, -40, - 0x0104b0, 0x0104d3, INTEGER_OFFSET, 40, - 0x0104d8, 0x0104fb, INTEGER_OFFSET, -40, - 0x010570, 0x01057a, INTEGER_OFFSET, 39, - 0x01057c, 0x01058a, INTEGER_OFFSET, 39, - 0x01058c, 0x010592, INTEGER_OFFSET, 39, - 0x010594, 0x010595, INTEGER_OFFSET, 39, - 0x010597, 0x0105a1, INTEGER_OFFSET, -39, - 0x0105a3, 0x0105b1, INTEGER_OFFSET, -39, - 0x0105b3, 0x0105b9, INTEGER_OFFSET, -39, - 0x0105bb, 0x0105bc, INTEGER_OFFSET, -39, - 0x010c80, 0x010cb2, INTEGER_OFFSET, 64, - 0x010cc0, 0x010cf2, INTEGER_OFFSET, -64, - 0x0118a0, 0x0118bf, INTEGER_OFFSET, 32, - 0x0118c0, 0x0118df, INTEGER_OFFSET, -32, - 0x016e40, 0x016e5f, INTEGER_OFFSET, 32, - 0x016e60, 0x016e7f, INTEGER_OFFSET, -32, - 0x01e900, 0x01e921, INTEGER_OFFSET, 34, - 0x01e922, 0x01e943, INTEGER_OFFSET, -34 - }); - - public static final CaseFoldTableImpl SIMPLE_CASE_FOLDING_ENTRIES = new CaseFoldTableImpl(new int[]{ - 0x000041, 0x00005a, INTEGER_OFFSET, 32, - 0x0000b5, 0x0000b5, INTEGER_OFFSET, 775, - 0x0000c0, 0x0000d6, INTEGER_OFFSET, 32, - 0x0000d8, 0x0000de, INTEGER_OFFSET, 32, - 0x000100, 0x000100, INTEGER_OFFSET, 1, - 0x000102, 0x000102, INTEGER_OFFSET, 1, - 
0x000104, 0x000104, INTEGER_OFFSET, 1, - 0x000106, 0x000106, INTEGER_OFFSET, 1, - 0x000108, 0x000108, INTEGER_OFFSET, 1, - 0x00010a, 0x00010a, INTEGER_OFFSET, 1, - 0x00010c, 0x00010c, INTEGER_OFFSET, 1, - 0x00010e, 0x00010e, INTEGER_OFFSET, 1, - 0x000110, 0x000110, INTEGER_OFFSET, 1, - 0x000112, 0x000112, INTEGER_OFFSET, 1, - 0x000114, 0x000114, INTEGER_OFFSET, 1, - 0x000116, 0x000116, INTEGER_OFFSET, 1, - 0x000118, 0x000118, INTEGER_OFFSET, 1, - 0x00011a, 0x00011a, INTEGER_OFFSET, 1, - 0x00011c, 0x00011c, INTEGER_OFFSET, 1, - 0x00011e, 0x00011e, INTEGER_OFFSET, 1, - 0x000120, 0x000120, INTEGER_OFFSET, 1, - 0x000122, 0x000122, INTEGER_OFFSET, 1, - 0x000124, 0x000124, INTEGER_OFFSET, 1, - 0x000126, 0x000126, INTEGER_OFFSET, 1, - 0x000128, 0x000128, INTEGER_OFFSET, 1, - 0x00012a, 0x00012a, INTEGER_OFFSET, 1, - 0x00012c, 0x00012c, INTEGER_OFFSET, 1, - 0x00012e, 0x00012e, INTEGER_OFFSET, 1, - 0x000132, 0x000132, INTEGER_OFFSET, 1, - 0x000134, 0x000134, INTEGER_OFFSET, 1, - 0x000136, 0x000136, INTEGER_OFFSET, 1, - 0x000139, 0x000139, INTEGER_OFFSET, 1, - 0x00013b, 0x00013b, INTEGER_OFFSET, 1, - 0x00013d, 0x00013d, INTEGER_OFFSET, 1, - 0x00013f, 0x00013f, INTEGER_OFFSET, 1, - 0x000141, 0x000141, INTEGER_OFFSET, 1, - 0x000143, 0x000143, INTEGER_OFFSET, 1, - 0x000145, 0x000145, INTEGER_OFFSET, 1, - 0x000147, 0x000147, INTEGER_OFFSET, 1, - 0x00014a, 0x00014a, INTEGER_OFFSET, 1, - 0x00014c, 0x00014c, INTEGER_OFFSET, 1, - 0x00014e, 0x00014e, INTEGER_OFFSET, 1, - 0x000150, 0x000150, INTEGER_OFFSET, 1, - 0x000152, 0x000152, INTEGER_OFFSET, 1, - 0x000154, 0x000154, INTEGER_OFFSET, 1, - 0x000156, 0x000156, INTEGER_OFFSET, 1, - 0x000158, 0x000158, INTEGER_OFFSET, 1, - 0x00015a, 0x00015a, INTEGER_OFFSET, 1, - 0x00015c, 0x00015c, INTEGER_OFFSET, 1, - 0x00015e, 0x00015e, INTEGER_OFFSET, 1, - 0x000160, 0x000160, INTEGER_OFFSET, 1, - 0x000162, 0x000162, INTEGER_OFFSET, 1, - 0x000164, 0x000164, INTEGER_OFFSET, 1, - 0x000166, 0x000166, INTEGER_OFFSET, 1, - 0x000168, 0x000168, 
INTEGER_OFFSET, 1, - 0x00016a, 0x00016a, INTEGER_OFFSET, 1, - 0x00016c, 0x00016c, INTEGER_OFFSET, 1, - 0x00016e, 0x00016e, INTEGER_OFFSET, 1, - 0x000170, 0x000170, INTEGER_OFFSET, 1, - 0x000172, 0x000172, INTEGER_OFFSET, 1, - 0x000174, 0x000174, INTEGER_OFFSET, 1, - 0x000176, 0x000176, INTEGER_OFFSET, 1, - 0x000178, 0x000178, INTEGER_OFFSET, -121, - 0x000179, 0x000179, INTEGER_OFFSET, 1, - 0x00017b, 0x00017b, INTEGER_OFFSET, 1, - 0x00017d, 0x00017d, INTEGER_OFFSET, 1, - 0x00017f, 0x00017f, INTEGER_OFFSET, -268, - 0x000181, 0x000181, INTEGER_OFFSET, 210, - 0x000182, 0x000182, INTEGER_OFFSET, 1, - 0x000184, 0x000184, INTEGER_OFFSET, 1, - 0x000186, 0x000186, INTEGER_OFFSET, 206, - 0x000187, 0x000187, INTEGER_OFFSET, 1, - 0x000189, 0x00018a, INTEGER_OFFSET, 205, - 0x00018b, 0x00018b, INTEGER_OFFSET, 1, - 0x00018e, 0x00018e, INTEGER_OFFSET, 79, - 0x00018f, 0x00018f, INTEGER_OFFSET, 202, - 0x000190, 0x000190, INTEGER_OFFSET, 203, - 0x000191, 0x000191, INTEGER_OFFSET, 1, - 0x000193, 0x000193, INTEGER_OFFSET, 205, - 0x000194, 0x000194, INTEGER_OFFSET, 207, - 0x000196, 0x000196, INTEGER_OFFSET, 211, - 0x000197, 0x000197, INTEGER_OFFSET, 209, - 0x000198, 0x000198, INTEGER_OFFSET, 1, - 0x00019c, 0x00019c, INTEGER_OFFSET, 211, - 0x00019d, 0x00019d, INTEGER_OFFSET, 213, - 0x00019f, 0x00019f, INTEGER_OFFSET, 214, - 0x0001a0, 0x0001a0, INTEGER_OFFSET, 1, - 0x0001a2, 0x0001a2, INTEGER_OFFSET, 1, - 0x0001a4, 0x0001a4, INTEGER_OFFSET, 1, - 0x0001a6, 0x0001a6, INTEGER_OFFSET, 218, - 0x0001a7, 0x0001a7, INTEGER_OFFSET, 1, - 0x0001a9, 0x0001a9, INTEGER_OFFSET, 218, - 0x0001ac, 0x0001ac, INTEGER_OFFSET, 1, - 0x0001ae, 0x0001ae, INTEGER_OFFSET, 218, - 0x0001af, 0x0001af, INTEGER_OFFSET, 1, - 0x0001b1, 0x0001b2, INTEGER_OFFSET, 217, - 0x0001b3, 0x0001b3, INTEGER_OFFSET, 1, - 0x0001b5, 0x0001b5, INTEGER_OFFSET, 1, - 0x0001b7, 0x0001b7, INTEGER_OFFSET, 219, - 0x0001b8, 0x0001b8, INTEGER_OFFSET, 1, - 0x0001bc, 0x0001bc, INTEGER_OFFSET, 1, - 0x0001c4, 0x0001c4, INTEGER_OFFSET, 2, - 0x0001c5, 
0x0001c5, INTEGER_OFFSET, 1, - 0x0001c7, 0x0001c7, INTEGER_OFFSET, 2, - 0x0001c8, 0x0001c8, INTEGER_OFFSET, 1, - 0x0001ca, 0x0001ca, INTEGER_OFFSET, 2, - 0x0001cb, 0x0001cb, INTEGER_OFFSET, 1, - 0x0001cd, 0x0001cd, INTEGER_OFFSET, 1, - 0x0001cf, 0x0001cf, INTEGER_OFFSET, 1, - 0x0001d1, 0x0001d1, INTEGER_OFFSET, 1, - 0x0001d3, 0x0001d3, INTEGER_OFFSET, 1, - 0x0001d5, 0x0001d5, INTEGER_OFFSET, 1, - 0x0001d7, 0x0001d7, INTEGER_OFFSET, 1, - 0x0001d9, 0x0001d9, INTEGER_OFFSET, 1, - 0x0001db, 0x0001db, INTEGER_OFFSET, 1, - 0x0001de, 0x0001de, INTEGER_OFFSET, 1, - 0x0001e0, 0x0001e0, INTEGER_OFFSET, 1, - 0x0001e2, 0x0001e2, INTEGER_OFFSET, 1, - 0x0001e4, 0x0001e4, INTEGER_OFFSET, 1, - 0x0001e6, 0x0001e6, INTEGER_OFFSET, 1, - 0x0001e8, 0x0001e8, INTEGER_OFFSET, 1, - 0x0001ea, 0x0001ea, INTEGER_OFFSET, 1, - 0x0001ec, 0x0001ec, INTEGER_OFFSET, 1, - 0x0001ee, 0x0001ee, INTEGER_OFFSET, 1, - 0x0001f1, 0x0001f1, INTEGER_OFFSET, 2, - 0x0001f2, 0x0001f2, INTEGER_OFFSET, 1, - 0x0001f4, 0x0001f4, INTEGER_OFFSET, 1, - 0x0001f6, 0x0001f6, INTEGER_OFFSET, -97, - 0x0001f7, 0x0001f7, INTEGER_OFFSET, -56, - 0x0001f8, 0x0001f8, INTEGER_OFFSET, 1, - 0x0001fa, 0x0001fa, INTEGER_OFFSET, 1, - 0x0001fc, 0x0001fc, INTEGER_OFFSET, 1, - 0x0001fe, 0x0001fe, INTEGER_OFFSET, 1, - 0x000200, 0x000200, INTEGER_OFFSET, 1, - 0x000202, 0x000202, INTEGER_OFFSET, 1, - 0x000204, 0x000204, INTEGER_OFFSET, 1, - 0x000206, 0x000206, INTEGER_OFFSET, 1, - 0x000208, 0x000208, INTEGER_OFFSET, 1, - 0x00020a, 0x00020a, INTEGER_OFFSET, 1, - 0x00020c, 0x00020c, INTEGER_OFFSET, 1, - 0x00020e, 0x00020e, INTEGER_OFFSET, 1, - 0x000210, 0x000210, INTEGER_OFFSET, 1, - 0x000212, 0x000212, INTEGER_OFFSET, 1, - 0x000214, 0x000214, INTEGER_OFFSET, 1, - 0x000216, 0x000216, INTEGER_OFFSET, 1, - 0x000218, 0x000218, INTEGER_OFFSET, 1, - 0x00021a, 0x00021a, INTEGER_OFFSET, 1, - 0x00021c, 0x00021c, INTEGER_OFFSET, 1, - 0x00021e, 0x00021e, INTEGER_OFFSET, 1, - 0x000220, 0x000220, INTEGER_OFFSET, -130, - 0x000222, 0x000222, 
INTEGER_OFFSET, 1, - 0x000224, 0x000224, INTEGER_OFFSET, 1, - 0x000226, 0x000226, INTEGER_OFFSET, 1, - 0x000228, 0x000228, INTEGER_OFFSET, 1, - 0x00022a, 0x00022a, INTEGER_OFFSET, 1, - 0x00022c, 0x00022c, INTEGER_OFFSET, 1, - 0x00022e, 0x00022e, INTEGER_OFFSET, 1, - 0x000230, 0x000230, INTEGER_OFFSET, 1, - 0x000232, 0x000232, INTEGER_OFFSET, 1, - 0x00023a, 0x00023a, INTEGER_OFFSET, 10795, - 0x00023b, 0x00023b, INTEGER_OFFSET, 1, - 0x00023d, 0x00023d, INTEGER_OFFSET, -163, - 0x00023e, 0x00023e, INTEGER_OFFSET, 10792, - 0x000241, 0x000241, INTEGER_OFFSET, 1, - 0x000243, 0x000243, INTEGER_OFFSET, -195, - 0x000244, 0x000244, INTEGER_OFFSET, 69, - 0x000245, 0x000245, INTEGER_OFFSET, 71, - 0x000246, 0x000246, INTEGER_OFFSET, 1, - 0x000248, 0x000248, INTEGER_OFFSET, 1, - 0x00024a, 0x00024a, INTEGER_OFFSET, 1, - 0x00024c, 0x00024c, INTEGER_OFFSET, 1, - 0x00024e, 0x00024e, INTEGER_OFFSET, 1, - 0x000345, 0x000345, INTEGER_OFFSET, 116, - 0x000370, 0x000370, INTEGER_OFFSET, 1, - 0x000372, 0x000372, INTEGER_OFFSET, 1, - 0x000376, 0x000376, INTEGER_OFFSET, 1, - 0x00037f, 0x00037f, INTEGER_OFFSET, 116, - 0x000386, 0x000386, INTEGER_OFFSET, 38, - 0x000388, 0x00038a, INTEGER_OFFSET, 37, - 0x00038c, 0x00038c, INTEGER_OFFSET, 64, - 0x00038e, 0x00038f, INTEGER_OFFSET, 63, - 0x000391, 0x0003a1, INTEGER_OFFSET, 32, - 0x0003a3, 0x0003ab, INTEGER_OFFSET, 32, - 0x0003c2, 0x0003c2, INTEGER_OFFSET, 1, - 0x0003cf, 0x0003cf, INTEGER_OFFSET, 8, - 0x0003d0, 0x0003d0, INTEGER_OFFSET, -30, - 0x0003d1, 0x0003d1, INTEGER_OFFSET, -25, - 0x0003d5, 0x0003d5, INTEGER_OFFSET, -15, - 0x0003d6, 0x0003d6, INTEGER_OFFSET, -22, - 0x0003d8, 0x0003d8, INTEGER_OFFSET, 1, - 0x0003da, 0x0003da, INTEGER_OFFSET, 1, - 0x0003dc, 0x0003dc, INTEGER_OFFSET, 1, - 0x0003de, 0x0003de, INTEGER_OFFSET, 1, - 0x0003e0, 0x0003e0, INTEGER_OFFSET, 1, - 0x0003e2, 0x0003e2, INTEGER_OFFSET, 1, - 0x0003e4, 0x0003e4, INTEGER_OFFSET, 1, - 0x0003e6, 0x0003e6, INTEGER_OFFSET, 1, - 0x0003e8, 0x0003e8, INTEGER_OFFSET, 1, - 0x0003ea, 
0x0003ea, INTEGER_OFFSET, 1, - 0x0003ec, 0x0003ec, INTEGER_OFFSET, 1, - 0x0003ee, 0x0003ee, INTEGER_OFFSET, 1, - 0x0003f0, 0x0003f0, INTEGER_OFFSET, -54, - 0x0003f1, 0x0003f1, INTEGER_OFFSET, -48, - 0x0003f4, 0x0003f4, INTEGER_OFFSET, -60, - 0x0003f5, 0x0003f5, INTEGER_OFFSET, -64, - 0x0003f7, 0x0003f7, INTEGER_OFFSET, 1, - 0x0003f9, 0x0003f9, INTEGER_OFFSET, -7, - 0x0003fa, 0x0003fa, INTEGER_OFFSET, 1, - 0x0003fd, 0x0003ff, INTEGER_OFFSET, -130, - 0x000400, 0x00040f, INTEGER_OFFSET, 80, - 0x000410, 0x00042f, INTEGER_OFFSET, 32, - 0x000460, 0x000460, INTEGER_OFFSET, 1, - 0x000462, 0x000462, INTEGER_OFFSET, 1, - 0x000464, 0x000464, INTEGER_OFFSET, 1, - 0x000466, 0x000466, INTEGER_OFFSET, 1, - 0x000468, 0x000468, INTEGER_OFFSET, 1, - 0x00046a, 0x00046a, INTEGER_OFFSET, 1, - 0x00046c, 0x00046c, INTEGER_OFFSET, 1, - 0x00046e, 0x00046e, INTEGER_OFFSET, 1, - 0x000470, 0x000470, INTEGER_OFFSET, 1, - 0x000472, 0x000472, INTEGER_OFFSET, 1, - 0x000474, 0x000474, INTEGER_OFFSET, 1, - 0x000476, 0x000476, INTEGER_OFFSET, 1, - 0x000478, 0x000478, INTEGER_OFFSET, 1, - 0x00047a, 0x00047a, INTEGER_OFFSET, 1, - 0x00047c, 0x00047c, INTEGER_OFFSET, 1, - 0x00047e, 0x00047e, INTEGER_OFFSET, 1, - 0x000480, 0x000480, INTEGER_OFFSET, 1, - 0x00048a, 0x00048a, INTEGER_OFFSET, 1, - 0x00048c, 0x00048c, INTEGER_OFFSET, 1, - 0x00048e, 0x00048e, INTEGER_OFFSET, 1, - 0x000490, 0x000490, INTEGER_OFFSET, 1, - 0x000492, 0x000492, INTEGER_OFFSET, 1, - 0x000494, 0x000494, INTEGER_OFFSET, 1, - 0x000496, 0x000496, INTEGER_OFFSET, 1, - 0x000498, 0x000498, INTEGER_OFFSET, 1, - 0x00049a, 0x00049a, INTEGER_OFFSET, 1, - 0x00049c, 0x00049c, INTEGER_OFFSET, 1, - 0x00049e, 0x00049e, INTEGER_OFFSET, 1, - 0x0004a0, 0x0004a0, INTEGER_OFFSET, 1, - 0x0004a2, 0x0004a2, INTEGER_OFFSET, 1, - 0x0004a4, 0x0004a4, INTEGER_OFFSET, 1, - 0x0004a6, 0x0004a6, INTEGER_OFFSET, 1, - 0x0004a8, 0x0004a8, INTEGER_OFFSET, 1, - 0x0004aa, 0x0004aa, INTEGER_OFFSET, 1, - 0x0004ac, 0x0004ac, INTEGER_OFFSET, 1, - 0x0004ae, 0x0004ae, 
INTEGER_OFFSET, 1, - 0x0004b0, 0x0004b0, INTEGER_OFFSET, 1, - 0x0004b2, 0x0004b2, INTEGER_OFFSET, 1, - 0x0004b4, 0x0004b4, INTEGER_OFFSET, 1, - 0x0004b6, 0x0004b6, INTEGER_OFFSET, 1, - 0x0004b8, 0x0004b8, INTEGER_OFFSET, 1, - 0x0004ba, 0x0004ba, INTEGER_OFFSET, 1, - 0x0004bc, 0x0004bc, INTEGER_OFFSET, 1, - 0x0004be, 0x0004be, INTEGER_OFFSET, 1, - 0x0004c0, 0x0004c0, INTEGER_OFFSET, 15, - 0x0004c1, 0x0004c1, INTEGER_OFFSET, 1, - 0x0004c3, 0x0004c3, INTEGER_OFFSET, 1, - 0x0004c5, 0x0004c5, INTEGER_OFFSET, 1, - 0x0004c7, 0x0004c7, INTEGER_OFFSET, 1, - 0x0004c9, 0x0004c9, INTEGER_OFFSET, 1, - 0x0004cb, 0x0004cb, INTEGER_OFFSET, 1, - 0x0004cd, 0x0004cd, INTEGER_OFFSET, 1, - 0x0004d0, 0x0004d0, INTEGER_OFFSET, 1, - 0x0004d2, 0x0004d2, INTEGER_OFFSET, 1, - 0x0004d4, 0x0004d4, INTEGER_OFFSET, 1, - 0x0004d6, 0x0004d6, INTEGER_OFFSET, 1, - 0x0004d8, 0x0004d8, INTEGER_OFFSET, 1, - 0x0004da, 0x0004da, INTEGER_OFFSET, 1, - 0x0004dc, 0x0004dc, INTEGER_OFFSET, 1, - 0x0004de, 0x0004de, INTEGER_OFFSET, 1, - 0x0004e0, 0x0004e0, INTEGER_OFFSET, 1, - 0x0004e2, 0x0004e2, INTEGER_OFFSET, 1, - 0x0004e4, 0x0004e4, INTEGER_OFFSET, 1, - 0x0004e6, 0x0004e6, INTEGER_OFFSET, 1, - 0x0004e8, 0x0004e8, INTEGER_OFFSET, 1, - 0x0004ea, 0x0004ea, INTEGER_OFFSET, 1, - 0x0004ec, 0x0004ec, INTEGER_OFFSET, 1, - 0x0004ee, 0x0004ee, INTEGER_OFFSET, 1, - 0x0004f0, 0x0004f0, INTEGER_OFFSET, 1, - 0x0004f2, 0x0004f2, INTEGER_OFFSET, 1, - 0x0004f4, 0x0004f4, INTEGER_OFFSET, 1, - 0x0004f6, 0x0004f6, INTEGER_OFFSET, 1, - 0x0004f8, 0x0004f8, INTEGER_OFFSET, 1, - 0x0004fa, 0x0004fa, INTEGER_OFFSET, 1, - 0x0004fc, 0x0004fc, INTEGER_OFFSET, 1, - 0x0004fe, 0x0004fe, INTEGER_OFFSET, 1, - 0x000500, 0x000500, INTEGER_OFFSET, 1, - 0x000502, 0x000502, INTEGER_OFFSET, 1, - 0x000504, 0x000504, INTEGER_OFFSET, 1, - 0x000506, 0x000506, INTEGER_OFFSET, 1, - 0x000508, 0x000508, INTEGER_OFFSET, 1, - 0x00050a, 0x00050a, INTEGER_OFFSET, 1, - 0x00050c, 0x00050c, INTEGER_OFFSET, 1, - 0x00050e, 0x00050e, INTEGER_OFFSET, 1, - 0x000510, 
0x000510, INTEGER_OFFSET, 1, - 0x000512, 0x000512, INTEGER_OFFSET, 1, - 0x000514, 0x000514, INTEGER_OFFSET, 1, - 0x000516, 0x000516, INTEGER_OFFSET, 1, - 0x000518, 0x000518, INTEGER_OFFSET, 1, - 0x00051a, 0x00051a, INTEGER_OFFSET, 1, - 0x00051c, 0x00051c, INTEGER_OFFSET, 1, - 0x00051e, 0x00051e, INTEGER_OFFSET, 1, - 0x000520, 0x000520, INTEGER_OFFSET, 1, - 0x000522, 0x000522, INTEGER_OFFSET, 1, - 0x000524, 0x000524, INTEGER_OFFSET, 1, - 0x000526, 0x000526, INTEGER_OFFSET, 1, - 0x000528, 0x000528, INTEGER_OFFSET, 1, - 0x00052a, 0x00052a, INTEGER_OFFSET, 1, - 0x00052c, 0x00052c, INTEGER_OFFSET, 1, - 0x00052e, 0x00052e, INTEGER_OFFSET, 1, - 0x000531, 0x000556, INTEGER_OFFSET, 48, - 0x0010a0, 0x0010c5, INTEGER_OFFSET, 7264, - 0x0010c7, 0x0010c7, INTEGER_OFFSET, 7264, - 0x0010cd, 0x0010cd, INTEGER_OFFSET, 7264, - 0x0013f8, 0x0013fd, INTEGER_OFFSET, -8, - 0x001c80, 0x001c80, INTEGER_OFFSET, -6222, - 0x001c81, 0x001c81, INTEGER_OFFSET, -6221, - 0x001c82, 0x001c82, INTEGER_OFFSET, -6212, - 0x001c83, 0x001c84, INTEGER_OFFSET, -6210, - 0x001c85, 0x001c85, INTEGER_OFFSET, -6211, - 0x001c86, 0x001c86, INTEGER_OFFSET, -6204, - 0x001c87, 0x001c87, INTEGER_OFFSET, -6180, - 0x001c88, 0x001c88, INTEGER_OFFSET, 35267, - 0x001c90, 0x001cba, INTEGER_OFFSET, -3008, - 0x001cbd, 0x001cbf, INTEGER_OFFSET, -3008, - 0x001e00, 0x001e00, INTEGER_OFFSET, 1, - 0x001e02, 0x001e02, INTEGER_OFFSET, 1, - 0x001e04, 0x001e04, INTEGER_OFFSET, 1, - 0x001e06, 0x001e06, INTEGER_OFFSET, 1, - 0x001e08, 0x001e08, INTEGER_OFFSET, 1, - 0x001e0a, 0x001e0a, INTEGER_OFFSET, 1, - 0x001e0c, 0x001e0c, INTEGER_OFFSET, 1, - 0x001e0e, 0x001e0e, INTEGER_OFFSET, 1, - 0x001e10, 0x001e10, INTEGER_OFFSET, 1, - 0x001e12, 0x001e12, INTEGER_OFFSET, 1, - 0x001e14, 0x001e14, INTEGER_OFFSET, 1, - 0x001e16, 0x001e16, INTEGER_OFFSET, 1, - 0x001e18, 0x001e18, INTEGER_OFFSET, 1, - 0x001e1a, 0x001e1a, INTEGER_OFFSET, 1, - 0x001e1c, 0x001e1c, INTEGER_OFFSET, 1, - 0x001e1e, 0x001e1e, INTEGER_OFFSET, 1, - 0x001e20, 0x001e20, 
INTEGER_OFFSET, 1, - 0x001e22, 0x001e22, INTEGER_OFFSET, 1, - 0x001e24, 0x001e24, INTEGER_OFFSET, 1, - 0x001e26, 0x001e26, INTEGER_OFFSET, 1, - 0x001e28, 0x001e28, INTEGER_OFFSET, 1, - 0x001e2a, 0x001e2a, INTEGER_OFFSET, 1, - 0x001e2c, 0x001e2c, INTEGER_OFFSET, 1, - 0x001e2e, 0x001e2e, INTEGER_OFFSET, 1, - 0x001e30, 0x001e30, INTEGER_OFFSET, 1, - 0x001e32, 0x001e32, INTEGER_OFFSET, 1, - 0x001e34, 0x001e34, INTEGER_OFFSET, 1, - 0x001e36, 0x001e36, INTEGER_OFFSET, 1, - 0x001e38, 0x001e38, INTEGER_OFFSET, 1, - 0x001e3a, 0x001e3a, INTEGER_OFFSET, 1, - 0x001e3c, 0x001e3c, INTEGER_OFFSET, 1, - 0x001e3e, 0x001e3e, INTEGER_OFFSET, 1, - 0x001e40, 0x001e40, INTEGER_OFFSET, 1, - 0x001e42, 0x001e42, INTEGER_OFFSET, 1, - 0x001e44, 0x001e44, INTEGER_OFFSET, 1, - 0x001e46, 0x001e46, INTEGER_OFFSET, 1, - 0x001e48, 0x001e48, INTEGER_OFFSET, 1, - 0x001e4a, 0x001e4a, INTEGER_OFFSET, 1, - 0x001e4c, 0x001e4c, INTEGER_OFFSET, 1, - 0x001e4e, 0x001e4e, INTEGER_OFFSET, 1, - 0x001e50, 0x001e50, INTEGER_OFFSET, 1, - 0x001e52, 0x001e52, INTEGER_OFFSET, 1, - 0x001e54, 0x001e54, INTEGER_OFFSET, 1, - 0x001e56, 0x001e56, INTEGER_OFFSET, 1, - 0x001e58, 0x001e58, INTEGER_OFFSET, 1, - 0x001e5a, 0x001e5a, INTEGER_OFFSET, 1, - 0x001e5c, 0x001e5c, INTEGER_OFFSET, 1, - 0x001e5e, 0x001e5e, INTEGER_OFFSET, 1, - 0x001e60, 0x001e60, INTEGER_OFFSET, 1, - 0x001e62, 0x001e62, INTEGER_OFFSET, 1, - 0x001e64, 0x001e64, INTEGER_OFFSET, 1, - 0x001e66, 0x001e66, INTEGER_OFFSET, 1, - 0x001e68, 0x001e68, INTEGER_OFFSET, 1, - 0x001e6a, 0x001e6a, INTEGER_OFFSET, 1, - 0x001e6c, 0x001e6c, INTEGER_OFFSET, 1, - 0x001e6e, 0x001e6e, INTEGER_OFFSET, 1, - 0x001e70, 0x001e70, INTEGER_OFFSET, 1, - 0x001e72, 0x001e72, INTEGER_OFFSET, 1, - 0x001e74, 0x001e74, INTEGER_OFFSET, 1, - 0x001e76, 0x001e76, INTEGER_OFFSET, 1, - 0x001e78, 0x001e78, INTEGER_OFFSET, 1, - 0x001e7a, 0x001e7a, INTEGER_OFFSET, 1, - 0x001e7c, 0x001e7c, INTEGER_OFFSET, 1, - 0x001e7e, 0x001e7e, INTEGER_OFFSET, 1, - 0x001e80, 0x001e80, INTEGER_OFFSET, 1, - 0x001e82, 
0x001e82, INTEGER_OFFSET, 1, - 0x001e84, 0x001e84, INTEGER_OFFSET, 1, - 0x001e86, 0x001e86, INTEGER_OFFSET, 1, - 0x001e88, 0x001e88, INTEGER_OFFSET, 1, - 0x001e8a, 0x001e8a, INTEGER_OFFSET, 1, - 0x001e8c, 0x001e8c, INTEGER_OFFSET, 1, - 0x001e8e, 0x001e8e, INTEGER_OFFSET, 1, - 0x001e90, 0x001e90, INTEGER_OFFSET, 1, - 0x001e92, 0x001e92, INTEGER_OFFSET, 1, - 0x001e94, 0x001e94, INTEGER_OFFSET, 1, - 0x001e9b, 0x001e9b, INTEGER_OFFSET, -58, - 0x001e9e, 0x001e9e, INTEGER_OFFSET, -7615, - 0x001ea0, 0x001ea0, INTEGER_OFFSET, 1, - 0x001ea2, 0x001ea2, INTEGER_OFFSET, 1, - 0x001ea4, 0x001ea4, INTEGER_OFFSET, 1, - 0x001ea6, 0x001ea6, INTEGER_OFFSET, 1, - 0x001ea8, 0x001ea8, INTEGER_OFFSET, 1, - 0x001eaa, 0x001eaa, INTEGER_OFFSET, 1, - 0x001eac, 0x001eac, INTEGER_OFFSET, 1, - 0x001eae, 0x001eae, INTEGER_OFFSET, 1, - 0x001eb0, 0x001eb0, INTEGER_OFFSET, 1, - 0x001eb2, 0x001eb2, INTEGER_OFFSET, 1, - 0x001eb4, 0x001eb4, INTEGER_OFFSET, 1, - 0x001eb6, 0x001eb6, INTEGER_OFFSET, 1, - 0x001eb8, 0x001eb8, INTEGER_OFFSET, 1, - 0x001eba, 0x001eba, INTEGER_OFFSET, 1, - 0x001ebc, 0x001ebc, INTEGER_OFFSET, 1, - 0x001ebe, 0x001ebe, INTEGER_OFFSET, 1, - 0x001ec0, 0x001ec0, INTEGER_OFFSET, 1, - 0x001ec2, 0x001ec2, INTEGER_OFFSET, 1, - 0x001ec4, 0x001ec4, INTEGER_OFFSET, 1, - 0x001ec6, 0x001ec6, INTEGER_OFFSET, 1, - 0x001ec8, 0x001ec8, INTEGER_OFFSET, 1, - 0x001eca, 0x001eca, INTEGER_OFFSET, 1, - 0x001ecc, 0x001ecc, INTEGER_OFFSET, 1, - 0x001ece, 0x001ece, INTEGER_OFFSET, 1, - 0x001ed0, 0x001ed0, INTEGER_OFFSET, 1, - 0x001ed2, 0x001ed2, INTEGER_OFFSET, 1, - 0x001ed4, 0x001ed4, INTEGER_OFFSET, 1, - 0x001ed6, 0x001ed6, INTEGER_OFFSET, 1, - 0x001ed8, 0x001ed8, INTEGER_OFFSET, 1, - 0x001eda, 0x001eda, INTEGER_OFFSET, 1, - 0x001edc, 0x001edc, INTEGER_OFFSET, 1, - 0x001ede, 0x001ede, INTEGER_OFFSET, 1, - 0x001ee0, 0x001ee0, INTEGER_OFFSET, 1, - 0x001ee2, 0x001ee2, INTEGER_OFFSET, 1, - 0x001ee4, 0x001ee4, INTEGER_OFFSET, 1, - 0x001ee6, 0x001ee6, INTEGER_OFFSET, 1, - 0x001ee8, 0x001ee8, INTEGER_OFFSET, 
1, - 0x001eea, 0x001eea, INTEGER_OFFSET, 1, - 0x001eec, 0x001eec, INTEGER_OFFSET, 1, - 0x001eee, 0x001eee, INTEGER_OFFSET, 1, - 0x001ef0, 0x001ef0, INTEGER_OFFSET, 1, - 0x001ef2, 0x001ef2, INTEGER_OFFSET, 1, - 0x001ef4, 0x001ef4, INTEGER_OFFSET, 1, - 0x001ef6, 0x001ef6, INTEGER_OFFSET, 1, - 0x001ef8, 0x001ef8, INTEGER_OFFSET, 1, - 0x001efa, 0x001efa, INTEGER_OFFSET, 1, - 0x001efc, 0x001efc, INTEGER_OFFSET, 1, - 0x001efe, 0x001efe, INTEGER_OFFSET, 1, - 0x001f08, 0x001f0f, INTEGER_OFFSET, -8, - 0x001f18, 0x001f1d, INTEGER_OFFSET, -8, - 0x001f28, 0x001f2f, INTEGER_OFFSET, -8, - 0x001f38, 0x001f3f, INTEGER_OFFSET, -8, - 0x001f48, 0x001f4d, INTEGER_OFFSET, -8, - 0x001f59, 0x001f59, INTEGER_OFFSET, -8, - 0x001f5b, 0x001f5b, INTEGER_OFFSET, -8, - 0x001f5d, 0x001f5d, INTEGER_OFFSET, -8, - 0x001f5f, 0x001f5f, INTEGER_OFFSET, -8, - 0x001f68, 0x001f6f, INTEGER_OFFSET, -8, - 0x001f88, 0x001f8f, INTEGER_OFFSET, -8, - 0x001f98, 0x001f9f, INTEGER_OFFSET, -8, - 0x001fa8, 0x001faf, INTEGER_OFFSET, -8, - 0x001fb8, 0x001fb9, INTEGER_OFFSET, -8, - 0x001fba, 0x001fbb, INTEGER_OFFSET, -74, - 0x001fbc, 0x001fbc, INTEGER_OFFSET, -9, - 0x001fbe, 0x001fbe, INTEGER_OFFSET, -7173, - 0x001fc8, 0x001fcb, INTEGER_OFFSET, -86, - 0x001fcc, 0x001fcc, INTEGER_OFFSET, -9, - 0x001fd8, 0x001fd9, INTEGER_OFFSET, -8, - 0x001fda, 0x001fdb, INTEGER_OFFSET, -100, - 0x001fe8, 0x001fe9, INTEGER_OFFSET, -8, - 0x001fea, 0x001feb, INTEGER_OFFSET, -112, - 0x001fec, 0x001fec, INTEGER_OFFSET, -7, - 0x001ff8, 0x001ff9, INTEGER_OFFSET, -128, - 0x001ffa, 0x001ffb, INTEGER_OFFSET, -126, - 0x001ffc, 0x001ffc, INTEGER_OFFSET, -9, - 0x002126, 0x002126, INTEGER_OFFSET, -7517, - 0x00212a, 0x00212a, INTEGER_OFFSET, -8383, - 0x00212b, 0x00212b, INTEGER_OFFSET, -8262, - 0x002132, 0x002132, INTEGER_OFFSET, 28, - 0x002160, 0x00216f, INTEGER_OFFSET, 16, - 0x002183, 0x002183, INTEGER_OFFSET, 1, - 0x0024b6, 0x0024cf, INTEGER_OFFSET, 26, - 0x002c00, 0x002c2f, INTEGER_OFFSET, 48, - 0x002c60, 0x002c60, INTEGER_OFFSET, 1, - 0x002c62, 
0x002c62, INTEGER_OFFSET, -10743, - 0x002c63, 0x002c63, INTEGER_OFFSET, -3814, - 0x002c64, 0x002c64, INTEGER_OFFSET, -10727, - 0x002c67, 0x002c67, INTEGER_OFFSET, 1, - 0x002c69, 0x002c69, INTEGER_OFFSET, 1, - 0x002c6b, 0x002c6b, INTEGER_OFFSET, 1, - 0x002c6d, 0x002c6d, INTEGER_OFFSET, -10780, - 0x002c6e, 0x002c6e, INTEGER_OFFSET, -10749, - 0x002c6f, 0x002c6f, INTEGER_OFFSET, -10783, - 0x002c70, 0x002c70, INTEGER_OFFSET, -10782, - 0x002c72, 0x002c72, INTEGER_OFFSET, 1, - 0x002c75, 0x002c75, INTEGER_OFFSET, 1, - 0x002c7e, 0x002c7f, INTEGER_OFFSET, -10815, - 0x002c80, 0x002c80, INTEGER_OFFSET, 1, - 0x002c82, 0x002c82, INTEGER_OFFSET, 1, - 0x002c84, 0x002c84, INTEGER_OFFSET, 1, - 0x002c86, 0x002c86, INTEGER_OFFSET, 1, - 0x002c88, 0x002c88, INTEGER_OFFSET, 1, - 0x002c8a, 0x002c8a, INTEGER_OFFSET, 1, - 0x002c8c, 0x002c8c, INTEGER_OFFSET, 1, - 0x002c8e, 0x002c8e, INTEGER_OFFSET, 1, - 0x002c90, 0x002c90, INTEGER_OFFSET, 1, - 0x002c92, 0x002c92, INTEGER_OFFSET, 1, - 0x002c94, 0x002c94, INTEGER_OFFSET, 1, - 0x002c96, 0x002c96, INTEGER_OFFSET, 1, - 0x002c98, 0x002c98, INTEGER_OFFSET, 1, - 0x002c9a, 0x002c9a, INTEGER_OFFSET, 1, - 0x002c9c, 0x002c9c, INTEGER_OFFSET, 1, - 0x002c9e, 0x002c9e, INTEGER_OFFSET, 1, - 0x002ca0, 0x002ca0, INTEGER_OFFSET, 1, - 0x002ca2, 0x002ca2, INTEGER_OFFSET, 1, - 0x002ca4, 0x002ca4, INTEGER_OFFSET, 1, - 0x002ca6, 0x002ca6, INTEGER_OFFSET, 1, - 0x002ca8, 0x002ca8, INTEGER_OFFSET, 1, - 0x002caa, 0x002caa, INTEGER_OFFSET, 1, - 0x002cac, 0x002cac, INTEGER_OFFSET, 1, - 0x002cae, 0x002cae, INTEGER_OFFSET, 1, - 0x002cb0, 0x002cb0, INTEGER_OFFSET, 1, - 0x002cb2, 0x002cb2, INTEGER_OFFSET, 1, - 0x002cb4, 0x002cb4, INTEGER_OFFSET, 1, - 0x002cb6, 0x002cb6, INTEGER_OFFSET, 1, - 0x002cb8, 0x002cb8, INTEGER_OFFSET, 1, - 0x002cba, 0x002cba, INTEGER_OFFSET, 1, - 0x002cbc, 0x002cbc, INTEGER_OFFSET, 1, - 0x002cbe, 0x002cbe, INTEGER_OFFSET, 1, - 0x002cc0, 0x002cc0, INTEGER_OFFSET, 1, - 0x002cc2, 0x002cc2, INTEGER_OFFSET, 1, - 0x002cc4, 0x002cc4, INTEGER_OFFSET, 1, - 
0x002cc6, 0x002cc6, INTEGER_OFFSET, 1, - 0x002cc8, 0x002cc8, INTEGER_OFFSET, 1, - 0x002cca, 0x002cca, INTEGER_OFFSET, 1, - 0x002ccc, 0x002ccc, INTEGER_OFFSET, 1, - 0x002cce, 0x002cce, INTEGER_OFFSET, 1, - 0x002cd0, 0x002cd0, INTEGER_OFFSET, 1, - 0x002cd2, 0x002cd2, INTEGER_OFFSET, 1, - 0x002cd4, 0x002cd4, INTEGER_OFFSET, 1, - 0x002cd6, 0x002cd6, INTEGER_OFFSET, 1, - 0x002cd8, 0x002cd8, INTEGER_OFFSET, 1, - 0x002cda, 0x002cda, INTEGER_OFFSET, 1, - 0x002cdc, 0x002cdc, INTEGER_OFFSET, 1, - 0x002cde, 0x002cde, INTEGER_OFFSET, 1, - 0x002ce0, 0x002ce0, INTEGER_OFFSET, 1, - 0x002ce2, 0x002ce2, INTEGER_OFFSET, 1, - 0x002ceb, 0x002ceb, INTEGER_OFFSET, 1, - 0x002ced, 0x002ced, INTEGER_OFFSET, 1, - 0x002cf2, 0x002cf2, INTEGER_OFFSET, 1, - 0x00a640, 0x00a640, INTEGER_OFFSET, 1, - 0x00a642, 0x00a642, INTEGER_OFFSET, 1, - 0x00a644, 0x00a644, INTEGER_OFFSET, 1, - 0x00a646, 0x00a646, INTEGER_OFFSET, 1, - 0x00a648, 0x00a648, INTEGER_OFFSET, 1, - 0x00a64a, 0x00a64a, INTEGER_OFFSET, 1, - 0x00a64c, 0x00a64c, INTEGER_OFFSET, 1, - 0x00a64e, 0x00a64e, INTEGER_OFFSET, 1, - 0x00a650, 0x00a650, INTEGER_OFFSET, 1, - 0x00a652, 0x00a652, INTEGER_OFFSET, 1, - 0x00a654, 0x00a654, INTEGER_OFFSET, 1, - 0x00a656, 0x00a656, INTEGER_OFFSET, 1, - 0x00a658, 0x00a658, INTEGER_OFFSET, 1, - 0x00a65a, 0x00a65a, INTEGER_OFFSET, 1, - 0x00a65c, 0x00a65c, INTEGER_OFFSET, 1, - 0x00a65e, 0x00a65e, INTEGER_OFFSET, 1, - 0x00a660, 0x00a660, INTEGER_OFFSET, 1, - 0x00a662, 0x00a662, INTEGER_OFFSET, 1, - 0x00a664, 0x00a664, INTEGER_OFFSET, 1, - 0x00a666, 0x00a666, INTEGER_OFFSET, 1, - 0x00a668, 0x00a668, INTEGER_OFFSET, 1, - 0x00a66a, 0x00a66a, INTEGER_OFFSET, 1, - 0x00a66c, 0x00a66c, INTEGER_OFFSET, 1, - 0x00a680, 0x00a680, INTEGER_OFFSET, 1, - 0x00a682, 0x00a682, INTEGER_OFFSET, 1, - 0x00a684, 0x00a684, INTEGER_OFFSET, 1, - 0x00a686, 0x00a686, INTEGER_OFFSET, 1, - 0x00a688, 0x00a688, INTEGER_OFFSET, 1, - 0x00a68a, 0x00a68a, INTEGER_OFFSET, 1, - 0x00a68c, 0x00a68c, INTEGER_OFFSET, 1, - 0x00a68e, 0x00a68e, 
INTEGER_OFFSET, 1, - 0x00a690, 0x00a690, INTEGER_OFFSET, 1, - 0x00a692, 0x00a692, INTEGER_OFFSET, 1, - 0x00a694, 0x00a694, INTEGER_OFFSET, 1, - 0x00a696, 0x00a696, INTEGER_OFFSET, 1, - 0x00a698, 0x00a698, INTEGER_OFFSET, 1, - 0x00a69a, 0x00a69a, INTEGER_OFFSET, 1, - 0x00a722, 0x00a722, INTEGER_OFFSET, 1, - 0x00a724, 0x00a724, INTEGER_OFFSET, 1, - 0x00a726, 0x00a726, INTEGER_OFFSET, 1, - 0x00a728, 0x00a728, INTEGER_OFFSET, 1, - 0x00a72a, 0x00a72a, INTEGER_OFFSET, 1, - 0x00a72c, 0x00a72c, INTEGER_OFFSET, 1, - 0x00a72e, 0x00a72e, INTEGER_OFFSET, 1, - 0x00a732, 0x00a732, INTEGER_OFFSET, 1, - 0x00a734, 0x00a734, INTEGER_OFFSET, 1, - 0x00a736, 0x00a736, INTEGER_OFFSET, 1, - 0x00a738, 0x00a738, INTEGER_OFFSET, 1, - 0x00a73a, 0x00a73a, INTEGER_OFFSET, 1, - 0x00a73c, 0x00a73c, INTEGER_OFFSET, 1, - 0x00a73e, 0x00a73e, INTEGER_OFFSET, 1, - 0x00a740, 0x00a740, INTEGER_OFFSET, 1, - 0x00a742, 0x00a742, INTEGER_OFFSET, 1, - 0x00a744, 0x00a744, INTEGER_OFFSET, 1, - 0x00a746, 0x00a746, INTEGER_OFFSET, 1, - 0x00a748, 0x00a748, INTEGER_OFFSET, 1, - 0x00a74a, 0x00a74a, INTEGER_OFFSET, 1, - 0x00a74c, 0x00a74c, INTEGER_OFFSET, 1, - 0x00a74e, 0x00a74e, INTEGER_OFFSET, 1, - 0x00a750, 0x00a750, INTEGER_OFFSET, 1, - 0x00a752, 0x00a752, INTEGER_OFFSET, 1, - 0x00a754, 0x00a754, INTEGER_OFFSET, 1, - 0x00a756, 0x00a756, INTEGER_OFFSET, 1, - 0x00a758, 0x00a758, INTEGER_OFFSET, 1, - 0x00a75a, 0x00a75a, INTEGER_OFFSET, 1, - 0x00a75c, 0x00a75c, INTEGER_OFFSET, 1, - 0x00a75e, 0x00a75e, INTEGER_OFFSET, 1, - 0x00a760, 0x00a760, INTEGER_OFFSET, 1, - 0x00a762, 0x00a762, INTEGER_OFFSET, 1, - 0x00a764, 0x00a764, INTEGER_OFFSET, 1, - 0x00a766, 0x00a766, INTEGER_OFFSET, 1, - 0x00a768, 0x00a768, INTEGER_OFFSET, 1, - 0x00a76a, 0x00a76a, INTEGER_OFFSET, 1, - 0x00a76c, 0x00a76c, INTEGER_OFFSET, 1, - 0x00a76e, 0x00a76e, INTEGER_OFFSET, 1, - 0x00a779, 0x00a779, INTEGER_OFFSET, 1, - 0x00a77b, 0x00a77b, INTEGER_OFFSET, 1, - 0x00a77d, 0x00a77d, INTEGER_OFFSET, -35332, - 0x00a77e, 0x00a77e, INTEGER_OFFSET, 1, - 
0x00a780, 0x00a780, INTEGER_OFFSET, 1, - 0x00a782, 0x00a782, INTEGER_OFFSET, 1, - 0x00a784, 0x00a784, INTEGER_OFFSET, 1, - 0x00a786, 0x00a786, INTEGER_OFFSET, 1, - 0x00a78b, 0x00a78b, INTEGER_OFFSET, 1, - 0x00a78d, 0x00a78d, INTEGER_OFFSET, -42280, - 0x00a790, 0x00a790, INTEGER_OFFSET, 1, - 0x00a792, 0x00a792, INTEGER_OFFSET, 1, - 0x00a796, 0x00a796, INTEGER_OFFSET, 1, - 0x00a798, 0x00a798, INTEGER_OFFSET, 1, - 0x00a79a, 0x00a79a, INTEGER_OFFSET, 1, - 0x00a79c, 0x00a79c, INTEGER_OFFSET, 1, - 0x00a79e, 0x00a79e, INTEGER_OFFSET, 1, - 0x00a7a0, 0x00a7a0, INTEGER_OFFSET, 1, - 0x00a7a2, 0x00a7a2, INTEGER_OFFSET, 1, - 0x00a7a4, 0x00a7a4, INTEGER_OFFSET, 1, - 0x00a7a6, 0x00a7a6, INTEGER_OFFSET, 1, - 0x00a7a8, 0x00a7a8, INTEGER_OFFSET, 1, - 0x00a7aa, 0x00a7aa, INTEGER_OFFSET, -42308, - 0x00a7ab, 0x00a7ab, INTEGER_OFFSET, -42319, - 0x00a7ac, 0x00a7ac, INTEGER_OFFSET, -42315, - 0x00a7ad, 0x00a7ad, INTEGER_OFFSET, -42305, - 0x00a7ae, 0x00a7ae, INTEGER_OFFSET, -42308, - 0x00a7b0, 0x00a7b0, INTEGER_OFFSET, -42258, - 0x00a7b1, 0x00a7b1, INTEGER_OFFSET, -42282, - 0x00a7b2, 0x00a7b2, INTEGER_OFFSET, -42261, - 0x00a7b3, 0x00a7b3, INTEGER_OFFSET, 928, - 0x00a7b4, 0x00a7b4, INTEGER_OFFSET, 1, - 0x00a7b6, 0x00a7b6, INTEGER_OFFSET, 1, - 0x00a7b8, 0x00a7b8, INTEGER_OFFSET, 1, - 0x00a7ba, 0x00a7ba, INTEGER_OFFSET, 1, - 0x00a7bc, 0x00a7bc, INTEGER_OFFSET, 1, - 0x00a7be, 0x00a7be, INTEGER_OFFSET, 1, - 0x00a7c0, 0x00a7c0, INTEGER_OFFSET, 1, - 0x00a7c2, 0x00a7c2, INTEGER_OFFSET, 1, - 0x00a7c4, 0x00a7c4, INTEGER_OFFSET, -48, - 0x00a7c5, 0x00a7c5, INTEGER_OFFSET, -42307, - 0x00a7c6, 0x00a7c6, INTEGER_OFFSET, -35384, - 0x00a7c7, 0x00a7c7, INTEGER_OFFSET, 1, - 0x00a7c9, 0x00a7c9, INTEGER_OFFSET, 1, - 0x00a7d0, 0x00a7d0, INTEGER_OFFSET, 1, - 0x00a7d6, 0x00a7d6, INTEGER_OFFSET, 1, - 0x00a7d8, 0x00a7d8, INTEGER_OFFSET, 1, - 0x00a7f5, 0x00a7f5, INTEGER_OFFSET, 1, - 0x00ab70, 0x00abbf, INTEGER_OFFSET, -38864, - 0x00ff21, 0x00ff3a, INTEGER_OFFSET, 32, - 0x010400, 0x010427, INTEGER_OFFSET, 40, - 
0x0104b0, 0x0104d3, INTEGER_OFFSET, 40, - 0x010570, 0x01057a, INTEGER_OFFSET, 39, - 0x01057c, 0x01058a, INTEGER_OFFSET, 39, - 0x01058c, 0x010592, INTEGER_OFFSET, 39, - 0x010594, 0x010595, INTEGER_OFFSET, 39, - 0x010c80, 0x010cb2, INTEGER_OFFSET, 64, - 0x0118a0, 0x0118bf, INTEGER_OFFSET, 32, - 0x016e40, 0x016e5f, INTEGER_OFFSET, 32, - 0x01e900, 0x01e921, INTEGER_OFFSET, 34 - }); - - /* GENERATED CODE END - KEEP THIS MARKER FOR AUTOMATIC UPDATES */ - -} diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/RubyCaseUnfoldingTrie.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/CaseUnfoldingTrie.java similarity index 77% rename from regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/RubyCaseUnfoldingTrie.java rename to regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/CaseUnfoldingTrie.java index c73997b35135..b9d372e47c7a 100644 --- a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/RubyCaseUnfoldingTrie.java +++ b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/CaseUnfoldingTrie.java @@ -38,7 +38,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -package com.oracle.truffle.regex.tregex.parser.flavors; +package com.oracle.truffle.regex.tregex.parser; import java.util.ArrayList; import java.util.Collections; @@ -47,20 +47,13 @@ import org.graalvm.collections.EconomicMap; -public final class RubyCaseUnfoldingTrie { - - public static final RubyCaseUnfoldingTrie CASE_UNFOLD; - - static { - CASE_UNFOLD = new RubyCaseUnfoldingTrie(0); - RubyCaseFoldingData.CASE_FOLD.forEach((k, v) -> CASE_UNFOLD.add(k, v, 0)); - } +public final class CaseUnfoldingTrie { private final List codepoints; - private final EconomicMap childNodes; + private final EconomicMap childNodes; private final int depth; - public RubyCaseUnfoldingTrie(int depth) { + public CaseUnfoldingTrie(int depth) { this.codepoints = new ArrayList<>(); this.childNodes = EconomicMap.create(); this.depth = depth; @@ -73,7 +66,7 @@ public void add(int codepoint, int[] caseFoldedString, int offset) { } if (!hasChildAt(caseFoldedString[offset])) { - childNodes.put(caseFoldedString[offset], new RubyCaseUnfoldingTrie(depth + 1)); + childNodes.put(caseFoldedString[offset], new CaseUnfoldingTrie(depth + 1)); } getChildAt(caseFoldedString[offset]).add(codepoint, caseFoldedString, offset + 1); } @@ -82,7 +75,7 @@ public boolean hasChildAt(int index) { return childNodes.containsKey(index); } - public RubyCaseUnfoldingTrie getChildAt(int index) { + public CaseUnfoldingTrie getChildAt(int index) { return childNodes.get(index); } @@ -122,19 +115,19 @@ public int getCodepoint() { } } - public static List findUnfoldings(List caseFolded) { - List states = new ArrayList<>(); - List nextStates = new ArrayList<>(); + public static List findUnfoldings(CaseFoldData.CaseFoldAlgorithm algorithm, List caseFolded) { + List states = new ArrayList<>(); + List nextStates = new ArrayList<>(); List unfoldings = new ArrayList<>(); for (int i = 0; i < caseFolded.size(); i++) { int codepoint = caseFolded.get(i); - states.add(RubyCaseUnfoldingTrie.CASE_UNFOLD); + 
states.add(CaseFoldData.getUnfoldingTrie(algorithm)); - for (RubyCaseUnfoldingTrie state : states) { + for (CaseUnfoldingTrie state : states) { if (state.hasChildAt(codepoint)) { - RubyCaseUnfoldingTrie newState = state.getChildAt(codepoint); + CaseUnfoldingTrie newState = state.getChildAt(codepoint); nextStates.add(newState); for (int unfoldedCodepoint : newState.getCodepoints()) { unfoldings.add(new Unfolding(i + 1 - newState.getDepth(), newState.getDepth(), unfoldedCodepoint)); @@ -142,7 +135,7 @@ public static List findUnfoldings(List caseFolded) { } } - List statesTmp = states; + List statesTmp = states; states = nextStates; nextStates = statesTmp; @@ -154,8 +147,8 @@ public static List findUnfoldings(List caseFolded) { return unfoldings; } - public static List findSingleCharUnfoldings(int[] caseFolded) { - RubyCaseUnfoldingTrie state = CASE_UNFOLD; + public static List findSingleCharUnfoldings(CaseFoldData.CaseFoldAlgorithm algorithm, int[] caseFolded) { + CaseUnfoldingTrie state = CaseFoldData.getUnfoldingTrie(algorithm); for (int codepoint : caseFolded) { assert state.hasChildAt(codepoint); @@ -165,9 +158,9 @@ public static List findSingleCharUnfoldings(int[] caseFolded) { return state.getCodepoints(); } - public static List findSingleCharUnfoldings(int caseFolded) { - if (CASE_UNFOLD.hasChildAt(caseFolded)) { - return CASE_UNFOLD.getChildAt(caseFolded).getCodepoints(); + public static List findSingleCharUnfoldings(CaseFoldData.CaseFoldAlgorithm algorithm, int caseFolded) { + if (CaseFoldData.getUnfoldingTrie(algorithm).hasChildAt(caseFolded)) { + return CaseFoldData.getUnfoldingTrie(algorithm).getChildAt(caseFolded).getCodepoints(); } else { return Collections.emptyList(); } diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/JSRegexLexer.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/JSRegexLexer.java index c6a0aaa71dbf..aaa2644a5c51 100644 --- 
a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/JSRegexLexer.java +++ b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/JSRegexLexer.java @@ -158,14 +158,15 @@ protected boolean featureEnabledClassSetExpressions() { @Override protected void caseFoldUnfold(CodePointSetAccumulator charClass) { - CaseFoldTable.CaseFoldingAlgorithm caseFolding = flags.isEitherUnicode() ? CaseFoldTable.CaseFoldingAlgorithm.ECMAScriptUnicode : CaseFoldTable.CaseFoldingAlgorithm.ECMAScriptNonUnicode; - CaseFoldTable.applyCaseFoldUnfold(charClass, compilationBuffer.getCodePointSetAccumulator1(), caseFolding); + CaseFoldData.CaseFoldUnfoldAlgorithm caseFolding = flags.isEitherUnicode() ? CaseFoldData.CaseFoldUnfoldAlgorithm.ECMAScriptUnicode : CaseFoldData.CaseFoldUnfoldAlgorithm.ECMAScriptNonUnicode; + CodePointSetAccumulator tmp = compilationBuffer.getCodePointSetAccumulator1(); + CaseFoldData.applyCaseFoldUnfold(charClass, tmp, caseFolding); } @Override protected CodePointSet complementClassSet(CodePointSet codePointSet) { if (flags.isUnicodeSets() && flags.isIgnoreCase()) { - return codePointSet.createInverse(Constants.FOLDED_CHARACTERS, compilationBuffer); + return codePointSet.createInverse(CaseFoldData.FOLDED_CHARACTERS, compilationBuffer); } else { return codePointSet.createInverse(source.getEncoding()); } @@ -269,7 +270,7 @@ protected Token handleBoundedQuantifierSyntaxError() throws RegexSyntaxException throw syntaxError(JsErrorMessages.INCOMPLETE_QUANTIFIER); } position = getLastTokenPosition() + 1; - return charClass('{'); + return literalChar('{'); } @Override @@ -475,7 +476,7 @@ protected Token parseCustomEscape(char c) { } handleInvalidBackReference(groupName); } else { - return charClass(c); + return literalChar(c); } } return null; diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/JSRegexParser.java 
b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/JSRegexParser.java index 87509fe7d7f6..5a405667b800 100644 --- a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/JSRegexParser.java +++ b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/JSRegexParser.java @@ -44,6 +44,9 @@ import java.util.List; import java.util.Map; +import org.graalvm.collections.EconomicMap; +import org.graalvm.collections.Equivalence; + import com.oracle.truffle.api.CompilerDirectives; import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; import com.oracle.truffle.regex.AbstractRegexObject; @@ -52,9 +55,10 @@ import com.oracle.truffle.regex.RegexOptions; import com.oracle.truffle.regex.RegexSource; import com.oracle.truffle.regex.RegexSyntaxException; +import com.oracle.truffle.regex.charset.CodePointSet; +import com.oracle.truffle.regex.charset.CodePointSetAccumulator; import com.oracle.truffle.regex.errors.JsErrorMessages; import com.oracle.truffle.regex.tregex.buffer.CompilationBuffer; -import com.oracle.truffle.regex.tregex.parser.CaseFoldTable.CaseFoldingAlgorithm; import com.oracle.truffle.regex.tregex.parser.ast.Group; import com.oracle.truffle.regex.tregex.parser.ast.RegexAST; import com.oracle.truffle.regex.tregex.parser.ast.RegexASTRootNode; @@ -62,17 +66,17 @@ import com.oracle.truffle.regex.tregex.parser.ast.Sequence; import com.oracle.truffle.regex.tregex.parser.ast.Term; import com.oracle.truffle.regex.tregex.string.Encodings; -import org.graalvm.collections.EconomicMap; -import org.graalvm.collections.Equivalence; public final class JSRegexParser implements RegexParser { - private static final EnumSet QUANTIFIER_PREV = EnumSet.of(Token.Kind.charClass, Token.Kind.classSet, Token.Kind.groupEnd, Token.Kind.backReference); + private static final EnumSet QUANTIFIER_PREV = EnumSet.of(Token.Kind.literalChar, Token.Kind.charClass, Token.Kind.charClassEnd, Token.Kind.classSet, 
Token.Kind.groupEnd, + Token.Kind.backReference); private final RegexParserGlobals globals; private final RegexSource source; private final RegexFlags flags; private final JSRegexLexer lexer; private final RegexASTBuilder astBuilder; + private final CodePointSetAccumulator curCharClass = new CodePointSetAccumulator(); @TruffleBoundary public JSRegexParser(RegexLanguage language, RegexSource source, CompilationBuffer compilationBuffer) throws RegexSyntaxException { @@ -204,11 +208,28 @@ private RegexAST parse(boolean rootCapture) throws RegexSyntaxException { } astBuilder.popGroup(token); break; + case literalChar: + literalChar(((Token.LiteralCharacter) token).getCodePoint()); + break; case charClass: astBuilder.addCharClass((Token.CharacterClass) token); break; + case charClassBegin: + curCharClass.clear(); + break; + case charClassAtom: + curCharClass.addSet(((Token.CharacterClassAtom) token).getContents()); + break; + case charClassEnd: + boolean wasSingleChar = !lexer.isCurCharClassInverted() && curCharClass.matchesSingleChar(); + if (flags.isIgnoreCase()) { + lexer.caseFoldUnfold(curCharClass); + } + CodePointSet cps = curCharClass.toCodePointSet(); + astBuilder.addCharClass(lexer.isCurCharClassInverted() ? cps.createInverse(source.getEncoding()) : cps, wasSingleChar); + break; case classSet: - astBuilder.addClassSet((Token.ClassSet) token, flags.isIgnoreCase() ? CaseFoldingAlgorithm.ECMAScriptUnicode : null); + astBuilder.addClassSet((Token.ClassSet) token, flags.isIgnoreCase() ? 
CaseFoldData.CaseFoldUnfoldAlgorithm.ECMAScriptUnicode : null); break; default: throw CompilerDirectives.shouldNotReachHere(); @@ -222,6 +243,17 @@ private RegexAST parse(boolean rootCapture) throws RegexSyntaxException { return ast; } + private void literalChar(int codePoint) { + if (flags.isIgnoreCase()) { + curCharClass.clear(); + curCharClass.addCodePoint(codePoint); + lexer.caseFoldUnfold(curCharClass); + astBuilder.addCharClass(curCharClass.toCodePointSet(), true); + } else { + astBuilder.addCharClass(CodePointSet.create(codePoint)); + } + } + private static boolean isNestedInLookBehindAssertion(Term t) { RegexASTSubtreeRootNode parent = t.getSubTreeParent(); while (parent.isLookAroundAssertion()) { diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/JSRegexValidator.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/JSRegexValidator.java index 4a75a393dcb3..99770375d5ab 100644 --- a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/JSRegexValidator.java +++ b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/JSRegexValidator.java @@ -123,7 +123,11 @@ private void parseDryRun() throws RegexSyntaxException { case wordBoundary: case nonWordBoundary: case backReference: + case literalChar: case charClass: + case charClassBegin: + case charClassAtom: + case charClassEnd: case classSet: curTermState = CurTermState.Other; break; @@ -180,6 +184,9 @@ private void parseDryRun() throws RegexSyntaxException { throw CompilerDirectives.shouldNotReachHere(); } } + if (lexer.inCharacterClass()) { + throw syntaxError(JsErrorMessages.UNMATCHED_LEFT_BRACKET); + } if (!syntaxStack.isEmpty()) { throw syntaxError(JsErrorMessages.UNTERMINATED_GROUP); } diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/RubyCaseFolding.java 
b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/MultiCharacterCaseFolding.java similarity index 64% rename from regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/RubyCaseFolding.java rename to regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/MultiCharacterCaseFolding.java index 2c120e0a36dd..68d8653b379c 100644 --- a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/RubyCaseFolding.java +++ b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/MultiCharacterCaseFolding.java @@ -38,22 +38,28 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -package com.oracle.truffle.regex.tregex.parser.flavors; +package com.oracle.truffle.regex.tregex.parser; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; +import java.util.function.BiConsumer; +import java.util.function.BiPredicate; import java.util.stream.Collectors; import com.oracle.truffle.regex.UnsupportedRegexException; import com.oracle.truffle.regex.charset.CodePointSet; import com.oracle.truffle.regex.charset.CodePointSetAccumulator; -import com.oracle.truffle.regex.tregex.parser.RegexASTBuilder; -import com.oracle.truffle.regex.tregex.parser.flavors.RubyCaseUnfoldingTrie.Unfolding; +import com.oracle.truffle.regex.charset.Range; +import com.oracle.truffle.regex.tregex.parser.CaseUnfoldingTrie.Unfolding; +import org.graalvm.collections.Pair; -public class RubyCaseFolding { +import static com.oracle.truffle.regex.tregex.parser.RegexLexer.isAscii; - public static void caseFoldUnfoldString(int[] codepoints, CodePointSet encodingRange, RegexASTBuilder astBuilder) { - caseFoldUnfoldString(codepoints, encodingRange, false, astBuilder); +public class MultiCharacterCaseFolding { + + public static void caseFoldUnfoldString(CaseFoldData.CaseFoldAlgorithm algorithm, int[] codepoints, CodePointSet 
encodingRange, RegexASTBuilder astBuilder) { + caseFoldUnfoldString(algorithm, codepoints, encodingRange, false, astBuilder); } /** @@ -66,11 +72,11 @@ public static void caseFoldUnfoldString(int[] codepoints, CodePointSet encodingR * the variants * @param astBuilder where to append the matcher */ - public static void caseFoldUnfoldString(int[] codepoints, CodePointSet encodingRange, boolean dropAsciiOnStart, RegexASTBuilder astBuilder) { - List caseFolded = caseFold(codepoints); + public static void caseFoldUnfoldString(CaseFoldData.CaseFoldAlgorithm algorithm, int[] codepoints, CodePointSet encodingRange, boolean dropAsciiOnStart, RegexASTBuilder astBuilder) { + List caseFolded = caseFold(algorithm, codepoints); - List unfoldings = RubyCaseUnfoldingTrie.findUnfoldings(caseFolded); - // We assume that if `codepoints` was in the encodingRange, than so will be `caseFolded`. + List unfoldings = CaseUnfoldingTrie.findUnfoldings(algorithm, caseFolded); + // We assume that if `codepoints` was in the encodingRange, then so will be `caseFolded`. // The only way that we could introduce out-of-range characters is through the unfoldings, // so just filter those should be enough to prevent generating out-of-range matchers. unfoldings = unfoldings.stream().filter(u -> encodingRange.contains(u.getCodepoint())).collect(Collectors.toList()); @@ -92,7 +98,7 @@ public static void caseFoldUnfoldString(int[] codepoints, CodePointSet encodingR // If the following mandatory string that we would add would be at the // beginning of the matcher and it would match an ASCII character, then we // return a dead matcher instead (if dropAsciiOnStart is set). 
- if (dropAsciiOnStart && end == 0 && RubyRegexParser.isAscii(caseFolded.get(end))) { + if (dropAsciiOnStart && end == 0 && RegexLexer.isAscii(caseFolded.get(end))) { astBuilder.popGroup(); astBuilder.replaceCurTermWithDeadNode(); return; @@ -108,7 +114,7 @@ public static void caseFoldUnfoldString(int[] codepoints, CodePointSet encodingR unfoldSegment(astBuilder, caseFolded, unfoldings.subList(unfoldingsStartIndex, unfoldingsEndIndex), start, end, 0, dropAsciiOnStart); if (end < caseFolded.size()) { - if (dropAsciiOnStart && end == 0 && RubyRegexParser.isAscii(caseFolded.get(end))) { + if (dropAsciiOnStart && end == 0 && RegexLexer.isAscii(caseFolded.get(end))) { astBuilder.popGroup(); astBuilder.replaceCurTermWithDeadNode(); return; @@ -119,14 +125,14 @@ public static void caseFoldUnfoldString(int[] codepoints, CodePointSet encodingR astBuilder.popGroup(); } - public static int[] caseFold(int codePoint) { - return RubyCaseFoldingData.CASE_FOLD.get(codePoint); + public static int[] caseFold(CaseFoldData.CaseFoldAlgorithm algorithm, int codePoint) { + return CaseFoldData.getTable(algorithm).caseFold(codePoint); } - private static List caseFold(int[] codepoints) { + private static List caseFold(CaseFoldData.CaseFoldAlgorithm algorithm, int[] codepoints) { List caseFolded = new ArrayList<>(); for (int codepoint : codepoints) { - int[] folded = caseFold(codepoint); + int[] folded = caseFold(algorithm, codepoint); if (folded == null) { caseFolded.add(codepoint); } else { @@ -198,7 +204,7 @@ private static void unfoldSegment(RegexASTBuilder astBuilder, List case // The only possible unfoldings at this position have length == 1. We can express all the // choices by using a character class. 
CodePointSetAccumulator acc = new CodePointSetAccumulator(); - if (!dropAsciiOnStart || start != 0 || !RubyRegexParser.isAscii(caseFolded.get(start))) { + if (!dropAsciiOnStart || start != 0 || !RegexLexer.isAscii(caseFolded.get(start))) { acc.addCodePoint(caseFolded.get(start)); } int unfoldingsNextIndex = 0; @@ -207,7 +213,7 @@ private static void unfoldSegment(RegexASTBuilder astBuilder, List case // length > 0. assert unfoldings.get(unfoldingsNextIndex).getLength() == 1; int codepoint = unfoldings.get(unfoldingsNextIndex).getCodepoint(); - if (!dropAsciiOnStart || start != 0 || !RubyRegexParser.isAscii(codepoint)) { + if (!dropAsciiOnStart || start != 0 || !RegexLexer.isAscii(codepoint)) { acc.addCodePoint(codepoint); } unfoldingsNextIndex++; @@ -215,4 +221,84 @@ private static void unfoldSegment(RegexASTBuilder astBuilder, List case astBuilder.addCharClass(acc.toCodePointSet(), false); unfoldSegment(astBuilder, caseFolded, unfoldings.subList(unfoldingsNextIndex, unfoldings.size()), start + 1, end, backtrackingDepth, dropAsciiOnStart); } + + /** + * Calls the argument on any element of the character class which has a case-folding. + */ + private static void caseFoldCharClass(CaseFoldData.CaseFoldAlgorithm algorithm, CodePointSetAccumulator charClass, BiConsumer caseFoldItem) { + CaseFoldData.getTable(algorithm).caseFold(charClass, caseFoldItem); + } + + /** + * This method modifies {@code charClass} to contain its closure on case mapping. + */ + public static void caseClosure(CaseFoldData.CaseFoldAlgorithm algorithm, CodePointSetAccumulator charClass, CodePointSetAccumulator tmp, BiPredicate filter, + CodePointSet allowedCodePoints) { + tmp.clear(); + + caseFoldCharClass(algorithm, charClass, (from, to) -> { + if (to.length == 1) { + // Add the case-folded version to the character class... + if (filter.test(from, to[0])) { + tmp.addCodePoint(to[0]); + } + } + // ... and also any characters which case-fold to the same.
+ for (int unfolding : CaseUnfoldingTrie.findSingleCharUnfoldings(algorithm, to)) { + if (unfolding != from && filter.test(from, unfolding)) { + tmp.addCodePoint(unfolding); + } + } + }); + + // We also handle all the characters which might have no case-folding, i.e. they case-fold + // to themselves. + for (Range r : charClass) { + for (int codepoint = r.lo; codepoint <= r.hi; codepoint++) { + for (int unfolding : CaseUnfoldingTrie.findSingleCharUnfoldings(algorithm, codepoint)) { + if (filter.test(codepoint, unfolding)) { + tmp.addCodePoint(unfolding); + } + } + } + } + + // Only include characters that are admissible in the given encoding. + tmp.intersectWith(allowedCodePoints); + + charClass.addSet(tmp.get()); + } + + /** + * Finds any characters in {@code charClass} that have multi-codepoint expansions. + * + * @return a list of pairs, with the first element being the expanded codepoint and the second + * element the expansion + */ + public static List> caseClosureMultiCodePoint(CaseFoldData.CaseFoldAlgorithm algorithm, CodePointSetAccumulator charClass) { + List> multiCodePointExpansions = new ArrayList<>(); + + caseFoldCharClass(algorithm, charClass, (from, to) -> { + if (to.length > 1) { + assert !isAscii(from); + multiCodePointExpansions.add(Pair.create(from, to)); + } + }); + + return multiCodePointExpansions; + } + + public static boolean equalsIgnoreCase(CaseFoldData.CaseFoldAlgorithm algorithm, int codePointA, int codePointB) { + int[] foldedA = caseFold(algorithm, codePointA); + int[] foldedB = caseFold(algorithm, codePointB); + if (foldedA == null && foldedB == null) { + return codePointA == codePointB; + } else if (foldedA == null) { + return foldedB.length == 1 && codePointA == foldedB[0]; + } else if (foldedB == null) { + return foldedA.length == 1 && foldedA[0] == codePointB; + } else { + return Arrays.equals(foldedA, foldedB); + } + } } diff --git 
a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/RegexASTBuilder.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/RegexASTBuilder.java index e765265d2781..084a866498af 100644 --- a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/RegexASTBuilder.java +++ b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/RegexASTBuilder.java @@ -534,7 +534,7 @@ private Term translateUnicodeCharClass(CodePointSet codePointSet, Token token, b * @param token aside from the source sections, the token most importantly contains the set of * code points and strings to be included in the class set */ - public void addClassSet(Token.ClassSet token, CaseFoldTable.CaseFoldingAlgorithm caseUnfoldAlgo) { + public void addClassSet(Token.ClassSet token, CaseFoldData.CaseFoldUnfoldAlgorithm caseUnfoldAlgo) { CodePointSetAccumulator buf = compilationBuffer.getCodePointSetAccumulator1(); ClassSetContents contents = token.getContents(); @@ -552,7 +552,7 @@ public void addClassSet(Token.ClassSet token, CaseFoldTable.CaseFoldingAlgorithm if (caseUnfoldAlgo != null) { buf.clear(); buf.addCodePoint(cp); - CaseFoldTable.applyCaseFoldUnfold(buf, compilationBuffer.getCodePointSetAccumulator2(), caseUnfoldAlgo); + CaseFoldData.applyCaseFoldUnfold(buf, compilationBuffer.getCodePointSetAccumulator2(), caseUnfoldAlgo); addCharClass(buf.toCodePointSet()); } else { addCharClass(CodePointSet.create(cp)); @@ -565,7 +565,7 @@ public void addClassSet(Token.ClassSet token, CaseFoldTable.CaseFoldingAlgorithm if (caseUnfoldAlgo != null) { buf.clear(); buf.addSet(contents.getCodePointSet()); - CaseFoldTable.applyCaseFoldUnfold(buf, compilationBuffer.getCodePointSetAccumulator2(), caseUnfoldAlgo); + CaseFoldData.applyCaseFoldUnfold(buf, compilationBuffer.getCodePointSetAccumulator2(), caseUnfoldAlgo); addCharClass(buf.toCodePointSet()); } else { addCharClass(contents.getCodePointSet()); diff --git 
a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/RegexLexer.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/RegexLexer.java index a9aa529d3e62..e16ddb9d45d3 100644 --- a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/RegexLexer.java +++ b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/RegexLexer.java @@ -66,7 +66,7 @@ public abstract class RegexLexer { - private static final TBitSet PREDEFINED_CHAR_CLASSES = TBitSet.valueOf('D', 'S', 'W', 'd', 's', 'w'); + protected static final TBitSet PREDEFINED_CHAR_CLASSES = TBitSet.valueOf('D', 'S', 'W', 'd', 's', 'w'); protected static final TBitSet DEFAULT_WHITESPACE = TBitSet.valueOf('\t', '\n', '\u000b', '\f', '\r', ' '); public final RegexSource source; /** @@ -75,13 +75,16 @@ public abstract class RegexLexer { protected final String pattern; private final Encoding encoding; private final CodePointSetAccumulator curCharClass = new CodePointSetAccumulator(); + private boolean curCharClassInverted; /** * The index of the next character in {@link #pattern} to be parsed. */ protected int position = 0; protected Map> namedCaptureGroups = null; private int curStartIndex = 0; + private int curCharClassStartIndex = -1; private int charClassCurAtomStartIndex = 0; + private int charClassEmitInvalidRangeAtoms = 0; private int nGroups = 1; private boolean identifiedAllGroups = false; protected final CompilationBuffer compilationBuffer; @@ -243,6 +246,14 @@ public RegexLexer(RegexSource source, CompilationBuffer compilationBuffer) { */ protected abstract int getMaxBackReferenceDigits(); + /** + * Returns {@code true} iff the given character is a predefined character class when preceded + * with a backslash (e.g. \d). + */ + protected boolean isPredefCharClass(char c) { + return PREDEFINED_CHAR_CLASSES.get(c); + } + /** * Returns the CodePointSet associated with the given predefined character class (e.g. 
* {@code \d}). @@ -443,30 +454,38 @@ protected void retreat() { } public boolean hasNext() { - if (featureEnabledLineComments()) { - int p; - do { - p = position; + if (!inCharacterClass()) { + if (featureEnabledLineComments()) { + int p; + do { + p = position; + skipWhitespace(); + if (consumingLookahead("#")) { + skipComment('\n'); + } else if (featureEnabledGroupComments() && consumingLookahead("(?#")) { + if (!skipComment(')')) { + handleUnfinishedGroupComment(); + } + } + } while (p != position); + } else if (featureEnabledIgnoreWhiteSpace()) { skipWhitespace(); - if (consumingLookahead("#")) { - skipComment('\n'); - } else if (featureEnabledGroupComments() && consumingLookahead("(?#")) { + } + if (featureEnabledGroupComments()) { + while (consumingLookahead("(?#")) { if (!skipComment(')')) { handleUnfinishedGroupComment(); } } - } while (p != position); - } else if (featureEnabledIgnoreWhiteSpace()) { - skipWhitespace(); - } - if (featureEnabledGroupComments()) { - while (consumingLookahead("(?#")) { - if (!skipComment(')')) { - handleUnfinishedGroupComment(); - } } } - return !atEnd(); + if (atEnd()) { + if (inCharacterClass()) { + throw handleUnmatchedLeftBracket(); + } + return false; + } + return true; } private boolean skipComment(char terminator) { @@ -505,6 +524,10 @@ public int getLastTokenPosition() { return curStartIndex; } + public int getLastCharacterClassBeginPosition() { + return curCharClassStartIndex - 1; + } + protected int getLastAtomPosition() { return Math.max(curStartIndex, charClassCurAtomStartIndex); } @@ -609,6 +632,14 @@ protected boolean atEnd() { return position >= pattern.length(); } + public boolean inCharacterClass() { + return curCharClassStartIndex >= 0; + } + + public boolean isCurCharClassInverted() { + return curCharClassInverted; + } + /** * Sets the {@link com.oracle.truffle.api.source.SourceSection} of a given {@link Token} in * respect of {@link RegexSource#getSource()}. 
@@ -718,39 +749,34 @@ private void identifyCaptureGroups() throws RegexSyntaxException { position = restoreIndex; } - protected Token charClass(int codePoint) { - if (featureEnabledIgnoreCase()) { - curCharClass.clear(); - curCharClass.appendRange(codePoint, codePoint); - return charClass(false); - } else { - return Token.createCharClass(CodePointSet.create(codePoint), true); - } + protected Token literalChar(int codePoint) { + return Token.createLiteralCharacter(codePoint); } private Token charClass(CodePointSet codePointSet) { if (featureEnabledIgnoreCase()) { curCharClass.clear(); curCharClass.addSet(codePointSet); - return charClass(false); + boolean wasSingleChar = curCharClass.matchesSingleChar(); + caseFoldUnfold(curCharClass); + return Token.createCharClass(curCharClass.toCodePointSet(), wasSingleChar); } else { return Token.createCharClass(codePointSet); } } - private Token charClass(boolean invert) { - boolean wasSingleChar = !invert && curCharClass.matchesSingleChar(); - if (featureEnabledIgnoreCase()) { - caseFoldUnfold(curCharClass); - } - CodePointSet cps = curCharClass.toCodePointSet(); - return Token.createCharClass(invert ? cps.createInverse(encoding) : cps, wasSingleChar); - } - /* lexer */ private Token getNext() throws RegexSyntaxException { final char c = consumeChar(); + if (inCharacterClass()) { + if (c == ']' && (!featureEnabledCharClassFirstBracketIsLiteral() || position != curCharClassStartIndex + (curCharClassInverted ? 
2 : 1))) { + curCharClassStartIndex = -1; + return Token.createCharacterClassEnd(); + } + ClassSetContents atom = parseCharClassAtom(c); + return Token.createCharacterClassAtom(atom.getCodePointSet(), atom.isPosixCollationEquivalenceClass()); + } switch (c) { case '.': return Token.createCharClass(getDotCodePointSet()); @@ -765,7 +791,7 @@ private Token getNext() throws RegexSyntaxException { return parseQuantifier(c); case '}': handleUnmatchedRightBrace(); - return charClass(c); + return literalChar(c); case '|': return Token.createAlternation(); case '(': @@ -773,14 +799,19 @@ private Token getNext() throws RegexSyntaxException { case ')': return Token.createGroupEnd(); case '[': - return parseCharClass(); + if (featureEnabledClassSetExpressions()) { + return Token.createClassSetExpression(parseClassSetExpression()); + } + curCharClassStartIndex = position; + curCharClassInverted = consumingLookahead("^"); + return Token.createCharacterClassBegin(); case ']': handleUnmatchedRightBracket(); - return charClass(c); + return literalChar(c); case '\\': return parseEscape(); default: - return charClass(toCodePoint(c)); + return literalChar(toCodePoint(c)); } } @@ -835,7 +866,7 @@ private Token parseEscape() throws RegexSyntaxException { return charClass(unicodePropertyContents.getCodePointSet()); } } else { - return charClass(parseEscapeChar(c, false)); + return literalChar(parseEscapeChar(c, false)); } } @@ -1033,23 +1064,6 @@ private int countZeros(int fromIndex) { return countFrom((c) -> c == '0', fromIndex); } - private Token parseCharClass() throws RegexSyntaxException { - if (featureEnabledClassSetExpressions()) { - return Token.createClassSetExpression(parseClassSetExpression()); - } - final boolean invert = consumingLookahead("^"); - curCharClass.clear(); - int startPos = position; - while (!atEnd()) { - final char c = consumeChar(); - if (c == ']' && (!featureEnabledCharClassFirstBracketIsLiteral() || position != startPos + 1)) { - return charClass(invert); - 
} - parseCharClassRange(c); - } - throw handleUnmatchedLeftBracket(); - } - private ClassSetContents parseCharClassAtomPredefCharClass(char c) throws RegexSyntaxException { if (c == '\\') { if (atEnd()) { @@ -1132,7 +1146,7 @@ private int parseCharClassAtomCodePoint(char c) throws RegexSyntaxException { } } - private ClassSetContents parseCharClassAtom(char c) throws RegexSyntaxException { + private ClassSetContents parseCharClassAtomInner(char c) throws RegexSyntaxException { ClassSetContents cc = parseCharClassAtomPredefCharClass(c); if (cc != null) { return cc; @@ -1140,42 +1154,42 @@ private ClassSetContents parseCharClassAtom(char c) throws RegexSyntaxException return ClassSetContents.createCharacter(parseCharClassAtomCodePoint(c)); } - private void parseCharClassRange(char c) throws RegexSyntaxException { + private ClassSetContents parseCharClassAtom(char c) throws RegexSyntaxException { int startPos = position - 1; charClassCurAtomStartIndex = position - 1; - ClassSetContents firstAtom = parseCharClassAtom(c); + ClassSetContents firstAtom = parseCharClassAtomInner(c); + if (charClassEmitInvalidRangeAtoms > 0) { + charClassEmitInvalidRangeAtoms--; + return firstAtom; + } if (consumingLookahead("-")) { if (atEnd() || lookahead("]")) { - addCharClassAtom(firstAtom); - curCharClass.addRange('-', '-'); + position--; + return firstAtom; } else { char nextC = consumeChar(); charClassCurAtomStartIndex = position - 1; - ClassSetContents secondAtom = parseCharClassAtom(nextC); + ClassSetContents secondAtom = parseCharClassAtomInner(nextC); // Runtime Semantics: CharacterRangeOrUnion(firstAtom, secondAtom) if (!firstAtom.isAllowedInRange() || !secondAtom.isAllowedInRange()) { handleCCRangeWithPredefCharClass(startPos, firstAtom, secondAtom); - addCharClassAtom(firstAtom); - addCharClassAtom(secondAtom); - curCharClass.addRange('-', '-'); + // no syntax error thrown, so we have to emit the range as three separate atoms + position = charClassCurAtomStartIndex - 1; + 
charClassEmitInvalidRangeAtoms = 2; + return firstAtom; } else { if (secondAtom.getCodePoint() < firstAtom.getCodePoint()) { throw handleCCRangeOutOfOrder(startPos); } else { - curCharClass.addRange(firstAtom.getCodePoint(), secondAtom.getCodePoint()); + return ClassSetContents.createRange(firstAtom.getCodePoint(), secondAtom.getCodePoint()); } } } } else { - addCharClassAtom(firstAtom); + return firstAtom; } } - private void addCharClassAtom(ClassSetContents atom) { - assert atom.isCodePointSetOnly(); - curCharClass.addSet(atom.getCodePointSet()); - } - private ClassSetContents parseEscapeCharClass(char c) throws RegexSyntaxException { if (isPredefCharClass(c)) { return ClassSetContents.createCharacterClass(getPredefinedCharClass(c)); @@ -1461,19 +1475,19 @@ public RegexSyntaxException syntaxError(String msg) { return RegexSyntaxException.createPattern(source, msg, getLastAtomPosition()); } - private static boolean isPredefCharClass(char c) { - return PREDEFINED_CHAR_CLASSES.get(c); - } - - protected static boolean isDecimalDigit(int c) { + public static boolean isDecimalDigit(int c) { return '0' <= c && c <= '9'; } - protected static boolean isOctalDigit(int c) { + public static boolean isOctalDigit(int c) { return '0' <= c && c <= '7'; } - protected static boolean isHexDigit(int c) { + public static boolean isHexDigit(int c) { return '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F'; } + + public static boolean isAscii(int c) { + return Integer.compareUnsigned(c, 128) < 0; + } } diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/Token.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/Token.java index 6bb029e6a3f9..2b698594f469 100644 --- a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/Token.java +++ b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/Token.java @@ -71,7 +71,11 @@ public enum Kind { 
lookAheadAssertionBegin, lookBehindAssertionBegin, groupEnd, + literalChar, charClass, + charClassBegin, + charClassAtom, + charClassEnd, classSet, inlineFlags, conditionalBackreference @@ -87,6 +91,8 @@ public enum Kind { private static final Token ALTERNATION = new Token(Kind.alternation); private static final Token CAPTURE_GROUP_BEGIN = new Token(Kind.captureGroupBegin); private static final Token NON_CAPTURE_GROUP_BEGIN = new Token(Kind.nonCaptureGroupBegin); + private static final Token CHAR_CLASS_BEGIN = new Token(Kind.charClassBegin); + private static final Token CHAR_CLASS_END = new Token(Kind.charClassEnd); private static final Token LOOK_AHEAD_ASSERTION_BEGIN = new LookAheadAssertionBegin(false); private static final Token NEGATIVE_LOOK_AHEAD_ASSERTION_BEGIN = new LookAheadAssertionBegin(true); private static final Token LOOK_BEHIND_ASSERTION_BEGIN = new LookBehindAssertionBegin(false); @@ -157,6 +163,10 @@ public static Quantifier createQuantifier(int min, int max, boolean greedy) { return new Quantifier(min, max, greedy); } + public static LiteralCharacter createLiteralCharacter(int codePoint) { + return new LiteralCharacter(codePoint); + } + public static CharacterClass createCharClass(CodePointSet codePointSet) { return new CharacterClass(codePointSet, false); } @@ -169,6 +179,18 @@ public static ClassSet createClassSetExpression(ClassSetContents contents) { return new ClassSet(contents); } + public static Token createCharacterClassBegin() { + return CHAR_CLASS_BEGIN; + } + + public static Token createCharacterClassAtom(CodePointSet contents, boolean isPosixCollationEquivalenceClass) { + return new CharacterClassAtom(contents, isPosixCollationEquivalenceClass); + } + + public static Token createCharacterClassEnd() { + return CHAR_CLASS_END; + } + public static Token createLookAheadAssertionBegin(boolean negated) { return negated ? 
NEGATIVE_LOOK_AHEAD_ASSERTION_BEGIN : LOOK_AHEAD_ASSERTION_BEGIN; } @@ -359,6 +381,52 @@ public JsonObject toJson() { } } + public static final class LiteralCharacter extends Token { + + private final int codePoint; + + public LiteralCharacter(int codePoint) { + super(Kind.literalChar); + this.codePoint = codePoint; + } + + @TruffleBoundary + @Override + public JsonObject toJson() { + return super.toJson().append(Json.prop("codePoint", codePoint)); + } + + public int getCodePoint() { + return codePoint; + } + } + + public static final class CharacterClassAtom extends Token { + + private final CodePointSet contents; + private final boolean isPosixCollationEquivalenceClass; + + public CharacterClassAtom(CodePointSet contents, boolean isPosixCollationEquivalenceClass) { + super(Kind.charClassAtom); + this.contents = contents; + this.isPosixCollationEquivalenceClass = isPosixCollationEquivalenceClass; + } + + @TruffleBoundary + @Override + public JsonObject toJson() { + return super.toJson().append(Json.prop("contents", contents)); + } + + public CodePointSet getContents() { + return contents; + } + + public boolean isPosixCollationEquivalenceClass() { + return isPosixCollationEquivalenceClass; + } + } + public static final class CharacterClass extends Token { private final CodePointSet codePointSet; @@ -420,7 +488,7 @@ public BackReference(Token.Kind kind, int[] groupNumbers, boolean namedReference @TruffleBoundary @Override public JsonObject toJson() { - return super.toJson().append(Json.prop("groupNumbers", Arrays.stream(groupNumbers).mapToObj(x -> Json.val(x)))); + return super.toJson().append(Json.prop("groupNumbers", Arrays.stream(groupNumbers).mapToObj(Json::val))); } public int[] getGroupNumbers() { diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/ECMAScriptFlavor.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/ECMAScriptFlavor.java index e91b67d0d5e1..07f154078c2f 
100644 --- a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/ECMAScriptFlavor.java +++ b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/ECMAScriptFlavor.java @@ -43,7 +43,7 @@ import com.oracle.truffle.regex.RegexLanguage; import com.oracle.truffle.regex.RegexSource; import com.oracle.truffle.regex.tregex.buffer.CompilationBuffer; -import com.oracle.truffle.regex.tregex.parser.CaseFoldTable; +import com.oracle.truffle.regex.tregex.parser.CaseFoldData; import com.oracle.truffle.regex.tregex.parser.JSRegexParser; import com.oracle.truffle.regex.tregex.parser.JSRegexValidator; import com.oracle.truffle.regex.tregex.parser.RegexParser; @@ -73,9 +73,9 @@ public RegexParser createParser(RegexLanguage language, RegexSource source, Comp @Override public BiPredicate getEqualsIgnoreCasePredicate(RegexAST ast) { if (ast.getFlags().isEitherUnicode()) { - return CaseFoldTable.CaseFoldingAlgorithm.ECMAScriptUnicode.getEqualsPredicate(); + return CaseFoldData.CaseFoldUnfoldAlgorithm.ECMAScriptUnicode.getEqualsPredicate(); } else { - return CaseFoldTable.CaseFoldingAlgorithm.ECMAScriptNonUnicode.getEqualsPredicate(); + return CaseFoldData.CaseFoldUnfoldAlgorithm.ECMAScriptNonUnicode.getEqualsPredicate(); } } } diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/OracleDBConstants.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/OracleDBConstants.java new file mode 100644 index 000000000000..c47e6253b6e5 --- /dev/null +++ b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/OracleDBConstants.java @@ -0,0 +1,355 @@ +/* + * Copyright (c) 2023, 2023, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * The Universal Permissive License (UPL), Version 1.0 + * + * Subject to the condition set forth below, permission is hereby granted to any + * person obtaining a copy of this software, associated documentation and/or + * data (collectively the "Software"), free of charge and under any and all + * copyright rights in the Software, and any and all patent rights owned or + * freely licensable by each licensor hereunder covering either (i) the + * unmodified Software as contributed to or provided by such licensor, or (ii) + * the Larger Works (as defined below), to deal in both + * + * (a) the Software, and + * + * (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if + * one is included with the Software each a "Larger Work" to which the Software + * is contributed by such licensors), + * + * without restriction, including without limitation the rights to copy, create + * derivative works of, display, perform, and distribute the Software and make, + * use, sell, offer for sale, import, export, have made, and have sold the + * Software and the Larger Work(s), and to sublicense the foregoing rights on + * either these or other terms. + * + * This license is subject to the following condition: + * + * The above copyright notice and either this complete permission notice or at a + * minimum a reference to the UPL must be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +package com.oracle.truffle.regex.tregex.parser.flavors; + +import org.graalvm.collections.EconomicMap; + +import com.oracle.truffle.regex.charset.CodePointSet; + +final class OracleDBConstants { + + // This map contains the character sets of POSIX character classes like [[:alpha:]] and + // [[:punct:]]. + static final EconomicMap POSIX_CHAR_CLASSES = EconomicMap.create(12); + // \w + static final CodePointSet WORD_CHARACTERS; + + static { + + /* GENERATED CODE BEGIN - KEEP THIS MARKER FOR AUTOMATIC UPDATES */ + + POSIX_CHAR_CLASSES.put("alpha", CodePointSet.createNoDedup( + 0x000041, 0x00005a, 0x000061, 0x00007a, 0x0000aa, 0x0000aa, 0x0000b5, 0x0000b5, 0x0000ba, 0x0000ba, 0x0000c0, 0x0000d6, 0x0000d8, 0x0000f6, 0x0000f8, 0x0002b8, 0x0002bb, + 0x0002c1, 0x0002c7, 0x0002c7, 0x0002c9, 0x0002cb, 0x0002ce, 0x0002ce, 0x0002d1, 0x0002d1, 0x0002d9, 0x0002d9, 0x0002e0, 0x0002e4, 0x0002ec, 0x0002ec, 0x0002ee, 0x0002ee, + 0x000300, 0x000374, 0x000376, 0x00037d, 0x00037f, 0x000383, 0x000386, 0x000386, 0x000388, 0x0003f5, 0x0003f7, 0x000481, 0x000483, 0x000559, 0x000560, 0x000588, 0x00058b, + 0x00058c, 0x000590, 0x0005af, 0x0005ba, 0x0005ba, 0x0005c5, 0x0005c5, 0x0005c7, 0x0005f2, 0x0005f5, 0x0005ff, 0x000610, 0x00061a, 0x00061d, 0x00061d, 0x000620, 0x00065f, + 0x00066e, 0x0006d3, 0x0006d5, 0x0006dc, 0x0006df, 0x0006e8, 0x0006ea, 0x0006ef, 0x0006fa, 0x0006fc, 0x0006ff, 0x0006ff, 0x00070e, 0x00070e, 0x000710, 0x0007bf, 0x0007ca, + 0x0007f5, 0x0007fa, 0x0007fd, 0x000800, 0x00082f, 0x00083f, 0x00085d, 0x00085f, 0x0008e1, 0x0008e3, 0x000900, 0x000904, 0x00093b, 0x00094e, 0x000963, 0x000971, 0x0009e5, + 0x0009f0, 0x0009f1, 0x0009fc, 0x0009fc, 0x0009fe, 0x000a65, 0x000a70, 0x000a75, 0x000a77, 0x000ae5, 0x000af2, 0x000b65, 0x000b71, 0x000b71, 0x000b78, 0x000be5, 0x000bfb, + 0x000c65, 0x000c70, 0x000c76, 0x000c80, 0x000c83, 0x000c85, 0x000ce5, 0x000cf0, 0x000d4e, 0x000d50, 0x000d57, 0x000d5f, 0x000d65, 0x000d7a, 0x000de5, 0x000df0, 0x000df3, + 0x000df5, 0x000e2e, 0x000e30, 
0x000e3e, 0x000e40, 0x000e45, 0x000e47, 0x000e4d, 0x000e5c, 0x000ecf, 0x000eda, 0x000f00, 0x000f18, 0x000f19, 0x000f35, 0x000f35, 0x000f37, + 0x000f37, 0x000f39, 0x000f39, 0x000f3e, 0x000f84, 0x000f86, 0x000fbd, 0x000fc6, 0x000fc6, 0x000fcd, 0x000fcd, 0x000fdb, 0x00103f, 0x001050, 0x00108f, 0x00109a, 0x00109d, + 0x0010a0, 0x0010fa, 0x0010fc, 0x00135f, 0x00137d, 0x00138f, 0x00139a, 0x0013ff, 0x001401, 0x00166c, 0x00166f, 0x00167f, 0x001681, 0x00169a, 0x00169d, 0x0016ea, 0x0016ee, + 0x001734, 0x001737, 0x0017d3, 0x0017d7, 0x0017d7, 0x0017dc, 0x0017df, 0x0017ea, 0x0017ef, 0x0017fa, 0x0017ff, 0x00180b, 0x00180d, 0x00180f, 0x00180f, 0x00181a, 0x00193f, + 0x001941, 0x001943, 0x001950, 0x0019cf, 0x0019db, 0x0019dd, 0x001a00, 0x001a1d, 0x001a20, 0x001a7f, 0x001a8a, 0x001a8f, 0x001a9a, 0x001a9f, 0x001aa7, 0x001aa7, 0x001aae, + 0x001b4f, 0x001b6b, 0x001b73, 0x001b7d, 0x001baf, 0x001bba, 0x001bfb, 0x001c00, 0x001c3a, 0x001c4a, 0x001c4f, 0x001c5a, 0x001c7d, 0x001c80, 0x001cbf, 0x001cc8, 0x001cd2, + 0x001cd4, 0x001fbc, 0x001fbe, 0x001fbe, 0x001fc2, 0x001fcc, 0x001fd0, 0x001fdc, 0x001fe0, 0x001fec, 0x001ff0, 0x001ffc, 0x001fff, 0x001fff, 0x002015, 0x002015, 0x002065, + 0x002065, 0x002071, 0x002073, 0x00208f, 0x00209f, 0x0020c0, 0x0020ff, 0x002102, 0x002102, 0x002107, 0x002107, 0x00210a, 0x002113, 0x002115, 0x002115, 0x002119, 0x00211d, + 0x002124, 0x002124, 0x002128, 0x002128, 0x00212a, 0x00212a, 0x00212c, 0x00212d, 0x00212f, 0x002139, 0x00213c, 0x00213f, 0x002145, 0x002149, 0x00214e, 0x00214e, 0x002160, + 0x002188, 0x00218c, 0x00218f, 0x002400, 0x002421, 0x002427, 0x00243f, 0x00244b, 0x002487, 0x002b74, 0x002b75, 0x002b96, 0x002b97, 0x002c00, 0x002ce4, 0x002ceb, 0x002cf8, + 0x002d00, 0x002d6f, 0x002d71, 0x002dff, 0x002e2f, 0x002e2f, 0x002e50, 0x002fef, 0x002ffc, 0x002fff, 0x003003, 0x003003, 0x003005, 0x003007, 0x00301c, 0x00301c, 0x003021, + 0x00302f, 0x003031, 0x003035, 0x003038, 0x00303c, 0x003040, 0x00309a, 0x00309d, 0x00309f, 0x0030a1, 0x0030fa, 0x0030fc, 0x00318f, 
0x003192, 0x0031bf, 0x0031e4, 0x0031ff, + 0x00321f, 0x00321f, 0x003400, 0x004dbf, 0x004e00, 0x00a48f, 0x00a4c7, 0x00a4fd, 0x00a500, 0x00a60c, 0x00a610, 0x00a61f, 0x00a62a, 0x00a672, 0x00a674, 0x00a67d, 0x00a67f, + 0x00a6f1, 0x00a6f8, 0x00a6ff, 0x00a717, 0x00a71f, 0x00a722, 0x00a788, 0x00a78b, 0x00a827, 0x00a82c, 0x00a82f, 0x00a83a, 0x00a873, 0x00a878, 0x00a8cd, 0x00a8da, 0x00a8f7, + 0x00a8fb, 0x00a8fb, 0x00a8fd, 0x00a8ff, 0x00a90a, 0x00a92d, 0x00a930, 0x00a95e, 0x00a960, 0x00a9c0, 0x00a9ce, 0x00a9cf, 0x00a9da, 0x00a9dd, 0x00a9e0, 0x00a9ef, 0x00a9fa, + 0x00aa4f, 0x00aa5a, 0x00aa5b, 0x00aa60, 0x00aa76, 0x00aa7a, 0x00aadd, 0x00aae0, 0x00aaef, 0x00aaf2, 0x00ab5a, 0x00ab5c, 0x00abea, 0x00abec, 0x00abef, 0x00abfa, 0x00d7ff, + 0x00e000, 0x00fb1e, 0x00fb20, 0x00fb28, 0x00fb2a, 0x00fbb1, 0x00fbc2, 0x00fd3d, 0x00fd40, 0x00fdfb, 0x00fdfe, 0x00fe0f, 0x00fe1a, 0x00fe2f, 0x00fe53, 0x00fe53, 0x00fe67, + 0x00fe67, 0x00fe6c, 0x00fefe, 0x00ff00, 0x00ff00, 0x00ff21, 0x00ff3a, 0x00ff3f, 0x00ff3f, 0x00ff41, 0x00ff5a, 0x00ff66, 0x00ffdf, 0x00ffe7, 0x00ffe7, 0x00ffef, 0x00fff8, + 0x00fffd, 0x0100ff, 0x010103, 0x010106, 0x010134, 0x010136, 0x010140, 0x010174, 0x01018f, 0x01018f, 0x01019c, 0x01019f, 0x0101a1, 0x0101cf, 0x0101fd, 0x0102e0, 0x0102fc, + 0x01031f, 0x010324, 0x01039e, 0x0103a0, 0x0103cf, 0x0103d1, 0x01049f, 0x0104aa, 0x01056e, 0x010570, 0x010856, 0x010860, 0x010876, 0x010880, 0x0108a6, 0x0108b0, 0x0108fa, + 0x010900, 0x010915, 0x01091c, 0x01091e, 0x010920, 0x01093e, 0x010940, 0x0109bb, 0x0109be, 0x0109bf, 0x0109d0, 0x0109d1, 0x010a00, 0x010a3f, 0x010a49, 0x010a4f, 0x010a59, + 0x010a7c, 0x010a80, 0x010a9c, 0x010aa0, 0x010ac7, 0x010ac9, 0x010aea, 0x010af7, 0x010b38, 0x010b40, 0x010b57, 0x010b60, 0x010b77, 0x010b80, 0x010b98, 0x010b9d, 0x010ba8, + 0x010bb0, 0x010cf9, 0x010d00, 0x010d2f, 0x010d3a, 0x010e5f, 0x010e7f, 0x010f1c, 0x010f27, 0x010f50, 0x010f5a, 0x011046, 0x01104e, 0x011051, 0x011070, 0x0110ba, 0x0110c2, + 0x0110cc, 0x0110ce, 0x0110ef, 0x0110fa, 0x011135, 0x011144, 
0x011173, 0x011176, 0x0111c4, 0x0111c9, 0x0111cc, 0x0111ce, 0x0111cf, 0x0111da, 0x0111da, 0x0111dc, 0x0111dc, + 0x0111e0, 0x0111e0, 0x0111f5, 0x011237, 0x01123e, 0x0112a8, 0x0112aa, 0x0112ef, 0x0112fa, 0x01144a, 0x01145a, 0x01145a, 0x01145c, 0x01145c, 0x01145e, 0x0114c5, 0x0114c7, + 0x0114cf, 0x0114da, 0x0115c0, 0x0115d8, 0x011640, 0x011644, 0x01164f, 0x01165a, 0x01165f, 0x01166d, 0x0116bf, 0x0116ca, 0x01172f, 0x011740, 0x01183a, 0x01183c, 0x0118df, + 0x0118f3, 0x0119e1, 0x0119e3, 0x011a3e, 0x011a47, 0x011a99, 0x011a9d, 0x011a9d, 0x011aa3, 0x011c40, 0x011c46, 0x011c4f, 0x011c6d, 0x011c6f, 0x011c72, 0x011d4f, 0x011d5a, + 0x011d9f, 0x011daa, 0x011ef6, 0x011ef9, 0x011fbf, 0x011ff2, 0x011ffe, 0x012000, 0x01246f, 0x012475, 0x01342f, 0x013439, 0x016a5f, 0x016a6a, 0x016a6d, 0x016a70, 0x016af4, + 0x016af6, 0x016b36, 0x016b40, 0x016b43, 0x016b46, 0x016b4f, 0x016b5a, 0x016b5a, 0x016b62, 0x016e7f, 0x016e9b, 0x016fe1, 0x016fe3, 0x01bc9b, 0x01bc9d, 0x01bc9e, 0x01bca4, + 0x01cfff, 0x01d0f6, 0x01d0ff, 0x01d127, 0x01d128, 0x01d165, 0x01d169, 0x01d16d, 0x01d172, 0x01d17b, 0x01d182, 0x01d185, 0x01d18b, 0x01d1aa, 0x01d1ad, 0x01d1e9, 0x01d1ff, + 0x01d242, 0x01d244, 0x01d246, 0x01d2df, 0x01d2f4, 0x01d2ff, 0x01d357, 0x01d35f, 0x01d379, 0x01d6c0, 0x01d6c2, 0x01d6da, 0x01d6dc, 0x01d6fa, 0x01d6fc, 0x01d714, 0x01d716, + 0x01d734, 0x01d736, 0x01d74e, 0x01d750, 0x01d76e, 0x01d770, 0x01d788, 0x01d78a, 0x01d7a8, 0x01d7aa, 0x01d7c2, 0x01d7c4, 0x01d7cd, 0x01da00, 0x01da36, 0x01da3b, 0x01da6c, + 0x01da75, 0x01da75, 0x01da84, 0x01da84, 0x01da8c, 0x01e13f, 0x01e14a, 0x01e14e, 0x01e150, 0x01e2ef, 0x01e2fa, 0x01e2fe, 0x01e300, 0x01e8c6, 0x01e8d0, 0x01e94f, 0x01e95a, + 0x01e95d, 0x01e960, 0x01ec70, 0x01ecb5, 0x01ed00, 0x01ed3e, 0x01eeef, 0x01eef2, 0x01efff, 0x01f02c, 0x01f02f, 0x01f094, 0x01f09f, 0x01f0af, 0x01f0b0, 0x01f0c0, 0x01f0c0, + 0x01f0d0, 0x01f0d0, 0x01f0f6, 0x01f0ff, 0x01f10d, 0x01f10f, 0x01f16d, 0x01f16f, 0x01f1ad, 0x01f1e5, 0x01f203, 0x01f20f, 0x01f23c, 0x01f23f, 0x01f249, 0x01f24f, 
0x01f252, + 0x01f25f, 0x01f266, 0x01f2ff, 0x01f6d6, 0x01f6df, 0x01f6ed, 0x01f6ef, 0x01f6fb, 0x01f6ff, 0x01f774, 0x01f77f, 0x01f7d9, 0x01f7df, 0x01f7ec, 0x01f7ff, 0x01f80c, 0x01f80f, + 0x01f848, 0x01f84f, 0x01f85a, 0x01f85f, 0x01f888, 0x01f88f, 0x01f8ae, 0x01f8ff, 0x01f90c, 0x01f90c, 0x01f972, 0x01f972, 0x01f977, 0x01f979, 0x01f9a3, 0x01f9a4, 0x01f9ab, + 0x01f9ad, 0x01f9cb, 0x01f9cc, 0x01fa54, 0x01fa5f, 0x01fa6e, 0x01fa6f, 0x01fa74, 0x01fa77, 0x01fa7b, 0x01fa7f, 0x01fa83, 0x01fa8f, 0x01fa96, 0x0e0000, 0x0e0002, 0x0e001f, + 0x0e0080, 0x10fffd)); + + POSIX_CHAR_CLASSES.put("blank", CodePointSet.createNoDedup( + 0x000020, 0x000020, 0x001680, 0x001680, 0x002000, 0x00200a, 0x002028, 0x002029, 0x00202f, 0x00202f, 0x00205f, 0x00205f, 0x003000, 0x003000)); + + POSIX_CHAR_CLASSES.put("cntrl", CodePointSet.createNoDedup( + 0x000000, 0x00001f, 0x00007f, 0x00009f, 0x0000ad, 0x0000ad, 0x000600, 0x000605, 0x00061c, 0x00061c, 0x0006dd, 0x0006dd, 0x00070f, 0x00070f, 0x0008e2, 0x0008e2, 0x00180e, + 0x00180e, 0x00200b, 0x00200f, 0x00202a, 0x00202e, 0x002060, 0x002064, 0x002066, 0x00206f, 0x00feff, 0x00feff, 0x00fff9, 0x00fffb, 0x0110bd, 0x0110bd, 0x0110cd, 0x0110cd, + 0x013430, 0x013438, 0x01bca0, 0x01bca3, 0x01d173, 0x01d17a, 0x0e0001, 0x0e0001, 0x0e0020, 0x0e007f)); + + POSIX_CHAR_CLASSES.put("digit", CodePointSet.createNoDedup( + 0x000030, 0x000039, 0x000660, 0x000669, 0x0006f0, 0x0006f9, 0x0007c0, 0x0007c9, 0x000966, 0x00096f, 0x0009e6, 0x0009ef, 0x000a66, 0x000a6f, 0x000ae6, 0x000aef, 0x000b66, + 0x000b6f, 0x000be6, 0x000bef, 0x000c66, 0x000c6f, 0x000ce6, 0x000cef, 0x000d66, 0x000d6f, 0x000de6, 0x000def, 0x000e50, 0x000e59, 0x000ed0, 0x000ed9, 0x000f20, 0x000f29, + 0x001040, 0x001049, 0x001090, 0x001099, 0x0016ee, 0x0016f0, 0x0017e0, 0x0017e9, 0x001810, 0x001819, 0x001946, 0x00194f, 0x0019d0, 0x0019d9, 0x001a80, 0x001a89, 0x001a90, + 0x001a99, 0x001b50, 0x001b59, 0x001bb0, 0x001bb9, 0x001c40, 0x001c49, 0x001c50, 0x001c59, 0x002160, 0x002182, 0x002185, 0x002188, 0x003007, 
0x003007, 0x003021, 0x003029, + 0x003038, 0x00303a, 0x00a620, 0x00a629, 0x00a6e6, 0x00a6ef, 0x00a8d0, 0x00a8d9, 0x00a900, 0x00a909, 0x00a9d0, 0x00a9d9, 0x00a9f0, 0x00a9f9, 0x00aa50, 0x00aa59, 0x00abf0, + 0x00abf9, 0x00ff10, 0x00ff19, 0x010140, 0x010174, 0x010341, 0x010341, 0x01034a, 0x01034a, 0x0103d1, 0x0103d5, 0x0104a0, 0x0104a9, 0x010d30, 0x010d39, 0x011066, 0x01106f, + 0x0110f0, 0x0110f9, 0x011136, 0x01113f, 0x0111d0, 0x0111d9, 0x0112f0, 0x0112f9, 0x011450, 0x011459, 0x0114d0, 0x0114d9, 0x011650, 0x011659, 0x0116c0, 0x0116c9, 0x011730, + 0x011739, 0x0118e0, 0x0118e9, 0x011c50, 0x011c59, 0x011d50, 0x011d59, 0x011da0, 0x011da9, 0x012400, 0x01246e, 0x016a60, 0x016a69, 0x016b50, 0x016b59, 0x01d7ce, 0x01d7ff, + 0x01e140, 0x01e149, 0x01e2f0, 0x01e2f9, 0x01e950, 0x01e959)); + + POSIX_CHAR_CLASSES.put("graph", CodePointSet.createNoDedup( + 0x000021, 0x00007e, 0x0000a0, 0x0000ac, 0x0000ae, 0x0000b1, 0x0000b4, 0x0000b8, 0x0000ba, 0x0000bb, 0x0000bf, 0x0005ff, 0x000606, 0x00061b, 0x00061d, 0x0006dc, 0x0006de, + 0x00070e, 0x000710, 0x0008e1, 0x0008e3, 0x0009f3, 0x0009fa, 0x000b71, 0x000b78, 0x000bef, 0x000bf3, 0x000c77, 0x000c7f, 0x000d57, 0x000d5f, 0x000d6f, 0x000d79, 0x000f29, + 0x000f34, 0x001368, 0x00137d, 0x00167f, 0x001681, 0x0017ef, 0x0017fa, 0x00180d, 0x00180f, 0x0019d9, 0x0019db, 0x001fff, 0x002010, 0x002027, 0x002030, 0x00205e, 0x002065, + 0x002065, 0x002071, 0x002073, 0x00207a, 0x00207f, 0x00208a, 0x00212a, 0x00212c, 0x00214f, 0x002160, 0x002188, 0x00218a, 0x002487, 0x00249c, 0x0024e9, 0x002500, 0x002775, + 0x002794, 0x002cfc, 0x002cfe, 0x002fff, 0x003001, 0x00321f, 0x00322a, 0x003247, 0x003250, 0x003250, 0x003260, 0x00327f, 0x00328a, 0x0032b0, 0x0032c0, 0x00a82f, 0x00a836, + 0x00d7ff, 0x00e000, 0x00fefe, 0x00ff00, 0x00fff8, 0x00fffc, 0x010106, 0x010134, 0x010174, 0x010179, 0x010189, 0x01018c, 0x0102e0, 0x0102fc, 0x01031f, 0x010324, 0x010857, + 0x010860, 0x010878, 0x010880, 0x0108a6, 0x0108b0, 0x0108fa, 0x010900, 0x010915, 0x01091c, 0x0109bb, 0x0109be, 0x0109bf, 
0x0109d0, 0x0109d1, 0x010a00, 0x010a3f, 0x010a49, + 0x010a7c, 0x010a7f, 0x010a9c, 0x010aa0, 0x010aea, 0x010af0, 0x010b57, 0x010b60, 0x010b77, 0x010b80, 0x010ba8, 0x010bb0, 0x010cf9, 0x010d00, 0x010e5f, 0x010e7f, 0x010f1c, + 0x010f27, 0x010f50, 0x010f55, 0x011051, 0x011066, 0x0110bc, 0x0110be, 0x0110cc, 0x0110ce, 0x0111e0, 0x0111f5, 0x011739, 0x01173c, 0x0118e9, 0x0118f3, 0x011c59, 0x011c6d, + 0x011fbf, 0x011fd5, 0x01342f, 0x013439, 0x016b5a, 0x016b62, 0x016e7f, 0x016e97, 0x01bc9f, 0x01bca4, 0x01d172, 0x01d17b, 0x01d2df, 0x01d2f4, 0x01d35f, 0x01d379, 0x01e8c6, + 0x01e8d0, 0x01ec70, 0x01ecac, 0x01ecac, 0x01ecb0, 0x01ecb0, 0x01ecb5, 0x01ed00, 0x01ed2e, 0x01ed2e, 0x01ed3e, 0x01f0ff, 0x01f10d, 0x0e0000, 0x0e0002, 0x0e001f, 0x0e0080, + 0x10fffd)); + + POSIX_CHAR_CLASSES.put("lower", CodePointSet.createNoDedup( + 0x000061, 0x00007a, 0x0000b5, 0x0000b5, 0x0000df, 0x0000f6, 0x0000f8, 0x0000ff, 0x000101, 0x000101, 0x000103, 0x000103, 0x000105, 0x000105, 0x000107, 0x000107, 0x000109, + 0x000109, 0x00010b, 0x00010b, 0x00010d, 0x00010d, 0x00010f, 0x00010f, 0x000111, 0x000111, 0x000113, 0x000113, 0x000115, 0x000115, 0x000117, 0x000117, 0x000119, 0x000119, + 0x00011b, 0x00011b, 0x00011d, 0x00011d, 0x00011f, 0x00011f, 0x000121, 0x000121, 0x000123, 0x000123, 0x000125, 0x000125, 0x000127, 0x000127, 0x000129, 0x000129, 0x00012b, + 0x00012b, 0x00012d, 0x00012d, 0x00012f, 0x00012f, 0x000131, 0x000131, 0x000133, 0x000133, 0x000135, 0x000135, 0x000137, 0x000138, 0x00013a, 0x00013a, 0x00013c, 0x00013c, + 0x00013e, 0x00013e, 0x000140, 0x000140, 0x000142, 0x000142, 0x000144, 0x000144, 0x000146, 0x000146, 0x000148, 0x000149, 0x00014b, 0x00014b, 0x00014d, 0x00014d, 0x00014f, + 0x00014f, 0x000151, 0x000151, 0x000153, 0x000153, 0x000155, 0x000155, 0x000157, 0x000157, 0x000159, 0x000159, 0x00015b, 0x00015b, 0x00015d, 0x00015d, 0x00015f, 0x00015f, + 0x000161, 0x000161, 0x000163, 0x000163, 0x000165, 0x000165, 0x000167, 0x000167, 0x000169, 0x000169, 0x00016b, 0x00016b, 0x00016d, 0x00016d, 0x00016f, 
0x00016f, 0x000171, + 0x000171, 0x000173, 0x000173, 0x000175, 0x000175, 0x000177, 0x000177, 0x00017a, 0x00017a, 0x00017c, 0x00017c, 0x00017e, 0x000180, 0x000183, 0x000183, 0x000185, 0x000185, + 0x000188, 0x000188, 0x00018c, 0x00018d, 0x000192, 0x000192, 0x000195, 0x000195, 0x000199, 0x00019b, 0x00019e, 0x00019e, 0x0001a1, 0x0001a1, 0x0001a3, 0x0001a3, 0x0001a5, + 0x0001a5, 0x0001a8, 0x0001a8, 0x0001aa, 0x0001ab, 0x0001ad, 0x0001ad, 0x0001b0, 0x0001b0, 0x0001b4, 0x0001b4, 0x0001b6, 0x0001b6, 0x0001b9, 0x0001ba, 0x0001bd, 0x0001bf, + 0x0001c5, 0x0001c6, 0x0001c8, 0x0001c9, 0x0001cb, 0x0001cc, 0x0001ce, 0x0001ce, 0x0001d0, 0x0001d0, 0x0001d2, 0x0001d2, 0x0001d4, 0x0001d4, 0x0001d6, 0x0001d6, 0x0001d8, + 0x0001d8, 0x0001da, 0x0001da, 0x0001dc, 0x0001dd, 0x0001df, 0x0001df, 0x0001e1, 0x0001e1, 0x0001e3, 0x0001e3, 0x0001e5, 0x0001e5, 0x0001e7, 0x0001e7, 0x0001e9, 0x0001e9, + 0x0001eb, 0x0001eb, 0x0001ed, 0x0001ed, 0x0001ef, 0x0001f0, 0x0001f2, 0x0001f3, 0x0001f5, 0x0001f5, 0x0001f9, 0x0001f9, 0x0001fb, 0x0001fb, 0x0001fd, 0x0001fd, 0x0001ff, + 0x0001ff, 0x000201, 0x000201, 0x000203, 0x000203, 0x000205, 0x000205, 0x000207, 0x000207, 0x000209, 0x000209, 0x00020b, 0x00020b, 0x00020d, 0x00020d, 0x00020f, 0x00020f, + 0x000211, 0x000211, 0x000213, 0x000213, 0x000215, 0x000215, 0x000217, 0x000217, 0x000219, 0x000219, 0x00021b, 0x00021b, 0x00021d, 0x00021d, 0x00021f, 0x00021f, 0x000221, + 0x000221, 0x000223, 0x000223, 0x000225, 0x000225, 0x000227, 0x000227, 0x000229, 0x000229, 0x00022b, 0x00022b, 0x00022d, 0x00022d, 0x00022f, 0x00022f, 0x000231, 0x000231, + 0x000233, 0x000239, 0x00023c, 0x00023c, 0x00023f, 0x000240, 0x000242, 0x000242, 0x000247, 0x000247, 0x000249, 0x000249, 0x00024b, 0x00024b, 0x00024d, 0x00024d, 0x00024f, + 0x000293, 0x000295, 0x0002af, 0x000345, 0x000345, 0x000371, 0x000371, 0x000373, 0x000373, 0x000377, 0x000377, 0x00037b, 0x00037d, 0x000390, 0x000390, 0x0003ac, 0x0003ce, + 0x0003d0, 0x0003d1, 0x0003d5, 0x0003d7, 0x0003d9, 0x0003d9, 0x0003db, 0x0003db, 
0x0003dd, 0x0003dd, 0x0003df, 0x0003df, 0x0003e1, 0x0003e1, 0x0003e3, 0x0003e3, 0x0003e5, + 0x0003e5, 0x0003e7, 0x0003e7, 0x0003e9, 0x0003e9, 0x0003eb, 0x0003eb, 0x0003ed, 0x0003ed, 0x0003ef, 0x0003f3, 0x0003f5, 0x0003f5, 0x0003f8, 0x0003f8, 0x0003fb, 0x0003fc, + 0x000430, 0x00045f, 0x000461, 0x000461, 0x000463, 0x000463, 0x000465, 0x000465, 0x000467, 0x000467, 0x000469, 0x000469, 0x00046b, 0x00046b, 0x00046d, 0x00046d, 0x00046f, + 0x00046f, 0x000471, 0x000471, 0x000473, 0x000473, 0x000475, 0x000475, 0x000477, 0x000477, 0x000479, 0x000479, 0x00047b, 0x00047b, 0x00047d, 0x00047d, 0x00047f, 0x00047f, + 0x000481, 0x000481, 0x00048b, 0x00048b, 0x00048d, 0x00048d, 0x00048f, 0x00048f, 0x000491, 0x000491, 0x000493, 0x000493, 0x000495, 0x000495, 0x000497, 0x000497, 0x000499, + 0x000499, 0x00049b, 0x00049b, 0x00049d, 0x00049d, 0x00049f, 0x00049f, 0x0004a1, 0x0004a1, 0x0004a3, 0x0004a3, 0x0004a5, 0x0004a5, 0x0004a7, 0x0004a7, 0x0004a9, 0x0004a9, + 0x0004ab, 0x0004ab, 0x0004ad, 0x0004ad, 0x0004af, 0x0004af, 0x0004b1, 0x0004b1, 0x0004b3, 0x0004b3, 0x0004b5, 0x0004b5, 0x0004b7, 0x0004b7, 0x0004b9, 0x0004b9, 0x0004bb, + 0x0004bb, 0x0004bd, 0x0004bd, 0x0004bf, 0x0004bf, 0x0004c2, 0x0004c2, 0x0004c4, 0x0004c4, 0x0004c6, 0x0004c6, 0x0004c8, 0x0004c8, 0x0004ca, 0x0004ca, 0x0004cc, 0x0004cc, + 0x0004ce, 0x0004cf, 0x0004d1, 0x0004d1, 0x0004d3, 0x0004d3, 0x0004d5, 0x0004d5, 0x0004d7, 0x0004d7, 0x0004d9, 0x0004d9, 0x0004db, 0x0004db, 0x0004dd, 0x0004dd, 0x0004df, + 0x0004df, 0x0004e1, 0x0004e1, 0x0004e3, 0x0004e3, 0x0004e5, 0x0004e5, 0x0004e7, 0x0004e7, 0x0004e9, 0x0004e9, 0x0004eb, 0x0004eb, 0x0004ed, 0x0004ed, 0x0004ef, 0x0004ef, + 0x0004f1, 0x0004f1, 0x0004f3, 0x0004f3, 0x0004f5, 0x0004f5, 0x0004f7, 0x0004f7, 0x0004f9, 0x0004f9, 0x0004fb, 0x0004fb, 0x0004fd, 0x0004fd, 0x0004ff, 0x0004ff, 0x000501, + 0x000501, 0x000503, 0x000503, 0x000505, 0x000505, 0x000507, 0x000507, 0x000509, 0x000509, 0x00050b, 0x00050b, 0x00050d, 0x00050d, 0x00050f, 0x00050f, 0x000511, 0x000511, + 0x000513, 
0x000513, 0x000515, 0x000515, 0x000517, 0x000517, 0x000519, 0x000519, 0x00051b, 0x00051b, 0x00051d, 0x00051d, 0x00051f, 0x00051f, 0x000521, 0x000521, 0x000523, + 0x000523, 0x000525, 0x000525, 0x000527, 0x000527, 0x000529, 0x000529, 0x00052b, 0x00052b, 0x00052d, 0x00052d, 0x00052f, 0x00052f, 0x000560, 0x000588, 0x0010d0, 0x0010fa, + 0x0010fd, 0x0010ff, 0x0013f8, 0x0013fd, 0x001c80, 0x001c88, 0x001d00, 0x001d2b, 0x001d6b, 0x001d77, 0x001d79, 0x001d9a, 0x001e01, 0x001e01, 0x001e03, 0x001e03, 0x001e05, + 0x001e05, 0x001e07, 0x001e07, 0x001e09, 0x001e09, 0x001e0b, 0x001e0b, 0x001e0d, 0x001e0d, 0x001e0f, 0x001e0f, 0x001e11, 0x001e11, 0x001e13, 0x001e13, 0x001e15, 0x001e15, + 0x001e17, 0x001e17, 0x001e19, 0x001e19, 0x001e1b, 0x001e1b, 0x001e1d, 0x001e1d, 0x001e1f, 0x001e1f, 0x001e21, 0x001e21, 0x001e23, 0x001e23, 0x001e25, 0x001e25, 0x001e27, + 0x001e27, 0x001e29, 0x001e29, 0x001e2b, 0x001e2b, 0x001e2d, 0x001e2d, 0x001e2f, 0x001e2f, 0x001e31, 0x001e31, 0x001e33, 0x001e33, 0x001e35, 0x001e35, 0x001e37, 0x001e37, + 0x001e39, 0x001e39, 0x001e3b, 0x001e3b, 0x001e3d, 0x001e3d, 0x001e3f, 0x001e3f, 0x001e41, 0x001e41, 0x001e43, 0x001e43, 0x001e45, 0x001e45, 0x001e47, 0x001e47, 0x001e49, + 0x001e49, 0x001e4b, 0x001e4b, 0x001e4d, 0x001e4d, 0x001e4f, 0x001e4f, 0x001e51, 0x001e51, 0x001e53, 0x001e53, 0x001e55, 0x001e55, 0x001e57, 0x001e57, 0x001e59, 0x001e59, + 0x001e5b, 0x001e5b, 0x001e5d, 0x001e5d, 0x001e5f, 0x001e5f, 0x001e61, 0x001e61, 0x001e63, 0x001e63, 0x001e65, 0x001e65, 0x001e67, 0x001e67, 0x001e69, 0x001e69, 0x001e6b, + 0x001e6b, 0x001e6d, 0x001e6d, 0x001e6f, 0x001e6f, 0x001e71, 0x001e71, 0x001e73, 0x001e73, 0x001e75, 0x001e75, 0x001e77, 0x001e77, 0x001e79, 0x001e79, 0x001e7b, 0x001e7b, + 0x001e7d, 0x001e7d, 0x001e7f, 0x001e7f, 0x001e81, 0x001e81, 0x001e83, 0x001e83, 0x001e85, 0x001e85, 0x001e87, 0x001e87, 0x001e89, 0x001e89, 0x001e8b, 0x001e8b, 0x001e8d, + 0x001e8d, 0x001e8f, 0x001e8f, 0x001e91, 0x001e91, 0x001e93, 0x001e93, 0x001e95, 0x001e9d, 0x001e9f, 0x001e9f, 
0x001ea1, 0x001ea1, 0x001ea3, 0x001ea3, 0x001ea5, 0x001ea5, + 0x001ea7, 0x001ea7, 0x001ea9, 0x001ea9, 0x001eab, 0x001eab, 0x001ead, 0x001ead, 0x001eaf, 0x001eaf, 0x001eb1, 0x001eb1, 0x001eb3, 0x001eb3, 0x001eb5, 0x001eb5, 0x001eb7, + 0x001eb7, 0x001eb9, 0x001eb9, 0x001ebb, 0x001ebb, 0x001ebd, 0x001ebd, 0x001ebf, 0x001ebf, 0x001ec1, 0x001ec1, 0x001ec3, 0x001ec3, 0x001ec5, 0x001ec5, 0x001ec7, 0x001ec7, + 0x001ec9, 0x001ec9, 0x001ecb, 0x001ecb, 0x001ecd, 0x001ecd, 0x001ecf, 0x001ecf, 0x001ed1, 0x001ed1, 0x001ed3, 0x001ed3, 0x001ed5, 0x001ed5, 0x001ed7, 0x001ed7, 0x001ed9, + 0x001ed9, 0x001edb, 0x001edb, 0x001edd, 0x001edd, 0x001edf, 0x001edf, 0x001ee1, 0x001ee1, 0x001ee3, 0x001ee3, 0x001ee5, 0x001ee5, 0x001ee7, 0x001ee7, 0x001ee9, 0x001ee9, + 0x001eeb, 0x001eeb, 0x001eed, 0x001eed, 0x001eef, 0x001eef, 0x001ef1, 0x001ef1, 0x001ef3, 0x001ef3, 0x001ef5, 0x001ef5, 0x001ef7, 0x001ef7, 0x001ef9, 0x001ef9, 0x001efb, + 0x001efb, 0x001efd, 0x001efd, 0x001eff, 0x001f07, 0x001f10, 0x001f15, 0x001f20, 0x001f27, 0x001f30, 0x001f37, 0x001f40, 0x001f45, 0x001f50, 0x001f57, 0x001f60, 0x001f67, + 0x001f70, 0x001f7d, 0x001f80, 0x001f87, 0x001f90, 0x001f97, 0x001fa0, 0x001fa7, 0x001fb0, 0x001fb4, 0x001fb6, 0x001fb7, 0x001fbe, 0x001fbe, 0x001fc2, 0x001fc4, 0x001fc6, + 0x001fc7, 0x001fd0, 0x001fd3, 0x001fd6, 0x001fd7, 0x001fe0, 0x001fe7, 0x001ff2, 0x001ff4, 0x001ff6, 0x001ff7, 0x00207f, 0x00207f, 0x00210a, 0x00210a, 0x00210e, 0x00210f, + 0x002113, 0x002113, 0x00212f, 0x00212f, 0x002134, 0x002134, 0x002139, 0x002139, 0x00213c, 0x00213d, 0x002146, 0x002149, 0x00214e, 0x00214e, 0x002170, 0x00217f, 0x002184, + 0x002184, 0x0024d0, 0x0024e9, 0x002c30, 0x002c5e, 0x002c61, 0x002c61, 0x002c65, 0x002c66, 0x002c68, 0x002c68, 0x002c6a, 0x002c6a, 0x002c6c, 0x002c6c, 0x002c71, 0x002c71, + 0x002c73, 0x002c74, 0x002c76, 0x002c7b, 0x002c81, 0x002c81, 0x002c83, 0x002c83, 0x002c85, 0x002c85, 0x002c87, 0x002c87, 0x002c89, 0x002c89, 0x002c8b, 0x002c8b, 0x002c8d, + 0x002c8d, 0x002c8f, 0x002c8f, 0x002c91, 
0x002c91, 0x002c93, 0x002c93, 0x002c95, 0x002c95, 0x002c97, 0x002c97, 0x002c99, 0x002c99, 0x002c9b, 0x002c9b, 0x002c9d, 0x002c9d, + 0x002c9f, 0x002c9f, 0x002ca1, 0x002ca1, 0x002ca3, 0x002ca3, 0x002ca5, 0x002ca5, 0x002ca7, 0x002ca7, 0x002ca9, 0x002ca9, 0x002cab, 0x002cab, 0x002cad, 0x002cad, 0x002caf, + 0x002caf, 0x002cb1, 0x002cb1, 0x002cb3, 0x002cb3, 0x002cb5, 0x002cb5, 0x002cb7, 0x002cb7, 0x002cb9, 0x002cb9, 0x002cbb, 0x002cbb, 0x002cbd, 0x002cbd, 0x002cbf, 0x002cbf, + 0x002cc1, 0x002cc1, 0x002cc3, 0x002cc3, 0x002cc5, 0x002cc5, 0x002cc7, 0x002cc7, 0x002cc9, 0x002cc9, 0x002ccb, 0x002ccb, 0x002ccd, 0x002ccd, 0x002ccf, 0x002ccf, 0x002cd1, + 0x002cd1, 0x002cd3, 0x002cd3, 0x002cd5, 0x002cd5, 0x002cd7, 0x002cd7, 0x002cd9, 0x002cd9, 0x002cdb, 0x002cdb, 0x002cdd, 0x002cdd, 0x002cdf, 0x002cdf, 0x002ce1, 0x002ce1, + 0x002ce3, 0x002ce4, 0x002cec, 0x002cec, 0x002cee, 0x002cee, 0x002cf3, 0x002cf3, 0x002d00, 0x002d25, 0x002d27, 0x002d27, 0x002d2d, 0x002d2d, 0x00a641, 0x00a641, 0x00a643, + 0x00a643, 0x00a645, 0x00a645, 0x00a647, 0x00a647, 0x00a649, 0x00a649, 0x00a64b, 0x00a64b, 0x00a64d, 0x00a64d, 0x00a64f, 0x00a64f, 0x00a651, 0x00a651, 0x00a653, 0x00a653, + 0x00a655, 0x00a655, 0x00a657, 0x00a657, 0x00a659, 0x00a659, 0x00a65b, 0x00a65b, 0x00a65d, 0x00a65d, 0x00a65f, 0x00a65f, 0x00a661, 0x00a661, 0x00a663, 0x00a663, 0x00a665, + 0x00a665, 0x00a667, 0x00a667, 0x00a669, 0x00a669, 0x00a66b, 0x00a66b, 0x00a66d, 0x00a66d, 0x00a681, 0x00a681, 0x00a683, 0x00a683, 0x00a685, 0x00a685, 0x00a687, 0x00a687, + 0x00a689, 0x00a689, 0x00a68b, 0x00a68b, 0x00a68d, 0x00a68d, 0x00a68f, 0x00a68f, 0x00a691, 0x00a691, 0x00a693, 0x00a693, 0x00a695, 0x00a695, 0x00a697, 0x00a697, 0x00a699, + 0x00a699, 0x00a69b, 0x00a69b, 0x00a723, 0x00a723, 0x00a725, 0x00a725, 0x00a727, 0x00a727, 0x00a729, 0x00a729, 0x00a72b, 0x00a72b, 0x00a72d, 0x00a72d, 0x00a72f, 0x00a731, + 0x00a733, 0x00a733, 0x00a735, 0x00a735, 0x00a737, 0x00a737, 0x00a739, 0x00a739, 0x00a73b, 0x00a73b, 0x00a73d, 0x00a73d, 0x00a73f, 0x00a73f, 
0x00a741, 0x00a741, 0x00a743, + 0x00a743, 0x00a745, 0x00a745, 0x00a747, 0x00a747, 0x00a749, 0x00a749, 0x00a74b, 0x00a74b, 0x00a74d, 0x00a74d, 0x00a74f, 0x00a74f, 0x00a751, 0x00a751, 0x00a753, 0x00a753, + 0x00a755, 0x00a755, 0x00a757, 0x00a757, 0x00a759, 0x00a759, 0x00a75b, 0x00a75b, 0x00a75d, 0x00a75d, 0x00a75f, 0x00a75f, 0x00a761, 0x00a761, 0x00a763, 0x00a763, 0x00a765, + 0x00a765, 0x00a767, 0x00a767, 0x00a769, 0x00a769, 0x00a76b, 0x00a76b, 0x00a76d, 0x00a76d, 0x00a76f, 0x00a76f, 0x00a771, 0x00a778, 0x00a77a, 0x00a77a, 0x00a77c, 0x00a77c, + 0x00a77f, 0x00a77f, 0x00a781, 0x00a781, 0x00a783, 0x00a783, 0x00a785, 0x00a785, 0x00a787, 0x00a787, 0x00a78c, 0x00a78c, 0x00a78e, 0x00a78e, 0x00a791, 0x00a791, 0x00a793, + 0x00a795, 0x00a797, 0x00a797, 0x00a799, 0x00a799, 0x00a79b, 0x00a79b, 0x00a79d, 0x00a79d, 0x00a79f, 0x00a79f, 0x00a7a1, 0x00a7a1, 0x00a7a3, 0x00a7a3, 0x00a7a5, 0x00a7a5, + 0x00a7a7, 0x00a7a7, 0x00a7a9, 0x00a7a9, 0x00a7af, 0x00a7af, 0x00a7b5, 0x00a7b5, 0x00a7b7, 0x00a7b7, 0x00a7b9, 0x00a7b9, 0x00a7bb, 0x00a7bb, 0x00a7bd, 0x00a7bd, 0x00a7bf, + 0x00a7bf, 0x00a7c3, 0x00a7c3, 0x00a7fa, 0x00a7fa, 0x00ab30, 0x00ab5a, 0x00ab60, 0x00ab67, 0x00ab70, 0x00abbf, 0x00fb00, 0x00fb06, 0x00fb13, 0x00fb17, 0x00ff41, 0x00ff5a, + 0x010428, 0x01044f, 0x0104d8, 0x0104fb, 0x010cc0, 0x010cf2, 0x0118c0, 0x0118df, 0x016e60, 0x016e7f, 0x01d41a, 0x01d433, 0x01d44e, 0x01d454, 0x01d456, 0x01d467, 0x01d482, + 0x01d49b, 0x01d4b6, 0x01d4b9, 0x01d4bb, 0x01d4bb, 0x01d4bd, 0x01d4c3, 0x01d4c5, 0x01d4cf, 0x01d4ea, 0x01d503, 0x01d51e, 0x01d537, 0x01d552, 0x01d56b, 0x01d586, 0x01d59f, + 0x01d5ba, 0x01d5d3, 0x01d5ee, 0x01d607, 0x01d622, 0x01d63b, 0x01d656, 0x01d66f, 0x01d68a, 0x01d6a5, 0x01d6c2, 0x01d6da, 0x01d6dc, 0x01d6e1, 0x01d6fc, 0x01d714, 0x01d716, + 0x01d71b, 0x01d736, 0x01d74e, 0x01d750, 0x01d755, 0x01d770, 0x01d788, 0x01d78a, 0x01d78f, 0x01d7aa, 0x01d7c2, 0x01d7c4, 0x01d7c9, 0x01d7cb, 0x01d7cb, 0x01e922, 0x01e943)); + + POSIX_CHAR_CLASSES.put("print", CodePointSet.createNoDedup( + 0x000020, 
0x00007e, 0x0000a0, 0x0000ac, 0x0000ae, 0x0005ff, 0x000606, 0x00061b, 0x00061d, 0x0006dc, 0x0006de, 0x00070e, 0x000710, 0x0008e1, 0x0008e3, 0x00180d, 0x00180f, + 0x00200a, 0x002010, 0x002029, 0x00202f, 0x00205f, 0x002065, 0x002065, 0x002070, 0x00d7ff, 0x00e000, 0x00fefe, 0x00ff00, 0x00fff8, 0x00fffc, 0x0110bc, 0x0110be, 0x0110cc, + 0x0110ce, 0x01342f, 0x013439, 0x01bc9f, 0x01bca4, 0x01d172, 0x01d17b, 0x0e0000, 0x0e0002, 0x0e001f, 0x0e0080, 0x10fffd)); + + POSIX_CHAR_CLASSES.put("punct", CodePointSet.createNoDedup( + 0x000021, 0x00002f, 0x00003a, 0x000040, 0x00005b, 0x000060, 0x00007b, 0x00007e, 0x0000a0, 0x0000a9, 0x0000ab, 0x0000ac, 0x0000ae, 0x0000b1, 0x0000b4, 0x0000b4, 0x0000b6, + 0x0000b8, 0x0000bb, 0x0000bb, 0x0000bf, 0x0000bf, 0x0000d7, 0x0000d7, 0x0000f7, 0x0000f7, 0x0002b9, 0x0002ba, 0x0002c2, 0x0002c6, 0x0002c8, 0x0002c8, 0x0002cc, 0x0002cd, + 0x0002cf, 0x0002d0, 0x0002d2, 0x0002d8, 0x0002da, 0x0002df, 0x0002e5, 0x0002eb, 0x0002ed, 0x0002ed, 0x0002ef, 0x0002ff, 0x000375, 0x000375, 0x00037e, 0x00037e, 0x000384, + 0x000385, 0x000387, 0x000387, 0x0003f6, 0x0003f6, 0x000482, 0x000482, 0x00055a, 0x00055f, 0x000589, 0x00058a, 0x00058d, 0x00058f, 0x0005b0, 0x0005b9, 0x0005bb, 0x0005c4, + 0x0005c6, 0x0005c6, 0x0005f3, 0x0005f4, 0x000606, 0x00060f, 0x00061b, 0x00061b, 0x00061e, 0x00061f, 0x00066a, 0x00066d, 0x0006d4, 0x0006d4, 0x0006de, 0x0006de, 0x0006e9, + 0x0006e9, 0x0006fd, 0x0006fe, 0x000700, 0x00070d, 0x0007f6, 0x0007f9, 0x0007fe, 0x0007ff, 0x000830, 0x00083e, 0x00085e, 0x00085e, 0x000901, 0x000903, 0x00093c, 0x00094d, + 0x000964, 0x000965, 0x000970, 0x000970, 0x0009f2, 0x0009f3, 0x0009fa, 0x0009fb, 0x0009fd, 0x0009fd, 0x000a76, 0x000a76, 0x000af0, 0x000af1, 0x000b70, 0x000b70, 0x000bf3, + 0x000bfa, 0x000c77, 0x000c77, 0x000c7f, 0x000c7f, 0x000c84, 0x000c84, 0x000d4f, 0x000d4f, 0x000d79, 0x000d79, 0x000df4, 0x000df4, 0x000e2f, 0x000e2f, 0x000e3f, 0x000e3f, + 0x000e46, 0x000e46, 0x000e4e, 0x000e4f, 0x000e5a, 0x000e5b, 0x000f01, 0x000f17, 0x000f1a, 0x000f1f, 
0x000f34, 0x000f34, 0x000f36, 0x000f36, 0x000f38, 0x000f38, 0x000f3a, + 0x000f3d, 0x000f85, 0x000f85, 0x000fbe, 0x000fc5, 0x000fc7, 0x000fcc, 0x000fce, 0x000fda, 0x00104a, 0x00104f, 0x00109e, 0x00109f, 0x0010fb, 0x0010fb, 0x001360, 0x001368, + 0x001390, 0x001399, 0x001400, 0x001400, 0x00166d, 0x00166e, 0x00169b, 0x00169c, 0x0016eb, 0x0016ed, 0x001735, 0x001736, 0x0017d4, 0x0017d6, 0x0017d8, 0x0017db, 0x001800, + 0x00180a, 0x001940, 0x001940, 0x001944, 0x001945, 0x0019de, 0x0019ff, 0x001a1e, 0x001a1f, 0x001aa0, 0x001aa6, 0x001aa8, 0x001aad, 0x001b5a, 0x001b6a, 0x001b74, 0x001b7c, + 0x001bfc, 0x001bff, 0x001c3b, 0x001c3f, 0x001c7e, 0x001c7f, 0x001cc0, 0x001cc7, 0x001cd3, 0x001cd3, 0x001fbd, 0x001fbd, 0x001fbf, 0x001fc1, 0x001fcd, 0x001fcf, 0x001fdd, + 0x001fdf, 0x001fed, 0x001fef, 0x001ffd, 0x001ffe, 0x002010, 0x002014, 0x002016, 0x002027, 0x002030, 0x00205e, 0x00207a, 0x00207f, 0x00208a, 0x00208e, 0x0020a0, 0x0020bf, + 0x002100, 0x002101, 0x002103, 0x002106, 0x002108, 0x002109, 0x002114, 0x002114, 0x002116, 0x002118, 0x00211e, 0x002123, 0x002125, 0x002127, 0x002129, 0x002129, 0x00212e, + 0x00212e, 0x00213a, 0x00213b, 0x002140, 0x002144, 0x00214a, 0x00214d, 0x00214f, 0x00214f, 0x00218a, 0x00218b, 0x002190, 0x0023ff, 0x002422, 0x002426, 0x002440, 0x00244a, + 0x00249c, 0x0024e9, 0x002500, 0x002775, 0x002794, 0x002b73, 0x002b76, 0x002b95, 0x002b98, 0x002bff, 0x002ce5, 0x002cea, 0x002cf9, 0x002cfc, 0x002cfe, 0x002cff, 0x002d70, + 0x002d70, 0x002e00, 0x002e2e, 0x002e30, 0x002e4f, 0x002ff0, 0x002ffb, 0x003001, 0x003002, 0x003004, 0x003004, 0x003008, 0x00301b, 0x00301d, 0x003020, 0x003030, 0x003030, + 0x003036, 0x003037, 0x00303d, 0x00303f, 0x00309b, 0x00309c, 0x0030a0, 0x0030a0, 0x0030fb, 0x0030fb, 0x003190, 0x003191, 0x0031c0, 0x0031e3, 0x003200, 0x00321e, 0x00322a, + 0x003247, 0x003250, 0x003250, 0x003260, 0x00327f, 0x00328a, 0x0032b0, 0x0032c0, 0x0033ff, 0x004dc0, 0x004dff, 0x00a490, 0x00a4c6, 0x00a4fe, 0x00a4ff, 0x00a60d, 0x00a60f, + 0x00a673, 0x00a673, 0x00a67e, 
0x00a67e, 0x00a6f2, 0x00a6f7, 0x00a700, 0x00a716, 0x00a720, 0x00a721, 0x00a789, 0x00a78a, 0x00a828, 0x00a82b, 0x00a836, 0x00a839, 0x00a874, + 0x00a877, 0x00a8ce, 0x00a8cf, 0x00a8f8, 0x00a8fa, 0x00a8fc, 0x00a8fc, 0x00a92e, 0x00a92f, 0x00a95f, 0x00a95f, 0x00a9c1, 0x00a9cd, 0x00a9de, 0x00a9df, 0x00aa5c, 0x00aa5f, + 0x00aa77, 0x00aa79, 0x00aade, 0x00aadf, 0x00aaf0, 0x00aaf1, 0x00ab5b, 0x00ab5b, 0x00abeb, 0x00abeb, 0x00fb1f, 0x00fb1f, 0x00fb29, 0x00fb29, 0x00fbb2, 0x00fbc1, 0x00fd3e, + 0x00fd3f, 0x00fdfc, 0x00fdfd, 0x00fe10, 0x00fe19, 0x00fe30, 0x00fe52, 0x00fe54, 0x00fe66, 0x00fe68, 0x00fe6b, 0x00ff01, 0x00ff0f, 0x00ff1a, 0x00ff20, 0x00ff3b, 0x00ff3e, + 0x00ff40, 0x00ff40, 0x00ff5b, 0x00ff65, 0x00ffe0, 0x00ffe6, 0x00ffe8, 0x00ffee, 0x00fffc, 0x00fffc, 0x010100, 0x010102, 0x010137, 0x01013f, 0x010179, 0x010189, 0x01018c, + 0x01018e, 0x010190, 0x01019b, 0x0101a0, 0x0101a0, 0x0101d0, 0x0101fc, 0x01039f, 0x01039f, 0x0103d0, 0x0103d0, 0x01056f, 0x01056f, 0x010857, 0x010857, 0x010877, 0x010878, + 0x01091f, 0x01091f, 0x01093f, 0x01093f, 0x010a50, 0x010a58, 0x010a7f, 0x010a7f, 0x010ac8, 0x010ac8, 0x010af0, 0x010af6, 0x010b39, 0x010b3f, 0x010b99, 0x010b9c, 0x010f55, + 0x010f59, 0x011047, 0x01104d, 0x0110bb, 0x0110bc, 0x0110be, 0x0110c1, 0x011140, 0x011143, 0x011174, 0x011175, 0x0111c5, 0x0111c8, 0x0111cd, 0x0111cd, 0x0111db, 0x0111db, + 0x0111dd, 0x0111df, 0x011238, 0x01123d, 0x0112a9, 0x0112a9, 0x01144b, 0x01144f, 0x01145b, 0x01145b, 0x01145d, 0x01145d, 0x0114c6, 0x0114c6, 0x0115c1, 0x0115d7, 0x011641, + 0x011643, 0x011660, 0x01166c, 0x01173c, 0x01173f, 0x01183b, 0x01183b, 0x0119e2, 0x0119e2, 0x011a3f, 0x011a46, 0x011a9a, 0x011a9c, 0x011a9e, 0x011aa2, 0x011c41, 0x011c45, + 0x011c70, 0x011c71, 0x011ef7, 0x011ef8, 0x011fd5, 0x011ff1, 0x011fff, 0x011fff, 0x012470, 0x012474, 0x016a6e, 0x016a6f, 0x016af5, 0x016af5, 0x016b37, 0x016b3f, 0x016b44, + 0x016b45, 0x016e97, 0x016e9a, 0x016fe2, 0x016fe2, 0x01bc9c, 0x01bc9c, 0x01bc9f, 0x01bc9f, 0x01d000, 0x01d0f5, 0x01d100, 0x01d126, 
0x01d129, 0x01d164, 0x01d16a, 0x01d16c, + 0x01d183, 0x01d184, 0x01d18c, 0x01d1a9, 0x01d1ae, 0x01d1e8, 0x01d200, 0x01d241, 0x01d245, 0x01d245, 0x01d300, 0x01d356, 0x01d6c1, 0x01d6c1, 0x01d6db, 0x01d6db, 0x01d6fb, + 0x01d6fb, 0x01d715, 0x01d715, 0x01d735, 0x01d735, 0x01d74f, 0x01d74f, 0x01d76f, 0x01d76f, 0x01d789, 0x01d789, 0x01d7a9, 0x01d7a9, 0x01d7c3, 0x01d7c3, 0x01d800, 0x01d9ff, + 0x01da37, 0x01da3a, 0x01da6d, 0x01da74, 0x01da76, 0x01da83, 0x01da85, 0x01da8b, 0x01e14f, 0x01e14f, 0x01e2ff, 0x01e2ff, 0x01e95e, 0x01e95f, 0x01ecac, 0x01ecac, 0x01ecb0, + 0x01ecb0, 0x01ed2e, 0x01ed2e, 0x01eef0, 0x01eef1, 0x01f000, 0x01f02b, 0x01f030, 0x01f093, 0x01f0a0, 0x01f0ae, 0x01f0b1, 0x01f0bf, 0x01f0c1, 0x01f0cf, 0x01f0d1, 0x01f0f5, + 0x01f110, 0x01f16c, 0x01f170, 0x01f1ac, 0x01f1e6, 0x01f202, 0x01f210, 0x01f23b, 0x01f240, 0x01f248, 0x01f250, 0x01f251, 0x01f260, 0x01f265, 0x01f300, 0x01f6d5, 0x01f6e0, + 0x01f6ec, 0x01f6f0, 0x01f6fa, 0x01f700, 0x01f773, 0x01f780, 0x01f7d8, 0x01f7e0, 0x01f7eb, 0x01f800, 0x01f80b, 0x01f810, 0x01f847, 0x01f850, 0x01f859, 0x01f860, 0x01f887, + 0x01f890, 0x01f8ad, 0x01f900, 0x01f90b, 0x01f90d, 0x01f971, 0x01f973, 0x01f976, 0x01f97a, 0x01f9a2, 0x01f9a5, 0x01f9aa, 0x01f9ae, 0x01f9ca, 0x01f9cd, 0x01fa53, 0x01fa60, + 0x01fa6d, 0x01fa70, 0x01fa73, 0x01fa78, 0x01fa7a, 0x01fa80, 0x01fa82, 0x01fa90, 0x01fa95)); + + POSIX_CHAR_CLASSES.put("space", CodePointSet.createNoDedup( + 0x000009, 0x00000d, 0x000020, 0x000020, 0x001680, 0x001680, 0x002000, 0x00200a, 0x002028, 0x002029, 0x00202f, 0x00202f, 0x00205f, 0x00205f, 0x003000, 0x003000)); + + POSIX_CHAR_CLASSES.put("upper", CodePointSet.createNoDedup( + 0x000041, 0x00005a, 0x0000c0, 0x0000d6, 0x0000d8, 0x0000de, 0x000100, 0x000100, 0x000102, 0x000102, 0x000104, 0x000104, 0x000106, 0x000106, 0x000108, 0x000108, 0x00010a, + 0x00010a, 0x00010c, 0x00010c, 0x00010e, 0x00010e, 0x000110, 0x000110, 0x000112, 0x000112, 0x000114, 0x000114, 0x000116, 0x000116, 0x000118, 0x000118, 0x00011a, 0x00011a, + 0x00011c, 0x00011c, 
0x00011e, 0x00011e, 0x000120, 0x000120, 0x000122, 0x000122, 0x000124, 0x000124, 0x000126, 0x000126, 0x000128, 0x000128, 0x00012a, 0x00012a, 0x00012c, + 0x00012c, 0x00012e, 0x00012e, 0x000130, 0x000130, 0x000132, 0x000132, 0x000134, 0x000134, 0x000136, 0x000136, 0x000139, 0x000139, 0x00013b, 0x00013b, 0x00013d, 0x00013d, + 0x00013f, 0x00013f, 0x000141, 0x000141, 0x000143, 0x000143, 0x000145, 0x000145, 0x000147, 0x000147, 0x00014a, 0x00014a, 0x00014c, 0x00014c, 0x00014e, 0x00014e, 0x000150, + 0x000150, 0x000152, 0x000152, 0x000154, 0x000154, 0x000156, 0x000156, 0x000158, 0x000158, 0x00015a, 0x00015a, 0x00015c, 0x00015c, 0x00015e, 0x00015e, 0x000160, 0x000160, + 0x000162, 0x000162, 0x000164, 0x000164, 0x000166, 0x000166, 0x000168, 0x000168, 0x00016a, 0x00016a, 0x00016c, 0x00016c, 0x00016e, 0x00016e, 0x000170, 0x000170, 0x000172, + 0x000172, 0x000174, 0x000174, 0x000176, 0x000176, 0x000178, 0x000179, 0x00017b, 0x00017b, 0x00017d, 0x00017d, 0x000181, 0x000182, 0x000184, 0x000184, 0x000186, 0x000187, + 0x000189, 0x00018b, 0x00018e, 0x000191, 0x000193, 0x000194, 0x000196, 0x000198, 0x00019c, 0x00019d, 0x00019f, 0x0001a0, 0x0001a2, 0x0001a2, 0x0001a4, 0x0001a4, 0x0001a6, + 0x0001a7, 0x0001a9, 0x0001a9, 0x0001ac, 0x0001ac, 0x0001ae, 0x0001af, 0x0001b1, 0x0001b3, 0x0001b5, 0x0001b5, 0x0001b7, 0x0001b8, 0x0001bc, 0x0001bc, 0x0001c4, 0x0001c5, + 0x0001c7, 0x0001c8, 0x0001ca, 0x0001cb, 0x0001cd, 0x0001cd, 0x0001cf, 0x0001cf, 0x0001d1, 0x0001d1, 0x0001d3, 0x0001d3, 0x0001d5, 0x0001d5, 0x0001d7, 0x0001d7, 0x0001d9, + 0x0001d9, 0x0001db, 0x0001db, 0x0001de, 0x0001de, 0x0001e0, 0x0001e0, 0x0001e2, 0x0001e2, 0x0001e4, 0x0001e4, 0x0001e6, 0x0001e6, 0x0001e8, 0x0001e8, 0x0001ea, 0x0001ea, + 0x0001ec, 0x0001ec, 0x0001ee, 0x0001ee, 0x0001f1, 0x0001f2, 0x0001f4, 0x0001f4, 0x0001f6, 0x0001f8, 0x0001fa, 0x0001fa, 0x0001fc, 0x0001fc, 0x0001fe, 0x0001fe, 0x000200, + 0x000200, 0x000202, 0x000202, 0x000204, 0x000204, 0x000206, 0x000206, 0x000208, 0x000208, 0x00020a, 0x00020a, 0x00020c, 
0x00020c, 0x00020e, 0x00020e, 0x000210, 0x000210, + 0x000212, 0x000212, 0x000214, 0x000214, 0x000216, 0x000216, 0x000218, 0x000218, 0x00021a, 0x00021a, 0x00021c, 0x00021c, 0x00021e, 0x00021e, 0x000220, 0x000220, 0x000222, + 0x000222, 0x000224, 0x000224, 0x000226, 0x000226, 0x000228, 0x000228, 0x00022a, 0x00022a, 0x00022c, 0x00022c, 0x00022e, 0x00022e, 0x000230, 0x000230, 0x000232, 0x000232, + 0x00023a, 0x00023b, 0x00023d, 0x00023e, 0x000241, 0x000241, 0x000243, 0x000246, 0x000248, 0x000248, 0x00024a, 0x00024a, 0x00024c, 0x00024c, 0x00024e, 0x00024e, 0x000370, + 0x000370, 0x000372, 0x000372, 0x000376, 0x000376, 0x00037f, 0x00037f, 0x000386, 0x000386, 0x000388, 0x00038a, 0x00038c, 0x00038c, 0x00038e, 0x00038f, 0x000391, 0x0003a1, + 0x0003a3, 0x0003ab, 0x0003cf, 0x0003cf, 0x0003d2, 0x0003d4, 0x0003d8, 0x0003d8, 0x0003da, 0x0003da, 0x0003dc, 0x0003dc, 0x0003de, 0x0003de, 0x0003e0, 0x0003e0, 0x0003e2, + 0x0003e2, 0x0003e4, 0x0003e4, 0x0003e6, 0x0003e6, 0x0003e8, 0x0003e8, 0x0003ea, 0x0003ea, 0x0003ec, 0x0003ec, 0x0003ee, 0x0003ee, 0x0003f4, 0x0003f4, 0x0003f7, 0x0003f7, + 0x0003f9, 0x0003fa, 0x0003fd, 0x00042f, 0x000460, 0x000460, 0x000462, 0x000462, 0x000464, 0x000464, 0x000466, 0x000466, 0x000468, 0x000468, 0x00046a, 0x00046a, 0x00046c, + 0x00046c, 0x00046e, 0x00046e, 0x000470, 0x000470, 0x000472, 0x000472, 0x000474, 0x000474, 0x000476, 0x000476, 0x000478, 0x000478, 0x00047a, 0x00047a, 0x00047c, 0x00047c, + 0x00047e, 0x00047e, 0x000480, 0x000480, 0x00048a, 0x00048a, 0x00048c, 0x00048c, 0x00048e, 0x00048e, 0x000490, 0x000490, 0x000492, 0x000492, 0x000494, 0x000494, 0x000496, + 0x000496, 0x000498, 0x000498, 0x00049a, 0x00049a, 0x00049c, 0x00049c, 0x00049e, 0x00049e, 0x0004a0, 0x0004a0, 0x0004a2, 0x0004a2, 0x0004a4, 0x0004a4, 0x0004a6, 0x0004a6, + 0x0004a8, 0x0004a8, 0x0004aa, 0x0004aa, 0x0004ac, 0x0004ac, 0x0004ae, 0x0004ae, 0x0004b0, 0x0004b0, 0x0004b2, 0x0004b2, 0x0004b4, 0x0004b4, 0x0004b6, 0x0004b6, 0x0004b8, + 0x0004b8, 0x0004ba, 0x0004ba, 0x0004bc, 0x0004bc, 
0x0004be, 0x0004be, 0x0004c0, 0x0004c1, 0x0004c3, 0x0004c3, 0x0004c5, 0x0004c5, 0x0004c7, 0x0004c7, 0x0004c9, 0x0004c9, + 0x0004cb, 0x0004cb, 0x0004cd, 0x0004cd, 0x0004d0, 0x0004d0, 0x0004d2, 0x0004d2, 0x0004d4, 0x0004d4, 0x0004d6, 0x0004d6, 0x0004d8, 0x0004d8, 0x0004da, 0x0004da, 0x0004dc, + 0x0004dc, 0x0004de, 0x0004de, 0x0004e0, 0x0004e0, 0x0004e2, 0x0004e2, 0x0004e4, 0x0004e4, 0x0004e6, 0x0004e6, 0x0004e8, 0x0004e8, 0x0004ea, 0x0004ea, 0x0004ec, 0x0004ec, + 0x0004ee, 0x0004ee, 0x0004f0, 0x0004f0, 0x0004f2, 0x0004f2, 0x0004f4, 0x0004f4, 0x0004f6, 0x0004f6, 0x0004f8, 0x0004f8, 0x0004fa, 0x0004fa, 0x0004fc, 0x0004fc, 0x0004fe, + 0x0004fe, 0x000500, 0x000500, 0x000502, 0x000502, 0x000504, 0x000504, 0x000506, 0x000506, 0x000508, 0x000508, 0x00050a, 0x00050a, 0x00050c, 0x00050c, 0x00050e, 0x00050e, + 0x000510, 0x000510, 0x000512, 0x000512, 0x000514, 0x000514, 0x000516, 0x000516, 0x000518, 0x000518, 0x00051a, 0x00051a, 0x00051c, 0x00051c, 0x00051e, 0x00051e, 0x000520, + 0x000520, 0x000522, 0x000522, 0x000524, 0x000524, 0x000526, 0x000526, 0x000528, 0x000528, 0x00052a, 0x00052a, 0x00052c, 0x00052c, 0x00052e, 0x00052e, 0x000531, 0x000556, + 0x0010a0, 0x0010c5, 0x0010c7, 0x0010c7, 0x0010cd, 0x0010cd, 0x0013a0, 0x0013f5, 0x001c90, 0x001cba, 0x001cbd, 0x001cbf, 0x001e00, 0x001e00, 0x001e02, 0x001e02, 0x001e04, + 0x001e04, 0x001e06, 0x001e06, 0x001e08, 0x001e08, 0x001e0a, 0x001e0a, 0x001e0c, 0x001e0c, 0x001e0e, 0x001e0e, 0x001e10, 0x001e10, 0x001e12, 0x001e12, 0x001e14, 0x001e14, + 0x001e16, 0x001e16, 0x001e18, 0x001e18, 0x001e1a, 0x001e1a, 0x001e1c, 0x001e1c, 0x001e1e, 0x001e1e, 0x001e20, 0x001e20, 0x001e22, 0x001e22, 0x001e24, 0x001e24, 0x001e26, + 0x001e26, 0x001e28, 0x001e28, 0x001e2a, 0x001e2a, 0x001e2c, 0x001e2c, 0x001e2e, 0x001e2e, 0x001e30, 0x001e30, 0x001e32, 0x001e32, 0x001e34, 0x001e34, 0x001e36, 0x001e36, + 0x001e38, 0x001e38, 0x001e3a, 0x001e3a, 0x001e3c, 0x001e3c, 0x001e3e, 0x001e3e, 0x001e40, 0x001e40, 0x001e42, 0x001e42, 0x001e44, 0x001e44, 0x001e46, 
0x001e46, 0x001e48, + 0x001e48, 0x001e4a, 0x001e4a, 0x001e4c, 0x001e4c, 0x001e4e, 0x001e4e, 0x001e50, 0x001e50, 0x001e52, 0x001e52, 0x001e54, 0x001e54, 0x001e56, 0x001e56, 0x001e58, 0x001e58, + 0x001e5a, 0x001e5a, 0x001e5c, 0x001e5c, 0x001e5e, 0x001e5e, 0x001e60, 0x001e60, 0x001e62, 0x001e62, 0x001e64, 0x001e64, 0x001e66, 0x001e66, 0x001e68, 0x001e68, 0x001e6a, + 0x001e6a, 0x001e6c, 0x001e6c, 0x001e6e, 0x001e6e, 0x001e70, 0x001e70, 0x001e72, 0x001e72, 0x001e74, 0x001e74, 0x001e76, 0x001e76, 0x001e78, 0x001e78, 0x001e7a, 0x001e7a, + 0x001e7c, 0x001e7c, 0x001e7e, 0x001e7e, 0x001e80, 0x001e80, 0x001e82, 0x001e82, 0x001e84, 0x001e84, 0x001e86, 0x001e86, 0x001e88, 0x001e88, 0x001e8a, 0x001e8a, 0x001e8c, + 0x001e8c, 0x001e8e, 0x001e8e, 0x001e90, 0x001e90, 0x001e92, 0x001e92, 0x001e94, 0x001e94, 0x001e9e, 0x001e9e, 0x001ea0, 0x001ea0, 0x001ea2, 0x001ea2, 0x001ea4, 0x001ea4, + 0x001ea6, 0x001ea6, 0x001ea8, 0x001ea8, 0x001eaa, 0x001eaa, 0x001eac, 0x001eac, 0x001eae, 0x001eae, 0x001eb0, 0x001eb0, 0x001eb2, 0x001eb2, 0x001eb4, 0x001eb4, 0x001eb6, + 0x001eb6, 0x001eb8, 0x001eb8, 0x001eba, 0x001eba, 0x001ebc, 0x001ebc, 0x001ebe, 0x001ebe, 0x001ec0, 0x001ec0, 0x001ec2, 0x001ec2, 0x001ec4, 0x001ec4, 0x001ec6, 0x001ec6, + 0x001ec8, 0x001ec8, 0x001eca, 0x001eca, 0x001ecc, 0x001ecc, 0x001ece, 0x001ece, 0x001ed0, 0x001ed0, 0x001ed2, 0x001ed2, 0x001ed4, 0x001ed4, 0x001ed6, 0x001ed6, 0x001ed8, + 0x001ed8, 0x001eda, 0x001eda, 0x001edc, 0x001edc, 0x001ede, 0x001ede, 0x001ee0, 0x001ee0, 0x001ee2, 0x001ee2, 0x001ee4, 0x001ee4, 0x001ee6, 0x001ee6, 0x001ee8, 0x001ee8, + 0x001eea, 0x001eea, 0x001eec, 0x001eec, 0x001eee, 0x001eee, 0x001ef0, 0x001ef0, 0x001ef2, 0x001ef2, 0x001ef4, 0x001ef4, 0x001ef6, 0x001ef6, 0x001ef8, 0x001ef8, 0x001efa, + 0x001efa, 0x001efc, 0x001efc, 0x001efe, 0x001efe, 0x001f08, 0x001f0f, 0x001f18, 0x001f1d, 0x001f28, 0x001f2f, 0x001f38, 0x001f3f, 0x001f48, 0x001f4d, 0x001f59, 0x001f59, + 0x001f5b, 0x001f5b, 0x001f5d, 0x001f5d, 0x001f5f, 0x001f5f, 0x001f68, 0x001f6f, 
0x001f88, 0x001f8f, 0x001f98, 0x001f9f, 0x001fa8, 0x001faf, 0x001fb8, 0x001fbc, 0x001fc8, + 0x001fcc, 0x001fd8, 0x001fdb, 0x001fe8, 0x001fec, 0x001ff8, 0x001ffc, 0x002102, 0x002102, 0x002107, 0x002107, 0x00210b, 0x00210d, 0x002110, 0x002112, 0x002115, 0x002115, + 0x002119, 0x00211d, 0x002124, 0x002124, 0x002126, 0x002126, 0x002128, 0x002128, 0x00212a, 0x00212d, 0x002130, 0x002133, 0x00213e, 0x00213f, 0x002145, 0x002145, 0x002160, + 0x00216f, 0x002183, 0x002183, 0x0024b6, 0x0024cf, 0x002c00, 0x002c2e, 0x002c60, 0x002c60, 0x002c62, 0x002c64, 0x002c67, 0x002c67, 0x002c69, 0x002c69, 0x002c6b, 0x002c6b, + 0x002c6d, 0x002c70, 0x002c72, 0x002c72, 0x002c75, 0x002c75, 0x002c7e, 0x002c80, 0x002c82, 0x002c82, 0x002c84, 0x002c84, 0x002c86, 0x002c86, 0x002c88, 0x002c88, 0x002c8a, + 0x002c8a, 0x002c8c, 0x002c8c, 0x002c8e, 0x002c8e, 0x002c90, 0x002c90, 0x002c92, 0x002c92, 0x002c94, 0x002c94, 0x002c96, 0x002c96, 0x002c98, 0x002c98, 0x002c9a, 0x002c9a, + 0x002c9c, 0x002c9c, 0x002c9e, 0x002c9e, 0x002ca0, 0x002ca0, 0x002ca2, 0x002ca2, 0x002ca4, 0x002ca4, 0x002ca6, 0x002ca6, 0x002ca8, 0x002ca8, 0x002caa, 0x002caa, 0x002cac, + 0x002cac, 0x002cae, 0x002cae, 0x002cb0, 0x002cb0, 0x002cb2, 0x002cb2, 0x002cb4, 0x002cb4, 0x002cb6, 0x002cb6, 0x002cb8, 0x002cb8, 0x002cba, 0x002cba, 0x002cbc, 0x002cbc, + 0x002cbe, 0x002cbe, 0x002cc0, 0x002cc0, 0x002cc2, 0x002cc2, 0x002cc4, 0x002cc4, 0x002cc6, 0x002cc6, 0x002cc8, 0x002cc8, 0x002cca, 0x002cca, 0x002ccc, 0x002ccc, 0x002cce, + 0x002cce, 0x002cd0, 0x002cd0, 0x002cd2, 0x002cd2, 0x002cd4, 0x002cd4, 0x002cd6, 0x002cd6, 0x002cd8, 0x002cd8, 0x002cda, 0x002cda, 0x002cdc, 0x002cdc, 0x002cde, 0x002cde, + 0x002ce0, 0x002ce0, 0x002ce2, 0x002ce2, 0x002ceb, 0x002ceb, 0x002ced, 0x002ced, 0x002cf2, 0x002cf2, 0x00a640, 0x00a640, 0x00a642, 0x00a642, 0x00a644, 0x00a644, 0x00a646, + 0x00a646, 0x00a648, 0x00a648, 0x00a64a, 0x00a64a, 0x00a64c, 0x00a64c, 0x00a64e, 0x00a64e, 0x00a650, 0x00a650, 0x00a652, 0x00a652, 0x00a654, 0x00a654, 0x00a656, 0x00a656, + 0x00a658, 
0x00a658, 0x00a65a, 0x00a65a, 0x00a65c, 0x00a65c, 0x00a65e, 0x00a65e, 0x00a660, 0x00a660, 0x00a662, 0x00a662, 0x00a664, 0x00a664, 0x00a666, 0x00a666, 0x00a668, + 0x00a668, 0x00a66a, 0x00a66a, 0x00a66c, 0x00a66c, 0x00a680, 0x00a680, 0x00a682, 0x00a682, 0x00a684, 0x00a684, 0x00a686, 0x00a686, 0x00a688, 0x00a688, 0x00a68a, 0x00a68a, + 0x00a68c, 0x00a68c, 0x00a68e, 0x00a68e, 0x00a690, 0x00a690, 0x00a692, 0x00a692, 0x00a694, 0x00a694, 0x00a696, 0x00a696, 0x00a698, 0x00a698, 0x00a69a, 0x00a69a, 0x00a722, + 0x00a722, 0x00a724, 0x00a724, 0x00a726, 0x00a726, 0x00a728, 0x00a728, 0x00a72a, 0x00a72a, 0x00a72c, 0x00a72c, 0x00a72e, 0x00a72e, 0x00a732, 0x00a732, 0x00a734, 0x00a734, + 0x00a736, 0x00a736, 0x00a738, 0x00a738, 0x00a73a, 0x00a73a, 0x00a73c, 0x00a73c, 0x00a73e, 0x00a73e, 0x00a740, 0x00a740, 0x00a742, 0x00a742, 0x00a744, 0x00a744, 0x00a746, + 0x00a746, 0x00a748, 0x00a748, 0x00a74a, 0x00a74a, 0x00a74c, 0x00a74c, 0x00a74e, 0x00a74e, 0x00a750, 0x00a750, 0x00a752, 0x00a752, 0x00a754, 0x00a754, 0x00a756, 0x00a756, + 0x00a758, 0x00a758, 0x00a75a, 0x00a75a, 0x00a75c, 0x00a75c, 0x00a75e, 0x00a75e, 0x00a760, 0x00a760, 0x00a762, 0x00a762, 0x00a764, 0x00a764, 0x00a766, 0x00a766, 0x00a768, + 0x00a768, 0x00a76a, 0x00a76a, 0x00a76c, 0x00a76c, 0x00a76e, 0x00a76e, 0x00a779, 0x00a779, 0x00a77b, 0x00a77b, 0x00a77d, 0x00a77e, 0x00a780, 0x00a780, 0x00a782, 0x00a782, + 0x00a784, 0x00a784, 0x00a786, 0x00a786, 0x00a78b, 0x00a78b, 0x00a78d, 0x00a78d, 0x00a790, 0x00a790, 0x00a792, 0x00a792, 0x00a796, 0x00a796, 0x00a798, 0x00a798, 0x00a79a, + 0x00a79a, 0x00a79c, 0x00a79c, 0x00a79e, 0x00a79e, 0x00a7a0, 0x00a7a0, 0x00a7a2, 0x00a7a2, 0x00a7a4, 0x00a7a4, 0x00a7a6, 0x00a7a6, 0x00a7a8, 0x00a7a8, 0x00a7aa, 0x00a7ae, + 0x00a7b0, 0x00a7b4, 0x00a7b6, 0x00a7b6, 0x00a7b8, 0x00a7b8, 0x00a7ba, 0x00a7ba, 0x00a7bc, 0x00a7bc, 0x00a7be, 0x00a7be, 0x00a7c2, 0x00a7c2, 0x00a7c4, 0x00a7c6, 0x00ff21, + 0x00ff3a, 0x010400, 0x010427, 0x0104b0, 0x0104d3, 0x010c80, 0x010cb2, 0x0118a0, 0x0118bf, 0x016e40, 0x016e5f, 
0x01d400, 0x01d419, 0x01d434, 0x01d44d, 0x01d468, 0x01d481, + 0x01d49c, 0x01d49c, 0x01d49e, 0x01d49f, 0x01d4a2, 0x01d4a2, 0x01d4a5, 0x01d4a6, 0x01d4a9, 0x01d4ac, 0x01d4ae, 0x01d4b5, 0x01d4d0, 0x01d4e9, 0x01d504, 0x01d505, 0x01d507, + 0x01d50a, 0x01d50d, 0x01d514, 0x01d516, 0x01d51c, 0x01d538, 0x01d539, 0x01d53b, 0x01d53e, 0x01d540, 0x01d544, 0x01d546, 0x01d546, 0x01d54a, 0x01d550, 0x01d56c, 0x01d585, + 0x01d5a0, 0x01d5b9, 0x01d5d4, 0x01d5ed, 0x01d608, 0x01d621, 0x01d63c, 0x01d655, 0x01d670, 0x01d689, 0x01d6a8, 0x01d6c0, 0x01d6e2, 0x01d6fa, 0x01d71c, 0x01d734, 0x01d756, + 0x01d76e, 0x01d790, 0x01d7a8, 0x01d7ca, 0x01d7ca, 0x01e900, 0x01e921)); + + POSIX_CHAR_CLASSES.put("xdigit", CodePointSet.createNoDedup( + 0x000030, 0x000039, 0x000041, 0x000046, 0x000061, 0x000066, 0x00ff10, 0x00ff19, 0x00ff21, 0x00ff26, 0x00ff41, 0x00ff46)); + + /* GENERATED CODE END - KEEP THIS MARKER FOR AUTOMATIC UPDATES */ + + POSIX_CHAR_CLASSES.put("alnum", POSIX_CHAR_CLASSES.get("alpha").union(POSIX_CHAR_CLASSES.get("digit"))); + WORD_CHARACTERS = POSIX_CHAR_CLASSES.get("alnum").union(CodePointSet.create('_')); + } +} diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/OracleDBFlavor.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/OracleDBFlavor.java index 1957b5230f65..887a68208c91 100644 --- a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/OracleDBFlavor.java +++ b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/OracleDBFlavor.java @@ -42,11 +42,11 @@ import java.util.function.BiPredicate; -import org.graalvm.shadowed.com.ibm.icu.lang.UCharacter; - import com.oracle.truffle.regex.RegexLanguage; import com.oracle.truffle.regex.RegexSource; import com.oracle.truffle.regex.tregex.buffer.CompilationBuffer; +import com.oracle.truffle.regex.tregex.parser.CaseFoldData; +import 
com.oracle.truffle.regex.tregex.parser.MultiCharacterCaseFolding; import com.oracle.truffle.regex.tregex.parser.RegexParser; import com.oracle.truffle.regex.tregex.parser.RegexValidator; import com.oracle.truffle.regex.tregex.parser.ast.RegexAST; @@ -59,7 +59,8 @@ public final class OracleDBFlavor extends RegexFlavor { public static final OracleDBFlavor INSTANCE = new OracleDBFlavor(); private OracleDBFlavor() { - super(BACKREFERENCES_TO_UNMATCHED_GROUPS_FAIL | NESTED_CAPTURE_GROUPS_KEPT_ON_LOOP_REENTRY | SUPPORTS_RECURSIVE_BACKREFERENCES); + super(EMPTY_CHECKS_MONITOR_CAPTURE_GROUPS | FAILING_EMPTY_CHECKS_DONT_BACKTRACK | BACKREFERENCES_TO_UNMATCHED_GROUPS_FAIL | NESTED_CAPTURE_GROUPS_KEPT_ON_LOOP_REENTRY | + SUPPORTS_RECURSIVE_BACKREFERENCES); } @Override @@ -74,10 +75,10 @@ public RegexParser createParser(RegexLanguage language, RegexSource source, Comp @Override public BiPredicate getEqualsIgnoreCasePredicate(RegexAST ast) { - return OracleDBFlavor::equalsIgnoreCaseUnicode; + return OracleDBFlavor::equalsIgnoreCase; } - private static boolean equalsIgnoreCaseUnicode(int codePointA, int codePointB) { - return UCharacter.toLowerCase(codePointA) == UCharacter.toLowerCase(codePointB); + private static boolean equalsIgnoreCase(int codePointA, int codePointB) { + return MultiCharacterCaseFolding.equalsIgnoreCase(CaseFoldData.CaseFoldAlgorithm.OracleDB, codePointA, codePointB); } } diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/OracleDBRegexLexer.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/OracleDBRegexLexer.java index 2402c75086f9..dfe1d5001db6 100644 --- a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/OracleDBRegexLexer.java +++ b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/OracleDBRegexLexer.java @@ -40,7 +40,8 @@ */ package com.oracle.truffle.regex.tregex.parser.flavors; -import 
org.graalvm.collections.EconomicMap; +import static com.oracle.truffle.regex.tregex.parser.flavors.OracleDBConstants.POSIX_CHAR_CLASSES; +import static com.oracle.truffle.regex.tregex.parser.flavors.OracleDBConstants.WORD_CHARACTERS; import com.oracle.truffle.api.CompilerDirectives; import com.oracle.truffle.regex.RegexSource; @@ -49,10 +50,9 @@ import com.oracle.truffle.regex.charset.CodePointSet; import com.oracle.truffle.regex.charset.CodePointSetAccumulator; import com.oracle.truffle.regex.charset.Constants; -import com.oracle.truffle.regex.charset.UnicodeProperties; import com.oracle.truffle.regex.errors.OracleDBErrorMessages; import com.oracle.truffle.regex.tregex.buffer.CompilationBuffer; -import com.oracle.truffle.regex.tregex.parser.CaseFoldTable; +import com.oracle.truffle.regex.tregex.parser.CaseFoldData; import com.oracle.truffle.regex.tregex.parser.RegexLexer; import com.oracle.truffle.regex.tregex.parser.Token; import com.oracle.truffle.regex.tregex.string.Encodings; @@ -60,38 +60,7 @@ import com.oracle.truffle.regex.util.TBitSet; public final class OracleDBRegexLexer extends RegexLexer { - - // This map contains the character sets of POSIX character classes like [[:alpha:]] and - // [[:punct:]]. 
- private static final EconomicMap UNICODE_POSIX_CHAR_CLASSES; private static final CodePointSet EMPTY_POSIX_CHAR_CLASS = CodePointSet.create(':', ':', '[', '[', ']', ']'); - - static { - CodePointSet alpha = UnicodeProperties.getProperty("Alphabetic"); - CodePointSet digit = UnicodeProperties.getProperty("General_Category=Decimal_Number"); - CodePointSet space = UnicodeProperties.getProperty("White_Space"); - CodePointSet xdigit = CodePointSet.create('0', '9', 'A', 'F', 'a', 'f'); - - UNICODE_POSIX_CHAR_CLASSES = EconomicMap.create(12); - CompilationBuffer buffer = new CompilationBuffer(Encodings.UTF_32); - - CodePointSet blank = UnicodeProperties.getProperty("General_Category=Space_Separator").union(CodePointSet.create('\t', '\t')); - CodePointSet cntrl = UnicodeProperties.getProperty("General_Category=Control"); - CodePointSet graph = space.union(UnicodeProperties.getProperty("General_Category=Control")).union(UnicodeProperties.getProperty("General_Category=Surrogate")).union( - UnicodeProperties.getProperty("General_Category=Unassigned")).createInverse(Encodings.UTF_32); - UNICODE_POSIX_CHAR_CLASSES.put("alpha", alpha); - UNICODE_POSIX_CHAR_CLASSES.put("alnum", alpha.union(digit)); - UNICODE_POSIX_CHAR_CLASSES.put("blank", blank); - UNICODE_POSIX_CHAR_CLASSES.put("cntrl", cntrl); - UNICODE_POSIX_CHAR_CLASSES.put("digit", digit); - UNICODE_POSIX_CHAR_CLASSES.put("graph", graph); - UNICODE_POSIX_CHAR_CLASSES.put("lower", UnicodeProperties.getProperty("Lowercase")); - UNICODE_POSIX_CHAR_CLASSES.put("print", graph.union(blank).subtract(cntrl, buffer)); - UNICODE_POSIX_CHAR_CLASSES.put("punct", UnicodeProperties.getProperty("General_Category=Punctuation").union(UnicodeProperties.getProperty("General_Category=Symbol").subtract(alpha, buffer))); - UNICODE_POSIX_CHAR_CLASSES.put("space", space); - UNICODE_POSIX_CHAR_CLASSES.put("upper", UnicodeProperties.getProperty("Uppercase")); - UNICODE_POSIX_CHAR_CLASSES.put("xdigit", xdigit); - } private static final TBitSet 
WHITESPACE = TBitSet.valueOf('\n', ' '); private final OracleDBFlags flags; private final CodePointSetAccumulator caseFoldTmp = new CodePointSetAccumulator(); @@ -114,7 +83,7 @@ public boolean hasNext() { @Override protected boolean featureEnabledIgnoreCase() { - return flags.isIgnoreCase(); + return false; } @Override @@ -158,7 +127,7 @@ protected CodePointSet getPOSIXCharClass(String name) { // oracledb quirk: [::] inside a character class is treated as [:] instead of re-parsing return EMPTY_POSIX_CHAR_CLASS; } - CodePointSet cps = UNICODE_POSIX_CHAR_CLASSES.get(name); + CodePointSet cps = POSIX_CHAR_CLASSES.get(name); if (cps != null) { return cps; } @@ -224,7 +193,7 @@ protected boolean featureEnabledClassSetExpressions() { @Override protected void caseFoldUnfold(CodePointSetAccumulator charClass) { - CaseFoldTable.applyCaseFoldUnfold(charClass, caseFoldTmp, CaseFoldTable.CaseFoldingAlgorithm.ECMAScriptUnicode); + CaseFoldData.applyCaseFoldUnfold(charClass, caseFoldTmp, CaseFoldData.CaseFoldUnfoldAlgorithm.ECMAScriptUnicode); } @Override @@ -257,10 +226,16 @@ protected int getMaxBackReferenceDigits() { return 1; } + @Override + protected boolean isPredefCharClass(char c) { + // OracleDB ignores \s \d \w inside character classes, and interprets them as literal + // characters instead + return !inCharacterClass() && PREDEFINED_CHAR_CLASSES.get(c); + } + @Override protected CodePointSet getPredefinedCharClass(char c) { - assert UNICODE_POSIX_CHAR_CLASSES.containsKey(getPOSIXCharClassName(c)); - CodePointSet cps = UNICODE_POSIX_CHAR_CLASSES.get(getPOSIXCharClassName(c)); + CodePointSet cps = getPOSIXCharClass(c); if (isLowerCase(c)) { return cps; } else { @@ -276,14 +251,14 @@ private static boolean isLowerCase(char c) { return (c & 0x20) != 0; } - private static String getPOSIXCharClassName(char c) { + private static CodePointSet getPOSIXCharClass(char c) { switch (toLowerCase(c)) { case 's': - return "space"; + return POSIX_CHAR_CLASSES.get("space"); case 'd': - 
return "digit"; + return POSIX_CHAR_CLASSES.get("digit"); case 'w': - return "alnum"; + return WORD_CHARACTERS; default: throw CompilerDirectives.shouldNotReachHere(); } @@ -303,7 +278,7 @@ protected RegexSyntaxException handleBoundedQuantifierOutOfOrder() { protected Token handleBoundedQuantifierSyntaxError() throws RegexSyntaxException { // invalid bounded quantifiers are treated as string literals position = getLastTokenPosition() + 1; - return charClass('{'); + return literalChar('{'); } @Override @@ -311,7 +286,7 @@ protected Token handleBoundedQuantifierOverflow(long min, long max) { if (min == -1 || max == -1) { // bounded quantifiers outside uint32 range are treated as string literals position = getLastTokenPosition() + 1; - return charClass('{'); + return literalChar('{'); } if (Long.compareUnsigned(min, max) > 0) { throw handleBoundedQuantifierOutOfOrder(); @@ -326,7 +301,7 @@ protected Token handleBoundedQuantifierOverflowMin(long min, long max) { if (min == -1) { // bounded quantifiers outside uint32 range are treated as string literals position = getLastTokenPosition() + 1; - return charClass('{'); + return literalChar('{'); } // oracledb quirk: values between 0x7fff_ffff and 0xffff_ffff are treated as uint32 in the // quantifier order check, but are later "cast" to int32 by stripping the sign bit. 
diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/OracleDBRegexParser.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/OracleDBRegexParser.java index 3193adae5015..86f36981a785 100644 --- a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/OracleDBRegexParser.java +++ b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/OracleDBRegexParser.java @@ -40,6 +40,10 @@ */ package com.oracle.truffle.regex.tregex.parser.flavors; +import java.util.List; + +import org.graalvm.collections.Pair; + import com.oracle.truffle.api.CompilerDirectives; import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; import com.oracle.truffle.regex.AbstractRegexObject; @@ -48,13 +52,18 @@ import com.oracle.truffle.regex.RegexSource; import com.oracle.truffle.regex.RegexSyntaxException; import com.oracle.truffle.regex.charset.CodePointSet; +import com.oracle.truffle.regex.charset.CodePointSetAccumulator; import com.oracle.truffle.regex.errors.OracleDBErrorMessages; import com.oracle.truffle.regex.tregex.buffer.CompilationBuffer; +import com.oracle.truffle.regex.tregex.buffer.IntArrayBuffer; +import com.oracle.truffle.regex.tregex.parser.CaseFoldData; +import com.oracle.truffle.regex.tregex.parser.MultiCharacterCaseFolding; import com.oracle.truffle.regex.tregex.parser.RegexASTBuilder; import com.oracle.truffle.regex.tregex.parser.RegexParser; import com.oracle.truffle.regex.tregex.parser.Token; import com.oracle.truffle.regex.tregex.parser.ast.RegexAST; import com.oracle.truffle.regex.tregex.parser.ast.RegexASTRootNode; +import com.oracle.truffle.regex.tregex.string.Encodings; public final class OracleDBRegexParser implements RegexParser { @@ -62,6 +71,9 @@ public final class OracleDBRegexParser implements RegexParser { private final OracleDBFlags flags; private final OracleDBRegexLexer lexer; private final RegexASTBuilder 
astBuilder; + private CodePointSetAccumulator curCharClass = new CodePointSetAccumulator(); + private CodePointSetAccumulator curCharClassPosixEquivalenceClasses = new CodePointSetAccumulator(); + private CodePointSetAccumulator charClassTmp = new CodePointSetAccumulator(); @TruffleBoundary public OracleDBRegexParser(RegexLanguage language, RegexSource source, CompilationBuffer compilationBuffer) throws RegexSyntaxException { @@ -91,12 +103,26 @@ public AbstractRegexObject getNamedCaptureGroups() { @Override @TruffleBoundary public RegexAST parse() throws RegexSyntaxException { + IntArrayBuffer literalStringBuffer = new IntArrayBuffer(); astBuilder.pushRootGroup(); Token token = null; Token.Kind prevKind; while (lexer.hasNext()) { prevKind = token == null ? null : token.kind; token = lexer.next(); + if (token.kind != Token.Kind.literalChar && !literalStringBuffer.isEmpty()) { + int last = -1; + if (token.kind == Token.Kind.quantifier) { + last = literalStringBuffer.get(literalStringBuffer.length() - 1); + literalStringBuffer.setLength(literalStringBuffer.length() - 1); + } + addLiteralString(literalStringBuffer); + if (last >= 0) { + assert literalStringBuffer.isEmpty(); + literalStringBuffer.add(last); + addLiteralString(literalStringBuffer); + } + } switch (token.kind) { case A, z: astBuilder.addPositionAssertion(token); @@ -143,13 +169,31 @@ public RegexAST parse() throws RegexSyntaxException { // quantifiers without target are ignored break; } - astBuilder.addQuantifier((Token.Quantifier) token); + Token.Quantifier quantifier = (Token.Quantifier) token; + if (astBuilder.getCurTerm().isQuantifiableTerm() && astBuilder.getCurTerm().asQuantifiableTerm().hasQuantifier()) { + Token.Quantifier existingQuantifier = astBuilder.getCurTerm().asQuantifiableTerm().getQuantifier(); + if (existingQuantifier.getMin() > 1) { + astBuilder.wrapCurTermInGroup(); + } else { + astBuilder.addQuantifier(Token.createQuantifier( + Math.max(quantifier.getMin(), 
existingQuantifier.getMin()), + (int) Math.max(Integer.toUnsignedLong(quantifier.getMax()), Integer.toUnsignedLong(existingQuantifier.getMax())), + quantifier.isGreedy() && existingQuantifier.isGreedy())); + break; + } + } + astBuilder.addQuantifier(quantifier); break; case alternation: astBuilder.nextSequence(); break; case captureGroupBegin: - astBuilder.pushCaptureGroup(token); + if (lexer.numberOfCaptureGroupsSoFar() <= 10) { + // oracledb only tracks capture groups 0 - 9 + astBuilder.pushCaptureGroup(token); + } else { + astBuilder.pushGroup(token); + } break; case groupEnd: if (astBuilder.getCurGroup().getParent() instanceof RegexASTRootNode) { @@ -157,9 +201,27 @@ public RegexAST parse() throws RegexSyntaxException { } astBuilder.popGroup(token); break; + case literalChar: + literalStringBuffer.add(((Token.LiteralCharacter) token).getCodePoint()); + break; case charClass: astBuilder.addCharClass((Token.CharacterClass) token); break; + case charClassBegin: + curCharClass.clear(); + curCharClassPosixEquivalenceClasses.clear(); + break; + case charClassAtom: + CodePointSet contents = ((Token.CharacterClassAtom) token).getContents(); + if (((Token.CharacterClassAtom) token).isPosixCollationEquivalenceClass()) { + curCharClassPosixEquivalenceClasses.addSet(contents); + } else { + curCharClass.addSet(contents); + } + break; + case charClassEnd: + addCharClass(); + break; default: throw CompilerDirectives.shouldNotReachHere(); } @@ -167,9 +229,71 @@ public RegexAST parse() throws RegexSyntaxException { if (!astBuilder.curGroupIsRoot()) { throw syntaxError(OracleDBErrorMessages.UNTERMINATED_GROUP); } + if (!literalStringBuffer.isEmpty()) { + addLiteralString(literalStringBuffer); + } return astBuilder.popRootGroup(); } + private void addCharClass() { + boolean wasSingleChar = !lexer.isCurCharClassInverted() && curCharClass.matchesSingleChar() && curCharClassPosixEquivalenceClasses.isEmpty(); + if (flags.isIgnoreCase()) { + 
MultiCharacterCaseFolding.caseClosure(CaseFoldData.CaseFoldAlgorithm.OracleDB, curCharClass, charClassTmp, (a, b) -> true, Encodings.UTF_8.getFullSet()); + } + MultiCharacterCaseFolding.caseClosure(CaseFoldData.CaseFoldAlgorithm.OracleDBAI, curCharClassPosixEquivalenceClasses, charClassTmp, (a, b) -> true, Encodings.UTF_8.getFullSet()); + curCharClass.addSet(curCharClassPosixEquivalenceClasses.get()); + if (lexer.isCurCharClassInverted()) { + curCharClass.invert(Encodings.UTF_8); + } + if (flags.isIgnoreCase()) { + List> multiCodePointExpansions = MultiCharacterCaseFolding.caseClosureMultiCodePoint(CaseFoldData.CaseFoldAlgorithm.OracleDB, curCharClass); + List> multiCodePointExpansionsPEC = MultiCharacterCaseFolding.caseClosureMultiCodePoint(CaseFoldData.CaseFoldAlgorithm.OracleDBAI, + curCharClassPosixEquivalenceClasses); + if (!multiCodePointExpansions.isEmpty() || !multiCodePointExpansionsPEC.isEmpty()) { + astBuilder.pushGroup(); + astBuilder.addCharClass(curCharClass.toCodePointSet()); + addMultiCodePointExpansions(multiCodePointExpansions, CaseFoldData.CaseFoldAlgorithm.OracleDB); + addMultiCodePointExpansions(multiCodePointExpansionsPEC, CaseFoldData.CaseFoldAlgorithm.OracleDBAI); + astBuilder.popGroup(); + } else { + astBuilder.addCharClass(curCharClass.toCodePointSet(), wasSingleChar); + } + } else if (!curCharClassPosixEquivalenceClasses.isEmpty()) { + List> multiCodePointExpansionsPEC = MultiCharacterCaseFolding.caseClosureMultiCodePoint(CaseFoldData.CaseFoldAlgorithm.OracleDBAI, + curCharClassPosixEquivalenceClasses); + if (!multiCodePointExpansionsPEC.isEmpty()) { + astBuilder.pushGroup(); + astBuilder.addCharClass(curCharClass.toCodePointSet()); + addMultiCodePointExpansions(multiCodePointExpansionsPEC, CaseFoldData.CaseFoldAlgorithm.OracleDBAI); + astBuilder.popGroup(); + } else { + astBuilder.addCharClass(curCharClass.toCodePointSet(), wasSingleChar); + } + } else { + astBuilder.addCharClass(curCharClass.toCodePointSet(), wasSingleChar); + } + } + + 
private void addMultiCodePointExpansions(List> multiCodePointExpansions, CaseFoldData.CaseFoldAlgorithm algorithm) { + for (Pair pair : multiCodePointExpansions) { + astBuilder.nextSequence(); + int[] to = pair.getRight(); + boolean dropAsciiOnStart = false; + MultiCharacterCaseFolding.caseFoldUnfoldString(algorithm, to, Encodings.UTF_8.getFullSet(), dropAsciiOnStart, astBuilder); + } + } + + private void addLiteralString(IntArrayBuffer literalStringBuffer) { + if (flags.isIgnoreCase()) { + MultiCharacterCaseFolding.caseFoldUnfoldString(CaseFoldData.CaseFoldAlgorithm.OracleDB, literalStringBuffer.toArray(), Encodings.UTF_8.getFullSet(), astBuilder); + } else { + for (int i = 0; i < literalStringBuffer.length(); i++) { + astBuilder.addCharClass(CodePointSet.create(literalStringBuffer.get(i)), true); + } + } + literalStringBuffer.clear(); + } + private RegexSyntaxException syntaxError(String msg) { return RegexSyntaxException.createPattern(source, msg, lexer.getLastTokenPosition()); } diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/PythonFlavor.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/PythonFlavor.java index 0fabd94d2625..de3b43e1316d 100644 --- a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/PythonFlavor.java +++ b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/PythonFlavor.java @@ -42,12 +42,12 @@ import java.util.function.BiPredicate; +import com.oracle.truffle.regex.tregex.parser.CaseFoldData; import org.graalvm.shadowed.com.ibm.icu.lang.UCharacter; import com.oracle.truffle.regex.RegexLanguage; import com.oracle.truffle.regex.RegexSource; import com.oracle.truffle.regex.tregex.buffer.CompilationBuffer; -import com.oracle.truffle.regex.tregex.parser.CaseFoldTable; import com.oracle.truffle.regex.tregex.parser.RegexParser; import com.oracle.truffle.regex.tregex.parser.RegexValidator; 
import com.oracle.truffle.regex.tregex.parser.ast.RegexAST; @@ -84,7 +84,7 @@ public BiPredicate getEqualsIgnoreCasePredicate(RegexAST ast) return PythonFlavor::equalsIgnoreCaseUnicode; } else { assert ast.getOptions().getEncoding() == Encodings.LATIN_1; - return CaseFoldTable.CaseFoldingAlgorithm.PythonAscii.getEqualsPredicate(); + return CaseFoldData.CaseFoldUnfoldAlgorithm.PythonAscii.getEqualsPredicate(); } } diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/PythonRegexLexer.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/PythonRegexLexer.java index 1550869421e4..a00c86e0f79e 100644 --- a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/PythonRegexLexer.java +++ b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/PythonRegexLexer.java @@ -46,6 +46,7 @@ import java.util.Locale; import java.util.Map; +import com.oracle.truffle.regex.tregex.parser.CaseFoldData; import org.graalvm.shadowed.com.ibm.icu.lang.UCharacter; import com.oracle.truffle.api.CompilerDirectives; @@ -59,7 +60,6 @@ import com.oracle.truffle.regex.charset.UnicodeProperties; import com.oracle.truffle.regex.errors.PyErrorMessages; import com.oracle.truffle.regex.tregex.buffer.CompilationBuffer; -import com.oracle.truffle.regex.tregex.parser.CaseFoldTable; import com.oracle.truffle.regex.charset.ClassSetContents; import com.oracle.truffle.regex.tregex.parser.RegexLexer; import com.oracle.truffle.regex.tregex.parser.Token; @@ -414,8 +414,8 @@ protected void caseFoldUnfold(CodePointSetAccumulator charClass) { if (getLocalFlags().isLocale()) { getLocaleData().caseFoldUnfold(charClass, caseFoldTmp); } else { - CaseFoldTable.CaseFoldingAlgorithm caseFolding = getLocalFlags().isUnicode(mode) ? 
CaseFoldTable.CaseFoldingAlgorithm.PythonUnicode : CaseFoldTable.CaseFoldingAlgorithm.PythonAscii; - CaseFoldTable.applyCaseFoldUnfold(charClass, caseFoldTmp, caseFolding); + CaseFoldData.CaseFoldUnfoldAlgorithm caseFolding = getLocalFlags().isUnicode(mode) ? CaseFoldData.CaseFoldUnfoldAlgorithm.PythonUnicode : CaseFoldData.CaseFoldUnfoldAlgorithm.PythonAscii; + CaseFoldData.applyCaseFoldUnfold(charClass, caseFoldTmp, caseFolding); } } @@ -487,7 +487,7 @@ protected RegexSyntaxException handleBoundedQuantifierOutOfOrder() { @Override protected Token handleBoundedQuantifierSyntaxError() throws RegexSyntaxException { position = getLastTokenPosition() + 1; - return charClass('{'); + return literalChar('{'); } @Override @@ -614,7 +614,7 @@ protected void handleUnmatchedRightBrace() { @Override protected RegexSyntaxException handleUnmatchedLeftBracket() { - return syntaxErrorAtAbs(PyErrorMessages.UNTERMINATED_CHARACTER_SET, getLastTokenPosition()); + return syntaxErrorAtAbs(PyErrorMessages.UNTERMINATED_CHARACTER_SET, getLastCharacterClassBeginPosition()); } @Override @@ -635,7 +635,7 @@ protected Token parseCustomEscape(char c) { if (codePoint > 0xff) { handleOctalOutOfRange(); } - return charClass(codePoint); + return literalChar(codePoint); } return null; } diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/PythonRegexParser.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/PythonRegexParser.java index 7f82bab72c96..ce905237f6dd 100644 --- a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/PythonRegexParser.java +++ b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/PythonRegexParser.java @@ -51,6 +51,7 @@ import com.oracle.truffle.regex.RegexSource; import com.oracle.truffle.regex.RegexSyntaxException; import com.oracle.truffle.regex.charset.CodePointSet; +import 
com.oracle.truffle.regex.charset.CodePointSetAccumulator; import com.oracle.truffle.regex.charset.Constants; import com.oracle.truffle.regex.errors.PyErrorMessages; import com.oracle.truffle.regex.tregex.buffer.CompilationBuffer; @@ -65,7 +66,7 @@ public final class PythonRegexParser implements RegexParser { - private static final EnumSet QUANTIFIER_PREV = EnumSet.of(Token.Kind.charClass, Token.Kind.groupEnd, Token.Kind.backReference); + private static final EnumSet QUANTIFIER_PREV = EnumSet.of(Token.Kind.literalChar, Token.Kind.charClass, Token.Kind.charClassEnd, Token.Kind.groupEnd, Token.Kind.backReference); /** * Indicates whether the regex being parsed is a 'str' pattern or a 'bytes' pattern. @@ -73,6 +74,7 @@ public final class PythonRegexParser implements RegexParser { private final PythonREMode mode; private final PythonRegexLexer lexer; private final RegexASTBuilder astBuilder; + private final CodePointSetAccumulator curCharClass = new CodePointSetAccumulator(); public PythonRegexParser(RegexLanguage language, RegexSource source, CompilationBuffer compilationBuffer) throws RegexSyntaxException { this.mode = PythonREMode.fromEncoding(source.getEncoding()); @@ -228,9 +230,26 @@ public RegexAST parse() throws RegexSyntaxException { } astBuilder.popGroup(token); break; + case literalChar: + literalChar(((Token.LiteralCharacter) token).getCodePoint()); + break; case charClass: astBuilder.addCharClass((Token.CharacterClass) token); break; + case charClassBegin: + curCharClass.clear(); + break; + case charClassAtom: + curCharClass.addSet(((Token.CharacterClassAtom) token).getContents()); + break; + case charClassEnd: + boolean wasSingleChar = !lexer.isCurCharClassInverted() && curCharClass.matchesSingleChar(); + if (lexer.featureEnabledIgnoreCase()) { + lexer.caseFoldUnfold(curCharClass); + } + CodePointSet cps = curCharClass.toCodePointSet(); + astBuilder.addCharClass(lexer.isCurCharClassInverted() ? 
cps.createInverse(lexer.source.getEncoding()) : cps, wasSingleChar); + break; case conditionalBackreference: Token.BackReference conditionalBackRefToken = (Token.BackReference) token; verifyGroupReference(conditionalBackRefToken); @@ -263,6 +282,17 @@ public RegexAST parse() throws RegexSyntaxException { return ast; } + private void literalChar(int codePoint) { + if (lexer.featureEnabledIgnoreCase()) { + curCharClass.clear(); + curCharClass.addCodePoint(codePoint); + lexer.caseFoldUnfold(curCharClass); + astBuilder.addCharClass(curCharClass.toCodePointSet(), true); + } else { + astBuilder.addCharClass(CodePointSet.create(codePoint)); + } + } + /** * Verifies that making a back-reference to a certain group is legal in the current context. * diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/RubyCaseFoldingData.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/RubyCaseFoldingData.java deleted file mode 100644 index 4efcb62e609d..000000000000 --- a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/RubyCaseFoldingData.java +++ /dev/null @@ -1,1584 +0,0 @@ -/* - * Copyright (c) 2021, 2021, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * The Universal Permissive License (UPL), Version 1.0 - * - * Subject to the condition set forth below, permission is hereby granted to any - * person obtaining a copy of this software, associated documentation and/or - * data (collectively the "Software"), free of charge and under any and all - * copyright rights in the Software, and any and all patent rights owned or - * freely licensable by each licensor hereunder covering either (i) the - * unmodified Software as contributed to or provided by such licensor, or (ii) - * the Larger Works (as defined below), to deal in both - * - * (a) the Software, and - * - * (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if - * one is included with the Software each a "Larger Work" to which the Software - * is contributed by such licensors), - * - * without restriction, including without limitation the rights to copy, create - * derivative works of, display, perform, and distribute the Software and make, - * use, sell, offer for sale, import, export, have made, and have sold the - * Software and the Larger Work(s), and to sublicense the foregoing rights on - * either these or other terms. - * - * This license is subject to the following condition: - * - * The above copyright notice and either this complete permission notice or at a - * minimum a reference to the UPL must be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -package com.oracle.truffle.regex.tregex.parser.flavors; - -import java.util.SortedMap; -import java.util.TreeMap; - -public class RubyCaseFoldingData { - - public static final SortedMap CASE_FOLD; - - static { - CASE_FOLD = new TreeMap<>(); - - CASE_FOLD.put(0x0041, new int[]{0x0061}); - CASE_FOLD.put(0x0042, new int[]{0x0062}); - CASE_FOLD.put(0x0043, new int[]{0x0063}); - CASE_FOLD.put(0x0044, new int[]{0x0064}); - CASE_FOLD.put(0x0045, new int[]{0x0065}); - CASE_FOLD.put(0x0046, new int[]{0x0066}); - CASE_FOLD.put(0x0047, new int[]{0x0067}); - CASE_FOLD.put(0x0048, new int[]{0x0068}); - CASE_FOLD.put(0x0049, new int[]{0x0069}); - CASE_FOLD.put(0x004A, new int[]{0x006A}); - CASE_FOLD.put(0x004B, new int[]{0x006B}); - CASE_FOLD.put(0x004C, new int[]{0x006C}); - CASE_FOLD.put(0x004D, new int[]{0x006D}); - CASE_FOLD.put(0x004E, new int[]{0x006E}); - CASE_FOLD.put(0x004F, new int[]{0x006F}); - CASE_FOLD.put(0x0050, new int[]{0x0070}); - CASE_FOLD.put(0x0051, new int[]{0x0071}); - CASE_FOLD.put(0x0052, new int[]{0x0072}); - CASE_FOLD.put(0x0053, new int[]{0x0073}); - CASE_FOLD.put(0x0054, new int[]{0x0074}); - CASE_FOLD.put(0x0055, new int[]{0x0075}); - CASE_FOLD.put(0x0056, new int[]{0x0076}); - CASE_FOLD.put(0x0057, new int[]{0x0077}); - CASE_FOLD.put(0x0058, new int[]{0x0078}); - CASE_FOLD.put(0x0059, new int[]{0x0079}); - CASE_FOLD.put(0x005A, new int[]{0x007A}); - CASE_FOLD.put(0x00B5, new int[]{0x03BC}); - CASE_FOLD.put(0x00C0, new int[]{0x00E0}); - CASE_FOLD.put(0x00C1, new int[]{0x00E1}); - CASE_FOLD.put(0x00C2, new int[]{0x00E2}); - CASE_FOLD.put(0x00C3, new int[]{0x00E3}); - CASE_FOLD.put(0x00C4, new int[]{0x00E4}); - CASE_FOLD.put(0x00C5, new int[]{0x00E5}); - CASE_FOLD.put(0x00C6, new int[]{0x00E6}); - CASE_FOLD.put(0x00C7, new int[]{0x00E7}); - CASE_FOLD.put(0x00C8, new int[]{0x00E8}); - CASE_FOLD.put(0x00C9, new int[]{0x00E9}); - CASE_FOLD.put(0x00CA, new int[]{0x00EA}); - CASE_FOLD.put(0x00CB, new int[]{0x00EB}); - CASE_FOLD.put(0x00CC, new 
int[]{0x00EC}); - CASE_FOLD.put(0x00CD, new int[]{0x00ED}); - CASE_FOLD.put(0x00CE, new int[]{0x00EE}); - CASE_FOLD.put(0x00CF, new int[]{0x00EF}); - CASE_FOLD.put(0x00D0, new int[]{0x00F0}); - CASE_FOLD.put(0x00D1, new int[]{0x00F1}); - CASE_FOLD.put(0x00D2, new int[]{0x00F2}); - CASE_FOLD.put(0x00D3, new int[]{0x00F3}); - CASE_FOLD.put(0x00D4, new int[]{0x00F4}); - CASE_FOLD.put(0x00D5, new int[]{0x00F5}); - CASE_FOLD.put(0x00D6, new int[]{0x00F6}); - CASE_FOLD.put(0x00D8, new int[]{0x00F8}); - CASE_FOLD.put(0x00D9, new int[]{0x00F9}); - CASE_FOLD.put(0x00DA, new int[]{0x00FA}); - CASE_FOLD.put(0x00DB, new int[]{0x00FB}); - CASE_FOLD.put(0x00DC, new int[]{0x00FC}); - CASE_FOLD.put(0x00DD, new int[]{0x00FD}); - CASE_FOLD.put(0x00DE, new int[]{0x00FE}); - CASE_FOLD.put(0x00DF, new int[]{0x0073, 0x0073}); - CASE_FOLD.put(0x0100, new int[]{0x0101}); - CASE_FOLD.put(0x0102, new int[]{0x0103}); - CASE_FOLD.put(0x0104, new int[]{0x0105}); - CASE_FOLD.put(0x0106, new int[]{0x0107}); - CASE_FOLD.put(0x0108, new int[]{0x0109}); - CASE_FOLD.put(0x010A, new int[]{0x010B}); - CASE_FOLD.put(0x010C, new int[]{0x010D}); - CASE_FOLD.put(0x010E, new int[]{0x010F}); - CASE_FOLD.put(0x0110, new int[]{0x0111}); - CASE_FOLD.put(0x0112, new int[]{0x0113}); - CASE_FOLD.put(0x0114, new int[]{0x0115}); - CASE_FOLD.put(0x0116, new int[]{0x0117}); - CASE_FOLD.put(0x0118, new int[]{0x0119}); - CASE_FOLD.put(0x011A, new int[]{0x011B}); - CASE_FOLD.put(0x011C, new int[]{0x011D}); - CASE_FOLD.put(0x011E, new int[]{0x011F}); - CASE_FOLD.put(0x0120, new int[]{0x0121}); - CASE_FOLD.put(0x0122, new int[]{0x0123}); - CASE_FOLD.put(0x0124, new int[]{0x0125}); - CASE_FOLD.put(0x0126, new int[]{0x0127}); - CASE_FOLD.put(0x0128, new int[]{0x0129}); - CASE_FOLD.put(0x012A, new int[]{0x012B}); - CASE_FOLD.put(0x012C, new int[]{0x012D}); - CASE_FOLD.put(0x012E, new int[]{0x012F}); - CASE_FOLD.put(0x0130, new int[]{0x0069, 0x0307}); - CASE_FOLD.put(0x0132, new int[]{0x0133}); - CASE_FOLD.put(0x0134, new 
int[]{0x0135}); - CASE_FOLD.put(0x0136, new int[]{0x0137}); - CASE_FOLD.put(0x0139, new int[]{0x013A}); - CASE_FOLD.put(0x013B, new int[]{0x013C}); - CASE_FOLD.put(0x013D, new int[]{0x013E}); - CASE_FOLD.put(0x013F, new int[]{0x0140}); - CASE_FOLD.put(0x0141, new int[]{0x0142}); - CASE_FOLD.put(0x0143, new int[]{0x0144}); - CASE_FOLD.put(0x0145, new int[]{0x0146}); - CASE_FOLD.put(0x0147, new int[]{0x0148}); - CASE_FOLD.put(0x0149, new int[]{0x02BC, 0x006E}); - CASE_FOLD.put(0x014A, new int[]{0x014B}); - CASE_FOLD.put(0x014C, new int[]{0x014D}); - CASE_FOLD.put(0x014E, new int[]{0x014F}); - CASE_FOLD.put(0x0150, new int[]{0x0151}); - CASE_FOLD.put(0x0152, new int[]{0x0153}); - CASE_FOLD.put(0x0154, new int[]{0x0155}); - CASE_FOLD.put(0x0156, new int[]{0x0157}); - CASE_FOLD.put(0x0158, new int[]{0x0159}); - CASE_FOLD.put(0x015A, new int[]{0x015B}); - CASE_FOLD.put(0x015C, new int[]{0x015D}); - CASE_FOLD.put(0x015E, new int[]{0x015F}); - CASE_FOLD.put(0x0160, new int[]{0x0161}); - CASE_FOLD.put(0x0162, new int[]{0x0163}); - CASE_FOLD.put(0x0164, new int[]{0x0165}); - CASE_FOLD.put(0x0166, new int[]{0x0167}); - CASE_FOLD.put(0x0168, new int[]{0x0169}); - CASE_FOLD.put(0x016A, new int[]{0x016B}); - CASE_FOLD.put(0x016C, new int[]{0x016D}); - CASE_FOLD.put(0x016E, new int[]{0x016F}); - CASE_FOLD.put(0x0170, new int[]{0x0171}); - CASE_FOLD.put(0x0172, new int[]{0x0173}); - CASE_FOLD.put(0x0174, new int[]{0x0175}); - CASE_FOLD.put(0x0176, new int[]{0x0177}); - CASE_FOLD.put(0x0178, new int[]{0x00FF}); - CASE_FOLD.put(0x0179, new int[]{0x017A}); - CASE_FOLD.put(0x017B, new int[]{0x017C}); - CASE_FOLD.put(0x017D, new int[]{0x017E}); - CASE_FOLD.put(0x017F, new int[]{0x0073}); - CASE_FOLD.put(0x0181, new int[]{0x0253}); - CASE_FOLD.put(0x0182, new int[]{0x0183}); - CASE_FOLD.put(0x0184, new int[]{0x0185}); - CASE_FOLD.put(0x0186, new int[]{0x0254}); - CASE_FOLD.put(0x0187, new int[]{0x0188}); - CASE_FOLD.put(0x0189, new int[]{0x0256}); - CASE_FOLD.put(0x018A, new 
int[]{0x0257}); - CASE_FOLD.put(0x018B, new int[]{0x018C}); - CASE_FOLD.put(0x018E, new int[]{0x01DD}); - CASE_FOLD.put(0x018F, new int[]{0x0259}); - CASE_FOLD.put(0x0190, new int[]{0x025B}); - CASE_FOLD.put(0x0191, new int[]{0x0192}); - CASE_FOLD.put(0x0193, new int[]{0x0260}); - CASE_FOLD.put(0x0194, new int[]{0x0263}); - CASE_FOLD.put(0x0196, new int[]{0x0269}); - CASE_FOLD.put(0x0197, new int[]{0x0268}); - CASE_FOLD.put(0x0198, new int[]{0x0199}); - CASE_FOLD.put(0x019C, new int[]{0x026F}); - CASE_FOLD.put(0x019D, new int[]{0x0272}); - CASE_FOLD.put(0x019F, new int[]{0x0275}); - CASE_FOLD.put(0x01A0, new int[]{0x01A1}); - CASE_FOLD.put(0x01A2, new int[]{0x01A3}); - CASE_FOLD.put(0x01A4, new int[]{0x01A5}); - CASE_FOLD.put(0x01A6, new int[]{0x0280}); - CASE_FOLD.put(0x01A7, new int[]{0x01A8}); - CASE_FOLD.put(0x01A9, new int[]{0x0283}); - CASE_FOLD.put(0x01AC, new int[]{0x01AD}); - CASE_FOLD.put(0x01AE, new int[]{0x0288}); - CASE_FOLD.put(0x01AF, new int[]{0x01B0}); - CASE_FOLD.put(0x01B1, new int[]{0x028A}); - CASE_FOLD.put(0x01B2, new int[]{0x028B}); - CASE_FOLD.put(0x01B3, new int[]{0x01B4}); - CASE_FOLD.put(0x01B5, new int[]{0x01B6}); - CASE_FOLD.put(0x01B7, new int[]{0x0292}); - CASE_FOLD.put(0x01B8, new int[]{0x01B9}); - CASE_FOLD.put(0x01BC, new int[]{0x01BD}); - CASE_FOLD.put(0x01C4, new int[]{0x01C6}); - CASE_FOLD.put(0x01C5, new int[]{0x01C6}); - CASE_FOLD.put(0x01C7, new int[]{0x01C9}); - CASE_FOLD.put(0x01C8, new int[]{0x01C9}); - CASE_FOLD.put(0x01CA, new int[]{0x01CC}); - CASE_FOLD.put(0x01CB, new int[]{0x01CC}); - CASE_FOLD.put(0x01CD, new int[]{0x01CE}); - CASE_FOLD.put(0x01CF, new int[]{0x01D0}); - CASE_FOLD.put(0x01D1, new int[]{0x01D2}); - CASE_FOLD.put(0x01D3, new int[]{0x01D4}); - CASE_FOLD.put(0x01D5, new int[]{0x01D6}); - CASE_FOLD.put(0x01D7, new int[]{0x01D8}); - CASE_FOLD.put(0x01D9, new int[]{0x01DA}); - CASE_FOLD.put(0x01DB, new int[]{0x01DC}); - CASE_FOLD.put(0x01DE, new int[]{0x01DF}); - CASE_FOLD.put(0x01E0, new int[]{0x01E1}); - 
CASE_FOLD.put(0x01E2, new int[]{0x01E3}); - CASE_FOLD.put(0x01E4, new int[]{0x01E5}); - CASE_FOLD.put(0x01E6, new int[]{0x01E7}); - CASE_FOLD.put(0x01E8, new int[]{0x01E9}); - CASE_FOLD.put(0x01EA, new int[]{0x01EB}); - CASE_FOLD.put(0x01EC, new int[]{0x01ED}); - CASE_FOLD.put(0x01EE, new int[]{0x01EF}); - CASE_FOLD.put(0x01F0, new int[]{0x006A, 0x030C}); - CASE_FOLD.put(0x01F1, new int[]{0x01F3}); - CASE_FOLD.put(0x01F2, new int[]{0x01F3}); - CASE_FOLD.put(0x01F4, new int[]{0x01F5}); - CASE_FOLD.put(0x01F6, new int[]{0x0195}); - CASE_FOLD.put(0x01F7, new int[]{0x01BF}); - CASE_FOLD.put(0x01F8, new int[]{0x01F9}); - CASE_FOLD.put(0x01FA, new int[]{0x01FB}); - CASE_FOLD.put(0x01FC, new int[]{0x01FD}); - CASE_FOLD.put(0x01FE, new int[]{0x01FF}); - CASE_FOLD.put(0x0200, new int[]{0x0201}); - CASE_FOLD.put(0x0202, new int[]{0x0203}); - CASE_FOLD.put(0x0204, new int[]{0x0205}); - CASE_FOLD.put(0x0206, new int[]{0x0207}); - CASE_FOLD.put(0x0208, new int[]{0x0209}); - CASE_FOLD.put(0x020A, new int[]{0x020B}); - CASE_FOLD.put(0x020C, new int[]{0x020D}); - CASE_FOLD.put(0x020E, new int[]{0x020F}); - CASE_FOLD.put(0x0210, new int[]{0x0211}); - CASE_FOLD.put(0x0212, new int[]{0x0213}); - CASE_FOLD.put(0x0214, new int[]{0x0215}); - CASE_FOLD.put(0x0216, new int[]{0x0217}); - CASE_FOLD.put(0x0218, new int[]{0x0219}); - CASE_FOLD.put(0x021A, new int[]{0x021B}); - CASE_FOLD.put(0x021C, new int[]{0x021D}); - CASE_FOLD.put(0x021E, new int[]{0x021F}); - CASE_FOLD.put(0x0220, new int[]{0x019E}); - CASE_FOLD.put(0x0222, new int[]{0x0223}); - CASE_FOLD.put(0x0224, new int[]{0x0225}); - CASE_FOLD.put(0x0226, new int[]{0x0227}); - CASE_FOLD.put(0x0228, new int[]{0x0229}); - CASE_FOLD.put(0x022A, new int[]{0x022B}); - CASE_FOLD.put(0x022C, new int[]{0x022D}); - CASE_FOLD.put(0x022E, new int[]{0x022F}); - CASE_FOLD.put(0x0230, new int[]{0x0231}); - CASE_FOLD.put(0x0232, new int[]{0x0233}); - CASE_FOLD.put(0x023A, new int[]{0x2C65}); - CASE_FOLD.put(0x023B, new int[]{0x023C}); - 
CASE_FOLD.put(0x023D, new int[]{0x019A}); - CASE_FOLD.put(0x023E, new int[]{0x2C66}); - CASE_FOLD.put(0x0241, new int[]{0x0242}); - CASE_FOLD.put(0x0243, new int[]{0x0180}); - CASE_FOLD.put(0x0244, new int[]{0x0289}); - CASE_FOLD.put(0x0245, new int[]{0x028C}); - CASE_FOLD.put(0x0246, new int[]{0x0247}); - CASE_FOLD.put(0x0248, new int[]{0x0249}); - CASE_FOLD.put(0x024A, new int[]{0x024B}); - CASE_FOLD.put(0x024C, new int[]{0x024D}); - CASE_FOLD.put(0x024E, new int[]{0x024F}); - CASE_FOLD.put(0x0345, new int[]{0x03B9}); - CASE_FOLD.put(0x0370, new int[]{0x0371}); - CASE_FOLD.put(0x0372, new int[]{0x0373}); - CASE_FOLD.put(0x0376, new int[]{0x0377}); - CASE_FOLD.put(0x037F, new int[]{0x03F3}); - CASE_FOLD.put(0x0386, new int[]{0x03AC}); - CASE_FOLD.put(0x0388, new int[]{0x03AD}); - CASE_FOLD.put(0x0389, new int[]{0x03AE}); - CASE_FOLD.put(0x038A, new int[]{0x03AF}); - CASE_FOLD.put(0x038C, new int[]{0x03CC}); - CASE_FOLD.put(0x038E, new int[]{0x03CD}); - CASE_FOLD.put(0x038F, new int[]{0x03CE}); - CASE_FOLD.put(0x0390, new int[]{0x03B9, 0x0308, 0x0301}); - CASE_FOLD.put(0x0391, new int[]{0x03B1}); - CASE_FOLD.put(0x0392, new int[]{0x03B2}); - CASE_FOLD.put(0x0393, new int[]{0x03B3}); - CASE_FOLD.put(0x0394, new int[]{0x03B4}); - CASE_FOLD.put(0x0395, new int[]{0x03B5}); - CASE_FOLD.put(0x0396, new int[]{0x03B6}); - CASE_FOLD.put(0x0397, new int[]{0x03B7}); - CASE_FOLD.put(0x0398, new int[]{0x03B8}); - CASE_FOLD.put(0x0399, new int[]{0x03B9}); - CASE_FOLD.put(0x039A, new int[]{0x03BA}); - CASE_FOLD.put(0x039B, new int[]{0x03BB}); - CASE_FOLD.put(0x039C, new int[]{0x03BC}); - CASE_FOLD.put(0x039D, new int[]{0x03BD}); - CASE_FOLD.put(0x039E, new int[]{0x03BE}); - CASE_FOLD.put(0x039F, new int[]{0x03BF}); - CASE_FOLD.put(0x03A0, new int[]{0x03C0}); - CASE_FOLD.put(0x03A1, new int[]{0x03C1}); - CASE_FOLD.put(0x03A3, new int[]{0x03C3}); - CASE_FOLD.put(0x03A4, new int[]{0x03C4}); - CASE_FOLD.put(0x03A5, new int[]{0x03C5}); - CASE_FOLD.put(0x03A6, new int[]{0x03C6}); - 
CASE_FOLD.put(0x03A7, new int[]{0x03C7}); - CASE_FOLD.put(0x03A8, new int[]{0x03C8}); - CASE_FOLD.put(0x03A9, new int[]{0x03C9}); - CASE_FOLD.put(0x03AA, new int[]{0x03CA}); - CASE_FOLD.put(0x03AB, new int[]{0x03CB}); - CASE_FOLD.put(0x03B0, new int[]{0x03C5, 0x0308, 0x0301}); - CASE_FOLD.put(0x03C2, new int[]{0x03C3}); - CASE_FOLD.put(0x03CF, new int[]{0x03D7}); - CASE_FOLD.put(0x03D0, new int[]{0x03B2}); - CASE_FOLD.put(0x03D1, new int[]{0x03B8}); - CASE_FOLD.put(0x03D5, new int[]{0x03C6}); - CASE_FOLD.put(0x03D6, new int[]{0x03C0}); - CASE_FOLD.put(0x03D8, new int[]{0x03D9}); - CASE_FOLD.put(0x03DA, new int[]{0x03DB}); - CASE_FOLD.put(0x03DC, new int[]{0x03DD}); - CASE_FOLD.put(0x03DE, new int[]{0x03DF}); - CASE_FOLD.put(0x03E0, new int[]{0x03E1}); - CASE_FOLD.put(0x03E2, new int[]{0x03E3}); - CASE_FOLD.put(0x03E4, new int[]{0x03E5}); - CASE_FOLD.put(0x03E6, new int[]{0x03E7}); - CASE_FOLD.put(0x03E8, new int[]{0x03E9}); - CASE_FOLD.put(0x03EA, new int[]{0x03EB}); - CASE_FOLD.put(0x03EC, new int[]{0x03ED}); - CASE_FOLD.put(0x03EE, new int[]{0x03EF}); - CASE_FOLD.put(0x03F0, new int[]{0x03BA}); - CASE_FOLD.put(0x03F1, new int[]{0x03C1}); - CASE_FOLD.put(0x03F4, new int[]{0x03B8}); - CASE_FOLD.put(0x03F5, new int[]{0x03B5}); - CASE_FOLD.put(0x03F7, new int[]{0x03F8}); - CASE_FOLD.put(0x03F9, new int[]{0x03F2}); - CASE_FOLD.put(0x03FA, new int[]{0x03FB}); - CASE_FOLD.put(0x03FD, new int[]{0x037B}); - CASE_FOLD.put(0x03FE, new int[]{0x037C}); - CASE_FOLD.put(0x03FF, new int[]{0x037D}); - CASE_FOLD.put(0x0400, new int[]{0x0450}); - CASE_FOLD.put(0x0401, new int[]{0x0451}); - CASE_FOLD.put(0x0402, new int[]{0x0452}); - CASE_FOLD.put(0x0403, new int[]{0x0453}); - CASE_FOLD.put(0x0404, new int[]{0x0454}); - CASE_FOLD.put(0x0405, new int[]{0x0455}); - CASE_FOLD.put(0x0406, new int[]{0x0456}); - CASE_FOLD.put(0x0407, new int[]{0x0457}); - CASE_FOLD.put(0x0408, new int[]{0x0458}); - CASE_FOLD.put(0x0409, new int[]{0x0459}); - CASE_FOLD.put(0x040A, new int[]{0x045A}); - 
CASE_FOLD.put(0x040B, new int[]{0x045B}); - CASE_FOLD.put(0x040C, new int[]{0x045C}); - CASE_FOLD.put(0x040D, new int[]{0x045D}); - CASE_FOLD.put(0x040E, new int[]{0x045E}); - CASE_FOLD.put(0x040F, new int[]{0x045F}); - CASE_FOLD.put(0x0410, new int[]{0x0430}); - CASE_FOLD.put(0x0411, new int[]{0x0431}); - CASE_FOLD.put(0x0412, new int[]{0x0432}); - CASE_FOLD.put(0x0413, new int[]{0x0433}); - CASE_FOLD.put(0x0414, new int[]{0x0434}); - CASE_FOLD.put(0x0415, new int[]{0x0435}); - CASE_FOLD.put(0x0416, new int[]{0x0436}); - CASE_FOLD.put(0x0417, new int[]{0x0437}); - CASE_FOLD.put(0x0418, new int[]{0x0438}); - CASE_FOLD.put(0x0419, new int[]{0x0439}); - CASE_FOLD.put(0x041A, new int[]{0x043A}); - CASE_FOLD.put(0x041B, new int[]{0x043B}); - CASE_FOLD.put(0x041C, new int[]{0x043C}); - CASE_FOLD.put(0x041D, new int[]{0x043D}); - CASE_FOLD.put(0x041E, new int[]{0x043E}); - CASE_FOLD.put(0x041F, new int[]{0x043F}); - CASE_FOLD.put(0x0420, new int[]{0x0440}); - CASE_FOLD.put(0x0421, new int[]{0x0441}); - CASE_FOLD.put(0x0422, new int[]{0x0442}); - CASE_FOLD.put(0x0423, new int[]{0x0443}); - CASE_FOLD.put(0x0424, new int[]{0x0444}); - CASE_FOLD.put(0x0425, new int[]{0x0445}); - CASE_FOLD.put(0x0426, new int[]{0x0446}); - CASE_FOLD.put(0x0427, new int[]{0x0447}); - CASE_FOLD.put(0x0428, new int[]{0x0448}); - CASE_FOLD.put(0x0429, new int[]{0x0449}); - CASE_FOLD.put(0x042A, new int[]{0x044A}); - CASE_FOLD.put(0x042B, new int[]{0x044B}); - CASE_FOLD.put(0x042C, new int[]{0x044C}); - CASE_FOLD.put(0x042D, new int[]{0x044D}); - CASE_FOLD.put(0x042E, new int[]{0x044E}); - CASE_FOLD.put(0x042F, new int[]{0x044F}); - CASE_FOLD.put(0x0460, new int[]{0x0461}); - CASE_FOLD.put(0x0462, new int[]{0x0463}); - CASE_FOLD.put(0x0464, new int[]{0x0465}); - CASE_FOLD.put(0x0466, new int[]{0x0467}); - CASE_FOLD.put(0x0468, new int[]{0x0469}); - CASE_FOLD.put(0x046A, new int[]{0x046B}); - CASE_FOLD.put(0x046C, new int[]{0x046D}); - CASE_FOLD.put(0x046E, new int[]{0x046F}); - 
CASE_FOLD.put(0x0470, new int[]{0x0471}); - CASE_FOLD.put(0x0472, new int[]{0x0473}); - CASE_FOLD.put(0x0474, new int[]{0x0475}); - CASE_FOLD.put(0x0476, new int[]{0x0477}); - CASE_FOLD.put(0x0478, new int[]{0x0479}); - CASE_FOLD.put(0x047A, new int[]{0x047B}); - CASE_FOLD.put(0x047C, new int[]{0x047D}); - CASE_FOLD.put(0x047E, new int[]{0x047F}); - CASE_FOLD.put(0x0480, new int[]{0x0481}); - CASE_FOLD.put(0x048A, new int[]{0x048B}); - CASE_FOLD.put(0x048C, new int[]{0x048D}); - CASE_FOLD.put(0x048E, new int[]{0x048F}); - CASE_FOLD.put(0x0490, new int[]{0x0491}); - CASE_FOLD.put(0x0492, new int[]{0x0493}); - CASE_FOLD.put(0x0494, new int[]{0x0495}); - CASE_FOLD.put(0x0496, new int[]{0x0497}); - CASE_FOLD.put(0x0498, new int[]{0x0499}); - CASE_FOLD.put(0x049A, new int[]{0x049B}); - CASE_FOLD.put(0x049C, new int[]{0x049D}); - CASE_FOLD.put(0x049E, new int[]{0x049F}); - CASE_FOLD.put(0x04A0, new int[]{0x04A1}); - CASE_FOLD.put(0x04A2, new int[]{0x04A3}); - CASE_FOLD.put(0x04A4, new int[]{0x04A5}); - CASE_FOLD.put(0x04A6, new int[]{0x04A7}); - CASE_FOLD.put(0x04A8, new int[]{0x04A9}); - CASE_FOLD.put(0x04AA, new int[]{0x04AB}); - CASE_FOLD.put(0x04AC, new int[]{0x04AD}); - CASE_FOLD.put(0x04AE, new int[]{0x04AF}); - CASE_FOLD.put(0x04B0, new int[]{0x04B1}); - CASE_FOLD.put(0x04B2, new int[]{0x04B3}); - CASE_FOLD.put(0x04B4, new int[]{0x04B5}); - CASE_FOLD.put(0x04B6, new int[]{0x04B7}); - CASE_FOLD.put(0x04B8, new int[]{0x04B9}); - CASE_FOLD.put(0x04BA, new int[]{0x04BB}); - CASE_FOLD.put(0x04BC, new int[]{0x04BD}); - CASE_FOLD.put(0x04BE, new int[]{0x04BF}); - CASE_FOLD.put(0x04C0, new int[]{0x04CF}); - CASE_FOLD.put(0x04C1, new int[]{0x04C2}); - CASE_FOLD.put(0x04C3, new int[]{0x04C4}); - CASE_FOLD.put(0x04C5, new int[]{0x04C6}); - CASE_FOLD.put(0x04C7, new int[]{0x04C8}); - CASE_FOLD.put(0x04C9, new int[]{0x04CA}); - CASE_FOLD.put(0x04CB, new int[]{0x04CC}); - CASE_FOLD.put(0x04CD, new int[]{0x04CE}); - CASE_FOLD.put(0x04D0, new int[]{0x04D1}); - 
CASE_FOLD.put(0x04D2, new int[]{0x04D3}); - CASE_FOLD.put(0x04D4, new int[]{0x04D5}); - CASE_FOLD.put(0x04D6, new int[]{0x04D7}); - CASE_FOLD.put(0x04D8, new int[]{0x04D9}); - CASE_FOLD.put(0x04DA, new int[]{0x04DB}); - CASE_FOLD.put(0x04DC, new int[]{0x04DD}); - CASE_FOLD.put(0x04DE, new int[]{0x04DF}); - CASE_FOLD.put(0x04E0, new int[]{0x04E1}); - CASE_FOLD.put(0x04E2, new int[]{0x04E3}); - CASE_FOLD.put(0x04E4, new int[]{0x04E5}); - CASE_FOLD.put(0x04E6, new int[]{0x04E7}); - CASE_FOLD.put(0x04E8, new int[]{0x04E9}); - CASE_FOLD.put(0x04EA, new int[]{0x04EB}); - CASE_FOLD.put(0x04EC, new int[]{0x04ED}); - CASE_FOLD.put(0x04EE, new int[]{0x04EF}); - CASE_FOLD.put(0x04F0, new int[]{0x04F1}); - CASE_FOLD.put(0x04F2, new int[]{0x04F3}); - CASE_FOLD.put(0x04F4, new int[]{0x04F5}); - CASE_FOLD.put(0x04F6, new int[]{0x04F7}); - CASE_FOLD.put(0x04F8, new int[]{0x04F9}); - CASE_FOLD.put(0x04FA, new int[]{0x04FB}); - CASE_FOLD.put(0x04FC, new int[]{0x04FD}); - CASE_FOLD.put(0x04FE, new int[]{0x04FF}); - CASE_FOLD.put(0x0500, new int[]{0x0501}); - CASE_FOLD.put(0x0502, new int[]{0x0503}); - CASE_FOLD.put(0x0504, new int[]{0x0505}); - CASE_FOLD.put(0x0506, new int[]{0x0507}); - CASE_FOLD.put(0x0508, new int[]{0x0509}); - CASE_FOLD.put(0x050A, new int[]{0x050B}); - CASE_FOLD.put(0x050C, new int[]{0x050D}); - CASE_FOLD.put(0x050E, new int[]{0x050F}); - CASE_FOLD.put(0x0510, new int[]{0x0511}); - CASE_FOLD.put(0x0512, new int[]{0x0513}); - CASE_FOLD.put(0x0514, new int[]{0x0515}); - CASE_FOLD.put(0x0516, new int[]{0x0517}); - CASE_FOLD.put(0x0518, new int[]{0x0519}); - CASE_FOLD.put(0x051A, new int[]{0x051B}); - CASE_FOLD.put(0x051C, new int[]{0x051D}); - CASE_FOLD.put(0x051E, new int[]{0x051F}); - CASE_FOLD.put(0x0520, new int[]{0x0521}); - CASE_FOLD.put(0x0522, new int[]{0x0523}); - CASE_FOLD.put(0x0524, new int[]{0x0525}); - CASE_FOLD.put(0x0526, new int[]{0x0527}); - CASE_FOLD.put(0x0528, new int[]{0x0529}); - CASE_FOLD.put(0x052A, new int[]{0x052B}); - 
CASE_FOLD.put(0x052C, new int[]{0x052D}); - CASE_FOLD.put(0x052E, new int[]{0x052F}); - CASE_FOLD.put(0x0531, new int[]{0x0561}); - CASE_FOLD.put(0x0532, new int[]{0x0562}); - CASE_FOLD.put(0x0533, new int[]{0x0563}); - CASE_FOLD.put(0x0534, new int[]{0x0564}); - CASE_FOLD.put(0x0535, new int[]{0x0565}); - CASE_FOLD.put(0x0536, new int[]{0x0566}); - CASE_FOLD.put(0x0537, new int[]{0x0567}); - CASE_FOLD.put(0x0538, new int[]{0x0568}); - CASE_FOLD.put(0x0539, new int[]{0x0569}); - CASE_FOLD.put(0x053A, new int[]{0x056A}); - CASE_FOLD.put(0x053B, new int[]{0x056B}); - CASE_FOLD.put(0x053C, new int[]{0x056C}); - CASE_FOLD.put(0x053D, new int[]{0x056D}); - CASE_FOLD.put(0x053E, new int[]{0x056E}); - CASE_FOLD.put(0x053F, new int[]{0x056F}); - CASE_FOLD.put(0x0540, new int[]{0x0570}); - CASE_FOLD.put(0x0541, new int[]{0x0571}); - CASE_FOLD.put(0x0542, new int[]{0x0572}); - CASE_FOLD.put(0x0543, new int[]{0x0573}); - CASE_FOLD.put(0x0544, new int[]{0x0574}); - CASE_FOLD.put(0x0545, new int[]{0x0575}); - CASE_FOLD.put(0x0546, new int[]{0x0576}); - CASE_FOLD.put(0x0547, new int[]{0x0577}); - CASE_FOLD.put(0x0548, new int[]{0x0578}); - CASE_FOLD.put(0x0549, new int[]{0x0579}); - CASE_FOLD.put(0x054A, new int[]{0x057A}); - CASE_FOLD.put(0x054B, new int[]{0x057B}); - CASE_FOLD.put(0x054C, new int[]{0x057C}); - CASE_FOLD.put(0x054D, new int[]{0x057D}); - CASE_FOLD.put(0x054E, new int[]{0x057E}); - CASE_FOLD.put(0x054F, new int[]{0x057F}); - CASE_FOLD.put(0x0550, new int[]{0x0580}); - CASE_FOLD.put(0x0551, new int[]{0x0581}); - CASE_FOLD.put(0x0552, new int[]{0x0582}); - CASE_FOLD.put(0x0553, new int[]{0x0583}); - CASE_FOLD.put(0x0554, new int[]{0x0584}); - CASE_FOLD.put(0x0555, new int[]{0x0585}); - CASE_FOLD.put(0x0556, new int[]{0x0586}); - CASE_FOLD.put(0x0587, new int[]{0x0565, 0x0582}); - CASE_FOLD.put(0x10A0, new int[]{0x2D00}); - CASE_FOLD.put(0x10A1, new int[]{0x2D01}); - CASE_FOLD.put(0x10A2, new int[]{0x2D02}); - CASE_FOLD.put(0x10A3, new int[]{0x2D03}); - 
CASE_FOLD.put(0x10A4, new int[]{0x2D04}); - CASE_FOLD.put(0x10A5, new int[]{0x2D05}); - CASE_FOLD.put(0x10A6, new int[]{0x2D06}); - CASE_FOLD.put(0x10A7, new int[]{0x2D07}); - CASE_FOLD.put(0x10A8, new int[]{0x2D08}); - CASE_FOLD.put(0x10A9, new int[]{0x2D09}); - CASE_FOLD.put(0x10AA, new int[]{0x2D0A}); - CASE_FOLD.put(0x10AB, new int[]{0x2D0B}); - CASE_FOLD.put(0x10AC, new int[]{0x2D0C}); - CASE_FOLD.put(0x10AD, new int[]{0x2D0D}); - CASE_FOLD.put(0x10AE, new int[]{0x2D0E}); - CASE_FOLD.put(0x10AF, new int[]{0x2D0F}); - CASE_FOLD.put(0x10B0, new int[]{0x2D10}); - CASE_FOLD.put(0x10B1, new int[]{0x2D11}); - CASE_FOLD.put(0x10B2, new int[]{0x2D12}); - CASE_FOLD.put(0x10B3, new int[]{0x2D13}); - CASE_FOLD.put(0x10B4, new int[]{0x2D14}); - CASE_FOLD.put(0x10B5, new int[]{0x2D15}); - CASE_FOLD.put(0x10B6, new int[]{0x2D16}); - CASE_FOLD.put(0x10B7, new int[]{0x2D17}); - CASE_FOLD.put(0x10B8, new int[]{0x2D18}); - CASE_FOLD.put(0x10B9, new int[]{0x2D19}); - CASE_FOLD.put(0x10BA, new int[]{0x2D1A}); - CASE_FOLD.put(0x10BB, new int[]{0x2D1B}); - CASE_FOLD.put(0x10BC, new int[]{0x2D1C}); - CASE_FOLD.put(0x10BD, new int[]{0x2D1D}); - CASE_FOLD.put(0x10BE, new int[]{0x2D1E}); - CASE_FOLD.put(0x10BF, new int[]{0x2D1F}); - CASE_FOLD.put(0x10C0, new int[]{0x2D20}); - CASE_FOLD.put(0x10C1, new int[]{0x2D21}); - CASE_FOLD.put(0x10C2, new int[]{0x2D22}); - CASE_FOLD.put(0x10C3, new int[]{0x2D23}); - CASE_FOLD.put(0x10C4, new int[]{0x2D24}); - CASE_FOLD.put(0x10C5, new int[]{0x2D25}); - CASE_FOLD.put(0x10C7, new int[]{0x2D27}); - CASE_FOLD.put(0x10CD, new int[]{0x2D2D}); - CASE_FOLD.put(0x13F8, new int[]{0x13F0}); - CASE_FOLD.put(0x13F9, new int[]{0x13F1}); - CASE_FOLD.put(0x13FA, new int[]{0x13F2}); - CASE_FOLD.put(0x13FB, new int[]{0x13F3}); - CASE_FOLD.put(0x13FC, new int[]{0x13F4}); - CASE_FOLD.put(0x13FD, new int[]{0x13F5}); - CASE_FOLD.put(0x1C80, new int[]{0x0432}); - CASE_FOLD.put(0x1C81, new int[]{0x0434}); - CASE_FOLD.put(0x1C82, new int[]{0x043E}); - 
CASE_FOLD.put(0x1C83, new int[]{0x0441}); - CASE_FOLD.put(0x1C84, new int[]{0x0442}); - CASE_FOLD.put(0x1C85, new int[]{0x0442}); - CASE_FOLD.put(0x1C86, new int[]{0x044A}); - CASE_FOLD.put(0x1C87, new int[]{0x0463}); - CASE_FOLD.put(0x1C88, new int[]{0xA64B}); - CASE_FOLD.put(0x1C90, new int[]{0x10D0}); - CASE_FOLD.put(0x1C91, new int[]{0x10D1}); - CASE_FOLD.put(0x1C92, new int[]{0x10D2}); - CASE_FOLD.put(0x1C93, new int[]{0x10D3}); - CASE_FOLD.put(0x1C94, new int[]{0x10D4}); - CASE_FOLD.put(0x1C95, new int[]{0x10D5}); - CASE_FOLD.put(0x1C96, new int[]{0x10D6}); - CASE_FOLD.put(0x1C97, new int[]{0x10D7}); - CASE_FOLD.put(0x1C98, new int[]{0x10D8}); - CASE_FOLD.put(0x1C99, new int[]{0x10D9}); - CASE_FOLD.put(0x1C9A, new int[]{0x10DA}); - CASE_FOLD.put(0x1C9B, new int[]{0x10DB}); - CASE_FOLD.put(0x1C9C, new int[]{0x10DC}); - CASE_FOLD.put(0x1C9D, new int[]{0x10DD}); - CASE_FOLD.put(0x1C9E, new int[]{0x10DE}); - CASE_FOLD.put(0x1C9F, new int[]{0x10DF}); - CASE_FOLD.put(0x1CA0, new int[]{0x10E0}); - CASE_FOLD.put(0x1CA1, new int[]{0x10E1}); - CASE_FOLD.put(0x1CA2, new int[]{0x10E2}); - CASE_FOLD.put(0x1CA3, new int[]{0x10E3}); - CASE_FOLD.put(0x1CA4, new int[]{0x10E4}); - CASE_FOLD.put(0x1CA5, new int[]{0x10E5}); - CASE_FOLD.put(0x1CA6, new int[]{0x10E6}); - CASE_FOLD.put(0x1CA7, new int[]{0x10E7}); - CASE_FOLD.put(0x1CA8, new int[]{0x10E8}); - CASE_FOLD.put(0x1CA9, new int[]{0x10E9}); - CASE_FOLD.put(0x1CAA, new int[]{0x10EA}); - CASE_FOLD.put(0x1CAB, new int[]{0x10EB}); - CASE_FOLD.put(0x1CAC, new int[]{0x10EC}); - CASE_FOLD.put(0x1CAD, new int[]{0x10ED}); - CASE_FOLD.put(0x1CAE, new int[]{0x10EE}); - CASE_FOLD.put(0x1CAF, new int[]{0x10EF}); - CASE_FOLD.put(0x1CB0, new int[]{0x10F0}); - CASE_FOLD.put(0x1CB1, new int[]{0x10F1}); - CASE_FOLD.put(0x1CB2, new int[]{0x10F2}); - CASE_FOLD.put(0x1CB3, new int[]{0x10F3}); - CASE_FOLD.put(0x1CB4, new int[]{0x10F4}); - CASE_FOLD.put(0x1CB5, new int[]{0x10F5}); - CASE_FOLD.put(0x1CB6, new int[]{0x10F6}); - 
CASE_FOLD.put(0x1CB7, new int[]{0x10F7}); - CASE_FOLD.put(0x1CB8, new int[]{0x10F8}); - CASE_FOLD.put(0x1CB9, new int[]{0x10F9}); - CASE_FOLD.put(0x1CBA, new int[]{0x10FA}); - CASE_FOLD.put(0x1CBD, new int[]{0x10FD}); - CASE_FOLD.put(0x1CBE, new int[]{0x10FE}); - CASE_FOLD.put(0x1CBF, new int[]{0x10FF}); - CASE_FOLD.put(0x1E00, new int[]{0x1E01}); - CASE_FOLD.put(0x1E02, new int[]{0x1E03}); - CASE_FOLD.put(0x1E04, new int[]{0x1E05}); - CASE_FOLD.put(0x1E06, new int[]{0x1E07}); - CASE_FOLD.put(0x1E08, new int[]{0x1E09}); - CASE_FOLD.put(0x1E0A, new int[]{0x1E0B}); - CASE_FOLD.put(0x1E0C, new int[]{0x1E0D}); - CASE_FOLD.put(0x1E0E, new int[]{0x1E0F}); - CASE_FOLD.put(0x1E10, new int[]{0x1E11}); - CASE_FOLD.put(0x1E12, new int[]{0x1E13}); - CASE_FOLD.put(0x1E14, new int[]{0x1E15}); - CASE_FOLD.put(0x1E16, new int[]{0x1E17}); - CASE_FOLD.put(0x1E18, new int[]{0x1E19}); - CASE_FOLD.put(0x1E1A, new int[]{0x1E1B}); - CASE_FOLD.put(0x1E1C, new int[]{0x1E1D}); - CASE_FOLD.put(0x1E1E, new int[]{0x1E1F}); - CASE_FOLD.put(0x1E20, new int[]{0x1E21}); - CASE_FOLD.put(0x1E22, new int[]{0x1E23}); - CASE_FOLD.put(0x1E24, new int[]{0x1E25}); - CASE_FOLD.put(0x1E26, new int[]{0x1E27}); - CASE_FOLD.put(0x1E28, new int[]{0x1E29}); - CASE_FOLD.put(0x1E2A, new int[]{0x1E2B}); - CASE_FOLD.put(0x1E2C, new int[]{0x1E2D}); - CASE_FOLD.put(0x1E2E, new int[]{0x1E2F}); - CASE_FOLD.put(0x1E30, new int[]{0x1E31}); - CASE_FOLD.put(0x1E32, new int[]{0x1E33}); - CASE_FOLD.put(0x1E34, new int[]{0x1E35}); - CASE_FOLD.put(0x1E36, new int[]{0x1E37}); - CASE_FOLD.put(0x1E38, new int[]{0x1E39}); - CASE_FOLD.put(0x1E3A, new int[]{0x1E3B}); - CASE_FOLD.put(0x1E3C, new int[]{0x1E3D}); - CASE_FOLD.put(0x1E3E, new int[]{0x1E3F}); - CASE_FOLD.put(0x1E40, new int[]{0x1E41}); - CASE_FOLD.put(0x1E42, new int[]{0x1E43}); - CASE_FOLD.put(0x1E44, new int[]{0x1E45}); - CASE_FOLD.put(0x1E46, new int[]{0x1E47}); - CASE_FOLD.put(0x1E48, new int[]{0x1E49}); - CASE_FOLD.put(0x1E4A, new int[]{0x1E4B}); - 
CASE_FOLD.put(0x1E4C, new int[]{0x1E4D}); - CASE_FOLD.put(0x1E4E, new int[]{0x1E4F}); - CASE_FOLD.put(0x1E50, new int[]{0x1E51}); - CASE_FOLD.put(0x1E52, new int[]{0x1E53}); - CASE_FOLD.put(0x1E54, new int[]{0x1E55}); - CASE_FOLD.put(0x1E56, new int[]{0x1E57}); - CASE_FOLD.put(0x1E58, new int[]{0x1E59}); - CASE_FOLD.put(0x1E5A, new int[]{0x1E5B}); - CASE_FOLD.put(0x1E5C, new int[]{0x1E5D}); - CASE_FOLD.put(0x1E5E, new int[]{0x1E5F}); - CASE_FOLD.put(0x1E60, new int[]{0x1E61}); - CASE_FOLD.put(0x1E62, new int[]{0x1E63}); - CASE_FOLD.put(0x1E64, new int[]{0x1E65}); - CASE_FOLD.put(0x1E66, new int[]{0x1E67}); - CASE_FOLD.put(0x1E68, new int[]{0x1E69}); - CASE_FOLD.put(0x1E6A, new int[]{0x1E6B}); - CASE_FOLD.put(0x1E6C, new int[]{0x1E6D}); - CASE_FOLD.put(0x1E6E, new int[]{0x1E6F}); - CASE_FOLD.put(0x1E70, new int[]{0x1E71}); - CASE_FOLD.put(0x1E72, new int[]{0x1E73}); - CASE_FOLD.put(0x1E74, new int[]{0x1E75}); - CASE_FOLD.put(0x1E76, new int[]{0x1E77}); - CASE_FOLD.put(0x1E78, new int[]{0x1E79}); - CASE_FOLD.put(0x1E7A, new int[]{0x1E7B}); - CASE_FOLD.put(0x1E7C, new int[]{0x1E7D}); - CASE_FOLD.put(0x1E7E, new int[]{0x1E7F}); - CASE_FOLD.put(0x1E80, new int[]{0x1E81}); - CASE_FOLD.put(0x1E82, new int[]{0x1E83}); - CASE_FOLD.put(0x1E84, new int[]{0x1E85}); - CASE_FOLD.put(0x1E86, new int[]{0x1E87}); - CASE_FOLD.put(0x1E88, new int[]{0x1E89}); - CASE_FOLD.put(0x1E8A, new int[]{0x1E8B}); - CASE_FOLD.put(0x1E8C, new int[]{0x1E8D}); - CASE_FOLD.put(0x1E8E, new int[]{0x1E8F}); - CASE_FOLD.put(0x1E90, new int[]{0x1E91}); - CASE_FOLD.put(0x1E92, new int[]{0x1E93}); - CASE_FOLD.put(0x1E94, new int[]{0x1E95}); - CASE_FOLD.put(0x1E96, new int[]{0x0068, 0x0331}); - CASE_FOLD.put(0x1E97, new int[]{0x0074, 0x0308}); - CASE_FOLD.put(0x1E98, new int[]{0x0077, 0x030A}); - CASE_FOLD.put(0x1E99, new int[]{0x0079, 0x030A}); - CASE_FOLD.put(0x1E9A, new int[]{0x0061, 0x02BE}); - CASE_FOLD.put(0x1E9B, new int[]{0x1E61}); - CASE_FOLD.put(0x1E9E, new int[]{0x0073, 0x0073}); - 
CASE_FOLD.put(0x1EA0, new int[]{0x1EA1}); - CASE_FOLD.put(0x1EA2, new int[]{0x1EA3}); - CASE_FOLD.put(0x1EA4, new int[]{0x1EA5}); - CASE_FOLD.put(0x1EA6, new int[]{0x1EA7}); - CASE_FOLD.put(0x1EA8, new int[]{0x1EA9}); - CASE_FOLD.put(0x1EAA, new int[]{0x1EAB}); - CASE_FOLD.put(0x1EAC, new int[]{0x1EAD}); - CASE_FOLD.put(0x1EAE, new int[]{0x1EAF}); - CASE_FOLD.put(0x1EB0, new int[]{0x1EB1}); - CASE_FOLD.put(0x1EB2, new int[]{0x1EB3}); - CASE_FOLD.put(0x1EB4, new int[]{0x1EB5}); - CASE_FOLD.put(0x1EB6, new int[]{0x1EB7}); - CASE_FOLD.put(0x1EB8, new int[]{0x1EB9}); - CASE_FOLD.put(0x1EBA, new int[]{0x1EBB}); - CASE_FOLD.put(0x1EBC, new int[]{0x1EBD}); - CASE_FOLD.put(0x1EBE, new int[]{0x1EBF}); - CASE_FOLD.put(0x1EC0, new int[]{0x1EC1}); - CASE_FOLD.put(0x1EC2, new int[]{0x1EC3}); - CASE_FOLD.put(0x1EC4, new int[]{0x1EC5}); - CASE_FOLD.put(0x1EC6, new int[]{0x1EC7}); - CASE_FOLD.put(0x1EC8, new int[]{0x1EC9}); - CASE_FOLD.put(0x1ECA, new int[]{0x1ECB}); - CASE_FOLD.put(0x1ECC, new int[]{0x1ECD}); - CASE_FOLD.put(0x1ECE, new int[]{0x1ECF}); - CASE_FOLD.put(0x1ED0, new int[]{0x1ED1}); - CASE_FOLD.put(0x1ED2, new int[]{0x1ED3}); - CASE_FOLD.put(0x1ED4, new int[]{0x1ED5}); - CASE_FOLD.put(0x1ED6, new int[]{0x1ED7}); - CASE_FOLD.put(0x1ED8, new int[]{0x1ED9}); - CASE_FOLD.put(0x1EDA, new int[]{0x1EDB}); - CASE_FOLD.put(0x1EDC, new int[]{0x1EDD}); - CASE_FOLD.put(0x1EDE, new int[]{0x1EDF}); - CASE_FOLD.put(0x1EE0, new int[]{0x1EE1}); - CASE_FOLD.put(0x1EE2, new int[]{0x1EE3}); - CASE_FOLD.put(0x1EE4, new int[]{0x1EE5}); - CASE_FOLD.put(0x1EE6, new int[]{0x1EE7}); - CASE_FOLD.put(0x1EE8, new int[]{0x1EE9}); - CASE_FOLD.put(0x1EEA, new int[]{0x1EEB}); - CASE_FOLD.put(0x1EEC, new int[]{0x1EED}); - CASE_FOLD.put(0x1EEE, new int[]{0x1EEF}); - CASE_FOLD.put(0x1EF0, new int[]{0x1EF1}); - CASE_FOLD.put(0x1EF2, new int[]{0x1EF3}); - CASE_FOLD.put(0x1EF4, new int[]{0x1EF5}); - CASE_FOLD.put(0x1EF6, new int[]{0x1EF7}); - CASE_FOLD.put(0x1EF8, new int[]{0x1EF9}); - 
CASE_FOLD.put(0x1EFA, new int[]{0x1EFB}); - CASE_FOLD.put(0x1EFC, new int[]{0x1EFD}); - CASE_FOLD.put(0x1EFE, new int[]{0x1EFF}); - CASE_FOLD.put(0x1F08, new int[]{0x1F00}); - CASE_FOLD.put(0x1F09, new int[]{0x1F01}); - CASE_FOLD.put(0x1F0A, new int[]{0x1F02}); - CASE_FOLD.put(0x1F0B, new int[]{0x1F03}); - CASE_FOLD.put(0x1F0C, new int[]{0x1F04}); - CASE_FOLD.put(0x1F0D, new int[]{0x1F05}); - CASE_FOLD.put(0x1F0E, new int[]{0x1F06}); - CASE_FOLD.put(0x1F0F, new int[]{0x1F07}); - CASE_FOLD.put(0x1F18, new int[]{0x1F10}); - CASE_FOLD.put(0x1F19, new int[]{0x1F11}); - CASE_FOLD.put(0x1F1A, new int[]{0x1F12}); - CASE_FOLD.put(0x1F1B, new int[]{0x1F13}); - CASE_FOLD.put(0x1F1C, new int[]{0x1F14}); - CASE_FOLD.put(0x1F1D, new int[]{0x1F15}); - CASE_FOLD.put(0x1F28, new int[]{0x1F20}); - CASE_FOLD.put(0x1F29, new int[]{0x1F21}); - CASE_FOLD.put(0x1F2A, new int[]{0x1F22}); - CASE_FOLD.put(0x1F2B, new int[]{0x1F23}); - CASE_FOLD.put(0x1F2C, new int[]{0x1F24}); - CASE_FOLD.put(0x1F2D, new int[]{0x1F25}); - CASE_FOLD.put(0x1F2E, new int[]{0x1F26}); - CASE_FOLD.put(0x1F2F, new int[]{0x1F27}); - CASE_FOLD.put(0x1F38, new int[]{0x1F30}); - CASE_FOLD.put(0x1F39, new int[]{0x1F31}); - CASE_FOLD.put(0x1F3A, new int[]{0x1F32}); - CASE_FOLD.put(0x1F3B, new int[]{0x1F33}); - CASE_FOLD.put(0x1F3C, new int[]{0x1F34}); - CASE_FOLD.put(0x1F3D, new int[]{0x1F35}); - CASE_FOLD.put(0x1F3E, new int[]{0x1F36}); - CASE_FOLD.put(0x1F3F, new int[]{0x1F37}); - CASE_FOLD.put(0x1F48, new int[]{0x1F40}); - CASE_FOLD.put(0x1F49, new int[]{0x1F41}); - CASE_FOLD.put(0x1F4A, new int[]{0x1F42}); - CASE_FOLD.put(0x1F4B, new int[]{0x1F43}); - CASE_FOLD.put(0x1F4C, new int[]{0x1F44}); - CASE_FOLD.put(0x1F4D, new int[]{0x1F45}); - CASE_FOLD.put(0x1F50, new int[]{0x03C5, 0x0313}); - CASE_FOLD.put(0x1F52, new int[]{0x03C5, 0x0313, 0x0300}); - CASE_FOLD.put(0x1F54, new int[]{0x03C5, 0x0313, 0x0301}); - CASE_FOLD.put(0x1F56, new int[]{0x03C5, 0x0313, 0x0342}); - CASE_FOLD.put(0x1F59, new int[]{0x1F51}); - 
CASE_FOLD.put(0x1F5B, new int[]{0x1F53}); - CASE_FOLD.put(0x1F5D, new int[]{0x1F55}); - CASE_FOLD.put(0x1F5F, new int[]{0x1F57}); - CASE_FOLD.put(0x1F68, new int[]{0x1F60}); - CASE_FOLD.put(0x1F69, new int[]{0x1F61}); - CASE_FOLD.put(0x1F6A, new int[]{0x1F62}); - CASE_FOLD.put(0x1F6B, new int[]{0x1F63}); - CASE_FOLD.put(0x1F6C, new int[]{0x1F64}); - CASE_FOLD.put(0x1F6D, new int[]{0x1F65}); - CASE_FOLD.put(0x1F6E, new int[]{0x1F66}); - CASE_FOLD.put(0x1F6F, new int[]{0x1F67}); - CASE_FOLD.put(0x1F80, new int[]{0x1F00, 0x03B9}); - CASE_FOLD.put(0x1F81, new int[]{0x1F01, 0x03B9}); - CASE_FOLD.put(0x1F82, new int[]{0x1F02, 0x03B9}); - CASE_FOLD.put(0x1F83, new int[]{0x1F03, 0x03B9}); - CASE_FOLD.put(0x1F84, new int[]{0x1F04, 0x03B9}); - CASE_FOLD.put(0x1F85, new int[]{0x1F05, 0x03B9}); - CASE_FOLD.put(0x1F86, new int[]{0x1F06, 0x03B9}); - CASE_FOLD.put(0x1F87, new int[]{0x1F07, 0x03B9}); - CASE_FOLD.put(0x1F88, new int[]{0x1F00, 0x03B9}); - CASE_FOLD.put(0x1F89, new int[]{0x1F01, 0x03B9}); - CASE_FOLD.put(0x1F8A, new int[]{0x1F02, 0x03B9}); - CASE_FOLD.put(0x1F8B, new int[]{0x1F03, 0x03B9}); - CASE_FOLD.put(0x1F8C, new int[]{0x1F04, 0x03B9}); - CASE_FOLD.put(0x1F8D, new int[]{0x1F05, 0x03B9}); - CASE_FOLD.put(0x1F8E, new int[]{0x1F06, 0x03B9}); - CASE_FOLD.put(0x1F8F, new int[]{0x1F07, 0x03B9}); - CASE_FOLD.put(0x1F90, new int[]{0x1F20, 0x03B9}); - CASE_FOLD.put(0x1F91, new int[]{0x1F21, 0x03B9}); - CASE_FOLD.put(0x1F92, new int[]{0x1F22, 0x03B9}); - CASE_FOLD.put(0x1F93, new int[]{0x1F23, 0x03B9}); - CASE_FOLD.put(0x1F94, new int[]{0x1F24, 0x03B9}); - CASE_FOLD.put(0x1F95, new int[]{0x1F25, 0x03B9}); - CASE_FOLD.put(0x1F96, new int[]{0x1F26, 0x03B9}); - CASE_FOLD.put(0x1F97, new int[]{0x1F27, 0x03B9}); - CASE_FOLD.put(0x1F98, new int[]{0x1F20, 0x03B9}); - CASE_FOLD.put(0x1F99, new int[]{0x1F21, 0x03B9}); - CASE_FOLD.put(0x1F9A, new int[]{0x1F22, 0x03B9}); - CASE_FOLD.put(0x1F9B, new int[]{0x1F23, 0x03B9}); - CASE_FOLD.put(0x1F9C, new int[]{0x1F24, 0x03B9}); - 
CASE_FOLD.put(0x1F9D, new int[]{0x1F25, 0x03B9}); - CASE_FOLD.put(0x1F9E, new int[]{0x1F26, 0x03B9}); - CASE_FOLD.put(0x1F9F, new int[]{0x1F27, 0x03B9}); - CASE_FOLD.put(0x1FA0, new int[]{0x1F60, 0x03B9}); - CASE_FOLD.put(0x1FA1, new int[]{0x1F61, 0x03B9}); - CASE_FOLD.put(0x1FA2, new int[]{0x1F62, 0x03B9}); - CASE_FOLD.put(0x1FA3, new int[]{0x1F63, 0x03B9}); - CASE_FOLD.put(0x1FA4, new int[]{0x1F64, 0x03B9}); - CASE_FOLD.put(0x1FA5, new int[]{0x1F65, 0x03B9}); - CASE_FOLD.put(0x1FA6, new int[]{0x1F66, 0x03B9}); - CASE_FOLD.put(0x1FA7, new int[]{0x1F67, 0x03B9}); - CASE_FOLD.put(0x1FA8, new int[]{0x1F60, 0x03B9}); - CASE_FOLD.put(0x1FA9, new int[]{0x1F61, 0x03B9}); - CASE_FOLD.put(0x1FAA, new int[]{0x1F62, 0x03B9}); - CASE_FOLD.put(0x1FAB, new int[]{0x1F63, 0x03B9}); - CASE_FOLD.put(0x1FAC, new int[]{0x1F64, 0x03B9}); - CASE_FOLD.put(0x1FAD, new int[]{0x1F65, 0x03B9}); - CASE_FOLD.put(0x1FAE, new int[]{0x1F66, 0x03B9}); - CASE_FOLD.put(0x1FAF, new int[]{0x1F67, 0x03B9}); - CASE_FOLD.put(0x1FB2, new int[]{0x1F70, 0x03B9}); - CASE_FOLD.put(0x1FB3, new int[]{0x03B1, 0x03B9}); - CASE_FOLD.put(0x1FB4, new int[]{0x03AC, 0x03B9}); - CASE_FOLD.put(0x1FB6, new int[]{0x03B1, 0x0342}); - CASE_FOLD.put(0x1FB7, new int[]{0x03B1, 0x0342, 0x03B9}); - CASE_FOLD.put(0x1FB8, new int[]{0x1FB0}); - CASE_FOLD.put(0x1FB9, new int[]{0x1FB1}); - CASE_FOLD.put(0x1FBA, new int[]{0x1F70}); - CASE_FOLD.put(0x1FBB, new int[]{0x1F71}); - CASE_FOLD.put(0x1FBC, new int[]{0x03B1, 0x03B9}); - CASE_FOLD.put(0x1FBE, new int[]{0x03B9}); - CASE_FOLD.put(0x1FC2, new int[]{0x1F74, 0x03B9}); - CASE_FOLD.put(0x1FC3, new int[]{0x03B7, 0x03B9}); - CASE_FOLD.put(0x1FC4, new int[]{0x03AE, 0x03B9}); - CASE_FOLD.put(0x1FC6, new int[]{0x03B7, 0x0342}); - CASE_FOLD.put(0x1FC7, new int[]{0x03B7, 0x0342, 0x03B9}); - CASE_FOLD.put(0x1FC8, new int[]{0x1F72}); - CASE_FOLD.put(0x1FC9, new int[]{0x1F73}); - CASE_FOLD.put(0x1FCA, new int[]{0x1F74}); - CASE_FOLD.put(0x1FCB, new int[]{0x1F75}); - CASE_FOLD.put(0x1FCC, new 
int[]{0x03B7, 0x03B9}); - CASE_FOLD.put(0x1FD2, new int[]{0x03B9, 0x0308, 0x0300}); - CASE_FOLD.put(0x1FD3, new int[]{0x03B9, 0x0308, 0x0301}); - CASE_FOLD.put(0x1FD6, new int[]{0x03B9, 0x0342}); - CASE_FOLD.put(0x1FD7, new int[]{0x03B9, 0x0308, 0x0342}); - CASE_FOLD.put(0x1FD8, new int[]{0x1FD0}); - CASE_FOLD.put(0x1FD9, new int[]{0x1FD1}); - CASE_FOLD.put(0x1FDA, new int[]{0x1F76}); - CASE_FOLD.put(0x1FDB, new int[]{0x1F77}); - CASE_FOLD.put(0x1FE2, new int[]{0x03C5, 0x0308, 0x0300}); - CASE_FOLD.put(0x1FE3, new int[]{0x03C5, 0x0308, 0x0301}); - CASE_FOLD.put(0x1FE4, new int[]{0x03C1, 0x0313}); - CASE_FOLD.put(0x1FE6, new int[]{0x03C5, 0x0342}); - CASE_FOLD.put(0x1FE7, new int[]{0x03C5, 0x0308, 0x0342}); - CASE_FOLD.put(0x1FE8, new int[]{0x1FE0}); - CASE_FOLD.put(0x1FE9, new int[]{0x1FE1}); - CASE_FOLD.put(0x1FEA, new int[]{0x1F7A}); - CASE_FOLD.put(0x1FEB, new int[]{0x1F7B}); - CASE_FOLD.put(0x1FEC, new int[]{0x1FE5}); - CASE_FOLD.put(0x1FF2, new int[]{0x1F7C, 0x03B9}); - CASE_FOLD.put(0x1FF3, new int[]{0x03C9, 0x03B9}); - CASE_FOLD.put(0x1FF4, new int[]{0x03CE, 0x03B9}); - CASE_FOLD.put(0x1FF6, new int[]{0x03C9, 0x0342}); - CASE_FOLD.put(0x1FF7, new int[]{0x03C9, 0x0342, 0x03B9}); - CASE_FOLD.put(0x1FF8, new int[]{0x1F78}); - CASE_FOLD.put(0x1FF9, new int[]{0x1F79}); - CASE_FOLD.put(0x1FFA, new int[]{0x1F7C}); - CASE_FOLD.put(0x1FFB, new int[]{0x1F7D}); - CASE_FOLD.put(0x1FFC, new int[]{0x03C9, 0x03B9}); - CASE_FOLD.put(0x2126, new int[]{0x03C9}); - CASE_FOLD.put(0x212A, new int[]{0x006B}); - CASE_FOLD.put(0x212B, new int[]{0x00E5}); - CASE_FOLD.put(0x2132, new int[]{0x214E}); - CASE_FOLD.put(0x2160, new int[]{0x2170}); - CASE_FOLD.put(0x2161, new int[]{0x2171}); - CASE_FOLD.put(0x2162, new int[]{0x2172}); - CASE_FOLD.put(0x2163, new int[]{0x2173}); - CASE_FOLD.put(0x2164, new int[]{0x2174}); - CASE_FOLD.put(0x2165, new int[]{0x2175}); - CASE_FOLD.put(0x2166, new int[]{0x2176}); - CASE_FOLD.put(0x2167, new int[]{0x2177}); - CASE_FOLD.put(0x2168, new 
int[]{0x2178}); - CASE_FOLD.put(0x2169, new int[]{0x2179}); - CASE_FOLD.put(0x216A, new int[]{0x217A}); - CASE_FOLD.put(0x216B, new int[]{0x217B}); - CASE_FOLD.put(0x216C, new int[]{0x217C}); - CASE_FOLD.put(0x216D, new int[]{0x217D}); - CASE_FOLD.put(0x216E, new int[]{0x217E}); - CASE_FOLD.put(0x216F, new int[]{0x217F}); - CASE_FOLD.put(0x2183, new int[]{0x2184}); - CASE_FOLD.put(0x24B6, new int[]{0x24D0}); - CASE_FOLD.put(0x24B7, new int[]{0x24D1}); - CASE_FOLD.put(0x24B8, new int[]{0x24D2}); - CASE_FOLD.put(0x24B9, new int[]{0x24D3}); - CASE_FOLD.put(0x24BA, new int[]{0x24D4}); - CASE_FOLD.put(0x24BB, new int[]{0x24D5}); - CASE_FOLD.put(0x24BC, new int[]{0x24D6}); - CASE_FOLD.put(0x24BD, new int[]{0x24D7}); - CASE_FOLD.put(0x24BE, new int[]{0x24D8}); - CASE_FOLD.put(0x24BF, new int[]{0x24D9}); - CASE_FOLD.put(0x24C0, new int[]{0x24DA}); - CASE_FOLD.put(0x24C1, new int[]{0x24DB}); - CASE_FOLD.put(0x24C2, new int[]{0x24DC}); - CASE_FOLD.put(0x24C3, new int[]{0x24DD}); - CASE_FOLD.put(0x24C4, new int[]{0x24DE}); - CASE_FOLD.put(0x24C5, new int[]{0x24DF}); - CASE_FOLD.put(0x24C6, new int[]{0x24E0}); - CASE_FOLD.put(0x24C7, new int[]{0x24E1}); - CASE_FOLD.put(0x24C8, new int[]{0x24E2}); - CASE_FOLD.put(0x24C9, new int[]{0x24E3}); - CASE_FOLD.put(0x24CA, new int[]{0x24E4}); - CASE_FOLD.put(0x24CB, new int[]{0x24E5}); - CASE_FOLD.put(0x24CC, new int[]{0x24E6}); - CASE_FOLD.put(0x24CD, new int[]{0x24E7}); - CASE_FOLD.put(0x24CE, new int[]{0x24E8}); - CASE_FOLD.put(0x24CF, new int[]{0x24E9}); - CASE_FOLD.put(0x2C00, new int[]{0x2C30}); - CASE_FOLD.put(0x2C01, new int[]{0x2C31}); - CASE_FOLD.put(0x2C02, new int[]{0x2C32}); - CASE_FOLD.put(0x2C03, new int[]{0x2C33}); - CASE_FOLD.put(0x2C04, new int[]{0x2C34}); - CASE_FOLD.put(0x2C05, new int[]{0x2C35}); - CASE_FOLD.put(0x2C06, new int[]{0x2C36}); - CASE_FOLD.put(0x2C07, new int[]{0x2C37}); - CASE_FOLD.put(0x2C08, new int[]{0x2C38}); - CASE_FOLD.put(0x2C09, new int[]{0x2C39}); - CASE_FOLD.put(0x2C0A, new int[]{0x2C3A}); - 
CASE_FOLD.put(0x2C0B, new int[]{0x2C3B}); - CASE_FOLD.put(0x2C0C, new int[]{0x2C3C}); - CASE_FOLD.put(0x2C0D, new int[]{0x2C3D}); - CASE_FOLD.put(0x2C0E, new int[]{0x2C3E}); - CASE_FOLD.put(0x2C0F, new int[]{0x2C3F}); - CASE_FOLD.put(0x2C10, new int[]{0x2C40}); - CASE_FOLD.put(0x2C11, new int[]{0x2C41}); - CASE_FOLD.put(0x2C12, new int[]{0x2C42}); - CASE_FOLD.put(0x2C13, new int[]{0x2C43}); - CASE_FOLD.put(0x2C14, new int[]{0x2C44}); - CASE_FOLD.put(0x2C15, new int[]{0x2C45}); - CASE_FOLD.put(0x2C16, new int[]{0x2C46}); - CASE_FOLD.put(0x2C17, new int[]{0x2C47}); - CASE_FOLD.put(0x2C18, new int[]{0x2C48}); - CASE_FOLD.put(0x2C19, new int[]{0x2C49}); - CASE_FOLD.put(0x2C1A, new int[]{0x2C4A}); - CASE_FOLD.put(0x2C1B, new int[]{0x2C4B}); - CASE_FOLD.put(0x2C1C, new int[]{0x2C4C}); - CASE_FOLD.put(0x2C1D, new int[]{0x2C4D}); - CASE_FOLD.put(0x2C1E, new int[]{0x2C4E}); - CASE_FOLD.put(0x2C1F, new int[]{0x2C4F}); - CASE_FOLD.put(0x2C20, new int[]{0x2C50}); - CASE_FOLD.put(0x2C21, new int[]{0x2C51}); - CASE_FOLD.put(0x2C22, new int[]{0x2C52}); - CASE_FOLD.put(0x2C23, new int[]{0x2C53}); - CASE_FOLD.put(0x2C24, new int[]{0x2C54}); - CASE_FOLD.put(0x2C25, new int[]{0x2C55}); - CASE_FOLD.put(0x2C26, new int[]{0x2C56}); - CASE_FOLD.put(0x2C27, new int[]{0x2C57}); - CASE_FOLD.put(0x2C28, new int[]{0x2C58}); - CASE_FOLD.put(0x2C29, new int[]{0x2C59}); - CASE_FOLD.put(0x2C2A, new int[]{0x2C5A}); - CASE_FOLD.put(0x2C2B, new int[]{0x2C5B}); - CASE_FOLD.put(0x2C2C, new int[]{0x2C5C}); - CASE_FOLD.put(0x2C2D, new int[]{0x2C5D}); - CASE_FOLD.put(0x2C2E, new int[]{0x2C5E}); - CASE_FOLD.put(0x2C2F, new int[]{0x2C5F}); - CASE_FOLD.put(0x2C60, new int[]{0x2C61}); - CASE_FOLD.put(0x2C62, new int[]{0x026B}); - CASE_FOLD.put(0x2C63, new int[]{0x1D7D}); - CASE_FOLD.put(0x2C64, new int[]{0x027D}); - CASE_FOLD.put(0x2C67, new int[]{0x2C68}); - CASE_FOLD.put(0x2C69, new int[]{0x2C6A}); - CASE_FOLD.put(0x2C6B, new int[]{0x2C6C}); - CASE_FOLD.put(0x2C6D, new int[]{0x0251}); - 
CASE_FOLD.put(0x2C6E, new int[]{0x0271}); - CASE_FOLD.put(0x2C6F, new int[]{0x0250}); - CASE_FOLD.put(0x2C70, new int[]{0x0252}); - CASE_FOLD.put(0x2C72, new int[]{0x2C73}); - CASE_FOLD.put(0x2C75, new int[]{0x2C76}); - CASE_FOLD.put(0x2C7E, new int[]{0x023F}); - CASE_FOLD.put(0x2C7F, new int[]{0x0240}); - CASE_FOLD.put(0x2C80, new int[]{0x2C81}); - CASE_FOLD.put(0x2C82, new int[]{0x2C83}); - CASE_FOLD.put(0x2C84, new int[]{0x2C85}); - CASE_FOLD.put(0x2C86, new int[]{0x2C87}); - CASE_FOLD.put(0x2C88, new int[]{0x2C89}); - CASE_FOLD.put(0x2C8A, new int[]{0x2C8B}); - CASE_FOLD.put(0x2C8C, new int[]{0x2C8D}); - CASE_FOLD.put(0x2C8E, new int[]{0x2C8F}); - CASE_FOLD.put(0x2C90, new int[]{0x2C91}); - CASE_FOLD.put(0x2C92, new int[]{0x2C93}); - CASE_FOLD.put(0x2C94, new int[]{0x2C95}); - CASE_FOLD.put(0x2C96, new int[]{0x2C97}); - CASE_FOLD.put(0x2C98, new int[]{0x2C99}); - CASE_FOLD.put(0x2C9A, new int[]{0x2C9B}); - CASE_FOLD.put(0x2C9C, new int[]{0x2C9D}); - CASE_FOLD.put(0x2C9E, new int[]{0x2C9F}); - CASE_FOLD.put(0x2CA0, new int[]{0x2CA1}); - CASE_FOLD.put(0x2CA2, new int[]{0x2CA3}); - CASE_FOLD.put(0x2CA4, new int[]{0x2CA5}); - CASE_FOLD.put(0x2CA6, new int[]{0x2CA7}); - CASE_FOLD.put(0x2CA8, new int[]{0x2CA9}); - CASE_FOLD.put(0x2CAA, new int[]{0x2CAB}); - CASE_FOLD.put(0x2CAC, new int[]{0x2CAD}); - CASE_FOLD.put(0x2CAE, new int[]{0x2CAF}); - CASE_FOLD.put(0x2CB0, new int[]{0x2CB1}); - CASE_FOLD.put(0x2CB2, new int[]{0x2CB3}); - CASE_FOLD.put(0x2CB4, new int[]{0x2CB5}); - CASE_FOLD.put(0x2CB6, new int[]{0x2CB7}); - CASE_FOLD.put(0x2CB8, new int[]{0x2CB9}); - CASE_FOLD.put(0x2CBA, new int[]{0x2CBB}); - CASE_FOLD.put(0x2CBC, new int[]{0x2CBD}); - CASE_FOLD.put(0x2CBE, new int[]{0x2CBF}); - CASE_FOLD.put(0x2CC0, new int[]{0x2CC1}); - CASE_FOLD.put(0x2CC2, new int[]{0x2CC3}); - CASE_FOLD.put(0x2CC4, new int[]{0x2CC5}); - CASE_FOLD.put(0x2CC6, new int[]{0x2CC7}); - CASE_FOLD.put(0x2CC8, new int[]{0x2CC9}); - CASE_FOLD.put(0x2CCA, new int[]{0x2CCB}); - 
CASE_FOLD.put(0x2CCC, new int[]{0x2CCD}); - CASE_FOLD.put(0x2CCE, new int[]{0x2CCF}); - CASE_FOLD.put(0x2CD0, new int[]{0x2CD1}); - CASE_FOLD.put(0x2CD2, new int[]{0x2CD3}); - CASE_FOLD.put(0x2CD4, new int[]{0x2CD5}); - CASE_FOLD.put(0x2CD6, new int[]{0x2CD7}); - CASE_FOLD.put(0x2CD8, new int[]{0x2CD9}); - CASE_FOLD.put(0x2CDA, new int[]{0x2CDB}); - CASE_FOLD.put(0x2CDC, new int[]{0x2CDD}); - CASE_FOLD.put(0x2CDE, new int[]{0x2CDF}); - CASE_FOLD.put(0x2CE0, new int[]{0x2CE1}); - CASE_FOLD.put(0x2CE2, new int[]{0x2CE3}); - CASE_FOLD.put(0x2CEB, new int[]{0x2CEC}); - CASE_FOLD.put(0x2CED, new int[]{0x2CEE}); - CASE_FOLD.put(0x2CF2, new int[]{0x2CF3}); - CASE_FOLD.put(0xA640, new int[]{0xA641}); - CASE_FOLD.put(0xA642, new int[]{0xA643}); - CASE_FOLD.put(0xA644, new int[]{0xA645}); - CASE_FOLD.put(0xA646, new int[]{0xA647}); - CASE_FOLD.put(0xA648, new int[]{0xA649}); - CASE_FOLD.put(0xA64A, new int[]{0xA64B}); - CASE_FOLD.put(0xA64C, new int[]{0xA64D}); - CASE_FOLD.put(0xA64E, new int[]{0xA64F}); - CASE_FOLD.put(0xA650, new int[]{0xA651}); - CASE_FOLD.put(0xA652, new int[]{0xA653}); - CASE_FOLD.put(0xA654, new int[]{0xA655}); - CASE_FOLD.put(0xA656, new int[]{0xA657}); - CASE_FOLD.put(0xA658, new int[]{0xA659}); - CASE_FOLD.put(0xA65A, new int[]{0xA65B}); - CASE_FOLD.put(0xA65C, new int[]{0xA65D}); - CASE_FOLD.put(0xA65E, new int[]{0xA65F}); - CASE_FOLD.put(0xA660, new int[]{0xA661}); - CASE_FOLD.put(0xA662, new int[]{0xA663}); - CASE_FOLD.put(0xA664, new int[]{0xA665}); - CASE_FOLD.put(0xA666, new int[]{0xA667}); - CASE_FOLD.put(0xA668, new int[]{0xA669}); - CASE_FOLD.put(0xA66A, new int[]{0xA66B}); - CASE_FOLD.put(0xA66C, new int[]{0xA66D}); - CASE_FOLD.put(0xA680, new int[]{0xA681}); - CASE_FOLD.put(0xA682, new int[]{0xA683}); - CASE_FOLD.put(0xA684, new int[]{0xA685}); - CASE_FOLD.put(0xA686, new int[]{0xA687}); - CASE_FOLD.put(0xA688, new int[]{0xA689}); - CASE_FOLD.put(0xA68A, new int[]{0xA68B}); - CASE_FOLD.put(0xA68C, new int[]{0xA68D}); - 
CASE_FOLD.put(0xA68E, new int[]{0xA68F}); - CASE_FOLD.put(0xA690, new int[]{0xA691}); - CASE_FOLD.put(0xA692, new int[]{0xA693}); - CASE_FOLD.put(0xA694, new int[]{0xA695}); - CASE_FOLD.put(0xA696, new int[]{0xA697}); - CASE_FOLD.put(0xA698, new int[]{0xA699}); - CASE_FOLD.put(0xA69A, new int[]{0xA69B}); - CASE_FOLD.put(0xA722, new int[]{0xA723}); - CASE_FOLD.put(0xA724, new int[]{0xA725}); - CASE_FOLD.put(0xA726, new int[]{0xA727}); - CASE_FOLD.put(0xA728, new int[]{0xA729}); - CASE_FOLD.put(0xA72A, new int[]{0xA72B}); - CASE_FOLD.put(0xA72C, new int[]{0xA72D}); - CASE_FOLD.put(0xA72E, new int[]{0xA72F}); - CASE_FOLD.put(0xA732, new int[]{0xA733}); - CASE_FOLD.put(0xA734, new int[]{0xA735}); - CASE_FOLD.put(0xA736, new int[]{0xA737}); - CASE_FOLD.put(0xA738, new int[]{0xA739}); - CASE_FOLD.put(0xA73A, new int[]{0xA73B}); - CASE_FOLD.put(0xA73C, new int[]{0xA73D}); - CASE_FOLD.put(0xA73E, new int[]{0xA73F}); - CASE_FOLD.put(0xA740, new int[]{0xA741}); - CASE_FOLD.put(0xA742, new int[]{0xA743}); - CASE_FOLD.put(0xA744, new int[]{0xA745}); - CASE_FOLD.put(0xA746, new int[]{0xA747}); - CASE_FOLD.put(0xA748, new int[]{0xA749}); - CASE_FOLD.put(0xA74A, new int[]{0xA74B}); - CASE_FOLD.put(0xA74C, new int[]{0xA74D}); - CASE_FOLD.put(0xA74E, new int[]{0xA74F}); - CASE_FOLD.put(0xA750, new int[]{0xA751}); - CASE_FOLD.put(0xA752, new int[]{0xA753}); - CASE_FOLD.put(0xA754, new int[]{0xA755}); - CASE_FOLD.put(0xA756, new int[]{0xA757}); - CASE_FOLD.put(0xA758, new int[]{0xA759}); - CASE_FOLD.put(0xA75A, new int[]{0xA75B}); - CASE_FOLD.put(0xA75C, new int[]{0xA75D}); - CASE_FOLD.put(0xA75E, new int[]{0xA75F}); - CASE_FOLD.put(0xA760, new int[]{0xA761}); - CASE_FOLD.put(0xA762, new int[]{0xA763}); - CASE_FOLD.put(0xA764, new int[]{0xA765}); - CASE_FOLD.put(0xA766, new int[]{0xA767}); - CASE_FOLD.put(0xA768, new int[]{0xA769}); - CASE_FOLD.put(0xA76A, new int[]{0xA76B}); - CASE_FOLD.put(0xA76C, new int[]{0xA76D}); - CASE_FOLD.put(0xA76E, new int[]{0xA76F}); - 
CASE_FOLD.put(0xA779, new int[]{0xA77A}); - CASE_FOLD.put(0xA77B, new int[]{0xA77C}); - CASE_FOLD.put(0xA77D, new int[]{0x1D79}); - CASE_FOLD.put(0xA77E, new int[]{0xA77F}); - CASE_FOLD.put(0xA780, new int[]{0xA781}); - CASE_FOLD.put(0xA782, new int[]{0xA783}); - CASE_FOLD.put(0xA784, new int[]{0xA785}); - CASE_FOLD.put(0xA786, new int[]{0xA787}); - CASE_FOLD.put(0xA78B, new int[]{0xA78C}); - CASE_FOLD.put(0xA78D, new int[]{0x0265}); - CASE_FOLD.put(0xA790, new int[]{0xA791}); - CASE_FOLD.put(0xA792, new int[]{0xA793}); - CASE_FOLD.put(0xA796, new int[]{0xA797}); - CASE_FOLD.put(0xA798, new int[]{0xA799}); - CASE_FOLD.put(0xA79A, new int[]{0xA79B}); - CASE_FOLD.put(0xA79C, new int[]{0xA79D}); - CASE_FOLD.put(0xA79E, new int[]{0xA79F}); - CASE_FOLD.put(0xA7A0, new int[]{0xA7A1}); - CASE_FOLD.put(0xA7A2, new int[]{0xA7A3}); - CASE_FOLD.put(0xA7A4, new int[]{0xA7A5}); - CASE_FOLD.put(0xA7A6, new int[]{0xA7A7}); - CASE_FOLD.put(0xA7A8, new int[]{0xA7A9}); - CASE_FOLD.put(0xA7AA, new int[]{0x0266}); - CASE_FOLD.put(0xA7AB, new int[]{0x025C}); - CASE_FOLD.put(0xA7AC, new int[]{0x0261}); - CASE_FOLD.put(0xA7AD, new int[]{0x026C}); - CASE_FOLD.put(0xA7AE, new int[]{0x026A}); - CASE_FOLD.put(0xA7B0, new int[]{0x029E}); - CASE_FOLD.put(0xA7B1, new int[]{0x0287}); - CASE_FOLD.put(0xA7B2, new int[]{0x029D}); - CASE_FOLD.put(0xA7B3, new int[]{0xAB53}); - CASE_FOLD.put(0xA7B4, new int[]{0xA7B5}); - CASE_FOLD.put(0xA7B6, new int[]{0xA7B7}); - CASE_FOLD.put(0xA7B8, new int[]{0xA7B9}); - CASE_FOLD.put(0xA7BA, new int[]{0xA7BB}); - CASE_FOLD.put(0xA7BC, new int[]{0xA7BD}); - CASE_FOLD.put(0xA7BE, new int[]{0xA7BF}); - CASE_FOLD.put(0xA7C0, new int[]{0xA7C1}); - CASE_FOLD.put(0xA7C2, new int[]{0xA7C3}); - CASE_FOLD.put(0xA7C4, new int[]{0xA794}); - CASE_FOLD.put(0xA7C5, new int[]{0x0282}); - CASE_FOLD.put(0xA7C6, new int[]{0x1D8E}); - CASE_FOLD.put(0xA7C7, new int[]{0xA7C8}); - CASE_FOLD.put(0xA7C9, new int[]{0xA7CA}); - CASE_FOLD.put(0xA7D0, new int[]{0xA7D1}); - 
CASE_FOLD.put(0xA7D6, new int[]{0xA7D7}); - CASE_FOLD.put(0xA7D8, new int[]{0xA7D9}); - CASE_FOLD.put(0xA7F5, new int[]{0xA7F6}); - CASE_FOLD.put(0xAB70, new int[]{0x13A0}); - CASE_FOLD.put(0xAB71, new int[]{0x13A1}); - CASE_FOLD.put(0xAB72, new int[]{0x13A2}); - CASE_FOLD.put(0xAB73, new int[]{0x13A3}); - CASE_FOLD.put(0xAB74, new int[]{0x13A4}); - CASE_FOLD.put(0xAB75, new int[]{0x13A5}); - CASE_FOLD.put(0xAB76, new int[]{0x13A6}); - CASE_FOLD.put(0xAB77, new int[]{0x13A7}); - CASE_FOLD.put(0xAB78, new int[]{0x13A8}); - CASE_FOLD.put(0xAB79, new int[]{0x13A9}); - CASE_FOLD.put(0xAB7A, new int[]{0x13AA}); - CASE_FOLD.put(0xAB7B, new int[]{0x13AB}); - CASE_FOLD.put(0xAB7C, new int[]{0x13AC}); - CASE_FOLD.put(0xAB7D, new int[]{0x13AD}); - CASE_FOLD.put(0xAB7E, new int[]{0x13AE}); - CASE_FOLD.put(0xAB7F, new int[]{0x13AF}); - CASE_FOLD.put(0xAB80, new int[]{0x13B0}); - CASE_FOLD.put(0xAB81, new int[]{0x13B1}); - CASE_FOLD.put(0xAB82, new int[]{0x13B2}); - CASE_FOLD.put(0xAB83, new int[]{0x13B3}); - CASE_FOLD.put(0xAB84, new int[]{0x13B4}); - CASE_FOLD.put(0xAB85, new int[]{0x13B5}); - CASE_FOLD.put(0xAB86, new int[]{0x13B6}); - CASE_FOLD.put(0xAB87, new int[]{0x13B7}); - CASE_FOLD.put(0xAB88, new int[]{0x13B8}); - CASE_FOLD.put(0xAB89, new int[]{0x13B9}); - CASE_FOLD.put(0xAB8A, new int[]{0x13BA}); - CASE_FOLD.put(0xAB8B, new int[]{0x13BB}); - CASE_FOLD.put(0xAB8C, new int[]{0x13BC}); - CASE_FOLD.put(0xAB8D, new int[]{0x13BD}); - CASE_FOLD.put(0xAB8E, new int[]{0x13BE}); - CASE_FOLD.put(0xAB8F, new int[]{0x13BF}); - CASE_FOLD.put(0xAB90, new int[]{0x13C0}); - CASE_FOLD.put(0xAB91, new int[]{0x13C1}); - CASE_FOLD.put(0xAB92, new int[]{0x13C2}); - CASE_FOLD.put(0xAB93, new int[]{0x13C3}); - CASE_FOLD.put(0xAB94, new int[]{0x13C4}); - CASE_FOLD.put(0xAB95, new int[]{0x13C5}); - CASE_FOLD.put(0xAB96, new int[]{0x13C6}); - CASE_FOLD.put(0xAB97, new int[]{0x13C7}); - CASE_FOLD.put(0xAB98, new int[]{0x13C8}); - CASE_FOLD.put(0xAB99, new int[]{0x13C9}); - 
CASE_FOLD.put(0xAB9A, new int[]{0x13CA}); - CASE_FOLD.put(0xAB9B, new int[]{0x13CB}); - CASE_FOLD.put(0xAB9C, new int[]{0x13CC}); - CASE_FOLD.put(0xAB9D, new int[]{0x13CD}); - CASE_FOLD.put(0xAB9E, new int[]{0x13CE}); - CASE_FOLD.put(0xAB9F, new int[]{0x13CF}); - CASE_FOLD.put(0xABA0, new int[]{0x13D0}); - CASE_FOLD.put(0xABA1, new int[]{0x13D1}); - CASE_FOLD.put(0xABA2, new int[]{0x13D2}); - CASE_FOLD.put(0xABA3, new int[]{0x13D3}); - CASE_FOLD.put(0xABA4, new int[]{0x13D4}); - CASE_FOLD.put(0xABA5, new int[]{0x13D5}); - CASE_FOLD.put(0xABA6, new int[]{0x13D6}); - CASE_FOLD.put(0xABA7, new int[]{0x13D7}); - CASE_FOLD.put(0xABA8, new int[]{0x13D8}); - CASE_FOLD.put(0xABA9, new int[]{0x13D9}); - CASE_FOLD.put(0xABAA, new int[]{0x13DA}); - CASE_FOLD.put(0xABAB, new int[]{0x13DB}); - CASE_FOLD.put(0xABAC, new int[]{0x13DC}); - CASE_FOLD.put(0xABAD, new int[]{0x13DD}); - CASE_FOLD.put(0xABAE, new int[]{0x13DE}); - CASE_FOLD.put(0xABAF, new int[]{0x13DF}); - CASE_FOLD.put(0xABB0, new int[]{0x13E0}); - CASE_FOLD.put(0xABB1, new int[]{0x13E1}); - CASE_FOLD.put(0xABB2, new int[]{0x13E2}); - CASE_FOLD.put(0xABB3, new int[]{0x13E3}); - CASE_FOLD.put(0xABB4, new int[]{0x13E4}); - CASE_FOLD.put(0xABB5, new int[]{0x13E5}); - CASE_FOLD.put(0xABB6, new int[]{0x13E6}); - CASE_FOLD.put(0xABB7, new int[]{0x13E7}); - CASE_FOLD.put(0xABB8, new int[]{0x13E8}); - CASE_FOLD.put(0xABB9, new int[]{0x13E9}); - CASE_FOLD.put(0xABBA, new int[]{0x13EA}); - CASE_FOLD.put(0xABBB, new int[]{0x13EB}); - CASE_FOLD.put(0xABBC, new int[]{0x13EC}); - CASE_FOLD.put(0xABBD, new int[]{0x13ED}); - CASE_FOLD.put(0xABBE, new int[]{0x13EE}); - CASE_FOLD.put(0xABBF, new int[]{0x13EF}); - CASE_FOLD.put(0xFB00, new int[]{0x0066, 0x0066}); - CASE_FOLD.put(0xFB01, new int[]{0x0066, 0x0069}); - CASE_FOLD.put(0xFB02, new int[]{0x0066, 0x006C}); - CASE_FOLD.put(0xFB03, new int[]{0x0066, 0x0066, 0x0069}); - CASE_FOLD.put(0xFB04, new int[]{0x0066, 0x0066, 0x006C}); - CASE_FOLD.put(0xFB05, new int[]{0x0073, 0x0074}); - 
CASE_FOLD.put(0xFB06, new int[]{0x0073, 0x0074}); - CASE_FOLD.put(0xFB13, new int[]{0x0574, 0x0576}); - CASE_FOLD.put(0xFB14, new int[]{0x0574, 0x0565}); - CASE_FOLD.put(0xFB15, new int[]{0x0574, 0x056B}); - CASE_FOLD.put(0xFB16, new int[]{0x057E, 0x0576}); - CASE_FOLD.put(0xFB17, new int[]{0x0574, 0x056D}); - CASE_FOLD.put(0xFF21, new int[]{0xFF41}); - CASE_FOLD.put(0xFF22, new int[]{0xFF42}); - CASE_FOLD.put(0xFF23, new int[]{0xFF43}); - CASE_FOLD.put(0xFF24, new int[]{0xFF44}); - CASE_FOLD.put(0xFF25, new int[]{0xFF45}); - CASE_FOLD.put(0xFF26, new int[]{0xFF46}); - CASE_FOLD.put(0xFF27, new int[]{0xFF47}); - CASE_FOLD.put(0xFF28, new int[]{0xFF48}); - CASE_FOLD.put(0xFF29, new int[]{0xFF49}); - CASE_FOLD.put(0xFF2A, new int[]{0xFF4A}); - CASE_FOLD.put(0xFF2B, new int[]{0xFF4B}); - CASE_FOLD.put(0xFF2C, new int[]{0xFF4C}); - CASE_FOLD.put(0xFF2D, new int[]{0xFF4D}); - CASE_FOLD.put(0xFF2E, new int[]{0xFF4E}); - CASE_FOLD.put(0xFF2F, new int[]{0xFF4F}); - CASE_FOLD.put(0xFF30, new int[]{0xFF50}); - CASE_FOLD.put(0xFF31, new int[]{0xFF51}); - CASE_FOLD.put(0xFF32, new int[]{0xFF52}); - CASE_FOLD.put(0xFF33, new int[]{0xFF53}); - CASE_FOLD.put(0xFF34, new int[]{0xFF54}); - CASE_FOLD.put(0xFF35, new int[]{0xFF55}); - CASE_FOLD.put(0xFF36, new int[]{0xFF56}); - CASE_FOLD.put(0xFF37, new int[]{0xFF57}); - CASE_FOLD.put(0xFF38, new int[]{0xFF58}); - CASE_FOLD.put(0xFF39, new int[]{0xFF59}); - CASE_FOLD.put(0xFF3A, new int[]{0xFF5A}); - CASE_FOLD.put(0x10400, new int[]{0x10428}); - CASE_FOLD.put(0x10401, new int[]{0x10429}); - CASE_FOLD.put(0x10402, new int[]{0x1042A}); - CASE_FOLD.put(0x10403, new int[]{0x1042B}); - CASE_FOLD.put(0x10404, new int[]{0x1042C}); - CASE_FOLD.put(0x10405, new int[]{0x1042D}); - CASE_FOLD.put(0x10406, new int[]{0x1042E}); - CASE_FOLD.put(0x10407, new int[]{0x1042F}); - CASE_FOLD.put(0x10408, new int[]{0x10430}); - CASE_FOLD.put(0x10409, new int[]{0x10431}); - CASE_FOLD.put(0x1040A, new int[]{0x10432}); - CASE_FOLD.put(0x1040B, new 
int[]{0x10433}); - CASE_FOLD.put(0x1040C, new int[]{0x10434}); - CASE_FOLD.put(0x1040D, new int[]{0x10435}); - CASE_FOLD.put(0x1040E, new int[]{0x10436}); - CASE_FOLD.put(0x1040F, new int[]{0x10437}); - CASE_FOLD.put(0x10410, new int[]{0x10438}); - CASE_FOLD.put(0x10411, new int[]{0x10439}); - CASE_FOLD.put(0x10412, new int[]{0x1043A}); - CASE_FOLD.put(0x10413, new int[]{0x1043B}); - CASE_FOLD.put(0x10414, new int[]{0x1043C}); - CASE_FOLD.put(0x10415, new int[]{0x1043D}); - CASE_FOLD.put(0x10416, new int[]{0x1043E}); - CASE_FOLD.put(0x10417, new int[]{0x1043F}); - CASE_FOLD.put(0x10418, new int[]{0x10440}); - CASE_FOLD.put(0x10419, new int[]{0x10441}); - CASE_FOLD.put(0x1041A, new int[]{0x10442}); - CASE_FOLD.put(0x1041B, new int[]{0x10443}); - CASE_FOLD.put(0x1041C, new int[]{0x10444}); - CASE_FOLD.put(0x1041D, new int[]{0x10445}); - CASE_FOLD.put(0x1041E, new int[]{0x10446}); - CASE_FOLD.put(0x1041F, new int[]{0x10447}); - CASE_FOLD.put(0x10420, new int[]{0x10448}); - CASE_FOLD.put(0x10421, new int[]{0x10449}); - CASE_FOLD.put(0x10422, new int[]{0x1044A}); - CASE_FOLD.put(0x10423, new int[]{0x1044B}); - CASE_FOLD.put(0x10424, new int[]{0x1044C}); - CASE_FOLD.put(0x10425, new int[]{0x1044D}); - CASE_FOLD.put(0x10426, new int[]{0x1044E}); - CASE_FOLD.put(0x10427, new int[]{0x1044F}); - CASE_FOLD.put(0x104B0, new int[]{0x104D8}); - CASE_FOLD.put(0x104B1, new int[]{0x104D9}); - CASE_FOLD.put(0x104B2, new int[]{0x104DA}); - CASE_FOLD.put(0x104B3, new int[]{0x104DB}); - CASE_FOLD.put(0x104B4, new int[]{0x104DC}); - CASE_FOLD.put(0x104B5, new int[]{0x104DD}); - CASE_FOLD.put(0x104B6, new int[]{0x104DE}); - CASE_FOLD.put(0x104B7, new int[]{0x104DF}); - CASE_FOLD.put(0x104B8, new int[]{0x104E0}); - CASE_FOLD.put(0x104B9, new int[]{0x104E1}); - CASE_FOLD.put(0x104BA, new int[]{0x104E2}); - CASE_FOLD.put(0x104BB, new int[]{0x104E3}); - CASE_FOLD.put(0x104BC, new int[]{0x104E4}); - CASE_FOLD.put(0x104BD, new int[]{0x104E5}); - CASE_FOLD.put(0x104BE, new int[]{0x104E6}); - 
CASE_FOLD.put(0x104BF, new int[]{0x104E7}); - CASE_FOLD.put(0x104C0, new int[]{0x104E8}); - CASE_FOLD.put(0x104C1, new int[]{0x104E9}); - CASE_FOLD.put(0x104C2, new int[]{0x104EA}); - CASE_FOLD.put(0x104C3, new int[]{0x104EB}); - CASE_FOLD.put(0x104C4, new int[]{0x104EC}); - CASE_FOLD.put(0x104C5, new int[]{0x104ED}); - CASE_FOLD.put(0x104C6, new int[]{0x104EE}); - CASE_FOLD.put(0x104C7, new int[]{0x104EF}); - CASE_FOLD.put(0x104C8, new int[]{0x104F0}); - CASE_FOLD.put(0x104C9, new int[]{0x104F1}); - CASE_FOLD.put(0x104CA, new int[]{0x104F2}); - CASE_FOLD.put(0x104CB, new int[]{0x104F3}); - CASE_FOLD.put(0x104CC, new int[]{0x104F4}); - CASE_FOLD.put(0x104CD, new int[]{0x104F5}); - CASE_FOLD.put(0x104CE, new int[]{0x104F6}); - CASE_FOLD.put(0x104CF, new int[]{0x104F7}); - CASE_FOLD.put(0x104D0, new int[]{0x104F8}); - CASE_FOLD.put(0x104D1, new int[]{0x104F9}); - CASE_FOLD.put(0x104D2, new int[]{0x104FA}); - CASE_FOLD.put(0x104D3, new int[]{0x104FB}); - CASE_FOLD.put(0x10570, new int[]{0x10597}); - CASE_FOLD.put(0x10571, new int[]{0x10598}); - CASE_FOLD.put(0x10572, new int[]{0x10599}); - CASE_FOLD.put(0x10573, new int[]{0x1059A}); - CASE_FOLD.put(0x10574, new int[]{0x1059B}); - CASE_FOLD.put(0x10575, new int[]{0x1059C}); - CASE_FOLD.put(0x10576, new int[]{0x1059D}); - CASE_FOLD.put(0x10577, new int[]{0x1059E}); - CASE_FOLD.put(0x10578, new int[]{0x1059F}); - CASE_FOLD.put(0x10579, new int[]{0x105A0}); - CASE_FOLD.put(0x1057A, new int[]{0x105A1}); - CASE_FOLD.put(0x1057C, new int[]{0x105A3}); - CASE_FOLD.put(0x1057D, new int[]{0x105A4}); - CASE_FOLD.put(0x1057E, new int[]{0x105A5}); - CASE_FOLD.put(0x1057F, new int[]{0x105A6}); - CASE_FOLD.put(0x10580, new int[]{0x105A7}); - CASE_FOLD.put(0x10581, new int[]{0x105A8}); - CASE_FOLD.put(0x10582, new int[]{0x105A9}); - CASE_FOLD.put(0x10583, new int[]{0x105AA}); - CASE_FOLD.put(0x10584, new int[]{0x105AB}); - CASE_FOLD.put(0x10585, new int[]{0x105AC}); - CASE_FOLD.put(0x10586, new int[]{0x105AD}); - 
CASE_FOLD.put(0x10587, new int[]{0x105AE}); - CASE_FOLD.put(0x10588, new int[]{0x105AF}); - CASE_FOLD.put(0x10589, new int[]{0x105B0}); - CASE_FOLD.put(0x1058A, new int[]{0x105B1}); - CASE_FOLD.put(0x1058C, new int[]{0x105B3}); - CASE_FOLD.put(0x1058D, new int[]{0x105B4}); - CASE_FOLD.put(0x1058E, new int[]{0x105B5}); - CASE_FOLD.put(0x1058F, new int[]{0x105B6}); - CASE_FOLD.put(0x10590, new int[]{0x105B7}); - CASE_FOLD.put(0x10591, new int[]{0x105B8}); - CASE_FOLD.put(0x10592, new int[]{0x105B9}); - CASE_FOLD.put(0x10594, new int[]{0x105BB}); - CASE_FOLD.put(0x10595, new int[]{0x105BC}); - CASE_FOLD.put(0x10C80, new int[]{0x10CC0}); - CASE_FOLD.put(0x10C81, new int[]{0x10CC1}); - CASE_FOLD.put(0x10C82, new int[]{0x10CC2}); - CASE_FOLD.put(0x10C83, new int[]{0x10CC3}); - CASE_FOLD.put(0x10C84, new int[]{0x10CC4}); - CASE_FOLD.put(0x10C85, new int[]{0x10CC5}); - CASE_FOLD.put(0x10C86, new int[]{0x10CC6}); - CASE_FOLD.put(0x10C87, new int[]{0x10CC7}); - CASE_FOLD.put(0x10C88, new int[]{0x10CC8}); - CASE_FOLD.put(0x10C89, new int[]{0x10CC9}); - CASE_FOLD.put(0x10C8A, new int[]{0x10CCA}); - CASE_FOLD.put(0x10C8B, new int[]{0x10CCB}); - CASE_FOLD.put(0x10C8C, new int[]{0x10CCC}); - CASE_FOLD.put(0x10C8D, new int[]{0x10CCD}); - CASE_FOLD.put(0x10C8E, new int[]{0x10CCE}); - CASE_FOLD.put(0x10C8F, new int[]{0x10CCF}); - CASE_FOLD.put(0x10C90, new int[]{0x10CD0}); - CASE_FOLD.put(0x10C91, new int[]{0x10CD1}); - CASE_FOLD.put(0x10C92, new int[]{0x10CD2}); - CASE_FOLD.put(0x10C93, new int[]{0x10CD3}); - CASE_FOLD.put(0x10C94, new int[]{0x10CD4}); - CASE_FOLD.put(0x10C95, new int[]{0x10CD5}); - CASE_FOLD.put(0x10C96, new int[]{0x10CD6}); - CASE_FOLD.put(0x10C97, new int[]{0x10CD7}); - CASE_FOLD.put(0x10C98, new int[]{0x10CD8}); - CASE_FOLD.put(0x10C99, new int[]{0x10CD9}); - CASE_FOLD.put(0x10C9A, new int[]{0x10CDA}); - CASE_FOLD.put(0x10C9B, new int[]{0x10CDB}); - CASE_FOLD.put(0x10C9C, new int[]{0x10CDC}); - CASE_FOLD.put(0x10C9D, new int[]{0x10CDD}); - 
CASE_FOLD.put(0x10C9E, new int[]{0x10CDE}); - CASE_FOLD.put(0x10C9F, new int[]{0x10CDF}); - CASE_FOLD.put(0x10CA0, new int[]{0x10CE0}); - CASE_FOLD.put(0x10CA1, new int[]{0x10CE1}); - CASE_FOLD.put(0x10CA2, new int[]{0x10CE2}); - CASE_FOLD.put(0x10CA3, new int[]{0x10CE3}); - CASE_FOLD.put(0x10CA4, new int[]{0x10CE4}); - CASE_FOLD.put(0x10CA5, new int[]{0x10CE5}); - CASE_FOLD.put(0x10CA6, new int[]{0x10CE6}); - CASE_FOLD.put(0x10CA7, new int[]{0x10CE7}); - CASE_FOLD.put(0x10CA8, new int[]{0x10CE8}); - CASE_FOLD.put(0x10CA9, new int[]{0x10CE9}); - CASE_FOLD.put(0x10CAA, new int[]{0x10CEA}); - CASE_FOLD.put(0x10CAB, new int[]{0x10CEB}); - CASE_FOLD.put(0x10CAC, new int[]{0x10CEC}); - CASE_FOLD.put(0x10CAD, new int[]{0x10CED}); - CASE_FOLD.put(0x10CAE, new int[]{0x10CEE}); - CASE_FOLD.put(0x10CAF, new int[]{0x10CEF}); - CASE_FOLD.put(0x10CB0, new int[]{0x10CF0}); - CASE_FOLD.put(0x10CB1, new int[]{0x10CF1}); - CASE_FOLD.put(0x10CB2, new int[]{0x10CF2}); - CASE_FOLD.put(0x118A0, new int[]{0x118C0}); - CASE_FOLD.put(0x118A1, new int[]{0x118C1}); - CASE_FOLD.put(0x118A2, new int[]{0x118C2}); - CASE_FOLD.put(0x118A3, new int[]{0x118C3}); - CASE_FOLD.put(0x118A4, new int[]{0x118C4}); - CASE_FOLD.put(0x118A5, new int[]{0x118C5}); - CASE_FOLD.put(0x118A6, new int[]{0x118C6}); - CASE_FOLD.put(0x118A7, new int[]{0x118C7}); - CASE_FOLD.put(0x118A8, new int[]{0x118C8}); - CASE_FOLD.put(0x118A9, new int[]{0x118C9}); - CASE_FOLD.put(0x118AA, new int[]{0x118CA}); - CASE_FOLD.put(0x118AB, new int[]{0x118CB}); - CASE_FOLD.put(0x118AC, new int[]{0x118CC}); - CASE_FOLD.put(0x118AD, new int[]{0x118CD}); - CASE_FOLD.put(0x118AE, new int[]{0x118CE}); - CASE_FOLD.put(0x118AF, new int[]{0x118CF}); - CASE_FOLD.put(0x118B0, new int[]{0x118D0}); - CASE_FOLD.put(0x118B1, new int[]{0x118D1}); - CASE_FOLD.put(0x118B2, new int[]{0x118D2}); - CASE_FOLD.put(0x118B3, new int[]{0x118D3}); - CASE_FOLD.put(0x118B4, new int[]{0x118D4}); - CASE_FOLD.put(0x118B5, new int[]{0x118D5}); - 
CASE_FOLD.put(0x118B6, new int[]{0x118D6}); - CASE_FOLD.put(0x118B7, new int[]{0x118D7}); - CASE_FOLD.put(0x118B8, new int[]{0x118D8}); - CASE_FOLD.put(0x118B9, new int[]{0x118D9}); - CASE_FOLD.put(0x118BA, new int[]{0x118DA}); - CASE_FOLD.put(0x118BB, new int[]{0x118DB}); - CASE_FOLD.put(0x118BC, new int[]{0x118DC}); - CASE_FOLD.put(0x118BD, new int[]{0x118DD}); - CASE_FOLD.put(0x118BE, new int[]{0x118DE}); - CASE_FOLD.put(0x118BF, new int[]{0x118DF}); - CASE_FOLD.put(0x16E40, new int[]{0x16E60}); - CASE_FOLD.put(0x16E41, new int[]{0x16E61}); - CASE_FOLD.put(0x16E42, new int[]{0x16E62}); - CASE_FOLD.put(0x16E43, new int[]{0x16E63}); - CASE_FOLD.put(0x16E44, new int[]{0x16E64}); - CASE_FOLD.put(0x16E45, new int[]{0x16E65}); - CASE_FOLD.put(0x16E46, new int[]{0x16E66}); - CASE_FOLD.put(0x16E47, new int[]{0x16E67}); - CASE_FOLD.put(0x16E48, new int[]{0x16E68}); - CASE_FOLD.put(0x16E49, new int[]{0x16E69}); - CASE_FOLD.put(0x16E4A, new int[]{0x16E6A}); - CASE_FOLD.put(0x16E4B, new int[]{0x16E6B}); - CASE_FOLD.put(0x16E4C, new int[]{0x16E6C}); - CASE_FOLD.put(0x16E4D, new int[]{0x16E6D}); - CASE_FOLD.put(0x16E4E, new int[]{0x16E6E}); - CASE_FOLD.put(0x16E4F, new int[]{0x16E6F}); - CASE_FOLD.put(0x16E50, new int[]{0x16E70}); - CASE_FOLD.put(0x16E51, new int[]{0x16E71}); - CASE_FOLD.put(0x16E52, new int[]{0x16E72}); - CASE_FOLD.put(0x16E53, new int[]{0x16E73}); - CASE_FOLD.put(0x16E54, new int[]{0x16E74}); - CASE_FOLD.put(0x16E55, new int[]{0x16E75}); - CASE_FOLD.put(0x16E56, new int[]{0x16E76}); - CASE_FOLD.put(0x16E57, new int[]{0x16E77}); - CASE_FOLD.put(0x16E58, new int[]{0x16E78}); - CASE_FOLD.put(0x16E59, new int[]{0x16E79}); - CASE_FOLD.put(0x16E5A, new int[]{0x16E7A}); - CASE_FOLD.put(0x16E5B, new int[]{0x16E7B}); - CASE_FOLD.put(0x16E5C, new int[]{0x16E7C}); - CASE_FOLD.put(0x16E5D, new int[]{0x16E7D}); - CASE_FOLD.put(0x16E5E, new int[]{0x16E7E}); - CASE_FOLD.put(0x16E5F, new int[]{0x16E7F}); - CASE_FOLD.put(0x1E900, new int[]{0x1E922}); - 
CASE_FOLD.put(0x1E901, new int[]{0x1E923}); - CASE_FOLD.put(0x1E902, new int[]{0x1E924}); - CASE_FOLD.put(0x1E903, new int[]{0x1E925}); - CASE_FOLD.put(0x1E904, new int[]{0x1E926}); - CASE_FOLD.put(0x1E905, new int[]{0x1E927}); - CASE_FOLD.put(0x1E906, new int[]{0x1E928}); - CASE_FOLD.put(0x1E907, new int[]{0x1E929}); - CASE_FOLD.put(0x1E908, new int[]{0x1E92A}); - CASE_FOLD.put(0x1E909, new int[]{0x1E92B}); - CASE_FOLD.put(0x1E90A, new int[]{0x1E92C}); - CASE_FOLD.put(0x1E90B, new int[]{0x1E92D}); - CASE_FOLD.put(0x1E90C, new int[]{0x1E92E}); - CASE_FOLD.put(0x1E90D, new int[]{0x1E92F}); - CASE_FOLD.put(0x1E90E, new int[]{0x1E930}); - CASE_FOLD.put(0x1E90F, new int[]{0x1E931}); - CASE_FOLD.put(0x1E910, new int[]{0x1E932}); - CASE_FOLD.put(0x1E911, new int[]{0x1E933}); - CASE_FOLD.put(0x1E912, new int[]{0x1E934}); - CASE_FOLD.put(0x1E913, new int[]{0x1E935}); - CASE_FOLD.put(0x1E914, new int[]{0x1E936}); - CASE_FOLD.put(0x1E915, new int[]{0x1E937}); - CASE_FOLD.put(0x1E916, new int[]{0x1E938}); - CASE_FOLD.put(0x1E917, new int[]{0x1E939}); - CASE_FOLD.put(0x1E918, new int[]{0x1E93A}); - CASE_FOLD.put(0x1E919, new int[]{0x1E93B}); - CASE_FOLD.put(0x1E91A, new int[]{0x1E93C}); - CASE_FOLD.put(0x1E91B, new int[]{0x1E93D}); - CASE_FOLD.put(0x1E91C, new int[]{0x1E93E}); - CASE_FOLD.put(0x1E91D, new int[]{0x1E93F}); - CASE_FOLD.put(0x1E91E, new int[]{0x1E940}); - CASE_FOLD.put(0x1E91F, new int[]{0x1E941}); - CASE_FOLD.put(0x1E920, new int[]{0x1E942}); - CASE_FOLD.put(0x1E921, new int[]{0x1E943}); - } -} diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/RubyFlavor.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/RubyFlavor.java index 229263bcbad7..f42add383e20 100644 --- a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/RubyFlavor.java +++ b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/RubyFlavor.java 
@@ -40,20 +40,21 @@ */ package com.oracle.truffle.regex.tregex.parser.flavors; +import java.util.function.BiPredicate; + import com.oracle.truffle.regex.RegexLanguage; import com.oracle.truffle.regex.RegexSource; import com.oracle.truffle.regex.tregex.buffer.CompilationBuffer; import com.oracle.truffle.regex.tregex.nfa.QuantifierGuard; import com.oracle.truffle.regex.tregex.nodes.nfa.TRegexBacktrackingNFAExecutorNode; +import com.oracle.truffle.regex.tregex.parser.CaseFoldData; import com.oracle.truffle.regex.tregex.parser.JSRegexParser; +import com.oracle.truffle.regex.tregex.parser.MultiCharacterCaseFolding; import com.oracle.truffle.regex.tregex.parser.RegexParser; import com.oracle.truffle.regex.tregex.parser.RegexValidator; import com.oracle.truffle.regex.tregex.parser.ast.RegexAST; import com.oracle.truffle.regex.tregex.parser.ast.visitors.NFATraversalRegexASTVisitor; -import java.util.Arrays; -import java.util.function.BiPredicate; - /** * An implementation of the Ruby regex flavor. 
* @@ -244,16 +245,6 @@ public BiPredicate getEqualsIgnoreCasePredicate(RegexAST ast) } private static boolean equalsIgnoreCase(int codePointA, int codePointB) { - int[] foldedA = RubyCaseFolding.caseFold(codePointA); - int[] foldedB = RubyCaseFolding.caseFold(codePointB); - if (foldedA == null && foldedB == null) { - return codePointA == codePointB; - } else if (foldedA == null) { - return foldedB.length == 1 && codePointA == foldedB[0]; - } else if (foldedB == null) { - return foldedA.length == 1 && foldedA[0] == codePointB; - } else { - return Arrays.equals(foldedA, foldedB); - } + return MultiCharacterCaseFolding.equalsIgnoreCase(CaseFoldData.CaseFoldAlgorithm.Ruby, codePointA, codePointB); } } diff --git a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/RubyRegexParser.java b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/RubyRegexParser.java index 9465267b7782..6a5582aeebfe 100644 --- a/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/RubyRegexParser.java +++ b/regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/RubyRegexParser.java @@ -40,6 +40,8 @@ */ package com.oracle.truffle.regex.tregex.parser.flavors; +import static com.oracle.truffle.regex.tregex.parser.RegexLexer.isAscii; + import java.math.BigInteger; import java.util.ArrayDeque; import java.util.ArrayList; @@ -50,7 +52,6 @@ import java.util.List; import java.util.Map; import java.util.Optional; -import java.util.function.BiConsumer; import java.util.function.Predicate; import org.graalvm.collections.Pair; @@ -64,12 +65,14 @@ import com.oracle.truffle.regex.UnsupportedRegexException; import com.oracle.truffle.regex.charset.CodePointSet; import com.oracle.truffle.regex.charset.CodePointSetAccumulator; -import com.oracle.truffle.regex.charset.Range; import com.oracle.truffle.regex.charset.UnicodeProperties; import 
com.oracle.truffle.regex.errors.RbErrorMessages; import com.oracle.truffle.regex.tregex.buffer.CompilationBuffer; import com.oracle.truffle.regex.tregex.buffer.IntArrayBuffer; +import com.oracle.truffle.regex.tregex.parser.CaseFoldData; +import com.oracle.truffle.regex.tregex.parser.MultiCharacterCaseFolding; import com.oracle.truffle.regex.tregex.parser.RegexASTBuilder; +import com.oracle.truffle.regex.tregex.parser.RegexLexer; import com.oracle.truffle.regex.tregex.parser.RegexParser; import com.oracle.truffle.regex.tregex.parser.RegexValidator; import com.oracle.truffle.regex.tregex.parser.Token; @@ -620,24 +623,6 @@ private RegexSyntaxException syntaxErrorAt(String message, int pos) { return RegexSyntaxException.createPattern(inSource, message, pos); } - // Character predicates - - private static boolean isOctDigit(int c) { - return c >= '0' && c <= '7'; - } - - private static boolean isDecDigit(int c) { - return c >= '0' && c <= '9'; - } - - private static boolean isHexDigit(int c) { - return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'); - } - - static boolean isAscii(int c) { - return c < 128; - } - // First pass - identifying capture groups private void scanForCaptureGroups() { @@ -701,7 +686,7 @@ private void scanForCaptureGroups() { parseGroupReference('>', true, true, true, false); } else if (match("'")) { parseGroupReference('\'', true, true, true, false); - } else if (isDecDigit(curChar())) { + } else if (RegexLexer.isDecimalDigit(curChar())) { parseGroupReference(')', true, false, true, false); } } @@ -968,7 +953,7 @@ private void string(int firstCodepoint) { } if (getLocalFlags().isIgnoreCase()) { - RubyCaseFolding.caseFoldUnfoldString(codepointsBuffer.toArray(), inSource.getEncoding().getFullSet(), astBuilder); + MultiCharacterCaseFolding.caseFoldUnfoldString(CaseFoldData.CaseFoldAlgorithm.Ruby, codepointsBuffer.toArray(), inSource.getEncoding().getFullSet(), astBuilder); } else { for (int i = 0; i < 
codepointsBuffer.length(); i++) { addChar(codepointsBuffer.get(i)); @@ -993,7 +978,7 @@ private void string(int firstCodepoint) { private void buildChar(int codepoint) { if (!silent) { if (getLocalFlags().isIgnoreCase()) { - RubyCaseFolding.caseFoldUnfoldString(new int[]{codepoint}, inSource.getEncoding().getFullSet(), astBuilder); + MultiCharacterCaseFolding.caseFoldUnfoldString(CaseFoldData.CaseFoldAlgorithm.Ruby, new int[]{codepoint}, inSource.getEncoding().getFullSet(), astBuilder); } else { addChar(codepoint); } @@ -1038,10 +1023,10 @@ public boolean isQuantifierNext() { return false; } else { // lower bound - getMany(RubyRegexParser::isDecDigit); + getMany(RegexLexer::isDecimalDigit); // upper bound if (match(",")) { - getMany(RubyRegexParser::isDecDigit); + getMany(RegexLexer::isDecimalDigit); } if (!match("}")) { return false; @@ -1316,7 +1301,7 @@ private boolean backreference() { int restorePosition = position; if (curChar() >= '1' && curChar() <= '9') { // Joni only considers backreferences numbered <= 1000. 
- String number = getUpTo(4, RubyRegexParser::isDecDigit); + String number = getUpTo(4, RegexLexer::isDecimalDigit); int groupNumber = Integer.parseInt(number); if (groupNumber > 1000) { position = restorePosition; @@ -1354,7 +1339,7 @@ private boolean namedBackreference() { List groupNumbers = parseGroupReference('>', true, true, true, true); int nameEnd = position - 1; // named references cannot point forward, so filter out reference > groupIndex - buildNamedBackreference(groupNumbers.stream().filter(groupNumber -> groupNumber <= groupIndex).toArray(n -> new Integer[n]), inPattern.substring(nameStart, nameEnd)); + buildNamedBackreference(groupNumbers.stream().filter(groupNumber -> groupNumber <= groupIndex).toArray(Integer[]::new), inPattern.substring(nameStart, nameEnd)); return true; } else { return false; @@ -1365,12 +1350,12 @@ private List parseGroupReference(char terminator, boolean allowNumeric, String groupName; List groupNumbers = null; int beginPos = position; - if (curChar() == '-' || isDecDigit(curChar())) { + if (curChar() == '-' || RegexLexer.isDecimalDigit(curChar())) { if (!allowNumeric) { throw syntaxErrorHere(RbErrorMessages.INVALID_GROUP_NAME); } int sign = match("-") ? 
-1 : 1; - groupName = getMany(RubyRegexParser::isDecDigit); + groupName = getMany(RegexLexer::isDecimalDigit); int groupNumber; try { groupNumber = sign * Integer.parseInt(groupName); @@ -1413,7 +1398,7 @@ private List parseGroupReference(char terminator, boolean allowNumeric, } if (allowLevels && (curChar() == '+' || curChar() == '-')) { advance(); // consume sign - String level = getMany(RubyRegexParser::isDecDigit); + String level = getMany(RegexLexer::isDecimalDigit); if (level.isEmpty()) { throw syntaxErrorAt(RbErrorMessages.INVALID_GROUP_NAME, beginPos); } @@ -1560,7 +1545,7 @@ private boolean stringEscape() { if (match("u{")) { getMany(c -> ASCII_POSIX_CHAR_CLASSES.get("space").contains(c)); while (!match("}")) { - String code = getMany(RubyRegexParser::isHexDigit); + String code = getMany(RegexLexer::isHexDigit); try { int codePoint = Integer.parseInt(code, 16); if (codePoint > 0x10FFFF) { @@ -1570,7 +1555,7 @@ private boolean stringEscape() { } catch (NumberFormatException e) { throw syntaxErrorAt(RbErrorMessages.badEscape(code), beginPos); } - getMany(c -> WHITESPACE.get(c)); + getMany(WHITESPACE::get); } return true; } else { @@ -1666,7 +1651,7 @@ private Optional characterEscape() { switch (curChar()) { case 'x': { advance(); - String code = getUpTo(2, RubyRegexParser::isHexDigit); + String code = getUpTo(2, RegexLexer::isHexDigit); int byteValue = Integer.parseInt(code, 16); if (byteValue > 0x7F) { // This is a non-ASCII byte escape. 
The escaped character might be part of a @@ -1685,10 +1670,10 @@ private Optional characterEscape() { advance(); String code; if (match("{")) { - code = getMany(RubyRegexParser::isHexDigit); + code = getMany(RegexLexer::isHexDigit); mustMatch("}"); } else { - code = getUpTo(4, RubyRegexParser::isHexDigit); + code = getUpTo(4, RegexLexer::isHexDigit); if (code.length() < 4) { throw syntaxErrorAt(RbErrorMessages.incompleteEscape(code), beginPos); } @@ -1711,7 +1696,7 @@ private Optional characterEscape() { case '5': case '6': case '7': { - String code = getUpTo(3, RubyRegexParser::isOctDigit); + String code = getUpTo(3, c -> RegexLexer.isOctalDigit(c)); int codePoint = Integer.parseInt(code, 8); if (codePoint > 0xFF) { throw syntaxErrorAt(RbErrorMessages.TOO_BIG_NUMBER, beginPos); @@ -1741,7 +1726,7 @@ private void characterClass() { private void buildCharClass() { if (!silent) { if (getLocalFlags().isIgnoreCase()) { - List> multiCodePointExpansions = caseClosureMultiCodePoint(); + List> multiCodePointExpansions = MultiCharacterCaseFolding.caseClosureMultiCodePoint(CaseFoldData.CaseFoldAlgorithm.Ruby, curCharClass); if (multiCodePointExpansions.size() > 0) { pushGroup(); addCharClass(curCharClass.toCodePointSet()); @@ -1750,7 +1735,7 @@ private void buildCharClass() { int from = pair.getLeft(); int[] to = pair.getRight(); boolean dropAsciiOnStart = !fullyFoldableCharacters.get().contains(from); - RubyCaseFolding.caseFoldUnfoldString(to, inSource.getEncoding().getFullSet(), dropAsciiOnStart, astBuilder); + MultiCharacterCaseFolding.caseFoldUnfoldString(CaseFoldData.CaseFoldAlgorithm.Ruby, to, inSource.getEncoding().getFullSet(), dropAsciiOnStart, astBuilder); } popGroup(); } else { @@ -1976,87 +1961,14 @@ private PosixClassParseResult collectPosixCharClass() { } } - /** - * Calls the argument on any element of the character class which has a case-folding. 
- */ - private void caseFoldCharClass(BiConsumer caseFoldItem) { - if (curCharClass.get().size() < RubyCaseFoldingData.CASE_FOLD.size()) { - for (Range r : curCharClass) { - RubyCaseFoldingData.CASE_FOLD.subMap(r.lo, r.hi + 1).forEach((Integer from, int[] to) -> { - caseFoldItem.accept(from, to); - }); - } - } else { - RubyCaseFoldingData.CASE_FOLD.forEach((Integer from, int[] to) -> { - if (curCharClass.get().contains(from)) { - caseFoldItem.accept(from, to); - } - }); - } - } - private boolean acceptableCaseFold(int from, int to) { // Characters which are not "fully case-foldable" are only treated as equivalent if the // relation doesn't cross the ASCII boundary. return fullyFoldableCharacters.get().contains(from) || isAscii(from) == isAscii(to); } - /** - * This method modifies {@code curCharClass} to contains its closure on case mapping. - */ private void caseClosure() { - charClassTmp.clear(); - - caseFoldCharClass((from, to) -> { - if (to.length == 1) { - // Add the case-folded version to the character class... - if (acceptableCaseFold(from, to[0])) { - charClassTmp.addCodePoint(to[0]); - } - } - // ... and also any characters which case-fold to the same. - for (int unfolding : RubyCaseUnfoldingTrie.findSingleCharUnfoldings(to)) { - if (unfolding != from && acceptableCaseFold(from, unfolding)) { - charClassTmp.addCodePoint(unfolding); - } - } - }); - - // We also handle all the characters which might have no case-folding, i.e. they case-fold - // to themselves. - for (Range r : curCharClass) { - for (int codepoint = r.lo; codepoint <= r.hi; codepoint++) { - for (int unfolding : RubyCaseUnfoldingTrie.findSingleCharUnfoldings(codepoint)) { - if (acceptableCaseFold(codepoint, unfolding)) { - charClassTmp.addCodePoint(unfolding); - } - } - } - } - - // Only include characters that are admissible in the given encoding. 
- charClassTmp.intersectWith(inSource.getEncoding().getFullSet()); - - curCharClass.addSet(charClassTmp.get()); - } - - /** - * Finds any characters in {@link #curCharClass} that have multi-codepoint expansions. - * - * @return a list of pairs, with the first element being the expanded codepoint and the second - * element the expansion - */ - private List> caseClosureMultiCodePoint() { - List> multiCodePointExpansions = new ArrayList<>(); - - caseFoldCharClass((from, to) -> { - if (to.length > 1) { - assert !isAscii(from); - multiCodePointExpansions.add(Pair.create(from, to)); - } - }); - - return multiCodePointExpansions; + MultiCharacterCaseFolding.caseClosure(CaseFoldData.CaseFoldAlgorithm.Ruby, curCharClass, charClassTmp, this::acceptableCaseFold, inSource.getEncoding().getFullSet()); } /** @@ -2110,12 +2022,12 @@ private Quantifier parseQuantifier(int ch) { Optional lowerBound = Optional.empty(); Optional upperBound = Optional.empty(); boolean canBeNonGreedy = true; - String lower = getMany(RubyRegexParser::isDecDigit); + String lower = getMany(RegexLexer::isDecimalDigit); if (!lower.isEmpty()) { lowerBound = Optional.of(new BigInteger(lower)); } if (match(",")) { - String upper = getMany(RubyRegexParser::isDecDigit); + String upper = getMany(RegexLexer::isDecimalDigit); if (!upper.isEmpty()) { upperBound = Optional.of(new BigInteger(upper)); } @@ -2380,14 +2292,14 @@ private void conditionalBackReference() { List groupNumbers; boolean namedReference; if (match("<")) { - namedReference = curChar() != '-' && !isDecDigit(curChar()); + namedReference = curChar() != '-' && !RegexLexer.isDecimalDigit(curChar()); groupNumbers = parseGroupReference('>', true, true, true, true); mustMatch(")"); } else if (match("'")) { - namedReference = curChar() != '-' && !isDecDigit(curChar()); + namedReference = curChar() != '-' && !RegexLexer.isDecimalDigit(curChar()); groupNumbers = parseGroupReference('\'', true, true, true, true); mustMatch(")"); - } else if 
(isDecDigit(curChar())) { + } else if (RegexLexer.isDecimalDigit(curChar())) { namedReference = false; groupNumbers = parseGroupReference(')', true, false, true, true); } else { diff --git a/regex/src/com.oracle.truffle.regex/tools/casefolding/.gitignore b/regex/src/com.oracle.truffle.regex/tools/casefolding/.gitignore new file mode 100644 index 000000000000..ea8c4bf7f35f --- /dev/null +++ b/regex/src/com.oracle.truffle.regex/tools/casefolding/.gitignore @@ -0,0 +1 @@ +/target diff --git a/regex/src/com.oracle.truffle.regex/tools/casefolding/Cargo.lock b/regex/src/com.oracle.truffle.regex/tools/casefolding/Cargo.lock new file mode 100644 index 000000000000..8e5668087923 --- /dev/null +++ b/regex/src/com.oracle.truffle.regex/tools/casefolding/Cargo.lock @@ -0,0 +1,1661 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "addr2line" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + +[[package]] +name = "async-compression" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb42b2197bf15ccb092b62c74515dbd8b86d0effd934795f6687c93b6e679a2c" +dependencies = [ + "flate2", + "futures-core", + "memchr", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "backtrace" +version = "0.3.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"2089b7e3f35b9dd2d0ed921ead4f6d318c27680d4a5bd167b3ee120edb105837" +dependencies = [ + "addr2line", + "cc", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", +] + +[[package]] +name = "base64" +version = "0.21.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ba43ea6f343b788c8764558649e08df62f86c6ef251fdaeb1ffd010a9ae50a2" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitflags" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4682ae6287fcf752ecaabbfcc7b6f9b72aa33933dc23a554d853aea8eea8635" + +[[package]] +name = "bumpalo" +version = "3.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec" + +[[package]] +name = "bytes" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223" + +[[package]] +name = "cc" +version = "1.0.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" +dependencies = [ + "libc", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "console" +version = "0.15.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c926e00cc70edefdc64d3a5ff31cc65bb97a3460097762bd23afb4d8145fccf8" +dependencies = [ + "encode_unicode", + "lazy_static", + "libc", + "unicode-width", + "windows-sys 0.45.0", +] + +[[package]] +name = "core-foundation" +version = "0.9.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "194a7a9e6de53fa55116934067c844d9d749312f75c6f6d0980e8c252f8c2146" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" + +[[package]] +name = "crc32fast" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "csv" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac574ff4d437a7b5ad237ef331c17ccca63c46479e5b5453eb8e10bb99a759fe" +dependencies = [ + "csv-core", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "csv-core" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70" +dependencies = [ + "memchr", +] + +[[package]] +name = "darling" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a01d95850c592940db9b8194bc39f4bc0e89dee5c4265e4b1807c34a9aba453c" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "859d65a907b6852c9361e3185c862aae7fafd2887876799fa55f5f99dc40d610" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn 1.0.109", +] + +[[package]] +name = "darling_macro" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c972679f83bdf9c42bd905396b6c3588a843a17f0f16dfcfa3e2c5d57441835" +dependencies = [ + "darling_core", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "displaydoc" +version 
= "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "487585f4d0c6655fe74905e2504d8ad6908e4db67f744eb140876906c2f3175d" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.38", +] + +[[package]] +name = "encode_unicode" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" + +[[package]] +name = "encoding_rs" +version = "0.8.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7268b386296a025e474d5140678f75d6de9493ae55a5d709eeb9dd08149945e1" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "errno" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3e13f66a2f95e32a39eaa81f6b95d42878ca0e1db0c7543723dfe12557e860" +dependencies = [ + "libc", + "windows-sys 0.48.0", +] + +[[package]] +name = "error-chain" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d2f06b9cac1506ece98fe3231e3cc9c4410ec3d5b1f24ae1c8946f0742cdefc" +dependencies = [ + "backtrace", + "version_check", +] + +[[package]] +name = "fastrand" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5" + +[[package]] +name = "flate2" +version = "1.0.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6c98ee8095e9d1dcbf2fcc6d95acccb90d1c81db1e44725c6a984b1dbdfb010" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "foreign-types" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" +dependencies = [ + "foreign-types-shared", +] + +[[package]] +name = "foreign-types-shared" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" + +[[package]] +name = "form_urlencoded" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a62bc1cf6f830c2ec14a513a9fb124d0a213a629668a4186f329db21fe045652" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "futures-channel" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "955518d47e09b25bbebc7a18df10b81f0c766eaf4c4f1cccef2fca5f2a4fb5f2" +dependencies = [ + "futures-core", +] + +[[package]] +name = "futures-core" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4bca583b7e26f571124fe5b7561d49cb2868d79116cfa0eefce955557c6fee8c" + +[[package]] +name = "futures-io" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fff74096e71ed47f8e023204cfd0aa1289cd54ae5430a9523be060cdb849964" + +[[package]] +name = "futures-sink" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f43be4fe21a13b9781a69afa4985b0f6ee0e1afab2c6f454a8cf30e2b2237b6e" + +[[package]] +name = "futures-task" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76d3d132be6c0e6aa1534069c705a74a5997a356c0dc2f86a47765e5617c5b65" + +[[package]] +name = "futures-util" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26b01e40b772d54cf6c6d721c1d1abd0647a0106a12ecaa1c186273392a69533" +dependencies = [ + "futures-core", + "futures-io", + "futures-task", + "memchr", + "pin-project-lite", + "pin-utils", + "slab", +] + +[[package]] +name = "gimli" 
+version = "0.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fb8d784f27acf97159b40fc4db5ecd8aa23b9ad5ef69cdd136d3bc80665f0c0" + +[[package]] +name = "h2" +version = "0.3.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91fc23aa11be92976ef4729127f1a74adf36d8436f7816b185d18df956790833" +dependencies = [ + "bytes", + "fnv", + "futures-core", + "futures-sink", + "futures-util", + "http", + "indexmap", + "slab", + "tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" + +[[package]] +name = "hermit-abi" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d77f7ec81a6d05a3abb01ab6eb7590f6083d08449fe5a1c8b1e620283546ccb7" + +[[package]] +name = "http" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd6effc99afb63425aff9b05836f029929e345a6148a14b7ecd5ab67af944482" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + +[[package]] +name = "http-body" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5f38f16d184e36f2408a55281cd658ecbd3ca05cce6d6510a176eca393e26d1" +dependencies = [ + "bytes", + "http", + "pin-project-lite", +] + +[[package]] +name = "httparse" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904" + +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + +[[package]] +name = "hyper" +version = "0.14.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"ffb1cfd654a8219eaef89881fdb3bb3b1cdc5fa75ded05d6933b2b382e395468" +dependencies = [ + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "h2", + "http", + "http-body", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "socket2 0.4.9", + "tokio", + "tower-service", + "tracing", + "want", +] + +[[package]] +name = "hyper-tls" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905" +dependencies = [ + "bytes", + "hyper", + "native-tls", + "tokio", + "tokio-native-tls", +] + +[[package]] +name = "icu_collator" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2223603c703f1f6395206b2c0196b4cacc70f3ac4560f92c9386c1a416f92ef6" +dependencies = [ + "displaydoc", + "icu_collator_data", + "icu_collections", + "icu_locid", + "icu_locid_transform", + "icu_normalizer", + "icu_properties", + "icu_provider", + "smallvec", + "utf16_iter", + "utf8_iter", + "zerovec", +] + +[[package]] +name = "icu_collator_data" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ec0a6848b88d80435d8b5f960ff6310715ad7cfcf3e042b874532c3b6af11a8" + +[[package]] +name = "icu_collections" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3907b2246e8dd5a29ead8a965e7c0c8a90e9b928e614a4279257d45c5e553e91" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locid" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f284eb342dc49d3e9d9f3b188489d76b5d22dfb1d1a5e0d1941811253bac625c" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_locid_transform" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"6551daf80882d8e68eee186cc19e132d8bde1b1f059a79b93384a5ca0e8fc5e7" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_locid_transform_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_locid_transform_data" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a741eba5431f75eb2f1f9022d3cffabcadda6771e54fb4e77c8ba8653e4da44" + +[[package]] +name = "icu_normalizer" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "080fc33a720d50a7342b0c58df010fbcfb842d6f78ef81555f8b1ac6bba57d3c" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "utf16_iter", + "utf8_iter", + "write16", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f8d22f74066c2e6442db2a9aa14950278e86719e811e304e48bae03094b369d" + +[[package]] +name = "icu_properties" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3477ae70f8ca8dc08ff7574b5398ed0a2f2e4e6b66bdff2558a92ed67e262be1" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_locid_transform", + "icu_properties_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c8bb3b67a8347e94d580434369e5c7ee89999b9309d04b7cfc88dfaa0f31b59" + +[[package]] +name = "icu_provider" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68acdef80034b5e35d8524e9817479d389a4f9774f3f0cbe1bf3884d80fd5934" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_provider_macros", + "stable_deref_trait", + "tinystr", + "writeable", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_provider_macros" +version 
= "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2060258edfcfe32ca7058849bf0f146cb5c59aadbedf480333c0d0002f97bc99" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.38", +] + +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + +[[package]] +name = "idna" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d20d6b07bfbc108882d88ed8e37d39636dcc260e15e30c45e6ba089610b917c" +dependencies = [ + "unicode-bidi", + "unicode-normalization", +] + +[[package]] +name = "indexmap" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" +dependencies = [ + "autocfg", + "hashbrown", +] + +[[package]] +name = "indicatif" +version = "0.17.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb28741c9db9a713d93deb3bb9515c20788cef5815265bee4980e87bde7e0f25" +dependencies = [ + "console", + "instant", + "number_prefix", + "portable-atomic", + "unicode-width", +] + +[[package]] +name = "instant" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "ipnet" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28b29a3cd74f0f4598934efe3aeba42bae0eb4680554128851ebbecb02af14e6" + +[[package]] +name = "itoa" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" + +[[package]] +name = "js-sys" +version = "0.3.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"c5f195fe497f702db0f318b07fdd68edb16955aed830df8363d837542f8f935a" +dependencies = [ + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "libc" +version = "0.2.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a08173bc88b7955d1b3145aa561539096c421ac8debde8cbc3612ec635fee29b" + +[[package]] +name = "linux-raw-sys" +version = "0.4.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da2479e8c062e40bf0066ffa0bc823de0a9368974af99c9f6df941d2c231e03f" + +[[package]] +name = "litemap" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a1a2647d5b7134127971a6de0d533c49de2159167e7f259c427195f87168a1" + +[[package]] +name = "log" +version = "0.4.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" + +[[package]] +name = "memchr" +version = "2.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167" + +[[package]] +name = "mime" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" + +[[package]] +name = "miniz_oxide" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7" +dependencies = [ + "adler", +] + +[[package]] +name = "mio" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "927a765cd3fc26206e66b296465fa9d3e5ab003e651c1b3c060e7956d96b19d2" +dependencies = [ + "libc", + "wasi", + "windows-sys 0.48.0", +] + +[[package]] 
+name = "native-tls" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07226173c32f2926027b63cce4bcd8076c3552846cbe7925f3aaffeac0a3b92e" +dependencies = [ + "lazy_static", + "libc", + "log", + "openssl", + "openssl-probe", + "openssl-sys", + "schannel", + "security-framework", + "security-framework-sys", + "tempfile", +] + +[[package]] +name = "num_cpus" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" +dependencies = [ + "hermit-abi", + "libc", +] + +[[package]] +name = "number_prefix" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" + +[[package]] +name = "object" +version = "0.32.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9cf5f9dd3933bd50a9e1f149ec995f39ae2c496d31fd772c1fd45ebc27e902b0" +dependencies = [ + "memchr", +] + +[[package]] +name = "once_cell" +version = "1.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" + +[[package]] +name = "openssl" +version = "0.10.57" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bac25ee399abb46215765b1cb35bc0212377e58a061560d8b29b024fd0430e7c" +dependencies = [ + "bitflags 2.4.0", + "cfg-if", + "foreign-types", + "libc", + "once_cell", + "openssl-macros", + "openssl-sys", +] + +[[package]] +name = "openssl-macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.38", +] + +[[package]] +name = "openssl-probe" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" + +[[package]] +name = "openssl-sys" +version = "0.9.93" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db4d56a4c0478783083cfafcc42493dd4a981d41669da64b4572a2a089b51b1d" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "oracle" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfe80334af1fbaea016fbef0af77f5fa32452362e29a039389b8c93737585003" +dependencies = [ + "cc", + "lazy_static", + "oracle_procmacro", + "paste", +] + +[[package]] +name = "oracle_procmacro" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad247f3421d57de56a0d0408d3249d4b1048a522be2013656d92f022c3d8af27" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "paste" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c" + +[[package]] +name = "percent-encoding" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94" + +[[package]] +name = "pin-project-lite" +version = "0.2.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8afb450f006bf6385ca15ef45d71d2288452bc3683ce2e2cacc0d18e4be60b58" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "pkg-config" +version = "0.3.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964" + +[[package]] +name = "portable-atomic" +version = "1.4.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "31114a898e107c51bb1609ffaf55a0e011cf6a4d7f1170d0015a165082c0338b" + +[[package]] +name = "proc-macro2" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "134c189feb4956b20f6f547d2cf727d4c0fe06722b20a0eec87ed445a97f92da" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "redox_syscall" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" +dependencies = [ + "bitflags 1.3.2", +] + +[[package]] +name = "reqwest" +version = "0.11.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "046cd98826c46c2ac8ddecae268eb5c2e58628688a5fc7a2643704a73faba95b" +dependencies = [ + "async-compression", + "base64", + "bytes", + "encoding_rs", + "futures-core", + "futures-util", + "h2", + "http", + "http-body", + "hyper", + "hyper-tls", + "ipnet", + "js-sys", + "log", + "mime", + "native-tls", + "once_cell", + "percent-encoding", + "pin-project-lite", + "serde", + "serde_json", + "serde_urlencoded", + "system-configuration", + "tokio", + "tokio-native-tls", + "tokio-util", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", + "winreg", +] + +[[package]] +name = "rustc-demangle" +version = "0.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" + +[[package]] +name = "rustix" +version = "0.38.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a74ee2d7c2581cd139b42447d7d9389b889bdaad3a73f1ebb16f2a3237bb19c" +dependencies = [ + 
"bitflags 2.4.0", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.48.0", +] + +[[package]] +name = "ryu" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" + +[[package]] +name = "schannel" +version = "0.1.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c3733bf4cf7ea0880754e19cb5a462007c4a8c1914bff372ccc95b464f1df88" +dependencies = [ + "windows-sys 0.48.0", +] + +[[package]] +name = "security-framework" +version = "2.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05b64fb303737d99b81884b2c63433e9ae28abebe5eb5045dcdd175dc2ecf4de" +dependencies = [ + "bitflags 1.3.2", + "core-foundation", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework-sys" +version = "2.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e932934257d3b408ed8f30db49d85ea163bfe74961f017f405b025af298f0c7a" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "serde" +version = "1.0.188" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf9e0fcba69a370eed61bcf2b728575f726b50b55cba78064753d708ddc7549e" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.188" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.38", +] + +[[package]] +name = "serde_json" +version = "1.0.107" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b420ce6e3d8bd882e9b243c6eed35dbc9a6110c9769e74b584e0d68d1f20c65" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "serde_urlencoded" +version = "0.7.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" +dependencies = [ + "form_urlencoded", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "slab" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" +dependencies = [ + "autocfg", +] + +[[package]] +name = "smallvec" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "942b4a808e05215192e39f4ab80813e599068285906cc91aa64f923db842bd5a" + +[[package]] +name = "socket2" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64a4a911eed85daf18834cfaa86a79b7d266ff93ff5ba14005426219480ed662" +dependencies = [ + "libc", + "winapi", +] + +[[package]] +name = "socket2" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4031e820eb552adee9295814c0ced9e5cf38ddf1e8b7d566d6de8e2538ea989e" +dependencies = [ + "libc", + "windows-sys 0.48.0", +] + +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + +[[package]] +name = "strsim" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e96b79aaa137db8f61e26363a0c9b47d8b4ec75da28b7d1d614c2303e232408b" 
+dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "synstructure" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "285ba80e733fac80aa4270fbcdf83772a79b80aa35c97075320abfee4a915b06" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.38", + "unicode-xid", +] + +[[package]] +name = "system-configuration" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7" +dependencies = [ + "bitflags 1.3.2", + "core-foundation", + "system-configuration-sys", +] + +[[package]] +name = "system-configuration-sys" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75fb188eb626b924683e3b95e3a48e63551fcfb51949de2f06a9d91dbee93c9" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "tempfile" +version = "3.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb94d2f3cc536af71caac6b6fcebf65860b347e7ce0cc9ebe8f70d3e521054ef" +dependencies = [ + "cfg-if", + "fastrand", + "redox_syscall", + "rustix", + "windows-sys 0.48.0", +] + +[[package]] +name = "tinystr" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5d0e245e80bdc9b4e5356fc45a72184abbc3861992603f515270e9340f5a219" +dependencies = [ + "displaydoc", + "zerovec", +] + +[[package]] +name = "tinyvec" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + +[[package]] +name = "tokio" +version = "1.33.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f38200e3ef7995e5ef13baec2f432a6da0aa9ac495b2c0e8f3b7eec2c92d653" +dependencies = [ + "backtrace", + "bytes", + "libc", + "mio", + "num_cpus", + "pin-project-lite", + "socket2 0.5.4", + "windows-sys 0.48.0", +] + +[[package]] +name = "tokio-native-tls" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2" +dependencies = [ + "native-tls", + "tokio", +] + +[[package]] +name = "tokio-util" +version = "0.7.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d68074620f57a0b21594d9735eb2e98ab38b17f80d3fcb189fca266771ca60d" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", + "tracing", +] + +[[package]] +name = "tower-service" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" + +[[package]] +name = "tracing" +version = "0.1.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ce8c33a8d48bd45d624a6e523445fd21ec13d3653cd51f681abf67418f54eb8" +dependencies = [ + "cfg-if", + "pin-project-lite", + "tracing-core", +] + +[[package]] +name = "tracing-core" +version = "0.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0955b8137a1df6f1a2e9a37d8a6656291ff0297c1a97c24e0d8425fe2312f79a" +dependencies = [ + "once_cell", +] + +[[package]] +name = "tregex-casefolding" +version = "0.1.0" +dependencies = [ + "csv", + "error-chain", + "icu_collator", + "icu_locid", + "indicatif", + "oracle", + "reqwest", +] + +[[package]] +name = "try-lock" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3528ecfd12c466c6f163363caf2d02a71161dd5e1cc6ae7b34207ea2d42d81ed" + +[[package]] +name = "unicode-bidi" +version = "0.3.13" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460" + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "unicode-normalization" +version = "0.1.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921" +dependencies = [ + "tinyvec", +] + +[[package]] +name = "unicode-width" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" + +[[package]] +name = "unicode-xid" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c" + +[[package]] +name = "url" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "143b538f18257fac9cad154828a57c6bf5157e1aa604d4816b5995bf6de87ae5" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", +] + +[[package]] +name = "utf16_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52df8b7fb78e7910d776fccf2e42ceaf3604d55e8e7eb2dbd183cb1441d8a692" + +[[package]] +name = "utf8_iter" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64a8922555b9500e3d865caed19330172cd67cbf82203f1a3311d8c305cc9f33" + +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + +[[package]] +name = "want" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" +dependencies = [ + "try-lock", +] + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "wasm-bindgen" +version = "0.2.87" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7706a72ab36d8cb1f80ffbf0e071533974a60d0a308d01a5d0375bf60499a342" +dependencies = [ + "cfg-if", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.87" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ef2b6d3c510e9625e5fe6f509ab07d66a760f0885d858736483c32ed7809abd" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", + "syn 2.0.38", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c02dbc21516f9f1f04f187958890d7e6026df8d16540b7ad9492bc34a67cea03" +dependencies = [ + "cfg-if", + "js-sys", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.87" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dee495e55982a3bd48105a7b947fd2a9b4a8ae3010041b9e0faab3f9cd028f1d" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.87" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.38", + "wasm-bindgen-backend", + 
"wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.87" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1" + +[[package]] +name = "web-sys" +version = "0.3.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b85cbef8c220a6abc02aefd892dfc0fc23afb1c6a426316ec33253a3877249b" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-sys" +version = "0.45.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" +dependencies = [ + "windows-targets 0.42.2", +] + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.5", +] + +[[package]] +name = "windows-targets" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" +dependencies = [ + "windows_aarch64_gnullvm 0.42.2", + "windows_aarch64_msvc 0.42.2", + 
"windows_i686_gnu 0.42.2", + "windows_i686_msvc 0.42.2", + "windows_x86_64_gnu 0.42.2", + "windows_x86_64_gnullvm 0.42.2", + "windows_x86_64_msvc 0.42.2", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_i686_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_msvc" +version = "0.42.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + +[[package]] +name = "winreg" +version = "0.50.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1" +dependencies = [ + "cfg-if", + "windows-sys 0.48.0", +] + +[[package]] +name = "write16" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" + +[[package]] +name = "writeable" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0af0c3d13faebf8dda0b5256fa7096a2d5ccb662f7b9f54a40fe201077ab1c2" + +[[package]] +name = "yoke" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61e38c508604d6bbbd292dadb3c02559aa7fff6b654a078a36217cad871636e4" +dependencies = [ + "serde", + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5e19fb6ed40002bab5403ffa37e53e0e56f914a4450c8765f533018db1db35f" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.38", + "synstructure", +] + +[[package]] +name = "zerofrom" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "655b0814c5c0b19ade497851070c640773304939a6c0fd5f5fb43da0696d05b7" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6a647510471d372f2e6c2e6b7219e44d8c574d24fdc11c610a61455782f18c3" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.38", + "synstructure", +] + +[[package]] +name = "zerovec" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1194130c5b155bf8ae50ab16c86ab758cd695cf9ad176d2f870b744cbdbb572e" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acabf549809064225ff8878baedc4ce3732ac3b07e7c7ce6e5c2ccdbc485c324" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.38", +] diff --git a/regex/src/com.oracle.truffle.regex/tools/casefolding/Cargo.toml 
b/regex/src/com.oracle.truffle.regex/tools/casefolding/Cargo.toml new file mode 100644 index 000000000000..36709645e557 --- /dev/null +++ b/regex/src/com.oracle.truffle.regex/tools/casefolding/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "tregex-casefolding" +version = "0.1.0" +edition = "2021" + +[dependencies] +csv = "1.3.0" +error-chain = "0.12.4" +icu_collator = "1.3.2" +icu_locid = "1.3.2" +indicatif = "0.17.7" +oracle = "0.5.7" +reqwest = { version = "0.11.22", features = ["blocking", "gzip"] } diff --git a/regex/src/com.oracle.truffle.regex/tools/casefolding/src/main.rs b/regex/src/com.oracle.truffle.regex/tools/casefolding/src/main.rs new file mode 100644 index 000000000000..ad5bf1b0da98 --- /dev/null +++ b/regex/src/com.oracle.truffle.regex/tools/casefolding/src/main.rs @@ -0,0 +1,2031 @@ +/* + * Copyright (c) 2023, 2023, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * The Universal Permissive License (UPL), Version 1.0 + * + * Subject to the condition set forth below, permission is hereby granted to any + * person obtaining a copy of this software, associated documentation and/or + * data (collectively the "Software"), free of charge and under any and all + * copyright rights in the Software, and any and all patent rights owned or + * freely licensable by each licensor hereunder covering either (i) the + * unmodified Software as contributed to or provided by such licensor, or (ii) + * the Larger Works (as defined below), to deal in both + * + * (a) the Software, and + * + * (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if + * one is included with the Software each a "Larger Work" to which the Software + * is contributed by such licensors), + * + * without restriction, including without limitation the rights to copy, create + * derivative works of, display, perform, and distribute the Software and make, + * use, sell, offer for sale, import, export, 
have made, and have sold the + * Software and the Larger Work(s), and to sublicense the foregoing rights on + * either these or other terms. + * + * This license is subject to the following condition: + * + * The above copyright notice and either this complete permission notice or at a + * minimum a reference to the UPL must be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +use core::cmp::Ordering; +use std::cmp::{max, min}; +use std::collections::{HashMap, HashSet}; +use std::fmt::{Debug, Display, Formatter}; +use std::fs; +use std::fs::File; +use std::io::Write; +use std::path::Path; +use std::process::Command; +use std::time::Instant; + +use csv::{Reader, StringRecord, Trim}; +use error_chain::{bail, error_chain}; +use icu_collator::{CaseLevel, Collator, CollatorOptions, Strength}; +use icu_locid::Locale; +use indicatif::ProgressIterator; +use oracle::{Connection, Connector, Privilege, Statement}; +use oracle::sql_type::OracleType; +use reqwest::Url; + +use crate::OrderMapping::{IntegerOffset, LUT}; +use crate::UnicodeCaseFoldingVariant::{Full, Simple}; + +error_chain! 
{ + foreign_links { + Io(std::io::Error); + HttpRequest(reqwest::Error); + CSV(csv::Error); + OracleDB(oracle::Error); + } +} + +/// refers to the index of a codepoint or string in a global index +type IElement = usize; + +const FILE_FORMAT_VERSION: u16 = 0; +const OUTPUT_FOLDER: &str = "./out"; +const PATH_GRAAL_REPO: &str = "../../../../../"; +const PATH_CASE_FOLD_DATA: &str = "regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/CaseFoldData.java"; +const PATH_ORACLE_DB_CONSTANTS: &str = "regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/OracleDBConstants.java"; +const PATH_ORACLE_DB_TESTS: &str = "regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/test/OracleDBTests.java"; +const GENERATED_CODE_MARKER_BEGIN: &str = " /* GENERATED CODE BEGIN - KEEP THIS MARKER FOR AUTOMATIC UPDATES */"; +const GENERATED_CODE_MARKER_END: &str = " /* GENERATED CODE END - KEEP THIS MARKER FOR AUTOMATIC UPDATES */"; + +#[derive(Debug, Clone)] +struct CollationElement { + string: String, +} + +#[derive(Debug, Clone)] +struct CollationElementIndex { + index_base: usize, + index_src: usize, + index_dst: usize, + element: CollationElement, +} + +#[derive(Debug, Clone, Eq, PartialEq)] +enum EqMapping { + IntegerOffset(i32), + Set(usize), + AlternatingAL, + AlternatingUL, + Single(IElement), +} + +impl EqMapping { + fn from_single_mapping(src: IElement, dst: IElement) -> EqMapping { + let offset = (dst as i32) - (src as i32); + if offset == 1 { + if src & 1 == 0 { EqMapping::AlternatingAL } else { EqMapping::AlternatingUL } + } else { + EqMapping::IntegerOffset(offset) + } + } +} + +#[derive(Debug)] +enum OrderMapping { + IntegerOffset(i32), + LUT(Vec), +} + +trait RangeMapping { + fn lo(&self) -> IElement; + fn hi(&self) -> IElement; + fn mapping(&self) -> &T; +} + +#[derive(Debug)] +struct OrderTableEntry { + lo: usize, + hi: usize, + mapping: OrderMapping, +} + +impl RangeMapping for OrderTableEntry { 
+ fn lo(&self) -> IElement { + self.lo + } + + fn hi(&self) -> IElement { + self.hi + } + + fn mapping(&self) -> &OrderMapping { + &self.mapping + } +} + +#[derive(Debug, Clone, Eq, PartialEq)] +struct EqTableEntry { + lo: IElement, + hi: IElement, + mapping: EqMapping, +} + +impl EqTableEntry { + fn as_dummy(&self) -> EqTableEntry { + EqTableEntry { + lo: self.lo, + hi: self.hi, + mapping: EqMapping::IntegerOffset(0), + } + } + + fn with_hi(&self, hi: IElement) -> EqTableEntry { + EqTableEntry { + lo: self.lo, + hi, + mapping: self.mapping.clone(), + } + } + + fn with_lo(&self, lo: IElement) -> EqTableEntry { + EqTableEntry { + lo, + hi: self.hi, + mapping: self.mapping.clone(), + } + } + + fn with_mapping(&self, mapping: EqMapping) -> EqTableEntry { + EqTableEntry { + lo: self.lo, + hi: self.hi, + mapping, + } + } +} + +impl RangeMapping for EqTableEntry { + fn lo(&self) -> IElement { + self.lo + } + + fn hi(&self) -> IElement { + self.hi + } + + fn mapping(&self) -> &EqMapping { + &self.mapping + } +} + +trait RangeMappingTable> { + fn table(&self) -> &Vec; + + fn binary_search(&self, key: IElement) -> Option<&M> { + let table = self.table(); + let mut lo: i32 = 0; + let mut hi: i32 = (table.len() as i32) - 1; + while lo <= hi { + let mid = (lo + hi) >> 1; + let mid_val = table[mid as usize].lo(); + if mid_val < key { + lo = mid + 1; + } else if mid_val > key { + hi = mid - 1; + } else { + assert!(table[mid as usize].lo() <= key && key <= table[mid as usize].hi(), "{:?}, key: {}", table[mid as usize], key); + return Some(&table[mid as usize]); + } + } + if lo > 0 && table[(lo - 1) as usize].lo() <= key && key <= table[(lo - 1) as usize].hi() { + return Some(&table[(lo - 1) as usize]); + } + return None; + } +} + +struct OrderTable { + table: Vec, +} + +impl RangeMappingTable for OrderTable { + fn table(&self) -> &Vec { + &self.table + } +} + +struct EqTable { + table: Vec, + sets: Vec>, +} + +impl RangeMappingTable for EqTable { + fn table(&self) -> &Vec { + 
&self.table + } +} + +impl OrderTable { + /// Creates a new compressed table from an exhaustive list of collation elements `full_map` mapping `index_src` to `index_dst`. + /// `full_map` must be sorted by `index_src`. + /// + fn create(full_map: &Vec, index_src: fn(&CollationElementIndex) -> usize, index_dst: fn(&CollationElementIndex) -> usize) -> OrderTable { + fn push_entry(full_map: &Vec, + index_src: fn(&CollationElementIndex) -> usize, + index_dst: fn(&CollationElementIndex) -> usize, + table: &mut Vec, last_range_end: usize, prev: usize, cur_index_src: usize) { + let last_index_src = index_src(&full_map[last_range_end]); + let prev_index_src = index_src(&full_map[prev]); + // if range size is 1, use a lookup table + if (cur_index_src - last_index_src) == 1 { + // if the last entry in the mapping is already a lookup table, append to it + if table.last_mut().map(|e| { + match &mut e.mapping { + LUT(lut) => { + assert_eq!(e.hi, prev_index_src - 1, "lookup table must be adjacent to current element"); + e.hi = prev_index_src; + lut.push(index_dst(&full_map[prev])); + false + } + _ => true + } + }).unwrap_or(true) { + // otherwise, create a new lookup table + table.push(OrderTableEntry { + lo: last_index_src, + hi: last_index_src, + mapping: LUT(vec![index_dst(&full_map[prev])]), + }); + } + } else { + // range size is greater than one, create an integer offset mapping + table.push(OrderTableEntry { + lo: last_index_src, + hi: prev_index_src, + mapping: IntegerOffset((index_dst(&full_map[last_range_end]) as i32) - (last_index_src as i32)), + }); + } + } + + let mut table: Vec = Vec::new(); + let mut last_range_end = 0; + // try to find consecutive ranges in the mapping that can be expressed with integer offsets, e.g. 
[1..4] -> [3..6] + for i in 1..full_map.len() { + let prev = i - 1; + if index_src(&full_map[prev]) != index_src(&full_map[i]) - 1 || index_dst(&full_map[prev]) != index_dst(&full_map[i]) - 1 { + push_entry(&full_map, index_src, index_dst, &mut table, last_range_end, prev, index_src(&full_map[i])); + last_range_end = i; + } + } + push_entry(&full_map, index_src, index_dst, &mut table, last_range_end, full_map.len() - 1, index_src(&full_map[full_map.len() - 1]) + 1); + OrderTable { table } + } + + /// Returns the `dst_index` for a given `src_index` + fn lookup(&self, key: usize) -> usize { + self.binary_search(key).map(|e| { + return match &e.mapping { + IntegerOffset(offset) => { + ((key as i32) + offset) as usize + } + LUT(lut) => { + lut[key - e.lo] + } + }; + }).unwrap_or(key) + } + + #[allow(dead_code)] + fn print_size(&self, name: &str) { + let size = self.table.iter().map(|e| { + match &e.mapping { + IntegerOffset(_) => { 12 } + LUT(lut) => { 8 + (lut.len() * 4) } + } + }).reduce(|a, b| a + b).unwrap_or(0); + println!("{:>25} size: {:>6} bytes", name, size); + } +} + +impl EqTable { + /// Creates a table mapping all equivalent collation elements in the given exhaustive list `full_map` to each other. + /// `full_map` must be sorted by the collator, so that equivalent elements are next to each other. 
+ /// + fn create Ordering>(collator: F, full_map: &Vec) -> EqTable { + let mut eq_map_0: Vec = Vec::with_capacity(full_map.len()); + let mut eq_sets: Vec> = Vec::new(); + let mut buf: Vec = Vec::new(); + // first pass: find equivalent elements and create mappings + for i in 1..full_map.len() { + if collator(&full_map[i - 1].element.string, &full_map[i].element.string) == Ordering::Equal { + if buf.is_empty() { + buf.push(full_map[i - 1].index_base); + } + buf.push(full_map[i].index_base); + } else { + if !buf.is_empty() { + EqTable::eq_map_push_first_pass(&mut eq_map_0, &mut eq_sets, &buf); + buf.clear(); + } + } + } + if !buf.is_empty() { + EqTable::eq_map_push_first_pass(&mut eq_map_0, &mut eq_sets, &buf); + } + for eq_table in &mut eq_sets { + eq_table.sort(); + } + EqTable { table: EqTable::eq_map_merge_adjacent(&mut eq_map_0), sets: eq_sets } + } + + fn from_vec<'a>(mut equivalences: Vec>) -> EqTable { + for vec in equivalences.iter_mut() { + vec.sort(); + } + equivalences.sort(); + let mut eq_map_0: Vec = Vec::with_capacity(equivalences.len()); + let mut eq_sets: Vec> = Vec::new(); + // first pass: find equivalent elements and create mappings + for buf in equivalences { + if buf.len() > 1 { + EqTable::eq_map_push_first_pass(&mut eq_map_0, &mut eq_sets, &buf); + } + } + for eq_table in &mut eq_sets { + eq_table.sort(); + } + EqTable { table: EqTable::eq_map_merge_adjacent(&mut eq_map_0), sets: eq_sets } + } + + fn eq_map_push_first_pass(eq_map_0: &mut Vec, eq_tables: &mut Vec>, buf: &Vec) { + if buf.len() == 2 { + let offset = (buf[0] as i32) - (buf[1] as i32); + if offset.abs() == 1 { + // elements indices are adjacent, we can map them with AlternatingAL/UL + let min = min(buf[0], buf[1]); + let max = max(buf[0], buf[1]); + eq_map_0.push(EqTableEntry { + lo: min, + hi: max, + mapping: if min & 1 == 0 { EqMapping::AlternatingAL } else { EqMapping::AlternatingUL }, + }); + } else { + // indices are not adjacent, map both with integer offset + 
eq_map_0.push(EqTableEntry { + lo: buf[0], + hi: buf[0], + mapping: EqMapping::IntegerOffset((buf[1] as i32) - (buf[0] as i32)), + }); + eq_map_0.push(EqTableEntry { + lo: buf[1], + hi: buf[1], + mapping: EqMapping::IntegerOffset(offset), + }); + } + } else { + // more than two equivalent elements, we need a set + for i in buf { + eq_map_0.push(EqTableEntry { + lo: *i, + hi: *i, + mapping: EqMapping::Set(eq_tables.len()), + }); + } + eq_tables.push(buf.to_vec()); + } + } + + fn eq_map_merge_adjacent(eq_map_0: &mut Vec) -> Vec { + // merge adjacent mappings into range-based entries, e.g. `1 -> offset(10), 2 -> offset(10) becomes [1-2] -> offset(10) + eq_map_0.sort_by_key(|x| x.lo); + let mut eq_map: Vec = Vec::new(); + eq_map.push(eq_map_0[0].clone()); + for e in &eq_map_0[1..] { + let last = eq_map.last_mut().unwrap(); + if last.hi == e.lo - 1 && last.mapping == e.mapping { + last.hi = e.hi; + } else { + eq_map.push(e.clone()); + } + } + eq_map + } + + fn create_one_way_mapping(mappings: Vec<(IElement, IElement)>) -> EqTable { + fn can_use_single_mapping(last: &EqTableEntry, dst: IElement) -> bool { + match last.mapping { + EqMapping::IntegerOffset(offset) => { + last.lo == last.hi && last.lo as i32 + offset == dst as i32 + } + EqMapping::Single(last_dst) => { + last_dst == dst + } + _ => false + } + } + + assert!(mappings.len() > 0); + let mut table: Vec = vec![]; + let (src_0, dst_0) = mappings[0]; + table.push(EqTableEntry { + lo: src_0, + hi: src_0, + mapping: EqMapping::from_single_mapping(src_0, dst_0), + }); + for (src, dst) in mappings[1..].iter().cloned() { + let last = table.last().unwrap(); + assert!(src > last.hi); + let mapping = EqMapping::from_single_mapping(src, dst); + if can_use_single_mapping(last, dst) { + table.last_mut().unwrap().mapping = EqMapping::Single(dst); + table.last_mut().unwrap().hi = src; + } else if mapping == last.mapping && src == last.hi + (match mapping { + EqMapping::IntegerOffset(_) => { 1 } + EqMapping::Set(_) => { 1 } + 
EqMapping::AlternatingAL => { 2 } + EqMapping::AlternatingUL => { 2 } + EqMapping::Single(_) => { 1 } + }) { + table.last_mut().unwrap().hi = src; + } else { + table.push(EqTableEntry { + lo: src, + hi: src, + mapping, + }); + } + } + return EqTable { table, sets: vec![] }; + } + + /// Creates a diff-based equivalence table from a given full table `child` and parent mapping `parent`, + /// such that mappings that are equal in both `parent` and `child` are removed from the new table. + /// + fn create_diff(parent: &EqTable, child: &EqTable) -> EqTable { + fn mapping_eq(parent: &EqTable, child: &EqTable, cur_parent: &EqTableEntry, cur_child: &EqTableEntry) -> bool { + match (&cur_parent.mapping, &cur_child.mapping) { + (EqMapping::Set(lut_parent), EqMapping::Set(lut_child)) => { + parent.sets[*lut_parent].eq(&child.sets[*lut_child]) + } + (a, b) => { a.eq(b) } + } + } + fn mapping_clone(child: &EqTable, cur_child: &EqTableEntry, eq_table_diff: &mut Vec, sets_diff: &mut Vec>, sets_map: &mut Vec>) { + if eq_table_diff.last().map(|last| last.hi == cur_child.hi).unwrap_or(false) { + return; + } + match cur_child.mapping { + EqMapping::Set(set_index) => { + match &sets_map[set_index] { + Some(mapped_index) => { + eq_table_diff.push(cur_child.with_mapping(EqMapping::Set(*mapped_index))); + } + None => { + eq_table_diff.push(cur_child.with_mapping(EqMapping::Set(sets_diff.len()))); + sets_map[set_index] = Some(sets_diff.len()); + sets_diff.push(child.sets[set_index].clone()); + } + } + } + _ => { + eq_table_diff.push(cur_child.clone()); + } + } + } + + let mut eq_table_diff: Vec = Vec::with_capacity(child.table.len()); + let mut lut_diff: Vec> = Vec::with_capacity(child.sets.len()); + let mut lut_map: Vec> = vec![None; child.sets.len()]; + let mut i_parent = parent.table.iter(); + let mut i_child = child.table.iter(); + let mut next_parent = i_parent.next(); + let mut next_child = i_child.next(); + let mut tmp; + loop { + match (next_parent, next_child) { + 
(Some(cur_parent), Some(cur_child)) => { + if cur_parent.hi < cur_child.lo { + // parent mapping not present in child - overwrite with dummy + eq_table_diff.push(cur_parent.as_dummy()); + next_parent = i_parent.next(); + } else if cur_child.hi < cur_parent.lo { + // child mapping not present in parent - keep + mapping_clone(child, cur_child, &mut eq_table_diff, &mut lut_diff, &mut lut_map); + next_child = i_child.next(); + } else { + // ranges intersect + if cur_parent.lo < cur_child.lo { + // parent mapping partially not present in child, overwrite non-intersecting lower range with dummy + assert!(cur_parent.hi >= cur_child.lo, "{:?}, {:?}", cur_parent, cur_child); + eq_table_diff.push(cur_parent.with_hi(cur_child.lo - 1).as_dummy()); + } + if cur_child.lo < cur_parent.lo || !mapping_eq(&parent, &child, &cur_parent, &cur_child) { + // child mapping partially not present in parent, or not equal, keep + mapping_clone(child, cur_child, &mut eq_table_diff, &mut lut_diff, &mut lut_map); + } + if cur_parent.hi > cur_child.hi { + // remove intersecting part of parent range + tmp = cur_parent.with_lo(cur_child.hi + 1); + next_parent = Some(&tmp); + next_child = i_child.next(); + } else if cur_child.hi > cur_parent.hi { + // remove intersecting part of child range + tmp = cur_child.with_lo(cur_parent.hi + 1); + next_child = Some(&tmp); + next_parent = i_parent.next(); + } else { + next_child = i_child.next(); + next_parent = i_parent.next(); + } + } + } + (Some(cur_parent), None) => { + // parent mapping not present in child - overwrite with dummy + eq_table_diff.push(cur_parent.as_dummy()); + next_parent = i_parent.next(); + } + (None, Some(cur_child)) => { + // child mapping not present in parent - keep + mapping_clone(child, cur_child, &mut eq_table_diff, &mut lut_diff, &mut lut_map); + next_child = i_child.next(); + } + (None, None) => { + break; + } + } + } + let diff = EqTable { table: eq_table_diff, sets: lut_diff }; + for e in &child.table { + for i in e.lo..e.hi { 
+ let vec1: Vec = child.lookup(i).unwrap(); + let vec2: Vec = diff.lookup(i).unwrap_or_else(|| { parent.lookup(i).unwrap() }); + assert_eq!(HashSet::::from_iter(vec1), HashSet::::from_iter(vec2), ""); + } + } + diff + } + + fn lookup(&self, key: IElement) -> Option> { + self.binary_search(key).map(|e| { + return match &e.mapping { + EqMapping::IntegerOffset(o) => { + vec![key, (o + (key as i32)) as IElement] + } + EqMapping::Set(i) => { + self.sets[*i].clone() + } + EqMapping::AlternatingAL => { + vec![key, key ^ 1] + } + EqMapping::AlternatingUL => { + vec![key, ((key - 1) ^ 1) + 1] + } + EqMapping::Single(value) => { + vec![key, *value] + } + }; + }) + } + + #[allow(dead_code)] + fn print_size(&self) { + println!("{:>25} size: {:>6} bytes", "equivalence table", (self.table.len() * 3 + + self.sets.len() + + self.sets.iter().map(|x| x.len()).reduce(|a, b| a + b).unwrap_or(0)) * 4); + } + + fn dump_java(&self, out: &mut Vec, name: &str, parent: Option<&str>) -> Result<()> { + writeln!(out, "private static final CaseFoldEquivalenceTable {} = new CaseFoldEquivalenceTable({}, new CodePointSet[] {{", name, parent.unwrap_or("null"))?; + for set in &self.sets { + writeln!(out, "rangeSet({}),", list_to_ranges_str(set))?; + } + write!(out, "}},")?; + self.dump_java_table(out)?; + Ok(()) + } + + fn dump_java_one_way(&self, out: &mut Vec, name: &str, parent: Option<&str>) -> Result<()> { + write!(out, "private static final CaseFoldTable {} = new CaseFoldTable({}, ", name, parent.unwrap_or("null"))?; + assert!(self.sets.is_empty()); + self.dump_java_table(out)?; + Ok(()) + } + + fn dump_java_table(&self, out: &mut Vec) -> Result<()> { + writeln!(out, "new int[] {{")?; + for e in &self.table { + write!(out, "{:#08x}, {:#08x}, ", e.lo, e.hi)?; + match &e.mapping { + EqMapping::IntegerOffset(o) => { + writeln!(out, "INTEGER_OFFSET, {},", *o)?; + } + EqMapping::Set(i) => { + writeln!(out, "DIRECT_MAPPING, {},", *i)?; + } + EqMapping::AlternatingAL => { + writeln!(out, 
"ALTERNATING_AL, 0,")?; + } + EqMapping::AlternatingUL => { + writeln!(out, "ALTERNATING_UL, 0,")?; + } + EqMapping::Single(v) => { + writeln!(out, "DIRECT_SINGLE, {},", *v)?; + } + } + } + writeln!(out, "}});")?; + Ok(()) + } +} + +fn list_to_ranges(set: &Vec) -> Vec { + let mut ranges: Vec = vec![]; + if set.len() > 0 { + ranges.push(set[0]); + let mut last = set[0]; + for v in set[1..].iter().cloned() { + assert!(v >= last); + if v != last + 1 { + ranges.push(last); + ranges.push(v); + } + last = v; + } + ranges.push(last); + } + return ranges; +} + +fn list_to_ranges_str(set: &Vec) -> String { + list_to_ranges(set).iter().map(|v| format!("{:#08x}", v)).collect::>().join(", ") +} + +struct CollationMap<'a> { + full_map: Vec, + name: &'a str, + equality: EqTable, + equality_diff: Option, + order: OrderTable, + order_reverse: OrderTable, + parent: Option<&'a CollationMap<'a>>, +} + +impl CollationMap<'_> { + /// Sorts a given list of collation elements with a given collator and creates lookup tables that allow + /// - looking up the sorting index of a given element (table `order`) + /// - looking up the element corresponding to a given sorting index (table `order_reverse`) + /// - looking up the set of elements that are considered equivalent to a given element (table `equality`) + /// + fn create<'a, F: Fn(&str, &str) -> Ordering>(collator: F, name: &'a str, + base_map: &'a Vec, + collation_elements: &Vec, + parent: Option<&'a CollationMap<'a>>) -> CollationMap<'a> { + let mut full_map: Vec = base_map.to_vec(); + // sort by initial index first, to keep the order of equal elements stable + full_map.sort_by_key(|a| a.index_base); + full_map.sort_by(|a, b| collator(&a.element.string, &b.element.string)); + + let eq_table = EqTable::create(&collator, &full_map); + let eq_diff = parent.map(|p| { EqTable::create_diff(&p.equality, &eq_table) }); + + for i in 0..full_map.len() { + full_map[i].index_src = full_map[i].index_dst; + full_map[i].index_dst = i; + } + + let 
table_dst_src = OrderTable::create(&full_map, |e| e.index_dst, |e| e.index_src); + full_map.sort_by_key(|e| e.index_src); + + let table_src_dst = OrderTable::create(&full_map, |e| e.index_src, |e| e.index_dst); + + let map = CollationMap { name, full_map, equality: eq_table, equality_diff: eq_diff, order: table_src_dst, order_reverse: table_dst_src, parent }; + map.verify(&collator, collation_elements); + map + } + + fn equality_diff(&self) -> &EqTable { + match &self.equality_diff { + None => { &self.equality } + Some(diff) => { diff } + } + } + + fn verify Ordering>(&self, collator: F, collation_elements: &Vec) { + for e in &self.full_map { + assert_eq!(self.order.lookup(e.index_src), e.index_dst, "elem: {:?}, table entry: {:?}", e, self.order.binary_search(e.index_src)); + assert_eq!(self.order_reverse.lookup(e.index_dst), e.index_src, "elem: {:?}, table entry: {:?}", e, self.order_reverse.binary_search(e.index_dst)); + self.equality_diff().lookup(e.index_base).map(|x| { + for pair in x.windows(2) { + assert_eq!(collator(&collation_elements[pair[0]].string, &collation_elements[pair[1]].string), Ordering::Equal, "{:?} <=> {:?}", + collation_elements[pair[0]].string, collation_elements[pair[1]].string); + } + }); + } + } + + fn dump(&self, path: &Path) -> std::io::Result { + let mut file = File::create(path).expect("File open failed"); + file.write("TRGX".as_bytes())?; + file.write(&FILE_FORMAT_VERSION.to_le_bytes())?; + match self.parent { + Some(parent) => { + write_str(&mut file, parent.name)?; + } + None => { + write_usize(&mut file, 0)?; + } + } + write_usize(&mut file, self.equality_diff().sets.len())?; + for e in &self.equality_diff().sets { + write_usize(&mut file, e.len())?; + for v in e { + write_usize(&mut file, *v)?; + } + } + write_usize(&mut file, self.equality_diff().table.len())?; + for e in &self.equality_diff().table { + write_usize(&mut file, e.lo)?; + write_usize(&mut file, e.hi)?; + match &e.mapping { + EqMapping::IntegerOffset(o) => { + 
file.write(&[0])?; + write_i32(&mut file, *o)?; + } + EqMapping::Set(i) => { + file.write(&[1])?; + write_usize(&mut file, *i)?; + } + EqMapping::AlternatingAL => { + file.write(&[2])?; + } + EqMapping::AlternatingUL => { + file.write(&[3])?; + } + EqMapping::Single(v) => { + file.write(&[4])?; + write_usize(&mut file, *v)?; + } + } + } + Self::dump_order_table(&mut file, &self.order)?; + Self::dump_order_table(&mut file, &self.order_reverse) + } + + fn dump_order_table(mut file: &mut File, order_table: &OrderTable) -> std::io::Result { + write_usize(&mut file, order_table.table.len())?; + for e in &order_table.table { + write_usize(&mut file, e.lo)?; + write_usize(&mut file, e.hi)?; + match &e.mapping { + IntegerOffset(o) => { + file.write(&[0])?; + write_i32(&mut file, *o)?; + } + LUT(tbl) => { + file.write(&[1])?; + write_usize(&mut file, tbl.len())?; + for v in tbl { + write_usize(&mut file, *v)?; + } + } + } + } + Ok(0) + } + + #[allow(dead_code)] + fn print_size(&self) { + println!("collation \"{}\":", self.name); + self.equality_diff().print_size(); + self.order.print_size("order mapping"); + self.order_reverse.print_size("reverse order mapping"); + println!(); + } +} + +fn write_str(file: &mut File, string: &str) -> std::io::Result { + write_usize(file, string.len())?; + file.write(string.as_bytes()) +} + +fn write_i32(file: &mut File, i: i32) -> std::io::Result { + let bytes = &i.to_le_bytes(); + assert!(bytes[3] == 0 || bytes[3] == 0xff, "assumption broken: {:?} is larger than 0xff_ffff", i); + file.write(&bytes[0..3]) +} + +fn write_usize(file: &mut File, i: usize) -> std::io::Result { + assert!(i <= 0x7f_ffff, "assumption broken: {:?} is larger than 0x7f_ffff", i); + file.write(&i.to_le_bytes()[0..3]) +} + +enum CollatorSetting { + Default, + CI, + AI, + CIAI, +} + +impl Display for CollatorSetting { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", match self { + CollatorSetting::Default => { "DEFAULT" } + 
CollatorSetting::CI => { "CI" } + CollatorSetting::AI => { "AI" } + CollatorSetting::CIAI => { "CI_AI" } + }) + } +} + +fn get_collator_from_locale(locale: &Locale, collator_setting: &CollatorSetting) -> impl Fn(&str, &str) -> Ordering { + let mut options = CollatorOptions::new(); + match collator_setting { + CollatorSetting::Default => { + options.strength = Some(Strength::Secondary); + options.case_level = Some(CaseLevel::On); + } + CollatorSetting::CI => { + options.strength = Some(Strength::Secondary); + options.case_level = Some(CaseLevel::Off); + } + CollatorSetting::AI => { + options.strength = Some(Strength::Primary); + options.case_level = Some(CaseLevel::On); + } + CollatorSetting::CIAI => { + options.strength = Some(Strength::Primary); + options.case_level = Some(CaseLevel::Off); + } + } + let collator: Collator = Collator::try_new( + &locale.into(), + options, + ).unwrap(); + move |a: &str, b: &str| { + collator.compare(a, b) + } +} + +#[allow(dead_code)] +fn dump_collation<'a, F: Fn(&str, &str) -> Ordering>(collator: F, folder: &Path, name: &'a str, collation_elements: &Vec, parent_map: &'a CollationMap<'a>) -> CollationMap<'a> { + let map = CollationMap::create(collator, name, &parent_map.full_map, &collation_elements, Some(&parent_map)); + map.dump(folder.join(format!("{}.trtbl", name).as_str()).as_path()).expect("file dump failed"); + map +} + +#[allow(dead_code)] +fn dump_collations<'a>(base_map: &Vec, collation_elements: &Vec) -> std::io::Result<()> { + let time = Instant::now(); + + for collator_setting in [CollatorSetting::Default, CollatorSetting::CI, CollatorSetting::AI, CollatorSetting::CIAI] { + let folder = Path::new(OUTPUT_FOLDER).join(collator_setting.to_string()); + if !folder.exists() { + std::fs::create_dir(folder.as_path())?; + } + let map_ducet = CollationMap::create(get_collator_from_locale(&Locale::default(), &collator_setting), "ducet", &base_map, &collation_elements, None); + map_ducet.dump(folder.join("ducet.trtbl").as_path())?; 
+ println!("A -> {:?}", map_ducet.equality_diff().lookup(0x41).map(|m| m.iter().map(|x| char::from_u32(*x as u32).unwrap()).collect::>()).unwrap_or(Vec::new())); + } + + println!("done, took {:?}", time.elapsed()); + Ok(()) +} + +fn main() -> Result<()> { + oracledb_start_docker_container(); + generate_case_fold_data()?; + oracledb_generate_posix_char_classes()?; + oracledb_generate_tests()?; + Ok(()) +} + +fn generate_case_fold_data() -> Result<()> { + let mut multi_character_strings: HashMap = HashMap::new(); + + let unicode_version = "15.0.0"; + let unicode_version_oracle_db = "12.1.0"; + let unicode_data_txt = fetch(format!("https://www.unicode.org/Public/{}/ucd/UnicodeData.txt", unicode_version))?; + let unicode_case_folding_txt = fetch(format!("https://www.unicode.org/Public/{}/ucd/CaseFolding.txt", unicode_version))?; + let unicode_case_folding_txt_oracle = fetch(format!("https://www.unicode.org/Public/{}/ucd/CaseFolding.txt", unicode_version_oracle_db))?; + let unicode_special_casing = fetch(format!("https://www.unicode.org/Public/{}/ucd/SpecialCasing.txt", unicode_version))?; + + let eq_unicode_simple = unicode_case_folding(&unicode_case_folding_txt, &mut multi_character_strings, Simple)?; + let eq_js_nu = js_non_unicode_case_folding(&unicode_data_txt, &unicode_special_casing, &mut multi_character_strings)?; + let eq_python = python_unicode_case_folding(&unicode_data_txt, &unicode_special_casing, &mut multi_character_strings)?; + let eq_ruby = unicode_case_folding_one_way(&unicode_case_folding_txt, &mut multi_character_strings, Full)?; + let eq_oracle = unicode_case_folding_one_way(&unicode_case_folding_txt_oracle, &mut multi_character_strings, Full)?; + let eq_oracle_ai = oracledb_extract_ai_case_fold_table(&mut multi_character_strings)?; + let foldable_chars: Vec = parse_case_folding_txt(&unicode_case_folding_txt, Simple)?.iter().map(|(src, _)| src.chars().next().unwrap() as IElement).collect(); + + let mut out = vec![]; + writeln!(out)?; + 
writeln!(out)?; + + writeln!(out, "public static final String[] MULTI_CHAR_SEQUENCES = {{")?; + let mut strings_ordered = vec![""; multi_character_strings.len()]; + for (s, i) in multi_character_strings.iter() { + strings_ordered[*i - 0x11_0000] = s.as_str(); + } + for s in strings_ordered { + writeln!(out, "\"{}\",", java_string_escape(s))?; + } + writeln!(out, "}};")?; + let unicode_simple_name = format!("UNICODE_{}_SIMPLE", unicode_version.replace(".", "_")); + let unicode_full_name = format!("UNICODE_{}_FULL", unicode_version.replace(".", "_")); + eq_unicode_simple.dump_java(&mut out, unicode_simple_name.as_str(), None)?; + EqTable::create_diff(&eq_unicode_simple, &eq_js_nu).dump_java(&mut out, "JS_NON_UNICODE", Some(unicode_simple_name.as_str()))?; + EqTable::create_diff(&eq_unicode_simple, &eq_python).dump_java(&mut out, "PYTHON_UNICODE", Some(unicode_simple_name.as_str()))?; + eq_ruby.dump_java_one_way(&mut out, unicode_full_name.as_str(), None)?; + EqTable::create_diff(&eq_ruby, &eq_oracle).dump_java_one_way(&mut out, "ORACLE_DB", Some(unicode_full_name.as_str()))?; + eq_oracle_ai.dump_java_one_way(&mut out, "ORACLE_DB_AI", None)?; + writeln!(out, "public static final CodePointSet FOLDABLE_CHARACTERS = rangeSet({});", list_to_ranges_str(&foldable_chars))?; + + writeln!(out)?; + insert_generated_code(Path::new(PATH_GRAAL_REPO).join(PATH_CASE_FOLD_DATA).as_path(), &out)?; + Ok(()) +} + +fn java_string_escape(s: &str) -> String { + s.chars().map(|c| { + if c == '\\' { + return "\\\\".to_string(); + } + if ' ' <= c && c <= '~' { + return c.to_string(); + } + let mut buf = [0; 2]; + return c.encode_utf16(&mut buf).iter().map(|v| format!("\\u{:04x}", v)).collect::(); + }).collect::() +} + +fn insert_generated_code(path: &Path, code: &Vec) -> Result<()> { + let file_content = fs::read_to_string(path)?; + let pos_begin = file_content.find(GENERATED_CODE_MARKER_BEGIN).expect(format!("generated code begin marker not found in {}", path.to_str().unwrap()).as_str()); + 
let pos_end = file_content.find(GENERATED_CODE_MARKER_END).expect(format!("generated code end marker not found in {}", path.to_str().unwrap()).as_str()); + let mut f = File::create(path)?; + f.write(file_content[0..pos_begin + GENERATED_CODE_MARKER_BEGIN.len()].as_bytes())?; + f.write(code)?; + f.write(file_content[pos_end..file_content.len()].as_bytes())?; + Ok(()) +} + +fn fetch(url: String) -> Result { + println!("fetching {}", url); + let path = Path::new("tmp").join(Path::new(&Url::parse(url.as_str()).unwrap().path()[1..])); + fs::create_dir_all(path.parent().unwrap()).expect("mkdir failed"); + if path.exists() { + return Ok(fs::read_to_string(path)?); + } + let body = reqwest::blocking::get(url)?.text()?; + fs::write(path, &body).expect("write to download cache failed"); + Ok(body) +} + +fn unicode_table(file: &String) -> Result> { + Ok(csv::ReaderBuilder::new().has_headers(false).delimiter(b';').comment(Some(b'#')).trim(Trim::All).flexible(true).from_reader(file.as_bytes())) +} + +fn unicode_table_cell(record: &StringRecord, i: usize) -> String { + parse_hex_chars(record.get(i).unwrap()) +} + +fn parse_hex_chars(s: &str) -> String { + s.split(' ').map(|c| { + char::from_u32(u32::from_str_radix(c, 16).unwrap()).unwrap() + }).collect::() +} + +enum UnicodeCaseFoldingVariant { + Simple, + Full, +} + +impl UnicodeCaseFoldingVariant { + fn type_name(&self) -> &'static str { + match self { + Simple => { "S" } + Full => { "F" } + } + } +} + +fn parse_case_folding_txt(unicode_case_folding: &String, variant: UnicodeCaseFoldingVariant) -> Result> { + Ok(Vec::from_iter(unicode_table(unicode_case_folding)?.records().flat_map(|result| { + let record = result.ok()?; + let t = record.get(1).unwrap(); + if t == "C" || t == variant.type_name() { + return Some((unicode_table_cell(&record, 0), unicode_table_cell(&record, 2))); + } + None + }))) +} + +fn unicode_case_folding(unicode_case_folding: &String, multi_character_strings: &mut HashMap, variant: 
UnicodeCaseFoldingVariant) -> Result { + let mut eq_builder = EquivalenceBuilder::new(multi_character_strings); + for (src, dst) in parse_case_folding_txt(unicode_case_folding, variant)? { + eq_builder.add_equivalence(src.as_str(), dst.as_str()); + } + Ok(eq_builder.create_eq_table()) +} + +fn unicode_case_folding_one_way(unicode_case_folding: &String, multi_character_strings: &mut HashMap, variant: UnicodeCaseFoldingVariant) -> Result { + let mut eq_builder = EquivalenceBuilder::new(multi_character_strings); + let mut mappings: Vec<(IElement, IElement)> = vec![]; + for (src, dst) in parse_case_folding_txt(unicode_case_folding, variant)? { + mappings.push((eq_builder.index(src.as_str()), eq_builder.index(dst.as_str()))); + } + Ok(EqTable::create_one_way_mapping(mappings)) +} + +fn js_non_unicode_case_folding(unicode_data: &String, unicode_special_casing: &String, multi_character_strings: &mut HashMap) -> Result { + let mut upper_map: HashMap = HashMap::new(); + for result in unicode_table(unicode_data)?.records() { + let record = result?; + if record.get(12).unwrap() == "" { + // Drop entries without toUppercase mapping + continue; + } + upper_map.insert(unicode_table_cell(&record, 0), unicode_table_cell(&record, 12)); + } + for result in unicode_table(unicode_special_casing)?.records() { + let record = result?; + if record.len() > 5 { + // Drop entries with conditions + continue; + } + upper_map.insert(unicode_table_cell(&record, 0), unicode_table_cell(&record, 3)); + } + let mut eq_builder = EquivalenceBuilder::new(multi_character_strings); + for (chr, upper) in upper_map { + let c = chr.chars().next().unwrap(); + let u = upper.chars().next().unwrap(); + if upper.chars().count() > 1 || u >= '\u{10000}' { + // Only follow rules which give map to a single UTF-16 code unit + continue; + } + if c > '\u{7f}' && u <= '\u{7f}' { + // Do not allow non-ASCII characters to cross into ASCII. 
+ continue; + } + if c == u { + // Drop trivial mappings + continue; + } + eq_builder.add_equivalence(chr.as_str(), upper.as_str()); + } + Ok(eq_builder.create_eq_table()) +} + +fn python_unicode_case_folding(unicode_data: &String, unicode_special_casing: &String, multi_character_strings: &mut HashMap) -> Result { + fn read_data_file_mapping(unicode_data_file: &String, multi_character_strings: &mut HashMap, cell_src: usize, cell_dst: usize) -> Result>> { + let mut eq_builder = EquivalenceBuilder::new(multi_character_strings); + for result in unicode_table(unicode_data_file)?.records() { + let record = result?; + let dst = record.get(cell_dst).unwrap(); + if dst != "" { + eq_builder.add_equivalence(unicode_table_cell(&record, cell_src).as_str(), parse_hex_chars(dst).as_str()); + } + } + Ok(eq_builder.equivalences) + } + + fn read_special_casing_mapping(unicode_special_casing: &String, multi_character_strings: &mut HashMap, cell_src: usize, cell_dst: usize) -> Result>> { + let mut eq_builder = EquivalenceBuilder::new(multi_character_strings); + for result in unicode_table(unicode_special_casing)?.records() { + let record = result?; + if record.len() > 5 { + // Drop entries with conditions + continue; + } + let c = unicode_table_cell(&record, cell_src); + let dst = unicode_table_cell(&record, cell_dst); + if dst.chars().count() > 1 { + eq_builder.add_equivalence_src_only(c.as_str(), dst.as_str()); + } + } + Ok(eq_builder.equivalences) + } + + let eq_lower = read_data_file_mapping(unicode_data, multi_character_strings, 0, 12)?; + let eq_upper = read_data_file_mapping(unicode_data, multi_character_strings, 0, 13)?; + let eq_special_lower = read_special_casing_mapping(unicode_special_casing, multi_character_strings, 0, 1)?; + let eq_special_upper = read_special_casing_mapping(unicode_special_casing, multi_character_strings, 0, 3)?; + let merged = Vec::from_iter(merge_eq_classes(merge_eq_classes(eq_lower.values(), eq_special_lower.values()).iter(), 
merge_eq_classes(eq_upper.values(), eq_special_upper.values()).iter()).iter().map(|set| Vec::from_iter(set.iter().cloned()))); + Ok(EqTable::from_vec(merged)) +} + +struct EquivalenceBuilder<'a> { + multi_character_strings: &'a mut HashMap, + equivalences: HashMap>, +} + +impl EquivalenceBuilder<'_> { + fn new(multi_character_strings: &mut HashMap) -> EquivalenceBuilder { + EquivalenceBuilder { multi_character_strings, equivalences: Default::default() } + } + + fn index(&mut self, s: &str) -> IElement { + if s.chars().count() == 1 { + return s.chars().next().unwrap() as IElement; + } + let next_id = self.multi_character_strings.len() + 0x11_0000; + return *self.multi_character_strings.entry(s.to_string()).or_insert(next_id); + } + + fn add_equivalence(&mut self, a: &str, b: &str) { + let i = self.index(a); + let j = self.index(b); + let buf = self.equivalences.entry(j).or_default(); + if buf.len() == 0 { + buf.push(j); + } + buf.push(i); + } + + fn add_equivalence_src_only(&mut self, a: &str, b: &str) { + let i = self.index(a); + let j = self.index(b); + self.equivalences.entry(j).or_default().push(i); + } + + fn create_eq_table(&mut self) -> EqTable { + EqTable::from_vec(Vec::from_iter(self.equivalences.values().cloned())) + } +} + +fn merge_eq_classes<'a, I, Inner>(a: I, b: I) -> Vec> where I: Iterator, Inner: IntoIterator + Copy + Debug { + let eq_classes_a: Vec> = Vec::from_iter(a.map(|eq_class_a| HashSet::from_iter(eq_class_a.into_iter().cloned()))); + let chars_a_mapped_to_class_index: HashMap = HashMap::from_iter(eq_classes_a.iter().enumerate().flat_map(|(i, set)| { + set.iter().cloned().map(move |v| (v, i)) + })); + let mut eq_class_a_copy = vec![true; eq_classes_a.len()]; + let mut merged_classes: Vec> = b.map(|eq_class_b| { + HashSet::from_iter(eq_class_b.into_iter().flat_map(|char_b: &IElement| chars_a_mapped_to_class_index.get(char_b)).flat_map(|i| { + eq_class_a_copy[*i] = false; + eq_classes_a.get(*i) + 
}).flatten().cloned().chain(eq_class_b.into_iter().cloned())) + }).collect(); + for (i, copy) in eq_class_a_copy.iter().enumerate() { + if *copy { + merged_classes.push(eq_classes_a.get(i).unwrap().clone()); + } + } + merged_classes +} + +fn oracledb_start_docker_container() { + if String::from_utf8(Command::new("docker").args(["container", "ls", "--filter", "name=oracle-db", "--format", "{{json .Names}}"]).output().expect("docker ls failed").stdout).expect("could not decode output of 'docker ls'").trim() == "\"oracle-db\"" { + return; + } + if String::from_utf8(Command::new("docker").args(["container", "ls", "-a", "--filter", "name=oracle-db", "--format", "{{json .Names}}"]).output().expect("docker ls failed").stdout).expect("could not decode output of 'docker ls'").trim() != "\"oracle-db\"" { + Command::new("docker").args(["run", "-d", "--name", "oracle-db", "-p", "1521:1521", "-p", "5500:5500", "-e", "ORACLE_PWD=passwd", "container-registry.oracle.com/database/express:21.3.0-xe"]).output().expect("docker run failed"); + } + let docker_start = Command::new("docker").args(["start", "oracle-db"]).output().expect("docker start failed"); + if docker_start.status.code().unwrap() != 0 { + println!("{}", String::from_utf8(docker_start.stderr).unwrap()); + panic!("docker start failed"); + } + // wait for db startup + std::thread::sleep(std::time::Duration::from_secs(8)); +} + +fn oracledb_connect() -> std::result::Result { + Connector::new("sys", "passwd", "//localhost/XE").privilege(Privilege::Sysdba).connect().map_err(|error| { + match &error { + oracle::Error::OciError(db_error) => { + if db_error.code() == 12637 { + println!("Could not connect to docker container, you may have to add {{ \"userland-proxy\": false }} to /etc/docker/daemon.json"); + println!("see https://franckpachot.medium.com/19c-instant-client-and-docker-1566630ab20e"); + } + error + } + _ => error + } + }) +} + +fn oracledb_extract_ai_case_fold_table<'a>(multi_character_strings: &mut HashMap) -> 
Result { + let conn = oracledb_connect()?; + + let mut eq_builder = EquivalenceBuilder::new(multi_character_strings); + let mut mappings: Vec<(IElement, IElement)> = vec![]; + + let query = "select nlssort(:c, 'nls_sort = binary_ai') from dual"; + println!("extracting accent insensitive mappings from OracleDB"); + let mut statement = conn.statement(query).build()?; + for s in (0u32..0xd800).chain(0xe000..0x110000).map(|i| String::from(char::from_u32(i).unwrap())).progress_count(0xd800 + (0x110000 - 0xe000)) { + assert_eq!(s.chars().count(), 1); + let base_chars_bytes = statement.query_row_as::>(&[&s]).unwrap(); + let base_chars_u16: Vec = base_chars_bytes.chunks_exact(2).into_iter().map(|a| u16::from_le_bytes([a[1], a[0]])).collect(); + let base_chars = String::from_utf16(base_chars_u16.as_slice()).unwrap(); + if base_chars != s { + mappings.push((eq_builder.index(s.as_str()), eq_builder.index(base_chars.as_str()))); + } + } + Ok(EqTable::create_one_way_mapping(mappings)) +} + +fn oracledb_create_chars_table(conn: &Connection) -> Result<()> { + match conn.query("select * from chars where v = 0", &[]) { + Ok(_) => { + Ok(()) + } + Err(oracle::Error::OciError(db_error)) if db_error.code() == 942 => { + // table does not exist + conn.execute("create table chars(v int, c varchar2(32))", &[])?; + let query = "insert into chars(v, c) values (:v, :c)"; + let mut statement = conn.batch(query, 0x1000).build()?; + for i in (0u32..0xd800).chain(0xe000..0x110000) { + statement.append_row(&[&i, &String::from(char::from_u32(i).unwrap())])?; + } + statement.execute()?; + conn.commit()?; + Ok(()) + } + Err(e) => Err(e.into()) + } +} + +fn oracledb_generate_posix_char_classes() -> Result<()> { + let conn = oracledb_connect()?; + oracledb_create_chars_table(&conn)?; + let query = "SELECT v from chars WHERE REGEXP_LIKE(c, :r, '') ORDER BY v"; + let mut statement = conn.statement(query).build()?; + let mut out = vec![]; + for name in ["alpha", "blank", "cntrl", "digit", "graph", 
"lower", "print", "punct", "space", "upper", "xdigit"] { + let mut chars: Vec = vec![]; + for row_result in statement.query_as::(&[&format!("[[:{}:]]", name).as_str()])? { + chars.push(row_result?); + } + writeln!(out, "\n\nPOSIX_CHAR_CLASSES.put(\"{}\", CodePointSet.createNoDedup(", name)?; + writeln!(out, "{}));\n", list_to_ranges_str(&chars))?; + } + insert_generated_code(Path::new(PATH_GRAAL_REPO).join(PATH_ORACLE_DB_CONSTANTS).as_path(), &out)?; + Ok(()) +} + +fn oracledb_generate_tests() -> Result<()> { + enum TestResult { + Match(Vec), + NoMatch, + SyntaxError(String), + } + + fn run_test(statement: &mut Statement, pattern: &str, flags: &str, input: &str, from_index: i32) -> Result { + fn count_groups(pattern: &str) -> i32 { + let mut par_open = 0; + let mut escaped = false; + let mut n = 1; + for c in pattern.chars() { + if !escaped { + if c == '(' { + par_open += 1; + } else if c == ')' { + if par_open > 0 { + par_open -= 1; + n += 1; + } + } + } + escaped = c == '\\'; + } + return min(n, 10); + } + let occurrence = 1; + let n_groups = count_groups(pattern); + let mut groups = vec![]; + for i_group in 0..n_groups { + for start_or_end in [0, 1] { + // explicit type for flags string: the client library will set the data type of strings to NVARCHAR2, but REGEXP_INSTR only accepts VARCHAR or CHAR on the flags parameter + match statement.query_row_as::(&[&input, &pattern, &from_index, &occurrence, &start_or_end, &(&flags, &OracleType::Char(10)), &i_group]) { + Ok(i) => { + if i_group == 0 && i == 0 { + return Ok(TestResult::NoMatch); + } + groups.push(i - 1); + } + Err(oracle::Error::OciError(e)) => { + return Ok(TestResult::SyntaxError(e.message()[(e.message().find(": ").unwrap() + 2)..].to_string())); + } + Err(e) => { + bail!(e); + } + } + } + } + return Ok(TestResult::Match(groups)); + } + + let conn = oracledb_connect()?; + let query = "SELECT REGEXP_INSTR(:input, :pattern, :fromIndex, :occurrence, :startOrEnd, :flags, :iGroup) from dual"; + let mut 
statement = conn.statement(query).build()?; + let mut out = vec![]; + writeln!(out)?; + for (pattern, flags, input) in [ + ("abracadabra$", "", "abracadabracadabra"), + ("a...b", "", "abababbb"), + ("XXXXXX", "", "..XXXXXX"), + ("\\)", "", "()"), + ("a]", "", "a]a"), + ("}", "", "}"), + ("\\}", "", "}"), + ("\\]", "", "]"), + ("]", "", "]"), + ("]", "", "]"), + ("{", "", "{"), + ("}", "", "}"), + ("^a", "", "ax"), + ("\\^a", "", "a^a"), + ("a\\^", "", "a^"), + ("a$", "", "aa"), + ("a\\$", "", "a$"), + ("a($)", "", "aa"), + ("a*(^a)", "", "aa"), + ("(..)*(...)*", "", "a"), + ("(..)*(...)*", "", "abcd"), + ("(ab|a)(bc|c)", "", "abc"), + ("(ab)c|abc", "", "abc"), + ("a{0}b", "", "ab"), + ("(a*)(b?)(b+)b{3}", "", "aaabbbbbbb"), + ("(a*)(b{0,1})(b{1,})b{3}", "", "aaabbbbbbb"), + ("a{9876543210}", "", "a"), + ("((a|a)|a)", "", "a"), + ("(a*)(a|aa)", "", "aaaa"), + ("a*(a.|aa)", "", "aaaa"), + ("a(b)|c(d)|a(e)f", "", "aef"), + ("(a|b)?.*", "", "b"), + ("(a|b)c|a(b|c)", "", "ac"), + ("(a|b)c|a(b|c)", "", "ab"), + ("(a|b)*c|(a|ab)*c", "", "abc"), + ("(a|b)*c|(a|ab)*c", "", "xc"), + ("(.a|.b).*|.*(.a|.b)", "", "xa"), + ("a?(ab|ba)ab", "", "abab"), + ("a?(ac{0}b|ba)ab", "", "abab"), + ("ab|abab", "", "abbabab"), + ("aba|bab|bba", "", "baaabbbaba"), + ("aba|bab", "", "baaabbbaba"), + ("(aa|aaa)*|(a|aaaaa)", "", "aa"), + ("(a.|.a.)*|(a|.a...)", "", "aa"), + ("ab|a", "", "xabc"), + ("ab|a", "", "xxabc"), + ("(Ab|cD)*", "", "aBcD"), + ("[^-]", "", "--a"), + ("[a-]*", "", "--a"), + ("[a-m-]*", "", "--amoma--"), + (":::1:::0:|:::1:1:0:", "", ":::0:::1:::1:::0:"), + (":::1:::0:|:::1:1:1:", "", ":::0:::1:::1:::0:"), + ("[[:upper:]]", "", "A"), + ("[[:lower:]]+", "", "`az{"), + ("[[:upper:]]+", "", "@AZ["), + ("[[-]]", "", "[[-]]"), + ("\\n", "", "\\n"), + ("\\n", "", "\\n"), + ("[^a]", "", "\\n"), + ("\\na", "", "\\na"), + ("(a)(b)(c)", "", "abc"), + ("xxx", "", "xxx"), + ("(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\\* */?)0*[6-7]))([^0-9]|$)", "", "feb 6,"), + ("(^|[ (,;])((([Ff]eb[^ ]* 
*|0*2/|\\* */?)0*[6-7]))([^0-9]|$)", "", "2/7"), + ("(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\\* */?)0*[6-7]))([^0-9]|$)", "", "feb 1,Feb 6"), + ("((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))", "", "x"), + ("((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))*", "", "xx"), + ("a?(ab|ba)*", "", "ababababababababababababababababababababababababababababababababababababababababa"), + ("abaa|abbaa|abbbaa|abbbbaa", "", "ababbabbbabbbabbbbabbbbaa"), + ("abaa|abbaa|abbbaa|abbbbaa", "", "ababbabbbabbbabbbbabaa"), + ("aaac|aabc|abac|abbc|baac|babc|bbac|bbbc", "", "baaabbbabac"), + (".*", "", "\\x01\\xff"), + ("aaaa|bbbb|cccc|ddddd|eeeeee|fffffff|gggg|hhhh|iiiii|jjjjj|kkkkk|llll", "", "XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa"), + ("aaaa\\nbbbb\\ncccc\\nddddd\\neeeeee\\nfffffff\\ngggg\\nhhhh\\niiiii\\njjjjj\\nkkkkk\\nllll", "", "XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa"), + ("a*a*a*a*a*b", "", "aaaaaaaaab"), + ("^", "", "a"), + ("$", "", "a"), + ("^$", "", "a"), + ("^a$", "", "a"), + ("abc", "", "abc"), + ("abc", "", "xabcy"), + ("abc", "", "ababc"), + ("ab*c", "", "abc"), + ("ab*bc", "", "abc"), + ("ab*bc", "", "abbc"), + ("ab*bc", "", "abbbbc"), + ("ab+bc", "", "abbc"), + ("ab+bc", "", "abbbbc"), + ("ab?bc", "", "abbc"), + ("ab?bc", "", "abc"), + ("ab?c", "", "abc"), + ("^abc$", "", "abc"), + ("^abc", "", "abcc"), + ("abc$", "", "aabc"), + ("^", "", "abc"), + ("$", "", "abc"), + ("a.c", "", "abc"), + ("a.c", "", "axc"), + ("a.*c", "", "axyzc"), + ("a[bc]d", "", "abd"), + ("a[b-d]e", "", "ace"), + ("a[b-d]", "", "aac"), + ("a[-b]", "", "a-"), + ("a[b-]", "", "a-"), + ("a]", "", "a]"), + ("a[]]b", "", "a]b"), + ("a[^bc]d", "", "aed"), + ("a[^-b]c", "", "adc"), + ("a[^]b]c", "", "adc"), + ("ab|cd", "", "abc"), + ("ab|cd", "", "abcd"), + ("a\\(b", "", "a(b"), + ("a\\(*b", "", "ab"), + ("a\\(*b", "", "a((b"), + ("((a))", "", "abc"), + ("(a)b(c)", "", "abc"), + ("a+b+c", "", "aabbabc"), + ("a*", "", "aaa"), + ("(a*)*", "", 
"-"), + ("(a*)+", "", "-"), + ("(a*|b)*", "", "-"), + ("(a+|b)*", "", "ab"), + ("(a+|b)+", "", "ab"), + ("(a+|b)?", "", "ab"), + ("[^ab]*", "", "cde"), + ("(^)*", "", "-"), + ("a*", "", "a"), + ("([abc])*d", "", "abbbcd"), + ("([abc])*bcd", "", "abcd"), + ("a|b|c|d|e", "", "e"), + ("(a|b|c|d|e)f", "", "ef"), + ("((a*|b))*", "", "-"), + ("abcd*efg", "", "abcdefg"), + ("ab*", "", "xabyabbbz"), + ("ab*", "", "xayabbbz"), + ("(ab|cd)e", "", "abcde"), + ("[abhgefdc]ij", "", "hij"), + ("(a|b)c*d", "", "abcd"), + ("(ab|ab*)bc", "", "abc"), + ("a([bc]*)c*", "", "abc"), + ("a([bc]*)(c*d)", "", "abcd"), + ("a([bc]+)(c*d)", "", "abcd"), + ("a([bc]*)(c+d)", "", "abcd"), + ("a[bcd]*dcdcde", "", "adcdcde"), + ("(ab|a)b*c", "", "abc"), + ("((a)(b)c)(d)", "", "abcd"), + ("[A-Za-z_][A-Za-z0-9_]*", "", "alpha"), + ("^a(bc+|b[eh])g|.h$", "", "abh"), + ("(bc+d$|ef*g.|h?i(j|k))", "", "effgz"), + ("(bc+d$|ef*g.|h?i(j|k))", "", "ij"), + ("(bc+d$|ef*g.|h?i(j|k))", "", "reffgz"), + ("(((((((((a)))))))))", "", "a"), + ("multiple words", "", "multiple words yeah"), + ("(.*)c(.*)", "", "abcde"), + ("abcd", "", "abcd"), + ("a(bc)d", "", "abcd"), + ("a[\u{0001}-\u{0003}]?c", "", "a\u{0002}c"), + ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Muammar Qaddafi"), + ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Mo'ammar Gadhafi"), + ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Muammar Kaddafi"), + ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Muammar Qadhafi"), + ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Muammar Gadafi"), + ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Mu'ammar Qadafi"), + ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Moamar Gaddafi"), + ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Mu'ammar Qadhdhafi"), + ("M[ou]'?am+[ae]r .*([AEae]l[- 
])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Muammar Khaddafi"), + ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Muammar Ghaddafy"), + ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Muammar Ghadafi"), + ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Muammar Ghaddafi"), + ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Muamar Kaddafi"), + ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Muammar Quathafi"), + ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Muammar Gheddafi"), + ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Moammar Khadafy"), + ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "", "Moammar Qudhafi"), + ("a+(b|c)*d+", "", "aabcdd"), + ("^.+$", "", "vivi"), + ("^(.+)$", "", "vivi"), + ("^([^!.]+).att.com!(.+)$", "", "gryphon.att.com!eby"), + ("^([^!]+!)?([^!]+)$", "", "bas"), + ("^([^!]+!)?([^!]+)$", "", "bar!bas"), + ("^([^!]+!)?([^!]+)$", "", "foo!bas"), + ("^.+!([^!]+!)([^!]+)$", "", "foo!bar!bas"), + ("((foo)|(bar))!bas", "", "bar!bas"), + ("((foo)|(bar))!bas", "", "foo!bar!bas"), + ("((foo)|(bar))!bas", "", "foo!bas"), + ("((foo)|bar)!bas", "", "bar!bas"), + ("((foo)|bar)!bas", "", "foo!bar!bas"), + ("((foo)|bar)!bas", "", "foo!bas"), + ("(foo|(bar))!bas", "", "bar!bas"), + ("(foo|(bar))!bas", "", "foo!bar!bas"), + ("(foo|(bar))!bas", "", "foo!bas"), + ("(foo|bar)!bas", "", "bar!bas"), + ("(foo|bar)!bas", "", "foo!bar!bas"), + ("(foo|bar)!bas", "", "foo!bas"), + ("^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", "", "foo!bar!bas"), + ("^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$", "", "bas"), + ("^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$", "", "bar!bas"), + ("^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$", "", "foo!bar!bas"), + ("^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$", "", "foo!bas"), + ("^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", "", "bas"), + 
("^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", "", "bar!bas"), + ("^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", "", "foo!bar!bas"), + ("^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", "", "foo!bas"), + (".*(/XXX).*", "", "/XXX"), + (".*(\\\\XXX).*", "", "\\XXX"), + ("\\\\XXX", "", "\\XXX"), + (".*(/000).*", "", "/000"), + (".*(\\\\000).*", "", "\\000"), + ("\\\\000", "", "\\000"), + ("aa*", "", "xaxaax"), + ("(a*)(ab)*(b*)", "", "abc"), + ("(a*)(ab)*(b*)", "", "abc"), + ("((a*)(ab)*)((b*)(a*))", "", "aba"), + ("((a*)(ab)*)((b*)(a*))", "", "aba"), + ("(...?.?)*", "", "xxxxxx"), + ("(...?.?)*", "", "xxxxxx"), + ("(...?.?)*", "", "xxxxxx"), + ("(a|ab)(bc|c)", "", "abcabc"), + ("(a|ab)(bc|c)", "", "abcabc"), + ("(aba|a*b)(aba|a*b)", "", "ababa"), + ("(aba|a*b)(aba|a*b)", "", "ababa"), + ("a(b)*\\1", "", "a"), + ("a(b)*\\1", "", "a"), + ("a(b)*\\1", "", "abab"), + ("(a*){2}", "", "xxxxx"), + ("(a*){2}", "", "xxxxx"), + ("a(b)*\\1", "", "abab"), + ("a(b)*\\1", "", "abab"), + ("a(b)*\\1", "", "abab"), + ("(a*)*", "", "a"), + ("(a*)*", "", "ax"), + ("(a*)*", "", "a"), + ("(aba|a*b)*", "", "ababa"), + ("(aba|a*b)*", "", "ababa"), + ("(aba|a*b)*", "", "ababa"), + ("(a(b)?)+", "", "aba"), + ("(a(b)?)+", "", "aba"), + ("(a(b)*)*\\2", "", "abab"), + ("(a(b)*)*\\2", "", "abab"), + ("(a?)((ab)?)(b?)a?(ab)?b?", "", "abab"), + (".*(.*)", "", "ab"), + (".*(.*)", "", "ab"), + ("(a|ab)(c|bcd)", "", "abcd"), + ("(a|ab)(bcd|c)", "", "abcd"), + ("(ab|a)(c|bcd)", "", "abcd"), + ("(ab|a)(bcd|c)", "", "abcd"), + ("((a|ab)(c|bcd))(d*)", "", "abcd"), + ("((a|ab)(bcd|c))(d*)", "", "abcd"), + ("((ab|a)(c|bcd))(d*)", "", "abcd"), + ("((ab|a)(bcd|c))(d*)", "", "abcd"), + ("(a|ab)((c|bcd)(d*))", "", "abcd"), + ("(a|ab)((bcd|c)(d*))", "", "abcd"), + ("(ab|a)((c|bcd)(d*))", "", "abcd"), + ("(ab|a)((bcd|c)(d*))", "", "abcd"), + ("(a*)(b|abc)", "", "abc"), + ("(a*)(abc|b)", "", "abc"), + ("((a*)(b|abc))(c*)", "", "abc"), + ("((a*)(abc|b))(c*)", "", "abc"), + ("(a*)((b|abc)(c*))", "", "abc"), + 
("(a*)((abc|b)(c*))", "", "abc"), + ("(a*)(b|abc)", "", "abc"), + ("(a*)(abc|b)", "", "abc"), + ("((a*)(b|abc))(c*)", "", "abc"), + ("((a*)(abc|b))(c*)", "", "abc"), + ("(a*)((b|abc)(c*))", "", "abc"), + ("(a*)((abc|b)(c*))", "", "abc"), + ("(a|ab)", "", "ab"), + ("(ab|a)", "", "ab"), + ("(a|ab)(b*)", "", "ab"), + ("(ab|a)(b*)", "", "ab"), + ("a+", "", "xaax"), + (".(a*).", "", "xaax"), + ("(a?)((ab)?)", "", "ab"), + ("(a?)((ab)?)(b?)", "", "ab"), + ("((a?)((ab)?))(b?)", "", "ab"), + ("(a?)(((ab)?)(b?))", "", "ab"), + ("(.?)", "", "x"), + ("(.?){1}", "", "x"), + ("(.?)(.?)", "", "x"), + ("(.?){2}", "", "x"), + ("(.?)*", "", "x"), + ("(.?.?)", "", "xxx"), + ("(.?.?){1}", "", "xxx"), + ("(.?.?)(.?.?)", "", "xxx"), + ("(.?.?){2}", "", "xxx"), + ("(.?.?)(.?.?)(.?.?)", "", "xxx"), + ("(.?.?){3}", "", "xxx"), + ("(.?.?)*", "", "xxx"), + ("a?((ab)?)(b?)", "", "ab"), + ("(a?)((ab)?)b?", "", "ab"), + ("a?((ab)?)b?", "", "ab"), + ("(a*){2}", "", "xxxxx"), + ("(ab?)(b?a)", "", "aba"), + ("(a|ab)(ba|a)", "", "aba"), + ("(a|ab|ba)", "", "aba"), + ("(a|ab|ba)(a|ab|ba)", "", "aba"), + ("(a|ab|ba)*", "", "aba"), + ("(aba|a*b)", "", "ababa"), + ("(aba|a*b)(aba|a*b)", "", "ababa"), + ("(aba|a*b)*", "", "ababa"), + ("(aba|ab|a)", "", "ababa"), + ("(aba|ab|a)(aba|ab|a)", "", "ababa"), + ("(aba|ab|a)*", "", "ababa"), + ("(a(b)?)", "", "aba"), + ("(a(b)?)(a(b)?)", "", "aba"), + ("(a(b)?)+", "", "aba"), + ("(.*)(.*)", "", "xx"), + (".*(.*)", "", "xx"), + ("(a.*z|b.*y)", "", "azbazby"), + ("(a.*z|b.*y)(a.*z|b.*y)", "", "azbazby"), + ("(a.*z|b.*y)*", "", "azbazby"), + ("(.|..)(.*)", "", "ab"), + ("((..)*(...)*)", "", "xxx"), + ("((..)*(...)*)((..)*(...)*)", "", "xxx"), + ("((..)*(...)*)*", "", "xxx"), + ("(a{0,1})*b\\1", "", "ab"), + ("(a*)*b\\1", "", "ab"), + ("(a*)b\\1*", "", "ab"), + ("(a*)*b\\1*", "", "ab"), + ("(a{0,1})*b(\\1)", "", "ab"), + ("(a*)*b(\\1)", "", "ab"), + ("(a*)b(\\1)*", "", "ab"), + ("(a*)*b(\\1)*", "", "ab"), + ("(a{0,1})*b\\1", "", "aba"), + ("(a*)*b\\1", "", "aba"), 
+ ("(a*)b\\1*", "", "aba"), + ("(a*)*b\\1*", "", "aba"), + ("(a*)*b(\\1)*", "", "aba"), + ("(a{0,1})*b\\1", "", "abaa"), + ("(a*)*b\\1", "", "abaa"), + ("(a*)b\\1*", "", "abaa"), + ("(a*)*b\\1*", "", "abaa"), + ("(a*)*b(\\1)*", "", "abaa"), + // ("(a{0,1})*b\\1", "", "aab"), LXR bug + ("(a*)*b\\1", "", "aab"), + ("(a*)b\\1*", "", "aab"), + ("(a*)*b\\1*", "", "aab"), + ("(a*)*b(\\1)*", "", "aab"), + // ("(a{0,1})*b\\1", "", "aaba"), LXR bug + ("(a*)*b\\1", "", "aaba"), + ("(a*)b\\1*", "", "aaba"), + ("(a*)*b\\1*", "", "aaba"), + ("(a*)*b(\\1)*", "", "aaba"), + // ("(a{0,1})*b\\1", "", "aabaa"), LXR bug + ("(a*)*b\\1", "", "aabaa"), + ("(a*)b\\1*", "", "aabaa"), + ("(a*)*b\\1*", "", "aabaa"), + ("(a*)*b(\\1)*", "", "aabaa"), + ("(x)*a\\1", "", "a"), + ("(x)*a\\1*", "", "a"), + ("(x)*a(\\1)", "", "a"), + ("(x)*a(\\1)*", "", "a"), + ("(aa(b(b))?)+", "", "aabbaa"), + ("(a(b)?)+", "", "aba"), + ("([ab]+)([bc]+)([cd]*)", "", "abcd"), + ("([ab]*)([bc]*)([cd]*)\\1", "", "abcdaa"), + ("([ab]*)([bc]*)([cd]*)\\1", "", "abcdab"), + ("([ab]*)([bc]*)([cd]*)\\1*", "", "abcdaa"), + ("([ab]*)([bc]*)([cd]*)\\1*", "", "abcdab"), + ("^(A([^B]*))?(B(.*))?", "", "Aa"), + ("^(A([^B]*))?(B(.*))?", "", "Bb"), + (".*([AB]).*\\1", "", "ABA"), + ("[^A]*A", "", "\\nA"), + ("(a|ab)(c|bcd)(d*)", "", "abcd"), + ("(a|ab)(bcd|c)(d*)", "", "abcd"), + ("(ab|a)(c|bcd)(d*)", "", "abcd"), + ("(ab|a)(bcd|c)(d*)", "", "abcd"), + ("(a*)(b|abc)(c*)", "", "abc"), + ("(a*)(abc|b)(c*)", "", "abc"), + ("(a*)(b|abc)(c*)", "", "abc"), + ("(a*)(abc|b)(c*)", "", "abc"), + ("(a|ab)(c|bcd)(d|.*)", "", "abcd"), + ("(a|ab)(bcd|c)(d|.*)", "", "abcd"), + ("(ab|a)(c|bcd)(d|.*)", "", "abcd"), + ("(ab|a)(bcd|c)(d|.*)", "", "abcd"), + ("(a*)*", "", "a"), + ("(a*)*", "", "x"), + ("(a*)*", "", "aaaaaa"), + ("(a*)*", "", "aaaaaax"), + ("(a*)+", "", "a"), + ("(a*)+", "", "x"), + ("(a*)+", "", "aaaaaa"), + ("(a*)+", "", "aaaaaax"), + ("(a+)*", "", "a"), + ("(a+)*", "", "x"), + ("(a+)*", "", "aaaaaa"), + ("(a+)*", "", "aaaaaax"), + 
("(a+)+", "", "a"), + ("(a+)+", "", "x"), + ("(a+)+", "", "aaaaaa"), + ("(a+)+", "", "aaaaaax"), + ("([a]*)*", "", "a"), + ("([a]*)*", "", "x"), + ("([a]*)*", "", "aaaaaa"), + ("([a]*)*", "", "aaaaaax"), + ("([a]*)+", "", "a"), + ("([a]*)+", "", "x"), + ("([a]*)+", "", "aaaaaa"), + ("([a]*)+", "", "aaaaaax"), + ("([^b]*)*", "", "a"), + ("([^b]*)*", "", "b"), + ("([^b]*)*", "", "aaaaaa"), + ("([^b]*)*", "", "aaaaaab"), + ("([ab]*)*", "", "a"), + ("([ab]*)*", "", "aaaaaa"), + ("([ab]*)*", "", "ababab"), + ("([ab]*)*", "", "bababa"), + ("([ab]*)*", "", "b"), + ("([ab]*)*", "", "bbbbbb"), + ("([ab]*)*", "", "aaaabcde"), + ("([^a]*)*", "", "b"), + ("([^a]*)*", "", "bbbbbb"), + ("([^a]*)*", "", "aaaaaa"), + ("([^ab]*)*", "", "ccccxx"), + ("([^ab]*)*", "", "ababab"), + ("((z)+|a)*", "", "zabcde"), + ("a+?", "", "aaaaaa"), + ("(a)", "", "aaa"), + ("(a*?)", "", "aaa"), + ("(a)*?", "", "aaa"), + ("(a*?)*?", "", "aaa"), + ("(a*)*(x)", "", "x"), + ("(a*)*(x)", "", "ax"), + ("(a*)*(x)", "", "axa"), + ("(a*)*(x)(\\1)", "", "x"), + ("(a*)*(x)(\\1)", "", "ax"), + ("(a*)*(x)(\\1)", "", "axa"), + ("(a*)*(x)(\\1)(x)", "", "axax"), + ("(a*)*(x)(\\1)(x)", "", "axxa"), + ("(a*)*(x)", "", "x"), + ("(a*)*(x)", "", "ax"), + ("(a*)*(x)", "", "axa"), + ("(a*)+(x)", "", "x"), + ("(a*)+(x)", "", "ax"), + ("(a*)+(x)", "", "axa"), + ("(a*){2}(x)", "", "x"), + ("(a*){2}(x)", "", "ax"), + ("(a*){2}(x)", "", "axa"), + ("((..)|(.))", "", "a"), + ("((..)|(.))((..)|(.))", "", "a"), + ("((..)|(.))((..)|(.))((..)|(.))", "", "a"), + ("((..)|(.)){1}", "", "a"), + ("((..)|(.)){2}", "", "a"), + ("((..)|(.)){3}", "", "a"), + ("((..)|(.))*", "", "a"), + ("((..)|(.))", "", "aa"), + ("((..)|(.))((..)|(.))", "", "aa"), + ("((..)|(.))((..)|(.))((..)|(.))", "", "aa"), + ("((..)|(.)){1}", "", "aa"), + ("((..)|(.)){2}", "", "aa"), + ("((..)|(.)){3}", "", "aa"), + ("((..)|(.))*", "", "aa"), + ("((..)|(.))", "", "aaa"), + ("((..)|(.))((..)|(.))", "", "aaa"), + ("((..)|(.))((..)|(.))((..)|(.))", "", "aaa"), + 
("((..)|(.)){1}", "", "aaa"), + ("((..)|(.)){2}", "", "aaa"), + ("((..)|(.)){3}", "", "aaa"), + ("((..)|(.))*", "", "aaa"), + ("((..)|(.))", "", "aaaa"), + ("((..)|(.))((..)|(.))", "", "aaaa"), + ("((..)|(.))((..)|(.))((..)|(.))", "", "aaaa"), + ("((..)|(.)){1}", "", "aaaa"), + ("((..)|(.)){2}", "", "aaaa"), + ("((..)|(.)){3}", "", "aaaa"), + ("((..)|(.))*", "", "aaaa"), + ("((..)|(.))", "", "aaaaa"), + ("((..)|(.))((..)|(.))", "", "aaaaa"), + ("((..)|(.))((..)|(.))((..)|(.))", "", "aaaaa"), + ("((..)|(.)){1}", "", "aaaaa"), + ("((..)|(.)){2}", "", "aaaaa"), + ("((..)|(.)){3}", "", "aaaaa"), + ("((..)|(.))*", "", "aaaaa"), + ("((..)|(.))", "", "aaaaaa"), + ("((..)|(.))((..)|(.))", "", "aaaaaa"), + ("((..)|(.))((..)|(.))((..)|(.))", "", "aaaaaa"), + ("((..)|(.)){1}", "", "aaaaaa"), + ("((..)|(.)){2}", "", "aaaaaa"), + ("((..)|(.)){3}", "", "aaaaaa"), + ("((..)|(.))*", "", "aaaaaa"), + ("X(.?){0,}Y", "", "X1234567Y"), + ("X(.?){1,}Y", "", "X1234567Y"), + ("X(.?){2,}Y", "", "X1234567Y"), + ("X(.?){3,}Y", "", "X1234567Y"), + ("X(.?){4,}Y", "", "X1234567Y"), + ("X(.?){5,}Y", "", "X1234567Y"), + ("X(.?){6,}Y", "", "X1234567Y"), + ("X(.?){7,}Y", "", "X1234567Y"), + ("X(.?){8,}Y", "", "X1234567Y"), + ("X(.?){0,8}Y", "", "X1234567Y"), + ("X(.?){1,8}Y", "", "X1234567Y"), + ("X(.?){2,8}Y", "", "X1234567Y"), + ("X(.?){3,8}Y", "", "X1234567Y"), + ("X(.?){4,8}Y", "", "X1234567Y"), + ("X(.?){5,8}Y", "", "X1234567Y"), + ("X(.?){6,8}Y", "", "X1234567Y"), + ("X(.?){7,8}Y", "", "X1234567Y"), + ("X(.?){8,8}Y", "", "X1234567Y"), + ("(a|ab|c|bcd){0,}(d*)", "", "ababcd"), + ("(a|ab|c|bcd){1,}(d*)", "", "ababcd"), + ("(a|ab|c|bcd){2,}(d*)", "", "ababcd"), + ("(a|ab|c|bcd){3,}(d*)", "", "ababcd"), + ("(a|ab|c|bcd){4,}(d*)", "", "ababcd"), + ("(a|ab|c|bcd){0,10}(d*)", "", "ababcd"), + ("(a|ab|c|bcd){1,10}(d*)", "", "ababcd"), + ("(a|ab|c|bcd){2,10}(d*)", "", "ababcd"), + ("(a|ab|c|bcd){3,10}(d*)", "", "ababcd"), + ("(a|ab|c|bcd){4,10}(d*)", "", "ababcd"), + ("(a|ab|c|bcd)*(d*)", "", 
"ababcd"), + ("(a|ab|c|bcd)+(d*)", "", "ababcd"), + ("(ab|a|c|bcd){0,}(d*)", "", "ababcd"), + ("(ab|a|c|bcd){1,}(d*)", "", "ababcd"), + ("(ab|a|c|bcd){2,}(d*)", "", "ababcd"), + ("(ab|a|c|bcd){3,}(d*)", "", "ababcd"), + ("(ab|a|c|bcd){4,}(d*)", "", "ababcd"), + ("(ab|a|c|bcd){0,10}(d*)", "", "ababcd"), + ("(ab|a|c|bcd){1,10}(d*)", "", "ababcd"), + ("(ab|a|c|bcd){2,10}(d*)", "", "ababcd"), + ("(ab|a|c|bcd){3,10}(d*)", "", "ababcd"), + ("(ab|a|c|bcd){4,10}(d*)", "", "ababcd"), + ("(ab|a|c|bcd)*(d*)", "", "ababcd"), + ("(ab|a|c|bcd)+(d*)", "", "ababcd"), + ("(a|ab)(c|bcd)(d*)", "", "abcd"), + ("(a|ab)(bcd|c)(d*)", "", "abcd"), + ("(ab|a)(c|bcd)(d*)", "", "abcd"), + ("(ab|a)(bcd|c)(d*)", "", "abcd"), + ("(a*)(b|abc)(c*)", "", "abc"), + ("(a*)(abc|b)(c*)", "", "abc"), + ("(a*)(b|abc)(c*)", "", "abc"), + ("(a*)(abc|b)(c*)", "", "abc"), + ("(a|ab)(c|bcd)(d|.*)", "", "abcd"), + ("(a|ab)(bcd|c)(d|.*)", "", "abcd"), + ("(ab|a)(c|bcd)(d|.*)", "", "abcd"), + ("(ab|a)(bcd|c)(d|.*)", "", "abcd"), + ("(a|ab)(c|bcd)(d*)", "", "abcd"), + ("(a|ab)(bcd|c)(d*)", "", "abcd"), + ("(ab|a)(c|bcd)(d*)", "", "abcd"), + ("(ab|a)(bcd|c)(d*)", "", "abcd"), + ("(a*)(b|abc)(c*)", "", "abc"), + ("(a*)(abc|b)(c*)", "", "abc"), + ("(a*)(b|abc)(c*)", "", "abc"), + ("(a*)(abc|b)(c*)", "", "abc"), + ("(a|ab)(c|bcd)(d|.*)", "", "abcd"), + ("(a|ab)(bcd|c)(d|.*)", "", "abcd"), + ("(ab|a)(c|bcd)(d|.*)", "", "abcd"), + ("(ab|a)(bcd|c)(d|.*)", "", "abcd"), + ("\u{fb00}", "i", "FF"), + ("(\u{fb00})\\1", "i", "FFFF"), + ("(\u{fb00})\\1", "i", "FF\u{fb00}"), + ("(\u{fb00})\\1", "i", "\u{fb00}FF"), + ("\u{fb01}", "i", "FI"), + ("(\u{fb01})\\1", "i", "FIFI"), + ("\u{fb02}", "i", "FL"), + ("\u{fb03}", "i", "FFI"), + ("\u{fb04}", "i", "FFL"), + ("\u{fb00}I", "i", "\u{fb03}"), + ("\u{fb03}", "i", "\u{fb00}I"), + ("F\u{fb01}", "i", "\u{fb03}"), + ("\u{fb03}", "i", "F\u{fb01}"), + ("\u{fb00}L", "i", "\u{fb04}"), + ("\u{fb04}", "i", "\u{fb00}L"), + ("F\u{fb02}", "i", "\u{fb04}"), + ("\u{fb04}", "i", "F\u{fb02}"), + 
("[\u{fb04}[=a=]o]+", "i", "F\u{fb02}aÄö"), + ("\u{1f50}", "i", "\u{03c5}\u{0313}"), + ("\u{1f52}", "i", "\u{03c5}\u{0313}\u{0300}"), + ("\u{1f54}", "i", "\u{03c5}\u{0313}\u{0301}"), + ("\u{1f56}", "i", "\u{03c5}\u{0313}\u{0342}"), + ("\u{1f50}\u{0300}", "i", "\u{1f52}"), + ("\u{1f52}", "i", "\u{1f50}\u{0300}"), + ("\u{1f50}\u{0301}", "i", "\u{1f54}"), + ("\u{1f54}", "i", "\u{1f50}\u{0301}"), + ("\u{1f50}\u{0342}", "i", "\u{1f56}"), + ("\u{1f56}", "i", "\u{1f50}\u{0342}"), + ("\u{1fb6}", "i", "\u{03b1}\u{0342}"), + ("\u{1fb7}", "i", "\u{03b1}\u{0342}\u{03b9}"), + ("\u{1fb6}\u{03b9}", "i", "\u{1fb7}"), + ("\u{1fb7}", "i", "\u{1fb6}\u{03b9}"), + ("\u{1fc6}", "i", "\u{03b7}\u{0342}"), + ("\u{1fc7}", "i", "\u{03b7}\u{0342}\u{03b9}"), + ("\u{1fc6}\u{03b9}", "i", "\u{1fc7}"), + ("\u{1fc7}", "i", "\u{1fc6}\u{03b9}"), + ("\u{1ff6}", "i", "\u{03c9}\u{0342}"), + ("\u{1ff7}", "i", "\u{03c9}\u{0342}\u{03b9}"), + ("\u{1ff6}\u{03b9}", "i", "\u{1ff7}"), + ("\u{1ff7}", "i", "\u{1ff6}\u{03b9}"), + ("f*", "i", "ff"), + ("f*", "i", "\u{fb00}"), + ("f+", "i", "ff"), + ("f+", "i", "\u{fb00}"), + ("f{1,}", "i", "ff"), + ("f{1,}", "i", "\u{fb00}"), + ("f{1,2}", "i", "ff"), + ("f{1,2}", "i", "\u{fb00}"), + ("f{,2}", "i", "ff"), + ("f{,2}", "i", "\u{fb00}"), + ("ff?", "i", "ff"), + ("ff?", "i", "\u{fb00}"), + ("f{2}", "i", "ff"), + ("f{2}", "i", "\u{fb00}"), + ("f{2,2}", "i", "ff"), + ("f{2,2}", "i", "\u{fb00}"), + ("K", "i", "\u{212a}"), + ("k", "i", "\u{212a}"), + ("\\w", "i", "\u{212a}"), + ("\\W", "i", "\u{212a}"), + ("[\\w]", "i", "\u{212a}"), + ("[\\w]+", "i", "a\\wWc"), + ("[\\W]+", "i", "a\\wWc"), + ("[\\d]+", "i", "0\\dD9"), + ("[\\D]+", "i", "a\\dDc"), + ("[\\s]+", "i", " \\sS\t"), + ("[\\S]+", "i", " \\sS\t"), + ("[kx]", "i", "\u{212a}"), + ("ff", "i", "\u{fb00}"), + ("[f]f", "i", "\u{fb00}"), + ("f[f]", "i", "\u{fb00}"), + ("[f][f]", "i", "\u{fb00}"), + ("(?:f)f", "i", "\u{fb00}"), + ("f(?:f)", "i", "\u{fb00}"), + ("(?:f)(?:f)", "i", "\u{fb00}"), + ("\\A[\u{fb00}]\\z", "i", 
"\u{fb00}"), + ("\\A[\u{fb00}]\\z", "i", "ff"), + ("\\A[^\u{fb00}]\\z", "i", "\u{fb00}"), + ("\\A[^\u{fb00}]\\z", "i", "ff"), + ("\\A[^[^\u{fb00}]]\\z", "i", "\u{fb00}"), + ("\\A[^[^\u{fb00}]]\\z", "i", "ff"), + ("\\A[[^[^\u{fb00}]]]\\z", "i", "\u{fb00}"), + ("\\A[[^[^\u{fb00}]]]\\z", "i", "ff"), + ("[^a-c]", "i", "A"), + ("[[^a-c]]", "i", "A"), + ("[^a]", "i", "a"), + ("[[^a]]", "i", "a"), + ("\\A\\W\\z", "i", "\u{fb00}"), + ("\\A\\W\\z", "i", "ff"), + ("\\A[\\p{L}]\\z", "i", "\u{fb00}"), + ("\\A[\\p{L}]\\z", "i", "ff"), + ("\\A\\W\\z", "i", "\u{fb03}"), + ("\\A\\W\\z", "i", "ffi"), + ("\\A\\W\\z", "i", "\u{fb00}i"), + ("\\A[\\p{L}]\\z", "i", "\u{fb03}"), + ("\\A[\\p{L}]\\z", "i", "ffi"), + ("\\A[\\p{L}]\\z", "i", "\u{fb00}i"), + ("([[=a=]])\\1", "i", "aA"), + ("([[=a=]])\\1", "i", "Aa"), + ("([[=a=]])\\1", "i", "a\u{00e4}"), + ("([[=a=]])\\1", "i", "a\u{00c4}"), + ("([[=a=]])\\1", "i", "\u{00e4}a"), + ("([[=a=]])\\1", "i", "\u{00c4}a"), + ("([[=a=]])\\1", "i", "\u{00c4}A"), + ("[[=a=]o]+", "i", "\u{00e4}O\u{00f6}"), + ("[[=a=]o]+", "i", "\u{00e4}O\u{00f6}"), + ("[[=\u{00df}=]o]+", "i", "s"), + ("[[=\u{00df}=]o]+", "i", "ss"), + ("[[=\u{00df}=]o]+", "", "s"), + ("[[=\u{00df}=]o]+", "", "ss"), + ("[\u{0132}]+", "", "ij"), + ("[\u{0132}]+", "i", "ij"), + ("[[=\u{0132}=]]+", "", "ij"), + ("[[=\u{0132}=]o]+", "", "ij"), + ("[[=\u{0132}=]o]+", "i", "ij"), + ("[\\s-r]+", "", "\\stu"), + ("[\\s-v]+", "", "\\stu"), + ] { + let from_index = 1; + let e_pattern = java_string_escape(pattern); + let e_input = java_string_escape(input); + match run_test(&mut statement, &pattern, &flags, &input, from_index)? 
{ + TestResult::Match(groups) => { + writeln!(out, "test(\"{}\", \"{}\", \"{}\", {}, true, {});", e_pattern, flags, e_input, from_index - 1, groups.iter().map(|v| format!("{}", v)).collect::>().join(", "))?; + } + TestResult::NoMatch => { + writeln!(out, "test(\"{}\", \"{}\", \"{}\", {}, false);", e_pattern, flags, e_input, from_index - 1)?; + } + TestResult::SyntaxError(message) => { + writeln!(out, "expectSyntaxError(\"{}\", \"{}\", \"{}\");", e_pattern, flags, java_string_escape(message.as_str()))?; + } + } + } + insert_generated_code(Path::new(PATH_GRAAL_REPO).join(PATH_ORACLE_DB_TESTS).as_path(), &out)?; + Ok(()) +} diff --git a/regex/src/com.oracle.truffle.regex/tools/generate_case_fold_table.clj b/regex/src/com.oracle.truffle.regex/tools/generate_case_fold_table.clj deleted file mode 100755 index 97f2d75dfaf1..000000000000 --- a/regex/src/com.oracle.truffle.regex/tools/generate_case_fold_table.clj +++ /dev/null @@ -1,406 +0,0 @@ -; ------------------------------------------------------------------------------ -; Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. -; DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
-; -; The Universal Permissive License (UPL), Version 1.0 -; -; Subject to the condition set forth below, permission is hereby granted to any -; person obtaining a copy of this software, associated documentation and/or -; data (collectively the "Software"), free of charge and under any and all -; copyright rights in the Software, and any and all patent rights owned or -; freely licensable by each licensor hereunder covering either (i) the -; unmodified Software as contributed to or provided by such licensor, or (ii) -; the Larger Works (as defined below), to deal in both -; -; (a) the Software, and -; -; (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if -; one is included with the Software each a "Larger Work" to which the Software -; is contributed by such licensors), -; -; without restriction, including without limitation the rights to copy, create -; derivative works of, display, perform, and distribute the Software and make, -; use, sell, offer for sale, import, export, have made, and have sold the -; Software and the Larger Work(s), and to sublicense the foregoing rights on -; either these or other terms. -; -; This license is subject to the following condition: -; -; The above copyright notice and either this complete permission notice or at a -; minimum a reference to the UPL must be included in all copies or substantial -; portions of the Software. -; -; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -; SOFTWARE. 
-; ------------------------------------------------------------------------------ - -;; In order to run this script, install Boot as described in -;; https://github.com/boot-clj/boot#install or simply evaluate the code below in -;; any Clojure REPL and then call the `-main` function. - -;; This script assumes that the current working directory contains a folder "dat" -;; with the files NonUnicodeFoldTable.txt, UnicodeFoldTable.txt, -;; PythonFoldTable.txt and CaseFolding.txt. - -(ns generate-case-fold-table - (:require [clojure.set :as set] - [clojure.string :as str])) - -(defn pairwise - "Given a sequence `x_1`, `x_2`, `x_3`..., returns the sequence of pairs `[x_1 x_2]`, `[x_2 x_3]`..." - [xs] - (map vector xs (rest xs))) - -(defn parse-hex - [hex-string] - (Long/parseLong hex-string 16)) - -(defn parse-relation-file - "Parses a binary relation from the file at `path` and returns it as a sorted - set." - [path] - (into (sorted-set) - (apply concat - (for [line (str/split-lines (slurp path))] - (let [codepoints-str (str/split line #";") - codepoints (map parse-hex codepoints-str)] - (pairwise codepoints)))))) - -(defn parse-case-folding-file - "Parses Unicode's CaseFolding.txt from the file at `path` and returns it as a - sorted set." - [path] - (into (sorted-set) - (for [line (str/split-lines (slurp path)) - :when (not (or (str/blank? line) (str/starts-with? line "#"))) - :let [[code status mapping] (str/split line #"\s*;\s*")] - :when (#{"C" "S"} status)] - [(parse-hex code) (parse-hex mapping)]))) - -(defn maps-to - "Given a binary relation `rel`, represented as a sorted set, finds the set of - elements Y such that X `rel` Y." - [rel elem] - (map second (subseq rel > [elem 0] < [(inc elem) 0]))) - -(defn swap - "Swaps the elements in a pair." - [[a b]] - [b a]) - -(defn remove-reflexive-entries - "Remove pairs of the form [x x] from a given set of pairs." 
- [rel] - (set/select #(not= (first %) (second %)) rel)) - -(defn symmetric-closure - "Calculates the symmetric closure of the binary relation `rel`." - [rel] - (let [symmetric-rel (into (sorted-set) (map swap rel))] - (set/union rel symmetric-rel))) - -(defn load-relation - "Loads an equivalence relation from a file and makes it symmetric. - - We do not want a reflexive closure because we want the entries in the relation - to correspond to equivalent pairs that still need to be encoded in the case - fold table (and we do not want to include reflexive entries in the case fold - table). We do not need transitivity because we handle all equivalence classes - of size larger than 3 in the first step, `extract-large-classes`, and in that - step we traverse the relation graph recursively." - [path] - (-> path - parse-relation-file - remove-reflexive-entries - symmetric-closure)) - -(def python-ascii-relation - "The case-folding equivalence relation for Python ascii regular expressions." - (->> (map vector (range (int \a) (inc (int \z))) - (range (int \A) (inc (int \Z)))) - (into (sorted-set)) - symmetric-closure)) - -(defn collect-eq-classes - "Given some equivalence relation `rel`, finds the equivalence classes. - - NB: This function assumes that `rel` is only symmetric. Transitive pairs need - not be included since the graph is being searched." - [rel] - (let [find-class (fn [rel start-elem] - (let [visited? (atom (sorted-set))] - (letfn [(traverse [elem] - (when-not (@visited? elem) - (swap! visited? conj elem) - (doseq [eq-elem (maps-to rel elem)] - (traverse eq-elem))))] - (do (traverse start-elem) - @visited?))))] - (loop [rel rel - from [0 0] - classes []] - (if-let [next-pair (first (subseq rel > from))] - (let [class (find-class rel (first next-pair))] - (recur (set/select #(not-any? 
class %) rel) - next-pair - (conj classes class))) - classes)))) - -(defn encode-classes - "Given a list of equivalence classes, generates case fold table entries that - encode them. For classes of size 2, we encode them as two entries, - deltaPositive and deltaNegative (:kind :delta). For classes of larger size, we - use directMapping (:kind :class)." - [classes] - (let [;; `class-as-ranges` represents a `class` as a union of closed - ;; intervals (ranges). This representation is then used inside the - ;; CHARACTER_SET_TABLE generated by `show-classes`.` - class-as-ranges (fn [class] - (loop [class class - cur-range nil - ranges []] - (if-let [elem (first class)] - (if cur-range - (if (= (inc (:hi cur-range)) elem) - (recur (rest class) (update cur-range :hi inc) ranges) - (recur (rest class) {:lo elem, :hi elem} (conj ranges cur-range))) - (recur (rest class) {:lo elem, :hi elem} ranges)) - (if cur-range - (conj ranges cur-range) - ranges)))) - encode-class (fn [class] - (cond - (<= (count class) 1) - [] - (= (count class) 2) - (let [lower (first class) - higher (second class)] - [{:lo lower - :hi lower - :delta (- higher lower) - :kind :delta} - {:lo higher - :hi higher - :delta (- lower higher) - :kind :delta}]) - :otherwise - (let [class-ranges (class-as-ranges class)] - (for [range class-ranges] - {:lo (:lo range) - :hi (:hi range) - :class class-ranges - :kind :class}))))] - (mapcat encode-class classes))) - -(defn extract-large-classes - "This is the first step in encoding the equivalence relation `rel` into a list - of case fold table entries. This step finds any equivalence classes of size >= - 3 and encodes them using directMapping (:kind :class) because the other - heuristics only deal well with equivalence classes of size 2. - - NB: directMapping is a case fold table entry which assigns to a range of code - points a specific set of equivalent code points." 
- [rel] - (let [large-classes (filter #(>= (count %) 3) (collect-eq-classes rel)) - processed-elems (apply set/union large-classes) - entries (encode-classes large-classes)] - {:todo-rel (set/select #(not-any? processed-elems %) rel) - :entries entries})) - -(defn extract-runs - "This is a helper function for `extract-delta-runs` and - `extract-alternating-runs`." - [rel find-run encode-run allow-singletons] - (loop [todo-rel rel - from [0 0] - entries []] - (if-let [next-pair (first (subseq todo-rel > from))] - (let [run (find-run rel next-pair)] - (if (or allow-singletons (> (count run) 1)) - (recur (set/difference todo-rel run) - next-pair - (conj entries (encode-run run))) - (recur todo-rel next-pair entries))) - {:todo-rel todo-rel - :entries entries}))) - -(defn extract-delta-runs - "This is the second step in encoding the equivalence relation `rel` into a - list of code table entries. This step finds ranges of characters which are - case-equivalent, character by character, to other ranges of characters, e.g. - the ASCII ranges [a-z] and [A-Z]. These are then encoded via the entries - deltaPositive and deltaNegative (:kind :delta)." - [allow-singletons rel] - (letfn [(find-delta-run [rel start-pair] - (let [next-pair [(inc (first start-pair)) (inc (second start-pair))]] - (cons start-pair (when (rel next-pair) - (find-delta-run rel next-pair))))) - (encode-delta-run [run] - {:lo (first (first run)) - :hi (first (last run)) - :delta (- (second (first run)) (first (first run))) - :kind :delta})] - (extract-runs rel find-delta-run encode-delta-run allow-singletons))) - -(defn extract-alternating-runs - "This is the third step in encoding the equivalence relation `rel` into a list - of code table entries. This step finds ranges of characters in which - lower-case and upper-case variants are alternated, e.g., as in the Latin - Extended-A range from 0x0100 to 0x012f. These are encoded using the entries - alternatingAL and alternatingUL (:kind :alternating)." 
- [rel] - (letfn [(find-alternating-run [rel start-pair] - (when (= (inc (first start-pair)) (second start-pair)) - (let [next-pair [(+ 2 (first start-pair)) (+ 2 (second start-pair))]] - (cons start-pair (cons (swap start-pair) (when (rel next-pair) - (find-alternating-run rel next-pair))))))) - (encode-alternating-run [run] - {:lo (first (first run)) - :hi (first (last run)) - :aligned (even? (first (first run))) - :kind :alternating})] - (extract-runs rel find-alternating-run encode-alternating-run false))) - -(defn generate-entries-for-eq-relation - "Given an equivalence relation, calculates its encoding in terms of case fold - table entries." - [rel] - (let [{rel :todo-rel, large-class-entries :entries} (extract-large-classes rel) - {rel :todo-rel, delta-entries :entries} (extract-delta-runs false rel) - {rel :todo-rel, alternating-entries :entries} (extract-alternating-runs rel) - remaining-classes (collect-eq-classes rel) - remaining-class-entries (encode-classes remaining-classes)] - (sort-by (fn [e] [(:lo e) (:hi e)]) (concat large-class-entries delta-entries alternating-entries remaining-class-entries)))) - -(defn generate-entries-for-function - "Given a functional relation, calculates its encoding in terms of case fold - table entries." - [rel] - (let [{rel :todo-rel, delta-entries :entries} (extract-delta-runs true rel)] - (sort-by (fn [e] [(:lo e) (:hi e)]) delta-entries))) - -(defn identify-classes - "Replaces the references to equivalence classes (:class field) in - directMapping case fold table entries (:kind :class) with numeric - identifiers (:class-id field). The numeric identifiers are being allocated - starting from the value of `num-classes-ref` and the mapping from classes to - identifiers is being stored in `class-ids-ref`." 
- [entries num-classes-ref class-ids-ref] - (doall (for [entry entries] - (if (= :class (:kind entry)) - (let [class (:class entry)] - (if-let [class-id (@class-ids-ref class)] - (assoc entry :class-id class-id) - (let [class-id @num-classes-ref] - (do (swap! class-ids-ref assoc class class-id) - (swap! num-classes-ref inc) - (assoc entry :class-id class-id))))) - entry)))) - -(defn show-hex - "Prints a number in hexadecimal format. Hexadecimal is the conventional base - in which to write down values of Unicode code points. Also, it is the same - base as was used in the original case fold table, meaning we can keep the diff - after updating the table minimal." - [n] - (format "0x%04x" n)) - -(defn show-hex6 - "Prints a number in hexadecimal format. Hexadecimal is the conventional base - in which to write down values of Unicode code points. Also, it is the same - base as was used in the original case fold table, meaning we can keep the diff - after updating the table minimal." - [n] - (format "0x%06x" n)) - -(defn show-classes - "Renders the CHARACTER_SET_TABLE in Java code. The CHARACTER_SET_TABLE - contains the definitions of codepoint equivalence classes that are used in - directMapping (:kind :class) entries of the case fold table." - [classes] - (let [header " private static final CodePointSet[] CHARACTER_SET_TABLE = new CodePointSet[]{\n" - item-prefix " " - item-sep ",\n" - footer "};\n" - show-class (fn [class] - (let [range-sep ", " - show-range (fn [range] - (str (show-hex6 (:lo range)) ", " (show-hex6 (:hi range)) ))] - (str "rangeSet(" (apply str (interpose ", " (map show-range class))) ")"))) - body (apply str (interpose item-sep (map #(str item-prefix (show-class %)) classes)))] - (str header body footer))) - -(defn show-entries - "Renders a case fold table with name `table-name`. This is the main product of - this script." 
- [entries table-name] - (let [header (str " public static final CaseFoldTableImpl " table-name " = new CaseFoldTableImpl(new int[]{\n") - item-prefix " " - item-sep ",\n" - footer "\n });\n" - method-name-and-args (fn [entry] - (case (:kind entry) - :delta {:lo (:lo entry) - :hi (:hi entry) - :method-name "INTEGER_OFFSET" - :arg (:delta entry)} - :alternating {:lo (:lo entry) - :hi (:hi entry) - :method-name (if (:aligned entry) - "ALTERNATING_AL" - "ALTERNATING_UL") - :arg 0 } - :class {:lo (:lo entry) - :hi (:hi entry) - :method-name "DIRECT_MAPPING" - :arg (:class-id entry)})) - show-entry (fn [entry] - (let [{:keys [lo hi method-name arg]} (method-name-and-args entry) - arg-sep ", "] - (str (show-hex6 lo) ", " (show-hex6 hi) ", " method-name ", " arg))) - body (apply str (interpose item-sep (map #(str item-prefix (show-entry %)) entries)))] - (str header body footer))) - -(defn do-the-job - "The main function of the script. It loads the definitions of the - equivalence relations for the cases when the RegExp flag 'u' is not set (file - NonUnicodeFoldTable.txt) and when it is set (file UnicodeFoldTable.txt). It - then generates the case fold table entries to be used in the CaseFoldTable - Java class in TRegex. - - NB: The CHARACTER_SET_TABLE is shared among the two case fold tables because - there is significant overlap between the two." 
- [] - (let [non-unicode-relation (load-relation "dat/NonUnicodeFoldTable.txt") - unicode-relation (load-relation "dat/UnicodeFoldTable.txt") - python-unicode-relation (load-relation "dat/PythonFoldTable.txt") - simple-case-folding (parse-case-folding-file "dat/CaseFolding.txt") - num-classes (atom 0) - class-ids (atom {}) - non-unicode-entries (identify-classes (generate-entries-for-eq-relation non-unicode-relation) num-classes class-ids) - unicode-entries (identify-classes (generate-entries-for-eq-relation unicode-relation) num-classes class-ids) - python-ascii-entries (identify-classes (generate-entries-for-eq-relation python-ascii-relation) num-classes class-ids) - python-unicode-entries (identify-classes (generate-entries-for-eq-relation python-unicode-relation) num-classes class-ids) - case-folding-entries (generate-entries-for-function simple-case-folding) - classes (map second (sort (map swap @class-ids)))] - (str (show-classes classes) - "\n" - (show-entries non-unicode-entries "NON_UNICODE_TABLE_ENTRIES") - "\n" - (show-entries unicode-entries "UNICODE_TABLE_ENTRIES") - "\n" - (show-entries python-ascii-entries "PYTHON_ASCII_TABLE_ENTRIES") - "\n" - (show-entries python-unicode-entries "PYTHON_UNICODE_TABLE_ENTRIES") - "\n" - (show-entries case-folding-entries "SIMPLE_CASE_FOLDING_ENTRIES")))) - -(defn -main - "This gets evaluated when we run the script." - [& args] - (print (do-the-job))) diff --git a/regex/src/com.oracle.truffle.regex/tools/generate_nonunicode_fold_table.py b/regex/src/com.oracle.truffle.regex/tools/generate_nonunicode_fold_table.py deleted file mode 100755 index ccddc6456e7a..000000000000 --- a/regex/src/com.oracle.truffle.regex/tools/generate_nonunicode_fold_table.py +++ /dev/null @@ -1,79 +0,0 @@ -#!/usr/bin/env python3 -# -# Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. -# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
-# -# The Universal Permissive License (UPL), Version 1.0 -# -# Subject to the condition set forth below, permission is hereby granted to any -# person obtaining a copy of this software, associated documentation and/or -# data (collectively the "Software"), free of charge and under any and all -# copyright rights in the Software, and any and all patent rights owned or -# freely licensable by each licensor hereunder covering either (i) the -# unmodified Software as contributed to or provided by such licensor, or (ii) -# the Larger Works (as defined below), to deal in both -# -# (a) the Software, and -# -# (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if -# one is included with the Software each a "Larger Work" to which the Software -# is contributed by such licensors), -# -# without restriction, including without limitation the rights to copy, create -# derivative works of, display, perform, and distribute the Software and make, -# use, sell, offer for sale, import, export, have made, and have sold the -# Software and the Larger Work(s), and to sublicense the foregoing rights on -# either these or other terms. -# -# This license is subject to the following condition: -# -# The above copyright notice and either this complete permission notice or at a -# minimum a reference to the UPL must be included in all copies or substantial -# portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. 
- - -# This reads dat/UnicodeData.txt and dat/SpecialCasing.txt and produces a file -# that gives all the non-trivial pairs of inputs-outputs of the ECMAScript -# Canonicalize when Unicode is false and IgnoreCase is true. - -upper_map = {} -for line in open("dat/UnicodeData.txt"): - tokens = line.split(";") - # Drop entries without toUppercase mapping - if tokens[12].strip() == "": - continue - char = int(tokens[0].strip(), 16) - upper = int(tokens[12].strip(), 16) - upper_map[char] = [upper] - -for line in open("dat/SpecialCasing.txt"): - # Drop comments and empty lines - if line.startswith("#") or line.strip() == "": - continue - tokens = line.split(";") - # Drop entries with conditions - if len(tokens) > 5: - continue - char = int(tokens[0].strip(), 16) - upper = [int(c, 16) for c in tokens[3].split()] - upper_map[char] = upper - -for (char, upper) in upper_map.items(): - # Only follow rules which give map to a single code unit - if len(upper) > 1 or upper[0] >= 0x10000: - continue - # Do not allow non-ASCII characters to cross into ASCII. - if char >= 128 and upper[0] < 128: - continue - # Drop trivial mappings - if (char == upper[0]): - continue - print("%X;%X" % (char, upper[0])) diff --git a/regex/src/com.oracle.truffle.regex/tools/generate_ruby_case_folding.py b/regex/src/com.oracle.truffle.regex/tools/generate_ruby_case_folding.py deleted file mode 100755 index 36443b2e4ca7..000000000000 --- a/regex/src/com.oracle.truffle.regex/tools/generate_ruby_case_folding.py +++ /dev/null @@ -1,115 +0,0 @@ -#!/usr/bin/env python3 -# -# Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. -# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
-# -# The Universal Permissive License (UPL), Version 1.0 -# -# Subject to the condition set forth below, permission is hereby granted to any -# person obtaining a copy of this software, associated documentation and/or -# data (collectively the "Software"), free of charge and under any and all -# copyright rights in the Software, and any and all patent rights owned or -# freely licensable by each licensor hereunder covering either (i) the -# unmodified Software as contributed to or provided by such licensor, or (ii) -# the Larger Works (as defined below), to deal in both -# -# (a) the Software, and -# -# (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if -# one is included with the Software each a "Larger Work" to which the Software -# is contributed by such licensors), -# -# without restriction, including without limitation the rights to copy, create -# derivative works of, display, perform, and distribute the Software and make, -# use, sell, offer for sale, import, export, have made, and have sold the -# Software and the Larger Work(s), and to sublicense the foregoing rights on -# either these or other terms. -# -# This license is subject to the following condition: -# -# The above copyright notice and either this complete permission notice or at a -# minimum a reference to the UPL must be included in all copies or substantial -# portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. 
- -import re -import os - -os.chdir('dat') - -regex = re.compile(r'^([0-9A-Z]+);\s*[CF];((?:\s*[0-9A-Z])+)') - -data_file = open("CaseFolding.txt", "r") - -entries = [] - -for line in data_file: - m = regex.match(line) - if m is not None: - key = "0x{0}".format(m.group(1)) - cp_list = ", ".join([ "0x{0}".format(cp) for cp in m.group(2).strip().split() ]) - value = "new int[]{{{0}}}".format(cp_list) - entries.append(" CASE_FOLD.put({key}, {value});".format(key = key, value = value)) - -print(r"""/* - * Copyright (c) 2021, 2021, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * The Universal Permissive License (UPL), Version 1.0 - * - * Subject to the condition set forth below, permission is hereby granted to any - * person obtaining a copy of this software, associated documentation and/or - * data (collectively the "Software"), free of charge and under any and all - * copyright rights in the Software, and any and all patent rights owned or - * freely licensable by each licensor hereunder covering either (i) the - * unmodified Software as contributed to or provided by such licensor, or (ii) - * the Larger Works (as defined below), to deal in both - * - * (a) the Software, and - * - * (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if - * one is included with the Software each a "Larger Work" to which the Software - * is contributed by such licensors), - * - * without restriction, including without limitation the rights to copy, create - * derivative works of, display, perform, and distribute the Software and make, - * use, sell, offer for sale, import, export, have made, and have sold the - * Software and the Larger Work(s), and to sublicense the foregoing rights on - * either these or other terms. 
- * - * This license is subject to the following condition: - * - * The above copyright notice and either this complete permission notice or at a - * minimum a reference to the UPL must be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -package com.oracle.truffle.regex.tregex.parser.flavors; - -import java.util.SortedMap; -import java.util.TreeMap; - -public class RubyCaseFoldingData {{ - - public static final SortedMap CASE_FOLD; - - static {{ - CASE_FOLD = new TreeMap<>(); - -{} - }} -}}""".format("\n".join(entries))) diff --git a/regex/src/com.oracle.truffle.regex/tools/generate_special_casing_equivalences.py b/regex/src/com.oracle.truffle.regex/tools/generate_special_casing_equivalences.py deleted file mode 100755 index 57159136754d..000000000000 --- a/regex/src/com.oracle.truffle.regex/tools/generate_special_casing_equivalences.py +++ /dev/null @@ -1,70 +0,0 @@ -#!/usr/bin/env python3 -# -# Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. -# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
-# -# The Universal Permissive License (UPL), Version 1.0 -# -# Subject to the condition set forth below, permission is hereby granted to any -# person obtaining a copy of this software, associated documentation and/or -# data (collectively the "Software"), free of charge and under any and all -# copyright rights in the Software, and any and all patent rights owned or -# freely licensable by each licensor hereunder covering either (i) the -# unmodified Software as contributed to or provided by such licensor, or (ii) -# the Larger Works (as defined below), to deal in both -# -# (a) the Software, and -# -# (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if -# one is included with the Software each a "Larger Work" to which the Software -# is contributed by such licensors), -# -# without restriction, including without limitation the rights to copy, create -# derivative works of, display, perform, and distribute the Software and make, -# use, sell, offer for sale, import, export, have made, and have sold the -# Software and the Larger Work(s), and to sublicense the foregoing rights on -# either these or other terms. -# -# This license is subject to the following condition: -# -# The above copyright notice and either this complete permission notice or at a -# minimum a reference to the UPL must be included in all copies or substantial -# portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. 
- -# This script generates a list of case-equivalent Unicode code points using the -# SpecialCasing.txt file from Unicode. It expects this file to be in a folder -# called "dat". Two codepoints are considered equivalent if they map to the same -# sequence of codepoints using either the Lowercase or Uppercase function. Such -# cases are handled by including a special list of exceptions in sre_compile.py. - -inv_map = {} - -def add_mapping(codepoint, mapping): - if mapping not in inv_map: - inv_map[mapping] = [] - inv_map[mapping].append(codepoint) - -for line in open('dat/SpecialCasing.txt'): - if line.strip() == '' or line.startswith('#'): - continue - codepoint, lower, title, upper, *tail = [field.strip() for field in line.split(';')] - if len(tail) > 1: - # skip conditional mapping - continue - if ' ' in lower: - add_mapping(codepoint, lower) - if ' ' in upper: - add_mapping(codepoint, upper) - -for eq_class in inv_map.values(): - rep = eq_class[0] - for elem in eq_class[1:]: - print('{};{}'.format(rep, elem)) diff --git a/regex/src/com.oracle.truffle.regex/tools/run_scripts.sh b/regex/src/com.oracle.truffle.regex/tools/run_scripts.sh index 592a40647f84..eace11f75fd4 100755 --- a/regex/src/com.oracle.truffle.regex/tools/run_scripts.sh +++ b/regex/src/com.oracle.truffle.regex/tools/run_scripts.sh @@ -53,9 +53,6 @@ EMOJI_VERSION=15.0 mkdir -p ./dat -wget https://www.unicode.org/Public/${UNICODE_VERSION}/ucd/UnicodeData.txt -O dat/UnicodeData.txt -wget https://www.unicode.org/Public/${UNICODE_VERSION}/ucd/CaseFolding.txt -O dat/CaseFolding.txt -wget https://www.unicode.org/Public/${UNICODE_VERSION}/ucd/SpecialCasing.txt -O dat/SpecialCasing.txt wget https://www.unicode.org/Public/${UNICODE_VERSION}/ucd/PropertyAliases.txt -O dat/PropertyAliases.txt wget https://www.unicode.org/Public/${UNICODE_VERSION}/ucd/PropertyValueAliases.txt -O dat/PropertyValueAliases.txt wget https://www.unicode.org/Public/${UNICODE_VERSION}/ucd/NameAliases.txt -O dat/NameAliases.txt @@ 
-68,19 +65,16 @@ unzip -d dat dat/ucd.nounihan.flat.zip ./generate_unicode_properties.py > ../src/com/oracle/truffle/regex/charset/UnicodePropertyData.java -./unicode-script.sh - -clojure -Sdeps '{:paths ["."]}' -M --main generate-case-fold-table > dat/case-fold-table.txt - -./update_case_fold_table.py - -./generate_ruby_case_folding.py > ../src/com/oracle/truffle/regex/tregex/parser/flavors/RubyCaseFoldingData.java - ./generate_name_alias_table.py > ../src/com/oracle/truffle/regex/chardata/UnicodeCharacterAliases.java rm -r ./dat +pushd casefolding +cargo build --release && ./target/release/tregex-casefolding +rm -r ./tmp +popd + mx build -mx java -cp `mx paths regex:TREGEX`:`mx paths truffle:TRUFFLE_API`:`mx paths sdk:GRAAL_SDK` com.oracle.truffle.regex.charset.UnicodeGeneralCategoriesGenerator > ../src/com/oracle/truffle/regex/charset/UnicodeGeneralCategories.java +mx java -cp `mx paths regex:TREGEX`:`mx paths truffle:TRUFFLE_API`:`mx paths sdk:COLLECTIONS` com.oracle.truffle.regex.charset.UnicodeGeneralCategoriesGenerator > ../src/com/oracle/truffle/regex/charset/UnicodeGeneralCategories.java mx eclipseformat --primary || true diff --git a/regex/src/com.oracle.truffle.regex/tools/unicode-script.sh b/regex/src/com.oracle.truffle.regex/tools/unicode-script.sh deleted file mode 100755 index a0f39f340664..000000000000 --- a/regex/src/com.oracle.truffle.regex/tools/unicode-script.sh +++ /dev/null @@ -1,108 +0,0 @@ -#!/usr/bin/env bash -# -# Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. -# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
-# -# The Universal Permissive License (UPL), Version 1.0 -# -# Subject to the condition set forth below, permission is hereby granted to any -# person obtaining a copy of this software, associated documentation and/or -# data (collectively the "Software"), free of charge and under any and all -# copyright rights in the Software, and any and all patent rights owned or -# freely licensable by each licensor hereunder covering either (i) the -# unmodified Software as contributed to or provided by such licensor, or (ii) -# the Larger Works (as defined below), to deal in both -# -# (a) the Software, and -# -# (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if -# one is included with the Software each a "Larger Work" to which the Software -# is contributed by such licensors), -# -# without restriction, including without limitation the rights to copy, create -# derivative works of, display, perform, and distribute the Software and make, -# use, sell, offer for sale, import, export, have made, and have sold the -# Software and the Larger Work(s), and to sublicense the foregoing rights on -# either these or other terms. -# -# This license is subject to the following condition: -# -# The above copyright notice and either this complete permission notice or at a -# minimum a reference to the UPL must be included in all copies or substantial -# portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. 
-# - -set -e - - -# This script takes the CaseFolding.txt and UnicodeData.txt files of the Unicode -# character database and extracts from them the files UnicodeFoldTable.txt and -# NonUnicodeFoldTable.txt. These files contain definitions of the Canonicalize -# abstract function used in the ECMAScript spec to define case folding in -# regular expressions. UnicodeFoldTable.txt contains the definition of case -# folding for when the Unicode ('u') flag is present and NonUnicodeFoldTable.txt -# contains the definition of case folding for when the Unicode flag is missing. -# These two files are then picked up by the generate_case_fold_table.clj script -# which produces Java code that can be put into the CaseFoldTable class in -# TRegex. - -# We produce the table for the Canonicalize abstract function when the Unicode -# flag is present. The function is based on the contents of CodeFolding.txt. We -# remove any comments and empty lines from the file. We also remove items -# belonging from the full (F) and Turkic (T) mapping and only keep the simple -# (S) and common (C) ones. -cat dat/CaseFolding.txt \ - | sed -e '/^#/d' \ - -e '/^$/d' \ - -e '/; [FT]; /d' \ - -e 's/; /;/g' \ - | cut -d\; -f1,3 \ - > dat/UnicodeFoldTable.txt - -# We produce the table for the Canonicalize abstract function when the Unicode -# flag is not present. We extract the Unicode Case Conversion table from the -# UnicodeData.txt and SpecialCasing.txt files. We remove entries which map from -# non-ASCII code points (>= 128) to ASCII code points (< 128), as per the -# ECMAScript spec. We also drop the special entries which produce strings of more -# than one UTF-16 code unit. -./generate_nonunicode_fold_table.py > dat/NonUnicodeFoldTable.txt - - -# In Python's case insensitive regular expressions, characters are considered -# equivalent if they have the same Lowercase mapping. 
However, in some cases -# concerning character classes with non-BMP characters, Python also tries to -# match characters by considering their Uppercase mapping. In recent revisions of -# CPython 3, this is supplemented by an explicit list of equivalence classes of -# lowercase characters which are to be considered equal since they have the same -# Uppercase mapping. - -# Instead of relying on a list of exceptions, we generate the equivalence -# by considering any two characters equivalent if they map to each other or to -# some common target using either the Lowercase or Uppercase mapping (including -# complex cases from SpecialCasing.txt). - -# We make characters equivalent to their simple Uppercase and Lowercase -# mappings. We filter out the codepoint and the two character mappings, remove -# any empty fields by collapsing neighboring or terminating semicolons and -# finally removing any lines consisting of a single codepoint (the case when a -# character has no cased mappings). -cat dat/UnicodeData.txt \ - | cut -d\; -f1,13,14 \ - | sed -e 's/;\+/;/g' \ - -e 's/;$//' \ - -e '/^[^;]*$/d' \ - > dat/PythonSimpleCasing.txt - -./generate_special_casing_equivalences.py > dat/PythonExtendedCasing.txt - -# We produce the Python case fold table by merging the equivalences due to both -# the simple case mappings and the extended case mappings. -cat dat/PythonSimpleCasing.txt dat/PythonExtendedCasing.txt > dat/PythonFoldTable.txt diff --git a/regex/src/com.oracle.truffle.regex/tools/update_case_fold_table.py b/regex/src/com.oracle.truffle.regex/tools/update_case_fold_table.py deleted file mode 100755 index b827597f8f46..000000000000 --- a/regex/src/com.oracle.truffle.regex/tools/update_case_fold_table.py +++ /dev/null @@ -1,80 +0,0 @@ -#!/usr/bin/env python3 -# -# Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. -# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
-# -# The Universal Permissive License (UPL), Version 1.0 -# -# Subject to the condition set forth below, permission is hereby granted to any -# person obtaining a copy of this software, associated documentation and/or -# data (collectively the "Software"), free of charge and under any and all -# copyright rights in the Software, and any and all patent rights owned or -# freely licensable by each licensor hereunder covering either (i) the -# unmodified Software as contributed to or provided by such licensor, or (ii) -# the Larger Works (as defined below), to deal in both -# -# (a) the Software, and -# -# (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if -# one is included with the Software each a "Larger Work" to which the Software -# is contributed by such licensors), -# -# without restriction, including without limitation the rights to copy, create -# derivative works of, display, perform, and distribute the Software and make, -# use, sell, offer for sale, import, export, have made, and have sold the -# Software and the Larger Work(s), and to sublicense the foregoing rights on -# either these or other terms. -# -# This license is subject to the following condition: -# -# The above copyright notice and either this complete permission notice or at a -# minimum a reference to the UPL must be included in all copies or substantial -# portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. 
- -import sys -import os.path - - -def check_file_exists(path): - if not os.path.exists(path): - error(f'file "${path}" not found') - - -def error(msg): - print('ERROR: ' + msg) - sys.exit(1) - - -def main(): - file_name = 'CaseFoldTable.java' - file_path = '../src/com/oracle/truffle/regex/tregex/parser/' + file_name - replacement_file = './dat/case-fold-table.txt' - marker_begin = 'GENERATED CODE BEGIN' - marker_end = 'GENERATED CODE END' - - check_file_exists(file_path) - check_file_exists(replacement_file) - - with open(file_path, 'r') as f, open(replacement_file, 'r') as rf: - content = f.read() - i_begin = content.find(marker_begin) - i_end = content.find(marker_end) - if i_begin < 0: - error(f'could not find insertion marker "${marker_begin}" in ${file_name}') - if i_end < 0: - error(f'could not find end of insertion marker "${marker_begin}" in ${file_name}') - replacement = content[0:content.find('\n', i_begin) + 1] + '\n' + rf.read() + content[content.rfind('\n', i_begin, i_end):] - - with open(file_path, 'w') as f: - f.write(replacement) - - -main() diff --git a/sdk/CHANGELOG.md b/sdk/CHANGELOG.md index dd964eff5f47..ca9337076abf 100644 --- a/sdk/CHANGELOG.md +++ b/sdk/CHANGELOG.md @@ -2,6 +2,9 @@ This changelog summarizes major changes between GraalVM SDK versions. The main focus is on APIs exported by GraalVM SDK. +## Version 24.0.0 +* (GR-49334) Deprecated the `FileSystems#allowLanguageHomeAccess()` method and introduced `FileSystem#allowInternalResourceAccess()` as a replacement. To ensure compatibility, both methods now provide support for language homes and internal resources. + ## Version 23.1.0 * (GR-43819) The GraalVM SDK was split into several more fine-grained modules. The use of the graalvm-sdk module is now deprecated. Please update your Maven and module dependencies accordingly. Note that all APIs remain compatible. 
The following new modules are available: * `org.graalvm.nativeimage` A framework that allows to customize native image generation. diff --git a/sdk/src/org.graalvm.polyglot/snapshot.sigtest b/sdk/src/org.graalvm.polyglot/snapshot.sigtest index b3b6f5a5e5e1..1714553a080a 100644 --- a/sdk/src/org.graalvm.polyglot/snapshot.sigtest +++ b/sdk/src/org.graalvm.polyglot/snapshot.sigtest @@ -673,7 +673,9 @@ meth public java.lang.String getSeparator() meth public java.nio.charset.Charset getEncoding(java.nio.file.Path) meth public java.nio.file.Path getTempDirectory() meth public java.nio.file.Path readSymbolicLink(java.nio.file.Path) throws java.io.IOException +meth public static org.graalvm.polyglot.io.FileSystem allowInternalResourceAccess(org.graalvm.polyglot.io.FileSystem) meth public static org.graalvm.polyglot.io.FileSystem allowLanguageHomeAccess(org.graalvm.polyglot.io.FileSystem) + anno 0 java.lang.Deprecated(boolean forRemoval=false, java.lang.String since="") meth public static org.graalvm.polyglot.io.FileSystem newDefaultFileSystem() meth public static org.graalvm.polyglot.io.FileSystem newFileSystem(java.nio.file.FileSystem) meth public static org.graalvm.polyglot.io.FileSystem newReadOnlyFileSystem(org.graalvm.polyglot.io.FileSystem) diff --git a/sdk/src/org.graalvm.polyglot/src/org/graalvm/polyglot/Engine.java b/sdk/src/org.graalvm.polyglot/src/org/graalvm/polyglot/Engine.java index 0b3e4bd3727d..28fab9e930e8 100644 --- a/sdk/src/org.graalvm.polyglot/src/org/graalvm/polyglot/Engine.java +++ b/sdk/src/org.graalvm.polyglot/src/org/graalvm/polyglot/Engine.java @@ -2101,7 +2101,7 @@ public FileSystem newDefaultFileSystem(String hostTmpDir) { } @Override - public FileSystem allowLanguageHomeAccess(FileSystem fileSystem) { + public FileSystem allowInternalResourceAccess(FileSystem fileSystem) { throw noPolyglotImplementationFound(); } diff --git a/sdk/src/org.graalvm.polyglot/src/org/graalvm/polyglot/impl/AbstractPolyglotImpl.java 
b/sdk/src/org.graalvm.polyglot/src/org/graalvm/polyglot/impl/AbstractPolyglotImpl.java index 79038185b3b5..fab8c9d0933e 100644 --- a/sdk/src/org.graalvm.polyglot/src/org/graalvm/polyglot/impl/AbstractPolyglotImpl.java +++ b/sdk/src/org.graalvm.polyglot/src/org/graalvm/polyglot/impl/AbstractPolyglotImpl.java @@ -1389,8 +1389,8 @@ public FileSystem newDefaultFileSystem(String hostTmpDir) { return getNext().newDefaultFileSystem(hostTmpDir); } - public FileSystem allowLanguageHomeAccess(FileSystem fileSystem) { - return getNext().allowLanguageHomeAccess(fileSystem); + public FileSystem allowInternalResourceAccess(FileSystem fileSystem) { + return getNext().allowInternalResourceAccess(fileSystem); } public FileSystem newReadOnlyFileSystem(FileSystem fileSystem) { diff --git a/sdk/src/org.graalvm.polyglot/src/org/graalvm/polyglot/io/FileSystem.java b/sdk/src/org.graalvm.polyglot/src/org/graalvm/polyglot/io/FileSystem.java index 9960324819b3..2d3e9040d153 100644 --- a/sdk/src/org.graalvm.polyglot/src/org/graalvm/polyglot/io/FileSystem.java +++ b/sdk/src/org.graalvm.polyglot/src/org/graalvm/polyglot/io/FileSystem.java @@ -67,6 +67,7 @@ import java.util.Objects; import java.util.Set; import org.graalvm.polyglot.Context; +import org.graalvm.polyglot.Engine; import org.graalvm.polyglot.io.IOAccess.Builder; /** @@ -512,9 +513,31 @@ static FileSystem newDefaultFileSystem() { * {@link #getPathSeparator() path separator} as the {@link #newDefaultFileSystem() * default file system}. * @since 22.2 + * @deprecated Use {{@link #allowInternalResourceAccess(FileSystem)}}. */ + @Deprecated static FileSystem allowLanguageHomeAccess(FileSystem fileSystem) { - return IOHelper.ImplHolder.IMPL.allowLanguageHomeAccess(fileSystem); + return allowInternalResourceAccess(fileSystem); + } + + /** + * Decorates the given {@code fileSystem} by an implementation that forwards access to the + * internal resources to the default file system. 
The method is intended to be used by custom + * filesystem implementations with non default storage to allow guest languages to access + * internal resources. As the returned filesystem uses a default file system to access internal + * resources, the {@code fileSystem} has to use the same {@link Path} type, + * {@link #getSeparator() separator} and {@link #getPathSeparator() path separator} as the + * {@link #newDefaultFileSystem() default filesystem}. + * + * @throws IllegalArgumentException when the {@code fileSystem} does not use the same + * {@link Path} type or has a different {@link #getSeparator() separator} or + * {@link #getPathSeparator() path separator} as the {@link #newDefaultFileSystem() + * default file system}. + * @see Engine#copyResources(Path, String...) + * @since 24.0 + */ + static FileSystem allowInternalResourceAccess(FileSystem fileSystem) { + return IOHelper.ImplHolder.IMPL.allowInternalResourceAccess(fileSystem); } /** diff --git a/substratevm/src/com.oracle.graal.pointsto/src/com/oracle/graal/pointsto/api/HostVM.java b/substratevm/src/com.oracle.graal.pointsto/src/com/oracle/graal/pointsto/api/HostVM.java index f13a3a6af331..5fb82aaff105 100644 --- a/substratevm/src/com.oracle.graal.pointsto/src/com/oracle/graal/pointsto/api/HostVM.java +++ b/substratevm/src/com.oracle.graal.pointsto/src/com/oracle/graal/pointsto/api/HostVM.java @@ -354,6 +354,13 @@ public interface MultiMethodAnalysisPolicy { * return values. */ boolean insertPlaceholderParamAndReturnFlows(MultiMethod.MultiMethodKey multiMethodKey); + + /** + * Some methods can be transformed after analysis; in these cases we do not know what the + * returned value will be. 
+ */ + boolean unknownReturnValue(BigBang bb, MultiMethod.MultiMethodKey callerMultiMethodKey, AnalysisMethod implementation); + } /** @@ -385,6 +392,11 @@ public boolean canComputeReturnedParameterIndex(MultiMethod.MultiMethodKey multi public boolean insertPlaceholderParamAndReturnFlows(MultiMethod.MultiMethodKey multiMethodKey) { return false; } + + @Override + public boolean unknownReturnValue(BigBang bb, MultiMethod.MultiMethodKey callerMultiMethodKey, AnalysisMethod implementation) { + return false; + } }; public MultiMethodAnalysisPolicy getMultiMethodAnalysisPolicy() { diff --git a/substratevm/src/com.oracle.graal.pointsto/src/com/oracle/graal/pointsto/flow/InvokeTypeFlow.java b/substratevm/src/com.oracle.graal.pointsto/src/com/oracle/graal/pointsto/flow/InvokeTypeFlow.java index dbb3e7caf21a..ece8f7c168fe 100644 --- a/substratevm/src/com.oracle.graal.pointsto/src/com/oracle/graal/pointsto/flow/InvokeTypeFlow.java +++ b/substratevm/src/com.oracle.graal.pointsto/src/com/oracle/graal/pointsto/flow/InvokeTypeFlow.java @@ -203,14 +203,16 @@ protected void updateReceiver(PointsToAnalysis bb, MethodFlowsGraphInfo calleeFl } protected void updateReceiver(PointsToAnalysis bb, MethodFlowsGraphInfo calleeFlows, TypeState receiverTypeState) { - if (bb.getHostVM().getMultiMethodAnalysisPolicy().performParameterLinking(callerMultiMethodKey, calleeFlows.getMethod().getMultiMethodKey())) { + var analysisPolicy = bb.getHostVM().getMultiMethodAnalysisPolicy(); + var calleeKey = calleeFlows.getMethod().getMultiMethodKey(); + if (analysisPolicy.performParameterLinking(callerMultiMethodKey, calleeKey)) { FormalReceiverTypeFlow formalReceiverFlow = calleeFlows.getFormalReceiver(); if (formalReceiverFlow != null) { formalReceiverFlow.addReceiverState(bb, receiverTypeState); } } - if (bb.getHostVM().getMultiMethodAnalysisPolicy().performReturnLinking(callerMultiMethodKey, calleeFlows.getMethod().getMultiMethodKey())) { + if 
(analysisPolicy.performReturnLinking(callerMultiMethodKey, calleeKey) && !analysisPolicy.unknownReturnValue(bb, callerMultiMethodKey, calleeFlows.getMethod())) { if (bb.optimizeReturnedParameter()) { int paramIndex = calleeFlows.getMethod().getTypeFlow().getReturnedParameterIndex(); if (actualReturn != null && paramIndex == 0) { @@ -279,7 +281,12 @@ public void linkReturn(PointsToAnalysis bb, boolean isStatic, MethodFlowsGraphIn * created for the return, then {@code setActualReturn} will perform all necessary linking. */ if (actualReturn != null && bb.getHostVM().getMultiMethodAnalysisPolicy().performReturnLinking(callerMultiMethodKey, calleeFlows.getMethod().getMultiMethodKey())) { - if (bb.optimizeReturnedParameter()) { + if (bb.getHostVM().getMultiMethodAnalysisPolicy().unknownReturnValue(bb, callerMultiMethodKey, calleeFlows.getMethod())) { + /* + * When there is an unknown return value we must be conservative. + */ + actualReturn.declaredType.getTypeFlow(bb, true).addUse(bb, actualReturn); + } else if (bb.optimizeReturnedParameter()) { int paramNodeIndex = calleeFlows.getMethod().getTypeFlow().getReturnedParameterIndex(); if (paramNodeIndex != -1) { if (isStatic || paramNodeIndex != 0) { diff --git a/substratevm/src/com.oracle.svm.core.genscavenge/src/com/oracle/svm/core/genscavenge/AbstractMemoryPoolMXBean.java b/substratevm/src/com.oracle.svm.core.genscavenge/src/com/oracle/svm/core/genscavenge/AbstractMemoryPoolMXBean.java index 1fc938b07098..d0724fd7b597 100644 --- a/substratevm/src/com.oracle.svm.core.genscavenge/src/com/oracle/svm/core/genscavenge/AbstractMemoryPoolMXBean.java +++ b/substratevm/src/com.oracle.svm.core.genscavenge/src/com/oracle/svm/core/genscavenge/AbstractMemoryPoolMXBean.java @@ -45,12 +45,13 @@ public abstract class AbstractMemoryPoolMXBean extends AbstractMXBean implements MemoryPoolMXBean { + protected static final UnsignedWord UNDEFINED = WordFactory.unsigned(UNDEFINED_MEMORY_USAGE); + private final String name; private final 
String[] managerNames; protected final UninterruptibleUtils.AtomicUnsigned peakUsage = new UninterruptibleUtils.AtomicUnsigned(); - private static final UnsignedWord UNDEFINED = WordFactory.zero(); - protected UnsignedWord initialValue = UNDEFINED; + private UnsignedWord initialValue = UNDEFINED; @Platforms(Platform.HOSTED_ONLY.class) protected AbstractMemoryPoolMXBean(String name, String... managerNames) { @@ -67,8 +68,6 @@ UnsignedWord getInitialValue() { abstract UnsignedWord computeInitialValue(); - abstract UnsignedWord getMaximumValue(); - abstract void beforeCollection(); abstract void afterCollection(); @@ -167,4 +166,9 @@ void updatePeakUsage(UnsignedWord value) { current = peakUsage.get(); } while (value.aboveThan(current) && !peakUsage.compareAndSet(current, value)); } + + protected UnsignedWord getMaximumValue() { + /* Actual usage may temporarily exceed the maximum, so we need to return UNDEFINED. */ + return UNDEFINED; + } } diff --git a/substratevm/src/com.oracle.svm.core.genscavenge/src/com/oracle/svm/core/genscavenge/GCImpl.java b/substratevm/src/com.oracle.svm.core.genscavenge/src/com/oracle/svm/core/genscavenge/GCImpl.java index 3ff352baf538..2ccf9cdea9d0 100644 --- a/substratevm/src/com.oracle.svm.core.genscavenge/src/com/oracle/svm/core/genscavenge/GCImpl.java +++ b/substratevm/src/com.oracle.svm.core.genscavenge/src/com/oracle/svm/core/genscavenge/GCImpl.java @@ -29,7 +29,6 @@ import java.lang.ref.Reference; -import jdk.graal.compiler.api.replacements.Fold; import org.graalvm.nativeimage.CurrentIsolate; import org.graalvm.nativeimage.IsolateThread; import org.graalvm.nativeimage.Platform; @@ -97,6 +96,8 @@ import com.oracle.svm.core.util.TimeUtils; import com.oracle.svm.core.util.VMError; +import jdk.graal.compiler.api.replacements.Fold; + /** * Garbage collector (incremental or complete) for {@link HeapImpl}. 
*/ @@ -211,14 +212,14 @@ assert getCollectionEpoch().equal(data.getRequestingEpoch()) || printGCBefore(cause); ThreadLocalAllocation.disableAndFlushForAllThreads(); - GenScavengeMemoryPoolMXBeans.notifyBeforeCollection(); + GenScavengeMemoryPoolMXBeans.singleton().notifyBeforeCollection(); HeapImpl.getAccounting().notifyBeforeCollection(); boolean outOfMemory = collectImpl(cause, data.getRequestingNanoTime(), data.getForceFullGC()); data.setOutOfMemory(outOfMemory); HeapImpl.getAccounting().notifyAfterCollection(); - GenScavengeMemoryPoolMXBeans.notifyAfterCollection(); + GenScavengeMemoryPoolMXBeans.singleton().notifyAfterCollection(); printGCAfter(cause); JfrGCHeapSummaryEvent.emit(JfrGCWhen.AFTER_GC); diff --git a/substratevm/src/com.oracle.svm.core.genscavenge/src/com/oracle/svm/core/genscavenge/GenScavengeMemoryPoolMXBeans.java b/substratevm/src/com.oracle.svm.core.genscavenge/src/com/oracle/svm/core/genscavenge/GenScavengeMemoryPoolMXBeans.java index c60fce4681b1..0eb785bd61db 100644 --- a/substratevm/src/com.oracle.svm.core.genscavenge/src/com/oracle/svm/core/genscavenge/GenScavengeMemoryPoolMXBeans.java +++ b/substratevm/src/com.oracle.svm.core.genscavenge/src/com/oracle/svm/core/genscavenge/GenScavengeMemoryPoolMXBeans.java @@ -25,9 +25,9 @@ */ package com.oracle.svm.core.genscavenge; -import java.lang.management.MemoryPoolMXBean; import java.lang.management.MemoryUsage; +import org.graalvm.nativeimage.ImageSingletons; import org.graalvm.nativeimage.Platform; import org.graalvm.nativeimage.Platforms; import org.graalvm.word.UnsignedWord; @@ -36,8 +36,9 @@ import com.oracle.svm.core.SubstrateOptions; import com.oracle.svm.core.util.VMError; -public class GenScavengeMemoryPoolMXBeans { +import jdk.graal.compiler.api.replacements.Fold; +public class GenScavengeMemoryPoolMXBeans { static final String YOUNG_GEN_SCAVENGER = "young generation scavenger"; static final String COMPLETE_SCAVENGER = "complete scavenger"; static final String EPSILON_SCAVENGER = 
"epsilon scavenger"; @@ -47,10 +48,10 @@ public class GenScavengeMemoryPoolMXBeans { static final String OLD_GEN_SPACE = "old generation space"; static final String EPSILON_HEAP = "epsilon heap"; - private static AbstractMemoryPoolMXBean[] mxBeans; + private final AbstractMemoryPoolMXBean[] mxBeans; @Platforms(Platform.HOSTED_ONLY.class) - public static MemoryPoolMXBean[] createMemoryPoolMXBeans() { + public GenScavengeMemoryPoolMXBeans() { if (SubstrateOptions.UseSerialGC.getValue()) { mxBeans = new AbstractMemoryPoolMXBean[]{ new EdenMemoryPoolMXBean(YOUNG_GEN_SCAVENGER, COMPLETE_SCAVENGER), @@ -63,16 +64,24 @@ public static MemoryPoolMXBean[] createMemoryPoolMXBeans() { new EpsilonMemoryPoolMXBean(EPSILON_SCAVENGER) }; } + } + + @Fold + public static GenScavengeMemoryPoolMXBeans singleton() { + return ImageSingletons.lookup(GenScavengeMemoryPoolMXBeans.class); + } + + public AbstractMemoryPoolMXBean[] getMXBeans() { return mxBeans; } - public static void notifyBeforeCollection() { + public void notifyBeforeCollection() { for (AbstractMemoryPoolMXBean mxBean : mxBeans) { mxBean.beforeCollection(); } } - public static void notifyAfterCollection() { + public void notifyAfterCollection() { for (AbstractMemoryPoolMXBean mxBean : mxBeans) { mxBean.afterCollection(); } @@ -100,11 +109,6 @@ UnsignedWord computeInitialValue() { return GCImpl.getPolicy().getInitialEdenSize(); } - @Override - UnsignedWord getMaximumValue() { - return GCImpl.getPolicy().getMaximumEdenSize(); - } - @Override public MemoryUsage getUsage() { return memoryUsage(getCurrentUsage()); @@ -148,11 +152,6 @@ UnsignedWord computeInitialValue() { return GCImpl.getPolicy().getInitialSurvivorSize(); } - @Override - UnsignedWord getMaximumValue() { - return GCImpl.getPolicy().getMaximumSurvivorSize(); - } - @Override public MemoryUsage getUsage() { return getCollectionUsage(); @@ -191,11 +190,6 @@ UnsignedWord computeInitialValue() { return GCImpl.getPolicy().getInitialOldSize(); } - @Override - 
UnsignedWord getMaximumValue() { - return GCImpl.getPolicy().getMaximumOldSize(); - } - @Override public MemoryUsage getUsage() { return getCollectionUsage(); @@ -234,11 +228,6 @@ UnsignedWord computeInitialValue() { return GCImpl.getPolicy().getMinimumHeapSize(); } - @Override - UnsignedWord getMaximumValue() { - return GCImpl.getPolicy().getMaximumHeapSize(); - } - @Override public MemoryUsage getUsage() { HeapAccounting accounting = HeapImpl.getAccounting(); diff --git a/substratevm/src/com.oracle.svm.core.genscavenge/src/com/oracle/svm/core/genscavenge/graal/GenScavengeGCFeature.java b/substratevm/src/com.oracle.svm.core.genscavenge/src/com/oracle/svm/core/genscavenge/graal/GenScavengeGCFeature.java index 6d3abab9a557..6727381fad42 100644 --- a/substratevm/src/com.oracle.svm.core.genscavenge/src/com/oracle/svm/core/genscavenge/graal/GenScavengeGCFeature.java +++ b/substratevm/src/com.oracle.svm.core.genscavenge/src/com/oracle/svm/core/genscavenge/graal/GenScavengeGCFeature.java @@ -24,15 +24,11 @@ */ package com.oracle.svm.core.genscavenge.graal; -import java.lang.management.MemoryPoolMXBean; import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.Map; -import jdk.graal.compiler.graph.Node; -import jdk.graal.compiler.options.OptionValues; -import jdk.graal.compiler.phases.util.Providers; import org.graalvm.nativeimage.ImageSingletons; import org.graalvm.nativeimage.hosted.Feature; @@ -70,6 +66,10 @@ import com.oracle.svm.core.jvmstat.PerfManager; import com.sun.management.GarbageCollectorMXBean; +import jdk.graal.compiler.graph.Node; +import jdk.graal.compiler.options.OptionValues; +import jdk.graal.compiler.phases.util.Providers; + @AutomaticallyRegisteredFeature class GenScavengeGCFeature implements InternalFeature { @Override @@ -95,7 +95,9 @@ public void duringSetup(DuringSetupAccess access) { ImageSingletons.add(Heap.class, heap); ImageSingletons.add(GCAllocationSupport.class, new GenScavengeAllocationSupport()); - 
List memoryPools = Arrays.asList(GenScavengeMemoryPoolMXBeans.createMemoryPoolMXBeans()); + GenScavengeMemoryPoolMXBeans memoryPoolMXBeans = new GenScavengeMemoryPoolMXBeans(); + ImageSingletons.add(GenScavengeMemoryPoolMXBeans.class, memoryPoolMXBeans); + List garbageCollectors; if (SubstrateOptions.UseEpsilonGC.getValue()) { garbageCollectors = Arrays.asList(new EpsilonGarbageCollectorMXBean()); @@ -105,7 +107,7 @@ public void duringSetup(DuringSetupAccess access) { ManagementSupport managementSupport = ManagementSupport.getSingleton(); managementSupport.addPlatformManagedObjectSingleton(java.lang.management.MemoryMXBean.class, new HeapImplMemoryMXBean()); - managementSupport.addPlatformManagedObjectList(java.lang.management.MemoryPoolMXBean.class, memoryPools); + managementSupport.addPlatformManagedObjectList(java.lang.management.MemoryPoolMXBean.class, Arrays.asList(memoryPoolMXBeans.getMXBeans())); managementSupport.addPlatformManagedObjectList(com.sun.management.GarbageCollectorMXBean.class, garbageCollectors); /* Not supported yet. 
*/ managementSupport.addPlatformManagedObjectList(java.lang.management.BufferPoolMXBean.class, Collections.emptyList()); diff --git a/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/SubstrateDiagnostics.java b/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/SubstrateDiagnostics.java index 0498fcbf22d5..f359c4b72437 100644 --- a/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/SubstrateDiagnostics.java +++ b/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/SubstrateDiagnostics.java @@ -30,16 +30,6 @@ import java.util.Arrays; import org.graalvm.collections.EconomicMap; -import jdk.graal.compiler.api.replacements.Fold; -import jdk.graal.compiler.core.common.NumUtil; -import jdk.graal.compiler.core.common.SuppressFBWarnings; -import jdk.graal.compiler.nodes.PauseNode; -import jdk.graal.compiler.nodes.java.ArrayLengthNode; -import jdk.graal.compiler.options.Option; -import jdk.graal.compiler.options.OptionKey; -import jdk.graal.compiler.options.OptionType; -import jdk.graal.compiler.word.ObjectAccess; -import jdk.graal.compiler.word.Word; import org.graalvm.nativeimage.CurrentIsolate; import org.graalvm.nativeimage.ImageSingletons; import org.graalvm.nativeimage.IsolateThread; @@ -101,6 +91,17 @@ import com.oracle.svm.core.util.TimeUtils; import com.oracle.svm.core.util.VMError; +import jdk.graal.compiler.api.replacements.Fold; +import jdk.graal.compiler.core.common.NumUtil; +import jdk.graal.compiler.core.common.SuppressFBWarnings; +import jdk.graal.compiler.nodes.PauseNode; +import jdk.graal.compiler.nodes.java.ArrayLengthNode; +import jdk.graal.compiler.options.Option; +import jdk.graal.compiler.options.OptionKey; +import jdk.graal.compiler.options.OptionType; +import jdk.graal.compiler.word.ObjectAccess; +import jdk.graal.compiler.word.Word; + public class SubstrateDiagnostics { private static final int MAX_THREADS_TO_PRINT = 100_000; private static final int MAX_FRAME_ANCHORS_TO_PRINT_PER_THREAD = 1000; @@ -506,6 
+507,31 @@ private static boolean matches(String text, int t, String pattern, int p) { return patternPos == pattern.length(); } + /* Scan the stack until we find a valid return address. We may encounter false-positives. */ + private static Pointer findPotentialReturnAddressPosition(Pointer originalSp) { + UnsignedWord stackBase = VMThreads.StackBase.get(); + if (stackBase.equal(0)) { + /* We don't know the stack boundaries, so only search within 32 bytes. */ + stackBase = originalSp.add(32); + } + + int wordSize = ConfigurationValues.getTarget().wordSize; + Pointer pos = originalSp; + while (pos.belowThan(stackBase)) { + CodePointer possibleIp = pos.readWord(0); + if (pointsIntoNativeImageCode(possibleIp)) { + return pos; + } + pos = pos.add(wordSize); + } + return WordFactory.nullPointer(); + } + + @Uninterruptible(reason = "Prevent the GC from freeing the CodeInfo.") + private static boolean pointsIntoNativeImageCode(CodePointer possibleIp) { + return CodeInfoTable.lookupCodeInfo(possibleIp).isNonNull(); + } + public static class FatalErrorState { AtomicWord diagnosticThread; volatile int diagnosticThunkIndex; @@ -593,15 +619,31 @@ public int maxInvocationCount() { @RestrictHeapAccess(access = RestrictHeapAccess.Access.NO_ALLOCATION, reason = "Must not allocate while printing diagnostics.") public void printDiagnostics(Log log, ErrorContext context, int maxDiagnosticLevel, int invocationCount) { CodePointer ip = context.getInstructionPointer(); - log.string("Printing instructions (ip=").zhex(ip).string("):").indent(true); - if (ip.isNull()) { - // can't print any instructions - } else if (invocationCount < 4) { - // print 512, 128, or 32 instruction bytes. + log.string("Printing instructions (ip=").zhex(ip).string("):"); + + if (((Pointer) ip).belowThan(VirtualMemoryProvider.get().getGranularity())) { + /* IP points into the first page of the virtual address space. 
*/ + Pointer originalSp = context.getStackPointer(); + log.string(" IP is invalid"); + + Pointer returnAddressPos = findPotentialReturnAddressPosition(originalSp); + if (returnAddressPos.isNull()) { + log.string(", instructions cannot be printed.").newline(); + return; + } + + ip = returnAddressPos.readWord(0); + Pointer sp = returnAddressPos.add(FrameAccess.returnAddressSize()); + log.string(", printing instructions (ip=").zhex(ip).string(") of the most likely caller (sp + ").unsigned(sp.subtract(originalSp)).string(") instead"); + } + + log.indent(true); + if (invocationCount < 4) { + /* Print 512, 128, or 32 instruction bytes. */ int bytesToPrint = 1024 >> (invocationCount * 2); hexDump(log, ip, bytesToPrint, bytesToPrint); } else if (invocationCount == 4) { - // just print one word starting at the ip + /* Just print one word starting at the ip. */ hexDump(log, ip, 0, ConfigurationValues.getTarget().wordSize); } log.indent(false).newline(); @@ -992,31 +1034,16 @@ public void printDiagnostics(Log log, ErrorContext context, int maxDiagnosticLev } private static void startStackWalkInMostLikelyCaller(Log log, int invocationCount, Pointer originalSp) { - UnsignedWord stackBase = VMThreads.StackBase.get(); - if (stackBase.equal(0)) { - /* We don't know the stack boundaries, so only search within 32 bytes. */ - stackBase = originalSp.add(32); - } - - /* Search until we find a valid return address. We may encounter false-positives. 
*/ - int wordSize = ConfigurationValues.getTarget().wordSize; - Pointer pos = originalSp; - while (pos.belowThan(stackBase)) { - CodePointer possibleIp = pos.readWord(0); - if (pointsIntoNativeImageCode(possibleIp)) { - Pointer sp = pos.add(wordSize); - log.newline(); - log.string("Starting the stack walk in a possible caller:").newline(); - ThreadStackPrinter.printStacktrace(sp, possibleIp, printVisitors[invocationCount - 1].reset(), log); - break; - } - pos = pos.add(wordSize); + Pointer returnAddressPos = findPotentialReturnAddressPosition(originalSp); + if (returnAddressPos.isNull()) { + return; } - } - @Uninterruptible(reason = "Prevent the GC from freeing the CodeInfo.") - private static boolean pointsIntoNativeImageCode(CodePointer possibleIp) { - return CodeInfoTable.lookupCodeInfo(possibleIp).isNonNull(); + CodePointer possibleIp = returnAddressPos.readWord(0); + Pointer sp = returnAddressPos.add(FrameAccess.returnAddressSize()); + log.newline(); + log.string("Starting the stack walk in a possible caller (sp + ").unsigned(sp.subtract(originalSp)).string("):").newline(); + ThreadStackPrinter.printStacktrace(sp, possibleIp, printVisitors[invocationCount - 1].reset(), log); } } diff --git a/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/graal/snippets/SubstrateAllocationSnippets.java b/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/graal/snippets/SubstrateAllocationSnippets.java index 541938993712..02a08a275d22 100644 --- a/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/graal/snippets/SubstrateAllocationSnippets.java +++ b/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/graal/snippets/SubstrateAllocationSnippets.java @@ -130,7 +130,7 @@ protected Object allocateInstance(@NonNullParameter DynamicHub hub, @ConstantParameter FillContent fillContents, @ConstantParameter boolean emitMemoryBarrier, @ConstantParameter AllocationProfilingData profilingData) { - Object result = 
allocateInstanceImpl(encodeAsTLABObjectHeader(hub), WordFactory.unsigned(size), fillContents, emitMemoryBarrier, true, profilingData); + Object result = allocateInstanceImpl(encodeAsTLABObjectHeader(hub), WordFactory.unsigned(size), false, fillContents, emitMemoryBarrier, true, profilingData); return piCastToSnippetReplaceeStamp(result); } @@ -229,7 +229,7 @@ protected Object allocateInstanceDynamicImpl(DynamicHub hub, FillContent fillCon @SuppressWarnings("unused") boolean supportsOptimizedFilling, AllocationProfilingData profilingData) { // The hub was already verified by a ValidateNewInstanceClassNode. UnsignedWord size = LayoutEncoding.getPureInstanceAllocationSize(hub.getLayoutEncoding()); - Object result = allocateInstanceImpl(encodeAsTLABObjectHeader(hub), size, fillContents, emitMemoryBarrier, false, profilingData); + Object result = allocateInstanceImpl(encodeAsTLABObjectHeader(hub), size, false, fillContents, emitMemoryBarrier, false, profilingData); return piCastToSnippetReplaceeStamp(result); } diff --git a/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/stack/JavaStackWalker.java b/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/stack/JavaStackWalker.java index 84ab46d13600..a8923223c386 100644 --- a/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/stack/JavaStackWalker.java +++ b/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/stack/JavaStackWalker.java @@ -45,6 +45,7 @@ import com.oracle.svm.core.heap.RestrictHeapAccess; import com.oracle.svm.core.log.Log; import com.oracle.svm.core.thread.VMOperation; +import com.oracle.svm.core.thread.VMThreads.SafepointBehavior; import com.oracle.svm.core.util.VMError; /** @@ -134,6 +135,11 @@ public static boolean initWalk(JavaStackWalk walk, IsolateThread thread) { assert thread.notEqual(CurrentIsolate.getCurrentThread()) : "Cannot walk the current stack with this method, it would miss all frames after the last frame anchor"; assert 
VMOperation.isInProgressAtSafepoint() : "Walking the stack of another thread is only safe when that thread is stopped at a safepoint"; + if (SafepointBehavior.isCrashedThread(thread)) { + /* Skip crashed threads because they may no longer have a stack. */ + return false; + } + JavaFrameAnchor anchor = JavaFrameAnchors.getFrameAnchor(thread); boolean result = anchor.isNonNull(); Pointer sp = WordFactory.nullPointer(); diff --git a/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/thread/Safepoint.java b/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/thread/Safepoint.java index 154ede835acb..49ed1d06b494 100644 --- a/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/thread/Safepoint.java +++ b/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/thread/Safepoint.java @@ -876,42 +876,7 @@ public UnsignedWord getSafepointId() { return safepointId; } - /** A sample method to execute in a VMOperation. */ public static class TestingBackdoor { - - public static int countingVMOperation() { - final Log trace = Log.log().string("[Safepoint.Master.TestingBackdoor.countingVMOperation:").newline(); - int atSafepoint = 0; - int ignoreSafepoints = 0; - int notAtSafepoint = 0; - - for (IsolateThread vmThread = VMThreads.firstThread(); vmThread.isNonNull(); vmThread = VMThreads.nextThread(vmThread)) { - int safepointBehavior = SafepointBehavior.getSafepointBehaviorVolatile(vmThread); - int status = StatusSupport.getStatusVolatile(vmThread); - if (safepointBehavior == SafepointBehavior.PREVENT_VM_FROM_REACHING_SAFEPOINT) { - notAtSafepoint++; - } else if (safepointBehavior == SafepointBehavior.THREAD_CRASHED) { - ignoreSafepoints += 1; - } else { - assert safepointBehavior == SafepointBehavior.ALLOW_SAFEPOINT; - // Check if the thread is at a safepoint or in native code. 
- switch (status) { - case StatusSupport.STATUS_IN_SAFEPOINT: - atSafepoint += 1; - break; - default: - notAtSafepoint += 1; - break; - } - } - } - trace.string(" atSafepoint: ").signed(atSafepoint) - .string(" ignoreSafepoints: ").signed(ignoreSafepoints) - .string(" notAtSafepoint: ").signed(notAtSafepoint); - trace.string("]").newline(); - return atSafepoint; - } - @Uninterruptible(reason = "Called from uninterruptible code.", mayBeInlined = true) public static int getCurrentThreadSafepointRequestedCount() { return getSafepointRequested(CurrentIsolate.getCurrentThread()); diff --git a/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/thread/VMThreads.java b/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/thread/VMThreads.java index dc9b724b2fc9..f7557013a637 100644 --- a/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/thread/VMThreads.java +++ b/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/thread/VMThreads.java @@ -888,13 +888,13 @@ public static class SafepointBehavior { * The thread won't freeze at a safepoint, and will actively prevent the VM from reaching a * safepoint (regardless of the thread status). */ - static final int PREVENT_VM_FROM_REACHING_SAFEPOINT = 1; + public static final int PREVENT_VM_FROM_REACHING_SAFEPOINT = 1; /** * The thread won't freeze at a safepoint and the safepoint handling will ignore the thread. * So, the VM will be able to reach a safepoint regardless of the status of this thread. 
*/ - static final int THREAD_CRASHED = 2; + public static final int THREAD_CRASHED = 2; @Uninterruptible(reason = "Called from uninterruptible code.", mayBeInlined = true) public static boolean ignoresSafepoints() { @@ -945,6 +945,11 @@ public static void markThreadAsCrashed() { safepointBehaviorTL.setVolatile(THREAD_CRASHED); } + @Uninterruptible(reason = "Called from uninterruptible code.", mayBeInlined = true) + public static boolean isCrashedThread(IsolateThread thread) { + return safepointBehaviorTL.getVolatile(thread) == THREAD_CRASHED; + } + @Uninterruptible(reason = "Called from uninterruptible code.", mayBeInlined = true) public static String toString(int safepointBehavior) { switch (safepointBehavior) { diff --git a/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/hosted/ParseOnceDeoptTestFeature.java b/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/hosted/ParseOnceDeoptTestFeature.java index 14329990aca0..aa9d43fe94f8 100644 --- a/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/hosted/ParseOnceDeoptTestFeature.java +++ b/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/hosted/ParseOnceDeoptTestFeature.java @@ -244,5 +244,10 @@ public boolean insertPlaceholderParamAndReturnFlows(MultiMethod.MultiMethodKey m */ return multiMethodKey == DEOPT_TARGET_METHOD; } + + @Override + public boolean unknownReturnValue(BigBang bb, MultiMethod.MultiMethodKey callerMultiMethodKey, AnalysisMethod implementation) { + return false; + } } } diff --git a/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/hosted/ParseOnceRuntimeCompilationFeature.java b/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/hosted/ParseOnceRuntimeCompilationFeature.java index 5409eb73a922..21d7e16abdd9 100644 --- a/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/hosted/ParseOnceRuntimeCompilationFeature.java +++ 
b/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/hosted/ParseOnceRuntimeCompilationFeature.java @@ -1253,6 +1253,23 @@ public boolean canComputeReturnedParameterIndex(MultiMethod.MultiMethodKey multi public boolean insertPlaceholderParamAndReturnFlows(MultiMethod.MultiMethodKey multiMethodKey) { return multiMethodKey == DEOPT_TARGET_METHOD || multiMethodKey == RUNTIME_COMPILED_METHOD; } + + @Override + public boolean unknownReturnValue(BigBang bb, MultiMethod.MultiMethodKey callerMultiMethodKey, AnalysisMethod implementation) { + if (callerMultiMethodKey == RUNTIME_COMPILED_METHOD || implementation.isDeoptTarget()) { + /* + * If the method may be intrinsified later, the implementation can change. + * + * We also must ensure deopt methods always return a superset of the original + * method. + */ + var origImpl = implementation.getMultiMethod(ORIGINAL_METHOD); + var options = bb.getOptions(); + return (hostedProviders.getGraphBuilderPlugins().getInvocationPlugins().lookupInvocation(origImpl, options) != null) || + hostedProviders.getReplacements().hasSubstitution(origImpl, options); + } + return false; + } } /** diff --git a/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/substitutions/GraalSubstitutions.java b/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/substitutions/GraalSubstitutions.java index 338206c69f9b..29342b33f5a5 100644 --- a/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/substitutions/GraalSubstitutions.java +++ b/substratevm/src/com.oracle.svm.graal/src/com/oracle/svm/graal/substitutions/GraalSubstitutions.java @@ -34,44 +34,9 @@ import java.util.Map; import java.util.concurrent.ConcurrentHashMap; -import com.oracle.svm.graal.GraalSupport; -import jdk.graal.compiler.core.match.MatchRuleRegistry; -import jdk.graal.compiler.debug.KeyRegistry; -import jdk.graal.compiler.debug.TTY; -import jdk.graal.compiler.nodes.NamedLocationIdentity; -import 
jdk.graal.compiler.nodes.graphbuilderconf.InvocationPlugins; -import jdk.graal.compiler.phases.common.inlining.info.elem.InlineableGraph; -import jdk.graal.compiler.phases.common.inlining.walker.ComputeInliningRelevance; -import jdk.graal.compiler.replacements.nodes.BinaryMathIntrinsicNode; -import jdk.graal.compiler.replacements.nodes.UnaryMathIntrinsicNode; import org.graalvm.collections.EconomicMap; import org.graalvm.collections.EconomicSet; import org.graalvm.collections.Equivalence; -import jdk.graal.compiler.core.common.CompilationIdentifier; -import jdk.graal.compiler.core.common.SuppressFBWarnings; -import jdk.graal.compiler.core.gen.NodeLIRBuilder; -import jdk.graal.compiler.core.match.MatchStatement; -import jdk.graal.compiler.debug.DebugContext; -import jdk.graal.compiler.debug.DebugHandlersFactory; -import jdk.graal.compiler.debug.MetricKey; -import jdk.graal.compiler.debug.TimeSource; -import jdk.graal.compiler.graph.Node; -import jdk.graal.compiler.graph.NodeClass; -import jdk.graal.compiler.lir.CompositeValue; -import jdk.graal.compiler.lir.CompositeValueClass; -import jdk.graal.compiler.lir.LIRInstruction; -import jdk.graal.compiler.lir.LIRInstructionClass; -import jdk.graal.compiler.lir.gen.ArithmeticLIRGeneratorTool; -import jdk.graal.compiler.lir.phases.LIRPhase; -import jdk.graal.compiler.nodes.Invoke; -import jdk.graal.compiler.nodes.StructuredGraph; -import jdk.graal.compiler.nodes.spi.NodeLIRBuilderTool; -import jdk.graal.compiler.options.OptionValues; -import jdk.graal.compiler.phases.BasePhase; -import jdk.graal.compiler.phases.common.CanonicalizerPhase; -import jdk.graal.compiler.phases.tiers.HighTierContext; -import jdk.graal.compiler.printer.NoDeadCodeVerifyHandler; -import jdk.graal.compiler.serviceprovider.GlobalAtomicLong; import org.graalvm.nativeimage.CurrentIsolate; import org.graalvm.nativeimage.ImageSingletons; import org.graalvm.nativeimage.hosted.FieldValueTransformer; @@ -94,11 +59,46 @@ import com.oracle.svm.core.log.Log; 
import com.oracle.svm.core.option.HostedOptionValues; import com.oracle.svm.core.util.VMError; +import com.oracle.svm.graal.GraalSupport; import com.oracle.svm.graal.hosted.FieldsOffsetsFeature; import com.oracle.svm.graal.hosted.RuntimeCompilationFeature; import com.oracle.svm.graal.meta.SubstrateMethod; import com.oracle.svm.util.ReflectionUtil; +import jdk.graal.compiler.core.common.CompilationIdentifier; +import jdk.graal.compiler.core.common.SuppressFBWarnings; +import jdk.graal.compiler.core.gen.NodeLIRBuilder; +import jdk.graal.compiler.core.match.MatchRuleRegistry; +import jdk.graal.compiler.core.match.MatchStatement; +import jdk.graal.compiler.debug.DebugContext; +import jdk.graal.compiler.debug.DebugHandlersFactory; +import jdk.graal.compiler.debug.KeyRegistry; +import jdk.graal.compiler.debug.MetricKey; +import jdk.graal.compiler.debug.TTY; +import jdk.graal.compiler.debug.TimeSource; +import jdk.graal.compiler.graph.Node; +import jdk.graal.compiler.graph.NodeClass; +import jdk.graal.compiler.lir.CompositeValue; +import jdk.graal.compiler.lir.CompositeValueClass; +import jdk.graal.compiler.lir.LIRInstruction; +import jdk.graal.compiler.lir.LIRInstructionClass; +import jdk.graal.compiler.lir.gen.ArithmeticLIRGeneratorTool; +import jdk.graal.compiler.lir.phases.LIRPhase; +import jdk.graal.compiler.nodes.Invoke; +import jdk.graal.compiler.nodes.NamedLocationIdentity; +import jdk.graal.compiler.nodes.StructuredGraph; +import jdk.graal.compiler.nodes.graphbuilderconf.InvocationPlugins; +import jdk.graal.compiler.nodes.spi.NodeLIRBuilderTool; +import jdk.graal.compiler.options.OptionValues; +import jdk.graal.compiler.phases.BasePhase; +import jdk.graal.compiler.phases.common.CanonicalizerPhase; +import jdk.graal.compiler.phases.common.inlining.info.elem.InlineableGraph; +import jdk.graal.compiler.phases.common.inlining.walker.ComputeInliningRelevance; +import jdk.graal.compiler.phases.tiers.HighTierContext; +import 
jdk.graal.compiler.printer.NoDeadCodeVerifyHandler; +import jdk.graal.compiler.replacements.nodes.BinaryMathIntrinsicNode; +import jdk.graal.compiler.replacements.nodes.UnaryMathIntrinsicNode; +import jdk.graal.compiler.serviceprovider.GlobalAtomicLong; import jdk.vm.ci.code.TargetDescription; import jdk.vm.ci.meta.ResolvedJavaMethod; @@ -235,7 +235,7 @@ class GlobalAtomicLongAddressProvider implements FieldValueTransformer { @Override public Object transform(Object receiver, Object originalValue) { long initialValue = ((GlobalAtomicLong) receiver).getInitialValue(); - return CGlobalDataFactory.createWord((Pointer) WordFactory.unsigned(initialValue), null, true); + return CGlobalDataFactory.createWord(WordFactory.unsigned(initialValue), null, true); } } diff --git a/substratevm/src/com.oracle.svm.hosted/src/com/oracle/svm/hosted/code/CEntryPointCallStubMethod.java b/substratevm/src/com.oracle.svm.hosted/src/com/oracle/svm/hosted/code/CEntryPointCallStubMethod.java index 1a0f857332a1..e1e51480dac9 100644 --- a/substratevm/src/com.oracle.svm.hosted/src/com/oracle/svm/hosted/code/CEntryPointCallStubMethod.java +++ b/substratevm/src/com.oracle.svm.hosted/src/com/oracle/svm/hosted/code/CEntryPointCallStubMethod.java @@ -28,24 +28,6 @@ import java.util.Arrays; import java.util.Iterator; -import jdk.graal.compiler.core.common.calc.FloatConvert; -import jdk.graal.compiler.core.common.type.StampFactory; -import jdk.graal.compiler.debug.DebugContext; -import jdk.graal.compiler.graph.NodeSourcePosition; -import jdk.graal.compiler.nodes.CallTargetNode.InvokeKind; -import jdk.graal.compiler.nodes.ConstantNode; -import jdk.graal.compiler.nodes.DeadEndNode; -import jdk.graal.compiler.nodes.FrameState; -import jdk.graal.compiler.nodes.InvokeWithExceptionNode; -import jdk.graal.compiler.nodes.ParameterNode; -import jdk.graal.compiler.nodes.StructuredGraph; -import jdk.graal.compiler.nodes.ValueNode; -import jdk.graal.compiler.nodes.calc.FloatConvertNode; -import 
jdk.graal.compiler.nodes.calc.IntegerEqualsNode; -import jdk.graal.compiler.nodes.calc.SignExtendNode; -import jdk.graal.compiler.nodes.calc.ZeroExtendNode; -import jdk.graal.compiler.nodes.extended.BranchProbabilityNode; -import jdk.graal.compiler.nodes.java.ExceptionObjectNode; import org.graalvm.nativeimage.Isolate; import org.graalvm.nativeimage.IsolateThread; import org.graalvm.nativeimage.c.constant.CEnum; @@ -58,6 +40,7 @@ import com.oracle.graal.pointsto.infrastructure.WrappedJavaMethod; import com.oracle.graal.pointsto.meta.AnalysisMetaAccess; import com.oracle.graal.pointsto.meta.AnalysisMethod; +import com.oracle.graal.pointsto.meta.AnalysisType; import com.oracle.graal.pointsto.meta.HostedProviders; import com.oracle.svm.core.SubstrateUtil; import com.oracle.svm.core.Uninterruptible; @@ -81,6 +64,24 @@ import com.oracle.svm.hosted.phases.CInterfaceEnumTool; import com.oracle.svm.hosted.phases.HostedGraphKit; +import jdk.graal.compiler.core.common.calc.FloatConvert; +import jdk.graal.compiler.core.common.type.StampFactory; +import jdk.graal.compiler.debug.DebugContext; +import jdk.graal.compiler.graph.NodeSourcePosition; +import jdk.graal.compiler.nodes.ConstantNode; +import jdk.graal.compiler.nodes.DeadEndNode; +import jdk.graal.compiler.nodes.FrameState; +import jdk.graal.compiler.nodes.InvokeWithExceptionNode; +import jdk.graal.compiler.nodes.ParameterNode; +import jdk.graal.compiler.nodes.StructuredGraph; +import jdk.graal.compiler.nodes.ValueNode; +import jdk.graal.compiler.nodes.CallTargetNode.InvokeKind; +import jdk.graal.compiler.nodes.calc.FloatConvertNode; +import jdk.graal.compiler.nodes.calc.IntegerEqualsNode; +import jdk.graal.compiler.nodes.calc.SignExtendNode; +import jdk.graal.compiler.nodes.calc.ZeroExtendNode; +import jdk.graal.compiler.nodes.extended.BranchProbabilityNode; +import jdk.graal.compiler.nodes.java.ExceptionObjectNode; import jdk.vm.ci.code.BytecodeFrame; import jdk.vm.ci.meta.ConstantPool; import jdk.vm.ci.meta.JavaKind; 
@@ -88,25 +89,52 @@ import jdk.vm.ci.meta.MetaAccessProvider; import jdk.vm.ci.meta.ResolvedJavaMethod; import jdk.vm.ci.meta.ResolvedJavaType; +import jdk.vm.ci.meta.Signature; public final class CEntryPointCallStubMethod extends EntryPointCallStubMethod { static CEntryPointCallStubMethod create(AnalysisMethod targetMethod, CEntryPointData entryPointData, AnalysisMetaAccess metaAccess) { - ResolvedJavaMethod unwrappedMethod = targetMethod.getWrapped(); MetaAccessProvider unwrappedMetaAccess = metaAccess.getWrapped(); ResolvedJavaType declaringClass = unwrappedMetaAccess.lookupJavaType(IsolateEnterStub.class); ConstantPool constantPool = IsolateEnterStub.getConstantPool(unwrappedMetaAccess); - return new CEntryPointCallStubMethod(entryPointData, unwrappedMethod, declaringClass, constantPool); + return new CEntryPointCallStubMethod(entryPointData, targetMethod, declaringClass, constantPool, metaAccess.getUniverse().getWordKind(), unwrappedMetaAccess); } private static final JavaKind cEnumParameterKind = JavaKind.Int; private final CEntryPointData entryPointData; private final ResolvedJavaMethod targetMethod; + private final Signature targetSignature; - private CEntryPointCallStubMethod(CEntryPointData entryPointData, ResolvedJavaMethod targetMethod, ResolvedJavaType holderClass, ConstantPool holderConstantPool) { - super(SubstrateUtil.uniqueStubName(targetMethod), holderClass, targetMethod.getSignature(), holderConstantPool); + private CEntryPointCallStubMethod(CEntryPointData entryPointData, AnalysisMethod targetMethod, ResolvedJavaType holderClass, ConstantPool holderConstantPool, JavaKind wordKind, + MetaAccessProvider metaAccess) { + super(SubstrateUtil.uniqueStubName(targetMethod.getWrapped()), holderClass, createSignature(targetMethod, wordKind, metaAccess), holderConstantPool); this.entryPointData = entryPointData; - this.targetMethod = targetMethod; + this.targetMethod = targetMethod.getWrapped(); + this.targetSignature = targetMethod.getSignature(); + } + + 
/** + * This method creates a new signature for the stub in which all @CEnum values are converted + * into their corresponding primitive type. In correspondence to how the @CEnum values are + * actually handled, parameters are transformed to the type specified by cEnumParameterKind and + * return type is transformed into the word type. + * + * @see CEnum + * @see CEntryPointCallStubMethod#adaptParameterTypes(HostedProviders, NativeLibraries, + * HostedGraphKit, JavaType[], JavaType[]) + * @see CEntryPointCallStubMethod#adaptReturnValue(ResolvedJavaMethod, HostedProviders, Purpose, + * HostedGraphKit, ValueNode) + */ + private static SimpleSignature createSignature(AnalysisMethod targetMethod, JavaKind wordKind, MetaAccessProvider metaAccess) { + JavaType[] paramTypes = Arrays.stream(targetMethod.toParameterTypes()) + .map(it -> ((AnalysisType) it)) + .map(type -> type.getAnnotation(CEnum.class) != null ? metaAccess.lookupJavaType(cEnumParameterKind.toJavaClass()) : type.getWrapped()) + .toArray(JavaType[]::new); + ResolvedJavaType returnType = ((AnalysisType) targetMethod.getSignature().getReturnType(null)).getWrapped(); + if (returnType.getAnnotation(CEnum.class) != null) { + returnType = metaAccess.lookupJavaType(wordKind.toJavaClass()); + } + return new SimpleSignature(paramTypes, returnType); } @Override @@ -145,7 +173,7 @@ public StructuredGraph buildGraph(DebugContext debug, ResolvedJavaMethod method, NativeLibraries nativeLibraries = CEntryPointCallStubSupport.singleton().getNativeLibraries(); HostedGraphKit kit = new HostedGraphKit(debug, providers, method, purpose); - JavaType[] parameterTypes = method.toParameterTypes(); + JavaType[] parameterTypes = targetSignature.toParameterTypes(null); JavaType[] parameterLoadTypes = Arrays.copyOf(parameterTypes, parameterTypes.length); EnumInfo[] parameterEnumInfos; @@ -336,8 +364,8 @@ private EnumInfo[] adaptParameterTypes(HostedProviders providers, NativeLibrarie assert !matchingNodes.hasNext() && 
parameterNode.usages().filter(n -> n != initialState).isEmpty(); parameterNode.setStamp(StampFactory.forKind(cEnumParameterKind)); } else { - throw UserError.abort("Entry point method parameter types are restricted to primitive types, word types and enumerations (@%s): %s", - CEnum.class.getSimpleName(), targetMethod); + throw UserError.abort("Entry point method parameter types are restricted to primitive types, word types and enumerations (@%s): %s, given type was %s", + CEnum.class.getSimpleName(), targetMethod, parameterTypes[i]); } } } @@ -550,7 +578,7 @@ private ValueNode adaptReturnValue(ResolvedJavaMethod method, HostedProviders pr if (returnValue.getStackKind().isPrimitive()) { return returnValue; } - JavaType returnType = method.getSignature().getReturnType(null); + JavaType returnType = targetSignature.getReturnType(null); NativeLibraries nativeLibraries = CEntryPointCallStubSupport.singleton().getNativeLibraries(); ElementInfo typeInfo = nativeLibraries.findElementInfo((ResolvedJavaType) returnType); if (typeInfo instanceof EnumInfo) { diff --git a/substratevm/src/com.oracle.svm.truffle/src/com/oracle/svm/truffle/TruffleBaseFeature.java b/substratevm/src/com.oracle.svm.truffle/src/com/oracle/svm/truffle/TruffleBaseFeature.java index 5bc549275818..97cf84abcff3 100644 --- a/substratevm/src/com.oracle.svm.truffle/src/com/oracle/svm/truffle/TruffleBaseFeature.java +++ b/substratevm/src/com.oracle.svm.truffle/src/com/oracle/svm/truffle/TruffleBaseFeature.java @@ -488,6 +488,39 @@ public void beforeAnalysis(BeforeAnalysisAccess access) { Class frameClass = config.findClassByName("com.oracle.truffle.api.impl.FrameWithoutBoxing"); config.registerFieldValueTransformer(config.findField(frameClass, "ASSERTIONS_ENABLED"), new AssertionStatusFieldTransformer(frameClass)); + registerInternalResourceFieldValueTransformers(config); + } + + private static void registerInternalResourceFieldValueTransformers(BeforeAnalysisAccessImpl config) { + Class 
internalResourceCacheClass = config.findClassByName("com.oracle.truffle.polyglot.InternalResourceCache"); + Class internalResourceRootsClass = config.findClassByName("com.oracle.truffle.polyglot.InternalResourceRoots"); + Class resetableCacheRootClass = config.findClassByName("com.oracle.truffle.polyglot.InternalResourceCache$ResettableCachedRoot"); + Field cacheRootField = ReflectionUtil.lookupField(true, internalResourceCacheClass, "cacheRoot"); + if (cacheRootField != null) { + // graalvm-23.1.0 + assert internalResourceRootsClass == null; + config.registerFieldValueTransformer(cacheRootField, ResetFieldValueTransformer.INSTANCE); + config.registerFieldValueTransformer(ReflectionUtil.lookupField(false, resetableCacheRootClass, "resourceCacheRoot"), ResetFieldValueTransformer.INSTANCE); + } else { + // graalvm-24.0 + assert resetableCacheRootClass == null; + config.registerFieldValueTransformer(ReflectionUtil.lookupField(false, internalResourceCacheClass, "owningRoot"), ResetFieldValueTransformer.INSTANCE); + config.registerFieldValueTransformer(ReflectionUtil.lookupField(false, internalResourceCacheClass, "path"), ResetFieldValueTransformer.INSTANCE); + config.registerFieldValueTransformer(ReflectionUtil.lookupField(false, internalResourceRootsClass, "roots"), ResetFieldValueTransformer.INSTANCE); + } + } + + private static final class ResetFieldValueTransformer implements FieldValueTransformer { + + private static final FieldValueTransformer INSTANCE = new ResetFieldValueTransformer(); + + private ResetFieldValueTransformer() { + } + + @Override + public Object transform(Object receiver, Object originalValue) { + return null; + } } private static class AssertionStatusFieldTransformer implements FieldValueTransformer { @@ -1371,14 +1404,6 @@ final class Target_com_oracle_truffle_polyglot_LanguageCache { @TargetClass(className = "com.oracle.truffle.polyglot.InternalResourceCache", onlyWith = TruffleBaseFeature.IsEnabled.class) final class 
Target_com_oracle_truffle_polyglot_InternalResourceCache { - /* - * The field cannot be reset from the #afterAnalysis(). The reset comes too late for the - * String-must-not-contain-the-home-directory verification in DisallowedImageHeapObjectFeature, - * so we do the implicit reset using a substitution. - */ - @Alias @RecomputeFieldValue(kind = Kind.Reset) // - private static volatile Pair cacheRoot; - @Alias @RecomputeFieldValue(kind = Kind.Custom, declClass = UseInternalResourcesComputer.class, isFinal = true) // private static boolean useInternalResources; @@ -1395,18 +1420,6 @@ public Object transform(Object receiver, Object originalValue) { } } -@TargetClass(className = "com.oracle.truffle.polyglot.InternalResourceCache$ResettableCachedRoot", onlyWith = TruffleBaseFeature.IsEnabled.class) -final class Target_com_oracle_truffle_polyglot_InternalResourceCache_ResettableCachedRoot { - - /* - * The field cannot be reset from the #afterAnalysis(). The reset comes too late for the - * String-must-not-contain-the-home-directory verification in DisallowedImageHeapObjectFeature, - * so we do the implicit reset using a substitution. 
- */ - @Alias @RecomputeFieldValue(kind = Kind.Reset) // - private volatile Path resourceCacheRoot; -} - @TargetClass(className = "com.oracle.truffle.object.CoreLocations$DynamicObjectFieldLocation", onlyWith = TruffleBaseFeature.IsEnabled.class) final class Target_com_oracle_truffle_object_CoreLocations_DynamicObjectFieldLocation { @Alias @RecomputeFieldValue(kind = Kind.AtomicFieldUpdaterOffset) // diff --git a/truffle/src/com.oracle.truffle.api.instrumentation/src/com/oracle/truffle/api/instrumentation/TruffleInstrument.java b/truffle/src/com.oracle.truffle.api.instrumentation/src/com/oracle/truffle/api/instrumentation/TruffleInstrument.java index 5dd1a0121266..d4e98ef846c2 100644 --- a/truffle/src/com.oracle.truffle.api.instrumentation/src/com/oracle/truffle/api/instrumentation/TruffleInstrument.java +++ b/truffle/src/com.oracle.truffle.api.instrumentation/src/com/oracle/truffle/api/instrumentation/TruffleInstrument.java @@ -1076,6 +1076,8 @@ public Object getScope(LanguageInfo language) { * unpacking would be repeated once per operating system user. When the language was * compiled using native-image internal resources are unpacked at native-image compile time * and stored relative to the native-image. + *

+ * The caller thread must be entered in a context. * * @param resource the resource class to load * @throws IllegalArgumentException if {@code resource} is not associated with this diff --git a/truffle/src/com.oracle.truffle.api.test/src/com/oracle/truffle/api/test/polyglot/ContextPreInitializationTest.java b/truffle/src/com.oracle.truffle.api.test/src/com/oracle/truffle/api/test/polyglot/ContextPreInitializationTest.java index e566880d5e45..c70818ba6abd 100644 --- a/truffle/src/com.oracle.truffle.api.test/src/com/oracle/truffle/api/test/polyglot/ContextPreInitializationTest.java +++ b/truffle/src/com.oracle.truffle.api.test/src/com/oracle/truffle/api/test/polyglot/ContextPreInitializationTest.java @@ -2612,10 +2612,10 @@ private static Consumer newResourceBuildTimeVerifier(List file try { TruffleFile root = env.getInternalResource(ContextPreInitializationResource.class); assertNotNull(root); - assertFalse(root.isAbsolute()); + assertTrue(root.isAbsolute()); TruffleFile resource = root.resolve(ContextPreInitializationResource.FILE_NAME); assertNotNull(resource); - assertFalse(resource.isAbsolute()); + assertTrue(resource.isAbsolute()); assertEquals(ContextPreInitializationResource.FILE_CONTENT, new String(resource.readAllBytes(), StandardCharsets.UTF_8)); files.add(resource); } catch (IOException ioe) { @@ -2628,16 +2628,16 @@ private static Consumer newResourceExecutionTimeVerifier(List return (env) -> { try { TruffleFile file1 = files.get(0); - assertFalse(file1.isAbsolute()); + assertTrue(file1.isAbsolute()); assertEquals(ContextPreInitializationResource.FILE_CONTENT, new String(file1.readAllBytes(), StandardCharsets.UTF_8)); ContextPreInitializationResource.unpackCount = 0; TruffleFile root = env.getInternalResource(ContextPreInitializationResource.class); assertNotNull(root); - assertFalse(root.isAbsolute()); + assertTrue(root.isAbsolute()); assertEquals(0, ContextPreInitializationResource.unpackCount); TruffleFile file2 = 
root.resolve(ContextPreInitializationResource.FILE_NAME); assertNotNull(file2); - assertFalse(file2.isAbsolute()); + assertTrue(file2.isAbsolute()); assertEquals(ContextPreInitializationResource.FILE_CONTENT, new String(file2.readAllBytes(), StandardCharsets.UTF_8)); assertEquals(file1, file2); assertEquals(file1.getAbsoluteFile(), file2.getAbsoluteFile()); @@ -2696,14 +2696,14 @@ public void testInstrumentInternalResources() throws Exception { Assume.assumeFalse("Cannot run as native unittest", ImageInfo.inImageRuntimeCode()); setPatchable(FIRST); AtomicReference rootRef = new AtomicReference<>(); - ContextPreInitializationFirstInstrument.actions = Collections.singletonMap("onCreate", (e) -> { + ContextPreInitializationFirstInstrument.actions = Collections.singletonMap("onContextCreated", (e) -> { try { TruffleFile root = e.env.getInternalResource(ContextPreInitializationResource.class); assertNotNull(root); - assertFalse(root.isAbsolute()); + assertTrue(root.isAbsolute()); TruffleFile resource = root.resolve(ContextPreInitializationResource.FILE_NAME); assertNotNull(resource); - assertFalse(resource.isAbsolute()); + assertTrue(resource.isAbsolute()); assertEquals(ContextPreInitializationResource.FILE_CONTENT, new String(resource.readAllBytes(), StandardCharsets.UTF_8)); rootRef.set(root); } catch (IOException ioe) { @@ -2739,7 +2739,7 @@ public void testOverriddenCacheRoot() throws Exception { Path overriddenCacheRoot = Files.createTempDirectory(null).toRealPath(); Engine.copyResources(overriddenCacheRoot, FIRST); System.setProperty("polyglot.engine.resourcePath", overriddenCacheRoot.toRealPath().toString()); - TemporaryResourceCacheRoot.reset(false); + TemporaryResourceCacheRoot.reset(true); try { BaseLanguage.registerAction(ContextPreInitializationTestFirstLanguage.class, ActionKind.ON_PATCH_CONTEXT, newResourceExecutionTimeVerifier(files, overriddenCacheRoot.toString())); diff --git 
a/truffle/src/com.oracle.truffle.api.test/src/com/oracle/truffle/api/test/polyglot/FileSystemsTest.java b/truffle/src/com.oracle.truffle.api.test/src/com/oracle/truffle/api/test/polyglot/FileSystemsTest.java index a71a3e1065ca..32adfe0011b4 100644 --- a/truffle/src/com.oracle.truffle.api.test/src/com/oracle/truffle/api/test/polyglot/FileSystemsTest.java +++ b/truffle/src/com.oracle.truffle.api.test/src/com/oracle/truffle/api/test/polyglot/FileSystemsTest.java @@ -276,7 +276,7 @@ public static void createConfigurations() throws IOException, ReflectiveOperatio cfgs.put(MEMORY_FILE_SYSTEM, new Configuration(MEMORY_FILE_SYSTEM, ctx, memDir, fileSystem, false, true, true, true)); // Memory with language home - fileSystem = FileSystem.allowLanguageHomeAccess(new MemoryFileSystem()); + fileSystem = FileSystem.allowInternalResourceAccess(new MemoryFileSystem()); memDir = mkdirs(fileSystem.toAbsolutePath(fileSystem.parsePath("work")), fileSystem); fileSystem.setCurrentWorkingDirectory(memDir); createContent(memDir, fileSystem); @@ -286,7 +286,7 @@ public static void createConfigurations() throws IOException, ReflectiveOperatio if (TruffleTestAssumptions.isNoClassLoaderEncapsulation()) { // setCwd not supported // Memory with language home - in language home - fileSystem = FileSystem.allowLanguageHomeAccess(new MemoryFileSystem()); + fileSystem = FileSystem.allowInternalResourceAccess(new MemoryFileSystem()); memDir = mkdirs(fileSystem.toAbsolutePath(fileSystem.parsePath("work")), fileSystem); fileSystem.setCurrentWorkingDirectory(memDir); privateDir = createContent(memDir, fileSystem); diff --git a/truffle/src/com.oracle.truffle.api.test/src/com/oracle/truffle/api/test/polyglot/InternalResourceTest.java b/truffle/src/com.oracle.truffle.api.test/src/com/oracle/truffle/api/test/polyglot/InternalResourceTest.java index 09724bcd9d1e..e442baec8844 100644 --- a/truffle/src/com.oracle.truffle.api.test/src/com/oracle/truffle/api/test/polyglot/InternalResourceTest.java +++ 
b/truffle/src/com.oracle.truffle.api.test/src/com/oracle/truffle/api/test/polyglot/InternalResourceTest.java @@ -70,7 +70,6 @@ import org.graalvm.nativeimage.ImageInfo; import org.graalvm.polyglot.Context; import org.graalvm.polyglot.Engine; -import org.graalvm.polyglot.io.IOAccess; import org.junit.Assert; import org.junit.Assume; import org.junit.BeforeClass; @@ -85,7 +84,6 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import static com.oracle.truffle.api.test.polyglot.AbstractPolyglotTest.assertFails; @@ -279,103 +277,122 @@ public static class TestAccessFileOutsideOfResourceRoot extends AbstractExecutab @TruffleBoundary @SuppressWarnings("try") protected Object execute(RootNode node, Env env, Object[] contextArguments, Object[] frameArguments) throws Exception { - TruffleFile hostFolder = env.createTempDirectory(null, getClass().getSimpleName()); + TruffleFile hostFolder = env.getInternalTruffleFile((String) contextArguments[0]); try (TemporaryResourceCacheRoot cache = new TemporaryResourceCacheRoot()) { - // Relative paths - TruffleFile lib = env.getInternalResource(LibraryResource.class); - assertNull(lib.getParent()); - assertNoFileAccess(lib.resolve(".."), hostFolder); // Absolute paths - TruffleFile absoluteLibParent = lib.getAbsoluteFile().getParent(); - assertNotNull(absoluteLibParent); - assertNoFileAccess(absoluteLibParent, hostFolder); + TruffleFile lib = env.getInternalResource(LibraryResource.class); + assertTrue(lib.isAbsolute()); + TruffleFile outsideCacheFolder = getParentTransitive(lib, 4); + assertNotNull(outsideCacheFolder); + assertNoFileAccess(outsideCacheFolder, hostFolder); // Combine absolute paths with relative paths to escape from internal resource root - absoluteLibParent = lib.getAbsoluteFile().resolve(".."); - assertNoFileAccess(absoluteLibParent, 
hostFolder); - absoluteLibParent = lib.getAbsoluteFile().resolve("prefix").resolve("..").resolve(".."); - assertNoFileAccess(absoluteLibParent, hostFolder); + outsideCacheFolder = resolveParentTransitive(lib, 4); + assertNoFileAccess(outsideCacheFolder, hostFolder); + outsideCacheFolder = resolveParentTransitive(lib.resolve("prefix"), 5); + assertNoFileAccess(outsideCacheFolder, hostFolder); // Try to access other resource files TruffleFile src = env.getInternalResource(SourcesResource.class); - TruffleFile srcResolvedUsingLib = lib.resolve(src.getAbsoluteFile().toString()); - assertNoFileAccess(srcResolvedUsingLib, hostFolder); + assertTrue(src.isAbsolute()); + TruffleFile srcResolvedUsingLib = lib.resolve(src.toString()); + // With the shared filesystem the access to other resource cache dir is allowed. + assertTrue(srcResolvedUsingLib.isDirectory()); return null; - } finally { - delete(hostFolder); } } - private static void assertNoFileAccess(TruffleFile file, TruffleFile hostFolder) { - assertSecurityException(() -> file.resolve("fooDir").createDirectory()); - assertSecurityException(() -> file.resolve("fooDir").createDirectories()); - assertSecurityException(() -> file.resolve("fooFile").createFile()); - assertSecurityException(file::exists); - assertSecurityException(file::isDirectory); - assertSecurityException(file::isRegularFile); - assertSecurityException(file::isSymbolicLink); - assertSecurityException(file::isReadable); - assertSecurityException(file::isExecutable); - assertSecurityException(file::size); - assertFalse(file.isWritable()); - assertSecurityException(() -> file.isSameFile(file.resolveSibling("other"))); - assertSecurityException(() -> file.getAttribute(TruffleFile.CREATION_TIME)); - assertSecurityException(() -> file.getAttributes(List.of(TruffleFile.CREATION_TIME))); - assertSecurityException(file::getCreationTime); - assertSecurityException(file::getLastAccessTime); - assertSecurityException(file::getLastModifiedTime); - 
assertSecurityException(() -> file.setAttribute(TruffleFile.CREATION_TIME, FileTime.from(Instant.now()))); - assertSecurityException(() -> file.setCreationTime(FileTime.from(Instant.now()))); - assertSecurityException(() -> file.setLastAccessTime(FileTime.from(Instant.now()))); - assertSecurityException(() -> file.setLastModifiedTime(FileTime.from(Instant.now()))); - assertSecurityException(file::list); - assertSecurityException(() -> file.visit(new FileVisitor<>() { - @Override - public FileVisitResult preVisitDirectory(TruffleFile dir, BasicFileAttributes attrs) { - return FileVisitResult.CONTINUE; + private static TruffleFile getParentTransitive(TruffleFile file, int times) { + TruffleFile res = file; + for (int i = 0; i < times; i++) { + res = res.getParent(); + if (res == null) { + throw new IllegalArgumentException("File " + file.getAbsoluteFile() + " has not enough path components to go up " + times + " times."); } + } + return res; + } - @Override - public FileVisitResult visitFile(TruffleFile f, BasicFileAttributes attrs) { - return FileVisitResult.CONTINUE; - } + private static TruffleFile resolveParentTransitive(TruffleFile file, int times) { + TruffleFile res = file; + for (int i = 0; i < times; i++) { + res = res.resolve(".."); + } + return res; + } + } + + private static void assertNoFileAccess(TruffleFile file, TruffleFile hostFolder) { + assertSecurityException(() -> file.resolve("fooDir").createDirectory()); + assertSecurityException(() -> file.resolve("fooDir").createDirectories()); + assertSecurityException(() -> file.resolve("fooFile").createFile()); + assertSecurityException(file::exists); + assertSecurityException(file::isDirectory); + assertSecurityException(file::isRegularFile); + assertSecurityException(file::isSymbolicLink); + assertSecurityException(file::isReadable); + assertSecurityException(file::isExecutable); + assertSecurityException(file::size); + assertSecurityException(file::isWritable); + assertSecurityException(() -> 
file.getAttribute(TruffleFile.CREATION_TIME)); + assertSecurityException(() -> file.getAttributes(List.of(TruffleFile.CREATION_TIME))); + assertSecurityException(file::getCreationTime); + assertSecurityException(file::getLastAccessTime); + assertSecurityException(file::getLastModifiedTime); + assertSecurityException(() -> file.setAttribute(TruffleFile.CREATION_TIME, FileTime.from(Instant.now()))); + assertSecurityException(() -> file.setCreationTime(FileTime.from(Instant.now()))); + assertSecurityException(() -> file.setLastAccessTime(FileTime.from(Instant.now()))); + assertSecurityException(() -> file.setLastModifiedTime(FileTime.from(Instant.now()))); + assertSecurityException(file::list); + assertSecurityException(() -> file.visit(new FileVisitor<>() { + @Override + public FileVisitResult preVisitDirectory(TruffleFile dir, BasicFileAttributes attrs) { + return FileVisitResult.CONTINUE; + } - @Override - public FileVisitResult visitFileFailed(TruffleFile f, IOException exc) { - return FileVisitResult.CONTINUE; - } + @Override + public FileVisitResult visitFile(TruffleFile f, BasicFileAttributes attrs) { + return FileVisitResult.CONTINUE; + } - @Override - public FileVisitResult postVisitDirectory(TruffleFile dir, IOException exc) { - return FileVisitResult.CONTINUE; - } - }, 1)); - assertSecurityException(file::newBufferedReader); - assertSecurityException(file::newBufferedWriter); - assertSecurityException(file::newInputStream); - assertSecurityException(file::newOutputStream); - assertSecurityException(() -> file.newByteChannel(Set.of(StandardOpenOption.WRITE, StandardOpenOption.CREATE))); - assertSecurityException(() -> file.newByteChannel(Set.of(StandardOpenOption.READ))); - assertSecurityException(file::readAllBytes); - assertSecurityException(file::newDirectoryStream); - assertSecurityException(file::delete); - assertSecurityException(() -> file.copy(hostFolder.resolve("cp"))); - assertSecurityException(() -> file.move(hostFolder.resolve("mv"))); - if 
(OSUtils.isUnix()) { - assertSecurityException(file::getOwner); - assertSecurityException(file::getGroup); - assertSecurityException(file::getPosixPermissions); - assertSecurityException(() -> file.setPosixPermissions(Set.of())); - assertSecurityException(() -> file.createLink(file.resolveSibling("ln"))); - assertSecurityException(() -> file.createSymbolicLink(file.resolveSibling("lns"))); - assertSecurityException(file::readSymbolicLink); + @Override + public FileVisitResult visitFileFailed(TruffleFile f, IOException exc) { + return FileVisitResult.CONTINUE; + } + + @Override + public FileVisitResult postVisitDirectory(TruffleFile dir, IOException exc) { + return FileVisitResult.CONTINUE; } + }, 1)); + assertSecurityException(file::newBufferedReader); + assertSecurityException(file::newBufferedWriter); + assertSecurityException(file::newInputStream); + assertSecurityException(file::newOutputStream); + assertSecurityException(() -> file.newByteChannel(Set.of(StandardOpenOption.WRITE, StandardOpenOption.CREATE))); + assertSecurityException(() -> file.newByteChannel(Set.of(StandardOpenOption.READ))); + assertSecurityException(file::readAllBytes); + assertSecurityException(file::newDirectoryStream); + assertSecurityException(file::delete); + assertSecurityException(() -> file.copy(hostFolder.resolve("cp"))); + assertSecurityException(() -> file.move(hostFolder.resolve("mv"))); + if (OSUtils.isUnix()) { + assertSecurityException(file::getOwner); + assertSecurityException(file::getGroup); + assertSecurityException(file::getPosixPermissions); + assertSecurityException(() -> file.setPosixPermissions(Set.of())); + assertSecurityException(() -> file.createLink(file.normalize().resolveSibling("ln"))); + assertSecurityException(() -> file.createSymbolicLink(file.normalize().resolveSibling("lns"))); + assertSecurityException(file::readSymbolicLink); } } @Test - public void testAccessFileOutsideOfResourceRoot() { + public void testAccessFileOutsideOfResourceRoot() throws 
IOException { Assume.assumeFalse("Cannot run as native unittest", ImageInfo.inImageRuntimeCode()); - try (Context context = Context.newBuilder().allowIO(IOAccess.ALL).build()) { - AbstractExecutableTestLanguage.execute(context, TestAccessFileOutsideOfResourceRoot.class); + Path hostFolder = Files.createTempDirectory("test").toAbsolutePath(); + try (Context context = Context.create()) { + AbstractExecutableTestLanguage.execute(context, TestAccessFileOutsideOfResourceRoot.class, hostFolder.toString()); + } finally { + delete(hostFolder); } } @@ -386,9 +403,10 @@ public static class TestAccessFileInResourceRoot extends AbstractExecutableTestL @TruffleBoundary @SuppressWarnings("try") protected Object execute(RootNode node, Env env, Object[] contextArguments, Object[] frameArguments) throws Exception { - TruffleFile hostFolder = env.createTempDirectory(null, getClass().getSimpleName()); + TruffleFile hostFolder = env.getInternalTruffleFile((String) contextArguments[0]); try (TemporaryResourceCacheRoot cache = new TemporaryResourceCacheRoot()) { TruffleFile root = env.getInternalResource(FileAccessCheckResource.class); + assertTrue(root.isAbsolute()); TruffleFile file = root.resolve(FileAccessCheckResource.fileName); TruffleFile folder = root.resolve(FileAccessCheckResource.folderName); TruffleFile linkTarget = root.resolve(FileAccessCheckResource.linkTargetName); @@ -457,7 +475,7 @@ public FileVisitResult postVisitDirectory(TruffleFile dir, IOException exc) { assertTrue(link.isSymbolicLink()); assertSecurityException(() -> file.createLink(file.resolveSibling("ln"))); assertSecurityException(() -> file.createSymbolicLink(file.resolveSibling("lns"))); - assertEquals(linkTarget, link.readSymbolicLink()); + assertEquals(linkTarget.getName(), link.readSymbolicLink().getPath()); } assertSecurityException(file::delete); if (OSUtils.isUnix()) { @@ -470,8 +488,6 @@ public FileVisitResult postVisitDirectory(TruffleFile dir, IOException exc) { assertSecurityException(() -> 
file.move(file.resolveSibling("mv"))); assertSecurityException(() -> file.move(hostFolder.resolve("mv"))); return null; - } finally { - delete(hostFolder); } } @@ -491,10 +507,13 @@ private static String readContent(BufferedReader r) throws IOException { } @Test - public void testAccessFileInResourceRoot() { + public void testAccessFileInResourceRoot() throws IOException { Assume.assumeFalse("Cannot run as native unittest", ImageInfo.inImageRuntimeCode()); - try (Context context = Context.newBuilder().allowIO(IOAccess.ALL).build()) { - AbstractExecutableTestLanguage.execute(context, TestAccessFileInResourceRoot.class); + Path hostFolder = Files.createTempDirectory("test").toAbsolutePath(); + try (Context context = Context.create()) { + AbstractExecutableTestLanguage.execute(context, TestAccessFileInResourceRoot.class, hostFolder.toString()); + } finally { + delete(hostFolder); } } @@ -532,7 +551,7 @@ public void testOverriddenResourceRoot() throws Exception { Path cacheRoot3 = Files.createTempDirectory(null); Engine.copyResources(cacheRoot3, TestUtils.getDefaultLanguageId(TestOverriddenResourceRoot.class)); // Reset cached resource root - TemporaryResourceCacheRoot.setTestCacheRoot(null, true); + TemporaryResourceCacheRoot.setTestCacheRoot(null, false); try { // Set explicit resource cache root @@ -543,7 +562,7 @@ public void testOverriddenResourceRoot() throws Exception { AbstractExecutableTestLanguage.execute(context, TestOverriddenResourceRoot.class, libPath, strPath); } finally { // Reset cached resource root - TemporaryResourceCacheRoot.setTestCacheRoot(null, true); + TemporaryResourceCacheRoot.setTestCacheRoot(null, false); } // Set explicit component (language, instrument) cache root @@ -554,7 +573,7 @@ public void testOverriddenResourceRoot() throws Exception { AbstractExecutableTestLanguage.execute(context, TestOverriddenResourceRoot.class, libPath, strPath); } finally { // Reset cached resource root - TemporaryResourceCacheRoot.setTestCacheRoot(null, 
true); + TemporaryResourceCacheRoot.setTestCacheRoot(null, false); } // Set explicit component resource cache root @@ -566,7 +585,7 @@ public void testOverriddenResourceRoot() throws Exception { AbstractExecutableTestLanguage.execute(context, TestOverriddenResourceRoot.class, libPath, strPath); } finally { // Reset cached resource root - TemporaryResourceCacheRoot.setTestCacheRoot(null, true); + TemporaryResourceCacheRoot.setTestCacheRoot(null, false); } } finally { // Clean explicit resource root @@ -819,6 +838,85 @@ public void testOptionalResources() { } } + @Registration(/* ... */internalResources = SourcesResource.class) + public static class TestGetInternalTruffleFile extends AbstractExecutableTestLanguage { + + @Override + @TruffleBoundary + @SuppressWarnings("try") + protected Object execute(RootNode node, Env env, Object[] contextArguments, Object[] frameArguments) throws Exception { + try (TemporaryResourceCacheRoot cache = new TemporaryResourceCacheRoot()) { + TruffleFile srcRoot = env.getInternalResource(SourcesResource.ID); + verifyResources(srcRoot, SourcesResource.RESOURCES); + TruffleFile srcRootAsInternalTruffleFile = env.getInternalTruffleFile(srcRoot.getPath()); + verifyResources(srcRootAsInternalTruffleFile, SourcesResource.RESOURCES); + return ""; + } + } + } + + @Test + public void testGetInternalTruffleFile() { + Assume.assumeFalse("Cannot run as native unittest", ImageInfo.inImageRuntimeCode()); + try (Context context = Context.create()) { + AbstractExecutableTestLanguage.execute(context, TestGetInternalTruffleFile.class); + } + } + + @Registration(/* ... 
*/internalResources = SourcesResource.class) + public static class TestGetPublicTruffleFile extends AbstractExecutableTestLanguage { + + @Override + @TruffleBoundary + @SuppressWarnings("try") + protected Object execute(RootNode node, Env env, Object[] contextArguments, Object[] frameArguments) throws Exception { + try (TemporaryResourceCacheRoot cache = new TemporaryResourceCacheRoot()) { + TruffleFile srcRoot = env.getInternalResource(SourcesResource.ID); + verifyResources(srcRoot, SourcesResource.RESOURCES); + TruffleFile srcRootAsPublicTruffleFile = env.getPublicTruffleFile(srcRoot.getPath()); + assertNoFileAccess(srcRootAsPublicTruffleFile, srcRootAsPublicTruffleFile.resolveSibling("other")); + for (String resource : SourcesResource.RESOURCES) { + assertFails(() -> srcRootAsPublicTruffleFile.resolve(resource).readAllBytes(), SecurityException.class); + assertFails(() -> env.getPublicTruffleFile(srcRoot.resolve(resource).getPath()).readAllBytes(), SecurityException.class); + } + return ""; + } + } + } + + @Test + public void testGetPublicTruffleFile() { + Assume.assumeFalse("Cannot run as native unittest", ImageInfo.inImageRuntimeCode()); + try (Context context = Context.create()) { + AbstractExecutableTestLanguage.execute(context, TestGetPublicTruffleFile.class); + } + } + + @Registration(/* ... 
*/internalResources = SourcesResource.class) + public static class TestGetTruffleFileInternal extends AbstractExecutableTestLanguage { + + @Override + @TruffleBoundary + @SuppressWarnings("try") + protected Object execute(RootNode node, Env env, Object[] contextArguments, Object[] frameArguments) throws Exception { + try (TemporaryResourceCacheRoot cache = new TemporaryResourceCacheRoot()) { + TruffleFile srcRoot = env.getInternalResource(SourcesResource.ID); + verifyResources(srcRoot, SourcesResource.RESOURCES); + TruffleFile srcRootAsInternalTruffleFile = env.getTruffleFileInternal(srcRoot.getPath(), (f) -> true); + verifyResources(srcRootAsInternalTruffleFile, SourcesResource.RESOURCES); + return ""; + } + } + } + + @Test + public void testGetTruffleFileInternal() { + Assume.assumeFalse("Cannot run as native unittest", ImageInfo.inImageRuntimeCode()); + try (Context context = Context.create()) { + AbstractExecutableTestLanguage.execute(context, TestGetTruffleFileInternal.class); + } + } + private static boolean hasResource(Path folder, Class language, Class resource) { return hasResource(folder, TestUtils.getDefaultLanguageId(language), resource); } @@ -836,13 +934,15 @@ private static void assertSecurityException(TruffleFileAction action) { }, SecurityException.class); } - private static void delete(TruffleFile file) throws IOException { - if (file.isDirectory()) { - for (TruffleFile child : file.list()) { - delete(child); + private static void delete(Path file) throws IOException { + if (Files.isDirectory(file)) { + try (DirectoryStream children = Files.newDirectoryStream(file)) { + for (Path child : children) { + delete(child); + } } } - file.delete(); + Files.delete(file); } @FunctionalInterface @@ -853,21 +953,19 @@ interface TruffleFileAction { static final class TemporaryResourceCacheRoot implements AutoCloseable { private final Path root; - private final boolean disposeResourceFileSystemOnClose; TemporaryResourceCacheRoot() throws IOException { - 
this(true); + this(false); } - TemporaryResourceCacheRoot(boolean disposeResourceFileSystemOnClose) throws IOException { - this(Files.createTempDirectory(null), disposeResourceFileSystemOnClose); + TemporaryResourceCacheRoot(boolean nativeImageRuntime) throws IOException { + this(Files.createTempDirectory(null), nativeImageRuntime); } - TemporaryResourceCacheRoot(Path cacheRoot, boolean disposeResourceFileSystemOnClose) throws IOException { + TemporaryResourceCacheRoot(Path cacheRoot, boolean nativeImageRuntime) throws IOException { try { root = cacheRoot.toRealPath(); - this.disposeResourceFileSystemOnClose = disposeResourceFileSystemOnClose; - setTestCacheRoot(root, false); + setTestCacheRoot(root, nativeImageRuntime); } catch (ClassNotFoundException e) { throw new AssertionError("Failed to set cache root.", e); } @@ -880,7 +978,7 @@ Path getRoot() { @Override public void close() { try { - setTestCacheRoot(null, disposeResourceFileSystemOnClose); + setTestCacheRoot(null, false); delete(root); } catch (IOException | ClassNotFoundException e) { throw new AssertionError("Failed to reset cache root.", e); @@ -898,13 +996,13 @@ private static void delete(Path path) throws IOException { Files.delete(path); } - static void reset(boolean disposeResourceFileSystem) throws ClassNotFoundException { - setTestCacheRoot(null, disposeResourceFileSystem); + static void reset(boolean nativeImageRuntime) throws ClassNotFoundException { + setTestCacheRoot(null, nativeImageRuntime); } - private static void setTestCacheRoot(Path root, boolean disposeResourceFileSystem) throws ClassNotFoundException { - Class internalResourceCacheClass = Class.forName("com.oracle.truffle.polyglot.InternalResourceCache"); - ReflectionUtils.invokeStatic(internalResourceCacheClass, "setTestCacheRoot", new Class[]{Path.class, boolean.class}, root, disposeResourceFileSystem); + private static void setTestCacheRoot(Path root, boolean nativeImageRuntime) throws ClassNotFoundException { + Class 
internalResourceCacheClass = Class.forName("com.oracle.truffle.polyglot.InternalResourceRoots"); + ReflectionUtils.invokeStatic(internalResourceCacheClass, "setTestCacheRoot", new Class[]{Path.class, boolean.class}, root, nativeImageRuntime); } } } diff --git a/truffle/src/com.oracle.truffle.api/src/com/oracle/truffle/api/TruffleFile.java b/truffle/src/com.oracle.truffle.api/src/com/oracle/truffle/api/TruffleFile.java index 32713f74b9f3..e8cf7fc5c357 100644 --- a/truffle/src/com.oracle.truffle.api/src/com/oracle/truffle/api/TruffleFile.java +++ b/truffle/src/com.oracle.truffle.api/src/com/oracle/truffle/api/TruffleFile.java @@ -2068,8 +2068,7 @@ public interface FileTypeDetector { static final class FileSystemContext { - // instance of PolyglotLanguageContext, PolyglotEngineImpl or - // PolyglotImpl.EmbedderFileSystemContext + // Instance of PolyglotLanguageContext or PolyglotImpl.EmbedderFileSystemContext final Object engineObject; private volatile Map> fileTypeDetectors; diff --git a/truffle/src/com.oracle.truffle.api/src/com/oracle/truffle/api/TruffleLanguage.java b/truffle/src/com.oracle.truffle.api/src/com/oracle/truffle/api/TruffleLanguage.java index 46f197a74b74..0f90bcf606f4 100644 --- a/truffle/src/com.oracle.truffle.api/src/com/oracle/truffle/api/TruffleLanguage.java +++ b/truffle/src/com.oracle.truffle.api/src/com/oracle/truffle/api/TruffleLanguage.java @@ -3056,7 +3056,7 @@ private

TruffleFile getTruffleFileInternalImpl(P path, Predicate optionKey); - public abstract String getRelativePathInLanguageHome(TruffleFile truffleFile); - - public abstract TruffleFile relativizeToInternalResourceCache(TruffleFile truffleFile); + public abstract String getRelativePathInResourceRoot(TruffleFile truffleFile); public abstract void onSourceCreated(Source source); @@ -760,6 +758,8 @@ public abstract Iterator mergeHostGuestFrames(Object polyglotEngine, S public abstract TruffleFile getInternalResource(Object owner, String resourceId) throws IOException; + public abstract Path getEngineResource(Object polyglotEngine, String resourceId) throws IOException; + public abstract Collection getResourceIds(String componentId); public abstract void setIsolatePolyglot(AbstractPolyglotImpl instance); diff --git a/truffle/src/com.oracle.truffle.api/src/com/oracle/truffle/api/source/Source.java b/truffle/src/com.oracle.truffle.api/src/com/oracle/truffle/api/source/Source.java index 09914686b560..5361cb9472e7 100644 --- a/truffle/src/com.oracle.truffle.api/src/com/oracle/truffle/api/source/Source.java +++ b/truffle/src/com.oracle.truffle.api/src/com/oracle/truffle/api/source/Source.java @@ -1088,18 +1088,12 @@ static Source buildSource(String language, Object origin, String name, String pa useContent = enforceInterfaceContracts(useContent); String relativePathInLanguageHome = null; if (useTruffleFile != null) { - TruffleFile relativeFileInResourceCache = SourceAccessor.ACCESSOR.engineSupport().relativizeToInternalResourceCache(useTruffleFile); - if (relativeFileInResourceCache != null) { - useTruffleFile = relativeFileInResourceCache; - relativePathInLanguageHome = relativeFileInResourceCache.getPath(); - } else { - /* - * The relativePathInLanguageHome has to be calculated also for Sources created in - * the image execution time. They have to have the same hash code as sources created - * during the context pre-initialization. 
- */ - relativePathInLanguageHome = SourceAccessor.ACCESSOR.engineSupport().getRelativePathInLanguageHome(useTruffleFile); - } + /* + * The relativePathInLanguageHome has to be calculated also for Sources created in the + * image execution time. They have to have the same hash code as sources created during + * the context pre-initialization. + */ + relativePathInLanguageHome = SourceAccessor.ACCESSOR.engineSupport().getRelativePathInResourceRoot(useTruffleFile); if (relativePathInLanguageHome != null) { Object fsEngineObject = SourceAccessor.ACCESSOR.languageSupport().getFileSystemEngineObject(SourceAccessor.ACCESSOR.languageSupport().getFileSystemContext(useTruffleFile)); if (SourceAccessor.ACCESSOR.engineSupport().inContextPreInitialization(fsEngineObject)) { diff --git a/truffle/src/com.oracle.truffle.object/src/com/oracle/truffle/object/CoreAllocator.java b/truffle/src/com.oracle.truffle.object/src/com/oracle/truffle/object/CoreAllocator.java index ebefda284276..d20d1c406beb 100644 --- a/truffle/src/com.oracle.truffle.object/src/com/oracle/truffle/object/CoreAllocator.java +++ b/truffle/src/com.oracle.truffle.object/src/com/oracle/truffle/object/CoreAllocator.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2023, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * The Universal Permissive License (UPL), Version 1.0 @@ -60,6 +60,8 @@ import com.oracle.truffle.object.CoreLocations.TypedLocation; import com.oracle.truffle.object.CoreLocations.ValueLocation; +import sun.misc.Unsafe; + @SuppressWarnings("deprecation") class CoreAllocator extends ShapeImpl.BaseAllocator { @@ -130,7 +132,8 @@ protected Location newIntLocation(boolean useFinal) { if (com.oracle.truffle.object.ObjectStorageOptions.InObjectFields && primitiveFieldSize + getLayout().getLongFieldSize() <= getLayout().getPrimitiveFieldCount()) { return advance(new IntLocationDecorator(getLayout().getPrimitiveFieldLocation(primitiveFieldSize))); } else if (getLayout().hasPrimitiveExtensionArray()) { - return advance(new IntLocationDecorator(new LongArrayLocation(primitiveArraySize))); + int alignedIndex = alignArrayIndex(primitiveArraySize, Long.BYTES); + return advance(new IntLocationDecorator(new LongArrayLocation(alignedIndex))); } } return newObjectLocation(useFinal, true); @@ -146,7 +149,8 @@ Location newDoubleLocation(boolean useFinal, boolean allowedIntToDouble) { if (com.oracle.truffle.object.ObjectStorageOptions.InObjectFields && primitiveFieldSize + getLayout().getLongFieldSize() <= getLayout().getPrimitiveFieldCount()) { return advance(new DoubleLocationDecorator(getLayout().getPrimitiveFieldLocation(primitiveFieldSize), allowedIntToDouble)); } else if (getLayout().hasPrimitiveExtensionArray()) { - return advance(new DoubleLocationDecorator(new LongArrayLocation(primitiveArraySize), allowedIntToDouble)); + int alignedIndex = alignArrayIndex(primitiveArraySize, Long.BYTES); + return advance(new DoubleLocationDecorator(new LongArrayLocation(alignedIndex), allowedIntToDouble)); } } return newObjectLocation(useFinal, true); @@ -162,7 +166,8 @@ Location newLongLocation(boolean useFinal, boolean allowedIntToLong) { if (com.oracle.truffle.object.ObjectStorageOptions.InObjectFields && primitiveFieldSize + getLayout().getLongFieldSize() <= 
getLayout().getPrimitiveFieldCount()) { return advance((Location) CoreLocations.createLongLocation(getLayout().getPrimitiveFieldLocation(primitiveFieldSize), allowedIntToLong)); } else if (getLayout().hasPrimitiveExtensionArray()) { - return advance(new LongArrayLocation(primitiveArraySize, allowedIntToLong)); + int alignedIndex = alignArrayIndex(primitiveArraySize, Long.BYTES); + return advance(new LongArrayLocation(alignedIndex, allowedIntToLong)); } } return newObjectLocation(useFinal, true); @@ -243,4 +248,25 @@ protected Location locationForValueUpcast(Object value, Location oldLocation, lo } return locationForValue(value, false, value != null); } + + /** + * Adjust index to ensure alignment for slots larger than the array element size, e.g. long and + * double slots in an int[] array. Note that array element 0 is not necessarily 8-byte aligned. + */ + private static int alignArrayIndex(int index, int bytes) { + assert bytes > 0 && (bytes & (bytes - 1)) == 0; + final int baseOffset = Unsafe.ARRAY_INT_BASE_OFFSET; + final int indexScale = Unsafe.ARRAY_INT_INDEX_SCALE; + if (bytes <= indexScale) { + // Always aligned. 
+ return index; + } else { + int misalignment = (baseOffset + indexScale * index) & (bytes - 1); + if (misalignment == 0) { + return index; + } else { + return index + ((bytes - misalignment) / indexScale); + } + } + } } diff --git a/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/EngineAccessor.java b/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/EngineAccessor.java index c39a020bfe65..bef1cf3482ce 100644 --- a/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/EngineAccessor.java +++ b/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/EngineAccessor.java @@ -715,8 +715,6 @@ public boolean inContextPreInitialization(Object polyglotObject) { if (polyglotObject instanceof PolyglotLanguageContext languageContext) { PolyglotContextImpl polyglotContext = languageContext.context; return polyglotContext.getEngine().inEnginePreInitialization && polyglotContext.parent == null; - } else if (polyglotObject instanceof PolyglotEngineImpl polyglotEngine) { - return polyglotEngine.inEnginePreInitialization; } else if (polyglotObject instanceof EmbedderFileSystemContext) { return false; } else { @@ -1006,7 +1004,7 @@ public TruffleContext createInternalContext(Object sourcePolyglotLanguageContext fileSystemConfig = creatorConfig.fileSystemConfig; } else { FileSystem publicFileSystem = FileSystems.newNoIOFileSystem(); - FileSystem internalFileSystem = PolyglotEngineImpl.ALLOW_IO ? FileSystems.newLanguageHomeFileSystem() : publicFileSystem; + FileSystem internalFileSystem = PolyglotEngineImpl.ALLOW_IO ? 
FileSystems.newResourcesFileSystem() : publicFileSystem; fileSystemConfig = new FileSystemConfig(api.getIOAccessNone(), publicFileSystem, internalFileSystem); } @@ -1270,8 +1268,6 @@ public boolean isInternal(Object engineObject, FileSystem fs) { public boolean isSocketIOAllowed(Object engineFileSystemContext) { if (engineFileSystemContext instanceof PolyglotLanguageContext languageContext) { return languageContext.getImpl().getIO().hasHostSocketAccess(languageContext.context.config.fileSystemConfig.ioAccess); - } else if (engineFileSystemContext instanceof PolyglotEngineImpl) { - return false; } else if (engineFileSystemContext instanceof EmbedderFileSystemContext) { return true; } else { @@ -1532,25 +1528,8 @@ public String getUnparsedOptionValue(OptionValues optionValues, OptionKey opt } @Override - public String getRelativePathInLanguageHome(TruffleFile truffleFile) { - return FileSystems.getRelativePathInLanguageHome(truffleFile); - } - - @Override - public TruffleFile relativizeToInternalResourceCache(TruffleFile truffleFile) { - FileSystem fs = LANGUAGE.getFileSystem(truffleFile); - if (FileSystems.isInternalResourceFileSystem(fs)) { - if (truffleFile.isAbsolute()) { - Path root = fs.parsePath(FileSystems.getInternalResourceFileSystemRoot(fs).get().toString()); - Path path = LANGUAGE.getPath(truffleFile); - if (path.startsWith(root)) { - return LANGUAGE.getTruffleFile(root.relativize(path), LANGUAGE.getFileSystemContext(truffleFile)); - } - } else { - return truffleFile; - } - } - return null; + public String getRelativePathInResourceRoot(TruffleFile truffleFile) { + return FileSystems.getRelativePathInResourceRoot(truffleFile); } @Override @@ -2077,28 +2056,22 @@ public TruffleFile getInternalResource(Object owner, String resourceId) throws I } private static TruffleFile getInternalResource(Object owner, String resourceId, boolean failIfMissing) throws IOException { - Map cachedRoots; InternalResourceCache resourceCache; String componentId; Supplier> 
supportedResourceIds; - if (owner instanceof PolyglotLanguageContext languageContext) { - PolyglotLanguage polyglotLanguage = languageContext.language; - cachedRoots = polyglotLanguage.internalResources; - LanguageCache cache = polyglotLanguage.cache; + PolyglotLanguageContext languageContext; + if (owner instanceof PolyglotLanguageContext) { + languageContext = (PolyglotLanguageContext) owner; + LanguageCache cache = languageContext.language.cache; resourceCache = cache.getResourceCache(resourceId); componentId = cache.getId(); supportedResourceIds = cache::getResourceIds; } else if (owner instanceof PolyglotInstrument polyglotInstrument) { - cachedRoots = polyglotInstrument.internalResources; InstrumentCache cache = polyglotInstrument.cache; resourceCache = cache.getResourceCache(resourceId); componentId = cache.getId(); supportedResourceIds = cache::getResourceIds; - } else if (owner instanceof PolyglotEngineImpl) { - cachedRoots = null; - resourceCache = InternalResourceCache.getEngineResource(resourceId); - componentId = PolyglotEngineImpl.ENGINE_ID; - supportedResourceIds = InternalResourceCache::getEngineResourceIds; + languageContext = getPolyglotContext(null).getHostContext(); } else { throw CompilerDirectives.shouldNotReachHere("Unsupported owner " + owner); } @@ -2110,17 +2083,18 @@ private static TruffleFile getInternalResource(Object owner, String resourceId, return null; } } - TruffleFile root = cachedRoots != null ? cachedRoots.get(resourceId) : null; - if (root == null) { - PolyglotEngineImpl polyglotEngine = ((VMObject) owner).getEngine(); - Object fsContext = EngineAccessor.LANGUAGE.createFileSystemContext(polyglotEngine, resourceCache.getResourceFileSystem(polyglotEngine)); - root = EngineAccessor.LANGUAGE.getTruffleFile(".", fsContext); - if (cachedRoots != null) { - var prevValue = cachedRoots.putIfAbsent(resourceId, root); - root = prevValue != null ? 
prevValue : root; - } + Path rootPath = resourceCache.getPath(languageContext.getEngine()); + return EngineAccessor.LANGUAGE.getTruffleFile(rootPath.toString(), languageContext.getInternalFileSystemContext()); + } + + @Override + public Path getEngineResource(Object polyglotEngine, String resourceId) throws IOException { + InternalResourceCache resourceCache = InternalResourceCache.getEngineResource(resourceId); + if (resourceCache != null) { + return resourceCache.getPath((PolyglotEngineImpl) polyglotEngine); + } else { + return null; } - return root; } @Override diff --git a/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/FileSystems.java b/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/FileSystems.java index aeaaab66ba15..ae70ea767f62 100644 --- a/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/FileSystems.java +++ b/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/FileSystems.java @@ -104,8 +104,8 @@ static FileSystem newNIOFileSystem(java.nio.file.FileSystem fileSystem) { return new NIOFileSystem(fileSystem, null, false); } - static FileSystem allowLanguageHomeAccess(FileSystem fileSystem) { - return new LanguageHomeFileSystem(newDefaultFileSystem(null), fileSystem); + static FileSystem allowInternalResourceAccess(FileSystem fileSystem) { + return new ResourcesFileSystem(newDefaultFileSystem(null), fileSystem); } static FileSystem newReadOnlyFileSystem(FileSystem fileSystem) { @@ -116,9 +116,9 @@ static FileSystem newNoIOFileSystem() { return new DeniedIOFileSystem(); } - static FileSystem newLanguageHomeFileSystem() { + static FileSystem newResourcesFileSystem() { FileSystem defaultFS = newDefaultFileSystem(null); - return new LanguageHomeFileSystem(new ReadOnlyFileSystem(defaultFS), new PathOperationsOnlyFileSystem(defaultFS)); + return new ResourcesFileSystem(new ReadOnlyFileSystem(defaultFS), new PathOperationsOnlyFileSystem(defaultFS)); } static boolean 
hasNoAccess(FileSystem fileSystem) { @@ -137,11 +137,18 @@ static Supplier>> return new FileTypeDetectorsSupplier(languageCaches); } - static String getRelativePathInLanguageHome(TruffleFile file) { + static String getRelativePathInResourceRoot(TruffleFile file) { Object engineObject = EngineAccessor.LANGUAGE.getFileSystemEngineObject(EngineAccessor.LANGUAGE.getFileSystemContext(file)); if (engineObject instanceof PolyglotLanguageContext languageContext) { - FileSystem fs = EngineAccessor.LANGUAGE.getFileSystem(file); Path path = EngineAccessor.LANGUAGE.getPath(file); + if (InternalResourceCache.usesInternalResources()) { + Path hostPath = toHostPath(path); + InternalResourceCache cache = InternalResourceRoots.findInternalResource(hostPath); + if (cache != null) { + return cache.getPathOrNull().relativize(hostPath).toString(); + } + } + FileSystem fs = EngineAccessor.LANGUAGE.getFileSystem(file); String result = relativizeToLanguageHome(fs, path, languageContext.language); if (result != null) { return result; @@ -149,7 +156,7 @@ static String getRelativePathInLanguageHome(TruffleFile file) { Map accessibleLanguages = languageContext.getAccessibleLanguages(true); /* * The accessibleLanguages is null for a closed context. The - * getRelativePathInLanguageHome may be called even for closed context by the compiler + * getRelativePathInResourceRoot may be called even for closed context by the compiler * thread. 
*/ if (accessibleLanguages != null) { @@ -162,9 +169,6 @@ static String getRelativePathInLanguageHome(TruffleFile file) { } } return null; - } else if (engineObject instanceof PolyglotEngineImpl) { - // instrument internal resources are never relative to language homes - return null; } else if (engineObject instanceof EmbedderFileSystemContext) { // embedding sources are never relative to language homes return null; @@ -173,19 +177,11 @@ static String getRelativePathInLanguageHome(TruffleFile file) { } } - static FileSystem newInternalResourceFileSystem(Supplier rootSupplier) { - return newReadOnlyFileSystem(new InternalResourceFileSystem(rootSupplier)); - } - - static boolean isInternalResourceFileSystem(FileSystem fileSystem) { - return (fileSystem instanceof ReadOnlyFileSystem readOnlyFileSystem) && readOnlyFileSystem.delegateFileSystem instanceof InternalResourceFileSystem; - } - - static Supplier getInternalResourceFileSystemRoot(FileSystem fileSystem) { - if (isInternalResourceFileSystem(fileSystem)) { - return ((InternalResourceFileSystem) ((ReadOnlyFileSystem) fileSystem).delegateFileSystem).rootSupplier; + private static Path toHostPath(Path path) { + if (path.getClass() != Path.of("").getClass()) { + return Paths.get(path.toString()); } else { - throw new IllegalArgumentException(Objects.toString(fileSystem)); + return path; } } @@ -678,17 +674,7 @@ private Path resolve(FileSystem fs) { if (current instanceof Path) { return (Path) current; } else if (current instanceof ImageHeapPath) { - ImageHeapPath imageHeapPath = (ImageHeapPath) current; - String languageId = imageHeapPath.languageId; - String path = imageHeapPath.path; - Path result; - String newLanguageHome; - if (languageId != null && (newLanguageHome = LanguageCache.languages().get(languageId).getLanguageHome()) != null) { - result = fs.parsePath(newLanguageHome).resolve(path); - } else { - result = fs.parsePath(path); - } - return result; + return ((ImageHeapPath) current).resolve(fs); } else { 
throw new IllegalStateException("Unknown delegate " + current); } @@ -696,15 +682,25 @@ private Path resolve(FileSystem fs) { void onPreInitializeContextEnd(Map languageHomes) { Path internalPath = (Path) delegatePath; - String languageId = null; - for (Map.Entry e : languageHomes.entrySet()) { - if (internalPath.startsWith(e.getValue())) { - internalPath = e.getValue().relativize(internalPath); - languageId = e.getKey(); - break; + ImageHeapPath result = null; + InternalResourceCache owner = InternalResourceRoots.findInternalResource(internalPath); + if (owner != null) { + String relativePath = owner.getPathOrNull().relativize(internalPath).toString(); + result = new InternalResourceImageHeapPath(owner, relativePath); + } + if (result == null) { + for (Map.Entry e : languageHomes.entrySet()) { + if (internalPath.startsWith(e.getValue())) { + String languageId = e.getKey(); + String relativePath = e.getValue().relativize(internalPath).toString(); + result = new LanguageHomeImageHeapPath(languageId, relativePath); + } } } - delegatePath = new ImageHeapPath(languageId, internalPath.toString(), internalPath.isAbsolute()); + if (result == null) { + result = new PathImageHeapPath(internalPath.toString(), internalPath.isAbsolute()); + } + delegatePath = result; } @Override @@ -726,9 +722,7 @@ public String toString() { // TruffleFiles created during context pre-initialization. 
if (delegate == INVALID_FILESYSTEM) { ImageHeapPath imageHeapPath = (ImageHeapPath) delegatePath; - if (imageHeapPath.languageId != null) { - throw new UnsupportedOperationException("ToString in the image heap form is supported only for files outside language homes."); - } + assert imageHeapPath instanceof PathImageHeapPath : "ToString can be called only for non internal resource files located outside of language homes."; return imageHeapPath.path; } else { return super.toString(); @@ -758,10 +752,61 @@ public URI getReinitializedURI() { } } - private record ImageHeapPath(String languageId, String path, boolean absolute) { + private abstract static class ImageHeapPath { + + final String path; + final boolean absolute; + + ImageHeapPath(String path, boolean absolute) { + this.path = Objects.requireNonNull(path, "Path must be non-null"); + this.absolute = absolute; + } + + abstract Path resolve(FileSystem fileSystem); + + } + + private static final class LanguageHomeImageHeapPath extends ImageHeapPath { + + private final String languageId; + + LanguageHomeImageHeapPath(String languageId, String path) { + super(path, false); + this.languageId = Objects.requireNonNull(languageId, "LanguageId must be non-null"); + } + + @Override + Path resolve(FileSystem fileSystem) { + String newLanguageHome = LanguageCache.languages().get(languageId).getLanguageHome(); + assert newLanguageHome != null : "Pre-initialized language " + languageId + " must exist in the image execution time."; + return fileSystem.parsePath(newLanguageHome).resolve(path); + } + } + + private static final class InternalResourceImageHeapPath extends ImageHeapPath { + + private final InternalResourceCache cache; - private ImageHeapPath { - assert path != null; + InternalResourceImageHeapPath(InternalResourceCache cache, String path) { + super(path, false); + this.cache = cache; + } + + @Override + Path resolve(FileSystem fileSystem) { + return 
fileSystem.parsePath(cache.getPathOrNull().toString()).resolve(path); + } + } + + private static final class PathImageHeapPath extends ImageHeapPath { + + PathImageHeapPath(String path, boolean absolute) { + super(path, absolute); + } + + @Override + Path resolve(FileSystem fileSystem) { + return fileSystem.parsePath(path); } } } @@ -1183,24 +1228,24 @@ public boolean isSameFile(Path path1, Path path2, LinkOption... options) throws } } - private static final class LanguageHomeFileSystem implements PolyglotFileSystem { + private static final class ResourcesFileSystem implements PolyglotFileSystem { - private final FileSystem languageHomeFileSystem; + private final FileSystem resourcesFileSystem; private final FileSystem delegateFileSystem; private volatile Set languageHomes; - LanguageHomeFileSystem(FileSystem languageHomeFileSystem, FileSystem delegateFileSystem) { - this.languageHomeFileSystem = languageHomeFileSystem; + ResourcesFileSystem(FileSystem resourcesFileSystem, FileSystem delegateFileSystem) { + this.resourcesFileSystem = resourcesFileSystem; this.delegateFileSystem = delegateFileSystem; - Class languageHomeFileSystemPathType = this.languageHomeFileSystem.parsePath("").getClass(); + Class resourcesFileSystemPathType = this.resourcesFileSystem.parsePath("").getClass(); Class customFileSystemPathType = delegateFileSystem.parsePath("").getClass(); - if (languageHomeFileSystemPathType != customFileSystemPathType) { + if (resourcesFileSystemPathType != customFileSystemPathType) { throw new IllegalArgumentException("Given FileSystem must have the same Path type as the default FileSystem."); } - if (!languageHomeFileSystem.getSeparator().equals(delegateFileSystem.getSeparator())) { + if (!resourcesFileSystem.getSeparator().equals(delegateFileSystem.getSeparator())) { throw new IllegalArgumentException("Given FileSystem must use the same separator character as the default FileSystem."); } - if 
(!languageHomeFileSystem.getPathSeparator().equals(delegateFileSystem.getPathSeparator())) { + if (!resourcesFileSystem.getPathSeparator().equals(delegateFileSystem.getPathSeparator())) { throw new IllegalArgumentException("Given FileSystem must use the same path separator character as the default FileSystem."); } } @@ -1233,8 +1278,8 @@ public Path parsePath(String path) { @Override public void checkAccess(Path path, Set modes, LinkOption... linkOptions) throws IOException { Path absolutePath = toNormalizedAbsolutePath(path); - if (inLanguageHome(absolutePath)) { - languageHomeFileSystem.checkAccess(absolutePath, modes, linkOptions); + if (inResourceRoot(absolutePath)) { + resourcesFileSystem.checkAccess(absolutePath, modes, linkOptions); } else { delegateFileSystem.checkAccess(path, modes, linkOptions); } @@ -1243,8 +1288,8 @@ public void checkAccess(Path path, Set modes, LinkOption.. @Override public void createDirectory(Path dir, FileAttribute... attrs) throws IOException { Path absolutePath = toNormalizedAbsolutePath(dir); - if (inLanguageHome(absolutePath)) { - languageHomeFileSystem.createDirectory(absolutePath, attrs); + if (inResourceRoot(absolutePath)) { + resourcesFileSystem.createDirectory(absolutePath, attrs); } else { delegateFileSystem.createDirectory(dir, attrs); } @@ -1253,8 +1298,8 @@ public void createDirectory(Path dir, FileAttribute... attrs) throws IOExcept @Override public void delete(Path path) throws IOException { Path absolutePath = toNormalizedAbsolutePath(path); - if (inLanguageHome(absolutePath)) { - languageHomeFileSystem.delete(absolutePath); + if (inResourceRoot(absolutePath)) { + resourcesFileSystem.delete(absolutePath); } else { delegateFileSystem.delete(path); } @@ -1263,8 +1308,8 @@ public void delete(Path path) throws IOException { @Override public SeekableByteChannel newByteChannel(Path path, Set options, FileAttribute... 
attrs) throws IOException { Path absolutePath = toNormalizedAbsolutePath(path); - if (inLanguageHome(absolutePath)) { - return languageHomeFileSystem.newByteChannel(absolutePath, options, attrs); + if (inResourceRoot(absolutePath)) { + return resourcesFileSystem.newByteChannel(absolutePath, options, attrs); } else { return delegateFileSystem.newByteChannel(path, options, attrs); } @@ -1273,8 +1318,8 @@ public SeekableByteChannel newByteChannel(Path path, Set o @Override public DirectoryStream newDirectoryStream(Path dir, DirectoryStream.Filter filter) throws IOException { Path absolutePath = toNormalizedAbsolutePath(dir); - if (inLanguageHome(absolutePath)) { - return languageHomeFileSystem.newDirectoryStream(absolutePath, filter); + if (inResourceRoot(absolutePath)) { + return resourcesFileSystem.newDirectoryStream(absolutePath, filter); } else { return delegateFileSystem.newDirectoryStream(dir, filter); } @@ -1288,8 +1333,8 @@ public Path toAbsolutePath(Path path) { @Override public Path toRealPath(Path path, LinkOption... linkOptions) throws IOException { Path absolutePath = toNormalizedAbsolutePath(path); - if (inLanguageHome(absolutePath)) { - return languageHomeFileSystem.toRealPath(path); + if (inResourceRoot(absolutePath)) { + return resourcesFileSystem.toRealPath(path); } else { return delegateFileSystem.toRealPath(path); } @@ -1298,8 +1343,8 @@ public Path toRealPath(Path path, LinkOption... linkOptions) throws IOException @Override public Map readAttributes(Path path, String attributes, LinkOption... 
options) throws IOException { Path absolutePath = toNormalizedAbsolutePath(path); - if (inLanguageHome(absolutePath)) { - return languageHomeFileSystem.readAttributes(absolutePath, attributes, options); + if (inResourceRoot(absolutePath)) { + return resourcesFileSystem.readAttributes(absolutePath, attributes, options); } else { return delegateFileSystem.readAttributes(path, attributes, options); } @@ -1308,8 +1353,8 @@ public Map readAttributes(Path path, String attributes, LinkOpti @Override public void setAttribute(Path path, String attribute, Object value, LinkOption... options) throws IOException { Path absolutePath = toNormalizedAbsolutePath(path); - if (inLanguageHome(absolutePath)) { - languageHomeFileSystem.setAttribute(absolutePath, attribute, value, options); + if (inResourceRoot(absolutePath)) { + resourcesFileSystem.setAttribute(absolutePath, attribute, value, options); } else { delegateFileSystem.setAttribute(path, attribute, value, options); } @@ -1319,10 +1364,10 @@ public void setAttribute(Path path, String attribute, Object value, LinkOption.. public void createLink(Path link, Path existing) throws IOException { Path absoluteLink = toNormalizedAbsolutePath(link); Path absoluteExisting = toNormalizedAbsolutePath(existing); - boolean linkInHome = inLanguageHome(absoluteLink); - boolean existingInHome = inLanguageHome(absoluteExisting); + boolean linkInHome = inResourceRoot(absoluteLink); + boolean existingInHome = inResourceRoot(absoluteExisting); if (linkInHome && existingInHome) { - languageHomeFileSystem.createLink(absoluteLink, absoluteExisting); + resourcesFileSystem.createLink(absoluteLink, absoluteExisting); } else if (!linkInHome && !existingInHome) { delegateFileSystem.createLink(link, existing); } else { @@ -1334,10 +1379,10 @@ public void createLink(Path link, Path existing) throws IOException { public void createSymbolicLink(Path link, Path target, FileAttribute... 
attrs) throws IOException { Path absoluteLink = toNormalizedAbsolutePath(link); Path absoluteTarget = toNormalizedAbsolutePath(target); - boolean linkInHome = inLanguageHome(absoluteLink); - boolean targetInHome = inLanguageHome(absoluteTarget); + boolean linkInHome = inResourceRoot(absoluteLink); + boolean targetInHome = inResourceRoot(absoluteTarget); if (linkInHome && targetInHome) { - languageHomeFileSystem.createSymbolicLink(absoluteLink, target); + resourcesFileSystem.createSymbolicLink(absoluteLink, target); } else if (!linkInHome && !targetInHome) { delegateFileSystem.createSymbolicLink(link, target); } else { @@ -1348,8 +1393,8 @@ public void createSymbolicLink(Path link, Path target, FileAttribute... attrs @Override public Path readSymbolicLink(Path link) throws IOException { Path absolutePath = toNormalizedAbsolutePath(link); - if (inLanguageHome(absolutePath)) { - return languageHomeFileSystem.readSymbolicLink(absolutePath); + if (inResourceRoot(absolutePath)) { + return resourcesFileSystem.readSymbolicLink(absolutePath); } else { return delegateFileSystem.readSymbolicLink(link); } @@ -1357,7 +1402,7 @@ public Path readSymbolicLink(Path link) throws IOException { @Override public void setCurrentWorkingDirectory(Path currentWorkingDirectory) { - languageHomeFileSystem.setCurrentWorkingDirectory(currentWorkingDirectory); + resourcesFileSystem.setCurrentWorkingDirectory(currentWorkingDirectory); delegateFileSystem.setCurrentWorkingDirectory(currentWorkingDirectory); } @@ -1374,8 +1419,8 @@ public String getPathSeparator() { @Override public String getMimeType(Path path) { Path absolutePath = toNormalizedAbsolutePath(path); - if (inLanguageHome(absolutePath)) { - return languageHomeFileSystem.getMimeType(absolutePath); + if (inResourceRoot(absolutePath)) { + return resourcesFileSystem.getMimeType(absolutePath); } else { return delegateFileSystem.getMimeType(path); } @@ -1384,8 +1429,8 @@ public String getMimeType(Path path) { @Override public Charset 
getEncoding(Path path) { Path absolutePath = toNormalizedAbsolutePath(path); - if (inLanguageHome(absolutePath)) { - return languageHomeFileSystem.getEncoding(absolutePath); + if (inResourceRoot(absolutePath)) { + return resourcesFileSystem.getEncoding(absolutePath); } else { return delegateFileSystem.getEncoding(path); } @@ -1400,10 +1445,10 @@ public Path getTempDirectory() { public boolean isSameFile(Path path1, Path path2, LinkOption... options) throws IOException { Path absolutePath1 = toNormalizedAbsolutePath(path1); Path absolutePath2 = toNormalizedAbsolutePath(path2); - boolean path1InHome = inLanguageHome(absolutePath1); - boolean path2InHome = inLanguageHome(absolutePath2); + boolean path1InHome = inResourceRoot(absolutePath1); + boolean path2InHome = inResourceRoot(absolutePath2); if (path1InHome && path2InHome) { - return languageHomeFileSystem.isSameFile(absolutePath1, absolutePath2, options); + return resourcesFileSystem.isSameFile(absolutePath1, absolutePath2, options); } else if (!path1InHome && !path2InHome) { return delegateFileSystem.isSameFile(path1, path2); } else { @@ -1415,7 +1460,7 @@ private Path toNormalizedAbsolutePath(Path path) { if (path.isAbsolute()) { return path; } - Path absolutePath = languageHomeFileSystem.toAbsolutePath(path); + Path absolutePath = resourcesFileSystem.toAbsolutePath(path); if (isNormalized(path)) { return absolutePath; } else { @@ -1441,10 +1486,13 @@ private static boolean isNormalized(Path path) { return true; } - private boolean inLanguageHome(final Path path) { + private boolean inResourceRoot(final Path path) { if (!(path.isAbsolute() && isNormalized(path))) { throw new IllegalArgumentException("The path must be normalized absolute path."); } + if (InternalResourceRoots.findRoot(path) != null) { + return true; + } for (Path home : getLanguageHomes()) { if (path.startsWith(home)) { return true; @@ -1776,249 +1824,6 @@ public Map> get() { } } - private static final class InternalResourceFileSystem implements 
PolyglotFileSystem { - - private final FileSystem delegate; - private final Supplier rootSupplier; - - InternalResourceFileSystem(Supplier rootSupplier) { - Objects.requireNonNull(rootSupplier, "The rootSupplier must be non-null."); - this.delegate = newDefaultFileSystem(null); - this.rootSupplier = rootSupplier; - } - - @Override - public Path parsePath(URI uri) { - throw new UnsupportedOperationException(); - } - - @Override - public Path parsePath(String path) { - return new InternalResourcePath(delegate.parsePath(path)); - } - - @Override - public void checkAccess(Path path, Set modes, LinkOption... linkOptions) throws IOException { - Path normalized = InternalResourcePath.as(path).resolveDelegateAbsolutePath(); - delegate.checkAccess(normalized, modes, linkOptions); - } - - @Override - public void createDirectory(Path dir, FileAttribute... attrs) throws IOException { - Path normalized = InternalResourcePath.as(dir).resolveDelegateAbsolutePath(); - delegate.createDirectory(normalized, attrs); - } - - @Override - public void delete(Path path) throws IOException { - Path normalized = InternalResourcePath.as(path).resolveDelegateAbsolutePath(); - delegate.delete(normalized); - } - - @Override - public SeekableByteChannel newByteChannel(Path path, Set options, FileAttribute... 
attrs) throws IOException { - Path normalized = InternalResourcePath.as(path).resolveDelegateAbsolutePath(); - return delegate.newByteChannel(normalized, options, attrs); - } - - @Override - public DirectoryStream newDirectoryStream(Path dir, DirectoryStream.Filter filter) throws IOException { - InternalResourcePath castedPath = InternalResourcePath.as(dir); - Path normalized = castedPath.resolveDelegateAbsolutePath(); - DirectoryStream delegateStream = delegate.newDirectoryStream(normalized, filter); - return new DirectoryStream<>() { - @Override - public Iterator iterator() { - return new ForwardingPath.ForwardingPathIterator<>(delegateStream.iterator(), castedPath::wrap); - } - - @Override - public void close() throws IOException { - delegateStream.close(); - } - }; - } - - @Override - public Path toAbsolutePath(Path path) { - return path.toAbsolutePath(); - } - - @Override - public Path toRealPath(Path path, LinkOption... linkOptions) throws IOException { - return path.toRealPath(linkOptions); - } - - @Override - public Map readAttributes(Path path, String attributes, LinkOption... options) throws IOException { - Path normalized = InternalResourcePath.as(path).resolveDelegateAbsolutePath(); - return delegate.readAttributes(normalized, attributes, options); - } - - @Override - public void setAttribute(Path path, String attribute, Object value, LinkOption... options) throws IOException { - Path normalized = InternalResourcePath.as(path).resolveDelegateAbsolutePath(); - delegate.setAttribute(normalized, attribute, value, options); - } - - @Override - public void createLink(Path link, Path existing) throws IOException { - Path normalizedLink = InternalResourcePath.as(link).resolveDelegateAbsolutePath(); - Path normalizedExisting = InternalResourcePath.as(existing).resolveDelegateAbsolutePath(); - delegate.createLink(normalizedLink, normalizedExisting); - } - - @Override - public void createSymbolicLink(Path link, Path target, FileAttribute... 
attrs) throws IOException { - Path normalizedLink = InternalResourcePath.as(link).resolveDelegateAbsolutePath(); - Path normalizedTarget = InternalResourcePath.as(target).resolveDelegateAbsolutePath(); - delegate.createSymbolicLink(normalizedLink, normalizedTarget, attrs); - } - - @Override - public Path readSymbolicLink(Path link) throws IOException { - InternalResourcePath castedPath = InternalResourcePath.as(link); - Path normalizedLink = castedPath.resolveDelegateAbsolutePath(); - InternalResourcePath result = castedPath.wrap(delegate.readSymbolicLink(normalizedLink)); - // Ensure that the link does not point outside the internal resource root. - result.resolveDelegateAbsolutePath(); - return result; - } - - @Override - public String getSeparator() { - return delegate.getSeparator(); - } - - @Override - public String getPathSeparator() { - return delegate.getPathSeparator(); - } - - @Override - public boolean isSameFile(Path path1, Path path2, LinkOption... options) { - Path normalized1 = InternalResourcePath.as(path1).resolveDelegateAbsolutePath(); - Path normalized2 = InternalResourcePath.as(path2).resolveDelegateAbsolutePath(); - return normalized1.equals(normalized2); - } - - @Override - public boolean isInternal(AbstractPolyglotImpl polyglot) { - return true; - } - - @Override - public boolean hasNoAccess() { - return false; - } - - @Override - public boolean isHost() { - return false; - } - - private final class InternalResourcePath extends ForwardingPath implements ResetablePath { - - private final Path delegate; - - private InternalResourcePath(Path delegate) { - this.delegate = delegate; - } - - @Override - InternalResourcePath wrap(Path path) { - return path == null ? 
null : new InternalResourcePath(path); - } - - @Override - Path unwrap() { - return delegate; - } - - static InternalResourcePath as(Path path) { - return (InternalResourcePath) path; - } - - @Override - public Path resolve(Path other) { - if (isRelativeResourceRoot()) { - return other; - } else { - return super.resolve(other); - } - } - - @Override - public Path resolve(String other) { - if (isRelativeResourceRoot()) { - return wrap(delegate.getFileSystem().getPath(other)); - } else { - return super.resolve(other); - } - } - - @Override - public Path toAbsolutePath() { - if (isAbsolute()) { - return this; - } else { - Path root = rootSupplier.get(); - Path resolvedAbsolute = isRelativeResourceRoot() ? root : root.resolve(delegate); - return wrap(resolvedAbsolute); - } - } - - @Override - public Path toRealPath(LinkOption... options) throws IOException { - return wrap(resolveDelegateAbsolutePath().toRealPath(options)); - } - - @Override - public URI toUri() { - if (delegate.isAbsolute()) { - return super.toUri(); - } else { - return toAbsolutePath().toUri(); - } - } - - /** - * Returns the absolute normalized default file system path. If the path after - * normalization escaped the internal resource root it throws {@link SecurityException}. - */ - Path resolveDelegateAbsolutePath() { - Path root = rootSupplier.get(); - Path absolutePath = delegate.isAbsolute() ? 
delegate : root.resolve(delegate); - absolutePath = absolutePath.normalize(); - if (!absolutePath.startsWith(root)) { - throw new SecurityException(delegate.toString()); - } - return absolutePath; - } - - boolean isRelativeResourceRoot() { - if (!delegate.isAbsolute() && delegate.getNameCount() == 1) { - Path name = delegate.getFileName(); - if (name == null) { - throw CompilerDirectives.shouldNotReachHere("Path has a name component but has no file name " + delegate); - } - return ".".equals(name.toString()); - } - return false; - } - - @Override - public String getReinitializedPath() { - return toAbsolutePath().toString(); - } - - @Override - public URI getReinitializedURI() { - return toUri(); - } - } - } - private interface PolyglotFileSystem extends FileSystem { boolean isInternal(AbstractPolyglotImpl polyglot); diff --git a/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/InstrumentCache.java b/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/InstrumentCache.java index 2b0f3d8b2e4d..b83588d19099 100644 --- a/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/InstrumentCache.java +++ b/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/InstrumentCache.java @@ -294,6 +294,10 @@ Collection getResourceIds() { return internalResources.keySet(); } + Collection getResources() { + return internalResources.values(); + } + String getWebsite() { return website; } diff --git a/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/InternalResourceCache.java b/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/InternalResourceCache.java index d386ba36be4a..b3a22bbc4cdf 100644 --- a/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/InternalResourceCache.java +++ b/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/InternalResourceCache.java @@ -46,27 +46,21 @@ import com.oracle.truffle.api.TruffleOptions; 
import com.oracle.truffle.api.provider.InternalResourceProvider; import com.oracle.truffle.polyglot.EngineAccessor.AbstractClassLoaderSupplier; -import org.graalvm.collections.Pair; import org.graalvm.nativeimage.ImageInfo; import org.graalvm.nativeimage.ImageSingletons; import org.graalvm.nativeimage.IsolateThread; -import org.graalvm.nativeimage.ProcessProperties; import org.graalvm.nativeimage.c.function.CEntryPoint; import org.graalvm.nativeimage.c.function.CEntryPointLiteral; import org.graalvm.nativeimage.c.function.CFunctionPointer; -import org.graalvm.polyglot.io.FileSystem; -import java.io.IOError; import java.io.IOException; -import java.io.PrintStream; import java.lang.reflect.Constructor; import java.net.URL; import java.nio.file.DirectoryStream; import java.nio.file.FileAlreadyExistsException; +import java.nio.file.FileSystemException; import java.nio.file.Files; -import java.nio.file.InvalidPathException; import java.nio.file.Path; -import java.nio.file.Paths; import java.nio.file.StandardCopyOption; import java.security.CodeSource; import java.util.Collection; @@ -79,8 +73,6 @@ import java.util.ServiceLoader; import java.util.Set; import java.util.TreeSet; -import java.util.concurrent.locks.Lock; -import java.util.concurrent.locks.ReentrantLock; import java.util.function.BooleanSupplier; import java.util.function.Function; import java.util.function.Supplier; @@ -91,20 +83,30 @@ final class InternalResourceCache { private static final char[] FILE_SYSTEM_SPECIAL_CHARACTERS = {'/', '\\', ':'}; - private static final String OVERRIDDEN_CACHE_ROOT = "polyglot.engine.resourcePath"; - private static final String OVERRIDDEN_COMPONENT_ROOT = "polyglot.engine.resourcePath.%s"; - private static final String OVERRIDDEN_RESOURCE_ROOT = "polyglot.engine.resourcePath.%s.%s"; - - private static final Lock unpackLock = new ReentrantLock(); - private static final Map, Map>>> optionalInternalResourcesCaches = new HashMap<>(); private static final Map>> nativeImageCache 
= TruffleOptions.AOT ? new HashMap<>() : null; - private static volatile Pair cacheRoot; + + /** + * Recomputed before the analyses by a substitution in the {@code TruffleBaseFeature} based on + * the {@code CopyLanguageResources} option value. The field must not be declared as + * {@code final} to make the substitution function correctly. + */ + private static boolean useInternalResources = true; private final String id; private final String resourceId; private final Supplier resourceFactory; - private volatile FileSystem resourceFileSystem; + + /** + * This field is reset to {@code null} by the {@code TruffleBaseFeature} before writing the + * native image heap. + */ + private InternalResourceRoots.Root owningRoot; + /** + * This field is reset to {@code null} by the {@code TruffleBaseFeature} before writing the + * native image heap. + */ + private volatile Path path; InternalResourceCache(String languageId, String resourceId, Supplier resourceFactory) { this.id = Objects.requireNonNull(languageId); @@ -112,8 +114,50 @@ final class InternalResourceCache { this.resourceFactory = Objects.requireNonNull(resourceFactory); } - FileSystem getResourceFileSystem(PolyglotEngineImpl polyglotEngine) throws IOException { - return getResourceFileSystemImpl((resource) -> EngineAccessor.LANGUAGE.createInternalResourceEnv(resource, () -> polyglotEngine.inEnginePreInitialization)); + String getResourceId() { + return resourceId; + } + + Path getPathOrNull() { + return path; + } + + Path getPath(PolyglotEngineImpl polyglotEngine) throws IOException { + if (usesInternalResources()) { + Path result = path; + if (result == null) { + synchronized (this) { + result = path; + if (result == null) { + result = installResource((resource) -> EngineAccessor.LANGUAGE.createInternalResourceEnv(resource, () -> polyglotEngine.inEnginePreInitialization)); + path = result; + } + } + } + return result; + } else { + throw new IllegalArgumentException("Internal resources are restricted. 
To enable them, use '-H:+CopyLanguageResources' during the native image build."); + } + } + + void initializeOwningRoot(InternalResourceRoots.Root root) { + assert owningRoot == null; + assert path == null; + owningRoot = root; + path = switch (root.kind()) { + case RESOURCE -> root.path(); + case COMPONENT -> root.path().resolve(sanitize(resourceId)); + case UNVERSIONED -> findStandaloneResourceRoot(root.path()); + case VERSIONED -> null; + }; + } + + /** + * Resets state for unit test execution. This method is intended only for testing. + */ + void clearCache() { + owningRoot = null; + path = null; } /** @@ -125,13 +169,17 @@ FileSystem getResourceFileSystem(PolyglotEngineImpl polyglotEngine) throws IOExc */ static Path installRuntimeResource(InternalResource resource) throws IOException { InternalResourceCache cache = createRuntimeResourceCache(resource); - return cache.getResourceFileSystemImpl(InternalResourceCache::createInternalResourceEnvReflectively).parsePath("").toAbsolutePath(); + synchronized (cache) { + return cache.installResource(InternalResourceCache::createInternalResourceEnvReflectively); + } } private static InternalResourceCache createRuntimeResourceCache(InternalResource resource) { InternalResource.Id id = resource.getClass().getAnnotation(InternalResource.Id.class); assert id != null : resource.getClass() + " must be annotated by @InternalResource.Id"; - return new InternalResourceCache(PolyglotEngineImpl.ENGINE_ID, id.value(), () -> resource); + InternalResourceCache cache = new InternalResourceCache(PolyglotEngineImpl.ENGINE_ID, id.value(), () -> resource); + InternalResourceRoots.initializeRuntimeResource(cache); + return cache; } private static InternalResource.Env createInternalResourceEnvReflectively(InternalResource resource) { @@ -144,62 +192,43 @@ private static InternalResource.Env createInternalResourceEnvReflectively(Intern } } - private FileSystem getResourceFileSystemImpl(Function createEnv) throws IOException { - FileSystem 
result = resourceFileSystem; - if (result == null) { - synchronized (this) { - result = resourceFileSystem; - if (result == null) { - Path root = findOverriddenResourceRoot(); - if (root == null) { - if (hasExplicitCacheRoot()) { - root = findStandaloneResourceRoot(getExplicitCacheRoot()); - } else if (ImageInfo.inImageRuntimeCode()) { - root = findStandaloneResourceRoot(findCacheRootOnNativeImage()); - } else { - InternalResource resource = resourceFactory.get(); - InternalResource.Env env = createEnv.apply(resource); - String versionHash = resource.versionHash(env); - if (versionHash.getBytes().length > 128) { - throw new IOException("The version hash length is restricted to a maximum of 128 bytes."); - } - root = findCacheRootOnHotSpot().resolve(Path.of(sanitize(id), sanitize(resourceId), sanitize(versionHash))); - unpackResourceFiles(root, resource, env); - } - } - ResettableCachedRoot rootSupplier = new ResettableCachedRoot(root); - result = FileSystems.newInternalResourceFileSystem(rootSupplier); - resourceFileSystem = result; - } + private Path installResource(Function resourceEnvProvider) throws IOException { + Objects.requireNonNull(resourceEnvProvider, "ResourceEnvProvider must be non-null."); + assert Thread.holdsLock(this) : "Unpacking must be called under lock"; + assert owningRoot.kind() == InternalResourceRoots.Root.Kind.VERSIONED; + assert !ImageInfo.inImageRuntimeCode() : "Must not be called in the image execution time."; + InternalResource resource = resourceFactory.get(); + InternalResource.Env env = resourceEnvProvider.apply(resource); + String versionHash = resource.versionHash(env); + if (versionHash.getBytes().length > 128) { + throw new IOException("The version hash length is restricted to a maximum of 128 bytes."); + } + Path target = owningRoot.path().resolve(Path.of(sanitize(id), sanitize(resourceId), sanitize(versionHash))); + if (!Files.exists(target)) { + Path parent = target.getParent(); + if (parent == null) { + throw 
CompilerDirectives.shouldNotReachHere("Target must have a parent directory but was " + target); } - } - return result; - } - - private static void unpackResourceFiles(Path target, InternalResource resource, InternalResource.Env env) throws IOException { - unpackLock.lock(); - try { - if (!Files.exists(target)) { - Path parent = target.getParent(); - if (parent == null) { - throw CompilerDirectives.shouldNotReachHere("Target must have a parent directory but was " + target); - } - Path owner = Files.createDirectories(Objects.requireNonNull(parent)); - Path tmpDir = Files.createTempDirectory(owner, null); - resource.unpackFiles(env, tmpDir); - try { - Files.move(tmpDir, target, StandardCopyOption.ATOMIC_MOVE); - } catch (FileAlreadyExistsException existsException) { - // race with other process that already moved the folder just unlink the tmp - // directory + Path owner = Files.createDirectories(Objects.requireNonNull(parent)); + Path tmpDir = Files.createTempDirectory(owner, null); + resource.unpackFiles(env, tmpDir); + try { + Files.move(tmpDir, target, StandardCopyOption.ATOMIC_MOVE); + } catch (FileAlreadyExistsException existsException) { + // race with other process that already moved the folder just unlink the tmp + // directory + unlink(tmpDir); + } catch (FileSystemException fsException) { + // On some filesystem implementations, the generic FileSystemException is thrown + // instead of FileAlreadyExistsException. We need to check if this is the case. 
+ if (Files.isDirectory(target)) { unlink(tmpDir); } - } else { - verifyResourceRoot(target); } - } finally { - unpackLock.unlock(); + } else { + verifyResourceRoot(target); } + return target; } private static void verifyResourceRoot(Path resourceRoot) throws IOException { @@ -215,18 +244,6 @@ private Path findStandaloneResourceRoot(Path root) { return root.resolve(Path.of(sanitize(id), sanitize(resourceId))); } - private Path findOverriddenResourceRoot() throws IOException { - String value = System.getProperty(String.format(OVERRIDDEN_RESOURCE_ROOT, id, resourceId)); - if (value != null) { - return Paths.get(value).toRealPath(); - } - value = System.getProperty(String.format(OVERRIDDEN_COMPONENT_ROOT, id)); - if (value != null) { - return Paths.get(value).resolve(sanitize(resourceId)).toRealPath(); - } - return null; - } - private static String sanitize(String pathElement) { String result = pathElement; for (char fileSystemsSpecialChar : FILE_SYSTEM_SPECIAL_CHARACTERS) { @@ -235,108 +252,13 @@ private static String sanitize(String pathElement) { return result; } - private static boolean hasExplicitCacheRoot() throws IOException { - Pair res = cacheRoot; - if (res == null) { - String resourcesFolder = System.getProperty(OVERRIDDEN_CACHE_ROOT); - if (resourcesFolder != null) { - Path cache = Paths.get(resourcesFolder).toRealPath(); - res = Pair.create(cache, true); - cacheRoot = res; - } - } - return res != null && res.getRight(); - } - - private static Path getExplicitCacheRoot() { - Pair res = cacheRoot; - if (res == null || !res.getRight()) { - throw CompilerDirectives.shouldNotReachHere("Can be only called when hasExplicitCacheRoot() returned true"); - } - return res.getLeft(); - } - - private static Path findCacheRootOnHotSpot() throws IOException { - Pair res = cacheRoot; - if (res == null) { - String userHomeValue = System.getProperty("user.home"); - if (userHomeValue == null) { - throw CompilerDirectives.shouldNotReachHere("The 'user.home' system property is 
not set."); - } - Path userHome = Paths.get(userHomeValue); - Path container = switch (InternalResource.OS.getCurrent()) { - case DARWIN -> userHome.resolve(Path.of("Library", "Caches")); - case LINUX -> { - Path userCacheDir = null; - String xdgCacheValue = System.getenv("XDG_CACHE_HOME"); - if (xdgCacheValue != null) { - try { - Path xdgCacheDir = Path.of(xdgCacheValue); - // Do not fail when XDG_CACHE_HOME value is invalid. Fall back to - // $HOME/.cache. - if (xdgCacheDir.isAbsolute()) { - userCacheDir = xdgCacheDir; - } else { - emitWarning("The value of the environment variable 'XDG_CACHE_HOME' is not an absolute path. Using the default cache folder '%s'.", userHome.resolve(".cache")); - } - } catch (InvalidPathException notPath) { - emitWarning("The value of the environment variable 'XDG_CACHE_HOME' is not a valid path. Using the default cache folder '%s'.", userHome.resolve(".cache")); - } - } - if (userCacheDir == null) { - userCacheDir = userHome.resolve(".cache"); - } - yield userCacheDir; - } - case WINDOWS -> userHome.resolve(Path.of("AppData", "Local")); - }; - Path cache = container.resolve("org.graalvm.polyglot"); - cache = Files.createDirectories(cache).toRealPath(); - res = Pair.create(cache, false); - cacheRoot = res; - } - return res.getLeft(); - } - - private static void emitWarning(String message, Object... 
args) { - PrintStream out = System.err; - out.printf(message + "%n", args); - } - - private static Path findCacheRootOnNativeImage() { - Pair res = cacheRoot; - if (res == null) { - assert ImageInfo.inImageRuntimeCode() : "Can be called only in the native-image execution time."; - Path executable = getExecutablePath(); - Path cache = executable.resolveSibling("resources"); - res = Pair.create(cache, false); - cacheRoot = res; - } - return res.getLeft(); - } - - private static Path getExecutablePath() { - assert ImageInfo.inImageRuntimeCode(); - if (useInternalResources) { - if (ImageInfo.isExecutable()) { - return Path.of(ProcessProperties.getExecutableName()); - } else if (ImageInfo.isSharedLibrary()) { - return Path.of(ProcessProperties.getObjectFile(InternalResourceCacheSymbol.SYMBOL)); - } else { - throw CompilerDirectives.shouldNotReachHere("Should only be invoked within native image runtime code."); - } - } else { - throw new IllegalArgumentException("Lookup an executable name is restricted. " + - "To enable it, use '-H:+CopyLanguageResources' during the native image build."); - } - } - /** - * Recomputed before the analyses by a substitution in the {@code TruffleBaseFeature} based on - * the {@code CopyLanguageResources} option value. The field must not be declared as - * {@code final} to make the substitution function correctly. + * Returns true if internal resources are enabled. Internal resources can be disabled in the + * native image using {-H:-CopyLanguageResources} option. */ - private static boolean useInternalResources = true; + public static boolean usesInternalResources() { + return useInternalResources; + } /** * Collects optional internal resources for native-image build. 
This method is called @@ -355,13 +277,6 @@ static void resetNativeImageState() { nativeImageCache.clear(); } - private void resetFileSystemNativeImageState() { - FileSystem fs = resourceFileSystem; - if (fs != null) { - ((ResettableCachedRoot) FileSystems.getInternalResourceFileSystemRoot(fs)).resourceCacheRoot = null; - } - } - /** * Unpacks internal resources after native-image write. This method is called reflectively by * the {@code TruffleBaseFeature#afterAnalysis}. @@ -399,34 +314,31 @@ static boolean copyResourcesForNativeImage(Path target, String... components) th instruments = requiredInstruments; } for (LanguageCache language : languages) { - for (String resourceId : language.getResourceIds()) { - InternalResourceCache cache = language.getResourceCache(resourceId); + for (InternalResourceCache cache : language.getResources()) { result |= cache.copyResourcesForNativeImage(target); } } for (InstrumentCache instrument : instruments) { - for (String resourceId : instrument.getResourceIds()) { - InternalResourceCache cache = instrument.getResourceCache(resourceId); + for (InternalResourceCache cache : instrument.getResources()) { result |= cache.copyResourcesForNativeImage(target); } } // Always install engine resources - for (String resourceId : getEngineResourceIds()) { - InternalResourceCache cache = getEngineResource(resourceId); + for (InternalResourceCache cache : getEngineResources()) { result |= cache.copyResourcesForNativeImage(target); } return result; } private boolean copyResourcesForNativeImage(Path target) throws IOException { - Path resourceRoot = findStandaloneResourceRoot(target); - unlink(resourceRoot); - Files.createDirectories(resourceRoot); + Path root = findStandaloneResourceRoot(target); + unlink(root); + Files.createDirectories(root); InternalResource resource = resourceFactory.get(); InternalResource.Env env = EngineAccessor.LANGUAGE.createInternalResourceEnv(resource, () -> false); - resource.unpackFiles(env, resourceRoot); - if 
(isEmpty(resourceRoot)) { - Files.deleteIfExists(resourceRoot); + resource.unpackFiles(env, root); + if (isEmpty(root)) { + Files.deleteIfExists(root); return false; } else { return true; @@ -438,6 +350,15 @@ static Collection getEngineResourceIds() { return engineResources != null ? engineResources.keySet() : List.of(); } + static Collection getEngineResources() { + Map> engineResources = loadOptionalInternalResources(EngineAccessor.locatorOrDefaultLoaders()).get(PolyglotEngineImpl.ENGINE_ID); + if (engineResources != null) { + return engineResources.values().stream().map(Supplier::get).collect(Collectors.toList()); + } else { + return List.of(); + } + } + static InternalResourceCache getEngineResource(String resourceId) { Map> engineResources = loadOptionalInternalResources(EngineAccessor.locatorOrDefaultLoaders()).get(PolyglotEngineImpl.ENGINE_ID); Supplier resourceSupplier = engineResources != null ? engineResources.get(resourceId) : null; @@ -531,69 +452,13 @@ private static void unlink(Path path) throws IOException { Files.deleteIfExists(path); } - /** - * Sets the {@link #cacheRoot} in unit tests. This method is called reflectively by the - * {@code InternalResourceTest}. - */ - @SuppressWarnings("unused") - private static void setTestCacheRoot(Path root, boolean disposeResourceFileSystem) { - cacheRoot = root == null ? 
null : Pair.create(root, false); - for (LanguageCache language : LanguageCache.languages().values()) { - for (String resourceId : language.getResourceIds()) { - InternalResourceCache cache = language.getResourceCache(resourceId); - if (disposeResourceFileSystem) { - cache.resourceFileSystem = null; - } else { - cache.resetFileSystemNativeImageState(); - } - } - } - for (InstrumentCache instrument : InstrumentCache.load()) { - for (String resourceId : instrument.getResourceIds()) { - InternalResourceCache cache = instrument.getResourceCache(resourceId); - if (disposeResourceFileSystem) { - cache.resourceFileSystem = null; - } else { - cache.resetFileSystemNativeImageState(); - } - } - } - } - - private final class ResettableCachedRoot implements Supplier { - - private volatile Path resourceCacheRoot; - - ResettableCachedRoot(Path resourceCacheRoot) { - Objects.requireNonNull(resourceCacheRoot, "ResourceCacheRoot must be non-null."); - this.resourceCacheRoot = resourceCacheRoot; - } - - @Override - public Path get() { - Path res = resourceCacheRoot; - if (res == null) { - if (ImageInfo.inImageBuildtimeCode()) { - throw CompilerDirectives.shouldNotReachHere("Reintroducing internal resource cache path into an image heap."); - } - try { - res = findOverriddenResourceRoot(); - if (res == null) { - Path cache; - if (hasExplicitCacheRoot()) { - cache = getExplicitCacheRoot(); - } else { - cache = findCacheRootOnNativeImage(); - } - res = findStandaloneResourceRoot(cache); - } - resourceCacheRoot = res; - } catch (IOException ioe) { - throw new IOError(ioe); - } - } - return res; - } + @Override + public String toString() { + return "InternalResourceCache[" + + "componentId='" + id + '\'' + + ", resourceId='" + resourceId + '\'' + + ", resourceRoot=" + path + + '}'; } private static final class OptionalResourceSupplier implements Supplier { diff --git a/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/InternalResourceRoots.java 
b/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/InternalResourceRoots.java new file mode 100644 index 000000000000..46d91ea31340 --- /dev/null +++ b/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/InternalResourceRoots.java @@ -0,0 +1,304 @@ +/* + * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * The Universal Permissive License (UPL), Version 1.0 + * + * Subject to the condition set forth below, permission is hereby granted to any + * person obtaining a copy of this software, associated documentation and/or + * data (collectively the "Software"), free of charge and under any and all + * copyright rights in the Software, and any and all patent rights owned or + * freely licensable by each licensor hereunder covering either (i) the + * unmodified Software as contributed to or provided by such licensor, or (ii) + * the Larger Works (as defined below), to deal in both + * + * (a) the Software, and + * + * (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if + * one is included with the Software each a "Larger Work" to which the Software + * is contributed by such licensors), + * + * without restriction, including without limitation the rights to copy, create + * derivative works of, display, perform, and distribute the Software and make, + * use, sell, offer for sale, import, export, have made, and have sold the + * Software and the Larger Work(s), and to sublicense the foregoing rights on + * either these or other terms. + * + * This license is subject to the following condition: + * + * The above copyright notice and either this complete permission notice or at a + * minimum a reference to the UPL must be included in all copies or substantial + * portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +package com.oracle.truffle.polyglot; + +import com.oracle.truffle.api.CompilerDirectives; +import com.oracle.truffle.api.InternalResource; +import org.graalvm.collections.Pair; +import org.graalvm.nativeimage.ImageInfo; +import org.graalvm.nativeimage.ProcessProperties; + +import java.io.PrintStream; +import java.nio.file.InvalidPathException; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +final class InternalResourceRoots { + + private static final String OVERRIDDEN_CACHE_ROOT = "polyglot.engine.resourcePath"; + private static final String OVERRIDDEN_COMPONENT_ROOT = "polyglot.engine.resourcePath."; + private static final String OVERRIDDEN_RESOURCE_ROOT = "polyglot.engine.resourcePath."; + + /** + * This field is reset to {@code null} by the {@code TruffleBaseFeature} before writing the + * native image heap. + */ + private static volatile Set roots; + + private InternalResourceRoots() { + } + + /** + * Initializes the internal resource roots. This method is called from entry-points in the + * polyglot during engine construction to ensure that internal resource roots are initialized + * before the engine is used. 
+ */ + static synchronized void ensureInitialized() { + if (roots == null) { + if (InternalResourceCache.usesInternalResources()) { + roots = computeRoots(findDefaultRoot()); + } else { + roots = Set.of(); + } + } + } + + static Root findRoot(Path hostPath) { + for (Root root : roots) { + if (hostPath.startsWith(root.path)) { + return root; + } + } + return null; + } + + static InternalResourceCache findInternalResource(Path hostPath) { + Root root = findRoot(hostPath); + if (root != null) { + for (InternalResourceCache cache : root.caches) { + Path resourceRoot = cache.getPathOrNull(); + // Used InternalResourceCache instances always have non-null root. + if (resourceRoot != null && hostPath.startsWith(resourceRoot)) { + return cache; + } + } + } + return null; + } + + /** + * The unpacking of the Truffle attach library is called reflectively in a boot time when + * accessors, LanguageCache and InstrumentCache cannot be used. We are creating a temporary + * {@link InternalResourceCache} just to unpack the library. + * + */ + static void initializeRuntimeResource(InternalResourceCache truffleRuntimeResource) { + Pair defaultRoot = findDefaultRoot(); + Map, List> collector = new HashMap<>(); + collectRoots(PolyglotEngineImpl.ENGINE_ID, defaultRoot.getLeft(), defaultRoot.getRight(), + List.of(truffleRuntimeResource), collector); + var entry = collector.entrySet().iterator().next(); + var key = entry.getKey(); + truffleRuntimeResource.initializeOwningRoot(new Root(key.getLeft(), key.getRight(), entry.getValue())); + } + + /** + * Sets the {@code #roots} in unit tests. This method is called reflectively by the + * {@code InternalResourceTest}. + * + * @param newRoot the new enforced cache root used by unit tests. + * @param nativeImageRuntime simulates the native image runtime behavior on hotspot. Needed by + * the {@code ContextPreInitializationTest}. 
+ * + */ + @SuppressWarnings("unused") + private static synchronized void setTestCacheRoot(Path newRoot, boolean nativeImageRuntime) { + if (roots != null) { + for (Root root : roots) { + for (InternalResourceCache cache : root.caches()) { + cache.clearCache(); + } + } + } + if (newRoot != null) { + roots = computeRoots(Pair.create(newRoot, nativeImageRuntime ? Root.Kind.UNVERSIONED : Root.Kind.VERSIONED)); + } else if (nativeImageRuntime) { + var defaultRoots = findDefaultRoot(); + roots = computeRoots(Pair.create(defaultRoots.getLeft(), Root.Kind.UNVERSIONED)); + } else { + roots = null; + } + } + + /** + * Computes the internal resource roots. + */ + private static Set computeRoots(Pair defaultRoot) { + Map, List> collector = new HashMap<>(); + for (LanguageCache language : LanguageCache.languages().values()) { + Collection resources = language.getResources(); + if (!resources.isEmpty()) { + collectRoots(language.getId(), defaultRoot.getLeft(), defaultRoot.getRight(), resources, collector); + } + } + for (InstrumentCache instrument : InstrumentCache.load()) { + Collection resources = instrument.getResources(); + if (!resources.isEmpty()) { + collectRoots(instrument.getId(), defaultRoot.getLeft(), defaultRoot.getRight(), resources, collector); + } + } + Collection engineResources = InternalResourceCache.getEngineResources(); + if (!engineResources.isEmpty()) { + collectRoots(PolyglotEngineImpl.ENGINE_ID, defaultRoot.getLeft(), defaultRoot.getRight(), engineResources, collector); + } + // Build a set of immutable Roots. 
+ Set result = new HashSet<>(); + for (var entry : collector.entrySet()) { + var key = entry.getKey(); + var resources = entry.getValue(); + Root internalResourceRoot = new Root(key.getLeft(), key.getRight(), resources); + for (InternalResourceCache resource : resources) { + resource.initializeOwningRoot(internalResourceRoot); + } + result.add(internalResourceRoot); + } + return Collections.unmodifiableSet(result); + } + + private static Pair findDefaultRoot() { + Path root; + Root.Kind kind; + String overriddenRoot = System.getProperty(OVERRIDDEN_CACHE_ROOT); + if (overriddenRoot != null) { + root = Path.of(overriddenRoot); + kind = Root.Kind.UNVERSIONED; + } else if (ImageInfo.inImageRuntimeCode()) { + root = findCacheRootOnNativeImage(); + kind = Root.Kind.UNVERSIONED; + } else { + root = findCacheRootOnHotSpot(); + kind = Root.Kind.VERSIONED; + } + return Pair.create(root, kind); + } + + private static void collectRoots(String componentId, Path componentRoot, Root.Kind componentKind, Collection resources, + Map, List> collector) { + Path useRoot = componentRoot; + Root.Kind useKind = componentKind; + StringBuilder builder = new StringBuilder(OVERRIDDEN_COMPONENT_ROOT); + builder.append(componentId); + String overriddenRoot = System.getProperty(builder.toString()); + if (overriddenRoot != null) { + useRoot = Path.of(overriddenRoot); + useKind = Root.Kind.COMPONENT; + } + for (InternalResourceCache resource : resources) { + Path resourceRoot = useRoot; + Root.Kind resourceKind = useKind; + builder = new StringBuilder(OVERRIDDEN_RESOURCE_ROOT); + builder.append(componentId); + builder.append('.'); + builder.append(resource.getResourceId()); + overriddenRoot = System.getProperty(builder.toString()); + if (overriddenRoot != null) { + resourceRoot = Path.of(overriddenRoot); + resourceKind = Root.Kind.RESOURCE; + } + collector.computeIfAbsent(Pair.create(resourceRoot, resourceKind), (k) -> new ArrayList<>()).add(resource); + } + } + + private static Path 
findCacheRootOnNativeImage() { + assert ImageInfo.inImageRuntimeCode() : "Can be called only in the native-image execution time."; + Path executable = getExecutablePath(); + return executable.resolveSibling("resources"); + } + + private static Path getExecutablePath() { + assert ImageInfo.inImageRuntimeCode(); + if (ImageInfo.isExecutable()) { + return Path.of(ProcessProperties.getExecutableName()); + } else if (ImageInfo.isSharedLibrary()) { + return Path.of(ProcessProperties.getObjectFile(InternalResourceCacheSymbol.SYMBOL)); + } else { + throw CompilerDirectives.shouldNotReachHere("Should only be invoked within native image runtime code."); + } + } + + private static Path findCacheRootOnHotSpot() { + String userHomeValue = System.getProperty("user.home"); + if (userHomeValue == null) { + throw CompilerDirectives.shouldNotReachHere("The 'user.home' system property is not set."); + } + Path userHome = Paths.get(userHomeValue); + Path container = switch (InternalResource.OS.getCurrent()) { + case DARWIN -> userHome.resolve(Path.of("Library", "Caches")); + case LINUX -> { + Path userCacheDir = null; + String xdgCacheValue = System.getenv("XDG_CACHE_HOME"); + if (xdgCacheValue != null) { + try { + Path xdgCacheDir = Path.of(xdgCacheValue); + // Do not fail when XDG_CACHE_HOME value is invalid. Fall back to + // $HOME/.cache. + if (xdgCacheDir.isAbsolute()) { + userCacheDir = xdgCacheDir; + } else { + emitWarning("The value of the environment variable 'XDG_CACHE_HOME' is not an absolute path. Using the default cache folder '%s'.", userHome.resolve(".cache")); + } + } catch (InvalidPathException notPath) { + emitWarning("The value of the environment variable 'XDG_CACHE_HOME' is not a valid path. 
Using the default cache folder '%s'.", userHome.resolve(".cache")); + } + } + if (userCacheDir == null) { + userCacheDir = userHome.resolve(".cache"); + } + yield userCacheDir; + } + case WINDOWS -> userHome.resolve(Path.of("AppData", "Local")); + }; + return container.resolve("org.graalvm.polyglot"); + } + + private static void emitWarning(String message, Object... args) { + PrintStream out = System.err; + out.printf(message + "%n", args); + } + + record Root(Path path, Kind kind, List caches) { + + enum Kind { + COMPONENT, + RESOURCE, + UNVERSIONED, + VERSIONED, + } + } +} diff --git a/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/LanguageCache.java b/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/LanguageCache.java index e613a16ac659..5c93cb469c37 100644 --- a/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/LanguageCache.java +++ b/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/LanguageCache.java @@ -675,6 +675,10 @@ Collection getResourceIds() { return internalResources.keySet(); } + Collection getResources() { + return internalResources.values(); + } + @Override public String toString() { return "LanguageCache [id=" + id + ", name=" + name + ", implementationName=" + implementationName + ", version=" + version + ", className=" + className + ", services=" + services + "]"; diff --git a/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/PolyglotEngineImpl.java b/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/PolyglotEngineImpl.java index c3dd1ec34ac5..13d7b5f75ecd 100644 --- a/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/PolyglotEngineImpl.java +++ b/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/PolyglotEngineImpl.java @@ -1751,7 +1751,7 @@ public PolyglotContextImpl createContext(SandboxPolicy contextSandboxPolicy, Out } else if (customFileSystem != null) { 
fileSystemConfig = new FileSystemConfig(ioAccess, customFileSystem, customFileSystem); } else { - fileSystemConfig = new FileSystemConfig(ioAccess, FileSystems.newNoIOFileSystem(), FileSystems.newLanguageHomeFileSystem()); + fileSystemConfig = new FileSystemConfig(ioAccess, FileSystems.newNoIOFileSystem(), FileSystems.newResourcesFileSystem()); } if (currentWorkingDirectory != null) { Path publicFsCwd; diff --git a/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/PolyglotImpl.java b/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/PolyglotImpl.java index 16c61b7e35bf..59586e5a6147 100644 --- a/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/PolyglotImpl.java +++ b/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/PolyglotImpl.java @@ -272,6 +272,7 @@ public Object buildEngine(String[] permittedLanguages, SandboxPolicy sandboxPoli PolyglotEngineImpl impl = null; try { validateSandbox(sandboxPolicy); + InternalResourceRoots.ensureInitialized(); if (TruffleOptions.AOT) { EngineAccessor.ACCESSOR.initializeNativeImageTruffleLocator(); } @@ -435,6 +436,7 @@ public void preInitializeEngine() { * Used for preinitialized contexts and fallback engine. 
*/ PolyglotEngineImpl createDefaultEngine(TruffleLanguage hostLanguage) { + InternalResourceRoots.ensureInitialized(); Map options = getAPIAccess().readOptionsFromSystemProperties(); LogConfig logConfig = new LogConfig(); SandboxPolicy sandboxPolicy = SandboxPolicy.TRUSTED; @@ -545,8 +547,8 @@ public FileSystem newDefaultFileSystem(String hostTmpDir) { } @Override - public FileSystem allowLanguageHomeAccess(FileSystem fileSystem) { - return FileSystems.allowLanguageHomeAccess(fileSystem); + public FileSystem allowInternalResourceAccess(FileSystem fileSystem) { + return FileSystems.allowInternalResourceAccess(fileSystem); } @Override diff --git a/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/PolyglotInstrument.java b/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/PolyglotInstrument.java index 742a1a2039db..ea9f38c68da8 100644 --- a/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/PolyglotInstrument.java +++ b/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/PolyglotInstrument.java @@ -43,8 +43,6 @@ import static com.oracle.truffle.polyglot.EngineAccessor.INSTRUMENT; import static com.oracle.truffle.polyglot.EngineAccessor.LANGUAGE; -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; import java.util.function.Supplier; import org.graalvm.options.OptionDescriptor; @@ -53,7 +51,6 @@ import org.graalvm.polyglot.impl.AbstractPolyglotImpl.APIAccess; import com.oracle.truffle.api.InstrumentInfo; -import com.oracle.truffle.api.TruffleFile; import com.oracle.truffle.api.instrumentation.TruffleInstrument; import com.oracle.truffle.polyglot.PolyglotLocals.LocalLocation; @@ -65,7 +62,6 @@ class PolyglotInstrument implements com.oracle.truffle.polyglot.PolyglotImpl.VMO final PolyglotEngineImpl engine; private final Object instrumentLock = new Object(); - final Map internalResources = new ConcurrentHashMap<>(); private volatile OptionDescriptors engineOptions; 
private volatile OptionDescriptors contextOptions; private volatile OptionDescriptors allOptions; diff --git a/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/PolyglotLanguage.java b/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/PolyglotLanguage.java index 886c0a306011..ee9c4a5e55de 100644 --- a/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/PolyglotLanguage.java +++ b/truffle/src/com.oracle.truffle.polyglot/src/com/oracle/truffle/polyglot/PolyglotLanguage.java @@ -44,9 +44,7 @@ import static com.oracle.truffle.polyglot.EngineAccessor.LANGUAGE; import static com.oracle.truffle.polyglot.EngineAccessor.NODES; -import java.util.Map; import java.util.Set; -import java.util.concurrent.ConcurrentHashMap; import org.graalvm.home.Version; import org.graalvm.options.OptionDescriptors; @@ -56,7 +54,6 @@ import com.oracle.truffle.api.CompilerAsserts; import com.oracle.truffle.api.CompilerDirectives; import com.oracle.truffle.api.CompilerDirectives.CompilationFinal; -import com.oracle.truffle.api.TruffleFile; import com.oracle.truffle.api.nodes.LanguageInfo; import com.oracle.truffle.polyglot.PolyglotLocals.LocalLocation; @@ -69,7 +66,6 @@ final class PolyglotLanguage implements com.oracle.truffle.polyglot.PolyglotImpl Object api; // effectively final final int engineIndex; final RuntimeException initError; - final Map internalResources = new ConcurrentHashMap<>(); private volatile OptionDescriptors options; private volatile OptionValuesImpl optionValues; diff --git a/truffle/src/com.oracle.truffle.runtime/src/com/oracle/truffle/runtime/hotspot/HotSpotTruffleRuntime.java b/truffle/src/com.oracle.truffle.runtime/src/com/oracle/truffle/runtime/hotspot/HotSpotTruffleRuntime.java index ff7132f32eac..3fffe943679f 100644 --- a/truffle/src/com.oracle.truffle.runtime/src/com/oracle/truffle/runtime/hotspot/HotSpotTruffleRuntime.java +++ 
b/truffle/src/com.oracle.truffle.runtime/src/com/oracle/truffle/runtime/hotspot/HotSpotTruffleRuntime.java @@ -596,7 +596,7 @@ protected int getBaseInstanceSize(Class type) { HotSpotMetaAccessProvider meta = (HotSpotMetaAccessProvider) getMetaAccess(); HotSpotResolvedObjectType resolvedType = (HotSpotResolvedObjectType) meta.lookupJavaType(type); - return resolvedType.instanceSize(); + return Math.abs(resolvedType.instanceSize()); } private static boolean fieldIsNotEligible(Class clazz, ResolvedJavaField f) {