From d856404f08044fc93d8e0fd177278b262000a4da Mon Sep 17 00:00:00 2001 From: Pieter Olivier Date: Sun, 1 Dec 2024 14:18:39 +0100 Subject: [PATCH 01/29] Improved line tracking in recovery tests --- .../tests/concrete/recovery/RecoveryTestSupport.rsc | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RecoveryTestSupport.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RecoveryTestSupport.rsc index 9ee14b38de6..e933bf70c8a 100644 --- a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RecoveryTestSupport.rsc +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RecoveryTestSupport.rsc @@ -80,6 +80,9 @@ private TestMeasurement testRecovery(&T (value input, loc origin) standardParser errorCount = size(errors); disambDuration = realTime() - parseEndTime; result = "recovery"; + if ("" != input) { + throw "Yield of recovered tree does not match the original input"; + } if (errors == []) { measurement = successfulDisambiguation(source=source, duration=duration); } else { @@ -237,11 +240,15 @@ FileStats testSingleCharDeletions(&T (value input, loc origin) standardParser, & FileStats testDeleteUntilEol(&T (value input, loc origin) standardParser, &T (value input, loc origin) recoveryParser, loc source, str input, int referenceParseTime, int recoverySuccessLimit, int begin=0, int end=-1, loc statFile=|unknown:///|) { FileStats stats = fileStats(); - int lineStart = begin; + int lineStart = 0; list[int] lineEndings = findAll(input, "\n"); - int line = 1; + int line = 0; for (int lineEnd <- lineEndings) { + line = line+1; + if (lineEnd < begin) { + continue; + } lineLength = lineEnd - lineStart; for (int pos <- [lineStart..lineEnd]) { // Check boundaries (only used for quick bug testing) @@ -258,7 +265,6 @@ FileStats testDeleteUntilEol(&T (value input, loc origin) standardParser, &T (va } lineStart = lineEnd+1; println(); - line = line+1; } return stats; From 27c7dba221847f0d5f20b0728a2bb85ed7c5d60b Mon Sep 17 00:00:00 2001 From: Pieter Olivier Date: Sun, 1 Dec 2024 14:19:51 +0100 Subject: [PATCH 02/29] Simplified to string so result is actually readable --- .../parser/gtd/result/AbstractContainerNode.java | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/org/rascalmpl/parser/gtd/result/AbstractContainerNode.java b/src/org/rascalmpl/parser/gtd/result/AbstractContainerNode.java index f57f543ec94..55c4bb45a51 100644 --- a/src/org/rascalmpl/parser/gtd/result/AbstractContainerNode.java +++ b/src/org/rascalmpl/parser/gtd/result/AbstractContainerNode.java @@ -166,17 +166,13 @@ public String toString() { builder.append(",layout"); } if (firstAlternative != null) { - builder.append(",alternatives=["); - builder.append(firstAlternative); - builder.append(":"); + builder.append(","); builder.append(DebugUtil.prodToString((IConstructor) firstProduction)); - if (alternatives != null) { - for (int i=0; i Date: Sun, 1 Dec 2024 14:20:36 +0100 Subject: [PATCH 03/29] Recover all productions that match the prefix --- .../uptr/recovery/ToTokenRecoverer.java | 92 ++++++++----------- 1 file changed, 36 insertions(+), 56 deletions(-) diff --git a/src/org/rascalmpl/parser/uptr/recovery/ToTokenRecoverer.java b/src/org/rascalmpl/parser/uptr/recovery/ToTokenRecoverer.java index 307c71838f8..07060de9bf6 100644 --- a/src/org/rascalmpl/parser/uptr/recovery/ToTokenRecoverer.java +++ b/src/org/rascalmpl/parser/uptr/recovery/ToTokenRecoverer.java @@ -13,8 +13,10 @@ package org.rascalmpl.parser.uptr.recovery; import java.net.URI; +import java.util.HashMap; import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Set; import org.apache.commons.lang3.tuple.Triple; @@ -82,8 +84,6 @@ private DoubleArrayList, AbstractNode> reviveNod DoubleArrayList, ArrayList> recoveryNodes) { DoubleArrayList, AbstractNode> recoveredNodes = new DoubleArrayList<>(); - Set> skippedIds = new HashSet<>(); - // Sort nodes by start location recoveryNodes.sort((e1, e2) -> Integer.compare(e2.getLeft().getStartLocation(), e1.getLeft().getStartLocation())); @@ -92,6 +92,10 @@ private DoubleArrayList, AbstractNode> reviveNod visualizer.visualizeRecoveryNodes(recoveryNodes); } + // Keep track of previously added nodes so we can reuse them when we encounter + // a node with the same constructor, start location, and skip length + Map, SkippingStackNode> addedNodes = new HashMap<>(); + for (int i = 0; i recoveryNode = recoveryNodes.getFirst(i); ArrayList prods = recoveryNodes.getSecond(i); @@ -103,28 +107,31 @@ private DoubleArrayList, AbstractNode> reviveNod for (int j = prods.size() - 1; j >= 0; --j) { IConstructor prod = prods.get(j); - IConstructor type = ProductionAdapter.getType(prod); + IConstructor sort = ProductionAdapter.getType(prod); - List> skippingNodes = - findSkippingNodes(input, location, recoveryNode, prod, startLocation); + List> skippingNodes = findSkippingNodes(input, location, recoveryNode, prod, startLocation); for (SkippingStackNode skippingNode : skippingNodes) { int skipLength = skippingNode.getLength(); - - if (!skippedIds.add(Triple.ofNonNull(startLocation, type, skipLength))) { - // Do not add this skipped node if a node with the same startLocation, type, and skipLength has been added already - continue; + Triple key = Triple.of(sort, startLocation, skipLength); + + // See if we already have added a node with this constructor, start location, and skip length + SkippingStackNode previouslyAddedNode = addedNodes.get(key); + if (previouslyAddedNode == null) { + // No, add the current node + addedNodes.put(key, skippingNode); + skippingNode.initEdges(); + recoveredNodes.add(skippingNode, skippingNode.getResult()); + } else { + // We already added a similar node, use that instead + skippingNode = previouslyAddedNode; } AbstractStackNode continuer = new RecoveryPointStackNode<>(stackNodeIdDispenser.dispenseId(), prod, recoveryNode); - EdgesSet edges = new EdgesSet<>(1); edges.add(continuer); - continuer.setIncomingEdges(edges); - skippingNode.initEdges(); skippingNode.addEdges(edges, startLocation); - recoveredNodes.add(skippingNode, skippingNode.getResult()); } } } @@ -132,6 +139,18 @@ private DoubleArrayList, AbstractNode> reviveNod return recoveredNodes; } + // Add a new SkippingStackNode, but only if another node with the same result does not already exist + private void addSkippingStackNode(List> nodes, IConstructor prod, int startLocation, SkippedNode result) { + for (SkippingStackNode node : nodes) { + if (((SkippedNode)node.getResult()).getLength() == result.getLength()) { + // Already added + return; + } + } + + nodes.add(new SkippingStackNode<>(stackNodeIdDispenser.dispenseId(), prod, result, startLocation)); + } + private List> findSkippingNodes(int[] input, int location, AbstractStackNode recoveryNode, IConstructor prod, int startLocation) { List> nodes = new java.util.ArrayList<>(); @@ -141,15 +160,8 @@ private List> findSkippingNodes(int[] input, int // If we are at the end of the input, skip nothing if (location >= input.length) { result = SkippingStackNode.createResultUntilEndOfInput(uri, input, startLocation); - nodes.add(new SkippingStackNode<>(stackNodeIdDispenser.dispenseId(), prod, result, startLocation)); + addSkippingStackNode(nodes, prod, startLocation, result); return nodes; // No other nodes would be useful - } - - // If we are the top-level node, just skip the rest of the input - if (!recoveryNode.isEndNode() && isTopLevelProduction(recoveryNode)) { - result = SkippingStackNode.createResultUntilEndOfInput(uri, input, startLocation); - nodes.add(new SkippingStackNode<>(stackNodeIdDispenser.dispenseId(), prod, result, startLocation)); - return nodes; // No other nodes would be useful } // Find the last token of this production and skip until after that @@ -159,7 +171,7 @@ private List> findSkippingNodes(int[] input, int MatchResult endMatch = endMatcher.findMatch(input, startLocation, Integer.MAX_VALUE/2); if (endMatch != null) { result = SkippingStackNode.createResultUntilChar(uri, input, startLocation, endMatch.getEnd()); - nodes.add(new SkippingStackNode<>(stackNodeIdDispenser.dispenseId(), prod, result, startLocation)); + addSkippingStackNode(nodes, prod, startLocation, result); } } @@ -170,7 +182,7 @@ private List> findSkippingNodes(int[] input, int MatchResult nextMatch = nextMatcher.findMatch(input, startLocation+1, Integer.MAX_VALUE/2); if (nextMatch != null) { result = SkippingStackNode.createResultUntilChar(uri, input, startLocation, nextMatch.getStart()); - nodes.add(new SkippingStackNode<>(stackNodeIdDispenser.dispenseId(), prod, result, startLocation)); + addSkippingStackNode(nodes, prod, startLocation, result); } } @@ -322,38 +334,6 @@ private boolean isNullable(AbstractStackNode stackNode) { return false; } - // Check if a node is a top-level production (i.e., its parent production node has no parents and - // starts at position -1) - // As this is experimental code, this method is extremely conservative. - // Any sharing will result in returning 'false'. - // We will need to change this strategy in the future to improve error recovery. - private boolean isTopLevelProduction(AbstractStackNode node) { - - while (node != null && node.getDot() != 0) { - node = getSinglePredecessor(node); - } - - if (node != null) { - node = getSinglePredecessor(node); - return node != null && node.getStartLocation() == -1; - } - - return false; - } - - private AbstractStackNode getSinglePredecessor(AbstractStackNode node) { - IntegerObjectList> edgeMap = node.getEdges(); - if (edgeMap.size() == 1) { - EdgesSet edges = edgeMap.getValue(0); - if (edges.size() == 1) { - return edges.get(0); - } - } - - return null; - } - - private DoubleArrayList, AbstractNode> reviveFailedNodes( int[] input, int location, @@ -367,7 +347,7 @@ private DoubleArrayList, AbstractNode> reviveFai long id = (long) failedNode.getId() << 32 | failedNode.getStartLocation(); if (!processedNodes.add(id)) { continue; - } + } findRecoveryNodes(failedNodes.get(i), recoveryNodes); } From 415d53676ac136feda6e3e1d67b72f66f8f3d8fe Mon Sep 17 00:00:00 2001 From: Pieter Olivier Date: Sun, 1 Dec 2024 14:22:05 +0100 Subject: [PATCH 04/29] Added python script to summarize test stats --- .../rascal/tests/concrete/recovery/recovery_stats.py | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/recovery_stats.py diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/recovery_stats.py b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/recovery_stats.py new file mode 100644 index 00000000000..b05986af268 --- /dev/null +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/recovery_stats.py @@ -0,0 +1,10 @@ +# Simple script to extract some statistics from ErrorRecoveryBenchmark statistics +import pandas as pd +data = pd.read_csv("D:/stats/benchmark-stats-2024-11-26-0-5120.txt") +print(data.describe()) + +print("Duration stats:"); +print(data[data["result"] == "recovery"]["duration"].describe()) + +print("Disambiguation duration:") +print(data[data["result"] == "recovery"]["disambiguationDuration"].describe()) From 9d09eabb51758b7ed387b61ab5635ea0aa535d55 Mon Sep 17 00:00:00 2001 From: Pieter Olivier Date: Sun, 1 Dec 2024 14:22:32 +0100 Subject: [PATCH 05/29] Added tests for prefix-shared production recovery and related performance issues --- .../concrete/recovery/PrefixSharingTest.rsc | 30 +++++ .../recovery/bugs/ATermOutOfMemoryBug.rsc | 103 ++++++++++++++++++ .../recovery/bugs/PreludeOutOfMemoryBug.rsc | 14 +++ .../recovery/bugs/SlowExceptionBug.rsc | 15 +++ 4 files changed, 162 insertions(+) create mode 100644 src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/PrefixSharingTest.rsc create mode 100644 src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/ATermOutOfMemoryBug.rsc create mode 100644 src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/PreludeOutOfMemoryBug.rsc create mode 100644 src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/SlowExceptionBug.rsc diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/PrefixSharingTest.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/PrefixSharingTest.rsc new file mode 100644 index 00000000000..b217311024e --- /dev/null +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/PrefixSharingTest.rsc @@ -0,0 +1,30 @@ +module lang::rascal::tests::concrete::recovery::PrefixSharingTest + +syntax Stat = Expr ";"; + +syntax Expr = N "+" N | N "-" N; + +syntax N = [0-9]; + +import ParseTree; +import util::ErrorRecovery; +import lang::rascal::tests::concrete::recovery::RecoveryTestSupport; +import vis::Text; +import IO; + +Tree parseStat(str input, bool visualize=false) + = parser(#Stat, allowRecovery=true, allowAmbiguity=true)(input, |unknown:///?visualize=<"">|); + +test bool exprOk() = checkRecovery(#Stat, "1+2+3;", []); + +test bool exprUnknownTerminator() = checkRecovery(#Stat, "1+2:", [":"], visualize=false); + +test bool exprUnknownOperator() = checkRecovery(#Stat, "1*2;", ["*2"], visualize=false); + +test bool exprPrefixSharing() { + Tree t = parseStat("1*2;", visualize=false); + println(prettyTree(t)); + return true; +} + + diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/ATermOutOfMemoryBug.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/ATermOutOfMemoryBug.rsc new file mode 100644 index 00000000000..1f2186ab1e7 --- /dev/null +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/ATermOutOfMemoryBug.rsc @@ -0,0 +1,103 @@ +module lang::rascal::tests::concrete::recovery::bugs::ATermOutOfMemoryBug + +import lang::rascal::tests::concrete::recovery::RecoveryTestSupport; +import lang::rascal::\syntax::Rascal; +import ParseTree; +import IO; +import String; +import vis::Text; +import util::ErrorRecovery; +import Set; + +@memo +void treeDiff(appl(prod1, args1), appl(prod2, args2)) { + if (prod1 != prod2) { + println(""); + println(""); + throw "prods do not match"; + } + + if (size(args1) != size(args2)) { + throw "argument size mismatch"; + } + + for (int i <- [0..size(args1)]) { + arg1 = args1[i]; + arg2 = args2[i]; + treeDiff(arg1, arg2); + } +} + +@memo +void treeDiff(cycle(sym1, length1), cycle(sym2, length2)) { + if (sym1 != sym2) { + throw "cycle symbols do not match"; + } + + if (length1 != length2) { + throw "cycle lengths do not match"; + } +} + +@memo +void treeDiff(char(c1), char(c2)) { + if (c1 != c2) { + throw "character mismatch"; + } +} + +@memo +void treeDiff(amb(alts1), amb(alts2)) { + if (size(alts1) != size(alts2)) { + throw "alts size mismatch"; + } + + while (!isEmpty(alts1)) { + = takeFirstFrom(alts1); + = takeFirstFrom(alts2); + + treeDiff(alt1, alt2); + } +} + +Tree testBug() { + standardParser = parser(#start[Module], allowRecovery=false, allowAmbiguity=true); + recoveryParser = parser(#start[Module], allowRecovery=true, allowAmbiguity=true); + loc source = |std:///lang/aterm/syntax/ATerm.rsc|; + loc sourceCache = |std:///lang/aterm/syntax/ATerm.rsc?enable-cache=true|; + input = readFile(source); + //modifiedInput = substring(input, 0, 369) + substring(input, 399); + modifiedInput = substring(input, 0, 369) + substring(input, 399); + println("without caching"); + Tree t1 = recoveryParser(modifiedInput, source); + println("with caching"); + Tree t2 = recoveryParser(modifiedInput, source); + if (t1 == t2) { + println("equal"); + } else { + println("not equal"); + if ("" != "") { + println("yields are not equal"); + } else { + println("yields are equal"); + } + + if (false) { + prettyT1 = prettyTree(t1); + prettyT2 = prettyTree(t2); + + if (prettyT1 != prettyT2) { + println("pretty trees are not equal"); + } else { + println("pretty trees are equal"); + } + writeFile(|cwd:///no-cache.txt|, ""); + writeFile(|cwd:///with-cache.txt|, ""); + } + } + + //treeDiff(t1, t2); + //println("trees are equal"); + return t1; + //testDeleteUntilEol(standardParser, recoveryParser, source, input, 200, 150, 369, 369); +} diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/PreludeOutOfMemoryBug.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/PreludeOutOfMemoryBug.rsc new file mode 100644 index 00000000000..605a43a82bf --- /dev/null +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/PreludeOutOfMemoryBug.rsc @@ -0,0 +1,14 @@ +module lang::rascal::tests::concrete::recovery::bugs::PreludeOutOfMemoryBug + +import lang::rascal::tests::concrete::recovery::RecoveryTestSupport; +import lang::rascal::\syntax::Rascal; +import ParseTree; +import IO; + +void testBug() { + standardParser = parser(#start[Module], allowRecovery=false, allowAmbiguity=true); + recoveryParser = parser(#start[Module], allowRecovery=true, allowAmbiguity=true); + loc source = |std:///Prelude.rsc|; + input = readFile(source); + testSingleCharDeletions(standardParser, recoveryParser, source, input, 200, 150, begin=1312, end=1313); +} diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/SlowExceptionBug.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/SlowExceptionBug.rsc new file mode 100644 index 00000000000..913becda80c --- /dev/null +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/SlowExceptionBug.rsc @@ -0,0 +1,15 @@ +module lang::rascal::tests::concrete::recovery::bugs::SlowExceptionBug + +import lang::rascal::tests::concrete::recovery::RecoveryTestSupport; +import lang::rascal::\syntax::Rascal; +import ParseTree; +import IO; + +void testBug() { + standardParser = parser(#start[Module], allowRecovery=false, allowAmbiguity=true); + recoveryParser = parser(#start[Module], allowRecovery=true, allowAmbiguity=true); + loc source = |std:///Exception.rsc|; + input = readFile(source); + //testSingleCharDeletions(standardParser, recoveryParser, source, input, 200, 150, begin=1744, end=1744); + testSingleCharDeletions(standardParser, recoveryParser, source, input, 200, 150); +} From c1e93c735f1ee70a6f47c916b9c0c4950d49dcb1 Mon Sep 17 00:00:00 2001 From: Pieter Olivier Date: Sat, 7 Dec 2024 10:53:50 +0100 Subject: [PATCH 06/29] Added visualization of cycle nodes --- src/org/rascalmpl/library/vis/Text.rsc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/org/rascalmpl/library/vis/Text.rsc b/src/org/rascalmpl/library/vis/Text.rsc index 5c8c6846925..5299034149c 100644 --- a/src/org/rascalmpl/library/vis/Text.rsc +++ b/src/org/rascalmpl/library/vis/Text.rsc @@ -50,6 +50,7 @@ str prettyTree(Tree t, bool src=false, bool characters=true, bool \layout=false, str nodeLabel(char(9)) = "\\t"; str nodeLabel(amb(_) ) = "❖"; str nodeLabel(loc src) = ""; + str nodeLabel(cycle(Symbol nt, int len)) = "cycle(, )"; default str nodeLabel(Tree v) = ""; lrel[str,value] edges(Tree t:appl(_, list[Tree] args)) = [<"src", t@\loc> | src, t@\loc?] + [<"", k> | Tree k <- args, include(k)]; From 1ab40fe4f1b45b4f9d265b9a64473ab3fd87f72b Mon Sep 17 00:00:00 2001 From: Pieter Olivier Date: Sat, 7 Dec 2024 10:54:33 +0100 Subject: [PATCH 07/29] Added parses tree equality checker and visualizer --- .../rascalmpl/library/util/ErrorRecovery.java | 235 ++++++++++++++++++ .../rascalmpl/library/util/ErrorRecovery.rsc | 10 + 2 files changed, 245 insertions(+) diff --git a/src/org/rascalmpl/library/util/ErrorRecovery.java b/src/org/rascalmpl/library/util/ErrorRecovery.java index d117a59d717..958ab9939d1 100644 --- a/src/org/rascalmpl/library/util/ErrorRecovery.java +++ b/src/org/rascalmpl/library/util/ErrorRecovery.java @@ -1,11 +1,20 @@ package org.rascalmpl.library.util; +import java.io.IOException; +import java.io.OutputStream; +import java.util.BitSet; import java.util.HashMap; import java.util.HashSet; import java.util.Set; import java.util.Map; +import java.util.Objects; import org.rascalmpl.interpreter.asserts.Ambiguous; +import org.rascalmpl.parser.util.DebugUtil; +import org.rascalmpl.parser.util.ParseStateVisualizer; +import org.rascalmpl.unicode.UnicodeOutputStreamWriter; +import org.rascalmpl.uri.URIResolverRegistry; +import org.rascalmpl.util.visualize.dot.DotGraph; import org.rascalmpl.values.IRascalValueFactory; import org.rascalmpl.values.RascalValueFactory; import org.rascalmpl.values.parsetrees.ITree; @@ -14,10 +23,12 @@ import io.usethesource.vallang.IBool; import io.usethesource.vallang.IConstructor; +import io.usethesource.vallang.IInteger; import io.usethesource.vallang.IList; import io.usethesource.vallang.IListWriter; import io.usethesource.vallang.ISet; import io.usethesource.vallang.ISetWriter; +import io.usethesource.vallang.ISourceLocation; import io.usethesource.vallang.IValue; import io.usethesource.vallang.type.Type; @@ -217,4 +228,228 @@ private void collectAmbErrors(ITree amb, IListWriter errors, Set p collectErrors((ITree) alt, errors, processedTrees); } } + + // We need to keep track of trees that are equal, and be able to find them again based on + // reference equality of both trees. + private static class TreePair { + private ITree tree1; + private ITree tree2; + private int hashCode; + + public TreePair(ITree tree1, ITree tree2) { + this.tree1 = tree1; + this.tree2 = tree2; + hashCode = Objects.hash(System.identityHashCode(tree1), System.identityHashCode(tree2)); + } + + public boolean equals(Object peer) { + TreePair pair = (TreePair) peer; + return tree1 == pair.tree1 && tree2 == pair.tree2; + } + + public int hashCode() { + return hashCode; + } + }; + + public IBool treeEquality(IConstructor tree1, IConstructor tree2) { + return rascalValues.bool(checkTreeEquality((ITree) tree1, (ITree) tree2, new HashSet<>(), true)); + } + + private boolean checkTreeEquality(ITree tree1, ITree tree2, Set equalTrees, boolean logInequality) { + if (tree1 == tree2) { + return true; + } + + Type type = tree1.getConstructorType(); + + if (!type.equals(tree2.getConstructorType())) { + if (logInequality) { + System.out.println("types not equal"); + } + return false; + } + + if (!checkLocationEquality(tree1, tree2)) { + if (logInequality) { + System.out.println("locations not equal"); + } + return false; + } + + if (type == RascalValueFactory.Tree_Char) { + if (checkCharEquality(tree1, tree2)) { + return true; + } + + if (logInequality) { + System.out.println("char nodes not equal"); + } + + return false; + } else if (type == RascalValueFactory.Tree_Cycle) { + if (checkCycleEquality(tree1, tree2)) { + return true; + } + + if (logInequality) { + System.out.println("char nodes not equal"); + } + + return false; + } + + TreePair pair = new TreePair(tree1, tree2); + if (equalTrees.contains(pair)) { + return true; + } + + boolean result; + if (type == RascalValueFactory.Tree_Appl) { + result = checkApplEquality(tree1, tree2, equalTrees, logInequality); + if (logInequality && !result) { + System.out.println("appls not equal"); + } + } else if (type == RascalValueFactory.Tree_Amb) { + result = checkAmbEquality(tree1, tree2, equalTrees, logInequality); + if (logInequality && !result) { + System.out.println("ambs not equal"); + } + } else { + throw new IllegalArgumentException("unknown tree type: " + type); + } + + if (result) { + equalTrees.add(pair); + } + + return result; + } + + private boolean checkApplEquality(ITree tree1, ITree tree2, Set equalTrees, boolean logInequality) { + IConstructor type1 = ProductionAdapter.getType(tree1); + IConstructor type2 = ProductionAdapter.getType(tree2); + + if (!type1.equals(type2)) { + if (logInequality) { + System.out.println("types do not match: " + DebugUtil.prodToString(type1) + " != " + DebugUtil.prodToString(type2)); + } + return false; + } + + IList args1 = TreeAdapter.getArgs(tree1); + IList args2 = TreeAdapter.getArgs(tree2); + int size = args1.size(); + if (size != args2.size()) { + if (logInequality) { + System.out.println("argument count mismatch"); + } + return false; + } + + for (int i = 0; i < size; i++) { + ITree arg1 = (ITree) args1.get(i); + ITree arg2 = (ITree) args2.get(i); + if (!checkTreeEquality(arg1, arg2, equalTrees, logInequality)) { + return false; + } + } + + return true; + } + + private boolean checkAmbEquality(ITree tree1, ITree tree2, Set equalTrees, boolean logInequality) { + ISet alts1 = tree1.getAlternatives(); + ISet alts2 = tree2.getAlternatives(); + + if (alts1.size() != alts2.size()) { + if (logInequality) { + System.out.println("amb nodes do not have an equal size"); + } + return false; + } + + BitSet alts2Checked = new BitSet(alts2.size()); + + boolean ok = true; + + IValue missingAlt = null; + + for (IValue alt1 : alts1) { + int index2 = 0; + // This relies on iteration order being stable, which is a pretty safe bet. + for (IValue alt2 : alts2) { + if (!alts2Checked.get(index2) && checkTreeEquality((ITree) alt1, (ITree) alt2, equalTrees, false)) { + alts2Checked.set(index2); + break; + } + + index2++; + } + + if (index2 == alts2.size()) { + // We did not find alt1 in alts2 + if (logInequality) { + System.out.println("amb nodes not equal"); + System.err.println(" rhs amb node not found: " + index2); + missingAlt = alt1; + } + ok = false; + } + } + + if (!ok && logInequality && missingAlt != null) { + int index2 = 0; + for (IValue alt2 : alts2) { + if (!alts2Checked.get(index2)) { + System.err.println(" lhs amb node not found: " + index2); + checkTreeEquality((ITree) missingAlt, (ITree) alt2, equalTrees, true); + System.err.println(); + } + index2++; + } + } + + return ok; + } + + private boolean checkCharEquality(ITree tree1, ITree tree2) { + return ((IInteger) tree1.get(0)).intValue() == ((IInteger) tree2.get(0)).intValue(); + } + + private boolean checkCycleEquality(ITree tree1, ITree tree2) { + return tree1.get(0).equals(tree2.get(0)) && ((IInteger) tree1.get(1)).intValue() == ((IInteger) tree2.get(1)).intValue(); + } + + private boolean checkLocationEquality(ITree tree1, ITree tree2) { + ISourceLocation loc1 = TreeAdapter.getLocation(tree1); + ISourceLocation loc2 = TreeAdapter.getLocation(tree2); + if (loc1 == null && loc2 == null) { + return true; + } + + if (loc1 == null || loc2 == null) { + return false; + } + + return loc1.getOffset() == loc2.getOffset() && loc1.getLength() == loc2.getLength(); + } + + public void parseTree2Dot(IConstructor tree, ISourceLocation dotFile) { + ParseStateVisualizer visualizer = new ParseStateVisualizer("ParseTree"); + DotGraph graph = visualizer.createGraph((ITree) tree); + + URIResolverRegistry reg = URIResolverRegistry.getInstance(); + + try { + ISourceLocation dotLocation = reg.logicalToPhysical(dotFile); + + OutputStream outStream = reg.getOutputStream(dotLocation, false); + try (UnicodeOutputStreamWriter out = new UnicodeOutputStreamWriter(outStream, "UTF-8", false)) { + out.append(graph.toString()); + } + } catch (IOException e) { + throw new RuntimeException(e); + } + } } diff --git a/src/org/rascalmpl/library/util/ErrorRecovery.rsc b/src/org/rascalmpl/library/util/ErrorRecovery.rsc index 08673830a91..4e99a545ed9 100644 --- a/src/org/rascalmpl/library/util/ErrorRecovery.rsc +++ b/src/org/rascalmpl/library/util/ErrorRecovery.rsc @@ -51,3 +51,13 @@ This filter removes error trees until no ambiguities caused by error recovery ar Note that regular ambiguous trees remain in the parse forest unless `allowAmbiguity` is set to false in which case an error is thrown. } java Tree disambiguateErrors(Tree t, bool allowAmbiguity=true); + +@javaClass{org.rascalmpl.library.util.ErrorRecovery} +@synopsis{Check if two parse trees are equal. For "normal" parse trees `==` will suffice, but for highly ambiguous (error) trees we need an +equality check that takes advantage of the sharing present in those trees. +Note that this equality check only takes into account the offset and length fields of source locations. All other fields are ignored.} +java bool treeEquality(Tree t1, Tree t2); + +@javaClass{org.rascalmpl.library.util.ErrorRecovery} +@synopsis{Generate a dot representation of a parse forest.} +java void parseTree2Dot(Tree tree, loc dotFile); From 99b841149d7a37bea1663484d04674916322d8ba Mon Sep 17 00:00:00 2001 From: Pieter Olivier Date: Sat, 7 Dec 2024 10:57:02 +0100 Subject: [PATCH 08/29] Implemented parse tree visualization --- .../parser/util/ParseStateVisualizer.java | 91 +++++++++++++++++++ 1 file changed, 91 insertions(+) diff --git a/src/org/rascalmpl/parser/util/ParseStateVisualizer.java b/src/org/rascalmpl/parser/util/ParseStateVisualizer.java index c99537b0f91..1ef0aeaa40d 100644 --- a/src/org/rascalmpl/parser/util/ParseStateVisualizer.java +++ b/src/org/rascalmpl/parser/util/ParseStateVisualizer.java @@ -25,6 +25,7 @@ import java.nio.file.StandardCopyOption; import java.util.HashMap; import java.util.Map; +import java.util.Objects; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; @@ -59,8 +60,16 @@ import org.rascalmpl.util.visualize.dot.DotNode; import org.rascalmpl.util.visualize.dot.DotRecord; import org.rascalmpl.util.visualize.dot.NodeId; +import org.rascalmpl.values.RascalValueFactory; +import org.rascalmpl.values.parsetrees.ITree; +import org.rascalmpl.values.parsetrees.ProductionAdapter; +import org.rascalmpl.values.parsetrees.TreeAdapter; import io.usethesource.vallang.IConstructor; +import io.usethesource.vallang.IInteger; +import io.usethesource.vallang.ISet; +import io.usethesource.vallang.IValue; +import io.usethesource.vallang.type.Type; /** * The parser uses quite complex datastructures. @@ -128,6 +137,7 @@ public void run() { private final Map stackNodeNodes; private DotGraph graph; private int frame; + private int nextParseNodeId; public ParseStateVisualizer(String name) { @@ -193,6 +203,10 @@ public void highlightStack(AbstractStackNode stack) { } } + public void visualizeParseTree(ITree parseTree) { + writeGraph(createGraph(parseTree)); + } + private synchronized DotGraph createGraph(AbstractStackNode stackNode) { reset(); graph = new DotGraph(name, true); @@ -245,6 +259,13 @@ private DotGraph createProductionGraph(AbstractStackNode[] stackNo return graph; } + public DotGraph createGraph(ITree parseTree) { + reset(); + graph = new DotGraph(name, false); + addParseTree(graph, parseTree, new HashMap<>()); + return graph; + } + private

NodeId addProductionNodes(DotGraph graph, AbstractStackNode

stackNode) { DotNode node = createDotNode(stackNode); graph.addNode(node); @@ -706,4 +727,74 @@ private void writeGraph(DotGraph graph) { throw new RuntimeException(e); } } + + private NodeId addParseTree(DotGraph graph, ITree tree, Map addedTrees) { + NodeId id = addedTrees.get(tree); + if (id == null) { + Type type = tree.getConstructorType(); + if (type == RascalValueFactory.Tree_Char) { + id = addChar(graph, tree); + } else if (type == RascalValueFactory.Tree_Cycle) { + id = addCycle(graph, tree); + } else if (type == RascalValueFactory.Tree_Appl) { + id = addAppl(graph, tree, addedTrees); + } else if (type == RascalValueFactory.Tree_Amb) { + id = addAmb(graph, tree, addedTrees); + } else { + throw new IllegalStateException("Unsupported type: " + type); + } + + addedTrees.put(tree, id); + } + + return id; + } + + private NodeId addChar(DotGraph graph, ITree charNode) { + NodeId id = new NodeId("char-" + nextParseNodeId++); + int value = ((IInteger) charNode.get(0)).intValue(); + graph.addNode(id, "Char: " + (char) value + " (" + value + ")"); + return id; + } + + private NodeId addCycle(DotGraph graph, ITree cycle) { + NodeId id = new NodeId("cycle-" + nextParseNodeId++); + + String sym = String.valueOf(cycle.get(0)); + int depth = ((IInteger) cycle.get(1)).intValue(); + + graph.addNode(id, "Cycle: " + sym + ", depth=" + depth); + + return id; + } + + private NodeId addAppl(DotGraph graph, ITree appl, Map addedTrees) { + NodeId id = new NodeId("appl-" + nextParseNodeId++); + + IConstructor type = ProductionAdapter.getType(appl); + + graph.addNode(id, DebugUtil.prodToString(type)); + + for (IValue arg : TreeAdapter.getArgs(appl)) { + NodeId argId = addParseTree(graph, (ITree) arg, addedTrees); + graph.addEdge(id, argId); + } + + return id; + } + + private NodeId addAmb(DotGraph graph, ITree amb, Map addedTrees) { + NodeId id = new NodeId("amb-" + nextParseNodeId++); + + ISet alts = TreeAdapter.getAlternatives(amb); + graph.addNode(id, "Amb, size=" + alts.size()); + + for (IValue alt : alts) { + NodeId argId = addParseTree(graph, (ITree) alt, addedTrees); + graph.addEdge(id, argId); + } + + return id; + } + } From 46eae759f9084e64ae65bed09526240af00a0684 Mon Sep 17 00:00:00 2001 From: Pieter Olivier Date: Sat, 7 Dec 2024 11:06:17 +0100 Subject: [PATCH 09/29] Implementend memoization of skipped nodes --- .../gtd/result/out/SkippedNodeFlattener.java | 47 +++++++++++++++++-- 1 file changed, 43 insertions(+), 4 deletions(-) diff --git a/src/org/rascalmpl/parser/gtd/result/out/SkippedNodeFlattener.java b/src/org/rascalmpl/parser/gtd/result/out/SkippedNodeFlattener.java index 98d4cf30782..e9b29bfe19a 100644 --- a/src/org/rascalmpl/parser/gtd/result/out/SkippedNodeFlattener.java +++ b/src/org/rascalmpl/parser/gtd/result/out/SkippedNodeFlattener.java @@ -11,6 +11,10 @@ *******************************************************************************/ package org.rascalmpl.parser.gtd.result.out; +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; + import org.rascalmpl.parser.gtd.location.PositionStore; import org.rascalmpl.parser.gtd.result.SkippedNode; @@ -18,21 +22,56 @@ * A converter for result nodes that contain skipped characters for error recovery */ public class SkippedNodeFlattener{ + private static class MemoKey { + private int offset; + private int length; + + public MemoKey(SkippedNode node) { + this.offset = node.getOffset(); + this.length = node.getLength(); + } + + @Override + public boolean equals(Object peer) { + MemoKey peerKey = (MemoKey) peer; + return offset == peerKey.offset && length == peerKey.length; + } + + @Override + public int hashCode() { + return Objects.hash(offset, length); + } + } + + private Map memoTable; + public SkippedNodeFlattener(){ super(); + + memoTable = new HashMap<>(); } public T convertToUPTR(INodeConstructorFactory nodeConstructorFactory, SkippedNode node, PositionStore positionStore){ - T result = nodeConstructorFactory.createSkippedNode(node.getSkippedChars()); + MemoKey key = new MemoKey(node); + + T result = memoTable.get(key); + + if (result != null) { + return result; + } + + result = nodeConstructorFactory.createSkippedNode(node.getSkippedChars()); // Add source location if (node.getInputUri() != null) { - int startOffset = node.getOffset(); - int endOffset = startOffset + node.getLength(); + int startOffset = node.getOffset(); + int endOffset = startOffset + node.getLength(); P sourceLocation = nodeConstructorFactory.createPositionInformation(node.getInputUri(), startOffset, endOffset, positionStore); - result = nodeConstructorFactory.addPositionInformation(result, sourceLocation); + result = nodeConstructorFactory.addPositionInformation(result, sourceLocation); } + memoTable.put(key, result); + return result; } } From b92410b65b8ce642a1e067962b6bc1b5f6bdc5a0 Mon Sep 17 00:00:00 2001 From: Pieter Olivier Date: Sat, 7 Dec 2024 11:41:41 +0100 Subject: [PATCH 10/29] Removed stats summary in R --- .../tests/concrete/recovery/recovery-stats.R | 42 ------------------- 1 file changed, 42 deletions(-) delete mode 100644 src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/recovery-stats.R diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/recovery-stats.R b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/recovery-stats.R deleted file mode 100644 index 142e01608c1..00000000000 --- a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/recovery-stats.R +++ /dev/null @@ -1,42 +0,0 @@ -# nolint start: line_length_linter. - -options("width" = 60) - -library("fs") - -input <- path_expand("~/stats/benchmark-stats-2024-11-16-0-5120.txt") -raw_data <- read.csv(input, header = TRUE) - -# Select interesting data subsets -recovery_data <- raw_data[raw_data$result == "recovery",] -error_data <- raw_data[raw_data$result == "error", ] -success_data <- raw_data[raw_data$result == "success", ] - -drop <- c("source", "result") - -recovery_fail_data <- recovery_data[recovery_data$errorSize >= recovery_data$size / 4, ] -recovery_ok_data <- recovery_data[recovery_data$errorSize < recovery_data$size / 4, ] - -# Drop uninteresting columns -recovery <- recovery_data[, !(names(recovery_data) %in% drop)] -error <- error_data[, !(names(error_data) %in% drop)] -success <- success_data[, !(names(success_data) %in% drop)] -recovery_fail <- recovery_fail_data[, !(names(recovery_fail_data) %in% drop)] -recovery_ok <- recovery_ok_data[, !(names(recovery_ok_data) %in% drop)] - -print("Total recovery stats") -summary(recovery) - -print("Successful recovery stats (error size < 25% of file size)") -summary(recovery_ok) - -print("Failed recovery stats (error size >= 25% of file size)") -summary(recovery_fail) - -print("Parse error stats") -summary(error) - -print("Successful parse stats") -summary(success) - -# nolint end From 0ff2209a9cea29eba9a5c61dd5f9bc48a5324613 Mon Sep 17 00:00:00 2001 From: Pieter Olivier Date: Sat, 7 Dec 2024 11:47:19 +0100 Subject: [PATCH 11/29] Focused SlowExceptionBug test --- .../tests/concrete/recovery/bugs/SlowExceptionBug.rsc | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/SlowExceptionBug.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/SlowExceptionBug.rsc index 913becda80c..6928af38f6d 100644 --- a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/SlowExceptionBug.rsc +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/SlowExceptionBug.rsc @@ -4,12 +4,18 @@ import lang::rascal::tests::concrete::recovery::RecoveryTestSupport; import lang::rascal::\syntax::Rascal; import ParseTree; import IO; +import util::Benchmark; +import String; void testBug() { standardParser = parser(#start[Module], allowRecovery=false, allowAmbiguity=true); recoveryParser = parser(#start[Module], allowRecovery=true, allowAmbiguity=true); loc source = |std:///Exception.rsc|; input = readFile(source); + int begin = realTime(); + str modifiedInput = substring(input, 0, 1744) + substring(input, 1745); + Tree t = recoveryParser(modifiedInput, source); //testSingleCharDeletions(standardParser, recoveryParser, source, input, 200, 150, begin=1744, end=1744); - testSingleCharDeletions(standardParser, recoveryParser, source, input, 200, 150); + int duration = realTime() - begin; + println("duration: ms."); } From aa93410ca87f1c748758a60ee1695b6d6efd946f Mon Sep 17 00:00:00 2001 From: Pieter Olivier Date: Sat, 7 Dec 2024 11:48:00 +0100 Subject: [PATCH 12/29] Implemented support to disable memoization of parse nodes --- src/org/rascalmpl/parser/gtd/SGTDBF.java | 32 +++++++++++++------ .../gtd/result/out/DefaultNodeFlattener.java | 2 ++ .../out/ListContainerNodeFlattener.java | 4 +-- .../out/SortContainerNodeFlattener.java | 17 ++++++---- 4 files changed, 37 insertions(+), 18 deletions(-) diff --git a/src/org/rascalmpl/parser/gtd/SGTDBF.java b/src/org/rascalmpl/parser/gtd/SGTDBF.java index 9d38723078f..4ef2e9175c4 100755 --- a/src/org/rascalmpl/parser/gtd/SGTDBF.java +++ b/src/org/rascalmpl/parser/gtd/SGTDBF.java @@ -24,6 +24,7 @@ import org.rascalmpl.parser.gtd.result.SortContainerNode; import org.rascalmpl.parser.gtd.result.action.IActionExecutor; import org.rascalmpl.parser.gtd.result.action.VoidActionExecutor; +import org.rascalmpl.parser.gtd.result.out.DefaultNodeFlattener; import org.rascalmpl.parser.gtd.result.out.FilteringTracker; import org.rascalmpl.parser.gtd.result.out.INodeConstructorFactory; import org.rascalmpl.parser.gtd.result.out.INodeFlattener; @@ -63,6 +64,7 @@ */ public abstract class SGTDBF implements IGTD { private final static int DEFAULT_TODOLIST_CAPACITY = 16; + private static final String PARAM_ENABLE_CACHE = "enable-cache"; private URI inputURI; private int[] input; @@ -1509,6 +1511,16 @@ private static int[] charsToInts(char[] input){ return result; } + private void checkMemoization(URI inputURI) { + DefaultNodeFlattener.regularMemoization = true; + if (inputURI != null) { + String query = inputURI.getQuery(); + if (query != null) { + DefaultNodeFlattener.regularMemoization = !query.contains("disable-memoization"); + } + } + } + /** * Parses with post parse filtering. */ @@ -1517,6 +1529,7 @@ private T parse(String nonterminal, URI inputURI, int[] input, IActionExecutor debugListener) { AbstractNode result = parse(new NonTerminalStackNode

(AbstractStackNode.START_SYMBOL_ID, 0, nonterminal), inputURI, input, recoverer, debugListener); + checkMemoization(inputURI); return buildResult(result, converter, nodeConstructorFactory, actionExecutor); } @@ -1552,6 +1565,7 @@ private T parse(String nonterminal, URI inputURI, int[] input, INodeFlattener debugListener) { AbstractNode result = parse(new NonTerminalStackNode

(AbstractStackNode.START_SYMBOL_ID, 0, nonterminal), inputURI, input, recoverer, debugListener); + checkMemoization(inputURI); return buildResult(result, converter, nodeConstructorFactory, new VoidActionExecutor()); } @@ -1581,7 +1595,7 @@ protected T parse(AbstractStackNode

startNode, URI inputURI, char[] input, IN INodeConstructorFactory nodeConstructorFactory) { AbstractNode result = parse(startNode, inputURI, charsToInts(input), null, null); - + checkMemoization(inputURI); return buildResult(result, converter, nodeConstructorFactory, new VoidActionExecutor()); } @@ -1597,15 +1611,15 @@ protected T buildResult(AbstractNode result, INodeFlattener converter, Object rootEnvironment = actionExecutor != null ? actionExecutor.createRootEnvironment() : null; T parseResult = null; try { - parseResult = converter.convert(nodeConstructorFactory, result, positionStore, filteringTracker, + parseResult = converter.convert(nodeConstructorFactory, result, positionStore, filteringTracker, actionExecutor, rootEnvironment); - } - finally { - actionExecutor.completed(rootEnvironment, (parseResult == null)); + } + finally { + actionExecutor.completed(rootEnvironment, (parseResult == null)); } if(parseResult != null) { - if (recoverer != null && parseErrorRecovered) { - parseResult = introduceErrorNodes(parseResult, nodeConstructorFactory); + if (recoverer != null && parseErrorRecovered) { + parseResult = introduceErrorNodes(parseResult, nodeConstructorFactory); } return parseResult; // Success. } @@ -1617,8 +1631,8 @@ protected T buildResult(AbstractNode result, INodeFlattener converter, int beginColumn = positionStore.getColumn(offset, beginLine); int endLine = positionStore.findLine(endOffset); int endColumn = positionStore.getColumn(endOffset, endLine); - throw new ParseError("All results were filtered", inputURI, offset, length, beginLine + 1, endLine + 1, - beginColumn, endColumn); + throw new ParseError("All results were filtered", inputURI, offset, length, + beginLine + 1, endLine + 1, beginColumn, endColumn); } finally { checkTime("Unbinarizing, post-parse filtering, and mapping to UPTR"); diff --git a/src/org/rascalmpl/parser/gtd/result/out/DefaultNodeFlattener.java b/src/org/rascalmpl/parser/gtd/result/out/DefaultNodeFlattener.java index 12ca785fb41..0ff7b09ff32 100644 --- a/src/org/rascalmpl/parser/gtd/result/out/DefaultNodeFlattener.java +++ b/src/org/rascalmpl/parser/gtd/result/out/DefaultNodeFlattener.java @@ -26,6 +26,8 @@ * Converter for parse trees that produces trees in UPTR format. */ public class DefaultNodeFlattener implements INodeFlattener{ + public static boolean regularMemoization = true; + private final CharNodeFlattener charNodeConverter; private final LiteralNodeFlattener literalNodeConverter; private final SortContainerNodeFlattener sortContainerNodeConverter; diff --git a/src/org/rascalmpl/parser/gtd/result/out/ListContainerNodeFlattener.java b/src/org/rascalmpl/parser/gtd/result/out/ListContainerNodeFlattener.java index 59b875d8532..ea82e08b15d 100644 --- a/src/org/rascalmpl/parser/gtd/result/out/ListContainerNodeFlattener.java +++ b/src/org/rascalmpl/parser/gtd/result/out/ListContainerNodeFlattener.java @@ -461,7 +461,7 @@ public T convertToUPTR(INodeFlattener converter, INodeConstructorFactory levelCache = preCache.get(offset); if(levelCache != null){ @@ -537,7 +537,7 @@ public T convertToUPTR(INodeFlattener converter, INodeConstructorFactory levelCache = preCache.get(offset); if(levelCache != null){ diff --git a/src/org/rascalmpl/parser/gtd/result/out/SortContainerNodeFlattener.java b/src/org/rascalmpl/parser/gtd/result/out/SortContainerNodeFlattener.java index d2dcc72e78f..b878e34c256 100644 --- a/src/org/rascalmpl/parser/gtd/result/out/SortContainerNodeFlattener.java +++ b/src/org/rascalmpl/parser/gtd/result/out/SortContainerNodeFlattener.java @@ -32,10 +32,10 @@ public class SortContainerNodeFlattener{ @SuppressWarnings("unchecked") private final static ForwardLink NO_NODES = ForwardLink.TERMINATOR; - + private final IntegerKeyedHashMap> preCache; private final IntegerKeyedHashMap> cache; - + public SortContainerNodeFlattener(){ super(); @@ -122,8 +122,8 @@ public T convertToUPTR(INodeFlattener converter, INodeConstructorFactory levelCache = preCache.get(offset); if(levelCache != null){ @@ -133,10 +133,12 @@ public T convertToUPTR(INodeFlattener converter, INodeConstructorFactory 0) { // Ambiguous. stack.dirtyPurge(); // Pop this node off the stack. - if(result != null && depth < cycleMark.depth){ // Only share the constructed tree if we are not in a cycle. + if(DefaultNodeFlattener.regularMemoization && result != null && depth < cycleMark.depth){ // Only share the constructed tree if we are not in a cycle. if(!hasSideEffects){ // Cache side-effect free tree. ObjectIntegerKeyedHashMap levelCache = preCache.get(offset); if(levelCache != null){ @@ -229,4 +231,5 @@ else if (nrOfAlternatives > 0) { // Ambiguous. return result; } + } From 248cf0bd94ba325acc996a4a8bc0a65c6a15219f Mon Sep 17 00:00:00 2001 From: Pieter Olivier Date: Sat, 7 Dec 2024 11:48:39 +0100 Subject: [PATCH 13/29] Added cycle tests --- .../tests/concrete/recovery/CycleTest.rsc | 42 +++++++++ .../tests/concrete/recovery/CycleTest.txt | 50 ++++++++++ .../tests/concrete/recovery/ListCycleTest.rsc | 26 ++++++ .../tests/concrete/recovery/NonAsciiTest.rsc | 29 ++++++ .../recovery/bugs/ATermOutOfMemoryBug.rsc | 91 +++++++------------ 5 files changed, 179 insertions(+), 59 deletions(-) create mode 100644 src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/CycleTest.rsc create mode 100644 src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/CycleTest.txt create mode 100644 src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/ListCycleTest.rsc create mode 100644 src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/NonAsciiTest.rsc diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/CycleTest.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/CycleTest.rsc new file mode 100644 index 00000000000..52a79a4b53a --- /dev/null +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/CycleTest.rsc @@ -0,0 +1,42 @@ +module lang::rascal::tests::concrete::recovery::CycleTest + +import ParseTree; +import vis::Text; +import IO; +import util::ErrorRecovery; + +syntax S = T | U; + +syntax T = X T? | "$"; + +syntax U = X T | "$"; + +syntax X = "b"? | "c"; + +void testCycles() { + str input = "bc$"; + //str input = "bcbcbcccbb$"; + Tree t1 = parse(#S, input, |unknown:///|, allowAmbiguity=true); + Tree t1b = parse(#S, input, |unknown:///|, allowAmbiguity=true); + Tree t2 = parse(#S, input, |unknown:///?disable-memoization=true|, allowAmbiguity=true); + println(prettyTree(t1)); + println(prettyTree(t2)); + + if (treeEquality(t1, t2)) { + println("equal"); + } else { + println("NOT EQUAL"); + } + + if (treeEquality(t1, t1b)) { + println("t1 and t1b are tree-equal"); + } else { + println("t1 and t1b NOT EQUAL"); + } + + if (t1 == t1b) { + println("identical"); + } else { + println("IDENTICAL PARSE CALLS BUT NOT IDENTICAL"); + } +} diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/CycleTest.txt b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/CycleTest.txt new file mode 100644 index 00000000000..6d1b4dbe0bb --- /dev/null +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/CycleTest.txt @@ -0,0 +1,50 @@ + + 1 + 2 + 3 + │ │ └─ T? + │ │ └─ ❖ + │ │ ├─ T = X T? + │ │ │ ├─ X = "b"? + │ │ │ │ └─ "b"? + │ │ └─ T? + │ │ └─ ❖ + │ │ ├─ T = X T? + │ │ │ ├─ X = "b"? + │ │ │ │ └─ "b"? + │ │ │ └─ cycle(T?, 2) + │ │ └─ T = X T? + │ │ ├─ X = "c" + │ │ └─ T? + │ │ └─ ❖ + │ │ ├─ T = X T? + │ │ │ ├─ X = "b"? + │ │ │ │ └─ "b"? + │ │ │ └─ cycle(T?, 2) + │ │ └─ T = "$" + │ │ └─ T = X T? + │ │ ├─ X = "c" + │ │ └─ T? + │ │ └─ ❖ + │ │ ├─ T = X T? + │ │ │ ├─ X = "b"? + │ │ │ │ └─ "b"? + │ │ │ └─ cycle(T?, 2) + │ │ └─ T = "$" + + │ └─ T? + │ └─ ❖ + │ ├─ T = X T? + │ │ ├─ X = "b"? + │ │ │ └─ "b"? + │ │ └─ T? + │ │ └─ cycle(T, 2) + │ └─ T = X T? + │ ├─ X = "c" + │ └─ T? + │ └─ ❖ + │ ├─ T = X T? + │ │ ├─ X = "b"? + │ │ │ └─ "b"? + │ │ └─ cycle(T?, 2) + │ └─ T = "$" diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/ListCycleTest.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/ListCycleTest.rsc new file mode 100644 index 00000000000..46099f0f850 --- /dev/null +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/ListCycleTest.rsc @@ -0,0 +1,26 @@ +module lang::rascal::tests::concrete::recovery::ListCycleTest + +import ParseTree; +import vis::Text; +import IO; +import util::ErrorRecovery; + +syntax S = T?*; + +syntax T = X T? | "$"; + +syntax X = "b"?; + +void testListCycles() { + str input = "b$"; + Tree t1 = parse(#S, input, |unknown:///|, allowAmbiguity=true); + Tree t2 = parse(#S, input, |unknown:///?disable-memoization=true|, allowAmbiguity=true); + println(prettyTree(t1)); + println(prettyTree(t2)); + + if (treeEquality(t1, t2)) { + println("equal"); + } else { + println("NOT EQUAL"); + } +} diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/NonAsciiTest.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/NonAsciiTest.rsc new file mode 100644 index 00000000000..5cd22c056e6 --- /dev/null +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/NonAsciiTest.rsc @@ -0,0 +1,29 @@ +/** + * Copyright (c) 2024, NWO-I Centrum Wiskunde & Informatica (CWI) + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + * + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **/ + +module lang::rascal::tests::concrete::recovery::NonAsciiTest + +import lang::rascal::tests::concrete::recovery::RecoveryTestSupport; + + +syntax S = T; + +syntax T = A B C; + +syntax A = "ª"; +syntax B = "ß" "ß"; +syntax C = "©"; + +test bool nonAsciiOk() = checkRecovery(#S, "ªßß©", []); + diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/ATermOutOfMemoryBug.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/ATermOutOfMemoryBug.rsc index 1f2186ab1e7..6ab17288a52 100644 --- a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/ATermOutOfMemoryBug.rsc +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/ATermOutOfMemoryBug.rsc @@ -7,75 +7,48 @@ import IO; import String; import vis::Text; import util::ErrorRecovery; -import Set; +import util::Benchmark; -@memo -void treeDiff(appl(prod1, args1), appl(prod2, args2)) { - if (prod1 != prod2) { - println(""); - println(""); - throw "prods do not match"; - } - - if (size(args1) != size(args2)) { - throw "argument size mismatch"; - } - - for (int i <- [0..size(args1)]) { - arg1 = args1[i]; - arg2 = args2[i]; - treeDiff(arg1, arg2); - } -} - -@memo -void treeDiff(cycle(sym1, length1), cycle(sym2, length2)) { - if (sym1 != sym2) { - throw "cycle symbols do not match"; - } - - if (length1 != length2) { - throw "cycle lengths do not match"; - } -} - -@memo -void treeDiff(char(c1), char(c2)) { - if (c1 != c2) { - throw "character mismatch"; - } -} - -@memo -void treeDiff(amb(alts1), amb(alts2)) { - if (size(alts1) != size(alts2)) { - throw "alts size mismatch"; - } - - while (!isEmpty(alts1)) { - = takeFirstFrom(alts1); - = takeFirstFrom(alts2); - - treeDiff(alt1, alt2); - } -} - -Tree testBug() { +void testBug() { standardParser = parser(#start[Module], allowRecovery=false, allowAmbiguity=true); recoveryParser = parser(#start[Module], allowRecovery=true, allowAmbiguity=true); loc source = |std:///lang/aterm/syntax/ATerm.rsc|; + loc sourceNoMemo = |std:///lang/aterm/syntax/ATerm.rsc?disable-memoization=true|; + + // "enable-cache" Does nothing for now loc sourceCache = |std:///lang/aterm/syntax/ATerm.rsc?enable-cache=true|; input = readFile(source); - //modifiedInput = substring(input, 0, 369) + substring(input, 399); modifiedInput = substring(input, 0, 369) + substring(input, 399); - println("without caching"); + + println("without optimized cycles"); + withoutCyclesStart = realTime(); Tree t1 = recoveryParser(modifiedInput, source); + withoutCyclesDuration = realTime() - withoutCyclesStart; + println("without cycle optimization duration: "); + +/* + println("without default memoization"); + withoutMemoStart = realTime(); + Tree tm = recoveryParser(modifiedInput, sourceNoMemo); + withoutMemoDuration = realTime() - withoutMemoStart; + println("without cycle optimization duration: "); + */ + println("with caching"); - Tree t2 = recoveryParser(modifiedInput, source); - if (t1 == t2) { + withCacheStart = realTime(); + Tree t2 = recoveryParser(modifiedInput, sourceCache); + withCacheDuration = realTime() - withCacheStart; + println("with cache duration: "); + + int equalityStart = realTime(); + if (treeEquality(t1, t2)) { println("equal"); + if (t1 != t2) { + println("but not the same?"); + } } else { - println("not equal"); + int equalityDuration = realTime() - equalityStart; + println("not equal in ms"); if ("" != "") { println("yields are not equal"); } else { @@ -98,6 +71,6 @@ Tree testBug() { //treeDiff(t1, t2); //println("trees are equal"); - return t1; + //return t1; //testDeleteUntilEol(standardParser, recoveryParser, source, input, 200, 150, 369, 369); } From 3b8b7872b2e0ebc090680f01c7a76a12503bb28f Mon Sep 17 00:00:00 2001 From: Pieter Olivier Date: Sun, 8 Dec 2024 12:04:32 +0100 Subject: [PATCH 14/29] Added @Override annotations --- src/org/rascalmpl/library/util/ErrorRecovery.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/org/rascalmpl/library/util/ErrorRecovery.java b/src/org/rascalmpl/library/util/ErrorRecovery.java index 958ab9939d1..e639a0124d3 100644 --- a/src/org/rascalmpl/library/util/ErrorRecovery.java +++ b/src/org/rascalmpl/library/util/ErrorRecovery.java @@ -242,11 +242,13 @@ public TreePair(ITree tree1, ITree tree2) { hashCode = Objects.hash(System.identityHashCode(tree1), System.identityHashCode(tree2)); } + @Override public boolean equals(Object peer) { TreePair pair = (TreePair) peer; return tree1 == pair.tree1 && tree2 == pair.tree2; } + @Override public int hashCode() { return hashCode; } From 8446fcf149605705c7cf8d5f7267c9bcb56c5874 Mon Sep 17 00:00:00 2001 From: Pieter Olivier Date: Mon, 9 Dec 2024 11:43:01 +0100 Subject: [PATCH 15/29] Simplified test and fixed cycleMark checking in ListContainerNodeFlattener --- .../rascal/tests/concrete/recovery/CycleTest.rsc | 13 ------------- .../gtd/result/out/ListContainerNodeFlattener.java | 4 +++- 2 files changed, 3 insertions(+), 14 deletions(-) diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/CycleTest.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/CycleTest.rsc index 52a79a4b53a..c6f6306fd77 100644 --- a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/CycleTest.rsc +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/CycleTest.rsc @@ -17,7 +17,6 @@ void testCycles() { str input = "bc$"; //str input = "bcbcbcccbb$"; Tree t1 = parse(#S, input, |unknown:///|, allowAmbiguity=true); - Tree t1b = parse(#S, input, |unknown:///|, allowAmbiguity=true); Tree t2 = parse(#S, input, |unknown:///?disable-memoization=true|, allowAmbiguity=true); println(prettyTree(t1)); println(prettyTree(t2)); @@ -27,16 +26,4 @@ void testCycles() { } else { println("NOT EQUAL"); } - - if (treeEquality(t1, t1b)) { - println("t1 and t1b are tree-equal"); - } else { - println("t1 and t1b NOT EQUAL"); - } - - if (t1 == t1b) { - println("identical"); - } else { - println("IDENTICAL PARSE CALLS BUT NOT IDENTICAL"); - } } diff --git a/src/org/rascalmpl/parser/gtd/result/out/ListContainerNodeFlattener.java b/src/org/rascalmpl/parser/gtd/result/out/ListContainerNodeFlattener.java index ea82e08b15d..dd75ef5fe8a 100644 --- a/src/org/rascalmpl/parser/gtd/result/out/ListContainerNodeFlattener.java +++ b/src/org/rascalmpl/parser/gtd/result/out/ListContainerNodeFlattener.java @@ -471,7 +471,9 @@ public T convertToUPTR(INodeFlattener converter, INodeConstructorFactory Date: Wed, 11 Dec 2024 12:03:41 +0100 Subject: [PATCH 16/29] Added support for checking memoization correctness during error recovery testing --- .../tests/concrete/recovery/CycleTest.rsc | 27 ++++++- .../tests/concrete/recovery/CycleTest.txt | 50 ------------ .../concrete/recovery/RecoveryTestSupport.rsc | 43 ++++++++++- .../recovery/bugs/ATermOutOfMemoryBug.rsc | 76 ------------------- 4 files changed, 68 insertions(+), 128 deletions(-) delete mode 100644 src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/CycleTest.txt delete mode 100644 src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/ATermOutOfMemoryBug.rsc diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/CycleTest.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/CycleTest.rsc index c6f6306fd77..3cf9069575d 100644 --- a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/CycleTest.rsc +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/CycleTest.rsc @@ -4,12 +4,13 @@ import ParseTree; import vis::Text; import IO; import util::ErrorRecovery; +import Node; syntax S = T | U; syntax T = X T? | "$"; -syntax U = X T | "$"; +syntax U = X T? | "$"; syntax X = "b"? | "c"; @@ -26,4 +27,28 @@ void testCycles() { } else { println("NOT EQUAL"); } + + if ({appl1Level1, *_ } := getChildren(t1)[0] && {appl2Level1, *_ } := getChildren(t2)[0]) { + println("appl1Level1:\n"); + println("appl2Level1:\n"); + + if ([amb({appl1Level2,*_})] := getChildren(appl1Level1)[1] && [amb({appl2Level2,*_})] := getChildren(appl2Level1)[1]) { + //println("Child 1:"); + //iprintln(appl1Level2); + //println("Child 2:"); + //iprintln(appl2Level2); + + println("child 1 tree:\n"); + println("child 2 tree:\n"); + + println("yield1: "); + println("yield2: "); + } + } + + //if (set[Tree] amb1Level1 := getChildren(t1)[0]) { + // println("children: "); + //} + + } diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/CycleTest.txt b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/CycleTest.txt deleted file mode 100644 index 6d1b4dbe0bb..00000000000 --- a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/CycleTest.txt +++ /dev/null @@ -1,50 +0,0 @@ - - 1 - 2 - 3 - │ │ └─ T? - │ │ └─ ❖ - │ │ ├─ T = X T? - │ │ │ ├─ X = "b"? - │ │ │ │ └─ "b"? - │ │ └─ T? - │ │ └─ ❖ - │ │ ├─ T = X T? - │ │ │ ├─ X = "b"? - │ │ │ │ └─ "b"? - │ │ │ └─ cycle(T?, 2) - │ │ └─ T = X T? - │ │ ├─ X = "c" - │ │ └─ T? - │ │ └─ ❖ - │ │ ├─ T = X T? - │ │ │ ├─ X = "b"? - │ │ │ │ └─ "b"? - │ │ │ └─ cycle(T?, 2) - │ │ └─ T = "$" - │ │ └─ T = X T? - │ │ ├─ X = "c" - │ │ └─ T? - │ │ └─ ❖ - │ │ ├─ T = X T? - │ │ │ ├─ X = "b"? - │ │ │ │ └─ "b"? - │ │ │ └─ cycle(T?, 2) - │ │ └─ T = "$" - - │ └─ T? - │ └─ ❖ - │ ├─ T = X T? - │ │ ├─ X = "b"? - │ │ │ └─ "b"? - │ │ └─ T? - │ │ └─ cycle(T, 2) - │ └─ T = X T? - │ ├─ X = "c" - │ └─ T? - │ └─ ❖ - │ ├─ T = X T? - │ │ ├─ X = "b"? - │ │ │ └─ "b"? - │ │ └─ cycle(T?, 2) - │ └─ T = "$" diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RecoveryTestSupport.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RecoveryTestSupport.rsc index e933bf70c8a..123e1b9a460 100644 --- a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RecoveryTestSupport.rsc +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RecoveryTestSupport.rsc @@ -25,6 +25,7 @@ import analysis::statistics::Descriptive; import util::Math; import Set; import List; +import Exception; import lang::rascal::grammar::definition::Modules; @@ -56,6 +57,31 @@ public data TestStats = testStats( FrequencyTable errorCounts=(), FrequencyTable errorSizes=()); +// When this is turned on, memoization is checked for correctness. This makes the tests take much longer. +// Note that this test requires the posibility to disable memoization. +// In the current test version this can be done by including a "disable-memoization" query parameter. +// We expect this feature to be removed eventually and then the `verifyMemoizationCorrectness` flag becomes useless. +bool verifyMemoizationCorrectness = false; + +// When verifying memoization we need to be able to timeout the conversin from parse graph to parse forest. +// We can do this by using a parse filter. +int timeoutLimit = 0; + +void setTimeout(int limit) { + timeoutLimit = limit; +} + +void clearTimeout() { + timeoutLimit = 0; +} + +Tree timeoutFilter(Tree tree) { + if (timeoutLimit != 0 && realTime() > timeoutLimit) { + throw Timeout(); + } + return tree; +} + private TestMeasurement testRecovery(&T (value input, loc origin) standardParser, &T (value input, loc origin) recoveryParser, str input, loc source, loc statFile, int referenceParseTime) { int startTime = 0; int duration = 0; @@ -76,6 +102,20 @@ private TestMeasurement testRecovery(&T (value input, loc origin) standardParser Tree t = recoveryParser(input, source); int parseEndTime = realTime(); duration = parseEndTime - startTime; + + if (verifyMemoizationCorrectness) { + noMemoSource = source; + noMemoSource.query = noMemoSource.query + "&disable-memoization"; + setTimeout(realTime() + 2000); + try { + Tree noMemoTree = recoveryParser(input, noMemoSource); + if (!treeEquality(t, noMemoTree)) { + throw "Memo tree and non-memo tree are not equal for "; + } + } catch Timeout(): print("#"); + clearTimeout(); + } + list[Tree] errors = findBestErrors(t); errorCount = size(errors); disambDuration = realTime() - parseEndTime; @@ -397,7 +437,8 @@ FileStats testErrorRecovery(loc syntaxFile, str topSort, loc testInput, str inpu if (sym:\start(\sort(topSort)) <- gram.starts) { type[value] begin = type(sym, gram.rules); standardParser = parser(begin, allowAmbiguity=true, allowRecovery=false); - recoveryParser = parser(begin, allowAmbiguity=true, allowRecovery=true); + set[Tree(Tree)] filters = verifyMemoizationCorrectness ? {timeoutFilter} : {}; + recoveryParser = parser(begin, allowAmbiguity=true, allowRecovery=true, filters=filters); // Initialization run standardParser(input, testInput); diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/ATermOutOfMemoryBug.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/ATermOutOfMemoryBug.rsc deleted file mode 100644 index 6ab17288a52..00000000000 --- a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/ATermOutOfMemoryBug.rsc +++ /dev/null @@ -1,76 +0,0 @@ -module lang::rascal::tests::concrete::recovery::bugs::ATermOutOfMemoryBug - -import lang::rascal::tests::concrete::recovery::RecoveryTestSupport; -import lang::rascal::\syntax::Rascal; -import ParseTree; -import IO; -import String; -import vis::Text; -import util::ErrorRecovery; -import util::Benchmark; - -void testBug() { - standardParser = parser(#start[Module], allowRecovery=false, allowAmbiguity=true); - recoveryParser = parser(#start[Module], allowRecovery=true, allowAmbiguity=true); - loc source = |std:///lang/aterm/syntax/ATerm.rsc|; - loc sourceNoMemo = |std:///lang/aterm/syntax/ATerm.rsc?disable-memoization=true|; - - // "enable-cache" Does nothing for now - loc sourceCache = |std:///lang/aterm/syntax/ATerm.rsc?enable-cache=true|; - input = readFile(source); - modifiedInput = substring(input, 0, 369) + substring(input, 399); - - println("without optimized cycles"); - withoutCyclesStart = realTime(); - Tree t1 = recoveryParser(modifiedInput, source); - withoutCyclesDuration = realTime() - withoutCyclesStart; - println("without cycle optimization duration: "); - -/* - println("without default memoization"); - withoutMemoStart = realTime(); - Tree tm = recoveryParser(modifiedInput, sourceNoMemo); - withoutMemoDuration = realTime() - withoutMemoStart; - println("without cycle optimization duration: "); - */ - - println("with caching"); - withCacheStart = realTime(); - Tree t2 = recoveryParser(modifiedInput, sourceCache); - withCacheDuration = realTime() - withCacheStart; - println("with cache duration: "); - - int equalityStart = realTime(); - if (treeEquality(t1, t2)) { - println("equal"); - if (t1 != t2) { - println("but not the same?"); - } - } else { - int equalityDuration = realTime() - equalityStart; - println("not equal in ms"); - if ("" != "") { - println("yields are not equal"); - } else { - println("yields are equal"); - } - - if (false) { - prettyT1 = prettyTree(t1); - prettyT2 = prettyTree(t2); - - if (prettyT1 != prettyT2) { - println("pretty trees are not equal"); - } else { - println("pretty trees are equal"); - } - writeFile(|cwd:///no-cache.txt|, ""); - writeFile(|cwd:///with-cache.txt|, ""); - } - } - - //treeDiff(t1, t2); - //println("trees are equal"); - //return t1; - //testDeleteUntilEol(standardParser, recoveryParser, source, input, 200, 150, 369, 369); -} From cc1384e518757287bc1bf58050e3ad76a611cb3f Mon Sep 17 00:00:00 2001 From: Pieter Olivier Date: Wed, 11 Dec 2024 12:04:26 +0100 Subject: [PATCH 17/29] Added test for missing memoization in cycles --- .../recovery/bugs/CycleMemoizationBug.rsc | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/CycleMemoizationBug.rsc diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/CycleMemoizationBug.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/CycleMemoizationBug.rsc new file mode 100644 index 00000000000..73af4833199 --- /dev/null +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/CycleMemoizationBug.rsc @@ -0,0 +1,29 @@ +module lang::rascal::tests::concrete::recovery::bugs::CycleMemoizationBug + +import lang::rascal::tests::concrete::recovery::RecoveryTestSupport; +import lang::rascal::\syntax::Rascal; +import ParseTree; +import IO; +import String; +import vis::Text; +import util::ErrorRecovery; +import util::Benchmark; +import Exception; + +/** +Originally memoization inside cycles was turned off. This caused this test to take a long time and then crash with an out-of-memory error. +*/ +void testCycleMemoizationFailure() { + recoveryParser = parser(#start[Module], allowRecovery=true, allowAmbiguity=true); + loc source = |std:///lang/aterm/syntax/ATerm.rsc|; + + input = readFile(source); + modifiedInput = substring(input, 0, 369) + substring(input, 399); + + begin = realTime(); + Tree t1 = recoveryParser(modifiedInput, source); + duration = realTime() - begin; + println("with memoization duration: "); + + assert "" == modifiedInput; +} From 142679176f73c5b9223e83ceac3b28bca0b79660 Mon Sep 17 00:00:00 2001 From: Pieter Olivier Date: Fri, 13 Dec 2024 16:25:17 +0100 Subject: [PATCH 18/29] Implemented verification of memoizatoin approaches during parse graph -> parse forest conversion --- .../concrete/recovery/RecoveryTestSupport.rsc | 68 ++++++++++++++++--- 1 file changed, 58 insertions(+), 10 deletions(-) diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RecoveryTestSupport.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RecoveryTestSupport.rsc index 123e1b9a460..61d3cc880fb 100644 --- a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RecoveryTestSupport.rsc +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RecoveryTestSupport.rsc @@ -26,6 +26,7 @@ import util::Math; import Set; import List; import Exception; +import vis::Text; import lang::rascal::grammar::definition::Modules; @@ -89,7 +90,10 @@ private TestMeasurement testRecovery(&T (value input, loc origin) standardParser int errorCount = 0; int errorSize=0; str result = "?"; + str verificationResult = "-"; + TestMeasurement measurement = successfulParse(); + clearTimeout(); try { startTime = realTime(); Tree t = standardParser(input, source); @@ -104,22 +108,65 @@ private TestMeasurement testRecovery(&T (value input, loc origin) standardParser duration = parseEndTime - startTime; if (verifyMemoizationCorrectness) { + bool noMemoTimeout = false; + bool linkCorrect = true; + bool nodeMemoTimeout = false; + bool nodeCorrect = true; + //bool nodeLinkEqual = true; + noMemoSource = source; - noMemoSource.query = noMemoSource.query + "&disable-memoization"; + noMemoSource.query = noMemoSource.query + "&parse-memoization=none"; + setTimeout(realTime() + 2000); + Tree noMemoTree = char(0); + try { + Tree noMemo = recoveryParser(input, noMemoSource); + noMemoTree = noMemo; + linkMemoCorrect = treeEquality(t, noMemoTree); + } catch Timeout(): { + print("#"); + noMemoTimeout = true; + } + clearTimeout(); + + nodeMemoSource = source; + nodeMemoSource.query = nodeMemoSource.query + "&parse-memoization=node"; setTimeout(realTime() + 2000); try { - Tree noMemoTree = recoveryParser(input, noMemoSource); - if (!treeEquality(t, noMemoTree)) { - throw "Memo tree and non-memo tree are not equal for "; + Tree nodeMemoTree = recoveryParser(input, nodeMemoSource); + //nodeLinkEqual = treeEquality(t, nodeMemoTree); // Too expensive + if (!noMemoTimeout) { + nodeCorrect = treeEquality(noMemoTree, nodeMemoTree); } - } catch Timeout(): print("#"); + } catch Timeout(): { + print("@"); + nodeMemoTimeout = true; + } clearTimeout(); + + if (!linkCorrect) { + if (nodeMemoTimeout) { + println("\nlink memoization incorrect, node memoization timeout for "); + verificationResult = "linkFailed:nodeTimeout"; + } else if (nodeCorrect) { + verificationResult = "linkFailed:nodeSucceeded"; + println("\nonly link memoization incorrect for "); + } else { + verificationResult = "linkFailed:nodeFailed"; + println("\nboth node memoization and link memoization incorrect for "); + } + } else if (!nodeCorrect) { + verificationResult = "linkSucceeded:nodeFailed"; + println("\nonly node memoization incorrect for "); + } else if (noMemoTimeout) { + verificationResult="noMemoTimeout"; + } else { + verificationResult = "linkSucceeded:nodeSucceeded"; + } } list[Tree] errors = findBestErrors(t); errorCount = size(errors); disambDuration = realTime() - parseEndTime; - result = "recovery"; if ("" != input) { throw "Yield of recovered tree does not match the original input"; } @@ -129,8 +176,9 @@ private TestMeasurement testRecovery(&T (value input, loc origin) standardParser errorSize = (0 | it + size(getErrorText(err)) | err <- errors); measurement = recovered(source=source, duration=duration, errorCount=errorCount, errorSize=errorSize); } - } catch ParseError(_): { - result = "error"; + result = "recovery"; + } catch ParseError(_): { + result = "error"; duration = realTime() - startTime; measurement = parseError(source=source, duration=duration); } @@ -138,7 +186,7 @@ private TestMeasurement testRecovery(&T (value input, loc origin) standardParser if (statFile != |unknown:///|) { int ratio = percent(duration, referenceParseTime); - appendToFile(statFile, ",,,,,,,\n"); + appendToFile(statFile, ",,,,,,,,\n"); } return measurement; @@ -481,7 +529,7 @@ TestStats batchRecoveryTest(loc syntaxFile, str topSort, loc dir, str ext, int m fromFile = from; if (statFile != |unknown:///|) { - writeFile(statFile, "source,size,result,duration,ratio,disambiguationDuration,errorCount,errorSize\n"); + writeFile(statFile, "source,size,result,duration,ratio,disambiguationDuration,errorCount,errorSize,memoVerification\n"); } return runBatchRecoveryTest(syntaxFile, topSort, dir, ext, maxFiles, minFileSize, maxFileSize, statFile, testStats()); From f845509d976bbe002ed4bb8c4c484b371027805d Mon Sep 17 00:00:00 2001 From: Pieter Olivier Date: Fri, 13 Dec 2024 16:26:38 +0100 Subject: [PATCH 19/29] Removed debug prints when comparing two parse trees --- .../rascalmpl/library/util/ErrorRecovery.java | 59 ++++--------------- 1 file changed, 11 insertions(+), 48 deletions(-) diff --git a/src/org/rascalmpl/library/util/ErrorRecovery.java b/src/org/rascalmpl/library/util/ErrorRecovery.java index e639a0124d3..6f8033eebfb 100644 --- a/src/org/rascalmpl/library/util/ErrorRecovery.java +++ b/src/org/rascalmpl/library/util/ErrorRecovery.java @@ -255,10 +255,10 @@ public int hashCode() { }; public IBool treeEquality(IConstructor tree1, IConstructor tree2) { - return rascalValues.bool(checkTreeEquality((ITree) tree1, (ITree) tree2, new HashSet<>(), true)); + return rascalValues.bool(checkTreeEquality((ITree) tree1, (ITree) tree2, new HashSet<>())); } - private boolean checkTreeEquality(ITree tree1, ITree tree2, Set equalTrees, boolean logInequality) { + private boolean checkTreeEquality(ITree tree1, ITree tree2, Set equalTrees) { if (tree1 == tree2) { return true; } @@ -266,39 +266,17 @@ private boolean checkTreeEquality(ITree tree1, ITree tree2, Set equalT Type type = tree1.getConstructorType(); if (!type.equals(tree2.getConstructorType())) { - if (logInequality) { - System.out.println("types not equal"); - } return false; } if (!checkLocationEquality(tree1, tree2)) { - if (logInequality) { - System.out.println("locations not equal"); - } return false; } if (type == RascalValueFactory.Tree_Char) { - if (checkCharEquality(tree1, tree2)) { - return true; - } - - if (logInequality) { - System.out.println("char nodes not equal"); - } - - return false; + return checkCharEquality(tree1, tree2); } else if (type == RascalValueFactory.Tree_Cycle) { - if (checkCycleEquality(tree1, tree2)) { - return true; - } - - if (logInequality) { - System.out.println("char nodes not equal"); - } - - return false; + return checkCycleEquality(tree1, tree2); } TreePair pair = new TreePair(tree1, tree2); @@ -308,15 +286,9 @@ private boolean checkTreeEquality(ITree tree1, ITree tree2, Set equalT boolean result; if (type == RascalValueFactory.Tree_Appl) { - result = checkApplEquality(tree1, tree2, equalTrees, logInequality); - if (logInequality && !result) { - System.out.println("appls not equal"); - } + result = checkApplEquality(tree1, tree2, equalTrees); } else if (type == RascalValueFactory.Tree_Amb) { - result = checkAmbEquality(tree1, tree2, equalTrees, logInequality); - if (logInequality && !result) { - System.out.println("ambs not equal"); - } + result = checkAmbEquality(tree1, tree2, equalTrees); } else { throw new IllegalArgumentException("unknown tree type: " + type); } @@ -328,14 +300,11 @@ private boolean checkTreeEquality(ITree tree1, ITree tree2, Set equalT return result; } - private boolean checkApplEquality(ITree tree1, ITree tree2, Set equalTrees, boolean logInequality) { + private boolean checkApplEquality(ITree tree1, ITree tree2, Set equalTrees) { IConstructor type1 = ProductionAdapter.getType(tree1); IConstructor type2 = ProductionAdapter.getType(tree2); if (!type1.equals(type2)) { - if (logInequality) { - System.out.println("types do not match: " + DebugUtil.prodToString(type1) + " != " + DebugUtil.prodToString(type2)); - } return false; } @@ -343,16 +312,13 @@ private boolean checkApplEquality(ITree tree1, ITree tree2, Set equalT IList args2 = TreeAdapter.getArgs(tree2); int size = args1.size(); if (size != args2.size()) { - if (logInequality) { - System.out.println("argument count mismatch"); - } return false; } for (int i = 0; i < size; i++) { ITree arg1 = (ITree) args1.get(i); ITree arg2 = (ITree) args2.get(i); - if (!checkTreeEquality(arg1, arg2, equalTrees, logInequality)) { + if (!checkTreeEquality(arg1, arg2, equalTrees)) { return false; } } @@ -360,14 +326,11 @@ private boolean checkApplEquality(ITree tree1, ITree tree2, Set equalT return true; } - private boolean checkAmbEquality(ITree tree1, ITree tree2, Set equalTrees, boolean logInequality) { + private boolean checkAmbEquality(ITree tree1, ITree tree2, Set equalTrees) { ISet alts1 = tree1.getAlternatives(); ISet alts2 = tree2.getAlternatives(); if (alts1.size() != alts2.size()) { - if (logInequality) { - System.out.println("amb nodes do not have an equal size"); - } return false; } @@ -381,7 +344,7 @@ private boolean checkAmbEquality(ITree tree1, ITree tree2, Set equalTr int index2 = 0; // This relies on iteration order being stable, which is a pretty safe bet. for (IValue alt2 : alts2) { - if (!alts2Checked.get(index2) && checkTreeEquality((ITree) alt1, (ITree) alt2, equalTrees, false)) { + if (!alts2Checked.get(index2) && checkTreeEquality((ITree) alt1, (ITree) alt2, equalTrees)) { alts2Checked.set(index2); break; } @@ -405,7 +368,7 @@ private boolean checkAmbEquality(ITree tree1, ITree tree2, Set equalTr for (IValue alt2 : alts2) { if (!alts2Checked.get(index2)) { System.err.println(" lhs amb node not found: " + index2); - checkTreeEquality((ITree) missingAlt, (ITree) alt2, equalTrees, true); + checkTreeEquality((ITree) missingAlt, (ITree) alt2, equalTrees, LOG_INEQUALITY); System.err.println(); } index2++; From 18d89f480633c06d220811b0016b0c2569bc6fee Mon Sep 17 00:00:00 2001 From: Pieter Olivier Date: Fri, 13 Dec 2024 16:26:57 +0100 Subject: [PATCH 20/29] Added simple node count function --- .../rascalmpl/library/util/ErrorRecovery.rsc | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/org/rascalmpl/library/util/ErrorRecovery.rsc b/src/org/rascalmpl/library/util/ErrorRecovery.rsc index 4e99a545ed9..95401b3467b 100644 --- a/src/org/rascalmpl/library/util/ErrorRecovery.rsc +++ b/src/org/rascalmpl/library/util/ErrorRecovery.rsc @@ -58,6 +58,26 @@ equality check that takes advantage of the sharing present in those trees. Note that this equality check only takes into account the offset and length fields of source locations. All other fields are ignored.} java bool treeEquality(Tree t1, Tree t2); +int nodeCount(appl(_, args)) { + int count = 1; + for (Tree arg <- args) { + count += nodeCount(arg); + } + return count; +} + +int nodeCount(amb(alts)) { + int count = 1; + for (Tree alt <- alts) { + count += nodeCount(alt); + } + return count; +} + +int nodeCount(char(_)) = 1; + +int nodeCount(cycle(_,_)) = 1; + @javaClass{org.rascalmpl.library.util.ErrorRecovery} @synopsis{Generate a dot representation of a parse forest.} java void parseTree2Dot(Tree tree, loc dotFile); From 53074d19ade747cd5f9379a3ff8854c255cf5b2e Mon Sep 17 00:00:00 2001 From: Pieter Olivier Date: Fri, 13 Dec 2024 16:27:42 +0100 Subject: [PATCH 21/29] Implemented memoization configuartion using query params This is just for verification purposes, will be removed later. --- src/org/rascalmpl/parser/gtd/SGTDBF.java | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/org/rascalmpl/parser/gtd/SGTDBF.java b/src/org/rascalmpl/parser/gtd/SGTDBF.java index 4ef2e9175c4..7b90bea9fe9 100755 --- a/src/org/rascalmpl/parser/gtd/SGTDBF.java +++ b/src/org/rascalmpl/parser/gtd/SGTDBF.java @@ -1512,11 +1512,23 @@ private static int[] charsToInts(char[] input){ } private void checkMemoization(URI inputURI) { - DefaultNodeFlattener.regularMemoization = true; + DefaultNodeFlattener.nodeMemoization = false; + DefaultNodeFlattener.linkMemoization = true; if (inputURI != null) { String query = inputURI.getQuery(); if (query != null) { - DefaultNodeFlattener.regularMemoization = !query.contains("disable-memoization"); + if (query.contains("parse-memoization=none")) { + DefaultNodeFlattener.nodeMemoization = false; + DefaultNodeFlattener.linkMemoization = false; + } else if (query.contains("parse-memoization=link")) { + DefaultNodeFlattener.nodeMemoization = false; + DefaultNodeFlattener.linkMemoization = true; + } else if (query.contains("parse-memoization=node")) { + DefaultNodeFlattener.nodeMemoization = true; + DefaultNodeFlattener.linkMemoization = false; + } else if (query.contains("parse-memoization")) { + throw new IllegalArgumentException("Unsupported memoization directive: " + query); + } } } } From ec5a76ae101e9bf902a5f59811501163773b1142 Mon Sep 17 00:00:00 2001 From: Pieter Olivier Date: Fri, 13 Dec 2024 16:28:59 +0100 Subject: [PATCH 22/29] Added debug configuration flags, will be removed later. --- .../rascalmpl/parser/gtd/result/out/DefaultNodeFlattener.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/org/rascalmpl/parser/gtd/result/out/DefaultNodeFlattener.java b/src/org/rascalmpl/parser/gtd/result/out/DefaultNodeFlattener.java index 0ff7b09ff32..5c34f0b7b89 100644 --- a/src/org/rascalmpl/parser/gtd/result/out/DefaultNodeFlattener.java +++ b/src/org/rascalmpl/parser/gtd/result/out/DefaultNodeFlattener.java @@ -26,7 +26,8 @@ * Converter for parse trees that produces trees in UPTR format. */ public class DefaultNodeFlattener implements INodeFlattener{ - public static boolean regularMemoization = true; + public static boolean nodeMemoization = true; + public static boolean linkMemoization = false; private final CharNodeFlattener charNodeConverter; private final LiteralNodeFlattener literalNodeConverter; From ecc14121e4bbca27f64bd21b61652a4aa27368d8 Mon Sep 17 00:00:00 2001 From: Pieter Olivier Date: Sat, 14 Dec 2024 11:00:37 +0100 Subject: [PATCH 23/29] Fixed test to succeed independent of amb alternative order --- .../recovery/bugs/CycleMemoizationBug.rsc | 4 ++-- .../rascalmpl/library/util/ErrorRecovery.java | 18 ------------------ .../gtd/result/out/DefaultNodeFlattener.java | 4 ++-- 3 files changed, 4 insertions(+), 22 deletions(-) diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/CycleMemoizationBug.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/CycleMemoizationBug.rsc index 73af4833199..fc3fc465d36 100644 --- a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/CycleMemoizationBug.rsc +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/CycleMemoizationBug.rsc @@ -8,10 +8,10 @@ import String; import vis::Text; import util::ErrorRecovery; import util::Benchmark; -import Exception; /** -Originally memoization inside cycles was turned off. This caused this test to take a long time and then crash with an out-of-memory error. +* Originally memoization inside cycles was turned off. This caused this test to take a long time and then crash with an out-of-memory error. +* With the new link memoization this test should run fine. */ void testCycleMemoizationFailure() { recoveryParser = parser(#start[Module], allowRecovery=true, allowAmbiguity=true); diff --git a/src/org/rascalmpl/library/util/ErrorRecovery.java b/src/org/rascalmpl/library/util/ErrorRecovery.java index 6f8033eebfb..46deede19f5 100644 --- a/src/org/rascalmpl/library/util/ErrorRecovery.java +++ b/src/org/rascalmpl/library/util/ErrorRecovery.java @@ -10,7 +10,6 @@ import java.util.Objects; import org.rascalmpl.interpreter.asserts.Ambiguous; -import org.rascalmpl.parser.util.DebugUtil; import org.rascalmpl.parser.util.ParseStateVisualizer; import org.rascalmpl.unicode.UnicodeOutputStreamWriter; import org.rascalmpl.uri.URIResolverRegistry; @@ -354,27 +353,10 @@ private boolean checkAmbEquality(ITree tree1, ITree tree2, Set equalTr if (index2 == alts2.size()) { // We did not find alt1 in alts2 - if (logInequality) { - System.out.println("amb nodes not equal"); - System.err.println(" rhs amb node not found: " + index2); - missingAlt = alt1; - } ok = false; } } - if (!ok && logInequality && missingAlt != null) { - int index2 = 0; - for (IValue alt2 : alts2) { - if (!alts2Checked.get(index2)) { - System.err.println(" lhs amb node not found: " + index2); - checkTreeEquality((ITree) missingAlt, (ITree) alt2, equalTrees, LOG_INEQUALITY); - System.err.println(); - } - index2++; - } - } - return ok; } diff --git a/src/org/rascalmpl/parser/gtd/result/out/DefaultNodeFlattener.java b/src/org/rascalmpl/parser/gtd/result/out/DefaultNodeFlattener.java index 5c34f0b7b89..e566adbec9a 100644 --- a/src/org/rascalmpl/parser/gtd/result/out/DefaultNodeFlattener.java +++ b/src/org/rascalmpl/parser/gtd/result/out/DefaultNodeFlattener.java @@ -26,8 +26,8 @@ * Converter for parse trees that produces trees in UPTR format. */ public class DefaultNodeFlattener implements INodeFlattener{ - public static boolean nodeMemoization = true; - public static boolean linkMemoization = false; + public static boolean nodeMemoization = false; + public static boolean linkMemoization = true; private final CharNodeFlattener charNodeConverter; private final LiteralNodeFlattener literalNodeConverter; From c8ae25632ba11e94aab79706f6644e8ab76b7b2a Mon Sep 17 00:00:00 2001 From: Pieter Olivier Date: Mon, 6 Jan 2025 13:04:41 +0100 Subject: [PATCH 24/29] Added error recovery test for non-ascii grammer/input --- .../tests/concrete/recovery/ListCycleTest.rsc | 26 ------------------- .../tests/concrete/recovery/NonAsciiTest.rsc | 2 ++ 2 files changed, 2 insertions(+), 26 deletions(-) delete mode 100644 src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/ListCycleTest.rsc diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/ListCycleTest.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/ListCycleTest.rsc deleted file mode 100644 index 46099f0f850..00000000000 --- a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/ListCycleTest.rsc +++ /dev/null @@ -1,26 +0,0 @@ -module lang::rascal::tests::concrete::recovery::ListCycleTest - -import ParseTree; -import vis::Text; -import IO; -import util::ErrorRecovery; - -syntax S = T?*; - -syntax T = X T? | "$"; - -syntax X = "b"?; - -void testListCycles() { - str input = "b$"; - Tree t1 = parse(#S, input, |unknown:///|, allowAmbiguity=true); - Tree t2 = parse(#S, input, |unknown:///?disable-memoization=true|, allowAmbiguity=true); - println(prettyTree(t1)); - println(prettyTree(t2)); - - if (treeEquality(t1, t2)) { - println("equal"); - } else { - println("NOT EQUAL"); - } -} diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/NonAsciiTest.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/NonAsciiTest.rsc index 5cd22c056e6..768c98790d9 100644 --- a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/NonAsciiTest.rsc +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/NonAsciiTest.rsc @@ -27,3 +27,5 @@ syntax C = "©"; test bool nonAsciiOk() = checkRecovery(#S, "ªßß©", []); +test bool nonAsciiError() = checkRecovery(#S, "ªßxß©", ["xß"]); + From 5e881cef3d5cac5163f9306e5b06cb490f7a48f1 Mon Sep 17 00:00:00 2001 From: Pieter Olivier Date: Mon, 6 Jan 2025 13:05:44 +0100 Subject: [PATCH 25/29] Renamed conditional memoization booleans --- .../parser/gtd/result/out/ListContainerNodeFlattener.java | 4 ++-- .../parser/gtd/result/out/SortContainerNodeFlattener.java | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/org/rascalmpl/parser/gtd/result/out/ListContainerNodeFlattener.java b/src/org/rascalmpl/parser/gtd/result/out/ListContainerNodeFlattener.java index dd75ef5fe8a..633f906ebe1 100644 --- a/src/org/rascalmpl/parser/gtd/result/out/ListContainerNodeFlattener.java +++ b/src/org/rascalmpl/parser/gtd/result/out/ListContainerNodeFlattener.java @@ -461,7 +461,7 @@ public T convertToUPTR(INodeFlattener converter, INodeConstructorFactory levelCache = preCache.get(offset); if(levelCache != null){ @@ -539,7 +539,7 @@ public T convertToUPTR(INodeFlattener converter, INodeConstructorFactory levelCache = preCache.get(offset); if(levelCache != null){ diff --git a/src/org/rascalmpl/parser/gtd/result/out/SortContainerNodeFlattener.java b/src/org/rascalmpl/parser/gtd/result/out/SortContainerNodeFlattener.java index b878e34c256..d88cb692e65 100644 --- a/src/org/rascalmpl/parser/gtd/result/out/SortContainerNodeFlattener.java +++ b/src/org/rascalmpl/parser/gtd/result/out/SortContainerNodeFlattener.java @@ -123,7 +123,7 @@ public T convertToUPTR(INodeFlattener converter, INodeConstructorFactory levelCache = preCache.get(offset); if(levelCache != null){ @@ -195,7 +195,7 @@ else if (nrOfAlternatives > 0) { // Ambiguous. stack.dirtyPurge(); // Pop this node off the stack. - if(DefaultNodeFlattener.regularMemoization && result != null && depth < cycleMark.depth){ // Only share the constructed tree if we are not in a cycle. + if(DefaultNodeFlattener.nodeMemoization && result != null && depth < cycleMark.depth){ // Only share the constructed tree if we are not in a cycle. if(!hasSideEffects){ // Cache side-effect free tree. ObjectIntegerKeyedHashMap levelCache = preCache.get(offset); if(levelCache != null){ From 5856af60f2d01b216c366f2add061eecf1eb5af1 Mon Sep 17 00:00:00 2001 From: Pieter Olivier Date: Wed, 8 Jan 2025 09:51:39 +0100 Subject: [PATCH 26/29] Implemented visualization of AbstractNode trees --- .../parser/util/ParseStateVisualizer.java | 71 ++++++++++++++----- .../util/visualize/dot/DotGraph.java | 28 +++++--- 2 files changed, 72 insertions(+), 27 deletions(-) diff --git a/src/org/rascalmpl/parser/util/ParseStateVisualizer.java b/src/org/rascalmpl/parser/util/ParseStateVisualizer.java index 1ef0aeaa40d..3b40cf0193f 100644 --- a/src/org/rascalmpl/parser/util/ParseStateVisualizer.java +++ b/src/org/rascalmpl/parser/util/ParseStateVisualizer.java @@ -25,7 +25,6 @@ import java.nio.file.StandardCopyOption; import java.util.HashMap; import java.util.Map; -import java.util.Objects; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; @@ -40,6 +39,7 @@ import org.rascalmpl.parser.gtd.result.AbstractNode; import org.rascalmpl.parser.gtd.result.CharNode; import org.rascalmpl.parser.gtd.result.EpsilonNode; +import org.rascalmpl.parser.gtd.result.ExpandableContainerNode; import org.rascalmpl.parser.gtd.result.LiteralNode; import org.rascalmpl.parser.gtd.result.RecoveredNode; import org.rascalmpl.parser.gtd.result.SkippedNode; @@ -403,39 +403,73 @@ private

DotNode createDotNode(AbstractStackNode

stackNode) { } private NodeId addParserNodes(DotGraph graph, AbstractNode parserNode) { - NodeId id = addParserNode(graph, parserNode); + NodeId id = getNodeId(parserNode); + if (graph.containsNode(id)) { + return id; + } + + addParserNode(graph, parserNode, id); + if (parserNode instanceof AbstractContainerNode) { @SuppressWarnings("unchecked") AbstractContainerNode container = (AbstractContainerNode) parserNode; - Link link = container.getFirstAlternative(); - if (link != null) { - NodeId firstPrefix = addPrefixes(graph, link); - graph.addEdge(id, firstPrefix); + Link firstAlt = container.getFirstAlternative(); + IConstructor firstProd = container.getFirstProduction(); + if (firstAlt != null) { + NodeId firstAltId = addLink(graph, firstAlt, "Alt: " + DebugUtil.prodToString(firstProd)); + graph.addEdge(id, firstAltId); + + ArrayList alternatives = container.getAdditionalAlternatives(); + ArrayList prods = container.getAdditionalProductions(); + if (alternatives != null) { + for (int i=0; i prefixes = link.getPrefixes(); if (prefixes != null) { for (int i=0; i) parserNode); + case ExpandableContainerNode.ID: + enrichContainerNode(dotNode, (AbstractContainerNode) parserNode); break; case SkippedNode.ID: enrichSkippedNode(dotNode, (SkippedNode) parserNode); @@ -468,8 +503,6 @@ private NodeId addParserNode(DotGraph graph, AbstractNode parserNode) { } graph.addNode(dotNode); - - return id; } private void enrichCharNode(DotNode dotNode, CharNode charNode) { @@ -492,10 +525,10 @@ private void enrichSkippedNode(DotNode dotNode, SkippedNode skippedNode) { dotNode.setAttribute(DotAttribute.ATTR_LABEL, label); } - private void enrichSortContainerNode(DotNode dotNode, SortContainerNode sortNode) { + private void enrichContainerNode(DotNode dotNode, AbstractContainerNode node) { String label = dotNode.getAttributeValue(DotAttribute.ATTR_LABEL); - label += " " + sortNode.getOffset() + "-" + sortNode.getEndOffset(); - label += "\n" + DebugUtil.prodToString(sortNode.getFirstProduction()); + label += " " + node.getOffset() + "-" + node.getEndOffset(); + label += "\n" + ProductionAdapter.getSortName(node.getFirstProduction()); dotNode.setAttribute(DotAttribute.ATTR_LABEL, label); } diff --git a/src/org/rascalmpl/util/visualize/dot/DotGraph.java b/src/org/rascalmpl/util/visualize/dot/DotGraph.java index 1553491c368..9370b4a7b17 100644 --- a/src/org/rascalmpl/util/visualize/dot/DotGraph.java +++ b/src/org/rascalmpl/util/visualize/dot/DotGraph.java @@ -45,21 +45,32 @@ private void addStatement(DotStatement statement) { statements.add(statement); } - public void addNode(DotNode node) { - if (!nodes.containsKey(node.getId())) { - addStatement(node); - nodes.put(node.getId(), node); + public boolean containsNode(NodeId id) { + return nodes.containsKey(id); + } + + public boolean addNode(DotNode node) { + if (nodes.containsKey(node.getId())) { + return false; } + + addStatement(node); + nodes.put(node.getId(), node); + return true; } - public void addNode(String id, String label) { - addNode(new NodeId(id), label); + public boolean addNode(String id, String label) { + return addNode(new NodeId(id), label); } - public void addNode(NodeId id, String label) { + public boolean addNode(NodeId id, String label) { + if (nodes.containsKey(id)) { + return false; + } + DotNode node = new DotNode(id); node.addAttribute(DotAttribute.ATTR_LABEL, label); - addNode(node); + return addNode(node); } public void addArrayNode(NodeId id, int size) { @@ -108,6 +119,7 @@ public void writeSource(PrintWriter writer) { writer.write(id); } writer.println(" {"); + writer.println("ordering = out;"); for (DotStatement statement : statements) { statement.writeSource(writer); writer.println(";"); From 9dbe936268485d118fdf0d7e0458e5182c093e55 Mon Sep 17 00:00:00 2001 From: Pieter Olivier Date: Wed, 8 Jan 2025 09:52:18 +0100 Subject: [PATCH 27/29] Implemented support for parse result visualization --- src/org/rascalmpl/parser/gtd/SGTDBF.java | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/org/rascalmpl/parser/gtd/SGTDBF.java b/src/org/rascalmpl/parser/gtd/SGTDBF.java index 7b90bea9fe9..c952408959d 100755 --- a/src/org/rascalmpl/parser/gtd/SGTDBF.java +++ b/src/org/rascalmpl/parser/gtd/SGTDBF.java @@ -1511,9 +1511,9 @@ private static int[] charsToInts(char[] input){ return result; } - private void checkMemoization(URI inputURI) { - DefaultNodeFlattener.nodeMemoization = false; - DefaultNodeFlattener.linkMemoization = true; + private void checkMemoization(URI inputURI, AbstractNode result) { + DefaultNodeFlattener.nodeMemoization = true; + DefaultNodeFlattener.linkMemoization = false; if (inputURI != null) { String query = inputURI.getQuery(); if (query != null) { @@ -1529,6 +1529,10 @@ private void checkMemoization(URI inputURI) { } else if (query.contains("parse-memoization")) { throw new IllegalArgumentException("Unsupported memoization directive: " + query); } + + if (query.contains("visualize-parse-result")) { + new ParseStateVisualizer("ParseResult").visualizeNode(result); + } } } } @@ -1541,7 +1545,7 @@ private T parse(String nonterminal, URI inputURI, int[] input, IActionExecutor debugListener) { AbstractNode result = parse(new NonTerminalStackNode

(AbstractStackNode.START_SYMBOL_ID, 0, nonterminal), inputURI, input, recoverer, debugListener); - checkMemoization(inputURI); + checkMemoization(inputURI, result); return buildResult(result, converter, nodeConstructorFactory, actionExecutor); } @@ -1577,7 +1581,7 @@ private T parse(String nonterminal, URI inputURI, int[] input, INodeFlattener debugListener) { AbstractNode result = parse(new NonTerminalStackNode

(AbstractStackNode.START_SYMBOL_ID, 0, nonterminal), inputURI, input, recoverer, debugListener); - checkMemoization(inputURI); + checkMemoization(inputURI, result); return buildResult(result, converter, nodeConstructorFactory, new VoidActionExecutor()); } @@ -1607,7 +1611,7 @@ protected T parse(AbstractStackNode

startNode, URI inputURI, char[] input, IN INodeConstructorFactory nodeConstructorFactory) { AbstractNode result = parse(startNode, inputURI, charsToInts(input), null, null); - checkMemoization(inputURI); + checkMemoization(inputURI, result); return buildResult(result, converter, nodeConstructorFactory, new VoidActionExecutor()); } From 6ea7144080a550749d0ddbaa02b9592c9dcd2b70 Mon Sep 17 00:00:00 2001 From: Pieter Olivier Date: Wed, 8 Jan 2025 09:53:12 +0100 Subject: [PATCH 28/29] Fixed comment on memoization disabling --- .../lang/rascal/tests/concrete/recovery/RecoveryTestSupport.rsc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RecoveryTestSupport.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RecoveryTestSupport.rsc index 61d3cc880fb..9f0c7c8f59f 100644 --- a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RecoveryTestSupport.rsc +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RecoveryTestSupport.rsc @@ -60,7 +60,7 @@ public data TestStats = testStats( // When this is turned on, memoization is checked for correctness. This makes the tests take much longer. // Note that this test requires the posibility to disable memoization. -// In the current test version this can be done by including a "disable-memoization" query parameter. +// In the current test version this can be done by including a "parse-memoization=none" query parameter. // We expect this feature to be removed eventually and then the `verifyMemoizationCorrectness` flag becomes useless. bool verifyMemoizationCorrectness = false; From 41d05a5fc2eddde75385d63ed126d29be40a047e Mon Sep 17 00:00:00 2001 From: Pieter Olivier Date: Wed, 8 Jan 2025 09:53:50 +0100 Subject: [PATCH 29/29] Switched to lexical rules to make result graph simpler --- .../lang/rascal/tests/concrete/recovery/CycleTest.rsc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/CycleTest.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/CycleTest.rsc index 3cf9069575d..69e062594af 100644 --- a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/CycleTest.rsc +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/CycleTest.rsc @@ -6,19 +6,19 @@ import IO; import util::ErrorRecovery; import Node; -syntax S = T | U; +lexical S = T | U; -syntax T = X T? | "$"; +lexical T = X T? | "$"; -syntax U = X T? | "$"; +lexical U = X T? | "$"; -syntax X = "b"? | "c"; +lexical X = "b"? | "c"; void testCycles() { str input = "bc$"; //str input = "bcbcbcccbb$"; Tree t1 = parse(#S, input, |unknown:///|, allowAmbiguity=true); - Tree t2 = parse(#S, input, |unknown:///?disable-memoization=true|, allowAmbiguity=true); + Tree t2 = parse(#S, input, |unknown:///?parse-memoization=none&visualize-parse-result|, allowAmbiguity=true); println(prettyTree(t1)); println(prettyTree(t2));