Merge pull request #2057 from usethesource/recovery/only-recovery-sta…

…cks-left Trigger recovery when only recovery stacks are left
usethesource · Oct 24, 2024 · c75cba6 · c75cba6
2 parents 2683d8c + 4d94b28
commit c75cba6
Show file tree

Hide file tree

Showing 15 changed files with 186 additions and 30 deletions.
diff --git a/.gitignore b/.gitignore
@@ -32,3 +32,6 @@ dependency-reduced-pom.xml
 
 *.iml
 .idea/
+
+# generated by error recovery benchmark
+rascal-recovery-stats.csv
diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RascalRecoveryTests.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RascalRecoveryTests.rsc
@@ -54,7 +54,7 @@ test bool rascalFunctionDeclarationMissingCloseParen() = checkRecovery(#Function
 
 test bool rascalIfMissingExpr() = checkRecovery(#FunctionDeclaration, "void f(){if(){1;}}", [")"]);
 
-test bool rascalIfBodyEmpty() = checkRecovery(#start[Module], "module A void f(){1;} void g(){if(1){}} void h(){1;}", ["} void h(){1"]);
+test bool rascalIfBodyEmpty() = checkRecovery(#start[Module], "module A void f(){1;} void g(){if(1){}} void h(){1;}", ["{", "} "]);
 
 // Not working yet:
 /*

diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RecoveryTestSupport.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RecoveryTestSupport.rsc
@@ -433,7 +433,7 @@ TestStats batchRecoveryTest(loc syntaxFile, str topSort, loc dir, str ext, int m
 
 TestStats runBatchRecoveryTest(loc syntaxFile, str topSort, loc dir, str ext, int maxFiles, int minFileSize, int maxFileSize, loc statFile, TestStats cumulativeStats) {
     println("Batch testing in directory <dir> (maxFiles=<maxFiles>, maxFileSize=<maxFileSize>, fromFile=<fromFile>)");
-    writeFile(statFile, "source,size,result,duration,disambiguationDuration,errorSize\n");
+    writeFile(statFile, "source,size,result,duration,disambiguationDuration,errorCount,errorSize\n");
     for (entry <- listEntries(dir)) {
         loc file = dir + entry;
         if (isFile(file)) {

diff --git a/...org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/InfiniteLoopMultiError.rsc b/...org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/InfiniteLoopMultiError.rsc
@@ -0,0 +1,15 @@
+module lang::rascal::tests::concrete::recovery::bugs::InfiniteLoopMultiError
+
+
+import lang::rascal::tests::concrete::recovery::RecoveryTestSupport;
+import lang::rascal::\syntax::Rascal;
+import ParseTree;
+import IO;
+
+void testInfiniteLoopMultiError() {
+    standardParser = parser(#start[Module], allowRecovery=false, allowAmbiguity=true);
+    recoveryParser = parser(#start[Module], allowRecovery=true, allowAmbiguity=true);
+    source = |std:///Exception.rsc|;
+    input = readFile(source);
+    testDeleteUntilEol(standardParser, recoveryParser, source, input, 200, 100, begin=662, end=664);
+}
diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/MinimalMultiError.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/MinimalMultiError.rsc
@@ -0,0 +1,17 @@
+module lang::rascal::tests::concrete::recovery::bugs::MinimalMultiError
+
+import lang::rascal::tests::concrete::recovery::RecoveryTestSupport;
+
+layout Layout = [\ ]* !>> [\ ];
+
+syntax S = T;
+
+syntax T = AB AB AB End;
+syntax AB = 'a' 'b';
+syntax End = "$";
+
+test bool multiOk() = checkRecovery(#S, "ababab$", []);
+
+test bool multiOneError() = checkRecovery(#S, "acabab$", ["c"]);
+
+test bool multiTwoError() = checkRecovery(#S, "acacab$", ["c","c"]);
diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/MultiErrorBug.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/MultiErrorBug.rsc
@@ -0,0 +1,51 @@
+module lang::rascal::tests::concrete::recovery::bugs::MultiErrorBug
+
+import ParseTree;
+import IO;
+import util::ErrorRecovery;
+import List;
+import vis::Text;
+
+start syntax Module	= SyntaxDefinition* ;
+
+syntax SyntaxDefinition =  Prod ";" ;
+
+lexical Name = [A-Z];
+
+syntax Sym
+	= Sym NonterminalLabel
+	| StringConstant
+	;
+
+lexical StringConstant = "\"" StringCharacter* "\"" ;
+
+lexical StringCharacter = ![\"] ;
+
+lexical NonterminalLabel = [a-z] ;
+
+syntax Prod = Sym* ;
+
+// This is an open issue: instead of two small errors, one big error tree is returned.
+bool multiErrorBug() {
+    str input = "#\"a\";#\"b\";";
+    Tree t = parser(#start[Module], allowRecovery=true, allowAmbiguity=true)(input, |unknown:///?visualize=false|);
+
+    println(prettyTree(t));
+
+    list[Tree] errors = findAllErrors(t);
+    println("<size(errors)> Errors");
+    for (Tree error <- errors) {
+        Tree skipped = getSkipped(error);
+        println("  <skipped@\loc>: <getErrorText(error)>");
+    }
+    Tree disambiguated = disambiguateErrors(t);
+    list[Tree] disambiguatedErrors = findAllErrors(disambiguated);
+    println("After disambiguating:");
+    println("<size(disambiguatedErrors)> Errors");
+    for (Tree error <- disambiguatedErrors) {
+        Tree skipped = getSkipped(error);
+        println("  <skipped@\loc>: <getErrorText(error)>");
+    }
+    return true;
+}
+
diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/MultiErrorPico.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/MultiErrorPico.rsc
@@ -0,0 +1,13 @@
+module lang::rascal::tests::concrete::recovery::bugs::MultiErrorPico
+
+import lang::pico::\syntax::Main;
+import lang::rascal::tests::concrete::recovery::RecoveryTestSupport;
+
+bool multiErrorPico() {
+    return checkRecovery(#start[Program], "begin
+  declare;
+  i := #1;
+  j := #2;
+  k := 3
+end" , ["#1", "#2"]);
+}
diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/MultiErrorPicoInput.pico b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/MultiErrorPicoInput.pico
@@ -0,0 +1,6 @@
+begin
+  declare;
+  i := #1;
+  j := #2;
+  k := 3
+end
diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/MultiProdRecovery.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/MultiProdRecovery.rsc
@@ -0,0 +1,20 @@
+module lang::rascal::tests::concrete::recovery::bugs::MultiProdRecovery
+
+import lang::rascal::tests::concrete::recovery::RecoveryTestSupport;
+
+layout Layout = [\ ]* !>> [\ ];
+
+syntax Prog = Stat*;
+
+syntax Stat 
+    = Expr ";"
+    | "{" Stat* "}";
+
+syntax Expr 
+    = Expr "+" Expr
+    | Expr "-" Expr
+    | "e";
+
+test bool multiProdOk() = checkRecovery(#Prog, "{ e + e; }", []);
+
+test bool multiProdOperatorError() = checkRecovery(#Prog, "{ e * e; }", ["* "]);
diff --git a/src/org/rascalmpl/parser/gtd/SGTDBF.java b/src/org/rascalmpl/parser/gtd/SGTDBF.java
@@ -32,6 +32,7 @@
 import org.rascalmpl.parser.gtd.stack.AbstractStackNode;
 import org.rascalmpl.parser.gtd.stack.EpsilonStackNode;
 import org.rascalmpl.parser.gtd.stack.NonTerminalStackNode;
+import org.rascalmpl.parser.gtd.stack.SkippingStackNode;
 import org.rascalmpl.parser.gtd.stack.edge.EdgesSet;
 import org.rascalmpl.parser.gtd.stack.filter.ICompletionFilter;
 import org.rascalmpl.parser.gtd.stack.filter.IEnterFilter;
@@ -1347,12 +1348,10 @@ protected AbstractNode parse(AbstractStackNode<P> startNode, URI inputURI, int[]
 	    initTime();
 
 	  try {
-
 	    if(invoked){
 	      throw new RuntimeException("Can only invoke 'parse' once.");
 	    }
 
-
 	    invoked = true;
 
 	    // Initialize.
@@ -1395,6 +1394,7 @@ protected AbstractNode parse(AbstractStackNode<P> startNode, URI inputURI, int[]
 
 							debugListener.shifting(location, input, positionStore);
 					}
+
 	        // Reduce-expand loop.
 	        do {
 							debugListener.iterating();
@@ -1409,11 +1409,15 @@ protected AbstractNode parse(AbstractStackNode<P> startNode, URI inputURI, int[]
 
 	        shiftedLevel = true;
 
+					if (onlyRecoveredStacksLeft() && attemptRecovery()) {
+						continue;
+					}
+
 					if (!findStacksToReduce()) {
-	    if(location == input.length){
+	    				if(location == input.length) {
 	      EdgesSet<P> startNodeEdgesSet = startNode.getIncomingEdges();
 	      int resultStoreId = getResultStoreId(startNode.getId());
-	      if(startNodeEdgesSet != null && startNodeEdgesSet.getLastVisitedLevel(resultStoreId) == input.length){
+	      					if(startNodeEdgesSet != null && startNodeEdgesSet.getLastVisitedLevel(resultStoreId) == input.length) {
 								result = startNodeEdgesSet.getLastResult(resultStoreId); // Success.
 								break;
 							}
@@ -1464,6 +1468,28 @@ private void initTime() {
 	  timestamp = System.nanoTime();
 	}
 
+	private boolean onlyRecoveredStacksLeft() {
+		int recoveredStacksFound = 0;
+
+		for (int i=0; i<todoLists.length; i++) {
+			DoubleStack<AbstractStackNode<P>, AbstractNode> todoList = todoLists[i];
+			if (todoList != null) {
+				int size = todoList.getSize();
+				for (int j=0; j<size; j++) {
+					if (!(todoList.getFirst(j) instanceof SkippingStackNode)) {
+						return false;
+					}
+
+					if (recoveredStacksFound++ > 50) {
+						return false;
+					}
+				}
+			}
+		}
+
+		return recoveredStacksFound > 0;
+	}
+
   private void checkTime(String msg) {
     long newStamp = System.nanoTime();
 		long duration = newStamp - timestamp;

diff --git a/src/org/rascalmpl/parser/uptr/recovery/CaseInsensitiveLiteralMatcher.java b/src/org/rascalmpl/parser/uptr/recovery/CaseInsensitiveLiteralMatcher.java
@@ -46,10 +46,11 @@ public CaseInsensitiveLiteralMatcher(int[][] ciLiteral) {
     }
 
     @Override
-    public MatchResult findMatch(int[] input, int startLocation) {
+    public MatchResult findMatch(int[] input, int startLocation, int maxLength) {
         int length = chars.length;
 
-        for (int start=startLocation; start < input.length - length; start++) {
+        int limit = Math.min(startLocation + maxLength - length, input.length - length + 1);
+        for (int start=startLocation; start < limit; start++) {
             boolean matches = true;
             for (int i=0; i<length; i++) {
                 int inputChar = input[start+i];

diff --git a/src/org/rascalmpl/parser/uptr/recovery/FailingMatcher.java b/src/org/rascalmpl/parser/uptr/recovery/FailingMatcher.java
@@ -16,7 +16,7 @@
 
 public class FailingMatcher implements InputMatcher {
     @Override
-    public MatchResult findMatch(int[] input, int startLocation) {
+    public MatchResult findMatch(int[] input, int startLocation, int maxLength) {
         return null;
     }
 }
diff --git a/src/org/rascalmpl/parser/uptr/recovery/InputMatcher.java b/src/org/rascalmpl/parser/uptr/recovery/InputMatcher.java
@@ -27,7 +27,7 @@ public interface InputMatcher {
     public static InputMatcher FAIL = new FailingMatcher();
 
     // Find a match in the input. Returning `null` means no match has been found.
-    MatchResult findMatch(int[] input, int startLocation);
+    MatchResult findMatch(int[] input, int startLocation, int maxLength);
 
     public static class MatchResult {
         private final int start;

diff --git a/src/org/rascalmpl/parser/uptr/recovery/LiteralMatcher.java b/src/org/rascalmpl/parser/uptr/recovery/LiteralMatcher.java
@@ -29,9 +29,10 @@ public LiteralMatcher(int[] literal) {
     }
 
     @Override
-    public MatchResult findMatch(int[] input, int startLocation) {
+    public MatchResult findMatch(int[] input, int startLocation, int maxLength) {
         int length = chars.length;
-        for (int start=startLocation; start<input.length - length + 1; start++) {
+        int limit = Math.min(startLocation + maxLength - length, input.length - length + 1);
+        for (int start=startLocation; start<limit; start++) {
             boolean matches = true;
             for (int i=0; i<length; i++) {
                 if (input[start + i] != chars[i]) {

diff --git a/src/org/rascalmpl/parser/uptr/recovery/ToTokenRecoverer.java b/src/org/rascalmpl/parser/uptr/recovery/ToTokenRecoverer.java
@@ -17,6 +17,7 @@
 import java.util.List;
 import java.util.Set;
 
+import org.apache.commons.lang3.tuple.Triple;
 import org.rascalmpl.parser.gtd.ExpectsProvider;
 import org.rascalmpl.parser.gtd.recovery.IRecoverer;
 import org.rascalmpl.parser.gtd.result.AbstractNode;
@@ -69,7 +70,7 @@ public DoubleArrayList<AbstractStackNode<IConstructor>, AbstractNode> reviveStac
 
         // For now we ignore unmatchable leaf nodes and filtered nodes. At some point we might use those to
         // improve error recovery.
-
+		
 		ArrayList<AbstractStackNode<IConstructor>> failedNodes = new ArrayList<>();
 		collectUnexpandableNodes(unexpandableNodes, failedNodes);
 		collectUnmatchableMidProductionNodes(location, unmatchableMidProductionNodes, failedNodes);
@@ -81,13 +82,15 @@ private DoubleArrayList<AbstractStackNode<IConstructor>, AbstractNode> reviveNod
         DoubleArrayList<AbstractStackNode<IConstructor>, ArrayList<IConstructor>> recoveryNodes) {
 		DoubleArrayList<AbstractStackNode<IConstructor>, AbstractNode> recoveredNodes = new DoubleArrayList<>();
 
+		Set<Triple<Integer, IConstructor, Integer>> skippedIds = new HashSet<>();
+
 		// Sort nodes by start location
-        recoveryNodes
+		recoveryNodes
             .sort((e1, e2) -> Integer.compare(e2.getLeft().getStartLocation(), e1.getLeft().getStartLocation()));
 
         if (VISUALIZE_RECOVERY_NODES) {
-		ParseStateVisualizer visualizer = new ParseStateVisualizer("Recovery");
-		visualizer.visualizeRecoveryNodes(recoveryNodes);
+			ParseStateVisualizer visualizer = new ParseStateVisualizer("Recovery");
+			visualizer.visualizeRecoveryNodes(recoveryNodes);
         }
 
 		for (int i = 0; i<recoveryNodes.size(); i++) {
@@ -101,9 +104,18 @@ private DoubleArrayList<AbstractStackNode<IConstructor>, AbstractNode> reviveNod
 			for (int j = prods.size() - 1; j >= 0; --j) {
 				IConstructor prod = prods.get(j);
 
+				IConstructor type = ProductionAdapter.getType(prod);
+
                 List<SkippingStackNode<IConstructor>> skippingNodes =
                     findSkippingNodes(input, location, recoveryNode, prod, startLocation);
 				for (SkippingStackNode<IConstructor> skippingNode : skippingNodes) {
+					int skipLength = skippingNode.getLength();
+
+					if (!skippedIds.add(Triple.ofNonNull(startLocation, type, skipLength))) {
+						// Do not add this skipped node if a node with the same startLocation, type, and skipLength has been added already
+						continue;
+					}
+
                     AbstractStackNode<IConstructor> continuer =
                         new RecoveryPointStackNode<>(stackNodeIdDispenser.dispenseId(), prod, recoveryNode);
 
@@ -142,19 +154,11 @@ private List<SkippingStackNode<IConstructor>> findSkippingNodes(int[] input, int
 			return nodes;	// No other nodes would be useful
 		}
 
-		// Try to find whitespace to skip to
-		// This often creates hopeless recovery attempts, but it might help in some cases.
-		// Further experimentation should quantify this statement.
-		/*
-         * result = SkippingStackNode.createResultUntilCharClass(WHITESPACE, input, startLocation, prod,
-         * dot); if (result != null) { nodes.add(new SkippingStackNode<>(stackNodeIdDispenser.dispenseId(),
-         * prod, result, startLocation)); }
-         */
-
 		// Find the last token of this production and skip until after that
 		List<InputMatcher> endMatchers = findEndMatchers(recoveryNode);
 		for (InputMatcher endMatcher : endMatchers) {
-			MatchResult endMatch = endMatcher.findMatch(input, startLocation);
+			// For now take a very large (basically unlimited) "max match length", experiment with smaller limit later
+			MatchResult endMatch = endMatcher.findMatch(input, startLocation, Integer.MAX_VALUE/2);
 			if (endMatch != null) {
 				result = SkippingStackNode.createResultUntilChar(uri, input, startLocation, endMatch.getEnd());
 				nodes.add(new SkippingStackNode<>(stackNodeIdDispenser.dispenseId(), prod, result, startLocation));
@@ -164,7 +168,8 @@ private List<SkippingStackNode<IConstructor>> findSkippingNodes(int[] input, int
 		// Find the first token of the next production and skip until before that
 		List<InputMatcher> nextMatchers = findNextMatchers(recoveryNode);
 		for (InputMatcher nextMatcher : nextMatchers) {
-			MatchResult nextMatch = nextMatcher.findMatch(input, startLocation+1);
+			// For now take a very large (basically unlimited) "max match length", experiment with smaller limit later
+			MatchResult nextMatch = nextMatcher.findMatch(input, startLocation+1, Integer.MAX_VALUE/2);
 			if (nextMatch != null) {
 				result = SkippingStackNode.createResultUntilChar(uri, input, startLocation, nextMatch.getStart());
 				nodes.add(new SkippingStackNode<>(stackNodeIdDispenser.dispenseId(), prod, result, startLocation));
@@ -280,9 +285,7 @@ public Void visit(LiteralStackNode<IConstructor> literal) {
 				return null;
 		}
 
-
 			@Override
-
 			public Void visit(CaseInsensitiveLiteralStackNode<IConstructor> literal) {
 				matchers.add(new CaseInsensitiveLiteralMatcher(literal.getLiteral()));
 				return null;
@@ -396,7 +399,7 @@ private static void collectUnmatchableMidProductionNodes(int location,
             AbstractStackNode<IConstructor> failedNode =
                 unmatchableMidProductionNodes.getSecond(i).getCleanCopy(location); // Clone it to prevent by-reference
                                                                                    // updates of the static version
-
+			
 			// Merge the information on the predecessors into the failed node.
 			for(int j = failedNodePredecessors.size() - 1; j >= 0; --j) {
 				AbstractStackNode<IConstructor> predecessor = failedNodePredecessors.getFirst(j);
-Original file line number
+Diff line change
@@ Expand Up / @@ -32,3 +32,6 @@ dependency-reduced-pom.xml @@
     *.iml
     .idea/
+    # generated by error recovery benchmark
+    rascal-recovery-stats.csv