diff --git a/.gitignore b/.gitignore index b5d6e93bca0..e93bbee77b0 100644 --- a/.gitignore +++ b/.gitignore @@ -32,3 +32,6 @@ dependency-reduced-pom.xml *.iml .idea/ + +# generated by error recovery benchmark +rascal-recovery-stats.csv diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RascalRecoveryTests.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RascalRecoveryTests.rsc index da52a010466..1d54ed8ab92 100644 --- a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RascalRecoveryTests.rsc +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RascalRecoveryTests.rsc @@ -54,7 +54,7 @@ test bool rascalFunctionDeclarationMissingCloseParen() = checkRecovery(#Function test bool rascalIfMissingExpr() = checkRecovery(#FunctionDeclaration, "void f(){if(){1;}}", [")"]); -test bool rascalIfBodyEmpty() = checkRecovery(#start[Module], "module A void f(){1;} void g(){if(1){}} void h(){1;}", ["} void h(){1"]); +test bool rascalIfBodyEmpty() = checkRecovery(#start[Module], "module A void f(){1;} void g(){if(1){}} void h(){1;}", ["{", "} "]); // Not working yet: /* diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RecoveryTestSupport.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RecoveryTestSupport.rsc index 847345d35a1..49cbc602f5f 100644 --- a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RecoveryTestSupport.rsc +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RecoveryTestSupport.rsc @@ -433,7 +433,7 @@ TestStats batchRecoveryTest(loc syntaxFile, str topSort, loc dir, str ext, int m TestStats runBatchRecoveryTest(loc syntaxFile, str topSort, loc dir, str ext, int maxFiles, int minFileSize, int maxFileSize, loc statFile, TestStats cumulativeStats) { println("Batch testing in directory (maxFiles=, maxFileSize=, fromFile=)"); - writeFile(statFile, "source,size,result,duration,disambiguationDuration,errorSize\n"); + writeFile(statFile, "source,size,result,duration,disambiguationDuration,errorCount,errorSize\n"); for (entry <- listEntries(dir)) { loc file = dir + entry; if (isFile(file)) { diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/InfiniteLoopMultiError.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/InfiniteLoopMultiError.rsc new file mode 100644 index 00000000000..6fa4525a051 --- /dev/null +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/InfiniteLoopMultiError.rsc @@ -0,0 +1,15 @@ +module lang::rascal::tests::concrete::recovery::bugs::InfiniteLoopMultiError + + +import lang::rascal::tests::concrete::recovery::RecoveryTestSupport; +import lang::rascal::\syntax::Rascal; +import ParseTree; +import IO; + +void testInfiniteLoopMultiError() { + standardParser = parser(#start[Module], allowRecovery=false, allowAmbiguity=true); + recoveryParser = parser(#start[Module], allowRecovery=true, allowAmbiguity=true); + source = |std:///Exception.rsc|; + input = readFile(source); + testDeleteUntilEol(standardParser, recoveryParser, source, input, 200, 100, begin=662, end=664); +} diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/MinimalMultiError.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/MinimalMultiError.rsc new file mode 100644 index 00000000000..75797f226f2 --- /dev/null +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/MinimalMultiError.rsc @@ -0,0 +1,17 @@ +module lang::rascal::tests::concrete::recovery::bugs::MinimalMultiError + +import lang::rascal::tests::concrete::recovery::RecoveryTestSupport; + +layout Layout = [\ ]* !>> [\ ]; + +syntax S = T; + +syntax T = AB AB AB End; +syntax AB = 'a' 'b'; +syntax End = "$"; + +test bool multiOk() = checkRecovery(#S, "ababab$", []); + +test bool multiOneError() = checkRecovery(#S, "acabab$", ["c"]); + +test bool multiTwoError() = checkRecovery(#S, "acacab$", ["c","c"]); diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/MultiErrorBug.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/MultiErrorBug.rsc new file mode 100644 index 00000000000..0b199a923d7 --- /dev/null +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/MultiErrorBug.rsc @@ -0,0 +1,51 @@ +module lang::rascal::tests::concrete::recovery::bugs::MultiErrorBug + +import ParseTree; +import IO; +import util::ErrorRecovery; +import List; +import vis::Text; + +start syntax Module = SyntaxDefinition* ; + +syntax SyntaxDefinition = Prod ";" ; + +lexical Name = [A-Z]; + +syntax Sym + = Sym NonterminalLabel + | StringConstant + ; + +lexical StringConstant = "\"" StringCharacter* "\"" ; + +lexical StringCharacter = ![\"] ; + +lexical NonterminalLabel = [a-z] ; + +syntax Prod = Sym* ; + +// This is an open issue: instead of two small errors, one big error tree is returned. +bool multiErrorBug() { + str input = "#\"a\";#\"b\";"; + Tree t = parser(#start[Module], allowRecovery=true, allowAmbiguity=true)(input, |unknown:///?visualize=false|); + + println(prettyTree(t)); + + list[Tree] errors = findAllErrors(t); + println(" Errors"); + for (Tree error <- errors) { + Tree skipped = getSkipped(error); + println(" : "); + } + Tree disambiguated = disambiguateErrors(t); + list[Tree] disambiguatedErrors = findAllErrors(disambiguated); + println("After disambiguating:"); + println(" Errors"); + for (Tree error <- disambiguatedErrors) { + Tree skipped = getSkipped(error); + println(" : "); + } + return true; +} + diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/MultiErrorPico.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/MultiErrorPico.rsc new file mode 100644 index 00000000000..2a81b5f6af3 --- /dev/null +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/MultiErrorPico.rsc @@ -0,0 +1,13 @@ +module lang::rascal::tests::concrete::recovery::bugs::MultiErrorPico + +import lang::pico::\syntax::Main; +import lang::rascal::tests::concrete::recovery::RecoveryTestSupport; + +bool multiErrorPico() { + return checkRecovery(#start[Program], "begin + declare; + i := #1; + j := #2; + k := 3 +end" , ["#1", "#2"]); +} diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/MultiErrorPicoInput.pico b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/MultiErrorPicoInput.pico new file mode 100644 index 00000000000..a7393e9f705 --- /dev/null +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/MultiErrorPicoInput.pico @@ -0,0 +1,6 @@ +begin + declare; + i := #1; + j := #2; + k := 3 +end \ No newline at end of file diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/MultiProdRecovery.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/MultiProdRecovery.rsc new file mode 100644 index 00000000000..ed6d77fe645 --- /dev/null +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/MultiProdRecovery.rsc @@ -0,0 +1,20 @@ +module lang::rascal::tests::concrete::recovery::bugs::MultiProdRecovery + +import lang::rascal::tests::concrete::recovery::RecoveryTestSupport; + +layout Layout = [\ ]* !>> [\ ]; + +syntax Prog = Stat*; + +syntax Stat + = Expr ";" + | "{" Stat* "}"; + +syntax Expr + = Expr "+" Expr + | Expr "-" Expr + | "e"; + +test bool multiProdOk() = checkRecovery(#Prog, "{ e + e; }", []); + +test bool multiProdOperatorError() = checkRecovery(#Prog, "{ e * e; }", ["* "]); diff --git a/src/org/rascalmpl/parser/gtd/SGTDBF.java b/src/org/rascalmpl/parser/gtd/SGTDBF.java index fea89a68099..2c249b0cc3f 100755 --- a/src/org/rascalmpl/parser/gtd/SGTDBF.java +++ b/src/org/rascalmpl/parser/gtd/SGTDBF.java @@ -32,6 +32,7 @@ import org.rascalmpl.parser.gtd.stack.AbstractStackNode; import org.rascalmpl.parser.gtd.stack.EpsilonStackNode; import org.rascalmpl.parser.gtd.stack.NonTerminalStackNode; +import org.rascalmpl.parser.gtd.stack.SkippingStackNode; import org.rascalmpl.parser.gtd.stack.edge.EdgesSet; import org.rascalmpl.parser.gtd.stack.filter.ICompletionFilter; import org.rascalmpl.parser.gtd.stack.filter.IEnterFilter; @@ -1347,12 +1348,10 @@ protected AbstractNode parse(AbstractStackNode

startNode, URI inputURI, int[] initTime(); try { - if(invoked){ throw new RuntimeException("Can only invoke 'parse' once."); } - invoked = true; // Initialize. @@ -1395,6 +1394,7 @@ protected AbstractNode parse(AbstractStackNode

startNode, URI inputURI, int[] debugListener.shifting(location, input, positionStore); } + // Reduce-expand loop. do { debugListener.iterating(); @@ -1409,11 +1409,15 @@ protected AbstractNode parse(AbstractStackNode

startNode, URI inputURI, int[] shiftedLevel = true; + if (onlyRecoveredStacksLeft() && attemptRecovery()) { + continue; + } + if (!findStacksToReduce()) { - if(location == input.length){ + if(location == input.length) { EdgesSet

startNodeEdgesSet = startNode.getIncomingEdges(); int resultStoreId = getResultStoreId(startNode.getId()); - if(startNodeEdgesSet != null && startNodeEdgesSet.getLastVisitedLevel(resultStoreId) == input.length){ + if(startNodeEdgesSet != null && startNodeEdgesSet.getLastVisitedLevel(resultStoreId) == input.length) { result = startNodeEdgesSet.getLastResult(resultStoreId); // Success. break; } @@ -1464,6 +1468,28 @@ private void initTime() { timestamp = System.nanoTime(); } + private boolean onlyRecoveredStacksLeft() { + int recoveredStacksFound = 0; + + for (int i=0; i, AbstractNode> todoList = todoLists[i]; + if (todoList != null) { + int size = todoList.getSize(); + for (int j=0; j 50) { + return false; + } + } + } + } + + return recoveredStacksFound > 0; + } + private void checkTime(String msg) { long newStamp = System.nanoTime(); long duration = newStamp - timestamp; diff --git a/src/org/rascalmpl/parser/uptr/recovery/CaseInsensitiveLiteralMatcher.java b/src/org/rascalmpl/parser/uptr/recovery/CaseInsensitiveLiteralMatcher.java index f1dda955e94..c72e1fa1387 100644 --- a/src/org/rascalmpl/parser/uptr/recovery/CaseInsensitiveLiteralMatcher.java +++ b/src/org/rascalmpl/parser/uptr/recovery/CaseInsensitiveLiteralMatcher.java @@ -46,10 +46,11 @@ public CaseInsensitiveLiteralMatcher(int[][] ciLiteral) { } @Override - public MatchResult findMatch(int[] input, int startLocation) { + public MatchResult findMatch(int[] input, int startLocation, int maxLength) { int length = chars.length; - for (int start=startLocation; start < input.length - length; start++) { + int limit = Math.min(startLocation + maxLength - length, input.length - length + 1); + for (int start=startLocation; start < limit; start++) { boolean matches = true; for (int i=0; i, AbstractNode> reviveStac // For now we ignore unmatchable leaf nodes and filtered nodes. At some point we might use those to // improve error recovery. - + ArrayList> failedNodes = new ArrayList<>(); collectUnexpandableNodes(unexpandableNodes, failedNodes); collectUnmatchableMidProductionNodes(location, unmatchableMidProductionNodes, failedNodes); @@ -81,13 +82,15 @@ private DoubleArrayList, AbstractNode> reviveNod DoubleArrayList, ArrayList> recoveryNodes) { DoubleArrayList, AbstractNode> recoveredNodes = new DoubleArrayList<>(); + Set> skippedIds = new HashSet<>(); + // Sort nodes by start location - recoveryNodes + recoveryNodes .sort((e1, e2) -> Integer.compare(e2.getLeft().getStartLocation(), e1.getLeft().getStartLocation())); if (VISUALIZE_RECOVERY_NODES) { - ParseStateVisualizer visualizer = new ParseStateVisualizer("Recovery"); - visualizer.visualizeRecoveryNodes(recoveryNodes); + ParseStateVisualizer visualizer = new ParseStateVisualizer("Recovery"); + visualizer.visualizeRecoveryNodes(recoveryNodes); } for (int i = 0; i, AbstractNode> reviveNod for (int j = prods.size() - 1; j >= 0; --j) { IConstructor prod = prods.get(j); + IConstructor type = ProductionAdapter.getType(prod); + List> skippingNodes = findSkippingNodes(input, location, recoveryNode, prod, startLocation); for (SkippingStackNode skippingNode : skippingNodes) { + int skipLength = skippingNode.getLength(); + + if (!skippedIds.add(Triple.ofNonNull(startLocation, type, skipLength))) { + // Do not add this skipped node if a node with the same startLocation, type, and skipLength has been added already + continue; + } + AbstractStackNode continuer = new RecoveryPointStackNode<>(stackNodeIdDispenser.dispenseId(), prod, recoveryNode); @@ -142,19 +154,11 @@ private List> findSkippingNodes(int[] input, int return nodes; // No other nodes would be useful } - // Try to find whitespace to skip to - // This often creates hopeless recovery attempts, but it might help in some cases. - // Further experimentation should quantify this statement. - /* - * result = SkippingStackNode.createResultUntilCharClass(WHITESPACE, input, startLocation, prod, - * dot); if (result != null) { nodes.add(new SkippingStackNode<>(stackNodeIdDispenser.dispenseId(), - * prod, result, startLocation)); } - */ - // Find the last token of this production and skip until after that List endMatchers = findEndMatchers(recoveryNode); for (InputMatcher endMatcher : endMatchers) { - MatchResult endMatch = endMatcher.findMatch(input, startLocation); + // For now take a very large (basically unlimited) "max match length", experiment with smaller limit later + MatchResult endMatch = endMatcher.findMatch(input, startLocation, Integer.MAX_VALUE/2); if (endMatch != null) { result = SkippingStackNode.createResultUntilChar(uri, input, startLocation, endMatch.getEnd()); nodes.add(new SkippingStackNode<>(stackNodeIdDispenser.dispenseId(), prod, result, startLocation)); @@ -164,7 +168,8 @@ private List> findSkippingNodes(int[] input, int // Find the first token of the next production and skip until before that List nextMatchers = findNextMatchers(recoveryNode); for (InputMatcher nextMatcher : nextMatchers) { - MatchResult nextMatch = nextMatcher.findMatch(input, startLocation+1); + // For now take a very large (basically unlimited) "max match length", experiment with smaller limit later + MatchResult nextMatch = nextMatcher.findMatch(input, startLocation+1, Integer.MAX_VALUE/2); if (nextMatch != null) { result = SkippingStackNode.createResultUntilChar(uri, input, startLocation, nextMatch.getStart()); nodes.add(new SkippingStackNode<>(stackNodeIdDispenser.dispenseId(), prod, result, startLocation)); @@ -280,9 +285,7 @@ public Void visit(LiteralStackNode literal) { return null; } - @Override - public Void visit(CaseInsensitiveLiteralStackNode literal) { matchers.add(new CaseInsensitiveLiteralMatcher(literal.getLiteral())); return null; @@ -396,7 +399,7 @@ private static void collectUnmatchableMidProductionNodes(int location, AbstractStackNode failedNode = unmatchableMidProductionNodes.getSecond(i).getCleanCopy(location); // Clone it to prevent by-reference // updates of the static version - + // Merge the information on the predecessors into the failed node. for(int j = failedNodePredecessors.size() - 1; j >= 0; --j) { AbstractStackNode predecessor = failedNodePredecessors.getFirst(j);