Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Trigger recovery when only recovery stacks are left #2057

Merged
merged 13 commits into from
Oct 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,6 @@ dependency-reduced-pom.xml

*.iml
.idea/

# generated by error recovery benchmark
rascal-recovery-stats.csv
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ test bool rascalFunctionDeclarationMissingCloseParen() = checkRecovery(#Function

test bool rascalIfMissingExpr() = checkRecovery(#FunctionDeclaration, "void f(){if(){1;}}", [")"]);

test bool rascalIfBodyEmpty() = checkRecovery(#start[Module], "module A void f(){1;} void g(){if(1){}} void h(){1;}", ["} void h(){1"]);
test bool rascalIfBodyEmpty() = checkRecovery(#start[Module], "module A void f(){1;} void g(){if(1){}} void h(){1;}", ["{", "} "]);

// Not working yet:
/*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -433,7 +433,7 @@ TestStats batchRecoveryTest(loc syntaxFile, str topSort, loc dir, str ext, int m

TestStats runBatchRecoveryTest(loc syntaxFile, str topSort, loc dir, str ext, int maxFiles, int minFileSize, int maxFileSize, loc statFile, TestStats cumulativeStats) {
println("Batch testing in directory <dir> (maxFiles=<maxFiles>, maxFileSize=<maxFileSize>, fromFile=<fromFile>)");
writeFile(statFile, "source,size,result,duration,disambiguationDuration,errorSize\n");
writeFile(statFile, "source,size,result,duration,disambiguationDuration,errorCount,errorSize\n");
for (entry <- listEntries(dir)) {
loc file = dir + entry;
if (isFile(file)) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
module lang::rascal::tests::concrete::recovery::bugs::InfiniteLoopMultiError


import lang::rascal::tests::concrete::recovery::RecoveryTestSupport;
import lang::rascal::\syntax::Rascal;
import ParseTree;
import IO;

void testInfiniteLoopMultiError() {
standardParser = parser(#start[Module], allowRecovery=false, allowAmbiguity=true);
recoveryParser = parser(#start[Module], allowRecovery=true, allowAmbiguity=true);
source = |std:///Exception.rsc|;
input = readFile(source);
testDeleteUntilEol(standardParser, recoveryParser, source, input, 200, 100, begin=662, end=664);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
module lang::rascal::tests::concrete::recovery::bugs::MinimalMultiError

import lang::rascal::tests::concrete::recovery::RecoveryTestSupport;

layout Layout = [\ ]* !>> [\ ];

syntax S = T;

syntax T = AB AB AB End;
syntax AB = 'a' 'b';
syntax End = "$";

test bool multiOk() = checkRecovery(#S, "ababab$", []);

test bool multiOneError() = checkRecovery(#S, "acabab$", ["c"]);

test bool multiTwoError() = checkRecovery(#S, "acacab$", ["c","c"]);
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
module lang::rascal::tests::concrete::recovery::bugs::MultiErrorBug

import ParseTree;
import IO;
import util::ErrorRecovery;
import List;
import vis::Text;

start syntax Module = SyntaxDefinition* ;

syntax SyntaxDefinition = Prod ";" ;

lexical Name = [A-Z];

syntax Sym
= Sym NonterminalLabel
| StringConstant
;

lexical StringConstant = "\"" StringCharacter* "\"" ;

lexical StringCharacter = ![\"] ;

lexical NonterminalLabel = [a-z] ;

syntax Prod = Sym* ;

// This is an open issue: instead of two small errors, one big error tree is returned.
bool multiErrorBug() {
str input = "#\"a\";#\"b\";";
Tree t = parser(#start[Module], allowRecovery=true, allowAmbiguity=true)(input, |unknown:///?visualize=false|);

println(prettyTree(t));

list[Tree] errors = findAllErrors(t);
println("<size(errors)> Errors");
for (Tree error <- errors) {
Tree skipped = getSkipped(error);
println(" <skipped@\loc>: <getErrorText(error)>");
}
Tree disambiguated = disambiguateErrors(t);
list[Tree] disambiguatedErrors = findAllErrors(disambiguated);
println("After disambiguating:");
println("<size(disambiguatedErrors)> Errors");
for (Tree error <- disambiguatedErrors) {
Tree skipped = getSkipped(error);
println(" <skipped@\loc>: <getErrorText(error)>");
}
return true;
}

Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
module lang::rascal::tests::concrete::recovery::bugs::MultiErrorPico

import lang::pico::\syntax::Main;
import lang::rascal::tests::concrete::recovery::RecoveryTestSupport;

bool multiErrorPico() {
return checkRecovery(#start[Program], "begin
declare;
i := #1;
j := #2;
k := 3
end" , ["#1", "#2"]);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
begin
declare;
i := #1;
j := #2;
k := 3
end
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
module lang::rascal::tests::concrete::recovery::bugs::MultiProdRecovery

import lang::rascal::tests::concrete::recovery::RecoveryTestSupport;

layout Layout = [\ ]* !>> [\ ];

syntax Prog = Stat*;

syntax Stat
= Expr ";"
| "{" Stat* "}";

syntax Expr
= Expr "+" Expr
| Expr "-" Expr
| "e";

test bool multiProdOk() = checkRecovery(#Prog, "{ e + e; }", []);

test bool multiProdOperatorError() = checkRecovery(#Prog, "{ e * e; }", ["* "]);
34 changes: 30 additions & 4 deletions src/org/rascalmpl/parser/gtd/SGTDBF.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import org.rascalmpl.parser.gtd.stack.AbstractStackNode;
import org.rascalmpl.parser.gtd.stack.EpsilonStackNode;
import org.rascalmpl.parser.gtd.stack.NonTerminalStackNode;
import org.rascalmpl.parser.gtd.stack.SkippingStackNode;
import org.rascalmpl.parser.gtd.stack.edge.EdgesSet;
import org.rascalmpl.parser.gtd.stack.filter.ICompletionFilter;
import org.rascalmpl.parser.gtd.stack.filter.IEnterFilter;
Expand Down Expand Up @@ -1347,12 +1348,10 @@
initTime();

try {

if(invoked){
throw new RuntimeException("Can only invoke 'parse' once.");
}


invoked = true;

// Initialize.
Expand Down Expand Up @@ -1395,6 +1394,7 @@

debugListener.shifting(location, input, positionStore);
}

// Reduce-expand loop.
do {
debugListener.iterating();
Expand All @@ -1409,11 +1409,15 @@

shiftedLevel = true;

if (onlyRecoveredStacksLeft() && attemptRecovery()) {
continue;
}

if (!findStacksToReduce()) {
if(location == input.length){
if(location == input.length) {
EdgesSet<P> startNodeEdgesSet = startNode.getIncomingEdges();
int resultStoreId = getResultStoreId(startNode.getId());
if(startNodeEdgesSet != null && startNodeEdgesSet.getLastVisitedLevel(resultStoreId) == input.length){
if(startNodeEdgesSet != null && startNodeEdgesSet.getLastVisitedLevel(resultStoreId) == input.length) {
result = startNodeEdgesSet.getLastResult(resultStoreId); // Success.
break;
}
Expand Down Expand Up @@ -1464,6 +1468,28 @@
timestamp = System.nanoTime();
}

private boolean onlyRecoveredStacksLeft() {
int recoveredStacksFound = 0;

for (int i=0; i<todoLists.length; i++) {
DoubleStack<AbstractStackNode<P>, AbstractNode> todoList = todoLists[i];
if (todoList != null) {
int size = todoList.getSize();
for (int j=0; j<size; j++) {
if (!(todoList.getFirst(j) instanceof SkippingStackNode)) {
return false;
}

if (recoveredStacksFound++ > 50) {
return false;

Check warning on line 1484 in src/org/rascalmpl/parser/gtd/SGTDBF.java

View check run for this annotation

Codecov / codecov/patch

src/org/rascalmpl/parser/gtd/SGTDBF.java#L1484

Added line #L1484 was not covered by tests
}
}
}
}

return recoveredStacksFound > 0;
}

private void checkTime(String msg) {
long newStamp = System.nanoTime();
long duration = newStamp - timestamp;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,11 @@ public CaseInsensitiveLiteralMatcher(int[][] ciLiteral) {
}

@Override
public MatchResult findMatch(int[] input, int startLocation) {
public MatchResult findMatch(int[] input, int startLocation, int maxLength) {
int length = chars.length;

for (int start=startLocation; start < input.length - length; start++) {
int limit = Math.min(startLocation + maxLength - length, input.length - length + 1);
for (int start=startLocation; start < limit; start++) {
boolean matches = true;
for (int i=0; i<length; i++) {
int inputChar = input[start+i];
Expand Down
2 changes: 1 addition & 1 deletion src/org/rascalmpl/parser/uptr/recovery/FailingMatcher.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

public class FailingMatcher implements InputMatcher {
@Override
public MatchResult findMatch(int[] input, int startLocation) {
public MatchResult findMatch(int[] input, int startLocation, int maxLength) {
return null;
}
}
2 changes: 1 addition & 1 deletion src/org/rascalmpl/parser/uptr/recovery/InputMatcher.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ public interface InputMatcher {
public static InputMatcher FAIL = new FailingMatcher();

// Find a match in the input. Returning `null` means no match has been found.
MatchResult findMatch(int[] input, int startLocation);
MatchResult findMatch(int[] input, int startLocation, int maxLength);

public static class MatchResult {
private final int start;
Expand Down
5 changes: 3 additions & 2 deletions src/org/rascalmpl/parser/uptr/recovery/LiteralMatcher.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,10 @@ public LiteralMatcher(int[] literal) {
}

@Override
public MatchResult findMatch(int[] input, int startLocation) {
public MatchResult findMatch(int[] input, int startLocation, int maxLength) {
int length = chars.length;
for (int start=startLocation; start<input.length - length + 1; start++) {
int limit = Math.min(startLocation + maxLength - length, input.length - length + 1);
for (int start=startLocation; start<limit; start++) {
boolean matches = true;
for (int i=0; i<length; i++) {
if (input[start + i] != chars[i]) {
Expand Down
39 changes: 21 additions & 18 deletions src/org/rascalmpl/parser/uptr/recovery/ToTokenRecoverer.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import java.util.List;
import java.util.Set;

import org.apache.commons.lang3.tuple.Triple;
import org.rascalmpl.parser.gtd.ExpectsProvider;
import org.rascalmpl.parser.gtd.recovery.IRecoverer;
import org.rascalmpl.parser.gtd.result.AbstractNode;
Expand Down Expand Up @@ -69,7 +70,7 @@ public DoubleArrayList<AbstractStackNode<IConstructor>, AbstractNode> reviveStac

// For now we ignore unmatchable leaf nodes and filtered nodes. At some point we might use those to
// improve error recovery.

ArrayList<AbstractStackNode<IConstructor>> failedNodes = new ArrayList<>();
collectUnexpandableNodes(unexpandableNodes, failedNodes);
collectUnmatchableMidProductionNodes(location, unmatchableMidProductionNodes, failedNodes);
Expand All @@ -81,13 +82,15 @@ private DoubleArrayList<AbstractStackNode<IConstructor>, AbstractNode> reviveNod
DoubleArrayList<AbstractStackNode<IConstructor>, ArrayList<IConstructor>> recoveryNodes) {
DoubleArrayList<AbstractStackNode<IConstructor>, AbstractNode> recoveredNodes = new DoubleArrayList<>();

Set<Triple<Integer, IConstructor, Integer>> skippedIds = new HashSet<>();

// Sort nodes by start location
recoveryNodes
recoveryNodes
.sort((e1, e2) -> Integer.compare(e2.getLeft().getStartLocation(), e1.getLeft().getStartLocation()));

if (VISUALIZE_RECOVERY_NODES) {
ParseStateVisualizer visualizer = new ParseStateVisualizer("Recovery");
visualizer.visualizeRecoveryNodes(recoveryNodes);
ParseStateVisualizer visualizer = new ParseStateVisualizer("Recovery");
visualizer.visualizeRecoveryNodes(recoveryNodes);
}

for (int i = 0; i<recoveryNodes.size(); i++) {
Expand All @@ -101,9 +104,18 @@ private DoubleArrayList<AbstractStackNode<IConstructor>, AbstractNode> reviveNod
for (int j = prods.size() - 1; j >= 0; --j) {
IConstructor prod = prods.get(j);

IConstructor type = ProductionAdapter.getType(prod);

List<SkippingStackNode<IConstructor>> skippingNodes =
findSkippingNodes(input, location, recoveryNode, prod, startLocation);
for (SkippingStackNode<IConstructor> skippingNode : skippingNodes) {
int skipLength = skippingNode.getLength();

if (!skippedIds.add(Triple.ofNonNull(startLocation, type, skipLength))) {
// Do not add this skipped node if a node with the same startLocation, type, and skipLength has been added already
continue;
}

AbstractStackNode<IConstructor> continuer =
new RecoveryPointStackNode<>(stackNodeIdDispenser.dispenseId(), prod, recoveryNode);

Expand Down Expand Up @@ -142,19 +154,11 @@ private List<SkippingStackNode<IConstructor>> findSkippingNodes(int[] input, int
return nodes; // No other nodes would be useful
}

// Try to find whitespace to skip to
// This often creates hopeless recovery attempts, but it might help in some cases.
// Further experimentation should quantify this statement.
/*
* result = SkippingStackNode.createResultUntilCharClass(WHITESPACE, input, startLocation, prod,
* dot); if (result != null) { nodes.add(new SkippingStackNode<>(stackNodeIdDispenser.dispenseId(),
* prod, result, startLocation)); }
*/

// Find the last token of this production and skip until after that
List<InputMatcher> endMatchers = findEndMatchers(recoveryNode);
for (InputMatcher endMatcher : endMatchers) {
MatchResult endMatch = endMatcher.findMatch(input, startLocation);
// For now take a very large (basically unlimited) "max match length", experiment with smaller limit later
MatchResult endMatch = endMatcher.findMatch(input, startLocation, Integer.MAX_VALUE/2);
if (endMatch != null) {
result = SkippingStackNode.createResultUntilChar(uri, input, startLocation, endMatch.getEnd());
nodes.add(new SkippingStackNode<>(stackNodeIdDispenser.dispenseId(), prod, result, startLocation));
Expand All @@ -164,7 +168,8 @@ private List<SkippingStackNode<IConstructor>> findSkippingNodes(int[] input, int
// Find the first token of the next production and skip until before that
List<InputMatcher> nextMatchers = findNextMatchers(recoveryNode);
for (InputMatcher nextMatcher : nextMatchers) {
MatchResult nextMatch = nextMatcher.findMatch(input, startLocation+1);
// For now take a very large (basically unlimited) "max match length", experiment with smaller limit later
MatchResult nextMatch = nextMatcher.findMatch(input, startLocation+1, Integer.MAX_VALUE/2);
if (nextMatch != null) {
result = SkippingStackNode.createResultUntilChar(uri, input, startLocation, nextMatch.getStart());
nodes.add(new SkippingStackNode<>(stackNodeIdDispenser.dispenseId(), prod, result, startLocation));
Expand Down Expand Up @@ -280,9 +285,7 @@ public Void visit(LiteralStackNode<IConstructor> literal) {
return null;
}


@Override

public Void visit(CaseInsensitiveLiteralStackNode<IConstructor> literal) {
matchers.add(new CaseInsensitiveLiteralMatcher(literal.getLiteral()));
return null;
Expand Down Expand Up @@ -396,7 +399,7 @@ private static void collectUnmatchableMidProductionNodes(int location,
AbstractStackNode<IConstructor> failedNode =
unmatchableMidProductionNodes.getSecond(i).getCleanCopy(location); // Clone it to prevent by-reference
// updates of the static version

// Merge the information on the predecessors into the failed node.
for(int j = failedNodePredecessors.size() - 1; j >= 0; --j) {
AbstractStackNode<IConstructor> predecessor = failedNodePredecessors.getFirst(j);
Expand Down
Loading