diff --git a/src/org/rascalmpl/library/ParseTree.rsc b/src/org/rascalmpl/library/ParseTree.rsc index cbacbb70c4e..3fff07aa9d1 100644 --- a/src/org/rascalmpl/library/ParseTree.rsc +++ b/src/org/rascalmpl/library/ParseTree.rsc @@ -144,7 +144,7 @@ extend Message; extend List; import String; -import Set; +import util::Maybe; @synopsis{The Tree data type as produced by the parser.} @description{ @@ -355,6 +355,16 @@ The latter option terminates much faster, i.e. always in cubic time, and always while constructing ambiguous parse forests may grow to O(n^p+1), where p is the length of the longest production rule and n is the length of the input. +The `allowRecovery` can be set to `true` to enable error recovery. This is an experimental feature. +When error recovery is enabled, the parser will attempt to recover from parse errors and continue parsing. +If successful, a parse tree with error and skipped productions is returned (see the definition of `Production` above). +A number of functions is provided to analyze trees with errors, for example `hasErrors`, `getSkipped`, and `getErrorText`. +Note that the resulting parse forest can contain a lot of error nodes. `disambiguateErrors` can be used to prune the forest +and leave a tree with a single (or even zero) errors based on simple heuristics. +When `allowAmbiguity` is set to false, `allowRecovery` is set to true, and `filters` is empty, this disambiguation is done +automatically so you should end up with a tree with no error ambiguities. Regular ambiguities can still occur +and will result in an error. + The `filters` set contains functions which may be called optionally after the parse algorithm has finished and just before the Tree representation is built. The set of functions contain alternative functions, only on of them is successfully applied to each node in a tree. If such a function fails to apply, the other ones are tried. There is no fixed-point computation, so @@ -784,7 +794,15 @@ list[Tree] findAllErrors(Tree tree) = [err | /err:appl(error(_, _, _), _) := tr Tree findFirstError(/err:appl(error(_, _, _), _)) = err; @synopsis{Find the best error from a tree containing errors. This function will fail if `tree` does not contain an error.} -Tree findBestError(Tree tree) = findFirstError(defaultErrorDisambiguationFilter(tree)); +Maybe[Tree] findBestError(Tree tree) { + Tree disambiguated = disambiguateErrors(tree); + if (/err:appl(error(_, _, _), _) := disambiguated) { + return just(err); + } + + // All errors have disappeared + return nothing(); +} @synopsis{Get the symbol (sort) of the failing production} Symbol getErrorSymbol(appl(error(Symbol sym, _, _), _)) = sym; @@ -803,35 +821,9 @@ If you want the text of the whole error tree, you can just use string interpolat } str getErrorText(appl(error(_, _, _), [*_, appl(skipped(_), chars)])) = stringChars([c | char(c) <- chars]); +@javaClass{org.rascalmpl.parser.gtd.recovery.ParseErrorDisambiguator} @synopsis{Error recovery often produces ambiguous trees where errors can be recovered in multiple ways. This filter removes error trees until no ambiguities caused by error recovery are left. -Note that regular ambiguous trees remain in the parse forest. +Note that regular ambiguous trees remain in the parse forest unless `allowAmbiguity` is set to false in which case an error is thrown. } -Tree defaultErrorDisambiguationFilter(Tree t) { - return visit(t) { - case a:amb(_) => ambDisambiguation(a) - }; -} - -private Tree ambDisambiguation(amb(set[Tree] alternatives)) { - // Go depth-first - rel[int score, Tree alt] scoredErrorTrees = { | Tree alt <- alternatives }; - set[Tree] nonErrorTrees = scoredErrorTrees[0]; - - if (nonErrorTrees == {}) { - return (getFirstFrom(scoredErrorTrees) | it.score > c.score ? c : it | c <- scoredErrorTrees).alt; - } - - if ({Tree single} := nonErrorTrees) { - // One ambiguity left, no ambiguity concerns here - return single; - } - - // Multiple non-error trees left, return an ambiguity node with just the non-error trees - return amb(nonErrorTrees); -} - -private int scoreErrors(Tree t) = (0 | it + getSkipped(e).src.length | /e:appl(error(_,_,_),_) := t); - -// Handle char and cycle nodes -default Tree defaultErrorDisambiguationFilter(Tree t) = t; +java Tree disambiguateErrors(Tree t, bool allowAmbiguity=true); diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/BasicRecoveryTests.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/BasicRecoveryTests.rsc index 23b3af5d2f5..5464f3e9777 100644 --- a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/BasicRecoveryTests.rsc +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/BasicRecoveryTests.rsc @@ -15,6 +15,7 @@ module lang::rascal::tests::concrete::recovery::BasicRecoveryTests import ParseTree; +import util::Maybe; layout Layout = [\ ]* !>> [\ ]; @@ -33,15 +34,23 @@ test bool basicOk() { test bool abx() { Tree t = parseS("a b x $"); - return getErrorText(findBestError(t)) == "x "; + return getErrorText(findBestError(t).val) == "x "; } test bool axc() { Tree t = parseS("a x c $"); - return getErrorText(findBestError(t)) == "x c"; + return getErrorText(findBestError(t).val) == "x c"; } test bool ax() { - Tree t = parseS("a x $"); - return getErrorText(findBestError(t)) == "x "; + str input = "a x $"; + + Tree t = parseS(input); + assert size(findAllErrors(t)) == 3; + assert getErrorText(findBestError(t).val) == "x "; + + Tree autoDisambiguated = parser(#S, allowRecovery=true, allowAmbiguity=false)(input, |unknown:///|); + assert size(findAllErrors(autoDisambiguated)) == 1; + + return getErrorText(findFirstError(autoDisambiguated)) == getErrorText(findBestError(t).val); } diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/ErrorRecoveryBenchmark.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/ErrorRecoveryBenchmark.rsc index 6642b08d605..777afb171ec 100644 --- a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/ErrorRecoveryBenchmark.rsc +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/ErrorRecoveryBenchmark.rsc @@ -33,26 +33,37 @@ void runLanguageTests() { testRecoveryRascal(); } -void runRascalBatchTest(int maxFiles=1000, int maxFileSize=4000) { +void runRascalBatchTest(int maxFiles=1000, int minFileSize=0, int maxFileSize=4000, int fromFile=0) { int startTime = realTime(); - TestStats stats = batchRecoveryTest(|std:///lang/rascal/syntax/Rascal.rsc|, "Module", |std:///|, ".rsc", maxFiles, maxFileSize); + TestStats stats = batchRecoveryTest(|std:///lang/rascal/syntax/Rascal.rsc|, "Module", |std:///|, ".rsc", maxFiles, minFileSize, maxFileSize, fromFile, |cwd:///rascal-recovery-stats.csv|); int duration = realTime() - startTime; println(); - println("========================im========================================"); + println("================================================================"); println("Rascal batch test done in seconds, total result:"); printStats(stats); } int main(list[str] args) { int maxFiles = 1000; - int maxFileSize = 4000; - if (size(args) == 2) { + int maxFileSize = 1000000; + int minFileSize = 0; + int fromFile = 0; + if (size(args) > 0) { maxFiles = toInt(args[0]); - maxFileSize = toInt(args[1]); - } else if (size(args) != 0) { - println("Usage: ErrorRecoveryBenchmark "); } + if (size(args) > 1) { + minFileSize = toInt(args[1]); + } + if (size(args) > 2) { + maxFileSize = toInt(args[2]); + } + if (size(args) > 3) { + fromFile = toInt(args[3]); + } else { + println("Usage: ErrorRecoveryBenchmark [\ [\ [\ [\]]]]"); + } + + runRascalBatchTest(maxFiles=maxFiles, minFileSize=minFileSize, maxFileSize=maxFileSize, fromFile=fromFile); - runRascalBatchTest(maxFiles=maxFiles, maxFileSize=maxFileSize); return 0; } \ No newline at end of file diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/NestedRecoveryTests.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/NestedRecoveryTests.rsc index 528c20d9eb3..4d4804711b2 100644 --- a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/NestedRecoveryTests.rsc +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/NestedRecoveryTests.rsc @@ -15,6 +15,7 @@ module lang::rascal::tests::concrete::recovery::NestedRecoveryTests import ParseTree; +import util::Maybe; layout Layout = [\ ]* !>> [\ ]; @@ -35,5 +36,5 @@ test bool nestedOk() { test bool nestedTypo() { Tree t = parseS("a b x c"); - return getErrorText(findFirstError(defaultErrorDisambiguationFilter(t))) == "x "; + return getErrorText(findBestError(t).val) == "x "; } diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/PicoRecoveryTests.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/PicoRecoveryTests.rsc index 668315da4c9..a596eab9c3f 100644 --- a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/PicoRecoveryTests.rsc +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/PicoRecoveryTests.rsc @@ -20,12 +20,13 @@ import ParseTree; import IO; import String; +import util::Maybe; Tree parsePico(str input, bool visualize=false) = parser(#Program, allowRecovery=true, allowAmbiguity=true)(input, |unknown:///?visualize=<"">|); bool checkError(Tree t, str expectedError) { - str bestError = getErrorText(findBestError(t)); + str bestError = getErrorText(findBestError(t).val); //println("best error: , expected: "); return size(bestError) == size(expectedError); } diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RascalRecoveryTests.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RascalRecoveryTests.rsc index 23e8e175533..6a936f98dcf 100644 --- a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RascalRecoveryTests.rsc +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RascalRecoveryTests.rsc @@ -18,6 +18,7 @@ import lang::rascal::\syntax::Rascal; import ParseTree; import IO; +import util::Maybe; bool debugging = false; @@ -31,8 +32,7 @@ Tree parseRascal(type[&T] t, str input, bool visualize=false) { println("- "); } - Tree disambiguated = defaultErrorDisambiguationFilter(result); - println("Best error: "); + println("Best error: "); } } @@ -96,7 +96,7 @@ test bool rascalMissingCloseParen() { Tree t = parseRascal("module A void f({} void g(){}"); assert getErrorText(findFirstError(t)) == "void g("; - assert getErrorText(findFirstError(defaultErrorDisambiguationFilter(t))) == "("; + assert getErrorText(findBestError(t).val) == "("; return true; } @@ -106,7 +106,7 @@ test bool rascalFunctionDeclarationMissingCloseParen() { assert getErrorText(findFirstError(t)) == "void g("; - Tree error = findFirstError(defaultErrorDisambiguationFilter(t)); + Tree error = findBestError(t).val; assert getErrorText(error) == "("; loc location = getSkipped(error).src; assert location.begin.column == 16 && location.length == 1; @@ -116,14 +116,14 @@ test bool rascalFunctionDeclarationMissingCloseParen() { test bool rascalIfMissingExpr() { Tree t = parseFunctionDeclaration("void f(){if(){1;}}", visualize=false); - return getErrorText(findFirstError(t)) == ")"; + return getErrorText(findBestError(t).val) == ")"; } test bool rascalIfBodyEmpty() { Tree t = parseRascal("module A void f(){1;} void g(){if(1){}} void h(){1;}"); println("error: "); - assert getErrorText(findBestError(t)) == "} void h(){1"; + assert getErrorText(findBestError(t).val) == "} void h(){1"; return true; } @@ -164,4 +164,4 @@ test bool rascalIfMissingSemi() { println("error text: "); return getErrorText(findFirstError(t)) == ";"; } -*/ \ No newline at end of file +*/ diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RecoveryTestSupport.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RecoveryTestSupport.rsc index ac73a8f53f9..2bf0689fdb3 100644 --- a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RecoveryTestSupport.rsc +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RecoveryTestSupport.rsc @@ -8,6 +8,7 @@ import util::Benchmark; import Grammar; import analysis::statistics::Descriptive; import util::Math; +import util::Maybe; import Set; import List; @@ -15,34 +16,50 @@ import lang::rascal::grammar::definition::Modules; alias FrequencyTable = map[int val, int count]; -public data TestMeasurement(loc source=|unknown:///|, int duration=0) = successfulParse() | recovered(int errorSize=0) | parseError(); -public data FileStats = fileStats(int totalParses = 0, int successfulParses=0, int successfulRecoveries=0, int failedRecoveries=0, int parseErrors=0, int slowParses=0, FrequencyTable parseTimeRatios=()); +public data TestMeasurement(loc source=|unknown:///|, int duration=0) = successfulParse() | recovered(int errorSize=0) | parseError() | successfulDisambiguation(); +public data FileStats = fileStats(int totalParses = 0, int successfulParses=0, int successfulRecoveries=0, int successfulDisambiguations=0, int failedRecoveries=0, int parseErrors=0, int slowParses=0, FrequencyTable parseTimeRatios=()); -public data TestStats = testStats(int filesTested=0, int testCount=0, FrequencyTable successfulParsePct=(), FrequencyTable successfulRecoveryPct=(), FrequencyTable failedRecoveryPct=(), FrequencyTable parseErrorPct=(), FrequencyTable slowParsePct=(), FrequencyTable parseTimeRatios=()); +public data TestStats = testStats(int filesTested=0, int testCount=0, FrequencyTable successfulParses=(), FrequencyTable successfulRecoveries=(), FrequencyTable successfulDisambiguations=(), FrequencyTable failedRecoveries=(), FrequencyTable parseErrors=(), FrequencyTable slowParses=(), FrequencyTable parseTimeRatios=()); -private TestMeasurement testRecovery(&T (value input, loc origin) standardParser, &T (value input, loc origin) recoveryParser, str input, loc source) { +private TestMeasurement testRecovery(&T (value input, loc origin) standardParser, &T (value input, loc origin) recoveryParser, str input, loc source, loc statFile) { int startTime = 0; int duration = 0; + int disambDuration = -1; + int errorSize=0; + str result = "?"; TestMeasurement measurement = successfulParse(); try { startTime = realTime(); Tree t = standardParser(input, source); duration = realTime() - startTime; measurement = successfulParse(source=source, duration=duration); + result = "success"; } catch ParseError(_): { startTime = realTime(); try { Tree t = recoveryParser(input, source); - duration = realTime() - startTime; - Tree best = findBestError(t); - errorSize = size(getErrorText(best)); - measurement = recovered(source=source, duration=duration, errorSize=errorSize); + int parseEndTime = realTime(); + duration = realTime() - parseEndTime; + Maybe[Tree] best = findBestError(t); + disambDuration = realTime() - parseEndTime; + result = "recovery"; + if (best == nothing()) { + measurement = successfulDisambiguation(source=source, duration=duration); + } else { + errorSize = size(getErrorText(best.val)); + measurement = recovered(source=source, duration=duration, errorSize=errorSize); + } } catch ParseError(_): { + result = "error"; duration = realTime() - startTime; measurement = parseError(source=source, duration=duration); } } + if (statFile != |unknown:///|) { + appendToFile(statFile, ",,,,,\n"); + } + return measurement; } @@ -67,6 +84,11 @@ FileStats updateStats(FileStats stats, TestMeasurement measurement, int referenc stats.failedRecoveries += 1; } } + case successfulDisambiguation(): { + stats.parseTimeRatios = increment(stats.parseTimeRatios, parseTimeRatio); + print("&"); + stats.successfulDisambiguations += 1; + } case parseError(): { stats.parseTimeRatios = increment(stats.parseTimeRatios, parseTimeRatio); print("?"); @@ -87,6 +109,7 @@ FileStats mergeFileStats(FileStats stats1, FileStats stats2) { totalParses = stats1.totalParses + stats2.totalParses, successfulParses = stats1.successfulParses + stats2.successfulParses, successfulRecoveries = stats1.successfulRecoveries + stats2.successfulRecoveries, + successfulDisambiguations = stats1.successfulDisambiguations + stats2.successfulDisambiguations, failedRecoveries = stats1.failedRecoveries + stats2.failedRecoveries, parseErrors = stats1.parseErrors + stats2.parseErrors, slowParses = stats1.slowParses + stats2.slowParses, @@ -107,11 +130,12 @@ FrequencyTable increment(FrequencyTable frequencyTable, int val) { TestStats consolidateStats(TestStats cumulativeStats, FileStats fileStats) { int totalFailed = fileStats.totalParses - fileStats.successfulParses; - cumulativeStats.successfulParsePct = increment(cumulativeStats.successfulParsePct, percentage(fileStats.successfulParses, fileStats.totalParses)); - cumulativeStats.successfulRecoveryPct = increment(cumulativeStats.successfulRecoveryPct, percentage(fileStats.successfulRecoveries, totalFailed)); - cumulativeStats.failedRecoveryPct = increment(cumulativeStats.failedRecoveryPct, percentage(fileStats.failedRecoveries, totalFailed)); - cumulativeStats.parseErrorPct = increment(cumulativeStats.parseErrorPct, percentage(fileStats.parseErrors, totalFailed)); - cumulativeStats.slowParsePct = increment(cumulativeStats.slowParsePct, percentage(fileStats.slowParses, totalFailed)); + cumulativeStats.successfulParses = increment(cumulativeStats.successfulParses, percentage(fileStats.successfulParses, fileStats.totalParses)); + cumulativeStats.successfulRecoveries = increment(cumulativeStats.successfulRecoveries, percentage(fileStats.successfulRecoveries, totalFailed)); + cumulativeStats.successfulDisambiguations = increment(cumulativeStats.successfulDisambiguations, percentage(fileStats.successfulDisambiguations, totalFailed)); + cumulativeStats.failedRecoveries = increment(cumulativeStats.failedRecoveries, percentage(fileStats.failedRecoveries, totalFailed)); + cumulativeStats.parseErrors = increment(cumulativeStats.parseErrors, percentage(fileStats.parseErrors, totalFailed)); + cumulativeStats.slowParses = increment(cumulativeStats.slowParses, percentage(fileStats.slowParses, totalFailed)); cumulativeStats.parseTimeRatios = mergeFrequencyTables(cumulativeStats.parseTimeRatios, fileStats.parseTimeRatios); cumulativeStats.filesTested += 1; @@ -135,24 +159,26 @@ map[int,int] mergeFrequencyTables(map[int,int] hist1, map[int,int] hist2) { TestStats mergeStats(TestStats stats, TestStats stats2) { stats.filesTested += stats2.filesTested; stats.testCount += stats2.testCount; - stats.successfulParsePct = mergeFrequencyTables(stats.successfulParsePct, stats2.successfulParsePct); - stats.successfulRecoveryPct = mergeFrequencyTables(stats.successfulRecoveryPct, stats2.successfulRecoveryPct); - stats.failedRecoveryPct = mergeFrequencyTables(stats.failedRecoveryPct, stats2.failedRecoveryPct); - stats.parseErrorPct = mergeFrequencyTables(stats.parseErrorPct, stats2.parseErrorPct); - stats.slowParsePct = mergeFrequencyTables(stats.slowParsePct, stats2.slowParsePct); + stats.successfulParses = mergeFrequencyTables(stats.successfulParses, stats2.successfulParses); + stats.successfulRecoveries = mergeFrequencyTables(stats.successfulRecoveries, stats2.successfulRecoveries); + stats.successfulDisambiguations = mergeFrequencyTables(stats.successfulDisambiguations, stats2.successfulDisambiguations); + stats.failedRecoveries = mergeFrequencyTables(stats.failedRecoveries, stats2.failedRecoveries); + stats.parseErrors = mergeFrequencyTables(stats.parseErrors, stats2.parseErrors); + stats.slowParses = mergeFrequencyTables(stats.slowParses, stats2.slowParses); stats.parseTimeRatios = mergeFrequencyTables(stats.parseTimeRatios, stats2.parseTimeRatios); return stats; } -FileStats testSingleCharDeletions(&T (value input, loc origin) standardParser, &T (value input, loc origin) recoveryParser, str input, int referenceParseTime, int recoverySuccessLimit) { +FileStats testSingleCharDeletions(&T (value input, loc origin) standardParser, &T (value input, loc origin) recoveryParser, loc source, str input, int referenceParseTime, int recoverySuccessLimit, int begin=0, int end=-1, loc statFile=|unknown:///|) { FileStats stats = fileStats(); int len = size(input); - int i = 0; + int i = begin; - while (i < len) { + while (i < len && (end == -1 || i<=end)) { str modifiedInput = substring(input, 0, i) + substring(input, i+1); - TestMeasurement measurement = testRecovery(standardParser, recoveryParser, modifiedInput, |unknown:///?deleted=<"">|); + source.query = "deletedChar="; + TestMeasurement measurement = testRecovery(standardParser, recoveryParser, modifiedInput, source, statFile); stats = updateStats(stats, measurement, referenceParseTime, recoverySuccessLimit); if (i < len && substring(input, i, i+1) == "\n") { println(); @@ -163,15 +189,15 @@ FileStats testSingleCharDeletions(&T (value input, loc origin) standardParser, & return stats; } -FileStats testDeleteUntilEol(&T (value input, loc origin) standardParser, &T (value input, loc origin) recoveryParser, str input, int referenceParseTime, int recoverySuccessLimit, int begin=0, int end=-1) { +FileStats testDeleteUntilEol(&T (value input, loc origin) standardParser, &T (value input, loc origin) recoveryParser, loc source, str input, int referenceParseTime, int recoverySuccessLimit, int begin=0, int end=-1, loc statFile=|unknown:///|) { FileStats stats = fileStats(); int lineStart = begin; list[int] lineEndings = findAll(input, "\n"); + int line = 1; for (int lineEnd <- lineEndings) { lineLength = lineEnd - lineStart; for (int pos <- [lineStart..lineEnd]) { - // Check boundaries (only used for quick bug testing) if (end != -1 && end < pos) { return stats; @@ -179,13 +205,14 @@ FileStats testDeleteUntilEol(&T (value input, loc origin) standardParser, &T (va if (pos < begin) { continue; } - modifiedInput = substring(input, 0, pos) + substring(input, lineEnd); - TestMeasurement measurement = testRecovery(standardParser, recoveryParser, modifiedInput, |unknown:///?deletedUntilEol=<",">|); + source.query = "deletedUntilEol=,,"; + TestMeasurement measurement = testRecovery(standardParser, recoveryParser, modifiedInput, source, statFile); stats = updateStats(stats, measurement, referenceParseTime, recoverySuccessLimit); } lineStart = lineEnd+1; println(); + line = line+1; } return stats; @@ -196,14 +223,14 @@ private int percentage(int number, int total) { } int statLabelWidth = 40; -int statFieldWidth = 7; +int statFieldWidth = 10; void printFileStats(FileStats fileStats) { - void printStat(str label, int stat, int total, bool printPct=true) { + void printStat(str label, int stat, int total, bool prints=true) { int pct = total == 0 ? 0 : stat*100/total; print(left(label + ":", statLabelWidth)); - str pctStr = printPct ? " (%)" : ""; + str pctStr = prints ? " (%)" : ""; println(left("", statFieldWidth)); } @@ -212,11 +239,12 @@ void printFileStats(FileStats fileStats) { printStat("Successful parses", fileStats.successfulParses, fileStats.totalParses); int failedParses = fileStats.totalParses - fileStats.successfulParses; printStat("Successful recoveries", fileStats.successfulRecoveries, failedParses); + printStat("Successful disambiguations", fileStats.successfulDisambiguations, failedParses); printStat("Failed recoveries", fileStats.failedRecoveries, failedParses); printStat("Parse errors", fileStats.parseErrors, failedParses); printStat("Slow parses", fileStats.slowParses, failedParses); printFrequencyTableHeader(); - printFrequencyTableStats("Parse time ratios", fileStats.parseTimeRatios, unit = "log2(ratio)"); + printFrequencyTableStats("Parse time ratios", fileStats.parseTimeRatios, unit = "log2(ratio)", printTotal=false); } void printFrequencyTableHeader() { @@ -225,10 +253,11 @@ void printFrequencyTableHeader() { print(right("median", statFieldWidth)); print(right("95%", statFieldWidth)); print(right("min", statFieldWidth)); - println(right("max", statFieldWidth)); + print(right("max", statFieldWidth)); + println(right("total", statFieldWidth)); } -void printFrequencyTableStats(str label, FrequencyTable frequencyTable, str unit = "%") { +void printFrequencyTableStats(str label, FrequencyTable frequencyTable, str unit = "%", bool printTotal=true) { print(left(label + " ():", statLabelWidth)); int totalCount = (0 | it+frequencyTable[val] | val <- frequencyTable); @@ -269,7 +298,8 @@ void printFrequencyTableStats(str label, FrequencyTable frequencyTable, str unit print(right("", statFieldWidth)); print(right("", statFieldWidth)); print(right("", statFieldWidth)); - println(right("", statFieldWidth)); + print(right("", statFieldWidth)); + println(right("", statFieldWidth)); } } @@ -279,12 +309,13 @@ void printStats(TestStats stats) { } println("Total parses: "); printFrequencyTableHeader(); - printFrequencyTableStats("Succesful parses", stats.successfulParsePct); - printFrequencyTableStats("Succesful recoveries", stats.successfulRecoveryPct); - printFrequencyTableStats("Failed recoveries", stats.failedRecoveryPct); - printFrequencyTableStats("Parse errors", stats.parseErrorPct); - printFrequencyTableStats("Slow parses", stats.slowParsePct); - printFrequencyTableStats("Parse time ratios", stats.parseTimeRatios, unit = "log2/%"); + printFrequencyTableStats("Succesful parses", stats.successfulParses); + printFrequencyTableStats("Succesful recoveries", stats.successfulRecoveries); + printFrequencyTableStats("Succesful disambiguations", stats.successfulDisambiguations); + printFrequencyTableStats("Failed recoveries", stats.failedRecoveries); + printFrequencyTableStats("Parse errors", stats.parseErrors); + printFrequencyTableStats("Slow parses", stats.slowParses); + printFrequencyTableStats("Parse time ratios", stats.parseTimeRatios, unit = "log2/%", printTotal=false); println(); } @@ -302,7 +333,7 @@ loc zippedFile(str zip, str path) { FileStats testErrorRecovery(loc syntaxFile, str topSort, loc testInput) = testErrorRecovery(syntaxFile, topSort, testInput, readFile(testInput)); -FileStats testErrorRecovery(loc syntaxFile, str topSort, loc testInput, str input) { +FileStats testErrorRecovery(loc syntaxFile, str topSort, loc testInput, str input, loc statFile=|unknown:///|) { Module \module = parse(#start[Module], syntaxFile).top; str modName = syntaxLocToModuleName(syntaxFile); Grammar gram = modules2grammar(modName, {\module}); @@ -322,12 +353,12 @@ FileStats testErrorRecovery(loc syntaxFile, str topSort, loc testInput, str inpu println(); println("Single char deletions:"); - FileStats singleCharDeletionStats = testSingleCharDeletions(standardParser, recoveryParser, input, referenceParseTime, recoverySuccessLimit); + FileStats singleCharDeletionStats = testSingleCharDeletions(standardParser, recoveryParser, testInput, input, referenceParseTime, recoverySuccessLimit, statFile=statFile); printFileStats(singleCharDeletionStats); println(); println("Deletes until end-of-line:"); - FileStats deleteUntilEolStats = testDeleteUntilEol(standardParser, recoveryParser, input, referenceParseTime, recoverySuccessLimit); + FileStats deleteUntilEolStats = testDeleteUntilEol(standardParser, recoveryParser, testInput, input, referenceParseTime, recoverySuccessLimit, statFile=statFile); printFileStats(deleteUntilEolStats); FileStats stats = mergeFileStats(singleCharDeletionStats, deleteUntilEolStats); @@ -341,17 +372,34 @@ FileStats testErrorRecovery(loc syntaxFile, str topSort, loc testInput, str inpu } } -TestStats batchRecoveryTest(loc syntaxFile, str topSort, loc dir, str ext, int maxFiles, int maxFileSize, TestStats cumulativeStats=testStats()) { - println("Batch testing in directory (maxFiles=, maxFileSize=)"); +private int fileNr = 0; +private int fromFile = 0; + +TestStats batchRecoveryTest(loc syntaxFile, str topSort, loc dir, str ext, int maxFiles, int minFileSize, int maxFileSize, int from, loc statFile) { + fileNr = 0; + fromFile = from; + + return runBatchRecoveryTest(syntaxFile, topSort, dir, ext, maxFiles, minFileSize, maxFileSize, statFile, testStats()); +} + +TestStats runBatchRecoveryTest(loc syntaxFile, str topSort, loc dir, str ext, int maxFiles, int minFileSize, int maxFileSize, loc statFile, TestStats cumulativeStats) { + println("Batch testing in directory (maxFiles=, maxFileSize=, fromFile=)"); + writeFile(statFile, "source,size,result,duration,disambiguationDuration,errorSize\n"); for (entry <- listEntries(dir)) { loc file = dir + entry; if (isFile(file)) { if (endsWith(file.path, ext)) { str content = readFile(file); - if (size(content) <= maxFileSize) { + int contentSize = size(content); + if (contentSize >= minFileSize && contentSize < maxFileSize) { + fileNr += 1; + if (fileNr < fromFile) { + println("Skipping file #: , (\< )"); + continue; + } println("========================================================================"); - println("Testing file # ( of left)"); - FileStats fileStats = testErrorRecovery(syntaxFile, topSort, file, content); + println("Testing file # ( of left)"); + FileStats fileStats = testErrorRecovery(syntaxFile, topSort, file, content, statFile=statFile); cumulativeStats = consolidateStats(cumulativeStats, fileStats); println(); println("------------------------------------------------------------------------"); @@ -360,7 +408,7 @@ TestStats batchRecoveryTest(loc syntaxFile, str topSort, loc dir, str ext, int m } } } else if (isDirectory(file)) { - cumulativeStats = batchRecoveryTest(syntaxFile, topSort, file, ext, maxFiles, maxFileSize, cumulativeStats=cumulativeStats); + cumulativeStats = runBatchRecoveryTest(syntaxFile, topSort, file, ext, maxFiles, minFileSize, maxFileSize, statFile, cumulativeStats); } if (cumulativeStats.filesTested >= maxFiles) { @@ -370,4 +418,3 @@ TestStats batchRecoveryTest(loc syntaxFile, str topSort, loc dir, str ext, int m return cumulativeStats; } - diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/ToyRascalRecoveryTests.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/ToyRascalRecoveryTests.rsc index 75c7f478996..a248631e7a1 100644 --- a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/ToyRascalRecoveryTests.rsc +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/ToyRascalRecoveryTests.rsc @@ -18,6 +18,7 @@ import lang::rascal::tests::concrete::recovery::ToyRascal; import ParseTree; import IO; +import util::Maybe; Tree parseToyRascal(str input, bool visualize=false) { Tree result = parser(#start[FunctionDeclaration], allowRecovery=true, allowAmbiguity=true)(input, |unknown:///?visualize=<"">|); @@ -28,8 +29,7 @@ Tree parseToyRascal(str input, bool visualize=false) { println("- "); } - Tree disambiguated = defaultErrorDisambiguationFilter(result); - println("Best error: "); + println("Best error: "); } return result; @@ -42,15 +42,15 @@ test bool toyRascalOk() { test bool toyRascalMissingOpenParen() { Tree t = parseToyRascal("f){}", visualize=true); - return hasErrors(t) && getErrorText(findBestError(t)) == ")"; + return hasErrors(t) && getErrorText(findBestError(t).val) == ")"; } test bool toyRascalMissingCloseParen() { Tree t = parseToyRascal("f({}", visualize=true); - return hasErrors(t) && getErrorText(findBestError(t)) == "("; + return hasErrors(t) && getErrorText(findBestError(t).val) == "("; } test bool toyRascalMissingIfBody() { Tree t = parseToyRascal("f(){if(1){}}", visualize=true); - return hasErrors(t) && getErrorText(findBestError(t)) == "}"; + return hasErrors(t) && getErrorText(findBestError(t).val) == "}"; } \ No newline at end of file diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/LostSkipBug.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/LostSkipBug.rsc new file mode 100644 index 00000000000..173af413afa --- /dev/null +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/LostSkipBug.rsc @@ -0,0 +1,14 @@ +module lang::rascal::tests::concrete::recovery::bugs::LostSkipBug + + +import lang::rascal::tests::concrete::recovery::RecoveryTestSupport; +import lang::rascal::\syntax::Rascal; +import ParseTree; +import IO; + +void testBug() { + standardParser = parser(#start[Module], allowRecovery=false, allowAmbiguity=true); + recoveryParser = parser(#start[Module], allowRecovery=true, allowAmbiguity=true); + input = readFile(|std:///analysis/diff/edits/ExecuteTextEdits.rsc|); + testSingleCharDeletions(standardParser, recoveryParser, input, 200, 100, begin=235, end=235); +} diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/NoErrorsAfterDisambBug.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/NoErrorsAfterDisambBug.rsc new file mode 100644 index 00000000000..190e1799414 --- /dev/null +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/NoErrorsAfterDisambBug.rsc @@ -0,0 +1,14 @@ +module lang::rascal::tests::concrete::recovery::bugs::NoErrorsAfterDisambBug + + +import lang::rascal::tests::concrete::recovery::RecoveryTestSupport; +import lang::rascal::\syntax::Rascal; +import ParseTree; +import IO; + +void testBug() { + standardParser = parser(#start[Module], allowRecovery=false, allowAmbiguity=true); + recoveryParser = parser(#start[Module], allowRecovery=true, allowAmbiguity=true); + input = readFile(|std:///lang/rascal/tests/basic/ListRelations.rsc|); + testSingleCharDeletions(standardParser, recoveryParser, input, 200, 100, begin=1916, end=1916); +} diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/OutOfMemoryBug.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/OutOfMemoryBug.rsc new file mode 100644 index 00000000000..cd0e54775df --- /dev/null +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/OutOfMemoryBug.rsc @@ -0,0 +1,15 @@ +module lang::rascal::tests::concrete::recovery::bugs::OutOfMemoryBug + + +import lang::rascal::tests::concrete::recovery::RecoveryTestSupport; +import lang::rascal::\syntax::Rascal; +import ParseTree; +import IO; + +void testBug() { + standardParser = parser(#start[Module], allowRecovery=false, allowAmbiguity=true); + recoveryParser = parser(#start[Module], allowRecovery=true, allowAmbiguity=true); + loc source = |std:///lang/rascal/tests/functionality/PatternSet3.rsc|; + input = readFile(source); + testDeleteUntilEol(standardParser, recoveryParser, source, input, 200, 150, begin=581, end=581); +} diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/OvertakenNullableBug.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/OvertakenNullableBug.rsc new file mode 100644 index 00000000000..6d82aafd0f9 --- /dev/null +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/OvertakenNullableBug.rsc @@ -0,0 +1,14 @@ +module lang::rascal::tests::concrete::recovery::bugs::OvertakenNullableBug + + +import lang::rascal::tests::concrete::recovery::RecoveryTestSupport; +import lang::rascal::\syntax::Rascal; +import ParseTree; +import IO; + +void testBug() { + standardParser = parser(#start[Module], allowRecovery=false, allowAmbiguity=true); + recoveryParser = parser(#start[Module], allowRecovery=true, allowAmbiguity=true); + input = readFile(|std:///lang/rascal/tests/library/analysis/statistics/DescriptiveTests.rsc|); + testDeleteUntilEol(standardParser, recoveryParser, input, 200, 100, begin=561, end=561); +} diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/InfiniteLoop2Bug.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/SlowDisambiguationBug.rsc similarity index 86% rename from src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/InfiniteLoop2Bug.rsc rename to src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/SlowDisambiguationBug.rsc index 4beae613ec8..a7b6759a64e 100644 --- a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/InfiniteLoop2Bug.rsc +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/SlowDisambiguationBug.rsc @@ -1,4 +1,4 @@ -module lang::rascal::tests::concrete::recovery::bugs::InfiniteLoop2Bug +module lang::rascal::tests::concrete::recovery::bugs::SlowDisambiguationBug import lang::rascal::tests::concrete::recovery::RecoveryTestSupport; import lang::rascal::\syntax::Rascal; diff --git a/src/org/rascalmpl/parser/gtd/SGTDBF.java b/src/org/rascalmpl/parser/gtd/SGTDBF.java index 2abcfd836f1..0125814206c 100755 --- a/src/org/rascalmpl/parser/gtd/SGTDBF.java +++ b/src/org/rascalmpl/parser/gtd/SGTDBF.java @@ -10,6 +10,9 @@ import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; import java.net.URI; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; import org.rascalmpl.parser.gtd.debug.IDebugListener; import org.rascalmpl.parser.gtd.exception.ParseError; @@ -140,6 +143,7 @@ public abstract class SGTDBF implements IGTD { // Error recovery private IRecoverer

recoverer; + private Map processedTrees = new java.util.HashMap<>(); // Used to preserve sharing during error node introduction // Debugging private IDebugListener

debugListener; @@ -883,18 +887,8 @@ private void move(AbstractStackNode

node, AbstractNode result) { } if (node.isEndNode()) { - if (!result.isEmpty() || node.getId() == AbstractExpandableStackNode.DEFAULT_LIST_EPSILON_ID) { // Only go - // into the - // nullable - // fix path - // for - // nullables - // (special - // list - // epsilons - // can be - // ignored - // as well). + // Only go into the nullable fix path for nullables (special list epsilons can be ignored as well). + if (!result.isEmpty() || node.getId() == AbstractExpandableStackNode.DEFAULT_LIST_EPSILON_ID) { updateEdges(node, result); } else { @@ -1361,8 +1355,8 @@ protected AbstractNode parse(AbstractStackNode

startNode, URI inputURI, int[] * Initiates parsing. */ @SuppressWarnings("unchecked") - protected AbstractNode parse(AbstractStackNode

startNode, URI inputURI, int[] input, IRecoverer

recoverer, - IDebugListener

debugListener) { + protected AbstractNode parse(AbstractStackNode

startNode, URI inputURI, int[] input, + IRecoverer

recoverer, IDebugListener

debugListener) { if (debugListener == null) { debugListener = new NopDebugListener<>(); } @@ -1640,7 +1634,11 @@ private T introduceErrorNodes(T tree, INodeConstructorFactory nodeConstruc private IConstructor introduceErrorNodes(IConstructor tree, INodeConstructorFactory nodeConstructorFactory) { - IConstructor result; + IConstructor result = processedTrees.get(tree); + if (result != null) { + return result; + } + Type type = tree.getConstructorType(); if (type == RascalValueFactory.Tree_Appl) { result = fixErrorAppl((ITree) tree, nodeConstructorFactory); @@ -1663,11 +1661,14 @@ else if (type == RascalValueFactory.Tree_Cycle) { result = result.asWithKeywordParameters().setParameter(RascalValueFactory.Location, loc); } + processedTrees.put(tree, result); + return result; } private IConstructor fixErrorAppl(ITree tree, INodeConstructorFactory nodeConstructorFactory) { + IValue prod = TreeAdapter.getProduction(tree); IList childList = TreeAdapter.getArgs(tree); @@ -1688,13 +1689,14 @@ private IConstructor fixErrorAppl(ITree tree, newChild = introduceErrorNodes(child, nodeConstructorFactory); } - if (newChild != child || errorTree) { - if (newChildren == null) { - newChildren = new ArrayList<>(childCount); - for (int j=0; j(childCount); + for (int j=0; j()).tree; + } + + private ScoredTree disambiguate(IConstructor tree, boolean allowAmbiguity, Map processedTrees) { + Type type = tree.getConstructorType(); + ScoredTree result; + + if (type == RascalValueFactory.Tree_Appl) { + result = disambiguateAppl((ITree) tree, allowAmbiguity, processedTrees); + } else if (type == RascalValueFactory.Tree_Amb) { + result = disambiguateAmb((ITree) tree, allowAmbiguity, processedTrees); + } else { + // Other trees (cycle, char) do not have subtrees so they have a score of 0 + result = new ScoredTree(tree, 0); + } + + return result; + } + + private ScoredTree disambiguateAppl(ITree appl, boolean allowAmbiguity, Map processedTrees) { + ScoredTree result = processedTrees.get(appl); + if (result != null) { + return result; + } + + if (ProductionAdapter.isSkipped(appl.getProduction())) { + result = new ScoredTree(appl, ((IList) appl.get(1)).length()); + } else { + IList args = TreeAdapter.getArgs(appl); + int totalScore = 0; + IListWriter disambiguatedArgs = null; + + // Disambiguate and score all children + for (int i=0; i processedTrees) { + ScoredTree result = processedTrees.get(amb); + if (result != null) { + return result; + } + + ISet originalAlts = (ISet) amb.get(0); + + ISetWriter alternativesWithoutErrors = null; + ScoredTree errorAltWithBestScore = null; + for (IValue alt : originalAlts) { + ScoredTree disambiguatedAlt = disambiguate((IConstructor) alt, allowAmbiguity, processedTrees); + if (disambiguatedAlt.score == 0) { + // Non-error tree + if (alternativesWithoutErrors == null) { + alternativesWithoutErrors = rascalValues.setWriter(); + } + alternativesWithoutErrors.insert(disambiguatedAlt.tree); + } else { + // Only keep the best of the error trees + if (errorAltWithBestScore == null || errorAltWithBestScore.score > disambiguatedAlt.score) { + errorAltWithBestScore = disambiguatedAlt; + } + } + } + + if (alternativesWithoutErrors == null) { + assert errorAltWithBestScore != null : "No trees with and no trees without errors?"; + processedTrees.put(amb, errorAltWithBestScore); + return errorAltWithBestScore; + } + + ISet remainingAlts = alternativesWithoutErrors.done(); + + ITree resultTree; + if (remainingAlts.size() == originalAlts.size()) { + // All children are without errors, return the original tree + resultTree = amb; + } else if (remainingAlts.size() == 1) { + // One child without errors remains, dissolve the amb tree + resultTree = (ITree) remainingAlts.iterator().next(); + } else { + // Create a new amb tree with the remaining non-error trees + resultTree = rascalValues.amb(remainingAlts); + + // We have an ambiguity between non-error trees + if (!allowAmbiguity) { + throw new Ambiguous(resultTree); + } + } + + result = new ScoredTree(resultTree, 0); + processedTrees.put(amb, result); + + return result; + } + +} diff --git a/src/org/rascalmpl/parser/gtd/stack/AbstractStackNode.java b/src/org/rascalmpl/parser/gtd/stack/AbstractStackNode.java index fee21a394a7..67cd564b52e 100644 --- a/src/org/rascalmpl/parser/gtd/stack/AbstractStackNode.java +++ b/src/org/rascalmpl/parser/gtd/stack/AbstractStackNode.java @@ -590,6 +590,9 @@ public int updateOvertakenNode(AbstractStackNode

predecessor, AbstractNode re // Initialize the prefixes map. int edgesMapSize = edgesMap.size(); + // Before error recovery: int possibleMaxSize = edgesMapSize + edgesMapSize; + // It is unclear why error recovery can cause more edges to be added than previously accounted for, + // although this might just have been a bug. int possibleMaxSize = edgesMapSize + edgesMapToAdd.size(); if(prefixesMap == null){ prefixesMap = new ArrayList[possibleMaxSize]; @@ -651,7 +654,9 @@ public int updateOvertakenNullableNode(AbstractStackNode

predecessor, Abstrac // Initialize the prefixes map. int edgesMapSize = edgesMap.size(); - int possibleMaxSize = edgesMapSize + potentialNewEdges; + // Before error recovery: int possibleMaxSize = edgesMapSize + potentialNewEdges; + // It is unclear why error recovery can cause more edges to be added than previously accounted for. + int possibleMaxSize = edgesMapSize + edgesMapToAdd.size(); if(prefixesMap == null){ prefixesMap = new ArrayList[possibleMaxSize]; diff --git a/src/org/rascalmpl/values/RascalFunctionValueFactory.java b/src/org/rascalmpl/values/RascalFunctionValueFactory.java index 1725085649d..3cb60fd7909 100644 --- a/src/org/rascalmpl/values/RascalFunctionValueFactory.java +++ b/src/org/rascalmpl/values/RascalFunctionValueFactory.java @@ -47,6 +47,7 @@ import org.rascalmpl.parser.gtd.exception.UndeclaredNonTerminalException; import org.rascalmpl.parser.gtd.io.InputConverter; import org.rascalmpl.parser.gtd.recovery.IRecoverer; +import org.rascalmpl.parser.gtd.recovery.ParseErrorDisambiguator; import org.rascalmpl.parser.gtd.result.action.IActionExecutor; import org.rascalmpl.parser.gtd.result.out.DefaultNodeFlattener; import org.rascalmpl.parser.gtd.util.StackNodeIdDispenser; @@ -581,9 +582,16 @@ private ITree parseObject(String methodName, ISourceLocation location, char[] in URI uri = location.getURI(); if (allowRecovery) { recoverer = new ToTokenRecoverer(uri, parserInstance, new StackNodeIdDispenser(parserInstance)); - //debugListener = new DebugLogger(new PrintWriter(System.out, true)); } - return (ITree) parserInstance.parse(methodName, uri, input, exec, new DefaultNodeFlattener<>(), new UPTRNodeFactory(allowAmbiguity), recoverer, debugListener); + ITree parseForest = (ITree) parserInstance.parse(methodName, uri, input, exec, new DefaultNodeFlattener<>(), new UPTRNodeFactory(allowRecovery || allowAmbiguity), recoverer, debugListener); + + if (!allowAmbiguity && allowRecovery && filters.isEmpty()) { + // Filter error-induced ambiguities + RascalValueFactory valueFactory = (RascalValueFactory) ValueFactoryFactory.getValueFactory(); + parseForest = (ITree) new ParseErrorDisambiguator(valueFactory).disambiguateErrors(parseForest, valueFactory.bool(false)); + } + + return parseForest; } }