diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/BasicRecoveryTests.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/BasicRecoveryTests.rsc index 91153b09f0a..692bf97f308 100644 --- a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/BasicRecoveryTests.rsc +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/BasicRecoveryTests.rsc @@ -16,7 +16,8 @@ module lang::rascal::tests::concrete::recovery::BasicRecoveryTests import ParseTree; import util::ErrorRecovery; -import util::Maybe; + +import lang::rascal::tests::concrete::recovery::RecoveryTestSupport; layout Layout = [\ ]* !>> [\ ]; @@ -26,32 +27,17 @@ syntax T = ABC End; syntax ABC = 'a' 'b' 'c'; syntax End = "$"; -private Tree parseS(str input, bool visualize=false) - = parser(#S, allowRecovery=true, allowAmbiguity=true)(input, |unknown:///?visualize=<"">|); - -test bool basicOk() { - return !hasErrors(parseS("a b c $")); -} +test bool basicOk() = checkRecovery(#S, "a b c $", []); -test bool abx() { - Tree t = parseS("a b x $"); - return getErrorText(findBestError(t).val) == "x "; -} +test bool abx() = checkRecovery(#S, "a b x $", ["x "]); -test bool axc() { - Tree t = parseS("a x c $"); - return getErrorText(findBestError(t).val) == "x c"; -} +test bool axc() = checkRecovery(#S, "a x c $", ["x c"]); -test bool ax() { +test bool autoDisambiguation() { str input = "a x $"; - Tree t = parseS(input); - assert size(findAllErrors(t)) == 3; - assert getErrorText(findBestError(t).val) == "x "; + assert checkRecovery(#S, input, ["x "]); Tree autoDisambiguated = parser(#S, allowRecovery=true, allowAmbiguity=false)(input, |unknown:///|); - assert size(findAllErrors(autoDisambiguated)) == 1; - - return getErrorText(findFirstError(autoDisambiguated)) == getErrorText(findBestError(t).val); + return size(findAllErrors(autoDisambiguated)) == 1; } diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/ListRecoveryTests.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/ListRecoveryTests.rsc index 2a98f97fb7b..a91ee40cb2c 100644 --- a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/ListRecoveryTests.rsc +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/ListRecoveryTests.rsc @@ -17,6 +17,8 @@ module lang::rascal::tests::concrete::recovery::ListRecoveryTests import ParseTree; import util::ErrorRecovery; +import lang::rascal::tests::concrete::recovery::RecoveryTestSupport; + layout Layout = [\ ]* !>> [\ ]; syntax S = T End; @@ -29,16 +31,8 @@ Tree parseList(str s, bool visualize=false) { return parser(#S, allowRecovery=true, allowAmbiguity=true)(s, |unknown:///?visualize=<"">|); } -test bool listOk() { - return !hasErrors(parseList("a b , a b , a b $", visualize=true)); -} +test bool listOk() = checkRecovery(#S, "a b , a b , a b $", []); -test bool listTypo() { - Tree t = parseList("a b, a x, ab $", visualize=true); - return hasErrors(t); -} +test bool listTypo() = checkRecovery(#S, "a b, a x, ab $", ["x"]); -test bool listTypoWs() { - Tree t = parseList("a b , a x , a b $", visualize=true); - return hasErrors(t); -} +test bool listTypoWs() = checkRecovery(#S, "a b , a x , a b $", ["x "]); diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/NestedRecoveryTests.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/NestedRecoveryTests.rsc index ac20179b211..13cc49373cf 100644 --- a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/NestedRecoveryTests.rsc +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/NestedRecoveryTests.rsc @@ -14,9 +14,7 @@ module lang::rascal::tests::concrete::recovery::NestedRecoveryTests -import ParseTree; -import util::ErrorRecovery; -import util::Maybe; +import lang::rascal::tests::concrete::recovery::RecoveryTestSupport; layout Layout = [\ ]* !>> [\ ]; @@ -28,14 +26,6 @@ syntax A = "a"; syntax B = "b" "b"; syntax C = "c"; -private Tree parseS(str input, bool visualize=false) - = parser(#S, allowRecovery=true, allowAmbiguity=true)(input, |unknown:///?visualize=<"">|); +test bool nestedOk() = checkRecovery(#S, "a b b c", []); -test bool nestedOk() { - return !hasErrors(parseS("a b b c")); -} - -test bool nestedTypo() { - Tree t = parseS("a b x c"); - return getErrorText(findBestError(t).val) == "x "; -} +test bool nestedTypo() = checkRecovery(#S, "a b x c", ["x "]); diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/PicoRecoveryTests.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/PicoRecoveryTests.rsc index c32e0b39cd3..a8bfb24cace 100644 --- a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/PicoRecoveryTests.rsc +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/PicoRecoveryTests.rsc @@ -18,22 +18,12 @@ import lang::pico::\syntax::Main; import ParseTree; import util::ErrorRecovery; - -import IO; -import String; -import util::Maybe; +import lang::rascal::tests::concrete::recovery::RecoveryTestSupport; Tree parsePico(str input, bool visualize=false) = parser(#Program, allowRecovery=true, allowAmbiguity=true)(input, |unknown:///?visualize=<"">|); -bool checkError(Tree t, str expectedError) { - str bestError = getErrorText(findBestError(t).val); - //println("best error: , expected: "); - return size(bestError) == size(expectedError); -} - -test bool picoOk() { - t = parsePico("begin declare input : natural, +test bool picoOk() = checkRecovery(#Program, "begin declare input : natural, output : natural, repnr : natural, rep : natural; @@ -48,12 +38,9 @@ test bool picoOk() { od; input := input - 1 od -end"); - return !hasErrors(t); -} +end", []); -test bool picoTypo() { - t = parsePico("begin declare input : natural, +test bool picoTypo() = checkRecovery(#Program, "begin declare input : natural, output : natural, repnr : natural, rep : natural; @@ -68,13 +55,9 @@ test bool picoTypo() { od; input := input - 1 od -end"); - - return checkError(t, "output x rep"); -} +end", ["output x rep"]); -test bool picoMissingSemi() { - t = parsePico("begin declare input : natural, +test bool picoMissingSemi() = checkRecovery(#Program, "begin declare input : natural, output : natural, repnr : natural, rep : natural; @@ -89,32 +72,24 @@ test bool picoMissingSemi() { od input := input - 1 od -end"); - return checkError(t, "input := input - 1 - od"); -} +end", ["input := input - 1 + od"]); -test bool picoTypoSmall() { - t = parsePico( -"begin declare; +test bool picoTypoSmall() = checkRecovery(#Program, "begin declare; while input do input x= 14; output := 0 od -end"); +end", ["x= 14"]); - return checkError(t, "x= 14"); -} - -test bool picoMissingSemiSmall() { - t = parsePico( -"begin declare; +test bool picoMissingSemiSmall() = checkRecovery(#Program, "begin declare; while input do input := 14 output := 0 od -end"); +end", ["output := 0 + od"]); + +test bool picoEof() = checkRecovery(#Program, "begin declare; input := 0;", ["input := 0;"]); - return checkError(t, "output := 0 - od"); -} +test bool picoEofError() = checkRecovery(#Program, "begin declare x y; input := 0;", ["x y;", "input := 0;"]); diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RascalRecoveryTests.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RascalRecoveryTests.rsc index 239eb8d1902..cc73efc103a 100644 --- a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RascalRecoveryTests.rsc +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RascalRecoveryTests.rsc @@ -21,113 +21,40 @@ import util::ErrorRecovery; import IO; import util::Maybe; -bool debugging = false; - -Tree parseRascal(type[&T] t, str input, bool visualize=false) { - Tree result = parser(t, allowRecovery=true, allowAmbiguity=true)(input, |unknown:///?visualize=<"">|); - if (debugging) { - list[Tree] errors = findAllErrors(result); - if (errors != []) { - println("Tree has errors"); - for (error <- errors) { - println("- "); - } - - println("Best error: "); - } - } - - return result; -} - -Tree parseRascal(str input, bool visualize=false) = parseRascal(#start[Module], input, visualize=visualize); +import lang::rascal::tests::concrete::recovery::RecoveryTestSupport; -Tree parseFunctionDeclaration(str input, bool visualize=false) = parseRascal(#FunctionDeclaration, input, visualize=visualize); - -Tree parseStatement(str input, bool visualize=false) = parseRascal(#Statement, input, visualize=visualize); +bool debugging = false; -test bool rascalOk() { - Tree t = parseRascal(" +test bool rascalOk() = checkRecovery(#start[Module], " module A int inc(int i) { return i+1; } - "); - return !hasErrors(t); -} - -test bool rascalFunctionDeclarationOk() { - Tree t = parseFunctionDeclaration("void f(){}"); - return !hasErrors(t); -} + ", []); +test bool rascalFunctionDeclarationOk() = checkRecovery(#FunctionDeclaration, "void f(){}", []); -test bool rascalModuleFollowedBySemi() { - Tree t = parseRascal(" +test bool rascalModuleFollowedBySemi() = checkRecovery(#start[Module], " module A ; - "); - - // There are a lot of productions in Rascal that have a ; as terminator. - // The parser assumes the user has only entered the ; on one of them, - // so the error list contains them all. - list[Tree] errors = findAllErrors(t); - assert size(errors) == 10; - - return getErrorText(findFirstError(t)) == ";"; -} + ", [";"]); -test bool rascalOperatorTypo() { - Tree t = parseRascal(" +test bool rascalOperatorTypo() = checkRecovery(#start[Module], " module A int f() = 1 x 1; - "); + ", ["x 1;"]); - return getErrorText(findFirstError(t)) == "x 1;"; -} - -test bool rascalIllegalStatement() { - Tree t = parseRascal("module A void f(){a}"); - return getErrorText(findFirstError(t)) == "a}"; -} - -test bool rascalMissingCloseParen() { - Tree t = parseRascal("module A void f({} void g(){}"); - - assert getErrorText(findFirstError(t)) == "void g("; - assert getErrorText(findBestError(t).val) == "("; - - return true; -} +test bool rascalIllegalStatement() = checkRecovery(#start[Module], "module A void f(){a}", ["a}"]); -test bool rascalFunctionDeclarationMissingCloseParen() { - Tree t = parseFunctionDeclaration("void f({} void g() {}"); +test bool rascalMissingCloseParen() = checkRecovery(#start[Module], "module A void f({} void g(){}", ["("]); - assert getErrorText(findFirstError(t)) == "void g("; +test bool rascalFunctionDeclarationMissingCloseParen() = checkRecovery(#FunctionDeclaration, "void f({} void g() {}", ["("]); - Tree error = findBestError(t).val; - assert getErrorText(error) == "("; - loc location = getSkipped(error).src; - assert location.begin.column == 16 && location.length == 1; +test bool rascalIfMissingExpr() = checkRecovery(#FunctionDeclaration, "void f(){if(){1;}}", [")"]); - return true; -} - -test bool rascalIfMissingExpr() { - Tree t = parseFunctionDeclaration("void f(){if(){1;}}", visualize=false); - return getErrorText(findBestError(t).val) == ")"; -} - -test bool rascalIfBodyEmpty() { - Tree t = parseRascal("module A void f(){1;} void g(){if(1){}} void h(){1;}"); - - println("error: "); - assert getErrorText(findBestError(t).val) == "} void h(){1"; - - return true; -} +test bool rascalIfBodyEmpty() = checkRecovery(#start[Module], "module A void f(){1;} void g(){if(1){}} void h(){1;}", ["} void h(){1"]); // Not working yet: /* diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RecoveryTestSupport.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RecoveryTestSupport.rsc index 36b81d00dfb..196643a9c33 100644 --- a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RecoveryTestSupport.rsc +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RecoveryTestSupport.rsc @@ -1,3 +1,17 @@ +/** + * Copyright (c) 2024, NWO-I Centrum Wiskunde & Informatica (CWI) + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + * + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **/ + module lang::rascal::tests::concrete::recovery::RecoveryTestSupport import lang::rascal::\syntax::Rascal; @@ -9,7 +23,6 @@ import util::Benchmark; import Grammar; import analysis::statistics::Descriptive; import util::Math; -import util::Maybe; import Set; import List; @@ -17,15 +30,37 @@ import lang::rascal::grammar::definition::Modules; alias FrequencyTable = map[int val, int count]; -public data TestMeasurement(loc source=|unknown:///|, int duration=0) = successfulParse() | recovered(int errorSize=0) | parseError() | successfulDisambiguation(); -public data FileStats = fileStats(int totalParses = 0, int successfulParses=0, int successfulRecoveries=0, int successfulDisambiguations=0, int failedRecoveries=0, int parseErrors=0, int slowParses=0, FrequencyTable parseTimeRatios=()); - -public data TestStats = testStats(int filesTested=0, int testCount=0, FrequencyTable successfulParses=(), FrequencyTable successfulRecoveries=(), FrequencyTable successfulDisambiguations=(), FrequencyTable failedRecoveries=(), FrequencyTable parseErrors=(), FrequencyTable slowParses=(), FrequencyTable parseTimeRatios=()); +public data TestMeasurement(loc source=|unknown:///|, int duration=0) = successfulParse() | recovered(int errorCount=0, int errorSize=0) | parseError() | successfulDisambiguation(); +public data FileStats = fileStats( + int totalParses = 0, + int successfulParses=0, + int successfulRecoveries=0, + int successfulDisambiguations=0, + int failedRecoveries=0, + int parseErrors=0, + int slowParses=0, + FrequencyTable parseTimeRatios=(), + FrequencyTable errorCounts=(), + FrequencyTable errorSizes=()); + +public data TestStats = testStats( + int filesTested=0, + int testCount=0, + FrequencyTable successfulParses=(), + FrequencyTable successfulRecoveries=(), + FrequencyTable successfulDisambiguations=(), + FrequencyTable failedRecoveries=(), + FrequencyTable parseErrors=(), + FrequencyTable slowParses=(), + FrequencyTable parseTimeRatios=(), + FrequencyTable errorCounts=(), + FrequencyTable errorSizes=()); private TestMeasurement testRecovery(&T (value input, loc origin) standardParser, &T (value input, loc origin) recoveryParser, str input, loc source, loc statFile) { int startTime = 0; int duration = 0; int disambDuration = -1; + int errorCount = 0; int errorSize=0; str result = "?"; TestMeasurement measurement = successfulParse(); @@ -41,14 +76,15 @@ private TestMeasurement testRecovery(&T (value input, loc origin) standardParser Tree t = recoveryParser(input, source); int parseEndTime = realTime(); duration = parseEndTime - startTime; - Maybe[Tree] best = findBestError(t); + list[Tree] errors = findBestErrors(t); + errorCount = size(errors); disambDuration = realTime() - parseEndTime; result = "recovery"; - if (best == nothing()) { + if (errors == []) { measurement = successfulDisambiguation(source=source, duration=duration); } else { - errorSize = size(getErrorText(best.val)); - measurement = recovered(source=source, duration=duration, errorSize=errorSize); + errorSize = (0 | it + size(getErrorText(err)) | err <- errors); + measurement = recovered(source=source, duration=duration, errorCount=errorCount, errorSize=errorSize); } } catch ParseError(_): { result = "error"; @@ -58,7 +94,7 @@ private TestMeasurement testRecovery(&T (value input, loc origin) standardParser } if (statFile != |unknown:///|) { - appendToFile(statFile, ",,,,,\n"); + appendToFile(statFile, ",,,,,,\n"); } return measurement; @@ -75,8 +111,10 @@ FileStats updateStats(FileStats stats, TestMeasurement measurement, int referenc print("."); stats.successfulParses += 1; } - case recovered(errorSize=errorSize): { + case recovered(errorCount=errorCount, errorSize=errorSize): { stats.parseTimeRatios = increment(stats.parseTimeRatios, parseTimeRatio); + stats.errorCounts = increment(stats.errorCounts, errorCount); + stats.errorSizes = increment(stats.errorSizes, errorSize); if (errorSize <= recoverySuccessLimit) { print("+"); stats.successfulRecoveries += 1; @@ -114,7 +152,9 @@ FileStats mergeFileStats(FileStats stats1, FileStats stats2) { failedRecoveries = stats1.failedRecoveries + stats2.failedRecoveries, parseErrors = stats1.parseErrors + stats2.parseErrors, slowParses = stats1.slowParses + stats2.slowParses, - parseTimeRatios = mergeFrequencyTables(stats1.parseTimeRatios, stats2.parseTimeRatios) + parseTimeRatios = mergeFrequencyTables(stats1.parseTimeRatios, stats2.parseTimeRatios), + errorCounts = mergeFrequencyTables(stats1.errorCounts, stats2.errorCounts), + errorSizes = mergeFrequencyTables(stats1.errorSizes, stats2.errorSizes) ); } @@ -138,6 +178,8 @@ TestStats consolidateStats(TestStats cumulativeStats, FileStats fileStats) { cumulativeStats.parseErrors = increment(cumulativeStats.parseErrors, percentage(fileStats.parseErrors, totalFailed)); cumulativeStats.slowParses = increment(cumulativeStats.slowParses, percentage(fileStats.slowParses, totalFailed)); cumulativeStats.parseTimeRatios = mergeFrequencyTables(cumulativeStats.parseTimeRatios, fileStats.parseTimeRatios); + cumulativeStats.errorCounts = mergeFrequencyTables(cumulativeStats.errorCounts, fileStats.errorCounts); + cumulativeStats.errorSizes = mergeFrequencyTables(cumulativeStats.errorSizes, fileStats.errorSizes); cumulativeStats.filesTested += 1; cumulativeStats.testCount += fileStats.totalParses; @@ -167,6 +209,8 @@ TestStats mergeStats(TestStats stats, TestStats stats2) { stats.parseErrors = mergeFrequencyTables(stats.parseErrors, stats2.parseErrors); stats.slowParses = mergeFrequencyTables(stats.slowParses, stats2.slowParses); stats.parseTimeRatios = mergeFrequencyTables(stats.parseTimeRatios, stats2.parseTimeRatios); + stats.errorCounts = mergeFrequencyTables(stats.errorCounts, stats2.errorCounts); + stats.errorSizes = mergeFrequencyTables(stats.errorSizes, stats2.errorSizes); return stats; } @@ -246,6 +290,8 @@ void printFileStats(FileStats fileStats) { printStat("Slow parses", fileStats.slowParses, failedParses); printFrequencyTableHeader(); printFrequencyTableStats("Parse time ratios", fileStats.parseTimeRatios, unit = "log2(ratio)", printTotal=false); + printFrequencyTableStats("Parse error count", fileStats.errorCounts, unit="errors"); + printFrequencyTableStats("Error size", fileStats.errorSizes, unit="chars"); } void printFrequencyTableHeader() { @@ -258,7 +304,7 @@ void printFrequencyTableHeader() { println(right("total", statFieldWidth)); } -void printFrequencyTableStats(str label, FrequencyTable frequencyTable, str unit = "%", bool printTotal=true) { +void printFrequencyTableStats(str label, FrequencyTable frequencyTable, str unit = "%", bool printTotal=true, bool ignoreZero=false) { print(left(label + " ():", statLabelWidth)); int totalCount = (0 | it+frequencyTable[val] | val <- frequencyTable); @@ -285,7 +331,7 @@ void printFrequencyTableStats(str label, FrequencyTable frequencyTable, str unit total += val*count; - if (count > medianCount) { + if (!(val == 0 && ignoreZero) && count > medianCount) { medianCount = count; median = val; } @@ -317,6 +363,8 @@ void printStats(TestStats stats) { printFrequencyTableStats("Parse errors", stats.parseErrors); printFrequencyTableStats("Slow parses", stats.slowParses); printFrequencyTableStats("Parse time ratios", stats.parseTimeRatios, unit = "log2/%", printTotal=false); + printFrequencyTableStats("Parse error counts", stats.errorCounts, unit = "errors", ignoreZero=true); + printFrequencyTableStats("Parse error sizes", stats.errorSizes, unit = "chars", ignoreZero=true); println(); } @@ -419,3 +467,40 @@ TestStats runBatchRecoveryTest(loc syntaxFile, str topSort, loc dir, str ext, in return cumulativeStats; } + +bool checkRecovery(type[&T<:Tree] begin, str input, list[str] expectedErrors, bool visualize=false) { + Tree t = parser(begin, allowRecovery=true, allowAmbiguity=true)(input, |unknown:///?visualize=<"">|); + return checkErrors(t, expectedErrors); +} + +// Print a list of errors +void printErrors(list[Tree] errors) { + for (Tree error <- errors) { + println("\'\'"); + } +} + +// Check a tree contains exactly the expected error +bool checkError(Tree t, str expectedError) = checkErrors(t, [expectedError]); + +// Check if a tree contains exactly the expected errors +bool checkErrors(Tree t, list[str] expectedErrors) { + list[Tree] errors = findBestErrors(t); + if (size(errors) != size(expectedErrors)) { + println("Expected errors, found "); + printErrors(errors); + return false; + } + + for (error <- errors) { + str errorText = getErrorText(error); + if (errorText notin expectedErrors) { + println("Unexpected error: "); + println("All errors found:"); + printErrors(errors); + return false; + } + } + + return true; +} \ No newline at end of file diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/ToyRascalRecoveryTests.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/ToyRascalRecoveryTests.rsc index df94ce76692..fe213811137 100644 --- a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/ToyRascalRecoveryTests.rsc +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/ToyRascalRecoveryTests.rsc @@ -21,6 +21,8 @@ import util::ErrorRecovery; import IO; import util::Maybe; +import lang::rascal::tests::concrete::recovery::RecoveryTestSupport; + Tree parseToyRascal(str input, bool visualize=false) { Tree result = parser(#start[FunctionDeclaration], allowRecovery=true, allowAmbiguity=true)(input, |unknown:///?visualize=<"">|); list[Tree] errors = findAllErrors(result); @@ -30,7 +32,7 @@ Tree parseToyRascal(str input, bool visualize=false) { println("- "); } - println("Best error: "); + println("Best error: "); } return result; @@ -42,16 +44,16 @@ test bool toyRascalOk() { } test bool toyRascalMissingOpenParen() { - Tree t = parseToyRascal("f){}", visualize=true); - return hasErrors(t) && getErrorText(findBestError(t).val) == ")"; + Tree t = parseToyRascal("f){}", visualize=false); + return hasErrors(t) && getErrorText(findBestErrors(t)[0]) == ")"; } test bool toyRascalMissingCloseParen() { - Tree t = parseToyRascal("f({}", visualize=true); - return hasErrors(t) && getErrorText(findBestError(t).val) == "("; + Tree t = parseToyRascal("f({}", visualize=false); + return hasErrors(t) && getErrorText(findBestErrors(t)[0]) == "("; } test bool toyRascalMissingIfBody() { - Tree t = parseToyRascal("f(){if(1){}}", visualize=true); - return hasErrors(t) && getErrorText(findBestError(t).val) == "}"; + Tree t = parseToyRascal("f(){if(1){}}", visualize=false); + return hasErrors(t) && getErrorText(findBestErrors(t)[0]) == "}"; } \ No newline at end of file diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/DisambiguationPerformanceTest.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/DisambiguationPerformanceTest.rsc new file mode 100644 index 00000000000..bc75db43f44 --- /dev/null +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/DisambiguationPerformanceTest.rsc @@ -0,0 +1,16 @@ +module lang::rascal::tests::concrete::recovery::bugs::DisambiguationPerformanceTest + + +import lang::rascal::tests::concrete::recovery::RecoveryTestSupport; +import lang::rascal::\syntax::Rascal; +import ParseTree; +import IO; + +void testPerformance() { + standardParser = parser(#start[Module], allowRecovery=false, allowAmbiguity=true); + recoveryParser = parser(#start[Module], allowRecovery=true, allowAmbiguity=true); + loc source = |std:///lang/box/util/Box2Text.rsc|; + input = readFile(source); + FileStats stats = testDeleteUntilEol(standardParser, recoveryParser, source, input, 200, 100, begin=17496, end=17496); + println(""); +} diff --git a/src/org/rascalmpl/library/util/ErrorRecovery.rsc b/src/org/rascalmpl/library/util/ErrorRecovery.rsc index ef019e1fae4..5db022f5428 100644 --- a/src/org/rascalmpl/library/util/ErrorRecovery.rsc +++ b/src/org/rascalmpl/library/util/ErrorRecovery.rsc @@ -1,29 +1,32 @@ -module util::ErrorRecovery +/** + * Copyright (c) 2024, NWO-I Centrum Wiskunde & Informatica (CWI) + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + * + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **/ + + module util::ErrorRecovery import ParseTree; import String; -import util::Maybe; @synopsis{Check if a parse tree contains any error nodes, the result of error recovery.} bool hasErrors(Tree tree) = /appl(error(_, _, _), _) := tree; @javaClass{org.rascalmpl.library.util.ErrorRecovery} -@synopsis{Find all error productions in a parse tree.} +@synopsis{Find all error productions in a parse tree. The list is created by an outermost visit of the parse tree so if an error tree contains other errors the outermost tree is returned first.} java list[Tree] findAllErrors(Tree tree); -@synopsis{Find the first production containing an error.} -Tree findFirstError(/err:appl(error(_, _, _), _)) = err; - -@synopsis{Find the best error from a tree containing errors. This function will fail if `tree` does not contain an error.} -Maybe[Tree] findBestError(Tree tree) { - Tree disambiguated = disambiguateErrors(tree); - if (/err:appl(error(_, _, _), _) := disambiguated) { - return just(err); - } - - // All errors have disappeared - return nothing(); -} +@synopsis{Disambiguate the error ambiguities in a tree and return the list of remaining errors. +The list is created by an outermost visit of the parse tree so if an error tree contains other errors the outermost tree is returned first.} +list[Tree] findBestErrors(Tree tree) = findAllErrors(disambiguateErrors(tree)); @synopsis{Get the symbol (sort) of the failing production} Symbol getErrorSymbol(appl(error(Symbol sym, _, _), _)) = sym; diff --git a/src/org/rascalmpl/parser/gtd/SGTDBF.java b/src/org/rascalmpl/parser/gtd/SGTDBF.java index 30d5dd11ecd..8753376c6ef 100755 --- a/src/org/rascalmpl/parser/gtd/SGTDBF.java +++ b/src/org/rascalmpl/parser/gtd/SGTDBF.java @@ -936,27 +936,7 @@ private boolean findFirstStacksToReduce() { } } - if (recoverer != null) { - debugListener.reviving(input, location, unexpandableNodes, unmatchableLeafNodes, - unmatchableMidProductionNodes, filteredNodes); - visualize("Recovering", ParseStateVisualizer.ERROR_TRACKING_ID); - DoubleArrayList, AbstractNode> recoveredNodes = recoverer.reviveStacks(input, location, - unexpandableNodes, unmatchableLeafNodes, unmatchableMidProductionNodes, filteredNodes); - debugListener.revived(recoveredNodes); - if (recoveredNodes.size() > 0) { // TODO Do something with the revived node. Is this the right location to - // do this? - for (int i = 0; i < recoveredNodes.size(); i++) { - AbstractStackNode

recovered = recoveredNodes.getFirst(i); - queueMatchableNode(recovered, recovered.getLength(), recoveredNodes.getSecond(i)); - } - parseErrorRecovered = true; - return findStacksToReduce(); - } - - parseErrorEncountered = true; - } - - return false; + return attemptRecovery(); } /** @@ -984,7 +964,11 @@ private boolean findStacksToReduce() { } } - if (recoverer != null && location < input.length) { + return false; + } + + private boolean attemptRecovery() { + if (recoverer != null) { debugListener.reviving(input, location, unexpandableNodes, unmatchableLeafNodes, unmatchableMidProductionNodes, filteredNodes); visualize("Recovering", ParseStateVisualizer.ERROR_TRACKING_ID); @@ -1018,6 +1002,7 @@ private boolean findStacksToReduce() { return false; } + public boolean parseErrorHasOccurred() { return parseErrorEncountered; } @@ -1394,10 +1379,10 @@ protected AbstractNode parse(AbstractStackNode

startNode, URI inputURI, int[] expand(); + AbstractContainerNode

result = null; if (findFirstStacksToReduce()) { boolean shiftedLevel = (location != 0); - - do { + while (true) { lookAheadChar = (location < input.length) ? input[location] : 0; if (shiftedLevel) { // Nullable fix for the first level. sharedNextNodes.clear(); @@ -1423,23 +1408,30 @@ protected AbstractNode parse(AbstractStackNode

startNode, URI inputURI, int[] while (!stacksWithNonTerminalsToReduce.isEmpty() || !stacksWithTerminalsToReduce.isEmpty()); shiftedLevel = true; + + if (!findStacksToReduce()) { + if (location == input.length) { + EdgesSet

startNodeEdgesSet = startNode.getIncomingEdges(); + int resultStoreId = getResultStoreId(startNode.getId()); + if (startNodeEdgesSet != null && startNodeEdgesSet.getLastVisitedLevel(resultStoreId) == input.length) { + result = startNodeEdgesSet.getLastResult(resultStoreId); // Success. + break; + } + } + if (!attemptRecovery()) { + // Unsuccessful parse + break; + } + } } - while (findStacksToReduce()); } visualize("Done", ParseStateVisualizer.PARSER_ID); - // Check if we were successful. - if (location == input.length) { - EdgesSet

startNodeEdgesSet = startNode.getIncomingEdges(); - int resultStoreId = getResultStoreId(startNode.getId()); - if (startNodeEdgesSet != null && startNodeEdgesSet.getLastVisitedLevel(resultStoreId) == input.length) { - // Parsing succeeded. - return startNodeEdgesSet.getLastResult(resultStoreId); // Success. - } + if (result != null) { + return result; } - } - finally { + } finally { checkTime("Parsing"); } diff --git a/src/org/rascalmpl/parser/uptr/recovery/ToTokenRecoverer.java b/src/org/rascalmpl/parser/uptr/recovery/ToTokenRecoverer.java index 98b5e14490f..c4c0da3f011 100644 --- a/src/org/rascalmpl/parser/uptr/recovery/ToTokenRecoverer.java +++ b/src/org/rascalmpl/parser/uptr/recovery/ToTokenRecoverer.java @@ -74,10 +74,10 @@ public DoubleArrayList, AbstractNode> reviveStac collectUnexpandableNodes(unexpandableNodes, failedNodes); collectUnmatchableMidProductionNodes(location, unmatchableMidProductionNodes, failedNodes); - return reviveFailedNodes(input, failedNodes); + return reviveFailedNodes(input, location, failedNodes); } - private DoubleArrayList, AbstractNode> reviveNodes(int[] input, + private DoubleArrayList, AbstractNode> reviveNodes(int[] input, int location, DoubleArrayList, ArrayList> recoveryNodes) { DoubleArrayList, AbstractNode> recoveredNodes = new DoubleArrayList<>(); @@ -102,7 +102,7 @@ private DoubleArrayList, AbstractNode> reviveNod IConstructor prod = prods.get(j); List> skippingNodes = - findSkippingNodes(input, recoveryNode, prod, startLocation); + findSkippingNodes(input, location, recoveryNode, prod, startLocation); for (SkippingStackNode skippingNode : skippingNodes) { AbstractStackNode continuer = new RecoveryPointStackNode<>(stackNodeIdDispenser.dispenseId(), prod, recoveryNode); @@ -122,12 +122,19 @@ private DoubleArrayList, AbstractNode> reviveNod return recoveredNodes; } - private List> findSkippingNodes(int[] input, + private List> findSkippingNodes(int[] input, int location, AbstractStackNode recoveryNode, IConstructor prod, int startLocation) { List> nodes = new java.util.ArrayList<>(); SkippedNode result; + // If we are at the end of the input, skip nothing + if (location >= input.length) { + result = SkippingStackNode.createResultUntilEndOfInput(uri, input, startLocation); + nodes.add(new SkippingStackNode<>(stackNodeIdDispenser.dispenseId(), prod, result, startLocation)); + return nodes; // No other nodes would be useful + } + // If we are the top-level node, just skip the rest of the input if (!recoveryNode.isEndNode() && isTopLevelProduction(recoveryNode)) { result = SkippingStackNode.createResultUntilEndOfInput(uri, input, startLocation); @@ -342,7 +349,9 @@ private AbstractStackNode getSinglePredecessor(AbstractStackNode, AbstractNode> reviveFailedNodes(int[] input, + private DoubleArrayList, AbstractNode> reviveFailedNodes( + int[] input, + int location, ArrayList> failedNodes) { DoubleArrayList, ArrayList> recoveryNodes = new DoubleArrayList<>(); @@ -359,7 +368,7 @@ private DoubleArrayList, AbstractNode> reviveFai findRecoveryNodes(failedNodes.get(i), recoveryNodes); } - return reviveNodes(input, recoveryNodes); + return reviveNodes(input, location, recoveryNodes); } private static void collectUnexpandableNodes(Stack> unexpandableNodes, diff --git a/src/org/rascalmpl/parser/util/DebugUtil.java b/src/org/rascalmpl/parser/util/DebugUtil.java index 0f31c246fed..9932c456c7e 100644 --- a/src/org/rascalmpl/parser/util/DebugUtil.java +++ b/src/org/rascalmpl/parser/util/DebugUtil.java @@ -14,6 +14,8 @@ package org.rascalmpl.parser.util; +import org.rascalmpl.values.parsetrees.ProductionAdapter; + import io.usethesource.vallang.IConstructor; import io.usethesource.vallang.IList; import io.usethesource.vallang.IValue; @@ -29,12 +31,12 @@ private DebugUtil() { public static String prodToString(IConstructor prod) { StringBuilder builder = new StringBuilder("'"); - IConstructor sort = (IConstructor) prod.get(0); - builder.append(quotedStringToPlain(String.valueOf(sort.get(0)))); + builder.append(quotedStringToPlain(ProductionAdapter.getSortName(prod))); builder.append(" ->"); if (prod.getName().equals("prod")) { + ProductionAdapter.getConstructorName(prod); IList children = (IList) prod.get(1); for (IValue child : children) { builder.append(" "); @@ -52,6 +54,10 @@ public static String prodToString(IConstructor prod) { } private static String quotedStringToPlain(String s) { + if (s.length() == 0) { + return s; + } + if (s.charAt(0) == '"' && s.charAt(s.length()-1) == '"') { return s.substring(1, s.length()-1).replace("\\", ""); } diff --git a/src/org/rascalmpl/parser/util/ParseStateVisualizer.java b/src/org/rascalmpl/parser/util/ParseStateVisualizer.java index bbe3d573b90..c99537b0f91 100644 --- a/src/org/rascalmpl/parser/util/ParseStateVisualizer.java +++ b/src/org/rascalmpl/parser/util/ParseStateVisualizer.java @@ -35,6 +35,7 @@ import org.apache.commons.io.FileUtils; import org.rascalmpl.parser.gtd.SGTDBF; +import org.rascalmpl.parser.gtd.result.AbstractContainerNode; import org.rascalmpl.parser.gtd.result.AbstractNode; import org.rascalmpl.parser.gtd.result.CharNode; import org.rascalmpl.parser.gtd.result.EpsilonNode; @@ -42,6 +43,7 @@ import org.rascalmpl.parser.gtd.result.RecoveredNode; import org.rascalmpl.parser.gtd.result.SkippedNode; import org.rascalmpl.parser.gtd.result.SortContainerNode; +import org.rascalmpl.parser.gtd.result.struct.Link; import org.rascalmpl.parser.gtd.stack.AbstractStackNode; import org.rascalmpl.parser.gtd.stack.edge.EdgesSet; import org.rascalmpl.parser.gtd.util.ArrayList; @@ -164,6 +166,10 @@ public void visualizeProductionTrees(AbstractStackNode[] nodes) { writeGraph(createProductionGraph(nodes)); } + public void visualizeNode(AbstractNode node) { + writeGraph(createGraph(node)); + } + public int getFrame() { return frame; } @@ -194,6 +200,13 @@ private synchronized DotGraph createGraph(AbstractStackNode stackN return graph; } + private synchronized DotGraph createGraph(AbstractNode parserNode) { + reset(); + graph = new DotGraph(name, true); + addParserNodes(graph, parserNode); + return graph; + } + private DotGraph createGraph(DoubleArrayList, ArrayList> recoveryNodes) { reset(); graph = new DotGraph(name, true); @@ -368,8 +381,39 @@ private

DotNode createDotNode(AbstractStackNode

stackNode) { return node; } + private NodeId addParserNodes(DotGraph graph, AbstractNode parserNode) { + NodeId id = addParserNode(graph, parserNode); + if (parserNode instanceof AbstractContainerNode) { + @SuppressWarnings("unchecked") + AbstractContainerNode container = (AbstractContainerNode) parserNode; + Link link = container.getFirstAlternative(); + if (link != null) { + NodeId firstPrefix = addPrefixes(graph, link); + graph.addEdge(id, firstPrefix); + } + } + return id; + } + + private NodeId addPrefixes(DotGraph graph, Link link) { + NodeId id = addParserNodes(graph, link.getNode()); + ArrayList prefixes = link.getPrefixes(); + if (prefixes != null) { + for (int i=0; i void addTodoLists(SGTDBF parser, DotGraph graph) { DoubleStack, AbstractNode>[] todoLists = parser.getTodoLists(); int start = parser.getQueueIndex(); - DotNode todoListsNode = DotNode.createArrayNode(TODO_LISTS_ID, todoLists.length); + int todos = Math.min(todoLists.length, 50); + + DotNode todoListsNode = DotNode.createArrayNode(TODO_LISTS_ID, todos); - for (int tokenLength=1; tokenLength<=todoLists.length; tokenLength++) { + for (int tokenLength=1; tokenLength<=todos+1; tokenLength++) { int index = (start + tokenLength - 1) % todoLists.length; DoubleStack, AbstractNode> todoList = todoLists[index]; if (todoList != null && !todoList.isEmpty()) {