From 3119c72b8d2dc18f42e6341e185dbdbbea39eb46 Mon Sep 17 00:00:00 2001 From: Pieter Olivier Date: Sat, 12 Oct 2024 11:33:26 +0200 Subject: [PATCH 01/10] Refactored test support --- .../concrete/recovery/BasicRecoveryTests.rsc | 30 ++----- .../concrete/recovery/ListRecoveryTests.rsc | 16 ++-- .../concrete/recovery/NestedRecoveryTests.rsc | 16 +--- .../concrete/recovery/PicoRecoveryTests.rsc | 57 ++++--------- .../concrete/recovery/RascalRecoveryTests.rsc | 83 +++++-------------- .../concrete/recovery/RecoveryTestSupport.rsc | 51 ++++++++++++ .../recovery/ToyRascalRecoveryTests.rsc | 2 + 7 files changed, 104 insertions(+), 151 deletions(-) diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/BasicRecoveryTests.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/BasicRecoveryTests.rsc index 91153b09f0a..692bf97f308 100644 --- a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/BasicRecoveryTests.rsc +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/BasicRecoveryTests.rsc @@ -16,7 +16,8 @@ module lang::rascal::tests::concrete::recovery::BasicRecoveryTests import ParseTree; import util::ErrorRecovery; -import util::Maybe; + +import lang::rascal::tests::concrete::recovery::RecoveryTestSupport; layout Layout = [\ ]* !>> [\ ]; @@ -26,32 +27,17 @@ syntax T = ABC End; syntax ABC = 'a' 'b' 'c'; syntax End = "$"; -private Tree parseS(str input, bool visualize=false) - = parser(#S, allowRecovery=true, allowAmbiguity=true)(input, |unknown:///?visualize=<"">|); - -test bool basicOk() { - return !hasErrors(parseS("a b c $")); -} +test bool basicOk() = checkRecovery(#S, "a b c $", []); -test bool abx() { - Tree t = parseS("a b x $"); - return getErrorText(findBestError(t).val) == "x "; -} +test bool abx() = checkRecovery(#S, "a b x $", ["x "]); -test bool axc() { - Tree t = parseS("a x c $"); - return getErrorText(findBestError(t).val) == "x c"; -} +test bool axc() = checkRecovery(#S, "a x c $", ["x c"]); -test bool ax() { +test bool autoDisambiguation() { str input = "a x $"; - Tree t = parseS(input); - assert size(findAllErrors(t)) == 3; - assert getErrorText(findBestError(t).val) == "x "; + assert checkRecovery(#S, input, ["x "]); Tree autoDisambiguated = parser(#S, allowRecovery=true, allowAmbiguity=false)(input, |unknown:///|); - assert size(findAllErrors(autoDisambiguated)) == 1; - - return getErrorText(findFirstError(autoDisambiguated)) == getErrorText(findBestError(t).val); + return size(findAllErrors(autoDisambiguated)) == 1; } diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/ListRecoveryTests.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/ListRecoveryTests.rsc index 2a98f97fb7b..a91ee40cb2c 100644 --- a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/ListRecoveryTests.rsc +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/ListRecoveryTests.rsc @@ -17,6 +17,8 @@ module lang::rascal::tests::concrete::recovery::ListRecoveryTests import ParseTree; import util::ErrorRecovery; +import lang::rascal::tests::concrete::recovery::RecoveryTestSupport; + layout Layout = [\ ]* !>> [\ ]; syntax S = T End; @@ -29,16 +31,8 @@ Tree parseList(str s, bool visualize=false) { return parser(#S, allowRecovery=true, allowAmbiguity=true)(s, |unknown:///?visualize=<"">|); } -test bool listOk() { - return !hasErrors(parseList("a b , a b , a b $", visualize=true)); -} +test bool listOk() = checkRecovery(#S, "a b , a b , a b $", []); -test bool listTypo() { - Tree t = parseList("a b, a x, ab $", visualize=true); - return hasErrors(t); -} +test bool listTypo() = checkRecovery(#S, "a b, a x, ab $", ["x"]); -test bool listTypoWs() { - Tree t = parseList("a b , a x , a b $", visualize=true); - return hasErrors(t); -} +test bool listTypoWs() = checkRecovery(#S, "a b , a x , a b $", ["x "]); diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/NestedRecoveryTests.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/NestedRecoveryTests.rsc index ac20179b211..13cc49373cf 100644 --- a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/NestedRecoveryTests.rsc +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/NestedRecoveryTests.rsc @@ -14,9 +14,7 @@ module lang::rascal::tests::concrete::recovery::NestedRecoveryTests -import ParseTree; -import util::ErrorRecovery; -import util::Maybe; +import lang::rascal::tests::concrete::recovery::RecoveryTestSupport; layout Layout = [\ ]* !>> [\ ]; @@ -28,14 +26,6 @@ syntax A = "a"; syntax B = "b" "b"; syntax C = "c"; -private Tree parseS(str input, bool visualize=false) - = parser(#S, allowRecovery=true, allowAmbiguity=true)(input, |unknown:///?visualize=<"">|); +test bool nestedOk() = checkRecovery(#S, "a b b c", []); -test bool nestedOk() { - return !hasErrors(parseS("a b b c")); -} - -test bool nestedTypo() { - Tree t = parseS("a b x c"); - return getErrorText(findBestError(t).val) == "x "; -} +test bool nestedTypo() = checkRecovery(#S, "a b x c", ["x "]); diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/PicoRecoveryTests.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/PicoRecoveryTests.rsc index c32e0b39cd3..a8bfb24cace 100644 --- a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/PicoRecoveryTests.rsc +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/PicoRecoveryTests.rsc @@ -18,22 +18,12 @@ import lang::pico::\syntax::Main; import ParseTree; import util::ErrorRecovery; - -import IO; -import String; -import util::Maybe; +import lang::rascal::tests::concrete::recovery::RecoveryTestSupport; Tree parsePico(str input, bool visualize=false) = parser(#Program, allowRecovery=true, allowAmbiguity=true)(input, |unknown:///?visualize=<"">|); -bool checkError(Tree t, str expectedError) { - str bestError = getErrorText(findBestError(t).val); - //println("best error: , expected: "); - return size(bestError) == size(expectedError); -} - -test bool picoOk() { - t = parsePico("begin declare input : natural, +test bool picoOk() = checkRecovery(#Program, "begin declare input : natural, output : natural, repnr : natural, rep : natural; @@ -48,12 +38,9 @@ test bool picoOk() { od; input := input - 1 od -end"); - return !hasErrors(t); -} +end", []); -test bool picoTypo() { - t = parsePico("begin declare input : natural, +test bool picoTypo() = checkRecovery(#Program, "begin declare input : natural, output : natural, repnr : natural, rep : natural; @@ -68,13 +55,9 @@ test bool picoTypo() { od; input := input - 1 od -end"); - - return checkError(t, "output x rep"); -} +end", ["output x rep"]); -test bool picoMissingSemi() { - t = parsePico("begin declare input : natural, +test bool picoMissingSemi() = checkRecovery(#Program, "begin declare input : natural, output : natural, repnr : natural, rep : natural; @@ -89,32 +72,24 @@ test bool picoMissingSemi() { od input := input - 1 od -end"); - return checkError(t, "input := input - 1 - od"); -} +end", ["input := input - 1 + od"]); -test bool picoTypoSmall() { - t = parsePico( -"begin declare; +test bool picoTypoSmall() = checkRecovery(#Program, "begin declare; while input do input x= 14; output := 0 od -end"); +end", ["x= 14"]); - return checkError(t, "x= 14"); -} - -test bool picoMissingSemiSmall() { - t = parsePico( -"begin declare; +test bool picoMissingSemiSmall() = checkRecovery(#Program, "begin declare; while input do input := 14 output := 0 od -end"); +end", ["output := 0 + od"]); + +test bool picoEof() = checkRecovery(#Program, "begin declare; input := 0;", ["input := 0;"]); - return checkError(t, "output := 0 - od"); -} +test bool picoEofError() = checkRecovery(#Program, "begin declare x y; input := 0;", ["x y;", "input := 0;"]); diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RascalRecoveryTests.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RascalRecoveryTests.rsc index 239eb8d1902..40cb84f792b 100644 --- a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RascalRecoveryTests.rsc +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RascalRecoveryTests.rsc @@ -21,6 +21,8 @@ import util::ErrorRecovery; import IO; import util::Maybe; +import lang::rascal::tests::concrete::recovery::RecoveryTestSupport; + bool debugging = false; Tree parseRascal(type[&T] t, str input, bool visualize=false) { @@ -40,94 +42,47 @@ Tree parseRascal(type[&T] t, str input, bool visualize=false) { return result; } +bool checkSingleError(Tree t, str expected) { + list[Tree] errors = findBestErrors(t); + return size(errors) == 1 && getErrorText(getFirstFrom(errors)) == expected; +} + Tree parseRascal(str input, bool visualize=false) = parseRascal(#start[Module], input, visualize=visualize); Tree parseFunctionDeclaration(str input, bool visualize=false) = parseRascal(#FunctionDeclaration, input, visualize=visualize); Tree parseStatement(str input, bool visualize=false) = parseRascal(#Statement, input, visualize=visualize); -test bool rascalOk() { - Tree t = parseRascal(" +test bool rascalOk() = checkRecovery(#start[Module], " module A int inc(int i) { return i+1; } - "); - return !hasErrors(t); -} - -test bool rascalFunctionDeclarationOk() { - Tree t = parseFunctionDeclaration("void f(){}"); - return !hasErrors(t); -} + ", []); +test bool rascalFunctionDeclarationOk() = checkRecovery(#FunctionDeclaration, "void f(){}", []); -test bool rascalModuleFollowedBySemi() { - Tree t = parseRascal(" +test bool rascalModuleFollowedBySemi() = checkRecovery(#start[Module], " module A ; - "); - - // There are a lot of productions in Rascal that have a ; as terminator. - // The parser assumes the user has only entered the ; on one of them, - // so the error list contains them all. - list[Tree] errors = findAllErrors(t); - assert size(errors) == 10; + ", [";"]); - return getErrorText(findFirstError(t)) == ";"; -} - -test bool rascalOperatorTypo() { - Tree t = parseRascal(" +test bool rascalOperatorTypo() = checkRecovery(#start[Module], " module A int f() = 1 x 1; - "); + ", ["x 1;"]); - return getErrorText(findFirstError(t)) == "x 1;"; -} +test bool rascalIllegalStatement() = checkRecovery(#start[Module], "module A void f(){a}", ["a}"]); -test bool rascalIllegalStatement() { - Tree t = parseRascal("module A void f(){a}"); - return getErrorText(findFirstError(t)) == "a}"; -} +test bool rascalMissingCloseParen() = checkRecovery(#start[Module], "module A void f({} void g(){}", ["("]); -test bool rascalMissingCloseParen() { - Tree t = parseRascal("module A void f({} void g(){}"); +test bool rascalFunctionDeclarationMissingCloseParen() = checkRecovery(#FunctionDeclaration, "void f({} void g() {}", ["("]); - assert getErrorText(findFirstError(t)) == "void g("; - assert getErrorText(findBestError(t).val) == "("; +test bool rascalIfMissingExpr() = checkRecovery(#FunctionDeclaration, "void f(){if(){1;}}", [")"]); - return true; -} - -test bool rascalFunctionDeclarationMissingCloseParen() { - Tree t = parseFunctionDeclaration("void f({} void g() {}"); - - assert getErrorText(findFirstError(t)) == "void g("; - - Tree error = findBestError(t).val; - assert getErrorText(error) == "("; - loc location = getSkipped(error).src; - assert location.begin.column == 16 && location.length == 1; - - return true; -} - -test bool rascalIfMissingExpr() { - Tree t = parseFunctionDeclaration("void f(){if(){1;}}", visualize=false); - return getErrorText(findBestError(t).val) == ")"; -} - -test bool rascalIfBodyEmpty() { - Tree t = parseRascal("module A void f(){1;} void g(){if(1){}} void h(){1;}"); - - println("error: "); - assert getErrorText(findBestError(t).val) == "} void h(){1"; - - return true; -} +test bool rascalIfBodyEmpty() = checkRecovery(#start[Module], "module A void f(){1;} void g(){if(1){}} void h(){1;}", ["} void h(){1"]); // Not working yet: /* diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RecoveryTestSupport.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RecoveryTestSupport.rsc index 36b81d00dfb..3bfe0288d80 100644 --- a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RecoveryTestSupport.rsc +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RecoveryTestSupport.rsc @@ -1,3 +1,17 @@ +/** + * Copyright (c) 2024, NWO-I Centrum Wiskunde & Informatica (CWI) + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + * + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **/ + module lang::rascal::tests::concrete::recovery::RecoveryTestSupport import lang::rascal::\syntax::Rascal; @@ -419,3 +433,40 @@ TestStats runBatchRecoveryTest(loc syntaxFile, str topSort, loc dir, str ext, in return cumulativeStats; } + +bool checkRecovery(type[&T<:Tree] begin, str input, list[str] expectedErrors, bool visualize=false) { + Tree t = parser(begin, allowRecovery=true, allowAmbiguity=true)(input, |unknown:///?visualize=<"">|); + return checkErrors(t, expectedErrors); +} + +// Print a list of errors +void printErrors(list[Tree] errors) { + for (Tree error <- errors) { + println("\'\'"); + } +} + +// Check a tree contains exactly the expected error +bool checkError(Tree t, str expectedError) = checkErrors(t, [expectedError]); + +// Check if a tree contains exactly the expected errors +bool checkErrors(Tree t, list[str] expectedErrors) { + list[Tree] errors = findBestErrors(t); + if (size(errors) != size(expectedErrors)) { + println("Expected errors, found "); + printErrors(errors); + return false; + } + + for (error <- errors) { + str errorText = getErrorText(error); + if (!(errorText in expectedErrors)) { + println("Unexpected error: "); + println("All errors found:"); + printErrors(errors); + return false; + } + } + + return true; +} \ No newline at end of file diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/ToyRascalRecoveryTests.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/ToyRascalRecoveryTests.rsc index df94ce76692..ce291c5d819 100644 --- a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/ToyRascalRecoveryTests.rsc +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/ToyRascalRecoveryTests.rsc @@ -21,6 +21,8 @@ import util::ErrorRecovery; import IO; import util::Maybe; +import lang::rascal::tests::concrete::recovery::RecoveryTestSupport; + Tree parseToyRascal(str input, bool visualize=false) { Tree result = parser(#start[FunctionDeclaration], allowRecovery=true, allowAmbiguity=true)(input, |unknown:///?visualize=<"">|); list[Tree] errors = findAllErrors(result); From 9bc79859986fe3e7557389af5248a8f18e786b26 Mon Sep 17 00:00:00 2001 From: Pieter Olivier Date: Sat, 12 Oct 2024 11:34:20 +0200 Subject: [PATCH 02/10] Added support for retrieving all errors instead of just one --- .../rascalmpl/library/util/ErrorRecovery.rsc | 22 +++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/src/org/rascalmpl/library/util/ErrorRecovery.rsc b/src/org/rascalmpl/library/util/ErrorRecovery.rsc index ef019e1fae4..b1eaa07983a 100644 --- a/src/org/rascalmpl/library/util/ErrorRecovery.rsc +++ b/src/org/rascalmpl/library/util/ErrorRecovery.rsc @@ -1,4 +1,18 @@ -module util::ErrorRecovery +/** + * Copyright (c) 2024, NWO-I Centrum Wiskunde & Informatica (CWI) + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + * + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **/ + + module util::ErrorRecovery import ParseTree; import String; @@ -8,7 +22,7 @@ import util::Maybe; bool hasErrors(Tree tree) = /appl(error(_, _, _), _) := tree; @javaClass{org.rascalmpl.library.util.ErrorRecovery} -@synopsis{Find all error productions in a parse tree.} +@synopsis{Find all error productions in a parse tree. The list is created by an outermost visit of the parse tree so if an error tree contains other errors the outermost tree is returned first.} java list[Tree] findAllErrors(Tree tree); @synopsis{Find the first production containing an error.} @@ -25,6 +39,10 @@ Maybe[Tree] findBestError(Tree tree) { return nothing(); } +@synopsis{Disambiguate the error ambiguities in a tree and return the list of remaining errors. +The list is created by an outermost visit of the parse tree so if an error tree contains other errors the outermost tree is returned first.} +list[Tree] findBestErrors(Tree tree) = findAllErrors(disambiguateErrors(tree)); + @synopsis{Get the symbol (sort) of the failing production} Symbol getErrorSymbol(appl(error(Symbol sym, _, _), _)) = sym; From d4ad606f8a3b96011c2d7f1b5f009c88144ea5bc Mon Sep 17 00:00:00 2001 From: Pieter Olivier Date: Sat, 12 Oct 2024 11:38:37 +0200 Subject: [PATCH 03/10] Added visualization of AbstractNodes --- src/org/rascalmpl/parser/util/DebugUtil.java | 10 +++- .../parser/util/ParseStateVisualizer.java | 54 +++++++++++++++++-- 2 files changed, 59 insertions(+), 5 deletions(-) diff --git a/src/org/rascalmpl/parser/util/DebugUtil.java b/src/org/rascalmpl/parser/util/DebugUtil.java index 0f31c246fed..9932c456c7e 100644 --- a/src/org/rascalmpl/parser/util/DebugUtil.java +++ b/src/org/rascalmpl/parser/util/DebugUtil.java @@ -14,6 +14,8 @@ package org.rascalmpl.parser.util; +import org.rascalmpl.values.parsetrees.ProductionAdapter; + import io.usethesource.vallang.IConstructor; import io.usethesource.vallang.IList; import io.usethesource.vallang.IValue; @@ -29,12 +31,12 @@ private DebugUtil() { public static String prodToString(IConstructor prod) { StringBuilder builder = new StringBuilder("'"); - IConstructor sort = (IConstructor) prod.get(0); - builder.append(quotedStringToPlain(String.valueOf(sort.get(0)))); + builder.append(quotedStringToPlain(ProductionAdapter.getSortName(prod))); builder.append(" ->"); if (prod.getName().equals("prod")) { + ProductionAdapter.getConstructorName(prod); IList children = (IList) prod.get(1); for (IValue child : children) { builder.append(" "); @@ -52,6 +54,10 @@ public static String prodToString(IConstructor prod) { } private static String quotedStringToPlain(String s) { + if (s.length() == 0) { + return s; + } + if (s.charAt(0) == '"' && s.charAt(s.length()-1) == '"') { return s.substring(1, s.length()-1).replace("\\", ""); } diff --git a/src/org/rascalmpl/parser/util/ParseStateVisualizer.java b/src/org/rascalmpl/parser/util/ParseStateVisualizer.java index bbe3d573b90..c99537b0f91 100644 --- a/src/org/rascalmpl/parser/util/ParseStateVisualizer.java +++ b/src/org/rascalmpl/parser/util/ParseStateVisualizer.java @@ -35,6 +35,7 @@ import org.apache.commons.io.FileUtils; import org.rascalmpl.parser.gtd.SGTDBF; +import org.rascalmpl.parser.gtd.result.AbstractContainerNode; import org.rascalmpl.parser.gtd.result.AbstractNode; import org.rascalmpl.parser.gtd.result.CharNode; import org.rascalmpl.parser.gtd.result.EpsilonNode; @@ -42,6 +43,7 @@ import org.rascalmpl.parser.gtd.result.RecoveredNode; import org.rascalmpl.parser.gtd.result.SkippedNode; import org.rascalmpl.parser.gtd.result.SortContainerNode; +import org.rascalmpl.parser.gtd.result.struct.Link; import org.rascalmpl.parser.gtd.stack.AbstractStackNode; import org.rascalmpl.parser.gtd.stack.edge.EdgesSet; import org.rascalmpl.parser.gtd.util.ArrayList; @@ -164,6 +166,10 @@ public void visualizeProductionTrees(AbstractStackNode[] nodes) { writeGraph(createProductionGraph(nodes)); } + public void visualizeNode(AbstractNode node) { + writeGraph(createGraph(node)); + } + public int getFrame() { return frame; } @@ -194,6 +200,13 @@ private synchronized DotGraph createGraph(AbstractStackNode stackN return graph; } + private synchronized DotGraph createGraph(AbstractNode parserNode) { + reset(); + graph = new DotGraph(name, true); + addParserNodes(graph, parserNode); + return graph; + } + private DotGraph createGraph(DoubleArrayList, ArrayList> recoveryNodes) { reset(); graph = new DotGraph(name, true); @@ -368,8 +381,39 @@ private

DotNode createDotNode(AbstractStackNode

stackNode) { return node; } + private NodeId addParserNodes(DotGraph graph, AbstractNode parserNode) { + NodeId id = addParserNode(graph, parserNode); + if (parserNode instanceof AbstractContainerNode) { + @SuppressWarnings("unchecked") + AbstractContainerNode container = (AbstractContainerNode) parserNode; + Link link = container.getFirstAlternative(); + if (link != null) { + NodeId firstPrefix = addPrefixes(graph, link); + graph.addEdge(id, firstPrefix); + } + } + return id; + } + + private NodeId addPrefixes(DotGraph graph, Link link) { + NodeId id = addParserNodes(graph, link.getNode()); + ArrayList prefixes = link.getPrefixes(); + if (prefixes != null) { + for (int i=0; i void addTodoLists(SGTDBF parser, DotGraph graph) { DoubleStack, AbstractNode>[] todoLists = parser.getTodoLists(); int start = parser.getQueueIndex(); - DotNode todoListsNode = DotNode.createArrayNode(TODO_LISTS_ID, todoLists.length); + int todos = Math.min(todoLists.length, 50); + + DotNode todoListsNode = DotNode.createArrayNode(TODO_LISTS_ID, todos); - for (int tokenLength=1; tokenLength<=todoLists.length; tokenLength++) { + for (int tokenLength=1; tokenLength<=todos+1; tokenLength++) { int index = (start + tokenLength - 1) % todoLists.length; DoubleStack, AbstractNode> todoList = todoLists[index]; if (todoList != null && !todoList.isEmpty()) { From 4cc3928275f2569054a5823c96153f3632b0eed2 Mon Sep 17 00:00:00 2001 From: Pieter Olivier Date: Sat, 12 Oct 2024 11:39:25 +0200 Subject: [PATCH 04/10] Implemented recovery when the input is a valid prefix (or is so after recovering) --- .../tests/concrete/recovery/bugs/AfterEof.rsc | 16 ++ .../concrete/recovery/bugs/AfterEofInput.txt | 139 ++++++++++++++++++ src/org/rascalmpl/parser/gtd/SGTDBF.java | 62 ++++---- .../uptr/recovery/ToTokenRecoverer.java | 21 ++- 4 files changed, 197 insertions(+), 41 deletions(-) create mode 100644 src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/AfterEof.rsc create mode 100644 src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/AfterEofInput.txt diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/AfterEof.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/AfterEof.rsc new file mode 100644 index 00000000000..005642a0985 --- /dev/null +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/AfterEof.rsc @@ -0,0 +1,16 @@ +module lang::rascal::tests::concrete::recovery::bugs::AfterEof + + +import lang::rascal::tests::concrete::recovery::RecoveryTestSupport; +import lang::rascal::\syntax::Rascal; +import ParseTree; +import IO; + +void testEof() { + standardParser = parser(#start[Module], allowRecovery=false, allowAmbiguity=true); + recoveryParser = parser(#start[Module], allowRecovery=true, allowAmbiguity=true); + loc source = |std:///lang/rascal/tests/concrete/recovery/bugs/AfterEofInput.txt|; + input = readFile(source); + Tree t = recoveryParser(input, source); + println(""); +} diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/AfterEofInput.txt b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/AfterEofInput.txt new file mode 100644 index 00000000000..58323e83161 --- /dev/null +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/AfterEofInput.txt @@ -0,0 +1,139 @@ +@license{ + Copyright (c) 2009-2015 CWI + All rights reserved. This program and the accompanying materials + are made available under the terms of the Eclipse Public License v1.0 + which accompanies this distribution, and is available at + http://www.eclipse.org/legal/epl-v10.html +} +@contributor{Jurgen J. Vinju - Jurgen.Vinju@cwi.nl - CWI} +@contributor{Arnold Lankamp - Arnold.Lankamp@cwi.nl - CWI} +@contributor{Vadim Zaytsev - Vadim.Zaytsev@cwi.nl - CWI} + +@synopsis{A simple but effective internal format for the representation of context-free grammars.} +module Grammar + +extend ParseTree; + + +@synopsis{The Grammar datatype} +@description{ +Grammar is the internal representation (AST) of syntax definitions used in Rascal. +A grammar is a set of productions and set of start symbols. The productions are +stored in a map for efficient access. +} +data Grammar + = \grammar(set[Symbol] starts, map[Symbol sort, Production def] rules) + ; + +data GrammarModule + = \module(str name, set[str] imports, set[str] extends, Grammar grammar); + +data GrammarDefinition + = \definition(str main, map[str name, GrammarModule \mod] modules); + +public Grammar grammar(set[Symbol] starts, set[Production] prods) { + map[Symbol, Production] rules = (); + + for (p <- prods) { + t = (p.def is label) ? p.def.symbol : p.def; + + if (t in rules) { + if (choice(_, existing) := rules[t]) { + rules[t] = choice(t, existing + p); + } + else { + rules[t] = choice(t, {p, rules[t]}) + } + } + else { + rules[t] = choice(t, {p}); + } + } + return grammar(starts, rules); +} + +Grammar grammar(type[&T <: Tree] sym) + = grammar({sym.symbol}, sym.definitions); + + + +@synopsis{An item is an index into the symbol list of a production rule.} +data Item = item(Production production, int index); + + +@synopsis{Compose two grammars.} +@description{ +Compose two grammars by adding the rules of g2 to the rules of g1. +The start symbols of g1 will be the start symbols of the resulting grammar. +} +public Grammar compose(Grammar g1, Grammar g2) { + for (s <- g2.rules) + if (g1.rules[s]?) + g1.rules[s] = choice(s, {g1.rules[s], g2.rules[s]}); + else + g1.rules[s] = g2.rules[s]; + g1.starts += g2.starts; + + reduced_rules = (); + for(s <- g1.rules){ + c = g1.rules[s]; + if (c is choice) { + c.alternatives -= { *choices | priority(_, choices) <- c.alternatives } + + { *alts | associativity(_, _, alts) <- c.alternatives}; + } + reduced_rules[s] = c; + } + + return grammar(g1.starts, reduced_rules); +} + +// TODO:COMPILER +// The above code (temporarily?) replaces the following code +// Reason: the algorithm is faster and compiled code chokes in the set matches +// for not yet known reason. + +//public Grammar compose(Grammar g1, Grammar g2) { +// set[Production] empty = {}; +// for (s <- g2.rules) +// if (g1.rules[s]?) +// g1.rules[s] = choice(s, {g1.rules[s], g2.rules[s]}); +// else +// g1.rules[s] = g2.rules[s]; +// g1.starts += g2.starts; +// +// return innermost visit(g1) { +// case c:choice(_, {p, *r, Production x:priority(_,/p)}) => c[alternatives = {x, *r}] +// case c:choice(_, {p, *r, Production x:associativity(_,_,/p)}) => c[alternatives = {x, *r}] +// }; +//} + +@synopsis{Compute a relation from extender to extended module for the given grammar} +@description{ +Note that this relation is already transitively closed because that is the semantics of extend. +} +rel[str \module, str extended] extends(GrammarDefinition def) { + return { | m <- def.modules, \module(_, _, exts , _) := def.modules[m], e <- exts}+; +} + +@synopsis{Compute a relation from importer to imported modules for the given grammar} +rel[str \module, str imported] imports(GrammarDefinition def) { + return { | m <- def.modules, \module(_, imps, _ , _) := def.modules[m], i <- imps}; +} + +@synopsis{Compute which modules directly depend on which other modules.} +@description{ +This function computes dependencies via import and extend relations. Every module +X that imports Y or extends Y ends up in the result as . The extends relation +that we use is already transitively closed. Next to this we also add dependencies + for all modules X that import Y which extends Z. Because of the transitive +nature of module extension, a module that extends another module exposes all +rules to any importing module. +} +rel[str \module, str dependency] dependencies(GrammarDefinition def) { + imps = imports(def); + exts = extends(def); + + return imps + exts + imps o exts; +} + + diff --git a/src/org/rascalmpl/parser/gtd/SGTDBF.java b/src/org/rascalmpl/parser/gtd/SGTDBF.java index 30d5dd11ecd..8753376c6ef 100755 --- a/src/org/rascalmpl/parser/gtd/SGTDBF.java +++ b/src/org/rascalmpl/parser/gtd/SGTDBF.java @@ -936,27 +936,7 @@ private boolean findFirstStacksToReduce() { } } - if (recoverer != null) { - debugListener.reviving(input, location, unexpandableNodes, unmatchableLeafNodes, - unmatchableMidProductionNodes, filteredNodes); - visualize("Recovering", ParseStateVisualizer.ERROR_TRACKING_ID); - DoubleArrayList, AbstractNode> recoveredNodes = recoverer.reviveStacks(input, location, - unexpandableNodes, unmatchableLeafNodes, unmatchableMidProductionNodes, filteredNodes); - debugListener.revived(recoveredNodes); - if (recoveredNodes.size() > 0) { // TODO Do something with the revived node. Is this the right location to - // do this? - for (int i = 0; i < recoveredNodes.size(); i++) { - AbstractStackNode

recovered = recoveredNodes.getFirst(i); - queueMatchableNode(recovered, recovered.getLength(), recoveredNodes.getSecond(i)); - } - parseErrorRecovered = true; - return findStacksToReduce(); - } - - parseErrorEncountered = true; - } - - return false; + return attemptRecovery(); } /** @@ -984,7 +964,11 @@ private boolean findStacksToReduce() { } } - if (recoverer != null && location < input.length) { + return false; + } + + private boolean attemptRecovery() { + if (recoverer != null) { debugListener.reviving(input, location, unexpandableNodes, unmatchableLeafNodes, unmatchableMidProductionNodes, filteredNodes); visualize("Recovering", ParseStateVisualizer.ERROR_TRACKING_ID); @@ -1018,6 +1002,7 @@ private boolean findStacksToReduce() { return false; } + public boolean parseErrorHasOccurred() { return parseErrorEncountered; } @@ -1394,10 +1379,10 @@ protected AbstractNode parse(AbstractStackNode

startNode, URI inputURI, int[] expand(); + AbstractContainerNode

result = null; if (findFirstStacksToReduce()) { boolean shiftedLevel = (location != 0); - - do { + while (true) { lookAheadChar = (location < input.length) ? input[location] : 0; if (shiftedLevel) { // Nullable fix for the first level. sharedNextNodes.clear(); @@ -1423,23 +1408,30 @@ protected AbstractNode parse(AbstractStackNode

startNode, URI inputURI, int[] while (!stacksWithNonTerminalsToReduce.isEmpty() || !stacksWithTerminalsToReduce.isEmpty()); shiftedLevel = true; + + if (!findStacksToReduce()) { + if (location == input.length) { + EdgesSet

startNodeEdgesSet = startNode.getIncomingEdges(); + int resultStoreId = getResultStoreId(startNode.getId()); + if (startNodeEdgesSet != null && startNodeEdgesSet.getLastVisitedLevel(resultStoreId) == input.length) { + result = startNodeEdgesSet.getLastResult(resultStoreId); // Success. + break; + } + } + if (!attemptRecovery()) { + // Unsuccessful parse + break; + } + } } - while (findStacksToReduce()); } visualize("Done", ParseStateVisualizer.PARSER_ID); - // Check if we were successful. - if (location == input.length) { - EdgesSet

startNodeEdgesSet = startNode.getIncomingEdges(); - int resultStoreId = getResultStoreId(startNode.getId()); - if (startNodeEdgesSet != null && startNodeEdgesSet.getLastVisitedLevel(resultStoreId) == input.length) { - // Parsing succeeded. - return startNodeEdgesSet.getLastResult(resultStoreId); // Success. - } + if (result != null) { + return result; } - } - finally { + } finally { checkTime("Parsing"); } diff --git a/src/org/rascalmpl/parser/uptr/recovery/ToTokenRecoverer.java b/src/org/rascalmpl/parser/uptr/recovery/ToTokenRecoverer.java index 98b5e14490f..c4c0da3f011 100644 --- a/src/org/rascalmpl/parser/uptr/recovery/ToTokenRecoverer.java +++ b/src/org/rascalmpl/parser/uptr/recovery/ToTokenRecoverer.java @@ -74,10 +74,10 @@ public DoubleArrayList, AbstractNode> reviveStac collectUnexpandableNodes(unexpandableNodes, failedNodes); collectUnmatchableMidProductionNodes(location, unmatchableMidProductionNodes, failedNodes); - return reviveFailedNodes(input, failedNodes); + return reviveFailedNodes(input, location, failedNodes); } - private DoubleArrayList, AbstractNode> reviveNodes(int[] input, + private DoubleArrayList, AbstractNode> reviveNodes(int[] input, int location, DoubleArrayList, ArrayList> recoveryNodes) { DoubleArrayList, AbstractNode> recoveredNodes = new DoubleArrayList<>(); @@ -102,7 +102,7 @@ private DoubleArrayList, AbstractNode> reviveNod IConstructor prod = prods.get(j); List> skippingNodes = - findSkippingNodes(input, recoveryNode, prod, startLocation); + findSkippingNodes(input, location, recoveryNode, prod, startLocation); for (SkippingStackNode skippingNode : skippingNodes) { AbstractStackNode continuer = new RecoveryPointStackNode<>(stackNodeIdDispenser.dispenseId(), prod, recoveryNode); @@ -122,12 +122,19 @@ private DoubleArrayList, AbstractNode> reviveNod return recoveredNodes; } - private List> findSkippingNodes(int[] input, + private List> findSkippingNodes(int[] input, int location, AbstractStackNode recoveryNode, IConstructor prod, int startLocation) { List> nodes = new java.util.ArrayList<>(); SkippedNode result; + // If we are at the end of the input, skip nothing + if (location >= input.length) { + result = SkippingStackNode.createResultUntilEndOfInput(uri, input, startLocation); + nodes.add(new SkippingStackNode<>(stackNodeIdDispenser.dispenseId(), prod, result, startLocation)); + return nodes; // No other nodes would be useful + } + // If we are the top-level node, just skip the rest of the input if (!recoveryNode.isEndNode() && isTopLevelProduction(recoveryNode)) { result = SkippingStackNode.createResultUntilEndOfInput(uri, input, startLocation); @@ -342,7 +349,9 @@ private AbstractStackNode getSinglePredecessor(AbstractStackNode, AbstractNode> reviveFailedNodes(int[] input, + private DoubleArrayList, AbstractNode> reviveFailedNodes( + int[] input, + int location, ArrayList> failedNodes) { DoubleArrayList, ArrayList> recoveryNodes = new DoubleArrayList<>(); @@ -359,7 +368,7 @@ private DoubleArrayList, AbstractNode> reviveFai findRecoveryNodes(failedNodes.get(i), recoveryNodes); } - return reviveNodes(input, recoveryNodes); + return reviveNodes(input, location, recoveryNodes); } private static void collectUnexpandableNodes(Stack> unexpandableNodes, From 1c1e6342497484382deb28d02f1280ea1caa40b5 Mon Sep 17 00:00:00 2001 From: Pieter Olivier Date: Sat, 12 Oct 2024 11:44:01 +0200 Subject: [PATCH 05/10] Added a file used to profile error disambiguation --- .../bugs/DisambiguationPerformanceTest.rsc | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/DisambiguationPerformanceTest.rsc diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/DisambiguationPerformanceTest.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/DisambiguationPerformanceTest.rsc new file mode 100644 index 00000000000..bc75db43f44 --- /dev/null +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/DisambiguationPerformanceTest.rsc @@ -0,0 +1,16 @@ +module lang::rascal::tests::concrete::recovery::bugs::DisambiguationPerformanceTest + + +import lang::rascal::tests::concrete::recovery::RecoveryTestSupport; +import lang::rascal::\syntax::Rascal; +import ParseTree; +import IO; + +void testPerformance() { + standardParser = parser(#start[Module], allowRecovery=false, allowAmbiguity=true); + recoveryParser = parser(#start[Module], allowRecovery=true, allowAmbiguity=true); + loc source = |std:///lang/box/util/Box2Text.rsc|; + input = readFile(source); + FileStats stats = testDeleteUntilEol(standardParser, recoveryParser, source, input, 200, 100, begin=17496, end=17496); + println(""); +} From d34fc529a363be2f5fcbc993856e8d6ddb20f6f1 Mon Sep 17 00:00:00 2001 From: Pieter Olivier Date: Mon, 14 Oct 2024 08:25:06 +0200 Subject: [PATCH 06/10] Refactored tests, added stats, and removed obsolete function --- .../concrete/recovery/RascalRecoveryTests.rsc | 28 --------- .../concrete/recovery/RecoveryTestSupport.rsc | 62 ++++++++++++++----- .../rascalmpl/library/util/ErrorRecovery.rsc | 15 ----- 3 files changed, 48 insertions(+), 57 deletions(-) diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RascalRecoveryTests.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RascalRecoveryTests.rsc index 40cb84f792b..cc73efc103a 100644 --- a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RascalRecoveryTests.rsc +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RascalRecoveryTests.rsc @@ -25,34 +25,6 @@ import lang::rascal::tests::concrete::recovery::RecoveryTestSupport; bool debugging = false; -Tree parseRascal(type[&T] t, str input, bool visualize=false) { - Tree result = parser(t, allowRecovery=true, allowAmbiguity=true)(input, |unknown:///?visualize=<"">|); - if (debugging) { - list[Tree] errors = findAllErrors(result); - if (errors != []) { - println("Tree has errors"); - for (error <- errors) { - println("- "); - } - - println("Best error: "); - } - } - - return result; -} - -bool checkSingleError(Tree t, str expected) { - list[Tree] errors = findBestErrors(t); - return size(errors) == 1 && getErrorText(getFirstFrom(errors)) == expected; -} - -Tree parseRascal(str input, bool visualize=false) = parseRascal(#start[Module], input, visualize=visualize); - -Tree parseFunctionDeclaration(str input, bool visualize=false) = parseRascal(#FunctionDeclaration, input, visualize=visualize); - -Tree parseStatement(str input, bool visualize=false) = parseRascal(#Statement, input, visualize=visualize); - test bool rascalOk() = checkRecovery(#start[Module], " module A diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RecoveryTestSupport.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RecoveryTestSupport.rsc index 3bfe0288d80..2324d6f326d 100644 --- a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RecoveryTestSupport.rsc +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RecoveryTestSupport.rsc @@ -23,7 +23,6 @@ import util::Benchmark; import Grammar; import analysis::statistics::Descriptive; import util::Math; -import util::Maybe; import Set; import List; @@ -31,15 +30,37 @@ import lang::rascal::grammar::definition::Modules; alias FrequencyTable = map[int val, int count]; -public data TestMeasurement(loc source=|unknown:///|, int duration=0) = successfulParse() | recovered(int errorSize=0) | parseError() | successfulDisambiguation(); -public data FileStats = fileStats(int totalParses = 0, int successfulParses=0, int successfulRecoveries=0, int successfulDisambiguations=0, int failedRecoveries=0, int parseErrors=0, int slowParses=0, FrequencyTable parseTimeRatios=()); - -public data TestStats = testStats(int filesTested=0, int testCount=0, FrequencyTable successfulParses=(), FrequencyTable successfulRecoveries=(), FrequencyTable successfulDisambiguations=(), FrequencyTable failedRecoveries=(), FrequencyTable parseErrors=(), FrequencyTable slowParses=(), FrequencyTable parseTimeRatios=()); +public data TestMeasurement(loc source=|unknown:///|, int duration=0) = successfulParse() | recovered(int errorCount=0, int errorSize=0) | parseError() | successfulDisambiguation(); +public data FileStats = fileStats( + int totalParses = 0, + int successfulParses=0, + int successfulRecoveries=0, + int successfulDisambiguations=0, + int failedRecoveries=0, + int parseErrors=0, + int slowParses=0, + FrequencyTable parseTimeRatios=(), + FrequencyTable errorCounts=(), + FrequencyTable errorSizes=()); + +public data TestStats = testStats( + int filesTested=0, + int testCount=0, + FrequencyTable successfulParses=(), + FrequencyTable successfulRecoveries=(), + FrequencyTable successfulDisambiguations=(), + FrequencyTable failedRecoveries=(), + FrequencyTable parseErrors=(), + FrequencyTable slowParses=(), + FrequencyTable parseTimeRatios=(), + FrequencyTable errorCounts=(), + FrequencyTable errorSizes=()); private TestMeasurement testRecovery(&T (value input, loc origin) standardParser, &T (value input, loc origin) recoveryParser, str input, loc source, loc statFile) { int startTime = 0; int duration = 0; int disambDuration = -1; + int errorCount = 0; int errorSize=0; str result = "?"; TestMeasurement measurement = successfulParse(); @@ -55,14 +76,15 @@ private TestMeasurement testRecovery(&T (value input, loc origin) standardParser Tree t = recoveryParser(input, source); int parseEndTime = realTime(); duration = parseEndTime - startTime; - Maybe[Tree] best = findBestError(t); + list[Tree] errors = findBestErrors(t); + errorCount = size(errors); disambDuration = realTime() - parseEndTime; result = "recovery"; - if (best == nothing()) { + if (errors == []) { measurement = successfulDisambiguation(source=source, duration=duration); } else { - errorSize = size(getErrorText(best.val)); - measurement = recovered(source=source, duration=duration, errorSize=errorSize); + errorSize = (0 | it + size(getErrorText(err)) | err <- errors); + measurement = recovered(source=source, duration=duration, errorCount=errorCount, errorSize=errorSize); } } catch ParseError(_): { result = "error"; @@ -72,7 +94,7 @@ private TestMeasurement testRecovery(&T (value input, loc origin) standardParser } if (statFile != |unknown:///|) { - appendToFile(statFile, ",,,,,\n"); + appendToFile(statFile, ",,,,,,\n"); } return measurement; @@ -89,8 +111,10 @@ FileStats updateStats(FileStats stats, TestMeasurement measurement, int referenc print("."); stats.successfulParses += 1; } - case recovered(errorSize=errorSize): { + case recovered(errorCount=errorCount, errorSize=errorSize): { stats.parseTimeRatios = increment(stats.parseTimeRatios, parseTimeRatio); + stats.errorCounts = increment(stats.errorCounts, errorCount); + stats.errorSizes = increment(stats.errorSizes, errorSize); if (errorSize <= recoverySuccessLimit) { print("+"); stats.successfulRecoveries += 1; @@ -128,7 +152,9 @@ FileStats mergeFileStats(FileStats stats1, FileStats stats2) { failedRecoveries = stats1.failedRecoveries + stats2.failedRecoveries, parseErrors = stats1.parseErrors + stats2.parseErrors, slowParses = stats1.slowParses + stats2.slowParses, - parseTimeRatios = mergeFrequencyTables(stats1.parseTimeRatios, stats2.parseTimeRatios) + parseTimeRatios = mergeFrequencyTables(stats1.parseTimeRatios, stats2.parseTimeRatios), + errorCounts = mergeFrequencyTables(stats1.errorCounts, stats2.errorCounts), + errorSizes = mergeFrequencyTables(stats1.errorSizes, stats2.errorSizes) ); } @@ -152,6 +178,8 @@ TestStats consolidateStats(TestStats cumulativeStats, FileStats fileStats) { cumulativeStats.parseErrors = increment(cumulativeStats.parseErrors, percentage(fileStats.parseErrors, totalFailed)); cumulativeStats.slowParses = increment(cumulativeStats.slowParses, percentage(fileStats.slowParses, totalFailed)); cumulativeStats.parseTimeRatios = mergeFrequencyTables(cumulativeStats.parseTimeRatios, fileStats.parseTimeRatios); + cumulativeStats.errorCounts = mergeFrequencyTables(cumulativeStats.errorCounts, fileStats.errorCounts); + cumulativeStats.errorSizes = mergeFrequencyTables(cumulativeStats.errorSizes, fileStats.errorSizes); cumulativeStats.filesTested += 1; cumulativeStats.testCount += fileStats.totalParses; @@ -181,6 +209,8 @@ TestStats mergeStats(TestStats stats, TestStats stats2) { stats.parseErrors = mergeFrequencyTables(stats.parseErrors, stats2.parseErrors); stats.slowParses = mergeFrequencyTables(stats.slowParses, stats2.slowParses); stats.parseTimeRatios = mergeFrequencyTables(stats.parseTimeRatios, stats2.parseTimeRatios); + stats.errorCounts = mergeFrequencyTables(stats.errorCounts, stats2.errorCounts); + stats.errorSizes = mergeFrequencyTables(stats.errorSizes, stats2.errorSizes); return stats; } @@ -260,6 +290,8 @@ void printFileStats(FileStats fileStats) { printStat("Slow parses", fileStats.slowParses, failedParses); printFrequencyTableHeader(); printFrequencyTableStats("Parse time ratios", fileStats.parseTimeRatios, unit = "log2(ratio)", printTotal=false); + printFrequencyTableStats("Parse error count", fileStats.errorCounts, unit="errors"); + printFrequencyTableStats("Error size", fileStats.errorSizes, unit="chars"); } void printFrequencyTableHeader() { @@ -272,7 +304,7 @@ void printFrequencyTableHeader() { println(right("total", statFieldWidth)); } -void printFrequencyTableStats(str label, FrequencyTable frequencyTable, str unit = "%", bool printTotal=true) { +void printFrequencyTableStats(str label, FrequencyTable frequencyTable, str unit = "%", bool printTotal=true, bool ignoreZero=false) { print(left(label + " ():", statLabelWidth)); int totalCount = (0 | it+frequencyTable[val] | val <- frequencyTable); @@ -299,7 +331,7 @@ void printFrequencyTableStats(str label, FrequencyTable frequencyTable, str unit total += val*count; - if (count > medianCount) { + if ((val != 0 || !ignoreZero) && count > medianCount) { medianCount = count; median = val; } @@ -331,6 +363,8 @@ void printStats(TestStats stats) { printFrequencyTableStats("Parse errors", stats.parseErrors); printFrequencyTableStats("Slow parses", stats.slowParses); printFrequencyTableStats("Parse time ratios", stats.parseTimeRatios, unit = "log2/%", printTotal=false); + printFrequencyTableStats("Parse error counts", stats.errorCounts, unit = "errors", ignoreZero=true); + printFrequencyTableStats("Parse error sizes", stats.errorSizes, unit = "chars", ignoreZero=true); println(); } diff --git a/src/org/rascalmpl/library/util/ErrorRecovery.rsc b/src/org/rascalmpl/library/util/ErrorRecovery.rsc index b1eaa07983a..5db022f5428 100644 --- a/src/org/rascalmpl/library/util/ErrorRecovery.rsc +++ b/src/org/rascalmpl/library/util/ErrorRecovery.rsc @@ -16,7 +16,6 @@ import ParseTree; import String; -import util::Maybe; @synopsis{Check if a parse tree contains any error nodes, the result of error recovery.} bool hasErrors(Tree tree) = /appl(error(_, _, _), _) := tree; @@ -25,20 +24,6 @@ bool hasErrors(Tree tree) = /appl(error(_, _, _), _) := tree; @synopsis{Find all error productions in a parse tree. The list is created by an outermost visit of the parse tree so if an error tree contains other errors the outermost tree is returned first.} java list[Tree] findAllErrors(Tree tree); -@synopsis{Find the first production containing an error.} -Tree findFirstError(/err:appl(error(_, _, _), _)) = err; - -@synopsis{Find the best error from a tree containing errors. This function will fail if `tree` does not contain an error.} -Maybe[Tree] findBestError(Tree tree) { - Tree disambiguated = disambiguateErrors(tree); - if (/err:appl(error(_, _, _), _) := disambiguated) { - return just(err); - } - - // All errors have disappeared - return nothing(); -} - @synopsis{Disambiguate the error ambiguities in a tree and return the list of remaining errors. The list is created by an outermost visit of the parse tree so if an error tree contains other errors the outermost tree is returned first.} list[Tree] findBestErrors(Tree tree) = findAllErrors(disambiguateErrors(tree)); From 1d609d2fd55654ba8e55c5575cb759251cfa2953 Mon Sep 17 00:00:00 2001 From: Pieter Olivier Date: Mon, 14 Oct 2024 10:51:37 +0200 Subject: [PATCH 07/10] Replaced findBestError with findBestErrors --- .../concrete/recovery/ToyRascalRecoveryTests.rsc | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/ToyRascalRecoveryTests.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/ToyRascalRecoveryTests.rsc index ce291c5d819..fe213811137 100644 --- a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/ToyRascalRecoveryTests.rsc +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/ToyRascalRecoveryTests.rsc @@ -32,7 +32,7 @@ Tree parseToyRascal(str input, bool visualize=false) { println("- "); } - println("Best error: "); + println("Best error: "); } return result; @@ -44,16 +44,16 @@ test bool toyRascalOk() { } test bool toyRascalMissingOpenParen() { - Tree t = parseToyRascal("f){}", visualize=true); - return hasErrors(t) && getErrorText(findBestError(t).val) == ")"; + Tree t = parseToyRascal("f){}", visualize=false); + return hasErrors(t) && getErrorText(findBestErrors(t)[0]) == ")"; } test bool toyRascalMissingCloseParen() { - Tree t = parseToyRascal("f({}", visualize=true); - return hasErrors(t) && getErrorText(findBestError(t).val) == "("; + Tree t = parseToyRascal("f({}", visualize=false); + return hasErrors(t) && getErrorText(findBestErrors(t)[0]) == "("; } test bool toyRascalMissingIfBody() { - Tree t = parseToyRascal("f(){if(1){}}", visualize=true); - return hasErrors(t) && getErrorText(findBestError(t).val) == "}"; + Tree t = parseToyRascal("f(){if(1){}}", visualize=false); + return hasErrors(t) && getErrorText(findBestErrors(t)[0]) == "}"; } \ No newline at end of file From 3877bcd8612bbebfe74510b9e40f018caa1e9eb0 Mon Sep 17 00:00:00 2001 From: Pieter Olivier Date: Mon, 14 Oct 2024 11:09:28 +0200 Subject: [PATCH 08/10] Removed unused test files --- .../tests/concrete/recovery/bugs/AfterEof.rsc | 16 -- .../concrete/recovery/bugs/AfterEofInput.txt | 139 ------------------ 2 files changed, 155 deletions(-) delete mode 100644 src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/AfterEof.rsc delete mode 100644 src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/AfterEofInput.txt diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/AfterEof.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/AfterEof.rsc deleted file mode 100644 index 005642a0985..00000000000 --- a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/AfterEof.rsc +++ /dev/null @@ -1,16 +0,0 @@ -module lang::rascal::tests::concrete::recovery::bugs::AfterEof - - -import lang::rascal::tests::concrete::recovery::RecoveryTestSupport; -import lang::rascal::\syntax::Rascal; -import ParseTree; -import IO; - -void testEof() { - standardParser = parser(#start[Module], allowRecovery=false, allowAmbiguity=true); - recoveryParser = parser(#start[Module], allowRecovery=true, allowAmbiguity=true); - loc source = |std:///lang/rascal/tests/concrete/recovery/bugs/AfterEofInput.txt|; - input = readFile(source); - Tree t = recoveryParser(input, source); - println(""); -} diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/AfterEofInput.txt b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/AfterEofInput.txt deleted file mode 100644 index 58323e83161..00000000000 --- a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/bugs/AfterEofInput.txt +++ /dev/null @@ -1,139 +0,0 @@ -@license{ - Copyright (c) 2009-2015 CWI - All rights reserved. This program and the accompanying materials - are made available under the terms of the Eclipse Public License v1.0 - which accompanies this distribution, and is available at - http://www.eclipse.org/legal/epl-v10.html -} -@contributor{Jurgen J. Vinju - Jurgen.Vinju@cwi.nl - CWI} -@contributor{Arnold Lankamp - Arnold.Lankamp@cwi.nl - CWI} -@contributor{Vadim Zaytsev - Vadim.Zaytsev@cwi.nl - CWI} - -@synopsis{A simple but effective internal format for the representation of context-free grammars.} -module Grammar - -extend ParseTree; - - -@synopsis{The Grammar datatype} -@description{ -Grammar is the internal representation (AST) of syntax definitions used in Rascal. -A grammar is a set of productions and set of start symbols. The productions are -stored in a map for efficient access. -} -data Grammar - = \grammar(set[Symbol] starts, map[Symbol sort, Production def] rules) - ; - -data GrammarModule - = \module(str name, set[str] imports, set[str] extends, Grammar grammar); - -data GrammarDefinition - = \definition(str main, map[str name, GrammarModule \mod] modules); - -public Grammar grammar(set[Symbol] starts, set[Production] prods) { - map[Symbol, Production] rules = (); - - for (p <- prods) { - t = (p.def is label) ? p.def.symbol : p.def; - - if (t in rules) { - if (choice(_, existing) := rules[t]) { - rules[t] = choice(t, existing + p); - } - else { - rules[t] = choice(t, {p, rules[t]}) - } - } - else { - rules[t] = choice(t, {p}); - } - } - return grammar(starts, rules); -} - -Grammar grammar(type[&T <: Tree] sym) - = grammar({sym.symbol}, sym.definitions); - - - -@synopsis{An item is an index into the symbol list of a production rule.} -data Item = item(Production production, int index); - - -@synopsis{Compose two grammars.} -@description{ -Compose two grammars by adding the rules of g2 to the rules of g1. -The start symbols of g1 will be the start symbols of the resulting grammar. -} -public Grammar compose(Grammar g1, Grammar g2) { - for (s <- g2.rules) - if (g1.rules[s]?) - g1.rules[s] = choice(s, {g1.rules[s], g2.rules[s]}); - else - g1.rules[s] = g2.rules[s]; - g1.starts += g2.starts; - - reduced_rules = (); - for(s <- g1.rules){ - c = g1.rules[s]; - if (c is choice) { - c.alternatives -= { *choices | priority(_, choices) <- c.alternatives } + - { *alts | associativity(_, _, alts) <- c.alternatives}; - } - reduced_rules[s] = c; - } - - return grammar(g1.starts, reduced_rules); -} - -// TODO:COMPILER -// The above code (temporarily?) replaces the following code -// Reason: the algorithm is faster and compiled code chokes in the set matches -// for not yet known reason. - -//public Grammar compose(Grammar g1, Grammar g2) { -// set[Production] empty = {}; -// for (s <- g2.rules) -// if (g1.rules[s]?) -// g1.rules[s] = choice(s, {g1.rules[s], g2.rules[s]}); -// else -// g1.rules[s] = g2.rules[s]; -// g1.starts += g2.starts; -// -// return innermost visit(g1) { -// case c:choice(_, {p, *r, Production x:priority(_,/p)}) => c[alternatives = {x, *r}] -// case c:choice(_, {p, *r, Production x:associativity(_,_,/p)}) => c[alternatives = {x, *r}] -// }; -//} - -@synopsis{Compute a relation from extender to extended module for the given grammar} -@description{ -Note that this relation is already transitively closed because that is the semantics of extend. -} -rel[str \module, str extended] extends(GrammarDefinition def) { - return { | m <- def.modules, \module(_, _, exts , _) := def.modules[m], e <- exts}+; -} - -@synopsis{Compute a relation from importer to imported modules for the given grammar} -rel[str \module, str imported] imports(GrammarDefinition def) { - return { | m <- def.modules, \module(_, imps, _ , _) := def.modules[m], i <- imps}; -} - -@synopsis{Compute which modules directly depend on which other modules.} -@description{ -This function computes dependencies via import and extend relations. Every module -X that imports Y or extends Y ends up in the result as . The extends relation -that we use is already transitively closed. Next to this we also add dependencies - for all modules X that import Y which extends Z. Because of the transitive -nature of module extension, a module that extends another module exposes all -rules to any importing module. -} -rel[str \module, str dependency] dependencies(GrammarDefinition def) { - imps = imports(def); - exts = extends(def); - - return imps + exts + imps o exts; -} - - From fc48b50fd90a21705a1094e75b6ca9ee9f3a7582 Mon Sep 17 00:00:00 2001 From: PieterOlivier <103408614+PieterOlivier@users.noreply.github.com> Date: Tue, 15 Oct 2024 09:43:07 +0200 Subject: [PATCH 09/10] Update src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RecoveryTestSupport.rsc Co-authored-by: sungshik <16154899+sungshik@users.noreply.github.com> --- .../lang/rascal/tests/concrete/recovery/RecoveryTestSupport.rsc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RecoveryTestSupport.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RecoveryTestSupport.rsc index 2324d6f326d..530e74c1222 100644 --- a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RecoveryTestSupport.rsc +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RecoveryTestSupport.rsc @@ -331,7 +331,7 @@ void printFrequencyTableStats(str label, FrequencyTable frequencyTable, str unit total += val*count; - if ((val != 0 || !ignoreZero) && count > medianCount) { + if (!(val == 0 && ignoreZero) && count > medianCount) { medianCount = count; median = val; } From 10ce970f3f981ad3bfc908d35f1e664ecc49eeca Mon Sep 17 00:00:00 2001 From: PieterOlivier <103408614+PieterOlivier@users.noreply.github.com> Date: Tue, 15 Oct 2024 09:43:15 +0200 Subject: [PATCH 10/10] Update src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RecoveryTestSupport.rsc Co-authored-by: sungshik <16154899+sungshik@users.noreply.github.com> --- .../lang/rascal/tests/concrete/recovery/RecoveryTestSupport.rsc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RecoveryTestSupport.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RecoveryTestSupport.rsc index 530e74c1222..196643a9c33 100644 --- a/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RecoveryTestSupport.rsc +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/recovery/RecoveryTestSupport.rsc @@ -494,7 +494,7 @@ bool checkErrors(Tree t, list[str] expectedErrors) { for (error <- errors) { str errorText = getErrorText(error); - if (!(errorText in expectedErrors)) { + if (errorText notin expectedErrors) { println("Unexpected error: "); println("All errors found:"); printErrors(errors);