From 4656c4be92f385f217c7f9e7a77fc33dae343d6e Mon Sep 17 00:00:00 2001 From: Renata Hodovan Date: Sat, 9 Dec 2023 22:28:25 +0100 Subject: [PATCH] Ignore the EOF rule EOF is not a real rule in ANTLR, it works as a parsing instruction to ensure to match the entire file. However, during generation it doesn't have any importance, hence it can be safely ignored. --- examples/fuzzer/HTMLGenerator.py | 8 ++----- grammarinator/tool/parser.py | 2 +- grammarinator/tool/processor.py | 6 ++--- .../codegen/GeneratorTemplate.py.jinja | 4 ---- tests/grammars/Eof.g4 | 22 ------------------- 5 files changed, 6 insertions(+), 36 deletions(-) delete mode 100644 tests/grammars/Eof.g4 diff --git a/examples/fuzzer/HTMLGenerator.py b/examples/fuzzer/HTMLGenerator.py index d334143..ad0b558 100644 --- a/examples/fuzzer/HTMLGenerator.py +++ b/examples/fuzzer/HTMLGenerator.py @@ -1,4 +1,4 @@ -# Generated by Grammarinator 23.7.post72+gccf98cc +# Generated by Grammarinator 23.7.post71+g23fe545 from math import inf from grammarinator.runtime import * @@ -37,9 +37,6 @@ def _style_sheet(self): def _endOfHtmlElement(self): pass - def EOF(self, parent=None): - return None - def HTML_COMMENT(self, parent=None): with UnlexerRuleContext(self, 'HTML_COMMENT', parent) as rule: current = rule.current @@ -727,10 +724,9 @@ def style(self, parent=None): _default_rule = htmlDocument - _immutable_rules = ('EOF', 'TAG_CLOSE', 'TAG_EQUALS', 'TAG_OPEN', 'TAG_SLASH', 'TAG_SLASH_CLOSE') + _immutable_rules = ('TAG_CLOSE', 'TAG_EQUALS', 'TAG_OPEN', 'TAG_SLASH', 'TAG_SLASH_CLOSE') _rule_sizes = { - 'EOF': RuleSize(0, 0), 'HTML_COMMENT': RuleSize(0, 0), 'HTML_CONDITIONAL_COMMENT': RuleSize(0, 0), 'XML_DECLARATION': RuleSize(0, 0), diff --git a/grammarinator/tool/parser.py b/grammarinator/tool/parser.py index f16fad2..d7d6bc2 100644 --- a/grammarinator/tool/parser.py +++ b/grammarinator/tool/parser.py @@ -168,7 +168,7 @@ def _antlr_to_grammarinator_tree(self, antlr_node, parser, visited=None): depth = max(depth, 
child_depth + 1) else: assert isinstance(antlr_node, TerminalNode), f'An ANTLR node must either be a ParserRuleContext or a TerminalNode but {antlr_node.__class__.__name__} was found.' - name, text = (parser.symbolicNames[antlr_node.symbol.type], antlr_node.symbol.text) if antlr_node.symbol.type != Token.EOF else ('EOF', '') + name, text = (parser.symbolicNames[antlr_node.symbol.type], antlr_node.symbol.text) assert name, f'{name} is None or empty' if not self._hidden: diff --git a/grammarinator/tool/processor.py b/grammarinator/tool/processor.py index b479bd3..8699049 100644 --- a/grammarinator/tool/processor.py +++ b/grammarinator/tool/processor.py @@ -989,7 +989,7 @@ def build_expr(node, parent_id): if '_dot' not in graph.vertices: # Create an artificial `_dot` rule with an alternation of all the lexer rules. parser_dot_id = graph.add_node(UnparserRuleNode(name='_dot', label=None)) - unlexer_ids = [v.name for vid, v in graph.vertices.items() if isinstance(v, UnlexerRuleNode) and v.id != 'EOF'] + unlexer_ids = [v.name for vid, v in graph.vertices.items() if isinstance(v, UnlexerRuleNode)] alt_id = graph.add_node(AlternationNode(rule_id=parser_dot_id, idx=0, conditions=[1] * len(unlexer_ids))) graph.add_edge(frm=parser_dot_id, to=alt_id) for i, lexer_id in enumerate(unlexer_ids): @@ -1033,7 +1033,8 @@ def build_expr(node, parent_id): elif isinstance(node, ANTLRv4Parser.TerminalContext): if node.TOKEN_REF(): - graph.add_edge(frm=parent_id, to=str(node.TOKEN_REF())) + if str(node.TOKEN_REF()) != 'EOF': + graph.add_edge(frm=parent_id, to=str(node.TOKEN_REF())) elif node.STRING_LITERAL(): src = unescape_string(str(node.STRING_LITERAL())[1:-1]) @@ -1136,7 +1137,6 @@ def build_rules(node): graph = GrammarGraph() lambda_id = graph.add_node(LambdaNode()) - graph.add_node(UnlexerRuleNode(name='EOF')) for root in [lexer_root, parser_root]: if root: diff --git a/grammarinator/tool/resources/codegen/GeneratorTemplate.py.jinja 
b/grammarinator/tool/resources/codegen/GeneratorTemplate.py.jinja index e0eaced..3ed0b38 100644 --- a/grammarinator/tool/resources/codegen/GeneratorTemplate.py.jinja +++ b/grammarinator/tool/resources/codegen/GeneratorTemplate.py.jinja @@ -146,7 +146,6 @@ class {{ graph.name }}({{ graph.superclass }}): {% for rule in graph.rules %} def {{ rule.id }}(self, {% for t, k, v in rule.args %}{{ k }}{% if t %}:{{ t }}{% endif %}{% if v %}={{ resolveVarRefs(v) }}{% endif %}, {% endfor %}parent=None): - {% if rule.id != 'EOF' %} {% if rule.labels or rule.args or rule.locals or rule.returns %} local_ctx = { {%- for _, k, _ in rule.args -%} @@ -169,9 +168,6 @@ class {{ graph.name }}({{ graph.superclass }}): current.{{ k }} = local_ctx['{{ k }}'] {% endfor %} return current - {% else %} - return None - {% endif %} {% endfor %} diff --git a/tests/grammars/Eof.g4 b/tests/grammars/Eof.g4 deleted file mode 100644 index 099bc42..0000000 --- a/tests/grammars/Eof.g4 +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright (c) 2017-2023 Renata Hodovan, Akos Kiss. - * - * Licensed under the BSD 3-Clause License - * . - * This file may not be copied, modified, or distributed except - * according to those terms. - */ - -/* - * This test checks whether EOF token is handled properly. It should be - * available without being declared. - */ - -// TEST-PROCESS: {grammar}.g4 -o {tmpdir} -// TEST-GENERATE: {grammar}Generator.{grammar}Generator -r start -j 1 -o {tmpdir}/{grammar}%d.txt -// TEST-ANTLR: {grammar}.g4 -o {tmpdir} -// TEST-REPARSE: -p {grammar}Parser -l {grammar}Lexer -r start {tmpdir}/{grammar}%d.txt - -grammar Eof; - -start: 'pass' EOF;