Skip to content

Commit

Permalink
Ensure that alternatives and quantifiers within recurring labeled alternatives are numbered uniquely (#240)
Browse files Browse the repository at this point in the history

Until now, the indices of alternatives and quantifiers were
restarted from 0 within every function of a recurring labeled
alternative (e.g., in `rule_Label_1`, `rule_Label_2`, and so on).
This made alternatives and quantifiers within recurring labeled
alternatives indistinguishable from each other for decision
models.

This commit fixes that by numbering alternatives and quantifiers
continuously across all functions belonging to the same label.
  • Loading branch information
akosthekiss authored Oct 28, 2024
1 parent f3c0d14 commit b10b085
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 18 deletions.
30 changes: 13 additions & 17 deletions grammarinator/tool/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -874,7 +874,6 @@ def isfloat(s):

def build_rule(rule, node):
lexer_rule = isinstance(rule, UnlexerRuleNode)
alt_idx, quant_idx, chr_idx = 0, 0, 0 # pylint: disable=unused-variable

def build_expr(node, parent_id):
if isinstance(node, ANTLRv4Parser.ParserRuleSpecContext):
Expand Down Expand Up @@ -903,16 +902,15 @@ def build_expr(node, parent_id):
build_expr(children[0], parent_id)
return

nonlocal alt_idx
conditions = [find_conditions(child) for child in children]
labels = [str(child.identifier().TOKEN_REF() or child.identifier().RULE_REF()) for child in children if child.identifier()] if isinstance(node, ANTLRv4Parser.RuleAltListContext) else []
# Ensure to start labels with capital letter, since ANTLR will also create a context with capital start character.
# It's important to keep them in sync since grammarinator-parse will use this graph for comparison.
labels = [label[0].upper() + label[1:] for label in labels]
recurring_labels = {name for name, cnt in Counter(labels).items() if cnt > 1}
assert len(labels) == 0 or len(labels) == len(children)
alt_id = graph.add_node(AlternationNode(idx=alt_idx, conditions=append_unique(graph.alt_conds, conditions) if all(isfloat(cond) for cond in conditions) else conditions, rule_id=rule.id))
alt_idx += 1
alt_id = graph.add_node(AlternationNode(idx=alt_idx[rule.name], conditions=append_unique(graph.alt_conds, conditions) if all(isfloat(cond) for cond in conditions) else conditions, rule_id=rule.id))
alt_idx[rule.name] += 1
graph.add_edge(frm=parent_id, to=alt_id)

for i, child in enumerate(children):
Expand Down Expand Up @@ -977,11 +975,10 @@ def build_expr(node, parent_id):
build_expr(node.children[0], parent_id)
return

nonlocal quant_idx
suffix = str(suffix.children[0])
quant_ranges = {'?': {'start': 0, 'stop': 1}, '*': {'start': 0, 'stop': 'inf'}, '+': {'start': 1, 'stop': 'inf'}}
quant_id = graph.add_node(QuantifierNode(rule_id=rule.id, idx=quant_idx, **quant_ranges[suffix]))
quant_idx += 1
quant_id = graph.add_node(QuantifierNode(rule_id=rule.id, idx=quant_idx[rule.name], **quant_ranges[suffix]))
quant_idx[rule.name] += 1
graph.add_edge(frm=parent_id, to=quant_id)
build_expr(node.children[0], quant_id)

Expand All @@ -1001,12 +998,10 @@ def build_expr(node, parent_id):
graph.add_edge(frm=parent_id, to=str(node.RULE_REF()), args=parse_arg_action_block(node, 'call') if actions else None)

elif isinstance(node, (ANTLRv4Parser.LexerAtomContext, ANTLRv4Parser.AtomContext)):
nonlocal chr_idx

if node.DOT():
if isinstance(node, ANTLRv4Parser.LexerAtomContext):
graph.add_edge(frm=parent_id, to=graph.add_node(CharsetNode(rule_id=rule.id, idx=chr_idx, charset=dot_charset)))
chr_idx += 1
graph.add_edge(frm=parent_id, to=graph.add_node(CharsetNode(rule_id=rule.id, idx=chr_idx[rule.name], charset=dot_charset)))
chr_idx[rule.name] += 1
else:
if '_dot' not in graph.vertices:
# Create an artificial `_dot` rule with an alternation of all the lexer rules.
Expand All @@ -1029,26 +1024,26 @@ def build_expr(node, parent_id):
not_ranges.extend(chars_from_set(set_element))

charset = unique_charset(multirange_diff(graph.charsets[dot_charset], sorted(not_ranges, key=lambda x: x[0])))
graph.add_edge(frm=parent_id, to=graph.add_node(CharsetNode(rule_id=rule.id, idx=chr_idx, charset=charset)))
chr_idx += 1
graph.add_edge(frm=parent_id, to=graph.add_node(CharsetNode(rule_id=rule.id, idx=chr_idx[rule.name], charset=charset)))
chr_idx[rule.name] += 1

elif isinstance(node, ANTLRv4Parser.LexerAtomContext) and node.characterRange():
start, end = character_range_interval(node)
if lexer_rule:
rule.start_ranges.append((start, end))

charset = unique_charset([(start, end)])
graph.add_edge(frm=parent_id, to=graph.add_node(CharsetNode(rule_id=rule.id, idx=chr_idx, charset=charset)))
chr_idx += 1
graph.add_edge(frm=parent_id, to=graph.add_node(CharsetNode(rule_id=rule.id, idx=chr_idx[rule.name], charset=charset)))
chr_idx[rule.name] += 1

elif isinstance(node, ANTLRv4Parser.LexerAtomContext) and node.LEXER_CHAR_SET():
ranges = lexer_charset_interval(str(node.LEXER_CHAR_SET())[1:-1])
if lexer_rule:
rule.start_ranges.extend(ranges)

charset = unique_charset(sorted(ranges, key=lambda x: x[0]))
graph.add_edge(frm=parent_id, to=graph.add_node(CharsetNode(rule_id=rule.id, idx=chr_idx, charset=charset)))
chr_idx += 1
graph.add_edge(frm=parent_id, to=graph.add_node(CharsetNode(rule_id=rule.id, idx=chr_idx[rule.name], charset=charset)))
chr_idx[rule.name] += 1

for child in node.children:
build_expr(child, parent_id)
Expand Down Expand Up @@ -1168,6 +1163,7 @@ def build_rules(node):
dot_charset = unique_charset(dot_ranges[graph.dot])

literal_lookup = {}
alt_idx, quant_idx, chr_idx = Counter(), Counter(), Counter()

for root in [lexer_root, parser_root]:
if root:
Expand Down
44 changes: 44 additions & 0 deletions tests/grammars/RecurringLabeledAlternatives.g4
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
/*
* Copyright (c) 2024 Renata Hodovan, Akos Kiss.
*
* Licensed under the BSD 3-Clause License
* <LICENSE.rst or https://opensource.org/licenses/BSD-3-Clause>.
* This file may not be copied, modified, or distributed except
* according to those terms.
*/

/*
* This test checks whether alternatives and quantifiers within recurring
* labeled alternatives are numbered uniquely.
*/

// TEST-PROCESS: {grammar}.g4 -o {tmpdir}
// TEST-GENERATE: {grammar}Generator.{grammar}Generator -r start -m {grammar}Generator.CustomModel -j 1 -o {tmpdir}/{grammar}%d.txt

grammar RecurringLabeledAlternatives;

// The header below defines CustomModel, a DefaultModel subclass that is passed
// to the generator via `-m` in the TEST-GENERATE line. It only adds assertions
// and then delegates to DefaultModel:
//   - choice(): the node must be named 'start' or 'start_Binary'; for
//     'start_Binary' the alternation index must be 1.
//   - quantify(): the node must be named 'start_Binary' and the quantifier
//     index must be 1.
// An index of 0 under 'start_Binary' therefore fails the test.
@header {
from grammarinator.runtime import DefaultModel
class CustomModel(DefaultModel):
def choice(self, node, idx, weights):
assert node.name in ['start', 'start_Binary'], node.name
if node.name == 'start_Binary':
assert idx == 1, idx
return super().choice(node, idx, weights)
def quantify(self, node, idx, cnt, start, stop):
assert node.name == 'start_Binary', node.name
assert idx == 1, idx
return super().quantify(node, idx, cnt, start, stop)
}

// Entry rule with two recurring labels: `Binary` and `Unary` each label two
// alternatives. Both `Binary` alternatives contain a nested alternation and a
// `+` quantifier, so with continuous numbering per label the second `Binary`
// alternative carries alternation index 1 and quantifier index 1, which is
// what CustomModel in the header asserts.
// NOTE(review): the `{0}?` predicates presumably disable their alternatives
// during generation, leaving only the second `Binary` alternative reachable;
// confirm against grammarinator's predicate handling.
start
: {0}? ID (('+' | '-') ID)+ # Binary
| {0}? ('++' | '--') ID # Unary
| ID (('*'|'/') ID)+ # Binary
| {0}? ID ('++' | '--') # Unary
;

ID : [a-z] ;
2 changes: 1 addition & 1 deletion tests/parser/exp4.grtj
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"t": "p", "n": "start", "c": [{"t": "a", "ai": 0, "i": 1, "c": [{"t": "p", "n": "start_Quantifiers_test", "c": [{"t": "p", "n": "element", "c": [{"t": "l", "n": "<INVALID>", "s": "pass", "z": [1, 1]}, {"t": "q", "i": 0, "b": 0, "e": 1, "c": [{"t": "qd", "c": [{"t": "a", "ai": 0, "i": 0, "c": [{"t": "l", "n": "<INVALID>", "s": "?", "z": [1, 1]}]}]}]}]}, {"t": "q", "i": 0, "b": 1, "e": Infinity, "c": [{"t": "qd", "c": [{"t": "l", "n": "<INVALID>", "s": " | ", "z": [1, 1]}, {"t": "p", "n": "element", "c": [{"t": "l", "n": "<INVALID>", "s": "pass", "z": [1, 1]}, {"t": "q", "i": 0, "b": 0, "e": 1, "c": []}]}]}]}]}]}]}
{"t": "p", "n": "start", "c": [{"t": "a", "ai": 0, "i": 1, "c": [{"t": "p", "n": "start_Quantifiers_test", "c": [{"t": "p", "n": "element", "c": [{"t": "l", "n": "<INVALID>", "s": "pass", "z": [1, 1]}, {"t": "q", "i": 0, "b": 0, "e": 1, "c": [{"t": "qd", "c": [{"t": "a", "ai": 0, "i": 0, "c": [{"t": "l", "n": "<INVALID>", "s": "?", "z": [1, 1]}]}]}]}]}, {"t": "q", "i": 1, "b": 1, "e": Infinity, "c": [{"t": "qd", "c": [{"t": "l", "n": "<INVALID>", "s": " | ", "z": [1, 1]}, {"t": "p", "n": "element", "c": [{"t": "l", "n": "<INVALID>", "s": "pass", "z": [1, 1]}, {"t": "q", "i": 0, "b": 0, "e": 1, "c": []}]}]}]}]}]}]}

0 comments on commit b10b085

Please sign in to comment.