From 501eb7736759628be80b73ee28dbece671e2c491 Mon Sep 17 00:00:00 2001 From: MegaIng Date: Thu, 16 May 2024 15:25:05 +0200 Subject: [PATCH] Fix issue #1414 by correctly putting patterns into non-capturing groups --- lark/load_grammar.py | 6 +++--- tests/test_grammar.py | 13 +++++++++++++ 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/lark/load_grammar.py b/lark/load_grammar.py index 362a845d2..2cc7a81f3 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -625,7 +625,7 @@ def expansion(self, items: List[Pattern]) -> Pattern: if len(items) == 1: return items[0] - pattern = ''.join(i.to_regexp() for i in items) + pattern = ''.join(f'(?:{i.to_regexp()})' for i in items) return _make_joined_pattern(pattern, {i.flags for i in items}) def expansions(self, exps: List[Pattern]) -> Pattern: @@ -636,7 +636,7 @@ def expansions(self, exps: List[Pattern]) -> Pattern: # (Python's re module otherwise prefers just 'l' when given (l|ll) and both could match) exps.sort(key=lambda x: (-x.max_width, -x.min_width, -len(x.value))) - pattern = '(?:%s)' % ('|'.join(i.to_regexp() for i in exps)) + pattern = '(?:%s)' % ('|'.join(f'(?:{i.to_regexp()})' for i in exps)) return _make_joined_pattern(pattern, {i.flags for i in exps}) def expr(self, args) -> Pattern: @@ -652,7 +652,7 @@ def expr(self, args) -> Pattern: op = "{%d,%d}" % (mn, mx) else: assert len(args) == 2 - return PatternRE('(?:%s)%s' % (inner.to_regexp(), op), inner.flags) + return PatternRE(f'(?:{inner.to_regexp()}){op}', inner.flags) def maybe(self, expr): return self.expr(expr + ['?']) diff --git a/tests/test_grammar.py b/tests/test_grammar.py index 624b0799a..737e08651 100644 --- a/tests/test_grammar.py +++ b/tests/test_grammar.py @@ -257,6 +257,19 @@ def test_ranged_repeat_large(self): self.assertRaises(UnexpectedInput, l.parse, u'A' * 8190) self.assertRaises(UnexpectedInput, l.parse, u'A' * 8192) + def test_term_combine(self): + # Issue #1414 + g = """ + start: START + START: A B C + A: "a" + B: "b" + C: /c|d/ + """ + l = Lark(g, parser='lalr') + self.assertEqual(l.parse('abc'), Tree('start', ['abc'])) + self.assertEqual(l.parse('abd'), Tree('start', ['abd'])) + def test_large_terminal(self): g = "start: NUMBERS\n" g += "NUMBERS: " + '|'.join('"%s"' % i for i in range(0, 1000))