diff --git a/plare/lexer.py b/plare/lexer.py
index 99bc024..f7fa2f8 100644
--- a/plare/lexer.py
+++ b/plare/lexer.py
@@ -69,11 +69,13 @@ def lex(self, var: str, src: str) -> Generator[Token]:
                     yield from token
                 case _:
                     var = token
-            newlines = matched.count("\n")
-            lineno += newlines
-            if newlines > 0:
+            matched_lines = matched.split("\n")
+            n_matched_new_lines = len(matched_lines) - 1
+            lineno += n_matched_new_lines
+            if n_matched_new_lines > 0:
                 offset = 0
-            offset = len(matched) - matched.rfind("\n")
+            last_matched_line = matched_lines[-1]
+            offset += len(last_matched_line)
             break
         else:
             if len(src) == 0:
diff --git a/tests/test_lexer.py b/tests/test_lexer.py
index 6a9aa30..45a4b4f 100644
--- a/tests/test_lexer.py
+++ b/tests/test_lexer.py
@@ -118,3 +118,27 @@ def test_lex_multiple_tokens_for_single_match():
     assert isinstance(tokens[2], SPACE)
     assert tokens[2].lineno == 1
     assert tokens[2].offset == 2
+
+
+class EOF(Token):
+    pass
+
+
+def test_lex_end_of_file():
+    lexer = Lexer(
+        {
+            "start": [
+                (r"\d+", NUM),
+                (r"$", EOF),
+            ]
+        }
+    )
+    tokens = list(lexer.lex("start", "123"))
+    assert len(tokens) == 2
+    assert isinstance(tokens[0], NUM)
+    assert tokens[0].value == 123
+    assert tokens[0].lineno == 1
+    assert tokens[0].offset == 0
+    assert isinstance(tokens[1], EOF)
+    assert tokens[1].lineno == 1
+    assert tokens[1].offset == 3
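
Note (illustration, not part of the patch): the old expression `offset = len(matched) - matched.rfind("\n")` over-counts by one, because `rfind` points at the newline character itself and returns -1 when no newline is present; it also overwrites rather than accumulates the column. The patch replaces it with the rule sketched below, extracted here into a hypothetical `advance` helper for clarity (the patch computes this inline inside `Lexer.lex`).

def advance(lineno: int, offset: int, matched: str) -> tuple[int, int]:
    matched_lines = matched.split("\n")
    n_matched_new_lines = len(matched_lines) - 1
    lineno += n_matched_new_lines
    if n_matched_new_lines > 0:
        offset = 0  # crossing a newline resets the column
    offset += len(matched_lines[-1])  # advance past the text on the last line
    return lineno, offset


assert advance(1, 0, "123") == (1, 3)    # matches the new EOF test: offset 3
assert advance(1, 3, "\n") == (2, 0)     # newline: next line, column 0
assert advance(2, 0, "a\nbc") == (3, 2)  # multi-line match ends at column 2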