Skip to content

Commit

Permalink
bmw-software-engineering#43 Improve performance
Browse files Browse the repository at this point in the history
Replace the char classification functions with more efficient, but
equivalent, implementations.

This reduces token() runtime from 18.2s to 15.1s, which is a 17%
improvement.
  • Loading branch information
florianschanda committed Oct 23, 2023
1 parent 5b0f823 commit d08ad1d
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 9 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ generated in the following situations:

### 1.2.3-dev

* [TRLC] Various performance improvements when parsing large files.

* [TRLC] Add `--version` flag that can be used to figure out the
installed TRLC version.

Expand Down
62 changes: 62 additions & 0 deletions tests-unit/test_lexer_base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import unittest
import re

from trlc.errors import Message_Handler
from trlc.lexer import Lexer_Base


class Potato(Lexer_Base):
    """Minimal Lexer_Base subclass used as a test fixture.

    The overrides below do nothing; they exist so the class can be
    instantiated and the inherited character classification helpers
    can be called on an instance.
    """

    def file_location(self):
        """Stub override; always returns None."""
        return None

    def token(self):
        """Stub override; always returns None."""
        return None


class Test_Lexer_Base(unittest.TestCase):
    """Compare the Lexer_Base character classifiers with reference
    implementations written as explicit ASCII range comparisons.

    Each test walks every code point from 0 up to and including
    self.test_range and requires the lexer helper and the reference
    to agree.
    """

    def setUp(self):
        self.lexer = Potato(mh      = Message_Handler(),
                            content = "")
        # Highest code point exercised by the tests (inclusive)
        self.test_range = 0xffff

    def tearDown(self):
        pass

    @staticmethod
    def reference_is_alpha(char):
        """Return True if char is one of a-z or A-Z."""
        assert isinstance(char, str) and len(char) == 1
        code = ord(char)
        return ord('a') <= code <= ord('z') or \
            ord('A') <= code <= ord('Z')

    @staticmethod
    def reference_is_numeric(char):
        """Return True if char is one of 0-9."""
        assert isinstance(char, str) and len(char) == 1
        return ord('0') <= ord(char) <= ord('9')

    @staticmethod
    def reference_is_alnum(char):
        """Return True if char is one of a-z, A-Z or 0-9."""
        assert isinstance(char, str) and len(char) == 1
        code = ord(char)
        return ord('a') <= code <= ord('z') or \
            ord('A') <= code <= ord('Z') or \
            ord('0') <= code <= ord('9')

    def _assert_matches_reference(self, reference, candidate):
        """Assert reference and candidate agree on every tested code point.

        Both arguments are callables taking a single-character string.
        The upper bound is inclusive, so the code point equal to
        self.test_range is checked as well.
        """
        for i in range(self.test_range + 1):
            c = chr(i)
            self.assertEqual(reference(c),
                             candidate(c),
                             "mismatch for codepoint %u (%s)" % (i, repr(c)))

    def testIsAlpha(self):
        """is_alpha must agree with the a-z / A-Z reference."""
        self._assert_matches_reference(self.reference_is_alpha,
                                       self.lexer.is_alpha)

    def testIsDigit(self):
        """is_numeric must agree with the 0-9 reference."""
        self._assert_matches_reference(self.reference_is_numeric,
                                       self.lexer.is_numeric)

    def testIsAlnum(self):
        """is_alnum must agree with the a-z / A-Z / 0-9 reference."""
        self._assert_matches_reference(self.reference_is_alnum,
                                       self.lexer.is_alnum)
12 changes: 3 additions & 9 deletions trlc/lexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,25 +201,19 @@ def __init__(self, mh, content):
def is_alpha(char):
    # lobster-trace: LRM.Identifier
    # lobster-trace: LRM.Builtin_Identifier

    # Returns True if char is an ASCII letter (a-z or A-Z).
    #
    # char is expected to be a single-character str. str.isalpha()
    # on its own also accepts non-ASCII letters, so the isascii()
    # guard is what restricts the result to the ASCII ranges.
    return char.isascii() and char.isalpha()

@staticmethod
def is_numeric(char):
    # lobster-trace: LRM.Integers
    # lobster-trace: LRM.Decimals

    # Returns True if char is an ASCII digit (0-9).
    #
    # char is expected to be a single-character str. str.isdigit()
    # on its own also accepts non-ASCII digits (e.g. superscripts),
    # so the isascii() guard restricts the result to 0-9.
    return char.isascii() and char.isdigit()

@staticmethod
def is_alnum(char):
    # lobster-trace: LRM.Identifier
    # lobster-trace: LRM.Builtin_Identifier

    # Returns True if char is an ASCII letter or ASCII digit
    # (a-z, A-Z or 0-9).
    #
    # char is expected to be a single-character str. str.isalnum()
    # on its own also accepts non-ASCII letters and digits, so the
    # isascii() guard restricts the result to the ASCII ranges.
    return char.isascii() and char.isalnum()

@abstractmethod
def file_location(self):
Expand Down

0 comments on commit d08ad1d

Please sign in to comment.