From e1929178944dca3712dc7c1d5e0fe28a3c8ade3c Mon Sep 17 00:00:00 2001
From: David Chu
Date: Sat, 13 Jul 2024 18:40:12 -0400
Subject: [PATCH] Change main file location

---
 .github/workflows/test.yml            |  3 +-
 examples/invalid/bad_token.lol        |  5 +++
 src/compiler/lexer/lol_lexer.py       | 15 ++++++---
 src/compiler/lexer/lol_lexer_types.py | 44 +++++++++++++++++++++++++--
 src/{compiler => }/main.py            |  0
 test/compiler/test_lolc.py            |  2 +-
 6 files changed, 58 insertions(+), 11 deletions(-)
 create mode 100644 examples/invalid/bad_token.lol
 rename src/{compiler => }/main.py (100%)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 09567fa..e25f6ff 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -17,10 +17,9 @@ jobs:
       - name: Compile
         run: |
           # For some reason, if we change to the compiler directory, Python complains.
-          export PYTHONPATH="${PYTHONPATH}:/home/runner/work/dolang/dolang/src/"
           for x in fibonacci helloworld math_ops nested_if sum_three
           do
-            python src/compiler/lol.py -i examples/$x.lol -o results
+            python src/main.py -i examples/$x.lol -o results
             gcc results/$x-*.c
             ./a.out
           done
diff --git a/examples/invalid/bad_token.lol b/examples/invalid/bad_token.lol
new file mode 100644
index 0000000..05e3b9f
--- /dev/null
+++ b/examples/invalid/bad_token.lol
@@ -0,0 +1,5 @@
+/* Bad token */
+function main() -> i32 {
+    let a: i32 = 1 ++ 2;
+    return 0;
+}
diff --git a/src/compiler/lexer/lol_lexer.py b/src/compiler/lexer/lol_lexer.py
index 3704cd4..b2fc911 100644
--- a/src/compiler/lexer/lol_lexer.py
+++ b/src/compiler/lexer/lol_lexer.py
@@ -206,13 +206,11 @@ def _is_punctuation_implemented(token_type: LolTokenType) -> bool:
             and len(token_type.value) >= 2
             and token_type.value[1]
             in {
-                LolTokenType.NOT_YET_IMPLEMENTED,
-                LolTokenType.WONT_BE_IMPLEMENTED,
+                LolTokenType.NOT_YET_IMPLEMENTED.value,
+                LolTokenType.WONT_BE_IMPLEMENTED.value,
             }
         ):
-            raise NotImplementedError(
-                f"token_type {token_type.n} not implemented"
-            )
+            return False
         return True
 
     @staticmethod
@@ -246,6 +244,13 @@ def lex_punctuation(stream: CharacterStream):
         )
 
         if not Lexer._is_punctuation_implemented(token_type):
+            err = LolError(
+                stream.get_text(),
+                start_pos,
+                start_pos + len(lexeme),
+                "unimplemented token",
+            )
+            print(err)
             raise NotImplementedError
 
         return LolToken(
diff --git a/src/compiler/lexer/lol_lexer_types.py b/src/compiler/lexer/lol_lexer_types.py
index eaf2f8e..a351734 100644
--- a/src/compiler/lexer/lol_lexer_types.py
+++ b/src/compiler/lexer/lol_lexer_types.py
@@ -98,6 +98,44 @@ class LolTokenType(Enum):
     NOT = auto()
 
 
+# UNIMPLEMENTED_TOKEN_TYPES: set[LolTokenType] = {
+# # Unimplemented in tokenizer
+# EXCLAMATION, # !
+# AT, # @
+# PERCENT, # %
+# CIRCUMFLEX, # ^
+# AMPERSAND, # &
+# QUESTION, # ?
+# VBAR, # |
+# # Doubled characters
+# RSHIFT, # >>
+# LSHIFT, # <<
+# GREATER_EQUAL, # >=
+# LESSER_EQUAL, # <=
+# EQUAL_EQUAL, # ==
+# NOT_EQUAL, # !=
+# # Unimplemented in tokenizer (no plan to implement these yet)
+# STAR_STAR, # **
+# PLUS_PLUS, # ++
+# MINUS_MINUS, # --
+# SLASH_SLASH, # //
+# # COLON_EQUAL = auto() # :=
+# # STAR_EQUAL = WONT_BE_IMPLEMENTED # *=
+# # PLUS_EQUAL = WONT_BE_IMPLEMENTED # +=
+# # MINUS_EQUAL = WONT_BE_IMPLEMENTED # -=
+# # SLASH_EQUAL = WONT_BE_IMPLEMENTED # /=
+# # RSHIFT_EQUAL = WONT_BE_IMPLEMENTED # >>=
+# # LSHIFT_EQUAL = WONT_BE_IMPLEMENTED # <<=
+# # PERCENT_EQUAL = WONT_BE_IMPLEMENTED # %=
+# # CIRCUMFLEX_EQUAL = WONT_BE_IMPLEMENTED # ^=
+# # AMPERSAND_EQUAL = WONT_BE_IMPLEMENTED # &=
+# # QUESTION_EQUAL = WONT_BE_IMPLEMENTED # ?=
+# # VBAR_EQUAL = WONT_BE_IMPLEMENTED # |=
+# # AT_EQUAL = WONT_BE_IMPLEMENTED # @=
+# # BSLASH = auto(), WONT_BE_IMPLEMENTED # \
+# }
+
+
 SYMBOL_CONTROL: Dict[Optional[str], Union[Dict, LolTokenType]] = {
     "(": {None: LolTokenType.LPAREN},
     ")": {None: LolTokenType.RPAREN},
@@ -109,7 +147,7 @@ class LolTokenType(Enum):
     ".": {None: LolTokenType.DOT},
     ";": {None: LolTokenType.SEMICOLON},
     "?": {None: LolTokenType.QUESTION},
-    "|": {None: LolTokenType.QUESTION},
+    "|": {None: LolTokenType.VBAR},
     "&": {None: LolTokenType.AMPERSAND},
     "^": {None: LolTokenType.CIRCUMFLEX},
     "@": {None: LolTokenType.AT},
@@ -155,7 +193,7 @@ class LolTokenType(Enum):
 }
 
 
-class Token:
+class LolToken:
     def __init__(
         self,
         lexeme: str,
@@ -185,7 +223,7 @@ def get_token_type_as_str(self):
     def __repr__(self):
         """Pretty print the token. This is NOT for serialization, because
         the token type should be an integer id so that it's easier to parse."""
-        return f"Token(lexeme={repr(self.lexeme)}, token_type={self.get_token_type_as_str()}, start_idx={self.start_position}, full_text?={isinstance(self.full_text, str)})"
+        return f"LolToken(lexeme={repr(self.lexeme)}, token_type={self.get_token_type_as_str()}, start_idx={self.start_position}, full_text?={isinstance(self.full_text, str)})"
 
     def get_line_and_column_numbers(self) -> Optional[Tuple[int, int]]:
         if self.start_position is None or self.full_text is None:
diff --git a/src/compiler/main.py b/src/main.py
similarity index 100%
rename from src/compiler/main.py
rename to src/main.py
diff --git a/test/compiler/test_lolc.py b/test/compiler/test_lolc.py
index 13a812a..507e773 100644
--- a/test/compiler/test_lolc.py
+++ b/test/compiler/test_lolc.py
@@ -2,7 +2,7 @@
 from common import add_compiler_to_sys_path
 
 add_compiler_to_sys_path()
-from compiler.main import LolModule
+from main import LolModule
 
 
 def lol_compile(input_file: str, output_dir: str = "results"):
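
Note (outside the patch itself): the sketch below shows one way to drive the relocated entry point, mirroring the updated CI command `python src/main.py -i examples/$x.lol -o results`. The run_lolc helper and the use of subprocess are illustrative assumptions, not code from this change.

import subprocess
import sys


def run_lolc(input_file: str, output_dir: str = "results") -> None:
    # Equivalent to the CI step above: python src/main.py -i <file> -o <dir>
    subprocess.run(
        [sys.executable, "src/main.py", "-i", input_file, "-o", output_dir],
        check=True,
    )


if __name__ == "__main__":
    # Same example programs the workflow loops over.
    for name in ("fibonacci", "helloworld", "math_ops", "nested_if", "sum_three"):
        run_lolc(f"examples/{name}.lol")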