-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Refactor parser * Create analyzer * Create emitter
- Loading branch information
1 parent
3c7b15c
commit 8da9aba
Showing
27 changed files
with
3,246 additions
and
949 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
REM Activate the project's virtual environment, then format the prototype package.
REM `call` is required: invoking another batch file without it transfers control
REM permanently, so the `black` line below would never run.
call venv\Scripts\activate.bat
black prototype
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
# Light Operational Language | ||
|
||
Formerly `Light Object Language`, but then I decided that I wasn't going to use | ||
OOP patterns. | ||
|
||
## Intent | ||
|
||
Create a transpiler that can rewrite a modern language to C. | ||
|
||
There are many limitations in C89 and C99 that can be overcome by a more | ||
intelligent preprocessing step. This is it. | ||
|
||
## Eventual Features | ||
|
||
In no particular order, here are some fun features that I may add. | ||
|
||
1. Generics. | ||
2. Traits/Interfaces. | ||
3. Lambdas/Closure. (???) | ||
4. Borrow checker. (???) | ||
5. Closures. (???) | ||
|
||
## Ecosystem | ||
|
||
1. VS Code extension with syntax highlighting. | ||
2. Bootstrap this into its own language. What's nice is that once we write it, | ||
it will generate C code so we can bootstrap it somewhat continuously (if you | ||
know what I mean... I wasn't very clear). |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
# from typing import Dict, Union | ||
# | ||
# from analyzer.lol_analyzer_types import TypeDef, FunctionDef | ||
# | ||
# | ||
# def create_builtin_type(): | ||
# namespace_type = TypeDef("namespace") | ||
# int_type = TypeDef("int") | ||
# bool_type = TypeDef("bool") | ||
# float_type = TypeDef("float") | ||
# str_type = TypeDef("str") | ||
# | ||
# # Create int ops | ||
# int_type.add_binop("+", int_type, int_type) | ||
# int_type.add_binop("-", int_type, int_type) | ||
# int_type.add_binop("*", int_type, int_type) | ||
# int_type.add_binop("/", int_type, int_type) | ||
# | ||
# int_type.add_binop("<", int_type, bool_type) | ||
# int_type.add_binop("<=", int_type, bool_type) | ||
# int_type.add_binop("==", int_type, bool_type) | ||
# int_type.add_binop("!=", int_type, bool_type) | ||
# int_type.add_binop(">=", int_type, bool_type) | ||
# int_type.add_binop(">", int_type, bool_type) | ||
# | ||
# BUILTINS: Dict[str, Union[TypeDef, FunctionDef]] = { | ||
# "int": TypeDef("int") | ||
# } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
"""This file aids with the libc libraries.""" | ||
|
||
|
||
class CLibrary:
    """Placeholder for a libc library description.

    The constructor accepts a library name but does not store or use it yet.
    """

    def __init__(self, name):
        # Stub: `name` is accepted and intentionally left unused for now.
        return None
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,198 @@ | ||
""" | ||
Add to list of used symbols (up to and including C99 standard). | ||
TODO | ||
---- | ||
1. Each symbol from C should have: | ||
a. its source, | ||
b. its type, | ||
c. etc. | ||
attached to make it easy to add to the symbol table. | ||
""" | ||
from enum import Enum, auto, unique | ||
from typing import Dict | ||
|
||
|
||
@unique
class SymbolSource(Enum):
    """Where a symbol comes from: C, LOL itself, or the user."""

    # --- C language ---
    C_BUILTIN = auto()
    C_STDLIB = auto()
    C_STDIO = auto()

    # --- LOL ---
    LOL_BUILTIN = auto()

    # --- User ---
    USER = auto()
|
||
|
||
@unique
class CSymbolType(Enum):
    """Kind of a C symbol (macro, type, constant, function, or other)."""

    MACRO = auto()
    TYPE = auto()
    CONSTANT = auto()
    FUNCTION = auto()
    # Catch-all for anything not covered by the members above.
    OTHER = auto()
|
||
|
||
################################################################################ | ||
### LANGUAGE KEYWORDS | ||
################################################################################ | ||
# Keywords of C89; every entry maps to SymbolSource.C_BUILTIN.
_C89_KEYWORDS: Dict[str, SymbolSource] = dict.fromkeys(
    (
        "auto",
        "break",
        "case",
        "char",
        "const",
        "continue",
        "default",
        "do",
        "double",
        "else",
        "enum",
        "extern",
        "float",
        "for",
        "goto",
        "if",
        "int",
        "long",
        "register",
        "return",
        "short",
        "signed",
        "sizeof",
        "static",
        "struct",
        "switch",
        "typedef",
        "union",
        "unsigned",
        "void",
        "volatile",
        "while",
    ),
    SymbolSource.C_BUILTIN,
)

# Keywords added by C99; every entry maps to SymbolSource.C_BUILTIN.
_C99_KEYWORDS: Dict[str, SymbolSource] = dict.fromkeys(
    (
        "inline",
        "restrict",
        "_Bool",
        "_Complex",
        "_Imaginary",
    ),
    SymbolSource.C_BUILTIN,
)

# Union of both keyword tables (C89 entries first, then C99).
C_KEYWORDS = dict(_C89_KEYWORDS)
C_KEYWORDS.update(_C99_KEYWORDS)
|
||
|
||
################################################################################ | ||
### C STANDARD LIBRARY SYMBOLS | ||
################################################################################ | ||
# Symbols provided by <stdio.h>, each mapped to SymbolSource.C_STDIO.
C_STDIO_KEYWORDS: Dict[str, SymbolSource] = {
    # According to https://www.tutorialspoint.com/c_standard_library/stdio_h.htm
    # Types
    "size_t": SymbolSource.C_STDIO,
    "FILE": SymbolSource.C_STDIO,
    # The file-position type is spelled `fpos_t` (was misspelled "fpost_t").
    "fpos_t": SymbolSource.C_STDIO,
    # Macros
    "NULL": SymbolSource.C_STDIO,
    "_IOFBF": SymbolSource.C_STDIO,
    "_IOLBF": SymbolSource.C_STDIO,
    "_IONBF": SymbolSource.C_STDIO,
    "BUFSIZ": SymbolSource.C_STDIO,
    "EOF": SymbolSource.C_STDIO,
    "FOPEN_MAX": SymbolSource.C_STDIO,
    "FILENAME_MAX": SymbolSource.C_STDIO,
    "L_tmpnam": SymbolSource.C_STDIO,
    "SEEK_CUR": SymbolSource.C_STDIO,
    "SEEK_END": SymbolSource.C_STDIO,
    "SEEK_SET": SymbolSource.C_STDIO,
    "TMP_MAX": SymbolSource.C_STDIO,
    "stderr": SymbolSource.C_STDIO,
    "stdin": SymbolSource.C_STDIO,
    "stdout": SymbolSource.C_STDIO,
    # Functions
    "fclose": SymbolSource.C_STDIO,
    "clearerr": SymbolSource.C_STDIO,
    "feof": SymbolSource.C_STDIO,
    "ferror": SymbolSource.C_STDIO,
    "fflush": SymbolSource.C_STDIO,
    "fgetpos": SymbolSource.C_STDIO,
    "fopen": SymbolSource.C_STDIO,
    "fread": SymbolSource.C_STDIO,
    "freopen": SymbolSource.C_STDIO,
    "fseek": SymbolSource.C_STDIO,
    "fsetpos": SymbolSource.C_STDIO,
    "ftell": SymbolSource.C_STDIO,
    "fwrite": SymbolSource.C_STDIO,
    "remove": SymbolSource.C_STDIO,
    "rename": SymbolSource.C_STDIO,
    "rewind": SymbolSource.C_STDIO,
    "setbuf": SymbolSource.C_STDIO,
    "setvbuf": SymbolSource.C_STDIO,
    "tmpfile": SymbolSource.C_STDIO,
    "tmpnam": SymbolSource.C_STDIO,
    "fprintf": SymbolSource.C_STDIO,
    "printf": SymbolSource.C_STDIO,
    "sprintf": SymbolSource.C_STDIO,
    "vfprintf": SymbolSource.C_STDIO,
    "vprintf": SymbolSource.C_STDIO,
    "vsprintf": SymbolSource.C_STDIO,
    "fscanf": SymbolSource.C_STDIO,
    "scanf": SymbolSource.C_STDIO,
    "sscanf": SymbolSource.C_STDIO,
    "fgetc": SymbolSource.C_STDIO,
    "fgets": SymbolSource.C_STDIO,
    "fputc": SymbolSource.C_STDIO,
    "fputs": SymbolSource.C_STDIO,
    "getc": SymbolSource.C_STDIO,
    "getchar": SymbolSource.C_STDIO,
    "gets": SymbolSource.C_STDIO,  # NOTE: very dangerous function!
    "putc": SymbolSource.C_STDIO,
    "putchar": SymbolSource.C_STDIO,
    "puts": SymbolSource.C_STDIO,
    "ungetc": SymbolSource.C_STDIO,
    "perror": SymbolSource.C_STDIO,
}
|
||
# Symbols provided by <stdlib.h>, each mapped to SymbolSource.C_STDLIB.
C_STDLIB_KEYWORDS: Dict[str, SymbolSource] = dict.fromkeys(
    (
        # According to https://www.tutorialspoint.com/c_standard_library/stdlib_h.htm
        # Types
        "size_t",
        "wchar_t",
        "div_t",
        "ldiv_t",
        # Macros
        "NULL",
        "EXIT_FAILURE",
        "EXIT_SUCCESS",
        "RAND_MAX",
        "MB_CUR_MAX",
        # Functions
        "atof",
        "atoi",
        "atol",
        "strtod",
        "strtol",
        "strtoul",
        "calloc",
        "free",
        "malloc",
        "realloc",
        "abort",
        "atexit",
        "exit",
        "getenv",
        "system",
        "bsearch",
        "qsort",
        "abs",
        "div",
        "labs",
        "ldiv",
        "rand",
        "srand",
        "mblen",
        "mbstowcs",
        "mbtowc",
        "wcstombs",
        "wctomb",
    ),
    SymbolSource.C_STDLIB,
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
from typing import Dict, List | ||
|
||
from prototype.analyzer.lol_analyzer_types import ( | ||
LolAnalysisObj, | ||
LolModule, | ||
) | ||
from prototype.parser.lol_parser import ( | ||
ASTNode, | ||
FunctionDefinitionNode, | ||
ImportModuleNode, | ||
VariableDefinitionNode, | ||
) | ||
|
||
|
||
### HELPER FUNCTIONS | ||
def extract_names_in_module(
    ast_nodes: List[ASTNode], raw_text: str
) -> LolModule:
    """
    Build a new module and register the names of its top-level nodes.

    Function definitions, variable definitions, and imports are each handed
    to the matching ``LolModule`` registration method. Any other node type at
    the top level is rejected.

    Raises
    ------
    ValueError
        If a node is not a function definition, variable definition, or
        import.

    TODO
    ----
    1. Add struct/enum/monad
    """
    module = LolModule("", raw_text)

    for node in ast_nodes:
        if isinstance(node, FunctionDefinitionNode):
            module.add_function_name(node)
            continue
        if isinstance(node, VariableDefinitionNode):
            module.add_variable_definition_name(node)
            continue
        if isinstance(node, ImportModuleNode):
            # TODO(dchu) - recursively add members to this submodule!
            module.add_submodule(node)
            continue
        # TODO(dchu): accept data structures
        # Nothing else is allowed outside of functions, so this is an error.
        raise ValueError(f"{node} cannot be outside of functions!")

    return module
|
||
|
||
def get_prototypes(module: LolModule, ast_nodes: List[ASTNode], raw_text: str):
    """
    Add function and variable prototypes to ``module``.

    Import nodes are skipped. ``raw_text`` is accepted but not used here.

    Raises
    ------
    ValueError
        If a node is not a function definition, variable definition, or
        import.
    """
    for node in ast_nodes:
        if isinstance(node, FunctionDefinitionNode):
            module.add_function_prototype(node)
            continue
        if isinstance(node, VariableDefinitionNode):
            module.add_variable_definition_prototype(node)
            continue
        if isinstance(node, ImportModuleNode):
            # Imports are skipped in this pass.
            continue
        # Nothing else is allowed outside of functions, so this is an error.
        raise ValueError(f"{node} cannot be outside of functions!")
|
||
|
||
def get_bodies(module: LolModule, ast_nodes: List[ASTNode], raw_text: str):
    """
    Add function and variable definition bodies to ``module``.

    Import nodes are skipped. ``raw_text`` is accepted but not used here.

    Raises
    ------
    ValueError
        If a node is not a function definition, variable definition, or
        import.
    """
    for node in ast_nodes:
        if isinstance(node, FunctionDefinitionNode):
            module.add_function_body(node)
            continue
        if isinstance(node, VariableDefinitionNode):
            module.add_variable_definition_body(node)
            continue
        if isinstance(node, ImportModuleNode):
            # Imports are skipped in this pass.
            continue
        # Nothing else is allowed outside of functions, so this is an error.
        raise ValueError(f"{node} cannot be outside of functions!")
|
||
|
||
def analyze(asts: List[ASTNode], raw_text: str) -> LolModule:
    """
    Run the analysis passes over the top-level AST nodes of one source text.

    Parameters
    ----------
    asts
        Top-level AST nodes of the module.
    raw_text
        Source text the nodes came from; forwarded to each pass.

    Returns
    -------
    LolModule
        The module produced by the name-extraction pass, after the prototype
        pass has been applied to it.

    Raises
    ------
    ValueError
        Propagated from the passes when a node is not a function definition,
        variable definition, or import.
    """
    # Get names for functions, etc
    module: LolModule = extract_names_in_module(asts, raw_text)
    # Get prototypes for functions
    get_prototypes(module, asts, raw_text)
    # NOTE(review): get_bodies() is not invoked here - confirm whether the
    # body pass is intentionally left out.
    return module
Oops, something went wrong.