Skip to content

Commit

Permalink
Compile "Hello, World!" (#61)
Browse files Browse the repository at this point in the history
* Refactor parser
* Create analyzer
* Create emitter
  • Loading branch information
thedavidchu authored Jan 2, 2024
1 parent 3c7b15c commit 8da9aba
Show file tree
Hide file tree
Showing 27 changed files with 3,246 additions and 949 deletions.
2 changes: 2 additions & 0 deletions lint.bat
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
rem Activate the project's virtual environment. `call` is required here:
rem running another batch file without it hands control over permanently,
rem so the formatter below would never be reached.
call venv\Scripts\activate.bat
rem Format the prototype package in place.
black prototype
28 changes: 28 additions & 0 deletions prototype/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Light Operational Language

Formerly `Light Object Language`, but then I decided that I wasn't going to use
OOP patterns.

## Intent

Create a transpiler that can rewrite a modern language to C.

C89 and C99 have many limitations that a more intelligent preprocessing step
can work around. This is that step.

## Eventual Features

In no particular order, here are some fun features that I may add.

1. Generics.
2. Traits/Interfaces.
3. Lambdas. (???)
4. Borrow checker. (???)
5. Closures. (???)

## Ecosystem

1. VS Code extension with syntax highlighting.
2. Bootstrap this into its own language. What's nice is that once we write it,
it will generate C code so we can bootstrap it somewhat continuously (if you
know what I mean... I wasn't very clear).
Empty file added prototype/__init__.py
Empty file.
28 changes: 28 additions & 0 deletions prototype/analyzer/deprecated/builtins.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# from typing import Dict, Union
#
# from analyzer.lol_analyzer_types import TypeDef, FunctionDef
#
#
# def create_builtin_type():
# namespace_type = TypeDef("namespace")
# int_type = TypeDef("int")
# bool_type = TypeDef("bool")
# float_type = TypeDef("float")
# str_type = TypeDef("str")
#
# # Create int ops
# int_type.add_binop("+", int_type, int_type)
# int_type.add_binop("-", int_type, int_type)
# int_type.add_binop("*", int_type, int_type)
# int_type.add_binop("/", int_type, int_type)
#
# int_type.add_binop("<", int_type, bool_type)
# int_type.add_binop("<=", int_type, bool_type)
# int_type.add_binop("==", int_type, bool_type)
# int_type.add_binop("!=", int_type, bool_type)
# int_type.add_binop(">=", int_type, bool_type)
# int_type.add_binop(">", int_type, bool_type)
#
# BUILTINS: Dict[str, Union[TypeDef, FunctionDef]] = {
# "int": TypeDef("int")
# }
8 changes: 8 additions & 0 deletions prototype/analyzer/deprecated/c_builtins/libc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"""This file aids with the libc libraries."""


class CLibrary:
    """Placeholder for a description of a C library.

    The class is a stub: constructing it records nothing.
    """

    def __init__(self, name):
        """Accept the library's `name`; the value is currently discarded."""


198 changes: 198 additions & 0 deletions prototype/analyzer/deprecated/c_keywords.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,198 @@
"""
Add to list of used symbols (up to and including C99 standard).
TODO
----
1. Each symbol from C should have:
a. its source,
b. its type,
c. etc.
attached to make it easy to add to the symbol table.
"""
from enum import Enum, auto, unique
from typing import Dict


@unique
class SymbolSource(Enum):
    """Where a symbol originates: C itself, a C header, LOL, or user code.

    Values are spelled out explicitly; they match the 1-based sequence that
    `auto()` would assign in this order.
    """

    # C language
    C_BUILTIN = 1
    C_STDLIB = 2
    C_STDIO = 3
    # LOL
    LOL_BUILTIN = 4
    # User
    USER = 5


@unique
class CSymbolType(Enum):
    """Category of a C symbol (macro, type, constant, function, or other).

    Values are spelled out explicitly; they match the 1-based sequence that
    `auto()` would assign in this order.
    """

    MACRO = 1
    TYPE = 2
    CONSTANT = 3
    FUNCTION = 4
    OTHER = 5


################################################################################
### LANGUAGE KEYWORDS
################################################################################
# Reserved words of C89, in alphabetical order. Every one of them maps to
# the same source, so the table is built from a plain tuple of names.
_C89_KEYWORDS: Dict[str, SymbolSource] = dict.fromkeys(
    (
        "auto",
        "break",
        "case",
        "char",
        "const",
        "continue",
        "default",
        "do",
        "double",
        "else",
        "enum",
        "extern",
        "float",
        "for",
        "goto",
        "if",
        "int",
        "long",
        "register",
        "return",
        "short",
        "signed",
        "sizeof",
        "static",
        "struct",
        "switch",
        "typedef",
        "union",
        "unsigned",
        "void",
        "volatile",
        "while",
    ),
    SymbolSource.C_BUILTIN,
)
# Reserved words introduced by C99.
_C99_KEYWORDS: Dict[str, SymbolSource] = dict.fromkeys(
    (
        "inline",
        "restrict",
        "_Bool",
        "_Complex",
        "_Imaginary",
    ),
    SymbolSource.C_BUILTIN,
)
# Every C keyword up to and including C99: C89 entries first, then C99.
C_KEYWORDS = dict(_C89_KEYWORDS)
C_KEYWORDS.update(_C99_KEYWORDS)


################################################################################
### STANDARD LIBRARY SYMBOLS (<stdio.h>, <stdlib.h>)
################################################################################
# Names declared by <stdio.h>, each mapped to SymbolSource.C_STDIO.
C_STDIO_KEYWORDS: Dict[str, SymbolSource] = {
    # According to https://www.tutorialspoint.com/c_standard_library/stdio_h.htm
    # Types
    "size_t": SymbolSource.C_STDIO,
    "FILE": SymbolSource.C_STDIO,
    # The file-position type is spelled `fpos_t` (it was misspelled as
    # "fpost_t", so the real name was missing from this table).
    "fpos_t": SymbolSource.C_STDIO,
    # Macros
    "NULL": SymbolSource.C_STDIO,
    "_IOFBF": SymbolSource.C_STDIO,
    "_IOLBF": SymbolSource.C_STDIO,
    "_IONBF": SymbolSource.C_STDIO,
    "BUFSIZ": SymbolSource.C_STDIO,
    "EOF": SymbolSource.C_STDIO,
    "FOPEN_MAX": SymbolSource.C_STDIO,
    "FILENAME_MAX": SymbolSource.C_STDIO,
    "L_tmpnam": SymbolSource.C_STDIO,
    "SEEK_CUR": SymbolSource.C_STDIO,
    "SEEK_END": SymbolSource.C_STDIO,
    "SEEK_SET": SymbolSource.C_STDIO,
    "TMP_MAX": SymbolSource.C_STDIO,
    "stderr": SymbolSource.C_STDIO,
    "stdin": SymbolSource.C_STDIO,
    "stdout": SymbolSource.C_STDIO,
    # Functions
    "fclose": SymbolSource.C_STDIO,
    "clearerr": SymbolSource.C_STDIO,
    "feof": SymbolSource.C_STDIO,
    "ferror": SymbolSource.C_STDIO,
    "fflush": SymbolSource.C_STDIO,
    "fgetpos": SymbolSource.C_STDIO,
    "fopen": SymbolSource.C_STDIO,
    "fread": SymbolSource.C_STDIO,
    "freopen": SymbolSource.C_STDIO,
    "fseek": SymbolSource.C_STDIO,
    "fsetpos": SymbolSource.C_STDIO,
    "ftell": SymbolSource.C_STDIO,
    "fwrite": SymbolSource.C_STDIO,
    "remove": SymbolSource.C_STDIO,
    "rename": SymbolSource.C_STDIO,
    "rewind": SymbolSource.C_STDIO,
    "setbuf": SymbolSource.C_STDIO,
    "setvbuf": SymbolSource.C_STDIO,
    "tmpfile": SymbolSource.C_STDIO,
    "tmpnam": SymbolSource.C_STDIO,
    "fprintf": SymbolSource.C_STDIO,
    "printf": SymbolSource.C_STDIO,
    "sprintf": SymbolSource.C_STDIO,
    "vfprintf": SymbolSource.C_STDIO,
    "vprintf": SymbolSource.C_STDIO,
    "vsprintf": SymbolSource.C_STDIO,
    "fscanf": SymbolSource.C_STDIO,
    "scanf": SymbolSource.C_STDIO,
    "sscanf": SymbolSource.C_STDIO,
    "fgetc": SymbolSource.C_STDIO,
    "fgets": SymbolSource.C_STDIO,
    "fputc": SymbolSource.C_STDIO,
    "fputs": SymbolSource.C_STDIO,
    "getc": SymbolSource.C_STDIO,
    "getchar": SymbolSource.C_STDIO,
    "gets": SymbolSource.C_STDIO,  # NOTE: very dangerous function!
    "putc": SymbolSource.C_STDIO,
    "putchar": SymbolSource.C_STDIO,
    "puts": SymbolSource.C_STDIO,
    "ungetc": SymbolSource.C_STDIO,
    "perror": SymbolSource.C_STDIO,
}

# Names declared by <stdlib.h>, each mapped to SymbolSource.C_STDLIB.
C_STDLIB_KEYWORDS: Dict[str, SymbolSource] = dict.fromkeys(
    (
        # According to https://www.tutorialspoint.com/c_standard_library/stdlib_h.htm
        # Types
        "size_t",
        "wchar_t",
        "div_t",
        "ldiv_t",
        # Macros
        "NULL",
        "EXIT_FAILURE",
        "EXIT_SUCCESS",
        "RAND_MAX",
        "MB_CUR_MAX",
        # Functions
        "atof",
        "atoi",
        "atol",
        "strtod",
        "strtol",
        "strtoul",
        "calloc",
        "free",
        "malloc",
        "realloc",
        "abort",
        "atexit",
        "exit",
        "getenv",
        "system",
        "bsearch",
        "qsort",
        "abs",
        "div",
        "labs",
        "ldiv",
        "rand",
        "srand",
        "mblen",
        "mbstowcs",
        "mbtowc",
        "wcstombs",
        "wctomb",
    ),
    SymbolSource.C_STDLIB,
)
77 changes: 77 additions & 0 deletions prototype/analyzer/deprecated/lol_analyzer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
from typing import Dict, List

from prototype.analyzer.lol_analyzer_types import (
LolAnalysisObj,
LolModule,
)
from prototype.parser.lol_parser import (
ASTNode,
FunctionDefinitionNode,
ImportModuleNode,
VariableDefinitionNode,
)


### HELPER FUNCTIONS
def extract_names_in_module(
    ast_nodes: List[ASTNode], raw_text: str
) -> LolModule:
    """
    Register the name of every top-level definition on a new module.

    Function definitions, variable definitions, and imports are each handed
    to the matching `LolModule.add_*` method. Any other node kind raises
    `ValueError`.

    TODO
    ----
    1. Add struct/enum/monad
    """
    result = LolModule("", raw_text)

    for top_level_node in ast_nodes:
        if isinstance(top_level_node, FunctionDefinitionNode):
            result.add_function_name(top_level_node)
            continue
        if isinstance(top_level_node, VariableDefinitionNode):
            result.add_variable_definition_name(top_level_node)
            continue
        if isinstance(top_level_node, ImportModuleNode):
            # TODO(dchu) - recursively add members to this submodule!
            result.add_submodule(top_level_node)
            continue
        # TODO(dchu): accept data structures
        # Nothing else is permitted at module level, so this is an error.
        raise ValueError(f"{top_level_node} cannot be outside of functions!")
    return result


def get_prototypes(module: LolModule, ast_nodes: List[ASTNode], raw_text: str):
    """
    Add function and variable prototypes to `module`.

    Imports are skipped; any node that is not a function definition,
    variable definition, or import raises `ValueError`. `raw_text` is
    accepted but not used here.
    """
    for top_level_node in ast_nodes:
        if isinstance(top_level_node, FunctionDefinitionNode):
            module.add_function_prototype(top_level_node)
            continue
        if isinstance(top_level_node, VariableDefinitionNode):
            module.add_variable_definition_prototype(top_level_node)
            continue
        if isinstance(top_level_node, ImportModuleNode):
            # Imports contribute no prototype.
            continue
        # Nothing else is permitted at module level, so this is an error.
        raise ValueError(f"{top_level_node} cannot be outside of functions!")


def get_bodies(module: LolModule, ast_nodes: List[ASTNode], raw_text: str):
    """
    Add function and variable definition bodies to `module`.

    Imports are skipped; any node that is not a function definition,
    variable definition, or import raises `ValueError`. `raw_text` is
    accepted but not used here.
    """
    for top_level_node in ast_nodes:
        if isinstance(top_level_node, FunctionDefinitionNode):
            module.add_function_body(top_level_node)
            continue
        if isinstance(top_level_node, VariableDefinitionNode):
            module.add_variable_definition_body(top_level_node)
            continue
        if isinstance(top_level_node, ImportModuleNode):
            # Imports contribute no body.
            continue
        # Nothing else is permitted at module level, so this is an error.
        raise ValueError(f"{top_level_node} cannot be outside of functions!")


def analyze(asts: List[ASTNode], raw_text: str) -> LolModule:
    """
    Analyze the top-level AST nodes of one source text.

    Runs two passes over `asts`: the first registers the names of the
    top-level definitions on a new module, the second adds their
    prototypes.

    Parameters
    ----------
    asts
        Top-level AST nodes of the source text.
    raw_text
        The source text the nodes came from.

    Returns
    -------
    The module built by `extract_names_in_module`. The annotation used to
    read `Dict[str, LolAnalysisObj]`, which did not match the value returned.

    Raises
    ------
    ValueError
        If a node is not a function definition, variable definition, or
        import.
    """
    # Pass 1: names of functions, variables, and imports.
    module: LolModule = extract_names_in_module(asts, raw_text)
    # Pass 2: prototypes for functions and variables.
    get_prototypes(module, asts, raw_text)
    # NOTE(review): get_bodies() is never called here - confirm whether the
    # body pass is intentionally left out.
    return module
Loading

0 comments on commit 8da9aba

Please sign in to comment.