-
Notifications
You must be signed in to change notification settings - Fork 0
/
lexer.py
65 lines (47 loc) · 1.33 KB
/
lexer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
from rply import LexerGenerator, Token
from collections import OrderedDict
# Keywords of the language. An ID token whose lower-cased text appears in this
# list is re-tagged as a keyword token after lexing.
# NOTE: "if", "else", "int", "end", "decls" and "has" were listed here in a
# commented-out alternative and are currently not reserved.
reserved = ["true", "false", "and", "or", "not", "kek", "lol", "kdone"]

# Token name -> regular expression for every operator / punctuation token.
# The insertion order is the order in which the rules are registered below.
# NOTE: ASSIGN (=) and SPACE rules existed only as commented-out entries and
# are not tokenized.
operators = OrderedDict([
    ("PAREN_L", r"\("),
    ("PAREN_R", r"\)"),
    ("PLUS", r"\+"),
    ("MINUS", r"-"),
    ("MULTIPLY", r"\*"),
    ("DIVIDE", r"\/"),
    ("MOD", r"%"),
    ("CURLY_L", r"\{"),
    ("CURLY_R", r"\}"),
    ("COMMA", r","),
    ("BRACKET_L", r"\["),
    ("BRACKET_R", r"\]"),
])

lg = LexerGenerator()
lg.add("NUM", r"\d+")
lg.add("ID", r"[a-zA-Z_][a-zA-Z0-9_]*")
# The body of a string excludes the double quote as well as the backslash.
# Without excluding the quote, the greedy match would run from the first
# quote to the LAST quote in the input, so `"a", "b"` became a single STRING.
lg.add("STRING", r"\"[^\"\\]*\"")
for key, value in operators.items():
    lg.add(key, value)
def id_reserved(token):
    """Re-tag an identifier token as a keyword token when it is reserved.

    The comparison against ``reserved`` is case-insensitive. When the text is
    a reserved word, a new token is returned whose name is the lower-cased
    keyword (the spelling used in ``reserved``), whose value is the original
    text, and whose source position is copied from ``token``. Any other token
    is returned unchanged.
    """
    keyword = token.value.lower()
    if keyword not in reserved:
        return token
    # Name the token with the lower-cased keyword so that e.g. "TRUE" yields a
    # "true" token instead of an unknown "TRUE" one, and keep the source
    # position so later error reporting can still point at the input.
    return Token(keyword, token.value, token.getsourcepos())
# Input skipped between tokens: runs of whitespace, then "#" comments that
# extend to the end of the line.
lg.ignore(r"\s+")
lg.ignore(r"#.*")

# Post-processing hooks keyed by token name; each hook takes a token and
# returns the token that should replace it.
callbacks = dict(ID=(id_reserved,))

lexer = lg.build()

# Every token name the lexer can emit: the registered rule names first,
# followed by the reserved words.
token_names = [rule.name for rule in lg.rules]
token_names.extend(reserved)
def lex(buf):
    """Yield the tokens of ``buf`` after running the per-name callbacks.

    Each token produced by ``lexer.lex(buf)`` is passed, in order, through the
    callables registered in ``callbacks`` under the token's original name; the
    result of the last callable (or the token itself when none is registered)
    is yielded.
    """
    for raw in lexer.lex(buf):
        current = raw
        # The hooks are selected once, by the name the lexer assigned.
        for hook in callbacks.get(raw.name, []):
            current = hook(current)
        yield current
if __name__ == "__main__":
    # Script mode: tokenize everything read from standard input and
    # pretty-print each resulting token.
    from sys import stdin
    from pprint import pprint

    source_text = stdin.read()
    for token in lex(source_text):
        pprint(token)