-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathimp_lexer.py
76 lines (68 loc) · 1.68 KB
/
imp_lexer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# coding=utf-8
# Created by deserts at 1/8/17
import sys
import re
def lex(characters, token_exprs):
    """Split *characters* into a list of ``(text, tag)`` tokens.

    :param characters: the input string to tokenize.
    :param token_exprs: ordered iterable of ``(pattern, tag)`` pairs. At each
        position the patterns are tried in order and the first one that
        consumes at least one character wins. A falsy ``tag`` (e.g. ``None``)
        means the matched text is skipped and produces no token.
    :return: list of ``(text, tag)`` tuples in input order.
    :raises SystemExit: with status 1, after writing a message to stderr,
        when no pattern matches at the current position.
    """
    # Compile every pattern once instead of once per pattern per position.
    compiled = [(re.compile(pattern), tag) for pattern, tag in token_exprs]

    pos = 0
    tokens = []
    length = len(characters)
    while pos < length:
        for regex, tag in compiled:
            match = regex.match(characters, pos)
            # Ignore zero-width matches: accepting one would leave ``pos``
            # unchanged and the scan would never terminate.
            if match is not None and match.end(0) > pos:
                if tag:
                    tokens.append((match.group(0), tag))
                pos = match.end(0)
                break
        else:
            # No pattern consumed any input at this position.
            sys.stderr.write('Illegal characters: %s at %d' % (characters[pos], pos))
            sys.exit(1)
    return tokens
def imp_lexer(characters):
    """Tokenize *characters* using the module-level ``token_exprs`` table.

    :param characters: the source text to tokenize.
    :return: whatever ``lex`` returns for this input and ``token_exprs``.
    """
    tokens = lex(characters, token_exprs)
    return tokens
# Tags attached to the tokens described by ``token_exprs``.
RESERVED = 'RESERVED'
INT = 'INT'
ID = 'ID'

# Ordered (pattern, tag) table. The lexer takes the FIRST pattern that matches
# at the current position, so a longer operator must be listed before any
# shorter operator that is a prefix of it.
token_exprs = [
    # Whitespace and '#' line comments are matched but carry no tag.
    (r'[ \n\t]+', None),
    (r'#[^\n]*', None),
    (r':', RESERVED),
    # '==' must precede '=', otherwise '==' is split into two '=' tokens.
    (r'==', RESERVED),
    (r'=', RESERVED),
    (r'\(', RESERVED),
    (r'\)', RESERVED),
    (r'\[', RESERVED),
    (r'\]', RESERVED),
    (r',', RESERVED),
    (r';', RESERVED),
    (r'\+', RESERVED),
    (r'-', RESERVED),
    (r'\*', RESERVED),
    (r'/', RESERVED),
    (r'<=', RESERVED),
    (r'<', RESERVED),
    (r'>=', RESERVED),
    (r'>', RESERVED),
    (r'!=', RESERVED),
    # Keywords end with \b so that identifiers which merely start with a
    # keyword (e.g. 'index', 'order', 'printer') fall through to the ID rule
    # instead of being split into a keyword plus a leftover identifier.
    (r'and\b', RESERVED),
    (r'or\b', RESERVED),
    (r'not\b', RESERVED),
    (r'if\b', RESERVED),
    (r'then\b', RESERVED),
    (r'else\b', RESERVED),
    (r'while\b', RESERVED),
    (r'do\b', RESERVED),
    (r'for\b', RESERVED),
    (r'in\b', RESERVED),
    (r'end\b', RESERVED),
    (r'def\b', RESERVED),
    (r'return\b', RESERVED),
    (r'print\b', RESERVED),
    (r'[0-9]+', INT),
    (r'[A-Za-z_][\w_]*', ID),
]