-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcompiler.py
196 lines (161 loc) · 5.57 KB
/
compiler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
# Seyyed Alireza Hashemi and Erfan Moeini
# --- Scanner input, output files, cursor state and character classes ---------
# The whole source program is read eagerly; the context manager closes the
# input handle as soon as the text is in memory (it used to be left open).
# NOTE: the name shadows the builtin ``input``; it is kept because the scanner
# functions in this file refer to it by this name.
with open('input.txt', 'r') as _source_file:
    input = _source_file.read()

# Output files, opened in append mode.
errors = open('lexical_errors.txt', 'a')
tokens = open('tokens.txt', 'a')
symbols = open('symbol_table.txt', 'a')

input_size = len(input)
iterator = 0  # index of the next unread character in ``input``
# Last line label written to each output: [errors, tokens, symbols].
# 0 means nothing has been written to that file yet.
last_lines = [0, 0, 0]
lineno = 1  # current line of the input, 1-based
all_IDS_or_KEYWORDS = []  # symbol-table entries in order of first appearance

# Character classes. '/' is deliberately its own class (COMMENT) and is not
# part of SYMBOL, so it is routed to the comment handler.
WHITESPACES = [' ', '\n', '\r', '\t', '\v', '\f']
SYMBOL = [';', ':', ',', '[', ']', '(', ')', '{', '}', '+', '-', '*', '=', '<']
KEYWORD = ['if', 'else', 'void', 'int', 'while', 'break', 'switch', 'default', 'case', 'return', ]
ALPHABET = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u',
            'v', 'w', 'x', 'y', 'z', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
            'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']
COMMENT = ['/']
DIGIT = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '0']
VALID_CHARACTERS = WHITESPACES + SYMBOL + ALPHABET + DIGIT + COMMENT
###############################
def get_char():
    """Return the next unread input character and advance the cursor.

    Returns:
        The character at the cursor, or the string 'EOF' once the cursor has
        reached the end of the input (the cursor is not moved in that case).
    """
    global iterator
    if iterator != input_size:
        position = iterator
        iterator = position + 1
        return input[position]
    return 'EOF'
def star_char():
    """Un-read one character by moving the input cursor back one position."""
    global iterator
    iterator = iterator - 1
def handle_whitespace(white_space):
    """Advance the line counter when ``white_space`` is a newline.

    Any other whitespace character is ignored.
    """
    global lineno
    if white_space != '\n':
        return
    lineno = lineno + 1
def handle_keyword_and_id(char):
    """Scan an identifier or keyword whose first character is ``char``.

    Letters and digits are consumed greedily. The character that ends the
    lexeme is pushed back for the caller unless it is invalid, in which case
    it is consumed and reported together with the lexeme.

    Args:
        char: The first character of the lexeme (already read by the caller).

    Returns:
        ('KEYWORD', lexeme) or ('ID', lexeme) on success, or
        ('error', 'Invalid input', lexeme + bad_char) when the lexeme is
        immediately followed by a character outside VALID_CHARACTERS.
    """
    lexeme = ''
    while char in ALPHABET or char in DIGIT:
        lexeme += char
        char = get_char()
    # At end of input get_char() returns the 'EOF' sentinel without moving the
    # cursor. It is neither an invalid character nor something to push back:
    # treating it as either would report a bogus "<lexeme>EOF" error or make
    # the last character of the input be scanned twice.
    if char != 'EOF':
        if char not in VALID_CHARACTERS:
            return 'error', 'Invalid input', lexeme + char
        star_char()
    if lexeme in KEYWORD:
        return 'KEYWORD', lexeme
    return 'ID', lexeme
def handle_symbol(char):
    """Scan a symbol token that starts with ``char``.

    Only '=' and '*' need one character of lookahead ('==' and the stray
    comment terminator '*/'); every other symbol is returned immediately.

    Args:
        char: A character from SYMBOL (already read by the caller).

    Returns:
        ('SYMBOL', lexeme) on success, ('error', 'Invalid input', lexeme)
        when '=' or '*' is followed by a character outside VALID_CHARACTERS,
        or ('error', 'Unmatched comment', '*/') for a '*/' outside a comment.
    """
    if char == '=':
        following = get_char()
        if following == '=':
            return 'SYMBOL', '=='
        # 'EOF' is the end-of-input sentinel: the cursor did not advance, so
        # there is nothing to report as invalid and nothing to push back.
        if following == 'EOF':
            return 'SYMBOL', '='
        if following not in VALID_CHARACTERS:
            return 'error', 'Invalid input', '=' + following
        star_char()
        return 'SYMBOL', '='
    if char == '*':
        following = get_char()
        if following == 'EOF':  # same end-of-input handling as for '='
            return 'SYMBOL', '*'
        if following not in VALID_CHARACTERS:
            return 'error', 'Invalid input', '*' + following
        if following == '/':
            return 'error', 'Unmatched comment', '*' + following
        star_char()
        return 'SYMBOL', '*'
    return 'SYMBOL', char
def handle_digit(char):
    """Scan a number whose first digit is ``char``.

    Digits are consumed greedily. The character that ends the number is
    pushed back for the caller unless it makes the lexeme an error, in which
    case it is consumed and reported together with the digits.

    Args:
        char: The first digit of the number (already read by the caller).

    Returns:
        ('NUM', digits) on success,
        ('error', 'Invalid number', digits + letter) when a letter directly
        follows the digits, or
        ('error', 'Invalid input', digits + bad_char) when the following
        character is outside VALID_CHARACTERS.
    """
    digits = ''
    while char in DIGIT:
        digits += char
        char = get_char()
    # End of input: get_char() returned the 'EOF' sentinel without moving the
    # cursor, so it must not be reported as an invalid character and there is
    # nothing to push back.
    if char == 'EOF':
        return 'NUM', digits
    if char in ALPHABET:
        return 'error', 'Invalid number', digits + char
    if char not in VALID_CHARACTERS:
        return 'error', 'Invalid input', digits + char
    star_char()
    return 'NUM', digits
def handle_comment(char):
    """Skip a comment, or report a stray '/'.

    Called after a '/' has been read. Handles '// ...' line comments and
    '/* ... */' block comments.

    Args:
        char: The '/' that triggered the call. It is not inspected; the
            function immediately reads the character that follows it.

    Returns:
        None when a comment was skipped,
        ('error', 'Unclosed comment', preview) when a block comment reaches
        the end of input, or
        ('error', 'Invalid input', '/') when '/' starts neither kind of
        comment.
    """
    global lineno
    char = get_char()
    if char == '/':
        char = get_char()
        # Stop at end of input as well as at a newline; without the EOF test
        # a line comment on the last line (no trailing newline) never ends.
        while char != '\n' and char != 'EOF':
            char = get_char()
        if char == '\n':
            # Leave the newline for the caller so the line counter advances.
            star_char()
        return None
    if char == '*':
        char = get_char()
        comment = ''
        while char != 'EOF':
            comment += char
            if comment.endswith('*/'):
                # Newlines swallowed by the comment never reach
                # handle_whitespace, so account for them here to keep the
                # line numbers of the following tokens correct.
                lineno += comment.count('\n')
                return None
            char = get_char()
        # lineno is intentionally left alone here, so the error is reported
        # on the line where the comment starts. The preview keeps at most five
        # characters and always drops the last character scanned.
        return 'error', 'Unclosed comment', '/*' + comment[0:min(5, len(comment) - 1)] + '...'
    # Push the lookahead back only if one was actually consumed; at end of
    # input the cursor did not move, and stepping back would make the same
    # '/' be scanned again forever.
    if char != 'EOF':
        star_char()
    return 'error', 'Invalid input', '/'
def handle_invalid_input(char):
    """Build the error token for a character that starts no valid lexeme.

    Returns:
        The tuple ('error', 'Invalid input', char).
    """
    kind, message = 'error', 'Invalid input'
    return kind, message, char
def get_next_token(char):
    """Dispatch on the class of ``char`` and scan the lexeme it starts.

    Returns:
        The tuple produced by the matching handler; '' for whitespace; or
        whatever handle_comment returns (None when a comment was skipped).
    """
    if char in WHITESPACES:
        handle_whitespace(char)
        return ''
    if char in ALPHABET:
        return handle_keyword_and_id(char)
    if char in SYMBOL:
        return handle_symbol(char)
    if char in DIGIT:
        return handle_digit(char)
    if char in COMMENT:
        return handle_comment(char)
    return handle_invalid_input(char)
###############################
def handle_next_line(index, lineno, writer):
    """Start a new labelled output line when ``lineno`` differs from the last one.

    Args:
        index: Slot in ``last_lines`` that tracks this writer.
        lineno: Label for the line about to be written to.
        writer: Object with a ``write`` method receiving the output.

    Returns:
        True if a new '<lineno>.\\t' prefix was written (preceded by a newline
        unless it is the first line for this slot), False if ``lineno`` equals
        the last label written for the slot.
    """
    previous = last_lines[index]
    if lineno == previous:
        return False
    if previous != 0:
        writer.write('\n')
    last_lines[index] = lineno
    writer.write(f'{lineno}.\t')
    return True
def handle_space(is_needed, writer):
    """Write a single space to ``writer`` when ``is_needed`` is truthy."""
    if not is_needed:
        return
    writer.write(' ')
# Empty the three output files. These statements sit outside the __main__
# guard, so they also run when the module is imported.
errors.truncate(0)
tokens.truncate(0)
symbols.truncate(0)
if __name__ == '__main__':
    # Driver: scan the whole input, writing tokens and lexical errors grouped
    # by input line, and every distinct identifier/keyword to the symbol table.
    try:
        there_is_lexical_errors = False
        char = get_char()
        while char != 'EOF':
            token = get_next_token(char)
            # Whitespace yields '' and skipped comments yield None; both are
            # falsy and produce no output.
            if token:
                if token[0] == 'error':
                    is_new_line = handle_next_line(0, lineno, errors)
                    handle_space(not is_new_line, errors)
                    # Error tuples are (kind, message, lexeme); the lexeme is
                    # printed first.
                    errors.write(f'({token[2]}, {token[1]})')
                    there_is_lexical_errors = True
                else:
                    is_new_line = handle_next_line(1, lineno, tokens)
                    handle_space(not is_new_line, tokens)
                    tokens.write(f'({token[0]}, {token[1]})')
                    if token[0] == 'ID' or token[0] == 'KEYWORD':
                        if token[1] not in all_IDS_or_KEYWORDS:
                            all_IDS_or_KEYWORDS.append(token[1])
                            # Symbol-table rows are numbered by insertion order.
                            handle_next_line(2, len(all_IDS_or_KEYWORDS), symbols)
                            symbols.write(f'{token[1]}')
            char = get_char()
        if not there_is_lexical_errors:
            errors.write('There is no lexical error.')
        # Keywords that never appeared in the input are appended to the symbol
        # table after everything that did.
        for keyword in KEYWORD:
            if keyword not in all_IDS_or_KEYWORDS:
                all_IDS_or_KEYWORDS.append(keyword)
                handle_next_line(2, len(all_IDS_or_KEYWORDS), symbols)
                symbols.write(f'{keyword}')
    finally:
        # Flush and release the output files even if scanning raised.
        errors.close()
        tokens.close()
        symbols.close()