import sys


def print_error(error: str):
    print("\033[91mError: Could not process the file. There are existing errors.\033[0m")
    if not isinstance(error, dict):
        print("\033[91m" + error + "\033[0m")
    print("\033[91m" + "FALSE" + "\033[0m")
    sys.exit()


def are_there_known_errors(tokens: dict) -> bool:
    return len(tokens['known_errors']) > 0

def handle_nested_parenthesis(text: str) -> list:
    # When it finds a keyword it keeps building a token until it reaches a parenthesis
    # (the position right before the parenthesis); inner expressions are processed recursively.
    result = []
    current_word = ""
    open_count = 0
    for char in text:
        if char == '(':
            open_count += 1
            if open_count == 1:  # Start of a new inner expression or word
                if current_word:
                    result.append(current_word)
                    current_word = ""
            else:  # Already within a word
                current_word += char
        elif char == ')':
            open_count -= 1
            if open_count == 0:  # End of an inner expression
                result.append(handle_nested_parenthesis(current_word))  # Process inner expression
                current_word = ""
            else:
                current_word += char
        elif char == ' ' and open_count == 0:  # Word separator outside parentheses
            if current_word:
                result.append(current_word)
                current_word = ""
        else:
            current_word += char
    if current_word:  # Add any remaining part
        result.append(current_word)
    return result
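
# Illustrative example (made-up input, not part of the original file):
#   handle_nested_parenthesis("print (sum 1 2)") -> ['print', ['sum', '1', '2']]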


def process_text(text: str, tokens: dict):
    """Receive a text and return a list of the expressions that compose it.
    The expressions are separated by parentheses.

    Keyword arguments:
    text -- the source text (it can also be part of an expression). It has to be a string.
    Return: an array whose items are the expressions found in the text.
    """
    expressions = []
    current_expression = ""
    open_parenthesis = 0
    close_parenthesis = 0
    # We iterate through the text and separate the expressions by counting the parentheses.
    # We also remove double spaces.
    for character in text:
        character = character.lower()
        if open_parenthesis == close_parenthesis and open_parenthesis != 0 and close_parenthesis != 0:
            # There may be more than one expression on the same line,
            # so we separate the text into a list of expressions.
            if close_parenthesis > 1:  # If close_parenthesis > 1, so is open_parenthesis: the expression is nested
                expressions.append(handle_nested_parenthesis(current_expression))
            else:
                expressions.append(current_expression)
            current_expression = ""
            open_parenthesis = 0
            close_parenthesis = 0
        elif character not in ('\n', '\t', '\r', '\v', '\f'):
            # Skip double (and leading) spaces
            if character == ' ' and (not current_expression or current_expression[-1] == ' '):
                continue
            current_expression += character
            # If there were nested parentheses we have to add a parenthesis as the
            # first item in the array and a parenthesis as the last item
            if character == '(':
                open_parenthesis += 1
            elif character == ')':
                close_parenthesis += 1
    # Flush the last expression when the text does not end with a newline
    if current_expression and open_parenthesis == close_parenthesis and open_parenthesis != 0:
        if close_parenthesis > 1:
            expressions.append(handle_nested_parenthesis(current_expression))
        else:
            expressions.append(current_expression)
    # if expressions[0] in tokens['reserved_words']:
    # Check if there are any unclosed parentheses
    if open_parenthesis != close_parenthesis:
        # find the last open parenthesis in the expressions array and return its position
        tokens['known_errors'] = 'Error: There are open statements. You have to close them.'
    return expressions
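
# Illustrative example (made-up input, not part of the original file):
#   process_text("(print hello)\n(print (sum 1 2))\n", tokens)
#   -> ['(print hello)', [['print', ['sum', '1', '2']]]]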


# Check the type of the expression
def tokenizer(expression: list, tokens: dict):
    from process import process
    process(expression, tokens)


def lexer(file_path: str):
    # Read the file
    try:
        with open(file_path, 'r') as file:
            text = file.read()
    except FileNotFoundError:
        print("\033[91mError: The file does not exist. Please check the file path and try again.\033[0m")
        sys.exit()
    # We create an object that will store the tokens, their values and their types.
    tokens = {
        'known_errors': {},  # This will store the known errors
        'variables': {},  # This will store the variables and their values
        'functions': {},  # This will store the functions, their arguments and their body
        'scoped_variables': [],  # This will store the variables in their own scope (only variables inside a function; works as a stack)
    }
    # We separate the text into expressions
    expressions = process_text(text, tokens)
    # If the text is blank
    if len(expressions) == 0:
        tokens['known_errors'] = 'Error: The file is empty. Did you forget to write something?'
        print_error(tokens['known_errors'])
    # Then we separate the expressions into tokens.
    # The expressions are passed in lower case to the tokenizer.
    try:
        for expression in expressions:
            tokenizer(expression, tokens)
            if are_there_known_errors(tokens):
                print_error(tokens['known_errors'])
                sys.exit()
        print("\033[92mSuccess: No errors found. The file was processed successfully.\033[0m")
        print("\033[92m" + "TRUE" + "\033[0m")
    except Exception as error:  # If there is an error we print it
        print(f"\033[91m {error} \033[0m")
        print("\033[91mFatal: An error occurred while processing the file. Please check the file and try again.\033[0m")
        print("\033[91m" + "FALSE" + "\033[0m")