-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlexer.ml
208 lines (189 loc) · 8.67 KB
/
lexer.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
open TokenTypes
open Str
(* Part 1: Lexer - IMPLEMENT YOUR CODE BELOW *)
(* Token patterns used by the lexer.

   Patterns that need real regular-expression syntax are built with
   [Str.regexp].  Fixed lexemes (operators and punctuation) are built with
   [Str.regexp_string], which matches its argument literally; this avoids
   depending on how [Str.regexp] treats an unescaped leading "+" or "*",
   both of which the Str documentation lists as special characters. *)

(* Integer literals. *)
let re_num = Str.regexp "[0-9]+" (* one or more decimal digits *)
(* Parenthesised negative integer such as "(-42)".  In Str syntax bare
   parentheses are ordinary characters (grouping is spelled \( \)), so the
   match includes the surrounding parentheses. *)
let re_neg = Str.regexp "(-[0-9]+)"

(* Reserved words.  Each pattern is anchored at both ends so that it only
   accepts a string that is exactly the keyword. *)
let re_bool_true = Str.regexp "^true$"
let re_bool_false = Str.regexp "^false$"
let re_rec = Str.regexp "^rec$"
let re_let = Str.regexp "^let$"
let re_in = Str.regexp "^in$"
let re_def = Str.regexp "^def$"
let re_fun = Str.regexp "^fun$"
let re_not = Str.regexp "^not$"
let re_if = Str.regexp "^if$"
let re_then = Str.regexp "^then$"
let re_else = Str.regexp "^else$"

(* String literal: a double quote, any run of non-quote characters, and a
   closing double quote.  There is no escape-sequence support. *)
let re_string = Str.regexp "\"[^\"]*\""
(* Identifier: a letter followed by letters or digits. *)
let re_id = Str.regexp "[a-zA-Z][a-zA-Z0-9]*"

(* Arithmetic operators. *)
let re_add = Str.regexp_string "+"
let re_sub = Str.regexp_string "-"
let re_mult = Str.regexp_string "*"
let re_div = Str.regexp_string "/"

(* Parentheses. *)
let re_lparen = Str.regexp_string "("
let re_rparen = Str.regexp_string ")"

(* Comparison operators. *)
let re_equal = Str.regexp_string "="
let re_notequal = Str.regexp_string "<>"
let re_greater = Str.regexp_string ">"
let re_less = Str.regexp_string "<"
let re_greaterequal = Str.regexp_string ">="
let re_lessequal = Str.regexp_string "<="

(* Boolean operators. *)
let re_or = Str.regexp_string "||"
let re_and = Str.regexp_string "&&"

(* String concatenation operator "^". *)
let re_concat = Str.regexp_string "^"

(* Remaining punctuation. *)
let re_arrow = Str.regexp_string "->"
let re_doublesemi = Str.regexp_string ";;"

(* A run of spaces, tabs and newlines. *)
let re_whitespace = Str.regexp "[ \t\n]+"
(** [tokenize input] splits the source text [input] into its tokens, in the
    order they appear.  Whitespace (as matched by [re_whitespace]) separates
    tokens and is discarded.

    Lexing rules:
    - A parenthesised negative integer such as ["(-12)"] becomes a single
      [Tok_Int] carrying the negative value; the parentheses are consumed.
    - A string literal becomes [Tok_String] with the surrounding double
      quotes removed.
    - An identifier whose whole text is a reserved word becomes the matching
      keyword token (or [Tok_Bool]); any other identifier becomes [Tok_ID].
    - Operators are matched longest-first, so ["<="], [">="], ["<>"] and
      ["->"] are each one token rather than two.

    @raise Failure if [input] contains a character that cannot start any
    token, or (via [int_of_string]) if an integer literal does not fit in
    an [int]. *)
let tokenize input =
  let len = String.length input in
  (* Reserved words have the same shape as identifiers, so an identifier is
     lexed first and its complete text is then checked against each keyword
     pattern. *)
  let keywords =
    [ (re_bool_true, Tok_Bool true);
      (re_bool_false, Tok_Bool false);
      (re_not, Tok_Not);
      (re_if, Tok_If);
      (re_then, Tok_Then);
      (re_else, Tok_Else);
      (re_rec, Tok_Rec);
      (re_let, Tok_Let);
      (re_in, Tok_In);
      (re_def, Tok_Def);
      (re_fun, Tok_Fun) ]
  in
  let keyword_or_id lexeme =
    match
      List.find_opt (fun (re, _) -> Str.string_match re lexeme 0) keywords
    with
    | Some (_, token) -> token
    | None -> Tok_ID lexeme
  in
  (* Fixed-text tokens.  The first matching entry wins, so every two-character
     operator is listed before any one-character operator it starts with;
     otherwise "<=" would be read as "<" followed by "=". *)
  let symbols =
    [ (re_arrow, Tok_Arrow);
      (re_doublesemi, Tok_DoubleSemi);
      (re_notequal, Tok_NotEqual);
      (re_lessequal, Tok_LessEqual);
      (re_greaterequal, Tok_GreaterEqual);
      (re_or, Tok_Or);
      (re_and, Tok_And);
      (re_add, Tok_Add);
      (re_sub, Tok_Sub);
      (re_mult, Tok_Mult);
      (re_div, Tok_Div);
      (re_equal, Tok_Equal);
      (re_less, Tok_Less);
      (re_greater, Tok_Greater);
      (re_lparen, Tok_LParen);
      (re_rparen, Tok_RParen);
      (re_concat, Tok_Concat) ]
  in
  (* [tok pos acc] lexes [input] from offset [pos]; [acc] holds the tokens
     found so far, most recent first.  Str keeps the details of the last
     match in global state, so the matched text and end offset are read
     immediately after each successful match, before any other Str call. *)
  let rec tok pos acc =
    if pos >= len then List.rev acc
    else if Str.string_match re_whitespace input pos then
      tok (Str.match_end ()) acc
    (* "(-123)" must be tried before the plain "(" symbol. *)
    else if Str.string_match re_neg input pos then
      let lexeme = Str.matched_string input in
      let next = Str.match_end () in
      (* Drop the enclosing parentheses, keeping the sign and digits. *)
      let number = String.sub lexeme 1 (String.length lexeme - 2) in
      tok next (Tok_Int (int_of_string number) :: acc)
    else if Str.string_match re_num input pos then
      let lexeme = Str.matched_string input in
      let next = Str.match_end () in
      tok next (Tok_Int (int_of_string lexeme) :: acc)
    else if Str.string_match re_string input pos then
      let lexeme = Str.matched_string input in
      let next = Str.match_end () in
      (* Drop the opening and closing double quotes. *)
      let contents = String.sub lexeme 1 (String.length lexeme - 2) in
      tok next (Tok_String contents :: acc)
    else if Str.string_match re_id input pos then
      let lexeme = Str.matched_string input in
      let next = Str.match_end () in
      tok next (keyword_or_id lexeme :: acc)
    else
      match
        List.find_opt (fun (re, _) -> Str.string_match re input pos) symbols
      with
      | Some (_, token) ->
        (* [find_opt] stops at the first success, so the Str match state
           still describes the symbol that was just recognised. *)
        tok (Str.match_end ()) (token :: acc)
      | None ->
        (* Report the problem instead of silently dropping the rest of the
           input. *)
        failwith
          (Printf.sprintf "tokenize: unexpected character %C at position %d"
             input.[pos] pos)
  in
  tok 0 []
;;