-
Notifications
You must be signed in to change notification settings - Fork 2
/
ptucc_lex.l
397 lines (340 loc) · 11.5 KB
/
ptucc_lex.l
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
%{
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include <stdint.h>
#include "ptucc_parser.tab.h"
#include "hashtable.h"
#include "cgen.h"
/* (main source file) line tracker */
uint32_t line_num = 1;
/* check if we have verbose output */
extern bool verbose_flag; // enable console spam
extern uint32_t yystack_depth; // no. of yylex buffers available
/* macro support up to a limit. */
extern uint32_t max_macro; // hashtable bin limit
extern uint32_t max_macro_max; // hashtable hard entry limit
/* our hash table to hold the macros */
hashtable_t *mac_ht = NULL;
/* global yylex buffer index */
uint32_t yylex_bufidx = 0;
/*
 * One slot of the include stack kept in yybuf_states. include_file() fills a
 * slot when a module is entered and pop_delete_buffer() releases it once the
 * module has been read to its end.
 */
typedef struct __yybuf_state {
YY_BUFFER_STATE state; // flex buffer used to scan this input
char *fname; // name of the file being read; free()'d when the slot is popped
uint32_t incl_lnum; // line counter of the included file (starts at 1)
} yybuf_state;
/* flex input buffers array (allocated on first include with yystack_depth entries) */
yybuf_state *yybuf_states = NULL;
/* Return true on success, false on failure */
bool set_macro(char* name, char* def);
/* Return def for macro, or NULL if no such macro is defined. */
char *get_macro(char* name);
/* pop, delete and switch our current buffer */
bool pop_delete_buffer();
/* include the file *inside* the lexer */
FILE *include_file();
/*
 * Tell whether the scanner is currently reading an included module instead
 * of the main source file.
 */
bool
including_file() {
    /* index 0 is the main source buffer, so nothing is being included */
    if(yylex_bufidx == 0) {
        return false;
    }
    return yybuf_states[yylex_bufidx].fname != NULL;
}
/*
 * Advance the line counter of the input being scanned: the per-module
 * counter while a module is included, the global line_num otherwise.
 */
void
increment_line_count() {
    if(!including_file()) {
        line_num++;
        return;
    }
    yybuf_states[yylex_bufidx].incl_lnum++;
}
/*
 * Return the current line number of the input being scanned: the included
 * module's own counter when inside a module, line_num otherwise.
 */
uint32_t
fetch_line_count() {
    if(including_file()) {
        return yybuf_states[yylex_bufidx].incl_lnum;
    }
    return line_num;
}
/*
 * Return the file name of the module being included, or NULL while the main
 * source file is being scanned. The string stays owned by the include stack.
 */
char *
fetch_incl_name() {
    if(!including_file()) {
        return NULL;
    }
    return yybuf_states[yylex_bufidx].fname;
}
/*
 * Verbose-mode trace of a recognised token: prints the current line number,
 * the token label `s` and the matched text (yytext) to stderr.
 * Does nothing unless verbose_flag is set.
 */
void
pwrap(char *s) {
    if(!verbose_flag) {
        return;
    }
    fprintf(stderr, "Line: %5d\ttoken: %15s\tText='%s'\n",
            fetch_line_count(), s, yytext);
}
/*
 * Write the string `s` to stderr as-is, but only when verbose output has
 * been requested through verbose_flag.
 */
void msg(char *s) {
    if(!verbose_flag) {
        return;
    }
    fprintf(stderr, "%s", s);
}
%}
/* identifier: a letter or underscore followed by letters, digits, underscores */
ID [a-zA-Z_][0-9a-zA-Z_]*
/* a non-zero digit, used as the first digit of numbers without leading zeros */
SDIGIT [1-9]
/* any decimal digit */
DIGIT [0-9]
/* integer without leading zeros: either a lone 0 or a non-zero digit first */
SNUMBER (0|{SDIGIT}{DIGIT}*)
/* one or more digits, leading zeros allowed (used for fractional parts) */
NUMBER {DIGIT}{DIGIT}*
/* either ".digits", or an SNUMBER optionally followed by "." and optional digits */
DECIMAL ("."{NUMBER})|({SNUMBER}("."{NUMBER}?)?)
/* a DECIMAL with an optional exponent; the exponent digits form an SNUMBER */
REAL {DECIMAL}([eE][+-]?{SNUMBER})?
/*
 * single-quoted literal: accepts the escapes \n \t \r \' \" \\ and any other
 * character except a single quote, a backslash or a double quote.
 * NOTE(review): the negated class also accepts a newline, so a literal may
 * span lines without the line counter being updated -- confirm if intended.
 */
STRING '(\\[ntr\'\"\\]|[^\'\\"])*'
/* double-quoted literal: same body as STRING, delimited by double quotes */
STR_LIT \"(\\[ntr\'\"\\]|[^\'\\"])*\"
/* exclusive start condition active inside a multi-line comment */
%x comment
/* exclusive start condition active inside a single-line comment */
%x sl_comment
/* exclusive start condition entered after "@defmacro" to read the macro name */
%x macro
/* exclusive start condition entered after "use" to read the module name */
%x incl_module
%%
@defmacro[ \r\t]+ {pwrap("DECL_MACRO"); BEGIN(macro);}
<macro>{ID} {
    /*
     * Macro definition. yytext is the macro name; everything that follows on
     * the same line (up to, but not including, the newline) is the body.
     * The body is collected into a growable in-memory stream and both strings
     * are handed to set_macro(), which frees them on success. On failure they
     * are released here.
     */
    char *mac_name = strdup(yytext);
    char *def_buf = NULL;
    size_t deflen = 0;
    /* input() returns an int; a plain char cannot represent EOF reliably */
    int c;
    FILE *deff = open_memstream(&def_buf, &deflen);

    /*
     * Always consume the rest of the line, even if the stream could not be
     * created, so the macro body is never tokenized as program text.
     * Depending on the flex version input() signals end of input with EOF
     * or with 0; stop on either instead of looping forever when the
     * definition is the last line of a file that lacks a final newline.
     */
    while((c = input()) != '\n' && c != EOF && c != 0) {
        if(deff != NULL) {fputc(c, deff);}
    }
    /* closing the stream finalizes def_buf (NUL-terminated) and deflen */
    if(deff != NULL) {fclose(deff);}

    /* perform some error checking */
    if(mac_name == NULL || def_buf == NULL ||
       !set_macro(mac_name, def_buf)) {
        yyerror("lexer error: failed to define macro '%s'\n",
                mac_name != NULL ? mac_name : "(null)");
        /* free(NULL) is a no-op; an empty body still owns a buffer */
        free(mac_name);
        free(def_buf);
    }

    /* only count the line when a newline was actually consumed */
    if(c == '\n') {increment_line_count();}

    /* continue tokenization */
    BEGIN(INITIAL);
}
program {pwrap("PROGRAM"); return KW_PROGRAM;}
    /*
     * Reserved words: the rule above and every rule down to "module" traces
     * the match through pwrap() and returns the corresponding KW_* token.
     * They are listed ahead of the {ID} rule on purpose: when two rules match
     * text of the same length flex picks the one that appears first, so a
     * bare keyword is never reported as an identifier.
     */
begin {pwrap("BEGIN"); return KW_BEGIN;}
end {pwrap("END"); return KW_END;}
and {pwrap("AND"); return KW_AND;}
array {pwrap("ARRAY"); return KW_ARRAY;}
boolean {pwrap("BOOLEAN"); return KW_BOOLEAN;}
char {pwrap("CHAR"); return KW_CHAR;}
div {pwrap("DIV"); return KW_DIV;}
do {pwrap("DO"); return KW_DO;}
else {pwrap("ELSE"); return KW_ELSE;}
for {pwrap("FOR"); return KW_FOR;}
function {pwrap("FUNCTION"); return KW_FUNCTION;}
goto {pwrap("GOTO"); return KW_GOTO;}
if {pwrap("IF"); return KW_IF;}
integer {pwrap("INTEGER"); return KW_INTEGER;}
var {pwrap("VAR"); return KW_VAR;}
mod {pwrap("MOD"); return KW_MOD;}
not {pwrap("NOT"); return KW_NOT;}
of {pwrap("OF"); return KW_OF;}
or {pwrap("OR"); return KW_OR;}
while {pwrap("WHILE"); return KW_WHILE;}
procedure {pwrap("PROCEDURE"); return KW_PROCEDURE;}
real {pwrap("REAL"); return KW_REAL;}
repeat {pwrap("REPEAT"); return KW_REPEAT;}
to {pwrap("TO"); return KW_TO;}
result {pwrap("RESULT"); return KW_RESULT;}
return {pwrap("RETURN"); return KW_RETURN;}
then {pwrap("THEN"); return KW_THEN;}
until {pwrap("UNTIL"); return KW_UNTIL;}
downto {pwrap("DOWNTO"); return KW_DOWNTO;}
type {pwrap("TYPE"); return KW_TYPE;}
true {pwrap("B_TRUE"); return KW_BOOL_TRUE;}
false {pwrap("B_FALSE"); return KW_BOOL_FALSE;}
    /*
     * module support: "module" is an ordinary keyword token, whereas "use"
     * returns no token at all -- it only switches the scanner to the
     * incl_module start condition, where the module name is read.
     */
module {pwrap("MODULE"); return KW_MODULE;}
use {pwrap("USE"); BEGIN(incl_module);}
/* handle module includes */
<incl_module>[ \t]* {/* eat whitespaces */}
<incl_module>[^ \t\n]+{ID} {
    /*
     * yytext now holds the module name that followed "use". include_file()
     * opens the module and pushes it on the buffer stack, so after a
     * successful call scanning continues inside the module.
     * NOTE(review): the pattern demands at least one non-blank character in
     * front of {ID}, so a one-character module name does not match this
     * rule -- confirm that this is intended.
     */
    FILE *module_fp = include_file();

    if(module_fp == NULL) {
        yyerror("could not open include file");
    } else if(verbose_flag) {
        fprintf(stderr, "\n --\n\tincluding module: %s\n --\n",
                yybuf_states[yylex_bufidx].fname);
    }

    /* back to regular tokenization, whether the include worked or not */
    BEGIN(INITIAL);
}
{ID} {
    /*
     * Identifier or macro use. A name with no macro definition is returned
     * to the parser as IDENT with a strdup()'d copy of the lexeme. A macro
     * name returns nothing: its body is pushed back onto the input so the
     * scanner tokenizes the expansion next.
     */
    pwrap("IDENTIFIER");
    char *expansion = get_macro(yytext);

    if(expansion != NULL) {
        if(verbose_flag) {
            fprintf(stderr, "Line: %5d\ttoken: %15s\tText='%s'\n",
                    fetch_line_count(), "MACRO_CATCH", expansion);
        }
        /* unput() works like a stack: push the last character first */
        size_t pos = strlen(expansion);
        while(pos > 0) {
            pos--;
            unput(expansion[pos]);
        }
    } else {
        yylval.crepr = strdup(yytext);
        return IDENT;
    }
}
{SNUMBER} {
/*
 * Integer literal without leading zeros. {REAL} accepts the same text, but
 * this rule is listed first and therefore wins when both match the same
 * length. The lexeme is passed to the parser as a strdup()'d string.
 */
pwrap("SNUMBER");
yylval.crepr = strdup(yytext);
return POSINT;
}
{REAL} {
/* Real literal (fraction and/or exponent); passed on as a strdup()'d string. */
pwrap("REAL_NUM");
yylval.crepr = strdup(yytext);
return REAL;
}
{STRING} {
/* Single-quoted literal; the copied lexeme still includes the quotes. */
pwrap("STRING");
yylval.crepr = strdup(yytext);
return STRING;
}
{STR_LIT} {
/* Double-quoted literal; the copied lexeme still includes the quotes. */
pwrap("STR_LIT");
yylval.crepr = strdup(yytext);
return STR_LIT;
}
/* arithmetic operators */
"+" {pwrap("OP_PLUS"); return KW_OP_PLUS;}
"-" {pwrap("OP_MINUS"); return KW_OP_MINUS;}
"*" {pwrap("OP_MUL"); return KW_OP_MUL;}
"/" {pwrap("OP_DIV"); return KW_OP_DIV;}
    /*
     * relational operators; the two-character forms beat "<" and ">" because
     * flex always prefers the longest match
     */
"=" {pwrap("OP_EQ"); return KW_EQ;}
"<>" {pwrap("OP_DIFF"); return KW_DIFF;}
"<=" {pwrap("OP_LESS_EQ"); return KW_LESS_EQ;}
"<" {pwrap("OP_LESS"); return KW_LESS;}
">=" {pwrap("OP_GREATER_EQ"); return KW_GREATER_EQ;}
">" {pwrap("OP_GREATER"); return KW_GREATER;}
    /* logic operators (v2): symbolic forms, returned as their own tokens */
"&&" {pwrap("OP_AND"); return KW_OP_AND;}
"||" {pwrap("OP_OR"); return KW_OP_OR;}
"!" {pwrap("OP_NOT"); return KW_OP_NOT;}
    /* assignment operator; ":=" is preferred over a lone ":" (longest match) */
":=" {pwrap("OP_ASSIGN"); return KW_OP_ASSIGN;}
    /*
     * (remaining) delimiters. Longest match also decides the overlaps here:
     * "(" followed by "*" opens a comment, and "." followed by digits is
     * scanned as a {REAL}.
     */
";" {pwrap("DEL_SEMICOLON"); return KW_SEMICOLON;}
"," {pwrap("DEL_COMMA"); return KW_COMMA;}
"." {pwrap("DEL_DOT"); return KW_DOT;}
"(" {pwrap("DEL_LPAR"); return KW_LPAR;}
")" {pwrap("DEL_RPAR"); return KW_RPAR;}
"[" {pwrap("DEL_LBRA"); return KW_LBRA;}
"]" {pwrap("DEL_RBRA"); return KW_RBRA;}
":" {pwrap("DEL_COLON"); return KW_COLON;}
/* handle single-line comments */
"//" {BEGIN(sl_comment);}
<sl_comment>[^\n] /* discard the comment text, one character at a time */
<sl_comment>\n {
increment_line_count(); // the newline that ends the comment still counts as a line
BEGIN(INITIAL); // resume normal scanning
}
    /* handle multi-line comments, which open with "(*" and close with "*)" */
"(*" {BEGIN(comment);}
<comment>[^*\n]* /* eat anything that's not a '*' or a newline */
<comment>"*"+[^*)\n]* /* eat up '*'s not followed by ')'s */
<comment>\n {increment_line_count();}
<comment>"*"+")" {BEGIN(INITIAL);}
[ \r\t] /* skip whitespace */
\n {increment_line_count();}// a newline outside comments advances the active line counter
<<EOF>> {
    /*
     * End of the current input. If it was an included module,
     * pop_delete_buffer() switches back to the file that included it and
     * scanning simply goes on. Otherwise the main file is finished: release
     * the include stack, run flex_closure() and report end of input.
     */
    bool resumed = pop_delete_buffer();

    if(!resumed) {
        /* free(NULL) is a no-op, so no guard is needed when nothing was included */
        free(yybuf_states);
        flex_closure();
        return EOF;
    }
}
. {
/*
 * Fallback: any single character (other than a newline) that no earlier rule
 * accepted is reported through yyerror() as a lexical error.
 * NOTE(review): the previous comment said this keeps valgrind happy even on
 * failures; presumably yyerror() handles the clean-up -- confirm there.
 */
yyerror("lexical error: unrecognized literal '%s'\n", yytext);
}
%%
/* Return true on success, false on failure */
bool
set_macro(char* name, char* def) {
/* check if the hash table is created already */
if(mac_ht == NULL) {
/* try to create it */
if((mac_ht = ht_create(max_macro, NULL)) == NULL)
/* return error, if we can't create it. */
{yyerror("\n -- Error: Hashtable creation failed"); return false;}
}
/* check if we can set it based on the limits */
if(mac_ht->stored_elements >= max_macro_max)
{yyerror("\n -- Error: Max hash table entries reached, adjust sizes"); return false;}
/* now check to find that macro */
if(!ht_set(mac_ht, name, def))
/* error, not inserted and no need to free */
{return false;}
else
/* successful insertion, free duplicates */
{free(name); free(def); return true;}
}
/*
 * Look up the body of macro `name`.
 *
 * Returns the stored definition, or NULL when no such macro exists. The
 * macro table is only created by the first set_macro() call, so a lookup
 * made before any macro was defined (or with a NULL name) is answered here
 * with NULL instead of relying on ht_get() to tolerate a NULL argument.
 * The returned string belongs to the table; callers must not free it.
 */
char *
get_macro(char* name) {
    if(mac_ht == NULL || name == NULL) {
        return NULL;
    }
    return ht_get(mac_ht, name);
}
/* pop, delete and switch our current buffer to a previous one */
bool
pop_delete_buffer() {
/* check if we have available buffers to clear */
if(yylex_bufidx > 0) {
if(verbose_flag) {
fprintf(stderr, " --\n\tFinished including module: %s\n --\n",
yybuf_states[yylex_bufidx].fname);
}
/* clear the buffers */
fclose(yybuf_states[yylex_bufidx].state->yy_input_file);
yy_delete_buffer(yybuf_states[yylex_bufidx].state);
free(yybuf_states[yylex_bufidx].fname);
yybuf_states[yylex_bufidx].state = NULL;
yylex_bufidx--;
/* switch to a previous buffer */
yy_switch_to_buffer(yybuf_states[yylex_bufidx].state);
return true;
} else
/* if we don't have one, it's probably time to close */
{return false;}
}
/*
 * Start scanning the module named by yytext.
 *
 * The file "<yytext>.ptuc" is opened, the current flex buffer is saved in the
 * include stack and a fresh buffer for the module becomes the active input.
 * The stack itself is allocated on the first include, with yystack_depth
 * slots (slot 0 holds the main source buffer).
 *
 * Returns the opened FILE on success. Returns NULL, leaving the scanner
 * state untouched, when the stack is exhausted, the file name cannot be
 * built, the file cannot be opened or the stack cannot be allocated. The
 * FILE and the name are released later by pop_delete_buffer().
 */
FILE *
include_file() {
    /*
     * One more level needs slot yylex_bufidx + 1. Written as an addition so
     * that a yystack_depth of 0 cannot wrap around and bypass the check.
     */
    if(yylex_bufidx + 1 >= yystack_depth) {
        yyerror("yylex input buffer stack exhausted, current limit is: %d",
                yystack_depth);
        return NULL;
    }

    /* template() is a project helper; its result is released with free() */
    char *fname = template("%s.ptuc", yytext);
    if(fname == NULL) {
        return NULL;
    }

    FILE *fptr = fopen(fname, "r");
    if(!fptr) {
        yyerror("lexical error: couldn't open %s module", yytext);
        free(fname);
        return NULL;
    }

    /* allocate the (zero-initialized) stack the first time it is needed */
    if(yybuf_states == NULL) {
        yybuf_states = calloc(yystack_depth, sizeof(*yybuf_states));
        if(yybuf_states == NULL) {
            yyerror("\n -- Error: Could not allocate buffer stack");
            /* do not leak the file that was just opened */
            fclose(fptr);
            free(fname);
            return NULL;
        }
    }

    /* remember where we came from, then describe the new level */
    yybuf_states[yylex_bufidx].state = YY_CURRENT_BUFFER;
    yylex_bufidx++;
    yybuf_states[yylex_bufidx].state = yy_create_buffer(fptr, YY_BUF_SIZE);
    yybuf_states[yylex_bufidx].fname = fname;
    yybuf_states[yylex_bufidx].incl_lnum = 1;

    /* from now on the scanner reads from the module */
    yy_switch_to_buffer(yybuf_states[yylex_bufidx].state);
    return fptr;
}