-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathlexer.l
125 lines (97 loc) · 2.93 KB
/
lexer.l
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
%option noyywrap
%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "symtab.c"
/* Scanner input and output streams; the objects themselves are defined in the generated scanner. */
extern FILE *yyin;
extern FILE *yyout;

/* Current input line, starting at 1; incremented by the newline rules. */
int lineno = 1;

/* Print the current lexeme together with the given token name. */
void ret_print(char *token_type);

/*
 * Report a lexical error and terminate the program.
 * Declared with its full parameter list so every call is type-checked and the
 * declaration agrees with the definition (an empty "()" means "(void)" in C23).
 */
void yyerror(char *message);
%}
/* Exclusive start condition: while it is active only rules tagged <ML_COMMENT> can match. */
%x ML_COMMENT

/* Building blocks shared by the token patterns below. */
alpha [a-zA-Z]
digit [0-9]
alnum {alpha}|{digit}
/* Printable ASCII: space (0x20) through tilde (0x7E). */
print [ -~]

/* Identifier: one or more letters followed by any mix of letters and digits. */
ID {alpha}+{alnum}*
/* Integer constant: one or more decimal digits (leading zeros are accepted). */
ICONST "0"|[0-9]{digit}*
/* Floating constant: optional integer digits, a dot, and at least one fraction digit. */
FCONST "0"|{digit}*"."{digit}+
/* Character constant: one printable character, or a backslash followed by one of n f t r b v. */
CCONST (\'{print}\')|(\'\\[nftrbv]\')
/* String literal: printable characters other than the double quote and the backslash, */
/* or a backslash followed by any printable character. Excluding the bare quote keeps */
/* the match from running on to the closing quote of a later string on the same line. */
STRING \"([ !#-\[\]-~]|\\{print})*\"
%%
    /* Line comment: consumed up to, but not including, the newline. */
"//".* { printf("Eat up comment at line %d\n", lineno); }

    /* Block comment: enter ML_COMMENT, skip the body while counting newlines, */
    /* and return to INITIAL at the closing delimiter. */
"/*" { printf("Eat up comment from line %d ", lineno); BEGIN(ML_COMMENT); }
<ML_COMMENT>"*/" { printf("to line %d\n", lineno); BEGIN(INITIAL); }
<ML_COMMENT>[^*\n]+
<ML_COMMENT>"*"
<ML_COMMENT>"\n" { lineno += 1; }

    /* Keywords: accepted in all-lowercase or all-uppercase spelling. */
    /* They are listed before the identifier rule so they win equal-length matches. */
"char"|"CHAR" { ret_print("KEYWORD_CHAR"); }
"int"|"INT" { ret_print("KEYWORD_INT"); }
"float"|"FLOAT" { ret_print("KEYWORD_FLOAT"); }
"double"|"DOUBLE" { ret_print("KEYWORD_DOUBLE"); }
"if"|"IF" { ret_print("KEYWORD_IF"); }
"else"|"ELSE" { ret_print("KEYWORD_ELSE"); }
"while"|"WHILE" { ret_print("KEYWORD_WHILE"); }
"for"|"FOR" { ret_print("KEYWORD_FOR"); }
"continue"|"CONTINUE" { ret_print("KEYWORD_CONTINUE"); }
"break"|"BREAK" { ret_print("KEYWORD_BREAK"); }
"void"|"VOID" { ret_print("KEYWORD_VOID"); }
"return"|"RETURN" { ret_print("KEYWORD_RETURN"); }

    /* Operators: the longest match wins, so two-character operators beat their one-character prefixes. */
"+"|"-" { ret_print("ADDOP"); }
"*" { ret_print("MULOP"); }
"/" { ret_print("DIVOP"); }
"++"|"--" { ret_print("INCR"); }
"||" { ret_print("OROP"); }
"&&" { ret_print("ANDOP"); }
"!" { ret_print("NOTOP"); }
"=="|"!=" { ret_print("EQUOP"); }
">"|"<"|">="|"<=" { ret_print("RELOP"); }

    /* Punctuation. The opening bracket is LBRACK and the closing bracket is RBRACK. */
"(" { ret_print("LPAREN"); }
")" { ret_print("RPAREN"); }
"[" { ret_print("LBRACK"); }
"]" { ret_print("RBRACK"); }
"{" { ret_print("LBRACE"); }
"}" { ret_print("RBRACE"); }
";" { ret_print("SEMI"); }
"." { ret_print("DOT"); }
"," { ret_print("COMMA"); }
"=" { ret_print("ASSIGN"); }
"&" { ret_print("REFER"); }

    /* Identifiers and literal constants. */
{ID} {
    /* pass the lexeme and the current line to the symbol table, then report the token */
    insert(yytext, strlen(yytext), UNDEF, lineno);
    ret_print("ID");
}
{ICONST} { ret_print("ICONST"); }
{FCONST} { ret_print("FCONST"); }
{CCONST} { ret_print("CCONST"); }
{STRING} { ret_print("STRING"); }

    /* Line counting, whitespace, and a catch-all for anything not matched above. */
"\n" { lineno += 1; }
[ \t\r\f]+ /* eat up whitespace */
. { yyerror("Unrecognized character"); }
%%
/*
 * Print the current lexeme with its token name and line number, then rewrite
 * symtab_dump.txt with the present contents of the symbol table.
 *
 * token_type: token name printed next to yytext.
 */
void ret_print(char *token_type){
    printf("yytext: %s\ttoken: %s\tlineno: %d\n", yytext, token_type, lineno);

    /*
     * This function is called from the rule actions, once per reported token,
     * so the dump stream is opened locally and closed again before returning.
     * Leaving it open would leak one stream per token, and the buffered stale
     * snapshots could be flushed over the final dump when the process exits.
     * yyout is deliberately left untouched.
     */
    FILE *dump = fopen("symtab_dump.txt", "w");
    if (dump == NULL) {
        perror("symtab_dump.txt");
        return;
    }
    symtab_dump(dump);
    fclose(dump);
}
/*
 * Report a lexical error on standard output and end the program with
 * exit status 1.
 *
 * message: description of the problem; it is printed together with the
 *          current line number and the text of the current match (yytext).
 */
void yyerror(char *message){
    fprintf(stdout, "Error: \"%s\" in line %d. Token = %s\n",
            message, lineno, yytext);
    exit(1);
}
/*
 * Entry point: run the scanner over the file named by argv[1] and write the
 * symbol table to symtab_dump.txt.
 *
 * Returns 0 on success, or 1 when the argument is missing, the input file
 * cannot be opened, or the dump file cannot be written.
 */
int main(int argc, char *argv[]){
    // an input path is required; argv[1] must not be read without it
    if (argc < 2) {
        fprintf(stderr, "Usage: %s <source-file>\n", argc > 0 ? argv[0] : "lexer");
        return 1;
    }

    // initialize symbol table
    init_hash_table();

    // open input file; stop here rather than hand the scanner a NULL stream
    yyin = fopen(argv[1], "r");
    if (yyin == NULL) {
        perror(argv[1]);
        return 1;
    }

    // lexical analysis
    yylex();
    fclose(yyin);

    // symbol table dump
    yyout = fopen("symtab_dump.txt", "w");
    if (yyout == NULL) {
        perror("symtab_dump.txt");
        return 1;
    }
    symtab_dump(yyout);

    // fclose flushes buffered output, so a failure here means the dump is incomplete
    if (fclose(yyout) != 0) {
        perror("symtab_dump.txt");
        return 1;
    }
    return 0;
}