-
Notifications
You must be signed in to change notification settings - Fork 1
/
lexer.mll
144 lines (134 loc) · 3.86 KB
/
lexer.mll
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
{
open Error
open Parser
open Lexing
open Format
(* Reserved words of the language, each mapped to its parser token.
   The [token] rule looks identifiers up here and falls back to IDENT
   when the word is not a keyword. *)
let keyword_table =
  let table = Hashtbl.create 72 in
  List.iter
    (fun (word, tok) -> Hashtbl.add table word tok)
    [ "boolean", BOOLEAN;
      "class", CLASS;
      "else", ELSE;
      "extends", EXTENDS;
      "false", FALSE;
      "for", FOR;
      "if", IF;
      "instanceof", INSTANCEOF;
      "int", INT;
      "native", NATIVE;
      "new", NEW;
      "null", NULL;
      "public", PUBLIC;
      "return", RETURN;
      "static", STATIC;
      "this", THIS;
      "true", TRUE;
      "void", VOID;
      "main", MAIN ];
  table
(* [current_pos lb] is the source span of the lexeme most recently matched
   on [lb], as a pair (start position, end position). *)
let current_pos lb =
  let start_p = Lexing.lexeme_start_p lb in
  let end_p = Lexing.lexeme_end_p lb in
  (start_p, end_p)
(* [next_line lexbuf] records a line break at the current position of
   [lexbuf]: the line number is incremented and the beginning-of-line
   offset becomes the current absolute offset (pos_cnum).

   This delegates to [Lexing.new_line]. The offset must not be taken from
   lex_curr_pos: that field is an index into the internal buffer and stops
   matching pos_cnum once a channel-backed buffer has been refilled, which
   would give wrong column numbers in error messages. *)
let next_line lexbuf = Lexing.new_line lexbuf
(* strbuf is the scratch buffer used by read_string to accumulate the
   contents of a string literal, i.e. a string enclosed in double quotes.
   The token rule resets it before each literal and reads the result back
   with Buffer.contents. *)
let strbuf = Buffer.create 80
}
(* Named regular expressions shared by the rules below. *)

(* ASCII letters, lower and upper case. *)
let alpha = ['A'-'Z' 'a'-'z']
(* A decimal digit. *)
let chiffre = ['0'-'9']
(* A line terminator: CRLF, or a lone LF or CR. *)
let eol = "\r\n" | '\n' | '\r'
(* Horizontal blanks: space and tab. *)
let whitespace = ['\t' ' ']
(* An identifier: a letter or underscore, then letters, digits, underscores. *)
let ident = ('_' | alpha) ('_' | alpha | chiffre)*
(* Main lexer entry point: skips blanks and comments, then returns the next
   parser token. Lexical errors are reported through [error] together with
   the source span of the offending lexeme. *)
rule token = parse
  (* Single-line comment. The line terminator is deliberately NOT consumed
     here: it is left for the [eol] case below so that the line counter is
     updated, and so that a comment on the last line of a file, with no
     trailing newline, is still recognised. *)
  | "//" [^ '\n' '\r']* { token lexbuf }
  (* Multi-line comment; the opening position is kept for error reporting. *)
  | "/*" { comment (current_pos lexbuf) lexbuf }
  (* Line break: bump the line number. *)
  | eol { next_line lexbuf; token lexbuf }
  (* Blanks: nothing to do. *)
  | whitespace { token lexbuf }
  (* Keyword or identifier. *)
  | ident as id
    {
      try
        Hashtbl.find keyword_table id
      with
        Not_found -> IDENT id
    }
  (* Decimal integer literal. *)
  | chiffre+ as cnum {
      let too_large () =
        error
          (Lexical_error ("integer number too large " ^ cnum))
          (current_pos lexbuf)
      in
      (* The value 2^31 itself is accepted; Int32.of_int then wraps it to
         Int32.min_int. NOTE(review): presumably this is so that
         -2147483648 can be written as MINUS followed by this constant;
         confirm against the parser before tightening the bound. *)
      match int_of_string cnum with
      | i when i <= int_of_float (2. ** 31.) -> CONST (Int32.of_int i)
      | _ -> too_large ()
      (* int_of_string fails when the literal does not fit in a native int. *)
      | exception Failure _ -> too_large ()
    }
  (* String literal: the body is read by [read_string] into [strbuf]. *)
  | '"' {
      (* [read_string] runs the lexing engine again, which moves the lexeme
         start forward. Save it and restore it afterwards so that the STRING
         token is located at its opening quote, not at its closing one. *)
      let start_p = lexbuf.Lexing.lex_start_p in
      Buffer.reset strbuf;
      read_string (current_pos lexbuf) lexbuf;
      lexbuf.Lexing.lex_start_p <- start_p;
      STRING (Buffer.contents strbuf)
    }
  (* Operators and punctuation. The longest match wins, so two-character
     operators take precedence over their one-character prefixes. *)
  | "++" { INC }
  | "--" { DEC }
  | '+' { PLUS }
  | '*' { MULT }
  | '-' { MINUS }
  | '/' { DIV }
  | '%' { MOD }
  | "==" { EQ }
  | '=' { AFFECT }
  | '!' { NOT }
  | "||" { OR }
  | "&&" { AND }
  | "!=" { NEQ }
  | "<=" { LE }
  | '<' { LT }
  | ">=" { GE }
  | '>' { GT }
  | '(' { LPAR }
  | ')' { RPAR }
  | '.' { DOT }
  | '{' { LEMB }
  | '}' { REMB }
  | ';' { SEMICOLON }
  | ',' { COM }
  | '[' { LBRA }
  | ']' { RBRA }
  | eof { EOF }
  (* Anything else is a lexical error. *)
  | _ {
      error
        (Lexical_error "Character not recognized")
        (current_pos lexbuf)
    }
(* Skips the body of a multi-line comment, then resumes normal lexing.
   [pos] is the span of the comment opener; it is only used to give a more
   helpful location when the comment is never closed. *)
and comment pos = parse
  | eof
    { error (Lexical_error "Comment opened but not closed") pos }
  | eol
    { next_line lexbuf;
      comment pos lexbuf }
  | "*/"
    { token lexbuf }
  | _
    { comment pos lexbuf }
(* Reads the body of a string literal, appending its characters to [strbuf]
   until the closing double quote, which is consumed. Returns unit; the
   caller retrieves the text from [strbuf].

   [pos] is the span handed over by the caller (the opening quote when
   called from [token]). It is used to report an unterminated string where
   the string starts, in the same way [comment] reports an unclosed
   comment at its opener.

   Recognised escapes: backslash-n, backslash-t, escaped double quote and
   escaped backslash. Apart from those, only printable ASCII characters
   (space through tilde) are accepted. *)
and read_string pos = parse
  | "\\n" { Buffer.add_char strbuf '\n'; read_string pos lexbuf }
  | "\\t" { Buffer.add_char strbuf '\t'; read_string pos lexbuf }
  | "\\\"" { Buffer.add_char strbuf '"'; read_string pos lexbuf }
  | "\\\\" { Buffer.add_char strbuf '\\'; read_string pos lexbuf }
  (* Closing quote: the literal is complete. *)
  | '"' { () }
  | [' '-'~'] as c { Buffer.add_char strbuf c; read_string pos lexbuf }
  (* End of input before the closing quote: point at the start of the
     literal rather than at the end of the file. *)
  | eof { error (Lexical_error "String not terminated") pos }
  (* Any other character, line breaks included, is rejected. *)
  | _ as c {
      error
        (Lexical_error
           (sprintf "Illegal character '%c' in string" c))
        (current_pos lexbuf)
    }