-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlexer.mll
71 lines (58 loc) · 1.67 KB
/
lexer.mll
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
{
open Std
open Parser
(* Raised by the lexer on input it cannot tokenize. Carries a string
   describing the offending input and a position taken from the lexbuf. *)
exception Error of string * Lexing.position
(* Automatic semicolon insertion flag. [emit] sets it after every token
   except LEFT_BRACE and SEMICOLON; the [token] rule reads it at each
   newline to decide whether to return a SEMICOLON token. *)
let needs_semicolon = ref false
(* NOTE(review): not referenced anywhere in the visible part of this file;
   confirm whether other modules still rely on it. *)
let needs_newline = ref false
(* [emit token] returns [token] unchanged after updating the semicolon
   insertion flag: a newline that follows LEFT_BRACE or SEMICOLON must not
   produce a SEMICOLON, a newline that follows any other token must. *)
let emit token =
  needs_semicolon :=
    (match token with
     | LEFT_BRACE | SEMICOLON -> false
     | _ -> true);
  token
(* [lexing_error lexbuf] never returns: it raises [Error] carrying the first
   character of the current lexeme (as a one-character string) together with
   the position at the end of that lexeme.
   @raise Error always. *)
let lexing_error lexbuf =
  let offending = Lexing.lexeme_char lexbuf 0 |> String.make 1 in
  raise (Error (offending, Lexing.lexeme_end_p lexbuf))
}
(* Horizontal whitespace: a space or a tab. *)
let white = [' ' '\t']
(* One line terminator: LF, CR, or CR immediately followed by LF. *)
let newline = '\n' | '\r' '\n'?
(* Unsigned decimal integer literals. *)
let digit = ['0'-'9']
let integer = digit+
(* Identifiers: an ASCII letter or underscore, then any number of ASCII
   letters, underscores or digits. *)
let ident_start = ['a'-'z' 'A'-'Z' '_']
let ident = ident_start (ident_start | digit)*
(* Main lexer entry point: returns the next Parser token read from [lexbuf].

   Comments and horizontal whitespace are skipped. A newline is turned into
   a SEMICOLON token when the previously emitted token asked for one (see
   [emit] and [needs_semicolon]); otherwise it is skipped as well.

   Raises [Error] through [lexing_error] on any character that starts no
   token. The keyword cases must stay ahead of the [ident] case: both match
   the same text, and ocamllex picks the earlier case on a tie. *)
rule token = parse
  (* comments *)
  | "//" [^ '\r' '\n' ]* { token lexbuf }
  | "/*" { read_comment lexbuf; token lexbuf }
  (* whitespace *)
  | white+ { token lexbuf }
  | newline {
      (* Keep line numbers in lexbuf positions up to date. *)
      Lexing.new_line lexbuf;
      (* emit SEMICOLON clears the flag, so a run of blank lines yields at
         most one inserted semicolon. *)
      if !needs_semicolon then emit SEMICOLON
      else token lexbuf
    }
  (* keywords *)
  | "type" { emit TYPE }
  | "struct" { emit STRUCT }
  | "enum" { emit ENUM }
  (* symbols *)
  | '.' { emit DOT }
  | ';' { emit SEMICOLON }
  | '[' { emit LEFT_BRACKET }
  | ']' { emit RIGHT_BRACKET }
  | '{' { emit LEFT_BRACE }
  | '}' { emit RIGHT_BRACE }
  | '*' { emit STAR }
  (* literals *)
  | integer { emit (INTEGER (Lexing.lexeme lexbuf)) }
  | ident { emit (IDENT (Lexing.lexeme lexbuf)) }
  (* everything else *)
  | eof {
      (* needs_semicolon is a module-level ref shared by every lexbuf. Clear
         it here so that an input ending without a trailing newline cannot
         cause a spurious SEMICOLON at the first newline of the next input
         lexed by this module. *)
      needs_semicolon := false;
      EOF
    }
  | _ { lexing_error lexbuf }
(* Skips the rest of a block comment whose opening delimiter has already
   been consumed by the caller, and returns unit once the closing delimiter
   has been read. Newlines inside the comment are counted so that later
   positions stay accurate. Block comments do not nest.

   Raises [Error] if the input ends before the comment is closed. In that
   case the string component is a description of the problem rather than
   the offending character. *)
and read_comment = parse
  | "*/" { () }
  | newline { Lexing.new_line lexbuf; read_comment lexbuf }
  (* The wildcard below never matches end of input; without this case an
     unterminated comment would fail with the generic empty-token Failure
     from the generated lexer instead of the lexer's own exception. *)
  | eof { raise (Error ("unterminated comment", lexbuf.Lexing.lex_curr_p)) }
  | _ { read_comment lexbuf }