forked from reactorlabs/sourir
-
Notifications
You must be signed in to change notification settings - Fork 0
/
lexer.mll
85 lines (78 loc) · 1.91 KB
/
lexer.mll
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
{
open Parser
(** Lexing failure. Carries the rejected input text and the position taken
    from the lexbuf at the time the error was raised. *)
exception Error of string * Lexing.position
(** Association list from reserved words to their parser tokens.
    Looked up by [id_or_keyword]; any identifier not listed here is an
    ordinary [IDENTIFIER]. *)
let keyword_table =
  [ ("var", VAR)
  ; ("branch", BRANCH)
  ; ("goto", GOTO)
  ; ("print", PRINT)
  ; ("assert", ASSERT)
  ; ("osr", OSR)
  ; ("stop", STOP)
  ; ("read", READ)
  ; ("drop", DROP)
  ; ("return", RETURN)
  ; ("call", CALL)
  ; ("version", VERSION)
  ; ("function", FUNCTION)
  ; ("array", ARRAY)
  ; ("length", LENGTH)
  ]
(** [id_or_keyword id] is the keyword token bound to [id] in
    [keyword_table], or [IDENTIFIER id] when [id] is not a keyword. *)
let id_or_keyword id =
  try List.assoc id keyword_table
  with Not_found -> IDENTIFIER id
(** [comment_of_string str] is [str] without its first character (the
    comment marker matched by the lexer) and without any carriage
    returns. An empty [str] yields the empty string. *)
let comment_of_string str =
  let text = Buffer.create 10 in
  let keep index ch =
    (* index 0 holds the comment marker; carriage returns are dropped *)
    if index > 0 && ch <> '\r' then Buffer.add_char text ch
  in
  String.iteri keep str;
  Buffer.contents text
(** [lexing_error lexbuf] never returns: it raises [Error] with the first
    character of the current lexeme (as a one-character string) and the
    current position of [lexbuf]. *)
let lexing_error lexbuf =
  let position = lexbuf.Lexing.lex_curr_p in
  let offending = Lexing.lexeme_char lexbuf 0 |> String.make 1 in
  raise (Error (offending, position))
}
(* Decimal integer with an optional leading minus sign. Because the sign
   belongs to the literal and the longest match wins, input such as x-1
   lexes as an identifier followed by the integer -1, not as a MINUS
   token followed by 1. *)
let int_literal = '-'? ['0'-'9']+

(* One or more spaces or tabs. *)
let blank = ['\t' ' ']+

(* A line feed, optionally preceded by any number of carriage returns. *)
let newline = '\r'* '\n'

(* Identifier: a letter or underscore, then letters, digits, underscores. *)
let id = ['_' 'A'-'Z' 'a'-'z'] ['_' '0'-'9' 'A'-'Z' 'a'-'z']*
(* Main tokenizer. Returns the next parser token from lexbuf.

   - Blanks are skipped; newlines are significant and yield NEWLINE after
     advancing the line counter of lexbuf.
   - A comment runs from the hash sign to the end of the line and is
     returned as a COMMENT token.
   - The literals nil, true and false are listed before the id rule so
     that they win the tie against it; other reserved words are resolved
     by id_or_keyword.
   - Raises Error on a character that starts no token, and on an integer
     literal that int_of_string rejects as out of range (previously that
     case escaped as a bare Failure). *)
rule token = parse
  | newline       { Lexing.new_line lexbuf; NEWLINE }
  | blank         { token lexbuf }
  | int_literal
      { let literal = Lexing.lexeme lexbuf in
        (match int_of_string literal with
         | n -> INT n
         | exception Failure _ ->
           (* literal does not fit in a native int *)
           raise (Error (literal, lexbuf.Lexing.lex_curr_p))) }
  | "#" [^ '\n']* { COMMENT (comment_of_string (Lexing.lexeme lexbuf)) }
  | "nil"         { NIL }
  | "true"        { BOOL true }
  | "false"       { BOOL false }
  | id            { id_or_keyword (Lexing.lexeme lexbuf) }
  | "=="          { DOUBLE_EQUAL }
  | "!="          { NOT_EQUAL }
  | "<"           { LT }
  | "<="          { LTE }
  | ">"           { GT }
  | ">="          { GTE }
  | "+"           { PLUS }
  | "-"           { MINUS }
  | "*"           { TIMES }
  | "/"           { DIVIDE }
  | "%"           { MOD }
  | "&&"          { DOUBLE_AMP }
  | "||"          { DOUBLE_PIPE }
  | "!"           { BANG }
  | "["           { LBRACKET }
  | "]"           { RBRACKET }
  | "("           { LPAREN }
  | ")"           { RPAREN }
  | "{"           { LBRACE }
  | "}"           { RBRACE }
  | ","           { COMMA }
  | "..."         { TRIPLE_DOT }
  | ":"           { COLON }
  | "$"           { DOLLAR }
  | "="           { EQUAL }
  | "<-"          { LEFTARROW }
  | "'"           { SINGLE_QUOTE }
  | eof           { EOF }
  | _             { lexing_error lexbuf }