ref: 264e6b67b3caa3be6c4a23fca1a81adddc9d2bac
dir: /reader.ml/
module T = Types.Types
exception Syntax_error of string
let token_re =
Str.regexp "~@\\|[][{}()'`~^@]\\|\"\\(\\\\.\\|[^\"]\\)*\"?\\|;.*\\|[^][ \n{}('\"`,;)]*"
;;
let string_re = Str.regexp "\"\\(\\\\.\\|[^\\\\\"]\\)*\""
type reader =
{ form : Types.m9type
; tokens : string list
}
type list_reader =
{ list_form : Types.m9type list
; tokens : string list
}
let tokenize str =
List.map
(function
| Str.Delim x -> String.trim x (* move trim to regex for speed? *)
| Str.Text x -> "tokenize botch")
(List.filter
(function
| Str.Delim x -> true
| Str.Text x -> false)
(Str.full_split token_re str))
;;
(* copied verbatim - must needs grok *)
let gsub re f str =
String.concat
""
(List.map
(function
| Str.Delim x -> f x
| Str.Text x -> x)
(Str.full_split re str))
;;
let unescape_string token =
if Str.string_match string_re token 0
then (
let without_quotes = String.sub token 1 (String.length token - 2) in
gsub
(Str.regexp "\\\\.")
(function
| "\\n" -> "\n"
| x -> String.sub x 1 1)
without_quotes)
else (
output_string stderr "expected '\"', got EOF\n";
flush stderr;
raise End_of_file)
;;
let read_atom token =
match token with
| "null" -> T.Nil
| "#t" | "#true" -> T.Bool true
| "#f" | "#false" -> T.Bool false
| _ ->
(match token.[0] with
| '0' .. '9' -> Types.number (float_of_string token)
| '#' ->
(match token.[1], token.[2] with
| '\\', '0' .. '9' | '\\', 'a' .. 'z' | '\\', 'A' .. 'Z' -> T.Char token.[2]
| _ -> Types.symbol token)
| '-' ->
(match String.length token with
| 1 -> Types.symbol token
| _ ->
(match token.[1] with
| '0' .. '9' -> Types.number (float_of_string token)
| _ -> Types.symbol token))
| '"' -> T.String (unescape_string token)
| _ -> Types.symbol token)
;;
let rec read_list eol list_reader =
match list_reader.tokens with
| [] -> raise (Syntax_error ("unterminated '" ^ eol ^ "'"))
| token :: tokens ->
if Str.string_match (Str.regexp eol) token 0
then { list_form = list_reader.list_form; tokens }
else (
let reader = read_form list_reader.tokens in
read_list
eol
{ list_form = list_reader.list_form @ [ reader.form ]; tokens = reader.tokens })
and read_quote sym tokens =
let reader = read_form tokens in
{ form = Types.list [ Types.symbol sym; reader.form ]; tokens = reader.tokens }
and read_vector all_tokens =
match all_tokens with
| [] -> raise End_of_file
| token :: tokens ->
(match token with
| "(" ->
let list_reader = read_list ")" { list_form = []; tokens } in
{ form = Types.vector list_reader.list_form; tokens = list_reader.tokens }
| _ -> read_form tokens)
and read_form all_tokens =
match all_tokens with
| [] -> raise End_of_file
| token :: tokens ->
(match token with
| "'" -> read_quote "quote" tokens
| "`" -> read_quote "quasiquote" tokens
| "#" -> read_vector tokens
| "#|" ->
let list_reader = read_list "|#" { list_form = []; tokens } in
{ form = T.Comment; tokens = list_reader.tokens }
| "(" ->
let list_reader = read_list ")" { list_form = []; tokens } in
{ form = Types.list list_reader.list_form; tokens = list_reader.tokens }
| "" | "\t" | "\n" -> read_form tokens
| _ ->
if token.[0] = ';' then read_form tokens else { form = read_atom token; tokens })
;;
let slurp filename =
let chan = open_in filename in
let b = Buffer.create 27 in
Buffer.add_channel b chan (in_channel_length chan);
close_in chan;
Buffer.contents b
;;
let read str = (read_form (tokenize str)).form