ref: 0560b9b189c123d48e91231a3adf15016c0a49d9
dir: /reader.ml/
module T = Types.Types
let string_re = Str.regexp "\"\\(\\\\.\\|[^\\\\\"]\\)*\""
let registered_macros = Env.make None
type reader = {form: Types.m9type; tokens: string list}
type list_reader = {list_form: Types.m9type list; tokens: string list}
let unescape_string token =
if Str.string_match string_re token 0 then
let without_quotes = String.sub token 1 (String.length token - 2) in
Utils.gsub (Str.regexp "\\\\.")
(function
| "\\n" -> "\n"
| x -> String.sub x 1 1 )
without_quotes
else raise (Utils.Syntax_error "unterminated string")
let trim_end list = List.rev (List.tl (List.rev list))
let rec replace_token tokens replacement block =
match tokens with
| [token] ->
let t = if token = "_" then replacement else token in
block := !block @ [t] ;
String.concat " " !block
| token :: rest ->
let t = if token = "_" then replacement else token in
block := !block @ [t] ;
replace_token rest replacement block
| _ -> String.concat " " !block
and fix_clause original sym clause =
print_endline (">>>>> fix_clause: incoming: " ^ Printer.to_string clause) ;
match clause with
| T.List {T.value= [T.List {T.value= pattern; meta= _}; T.List {T.value= transform; meta= _}]; meta= _} ->
let pattern = Utils.tokenize (Printer.dump pattern) in
let fixed_pattern = replace_token (List.tl pattern) (Printer.to_string sym) (ref []) in
let fixed_transform =
replace_token (Utils.tokenize (Printer.dump transform)) (Printer.to_string original) (ref []) in
["("; "define"; Printer.to_string sym; "("; "lambda"; "("; fixed_pattern; ")"; "("; fixed_transform; ")"; ")"; ")"]
| T.List {T.value= [T.List {T.value= pattern; meta= _}; atom]; meta= _} ->
let pattern = Utils.tokenize (Printer.dump pattern) in
let fixed_pattern = replace_token (List.tl pattern) (Printer.to_string sym) (ref []) in
["("; "define"; Printer.to_string sym; "("; "lambda"; "("; fixed_pattern; ")"; Printer.to_string atom; ")"; ")"]
| _ as e -> raise (Utils.Syntax_error ("fix_clause botch: " ^ Printer.to_string e))
let read_atom token =
match token with
| "null" -> T.Nil
| "#t"
|"#true" ->
T.Bool true
| "#f"
|"#false" ->
T.Bool false
| _ -> (
match token.[0] with
| '0' .. '9' -> Types.number (float_of_string token)
| '#' -> (
match (token.[1], token.[2]) with
| '\\', '0' .. '9'
|'\\', 'a' .. 'z'
|'\\', 'A' .. 'Z' ->
T.Char token.[2]
| _ -> Types.symbol token )
| '-' -> (
match String.length token with
| 1 -> Types.symbol token
| _ -> (
match token.[1] with
| '0' .. '9' -> Types.number (float_of_string token)
| _ -> Types.symbol token ) )
| '"' -> T.String (unescape_string token)
| _ -> Types.symbol token )
let rec read_list eol list_reader =
(* we need to replace macro calls with their variant symbols *)
let tweaked_tokens =
if List.length list_reader.tokens > 1 && List.hd list_reader.tokens = "(" then
let symbol_str = List.nth list_reader.tokens 1 in
let symbol = Types.symbol symbol_str in
match try Env.get registered_macros symbol with _ -> T.Nil with
| T.Macro {T.value= m; meta} ->
(* print_endline "XXXX MACRO FOUND" ; *)
print_endline ("XXXX MACRO: " ^ Printer.to_string m) ;
(* print_endline ("XXXX META: " ^ Printer.to_string meta); *)
(* print_endline ("XXXX TOKENS: " ^ String.concat " " list_reader.tokens) ; *)
let args = Macro.collect_args (List.tl list_reader.tokens) [] in
(* print_endline ("<><><> args: " ^ String.concat " " args) ; *)
let variant = Macro.match_variant symbol_str meta args in
print_endline ("<><><><>: " ^ variant) ;
(* List.map (fun s -> if s = Printer.to_string symbol then variant else s) (trim_end list_reader.tokens) *)
List.map (fun s -> if s = symbol_str then variant else s) (trim_end list_reader.tokens)
| _ -> list_reader.tokens
else list_reader.tokens in
(* print_endline ("TWEAKED_TOKENS: [" ^ String.concat " " tweaked_tokens ^ "]") ; *)
match tweaked_tokens with
| [] -> raise (Utils.Syntax_error ("read_list botch: '" ^ Printer.dump list_reader.list_form ^ "' eol: '" ^ eol ^ "'"))
| [_] -> {list_form= list_reader.list_form; tokens= [")"]}
| token :: tokens ->
if Str.string_match (Str.regexp eol) token 0 then {list_form= list_reader.list_form; tokens}
else
let reader = read_form tweaked_tokens in
(* print_endline ("token: " ^ token ^ " tokens: " ^ String.concat " " tokens); *)
read_list eol {list_form= list_reader.list_form @ [reader.form]; tokens= reader.tokens}
and read_quote sym tokens =
let reader = read_form tokens in
{form= Types.list [Types.symbol sym; reader.form]; tokens= reader.tokens}
and read_vector all_tokens =
match all_tokens with
| [] -> raise End_of_file
| token :: tokens -> (
match token with
| "(" ->
let list_reader = read_list ")" {list_form= []; tokens} in
{form= Types.vector list_reader.list_form; tokens= list_reader.tokens}
| _ -> read_form tokens )
and read_macro tokens =
let macro = ref [] in
let list_reader = read_list ")" {list_form= []; tokens} in
print_endline ("READ_MACRO: " ^ Printer.dump list_reader.list_form) ;
print_endline ("READ_MACRO tokens: " ^ String.concat " " list_reader.tokens) ;
( match list_reader.list_form with
| sym :: rest -> (
print_endline ("> sym: " ^ Printer.to_string sym) ;
print_endline (">> rest: " ^ Printer.dump rest) ;
match rest with
| [T.List {T.value= T.Symbol {T.value= "syntax-rules"; meta= _} :: literals :: clauses; meta= _}] ->
let sanitized_clauses = List.flatten (Macro.sanitize_clauses sym clauses) in
let variants = Macro.generate_variants sym literals sanitized_clauses in
let fixed_variants = ref Types.M9map.empty in
let transforms = ref Types.M9map.empty in
Types.M9map.iter (fun k v -> transforms := Types.M9map.add k (Utils.tokenize v) !transforms) variants ;
let fixed_clauses = ref [] in
Types.M9map.iter
(fun k v ->
let fixed_clause = fix_clause sym k (read_form (Utils.tokenize v)).form in
(* TODO: is this even used? *)
print_endline
(">>>> registering variant: " ^ Printer.print k true ^ ": " ^ String.concat " " fixed_clause) ;
macro := !macro @ fixed_clause ;
let parsed = (read_form fixed_clause).form in
fixed_clauses := !fixed_clauses @ [parsed] ;
fixed_variants := Types.M9map.add k parsed !fixed_variants ;
Env.set registered_macros k parsed )
variants ;
print_endline ("trying to parse macro: " ^ String.concat " " !macro) ;
let macro_entry = Types.macro sym literals T.Nil !fixed_variants in
Env.set registered_macros sym macro_entry ;
print_endline "finished"
| _ -> raise (Utils.Syntax_error "read_macro botch") )
| _ -> raise (Utils.Syntax_error "read_macro last rest botch") ) ;
print_endline ("SO HERE ARE THE MACRO VARIANTS: " ^ String.concat " " !macro) ;
let trimmed_macro = List.tl !macro in
let trimmed_tokens = trim_end list_reader.tokens in
print_endline ("TRIMMED_MACRO: " ^ String.concat " " trimmed_macro) ;
print_endline ("TRIMMED_TOKENS: " ^ String.concat " " trimmed_tokens) ;
print_endline ("TRIMMED_MACRO: " ^ String.concat " " (trimmed_macro @ trimmed_tokens)) ;
trimmed_macro @ trimmed_tokens
and read_form all_tokens =
(* print_endline ("READ_FORM: " ^ String.concat " " all_tokens); *)
match all_tokens with
| [] -> raise End_of_file
| token :: tokens -> (
match token with
| "'" -> read_quote "quote" tokens
| "`" -> read_quote "quasiquote" tokens
| "#" -> read_vector tokens
| "#|" ->
let list_reader = read_list "|#" {list_form= []; tokens} in
print_endline ("block comment: " ^ Printer.dump list_reader.list_form) ;
{form= T.Unspecified; tokens= list_reader.tokens}
| "(" ->
let list_reader = read_list ")" {list_form= []; tokens} in
{form= Types.list list_reader.list_form; tokens= list_reader.tokens}
| ""
|"\t"
|"\n" ->
read_form tokens
| "define-syntax" -> read_form (read_macro tokens)
| _ ->
if token.[0] = ';' then (
let list_reader = read_list "\\n" {list_form= []; tokens} in
print_endline ("line comment: " ^ String.concat " " list_reader.tokens) ;
{form= T.Unspecified; tokens= list_reader.tokens} )
else {form= read_atom token; tokens} )
let slurp filename =
let chan = open_in filename in
let b = Buffer.create 27 in
Buffer.add_channel b chan (in_channel_length chan) ;
close_in chan ;
Buffer.contents b
(* let read str = (read_form (tokenize str)).form *)
let read str =
let tokenized = Utils.tokenize str in
print_endline ("TOKENIZED: " ^ String.concat " " tokenized) ;
let form = (read_form tokenized).form in
let retoken = Utils.tokenize (Printer.to_string form) in
print_endline ("\nRETOKENIZED: " ^ String.concat " " retoken ^ "\n") ;
let reform = (read_form retoken).form in
print_endline ("\nFORM: " ^ Printer.to_string form) ;
reform