ref: 60993540fa2f1383724705faf0796202250c63f6
dir: /reader.ml/
module T = Types.Types let token_re = Str.regexp "~@\\|[][{}()'`~^@]\\|\"\\(\\\\.\\|[^\"]\\)*\"?\\|;.*\\$\\|[^][ \n{}('\"`,;)]*" let string_re = Str.regexp "\"\\(\\\\.\\|[^\\\\\"]\\)*\"" let registered_macros = Env.make None type reader = {form: Types.m9type; tokens: string list} type list_reader = {list_form: Types.m9type list; tokens: string list} let tokenize str = List.map (function Str.Delim x -> String.trim x (* move trim to regex for speed? *) | Str.Text _ -> "tokenize botch") (List.filter (function Str.Delim _ -> true | Str.Text _ -> false) (Str.full_split token_re str)) let unescape_string token = if Str.string_match string_re token 0 then let without_quotes = String.sub token 1 (String.length token - 2) in Utils.gsub (Str.regexp "\\\\.") (function "\\n" -> "\n" | x -> String.sub x 1 1) without_quotes else raise (Utils.Syntax_error "unterminated string") let rec replace_token tokens replacement block = match tokens with | [token] -> let t = if token = "_" then replacement else token in block := !block @ [t] ; String.concat " " !block | token :: rest -> let t = if token = "_" then replacement else token in block := !block @ [t] ; replace_token rest replacement block | _ -> String.concat " " !block (* raise (Utils.Syntax_error ("clause is unfixable: " ^ String.concat " " x)) *) and fix_clause original sym clause = print_endline (" fix_clause: incoming: " ^ Printer.print clause true) ; match clause with | T.List {T.value= [T.List {T.value= pattern; meta= _}; T.List {T.value= transform; meta= _}]; meta= _} -> (* print_endline(" fix_clause: pattern: " ^ Printer.dump pattern ^ " sym: " ^ Printer.to_string sym); * print_endline( " fix_clause: transform: " ^ Printer.dump transform ^ " original: " ^ Printer.to_string original ^ " ???? " ^ String.concat "?" (tokenize (Printer.dump transform))); *) let pattern = tokenize (Printer.dump pattern) in let fixed_pattern = replace_token (List.tl pattern) (Printer.to_string sym) (ref []) in let fixed_transform = replace_token (tokenize (Printer.dump transform)) (Printer.to_string original) (ref []) in (* print_endline ("FIXED PATTERN: " ^ fixed_pattern); * print_endline ("FIXED TRANSFORM: " ^ fixed_transform); *) [ "(" ; "define" ; Printer.print sym true ; "(" ; "lambda" ; "(" ; fixed_pattern ; ")" ; "(" ; fixed_transform ; ")" ; ")" ; ")" ] | T.List {T.value= [T.List {T.value= pattern; meta= _}; atom]; meta= _} -> (* print_endline(" fix_clause (atom): pattern: " ^ Printer.dump pattern ^ " sym: " ^ Printer.print sym true); * print_endline( "fix_clause: atom: " ^ Printer.to_string atom ^ " original: " ^ Printer.print original true); *) let pattern = tokenize (Printer.dump pattern) in let fixed_pattern = replace_token (List.tl pattern) (Printer.to_string sym) (ref []) in ["("; "define"; Printer.to_string sym; "("; "lambda"; "("; fixed_pattern; ")"; Printer.to_string atom; ")"; ")"] | _ as e -> raise (Utils.Syntax_error ("fix_clause botch: " ^ Printer.to_string e)) let read_atom token = match token with | "null" -> T.Nil | "#t" | "#true" -> T.Bool true | "#f" | "#false" -> T.Bool false | _ -> ( match token.[0] with | '0' .. '9' -> Types.number (float_of_string token) | '#' -> ( match (token.[1], token.[2]) with | '\\', '0' .. '9' | '\\', 'a' .. 'z' | '\\', 'A' .. 'Z' -> T.Char token.[2] | _ -> Types.symbol token ) | '-' -> ( match String.length token with | 1 -> Types.symbol token | _ -> ( match token.[1] with '0' .. '9' -> Types.number (float_of_string token) | _ -> Types.symbol token ) ) | '"' -> T.String (unescape_string token) | _ -> Types.symbol token ) let rec read_list eol list_reader = ( if List.length list_reader.tokens > 1 && List.hd list_reader.tokens = "(" then match try Env.get registered_macros (Types.symbol (List.nth list_reader.tokens 1)) with _ -> T.Nil with | T.List {T.value= _; T.meta} -> print_endline "XXXX MACRO FOUND" ; let rec collect_args tokens args = match tokens with | [t] -> args @ [t] | t :: ts -> if t = eol then args else collect_args ts args @ [t] | _ -> [] in let args = collect_args (List.tl list_reader.tokens) [] in print_endline ("<><><> args: " ^ String.concat " " args) ; print_endline ("<><><><>: " ^ Macro.match_variant meta args) | _ -> () ) ; match list_reader.tokens with | [] -> print_endline ("ERROR: " ^ Printer.dump list_reader.list_form) ; raise (Utils.Syntax_error ("unterminated '" ^ eol ^ "'")) | [_] -> {list_form= list_reader.list_form; tokens= [")"]} | token :: tokens -> if Str.string_match (Str.regexp eol) token 0 then {list_form= list_reader.list_form; tokens} else let reader = read_form list_reader.tokens in read_list eol {list_form= list_reader.list_form @ [reader.form]; tokens= reader.tokens} and read_quote sym tokens = let reader = read_form tokens in {form= Types.list [Types.symbol sym; reader.form]; tokens= reader.tokens} and read_vector all_tokens = match all_tokens with | [] -> raise End_of_file | token :: tokens -> ( match token with | "(" -> let list_reader = read_list ")" {list_form= []; tokens} in {form= Types.vector list_reader.list_form; tokens= list_reader.tokens} | _ -> read_form tokens ) and read_macro tokens = let macro = ref [] in let list_reader = read_list ")" {list_form= []; tokens} in print_endline ("MACRO: " ^ Printer.dump list_reader.list_form) ; ( match list_reader.list_form with | sym :: rest -> ( print_endline (" sym: " ^ Printer.print sym true) ; print_endline (" rest: " ^ Printer.dump rest) ; match rest with | [T.List {T.value= T.Symbol {T.value= "syntax-rules"; meta= _} :: literals :: clauses; meta= _}] -> let sanitized_clauses = List.flatten (Macro.sanitize_clauses sym clauses) in print_endline (" sanitized_clauses: " ^ String.concat "!" sanitized_clauses) ; let variants = Macro.generate_variants sym literals sanitized_clauses in Types.M9map.iter (fun k v -> print_endline (" >>> " ^ Printer.print k true ^ ": " ^ v) ; print_endline (" VARIANT ==> " ^ String.concat " " (fix_clause sym k (read_form (tokenize v)).form)) ) variants ; let variant_map = ref Types.M9map.empty in Types.M9map.iter (fun k v -> variant_map := Types.M9map.add k (read_form (tokenize v)).form !variant_map) variants ; let macro_entry = Types.macro sym literals (Types.list (List.map (fun x -> (read_form (tokenize x)).form) sanitized_clauses)) !variant_map in Env.set registered_macros sym macro_entry ; Types.M9map.iter (fun k v -> let fixed_clause = fix_clause sym k (read_form (tokenize v)).form in print_endline (" >>> " ^ Printer.print k true ^ ": " ^ String.concat " " fixed_clause) ; macro := !macro @ fixed_clause ; Env.set registered_macros k (read_form fixed_clause).form ) variants (* List.iter (fun x -> print_endline("<<<<< " ^ String.concat "." x)) (Macro.generate_patterns sym clauses); * let sanitized_clauses = List.map (fun x -> (read_form x).form) (Macro.generate_patterns sym clauses) in * (\* print_endline ("sanitized: " ^ String.concat " " (List.map (fun x -> String.concat " " x) sanitized_clauses)); *\) * print_endline ("sanitized: " ^ Printer.dump sanitized_clauses); * let variants = Macro.generate_variants sym literals sanitized_clauses in * let macro_entry = Types.macro sym literals (Types.list sanitized_clauses) variants in * Env.set registered_macros sym macro_entry; * Types.M9map.iter * (fun k v -> * print_endline * (" >>> " ^ Printer.print k true ^ ": " ^ String.concat " " (fix_clause sym k v)); * macro := !macro @ fix_clause sym k v; * Env.set registered_macros k (read_form (fix_clause sym k v)).form) * variants *) | _ -> raise (Utils.Syntax_error "read_macro botch") ) | _ as x -> print_endline (" last rest: " ^ Printer.dump x) ) ; read_form !macro and read_form all_tokens = (* print_endline ("READ_FORM: " ^ String.concat " " all_tokens); *) match all_tokens with | [] -> raise End_of_file | token :: tokens -> ( match token with | "'" -> read_quote "quote" tokens | "`" -> read_quote "quasiquote" tokens | "#" -> read_vector tokens | "#|" -> let list_reader = read_list "|#" {list_form= []; tokens} in print_endline ("block comment: " ^ Printer.dump list_reader.list_form) ; {form= T.Unspecified; tokens= list_reader.tokens} | "(" -> let list_reader = read_list ")" {list_form= []; tokens} in {form= Types.list list_reader.list_form; tokens= list_reader.tokens} | "" | "\t" | "\n" -> read_form tokens | "define-syntax" -> read_macro tokens | _ -> if token.[0] = ';' then ( let list_reader = read_list "\\n" {list_form= []; tokens} in print_endline ("line comment: " ^ String.concat " " list_reader.tokens) ; {form= T.Unspecified; tokens= list_reader.tokens} ) else {form= read_atom token; tokens} ) let slurp filename = let chan = open_in filename in let b = Buffer.create 27 in Buffer.add_channel b chan (in_channel_length chan) ; close_in chan ; Buffer.contents b let read str = (read_form (tokenize str)).form