ref: 8b4ebe50739d76ce9591716e394ca68194f22245
dir: /reader.ml/
module T = Types.Types let token_re = Str.regexp "~@\\|[][{}()'`~^@]\\|\"\\(\\\\.\\|[^\"]\\)*\"?\\|;.*\\$\\|[^][ \n{}('\"`,;)]*" let string_re = Str.regexp "\"\\(\\\\.\\|[^\\\\\"]\\)*\"" let registered_macros = Env.make None type reader = { form : Types.m9type ; tokens : string list } type list_reader = { list_form : Types.m9type list ; tokens : string list } let tokenize str = List.map (function | Str.Delim x -> String.trim x (* move trim to regex for speed? *) | Str.Text _ -> "tokenize botch") (List.filter (function | Str.Delim _ -> true | Str.Text _ -> false) (Str.full_split token_re str)) ;; let unescape_string token = if Str.string_match string_re token 0 then ( let without_quotes = String.sub token 1 (String.length token - 2) in Utils.gsub (Str.regexp "\\\\.") (function | "\\n" -> "\n" | x -> String.sub x 1 1) without_quotes) else raise (Utils.Syntax_error "unterminated string") ;; let fix_pattern sym pattern = let tokenized_pattern = tokenize pattern in let new_pattern = ref [] in let rec replace_token tokens = match tokens with | [ token ] -> let t = if token = "_" then Printer.print sym true else token in new_pattern := !new_pattern @ [ t ]; !new_pattern | token :: rest -> let t = if token = "_" then Printer.print sym true else token in new_pattern := !new_pattern @ [ t ]; replace_token rest | _ -> raise (Utils.Syntax_error "unable to fix pattern") in replace_token (List.hd tokenized_pattern :: "define" :: List.tl tokenized_pattern) ;; let read_atom token = match token with | "null" -> T.Nil | "#t" | "#true" -> T.Bool true | "#f" | "#false" -> T.Bool false | _ -> (match token.[0] with | '0' .. '9' -> Types.number (float_of_string token) | '#' -> (match token.[1], token.[2] with | '\\', '0' .. '9' | '\\', 'a' .. 'z' | '\\', 'A' .. 'Z' -> T.Char token.[2] | _ -> Types.symbol token) | '-' -> (match String.length token with | 1 -> Types.symbol token | _ -> (match token.[1] with | '0' .. '9' -> Types.number (float_of_string token) | _ -> Types.symbol token)) | '"' -> T.String (unescape_string token) | _ -> Types.symbol token) ;; let rec read_list eol list_reader = if List.length list_reader.tokens > 1 && List.hd list_reader.tokens = "(" then ( match try Env.get registered_macros (Types.symbol (List.nth list_reader.tokens 1)) with | _ -> T.Nil with | T.List { T.value = _; T.meta } -> print_endline "XXXX MACRO FOUND"; let rec collect_args tokens args = match tokens with | [ t ] -> args @ [ t ] | t :: ts -> if t = eol then args else collect_args ts args @ [ t ] | _ -> [] in let args = collect_args (List.tl list_reader.tokens) [] in print_endline ("<><><> args: " ^ String.concat " " args); print_endline ("<><><><>: " ^ Macro.match_variant meta args) | _ -> ()); match list_reader.tokens with | [] -> print_endline ("ERROR: " ^ Printer.dump list_reader.list_form); raise (Utils.Syntax_error ("unterminated '" ^ eol ^ "'")) | [ _ ] -> { list_form = list_reader.list_form; tokens = [ ")" ] } | token :: tokens -> if Str.string_match (Str.regexp eol) token 0 then { list_form = list_reader.list_form; tokens } else ( let reader = read_form list_reader.tokens in read_list eol { list_form = list_reader.list_form @ [ reader.form ]; tokens = reader.tokens }) and read_quote sym tokens = let reader = read_form tokens in { form = Types.list [ Types.symbol sym; reader.form ]; tokens = reader.tokens } and read_vector all_tokens = match all_tokens with | [] -> raise End_of_file | token :: tokens -> (match token with | "(" -> let list_reader = read_list ")" { list_form = []; tokens } in { form = Types.vector list_reader.list_form; tokens = list_reader.tokens } | _ -> read_form tokens) and read_macro tokens = let macro = ref [] in let list_reader = read_list ")" { list_form = []; tokens } in print_endline ("MACRO: " ^ Printer.dump list_reader.list_form); (match list_reader.list_form with | sym :: rest -> print_endline (" sym: " ^ Printer.print sym true); print_endline (" rest: " ^ Printer.dump rest); (match rest with | [ T.List { T.value = T.Symbol { T.value = "syntax-rules"; meta = _ } :: literals :: clauses; meta = _ } ] -> let sanitized_clauses = Macro.generate_patterns sym clauses in print_endline ("sanitized: " ^ String.concat " " (List.map (fun x -> String.concat " " x) sanitized_clauses)); let variants = Macro.generate_variants sym literals sanitized_clauses in let macro_entry = Types.macro sym literals (Types.list clauses) variants in Env.set registered_macros sym macro_entry; Types.M9map.iter (fun k v -> print_endline (" >>> " ^ Printer.print k true ^ ": " ^ String.concat " " (fix_pattern k (Printer.print v true))); macro := !macro @ fix_pattern k (Printer.print v true); Env.set registered_macros k (read_form (fix_pattern k (Printer.print v true))).form) variants | _ -> raise (Utils.Syntax_error "read_macro botch")) | _ as x -> print_endline (" last rest: " ^ Printer.dump x)); read_form !macro and read_form all_tokens = (* print_endline ("READ_FORM: " ^ String.concat " " all_tokens); *) match all_tokens with | [] -> raise End_of_file | token :: tokens -> (match token with | "'" -> read_quote "quote" tokens | "`" -> read_quote "quasiquote" tokens | "#" -> read_vector tokens | "#|" -> let list_reader = read_list "|#" { list_form = []; tokens } in print_endline ("block comment: " ^ Printer.dump list_reader.list_form); { form = T.Unspecified; tokens = list_reader.tokens } | "(" -> let list_reader = read_list ")" { list_form = []; tokens } in { form = Types.list list_reader.list_form; tokens = list_reader.tokens } | "" | "\t" | "\n" -> read_form tokens | "define-syntax" -> read_macro tokens | _ -> if token.[0] = ';' then ( let list_reader = read_list "\\n" { list_form = []; tokens } in print_endline ("line comment: " ^ String.concat " " list_reader.tokens); { form = T.Unspecified; tokens = list_reader.tokens }) else { form = read_atom token; tokens }) ;; let slurp filename = let chan = open_in filename in let b = Buffer.create 27 in Buffer.add_channel b chan (in_channel_length chan); close_in chan; Buffer.contents b ;; let read str = (read_form (tokenize str)).form