shithub: martian9

ref: 8b4ebe50739d76ce9591716e394ca68194f22245
dir: /reader.ml/

View raw version
module T = Types.Types

let token_re = Str.regexp "~@\\|[][{}()'`~^@]\\|\"\\(\\\\.\\|[^\"]\\)*\"?\\|;.*\\$\\|[^][  \n{}('\"`,;)]*"
let string_re = Str.regexp "\"\\(\\\\.\\|[^\\\\\"]\\)*\""
let registered_macros = Env.make None

type reader =
  { form : Types.m9type
  ; tokens : string list
  }

type list_reader =
  { list_form : Types.m9type list
  ; tokens : string list
  }

let tokenize str =
  List.map
    (function
      | Str.Delim x -> String.trim x (* move trim to regex for speed? *)
      | Str.Text _ -> "tokenize botch")
    (List.filter
       (function
         | Str.Delim _ -> true
         | Str.Text _ -> false)
       (Str.full_split token_re str))
;;

let unescape_string token =
  if Str.string_match string_re token 0
  then (
    let without_quotes = String.sub token 1 (String.length token - 2) in
    Utils.gsub
      (Str.regexp "\\\\.")
      (function
        | "\\n" -> "\n"
        | x -> String.sub x 1 1)
      without_quotes)
  else raise (Utils.Syntax_error "unterminated string")
;;

let fix_pattern sym pattern =
  let tokenized_pattern = tokenize pattern in
  let new_pattern = ref [] in
  let rec replace_token tokens =
    match tokens with
    | [ token ] ->
      let t = if token = "_" then Printer.print sym true else token in
      new_pattern := !new_pattern @ [ t ];
      !new_pattern
    | token :: rest ->
      let t = if token = "_" then Printer.print sym true else token in
      new_pattern := !new_pattern @ [ t ];
      replace_token rest
    | _ -> raise (Utils.Syntax_error "unable to fix pattern")
  in
  replace_token (List.hd tokenized_pattern :: "define" :: List.tl tokenized_pattern)
;;

let read_atom token =
  match token with
  | "null" -> T.Nil
  | "#t" | "#true" -> T.Bool true
  | "#f" | "#false" -> T.Bool false
  | _ ->
    (match token.[0] with
    | '0' .. '9' -> Types.number (float_of_string token)
    | '#' ->
      (match token.[1], token.[2] with
      | '\\', '0' .. '9' | '\\', 'a' .. 'z' | '\\', 'A' .. 'Z' -> T.Char token.[2]
      | _ -> Types.symbol token)
    | '-' ->
      (match String.length token with
      | 1 -> Types.symbol token
      | _ ->
        (match token.[1] with
        | '0' .. '9' -> Types.number (float_of_string token)
        | _ -> Types.symbol token))
    | '"' -> T.String (unescape_string token)
    | _ -> Types.symbol token)
;;

let rec read_list eol list_reader =
  if List.length list_reader.tokens > 1 && List.hd list_reader.tokens = "("
  then (
    match
      try Env.get registered_macros (Types.symbol (List.nth list_reader.tokens 1)) with
      | _ -> T.Nil
    with
    | T.List { T.value = _; T.meta } ->
      print_endline "XXXX MACRO FOUND";
      let rec collect_args tokens args =
        match tokens with
        | [ t ] -> args @ [ t ]
        | t :: ts -> if t = eol then args else collect_args ts args @ [ t ]
        | _ -> []
      in
      let args = collect_args (List.tl list_reader.tokens) [] in
      print_endline ("<><><> args: " ^ String.concat " " args);
      print_endline ("<><><><>: " ^ Macro.match_variant meta args)
    | _ -> ());
  match list_reader.tokens with
  | [] ->
    print_endline ("ERROR: " ^ Printer.dump list_reader.list_form);
    raise (Utils.Syntax_error ("unterminated '" ^ eol ^ "'"))
  | [ _ ] -> { list_form = list_reader.list_form; tokens = [ ")" ] }
  | token :: tokens ->
    if Str.string_match (Str.regexp eol) token 0
    then { list_form = list_reader.list_form; tokens }
    else (
      let reader = read_form list_reader.tokens in
      read_list eol { list_form = list_reader.list_form @ [ reader.form ]; tokens = reader.tokens })

and read_quote sym tokens =
  let reader = read_form tokens in
  { form = Types.list [ Types.symbol sym; reader.form ]; tokens = reader.tokens }

and read_vector all_tokens =
  match all_tokens with
  | [] -> raise End_of_file
  | token :: tokens ->
    (match token with
    | "(" ->
      let list_reader = read_list ")" { list_form = []; tokens } in
      { form = Types.vector list_reader.list_form; tokens = list_reader.tokens }
    | _ -> read_form tokens)

and read_macro tokens =
  let macro = ref [] in
  let list_reader = read_list ")" { list_form = []; tokens } in
  print_endline ("MACRO: " ^ Printer.dump list_reader.list_form);
  (match list_reader.list_form with
  | sym :: rest ->
    print_endline ("  sym: " ^ Printer.print sym true);
    print_endline ("    rest: " ^ Printer.dump rest);
    (match rest with
    | [ T.List { T.value = T.Symbol { T.value = "syntax-rules"; meta = _ } :: literals :: clauses; meta = _ } ] ->
      let sanitized_clauses = Macro.generate_patterns sym clauses in
      print_endline ("sanitized: " ^ String.concat " " (List.map (fun x -> String.concat " " x) sanitized_clauses));
      let variants = Macro.generate_variants sym literals sanitized_clauses in
      let macro_entry = Types.macro sym literals (Types.list clauses) variants in
      Env.set registered_macros sym macro_entry;
      Types.M9map.iter
        (fun k v ->
          print_endline
            ("   >>> " ^ Printer.print k true ^ ":  " ^ String.concat " " (fix_pattern k (Printer.print v true)));
          macro := !macro @ fix_pattern k (Printer.print v true);
          Env.set registered_macros k (read_form (fix_pattern k (Printer.print v true))).form)
        variants
    | _ -> raise (Utils.Syntax_error "read_macro botch"))
  | _ as x -> print_endline ("  last rest: " ^ Printer.dump x));
  read_form !macro

and read_form all_tokens =
  (* print_endline ("READ_FORM: " ^ String.concat " " all_tokens); *)
  match all_tokens with
  | [] -> raise End_of_file
  | token :: tokens ->
    (match token with
    | "'" -> read_quote "quote" tokens
    | "`" -> read_quote "quasiquote" tokens
    | "#" -> read_vector tokens
    | "#|" ->
      let list_reader = read_list "|#" { list_form = []; tokens } in
      print_endline ("block comment: " ^ Printer.dump list_reader.list_form);
      { form = T.Unspecified; tokens = list_reader.tokens }
    | "(" ->
      let list_reader = read_list ")" { list_form = []; tokens } in
      { form = Types.list list_reader.list_form; tokens = list_reader.tokens }
    | "" | "\t" | "\n" -> read_form tokens
    | "define-syntax" -> read_macro tokens
    | _ ->
      if token.[0] = ';'
      then (
        let list_reader = read_list "\\n" { list_form = []; tokens } in
        print_endline ("line comment: " ^ String.concat " " list_reader.tokens);
        { form = T.Unspecified; tokens = list_reader.tokens })
      else { form = read_atom token; tokens })
;;

let slurp filename =
  let chan = open_in filename in
  let b = Buffer.create 27 in
  Buffer.add_channel b chan (in_channel_length chan);
  close_in chan;
  Buffer.contents b
;;

let read str = (read_form (tokenize str)).form