shithub: martian9

ref: bdb99b496ec68880effa7df50fb4b05e7c1799bc
dir: /reader.ml/

View raw version
module T = Types.Types

let string_re = Str.regexp "\"\\(\\\\.\\|[^\\\\\"]\\)*\""
let registered_macros = Env.make None

type reader = {form: Types.m9type; tokens: string list}
type list_reader = {list_form: Types.m9type list; tokens: string list}

let unescape_string token =
  if Str.string_match string_re token 0 then
    let without_quotes = String.sub token 1 (String.length token - 2) in
    Utils.gsub (Str.regexp "\\\\.") (function "\\n" -> "\n" | x -> String.sub x 1 1) without_quotes
  else raise (Utils.Syntax_error "unterminated string")

let trim_end list = List.rev (List.tl (List.rev list))

let rec replace_token tokens replacement block =
  match tokens with
  | [token] ->
      let t = if token = "_" then replacement else token in
      block := !block @ [t] ;
      String.concat " " !block
  | token :: rest ->
      let t = if token = "_" then replacement else token in
      block := !block @ [t] ;
      replace_token rest replacement block
  | _ -> String.concat " " !block

and fix_clause original sym clause =
  print_endline (">>>>> fix_clause: incoming: " ^ Printer.to_string clause) ;
  match clause with
  | T.List {T.value= [T.List {T.value= pattern; meta= _}; T.List {T.value= transform; meta= _}]; meta= _} ->
      let pattern = Utils.tokenize (Printer.dump pattern) in
      let fixed_pattern = replace_token (List.tl pattern) (Printer.to_string sym) (ref []) in
      let fixed_transform =
        replace_token (Utils.tokenize (Printer.dump transform)) (Printer.to_string original) (ref []) in
      ["("; "define"; Printer.to_string sym; "("; "lambda"; "("; fixed_pattern; ")"; "("; fixed_transform; ")"; ")"; ")"]
  | T.List {T.value= [T.List {T.value= pattern; meta= _}; atom]; meta= _} ->
      let pattern = Utils.tokenize (Printer.dump pattern) in
      let fixed_pattern = replace_token (List.tl pattern) (Printer.to_string sym) (ref []) in
      ["("; "define"; Printer.to_string sym; "("; "lambda"; "("; fixed_pattern; ")"; Printer.to_string atom; ")"; ")"]
  | _ as e -> raise (Utils.Syntax_error ("fix_clause botch: " ^ Printer.to_string e))

let read_atom token =
  match token with
  | "null" -> T.Nil
  | "#t" | "#true" -> T.Bool true
  | "#f" | "#false" -> T.Bool false
  | _ -> (
    match token.[0] with
    | '0' .. '9' -> Types.number (float_of_string token)
    | '#' -> (
      match (token.[1], token.[2]) with
      | '\\', '0' .. '9' | '\\', 'a' .. 'z' | '\\', 'A' .. 'Z' -> T.Char token.[2]
      | _ -> Types.symbol token )
    | '-' -> (
      match String.length token with
      | 1 -> Types.symbol token
      | _ -> ( match token.[1] with '0' .. '9' -> Types.number (float_of_string token) | _ -> Types.symbol token ) )
    | '"' -> T.String (unescape_string token)
    | _ -> Types.symbol token )

let rec read_list eol list_reader =
  (* we need to replace macro calls with their variant symbols *)
  let tweaked_tokens =
    if List.length list_reader.tokens > 1 && List.hd list_reader.tokens = "(" then
      let symbol_str = List.nth list_reader.tokens 1 in
      let symbol = Types.symbol symbol_str in
      match try Env.get registered_macros symbol with _ -> T.Nil with
      | T.Macro {T.value= m; meta} ->
          (* print_endline "XXXX MACRO FOUND" ; *)
          print_endline ("XXXX MACRO: " ^ Printer.to_string m) ;
          (* print_endline ("XXXX META: " ^ Printer.to_string meta); *)
          (* print_endline ("XXXX TOKENS: " ^ String.concat " " list_reader.tokens) ; *)
          let args = Macro.collect_args (List.tl list_reader.tokens) [] in
          (* print_endline ("<><><> args: " ^ String.concat " " args) ; *)
          let variant = Macro.match_variant symbol_str meta args in
          print_endline ("<><><><>: " ^ variant) ;
          (* List.map (fun s -> if s = Printer.to_string symbol then variant else s) (trim_end list_reader.tokens) *)
          List.map (fun s -> if s = symbol_str then variant else s) (trim_end list_reader.tokens)
      | _ -> list_reader.tokens
    else list_reader.tokens in
  (* print_endline ("TWEAKED_TOKENS: [" ^ String.concat " " tweaked_tokens ^ "]") ; *)
  match tweaked_tokens with
  | [] -> raise (Utils.Syntax_error ("read_list botch: '" ^ Printer.dump list_reader.list_form ^ "' eol: '" ^ eol ^ "'"))
  | [_] -> {list_form= list_reader.list_form; tokens= [")"]}
  | token :: tokens ->
      if Str.string_match (Str.regexp eol) token 0 then {list_form= list_reader.list_form; tokens}
      else
        let reader = read_form tweaked_tokens in
        (* print_endline ("token: " ^ token ^ "  tokens: " ^ String.concat " " tokens); *)
        read_list eol {list_form= list_reader.list_form @ [reader.form]; tokens= reader.tokens}

and read_quote sym tokens =
  let reader = read_form tokens in
  {form= Types.list [Types.symbol sym; reader.form]; tokens= reader.tokens}

and read_vector all_tokens =
  match all_tokens with
  | [] -> raise End_of_file
  | token :: tokens -> (
    match token with
    | "(" ->
        let list_reader = read_list ")" {list_form= []; tokens} in
        {form= Types.vector list_reader.list_form; tokens= list_reader.tokens}
    | _ -> read_form tokens )

and read_macro tokens =
  let macro = ref [] in
  let list_reader = read_list ")" {list_form= []; tokens} in
  print_endline ("READ_MACRO: " ^ Printer.dump list_reader.list_form) ;
  print_endline ("READ_MACRO tokens: " ^ String.concat " " list_reader.tokens) ;
  ( match list_reader.list_form with
  | sym :: rest -> (
      print_endline ("> sym: " ^ Printer.to_string sym) ;
      print_endline (">> rest: " ^ Printer.dump rest) ;
      match rest with
      | [T.List {T.value= T.Symbol {T.value= "syntax-rules"; meta= _} :: literals :: clauses; meta= _}] ->
          let sanitized_clauses = List.flatten (Macro.sanitize_clauses sym clauses) in
          let variants = Macro.generate_variants sym literals sanitized_clauses in
          let fixed_variants = ref Types.M9map.empty in
          let transforms = ref Types.M9map.empty in
          Types.M9map.iter (fun k v -> transforms := Types.M9map.add k (Utils.tokenize v) !transforms) variants ;
          let fixed_clauses = ref [] in
          Types.M9map.iter
            (fun k v ->
              let fixed_clause = fix_clause sym k (read_form (Utils.tokenize v)).form in
              (* TODO: is this even used? *)
              print_endline
                (">>>> registering variant: " ^ Printer.print k true ^ ":  " ^ String.concat " " fixed_clause) ;
              macro := !macro @ fixed_clause ;
              let parsed = (read_form fixed_clause).form in
              fixed_clauses := !fixed_clauses @ [parsed] ;
              fixed_variants := Types.M9map.add k parsed !fixed_variants ;
              Env.set registered_macros k parsed )
            variants ;
          print_endline ("trying to parse macro: " ^ String.concat " " !macro) ;
          let macro_entry = Types.macro sym literals T.Nil !fixed_variants in
          Env.set registered_macros sym macro_entry ;
          print_endline "finished"
      | _ -> raise (Utils.Syntax_error "read_macro botch") )
  | _ -> raise (Utils.Syntax_error "read_macro last rest botch") ) ;
  print_endline ("SO HERE ARE THE MACRO VARIANTS: " ^ String.concat " " !macro) ;
  let trimmed_macro = List.tl !macro in
  let trimmed_tokens = trim_end list_reader.tokens in
  print_endline ("TRIMMED_MACRO: " ^ String.concat " " trimmed_macro) ;
  print_endline ("TRIMMED_TOKENS: " ^ String.concat " " trimmed_tokens) ;
  print_endline ("TRIMMED_MACRO: " ^ String.concat " " (trimmed_macro @ trimmed_tokens)) ;
  trimmed_macro @ trimmed_tokens

and read_form all_tokens =
  (* print_endline ("READ_FORM: " ^ String.concat " " all_tokens); *)
  match all_tokens with
  | [] -> raise End_of_file
  | token :: tokens -> (
    match token with
    | "'" -> read_quote "quote" tokens
    | "`" -> read_quote "quasiquote" tokens
    | "#" -> read_vector tokens
    | "#|" ->
        let list_reader = read_list "|#" {list_form= []; tokens} in
        print_endline ("block comment: " ^ Printer.dump list_reader.list_form) ;
        {form= T.Unspecified; tokens= list_reader.tokens}
    | "(" ->
        let list_reader = read_list ")" {list_form= []; tokens} in
        {form= Types.list list_reader.list_form; tokens= list_reader.tokens}
    | "" | "\t" | "\n" -> read_form tokens
    | "define-syntax" -> read_form (read_macro tokens)
    | _ ->
        if token.[0] = ';' then (
          let list_reader = read_list "\\n" {list_form= []; tokens} in
          print_endline ("line comment: " ^ String.concat " " list_reader.tokens) ;
          {form= T.Unspecified; tokens= list_reader.tokens} )
        else {form= read_atom token; tokens} )

let slurp filename =
  let chan = open_in filename in
  let b = Buffer.create 27 in
  Buffer.add_channel b chan (in_channel_length chan) ;
  close_in chan ;
  Buffer.contents b

(* let read str = (read_form (tokenize str)).form *)
let read str =
  let tokenized = Utils.tokenize str in
  print_endline ("TOKENIZED: " ^ String.concat " " tokenized) ;
  let form = (read_form tokenized).form in
  let retoken = Utils.tokenize (Printer.to_string form) in
  print_endline ("\nRETOKENIZED: " ^ String.concat " " retoken ^ "\n") ;
  let reform = (read_form retoken).form in
  print_endline ("\nFORM: " ^ Printer.to_string form) ;
  reform