shithub: martian9

ref: 60993540fa2f1383724705faf0796202250c63f6
dir: /reader.ml/

View raw version
module T = Types.Types

let token_re = Str.regexp "~@\\|[][{}()'`~^@]\\|\"\\(\\\\.\\|[^\"]\\)*\"?\\|;.*\\$\\|[^][  \n{}('\"`,;)]*"

let string_re = Str.regexp "\"\\(\\\\.\\|[^\\\\\"]\\)*\""

let registered_macros = Env.make None

type reader = {form: Types.m9type; tokens: string list}

type list_reader = {list_form: Types.m9type list; tokens: string list}

let tokenize str =
  List.map
    (function Str.Delim x -> String.trim x (* move trim to regex for speed? *) | Str.Text _ -> "tokenize botch")
    (List.filter (function Str.Delim _ -> true | Str.Text _ -> false) (Str.full_split token_re str))

let unescape_string token =
  if Str.string_match string_re token 0 then
    let without_quotes = String.sub token 1 (String.length token - 2) in
    Utils.gsub (Str.regexp "\\\\.") (function "\\n" -> "\n" | x -> String.sub x 1 1) without_quotes
  else raise (Utils.Syntax_error "unterminated string")

let rec replace_token tokens replacement block =
  match tokens with
  | [token] ->
      let t = if token = "_" then replacement else token in
      block := !block @ [t] ;
      String.concat " " !block
  | token :: rest ->
      let t = if token = "_" then replacement else token in
      block := !block @ [t] ;
      replace_token rest replacement block
  | _ ->
      String.concat " " !block

(* raise (Utils.Syntax_error ("clause is unfixable: " ^ String.concat " " x)) *)
and fix_clause original sym clause =
  print_endline (" fix_clause: incoming: " ^ Printer.print clause true) ;
  match clause with
  | T.List {T.value= [T.List {T.value= pattern; meta= _}; T.List {T.value= transform; meta= _}]; meta= _} ->
      (* print_endline(" fix_clause: pattern: " ^ Printer.dump pattern ^ "  sym: " ^ Printer.to_string sym);
       * print_endline( " fix_clause: transform: " ^ Printer.dump transform ^ "  original: " ^ Printer.to_string original ^ " ???? " ^ String.concat "?" (tokenize (Printer.dump transform))); *)
      let pattern = tokenize (Printer.dump pattern) in
      let fixed_pattern = replace_token (List.tl pattern) (Printer.to_string sym) (ref []) in
      let fixed_transform = replace_token (tokenize (Printer.dump transform)) (Printer.to_string original) (ref []) in
      (* print_endline ("FIXED PATTERN: " ^ fixed_pattern);
       * print_endline ("FIXED TRANSFORM: " ^ fixed_transform); *)
      [ "("
      ; "define"
      ; Printer.print sym true
      ; "("
      ; "lambda"
      ; "("
      ; fixed_pattern
      ; ")"
      ; "("
      ; fixed_transform
      ; ")"
      ; ")"
      ; ")" ]
  | T.List {T.value= [T.List {T.value= pattern; meta= _}; atom]; meta= _} ->
      (* print_endline(" fix_clause (atom): pattern: " ^ Printer.dump pattern ^ "  sym: " ^ Printer.print sym true);
       * print_endline( "fix_clause: atom: " ^ Printer.to_string atom ^ "  original: " ^ Printer.print original true); *)
      let pattern = tokenize (Printer.dump pattern) in
      let fixed_pattern = replace_token (List.tl pattern) (Printer.to_string sym) (ref []) in
      ["("; "define"; Printer.to_string sym; "("; "lambda"; "("; fixed_pattern; ")"; Printer.to_string atom; ")"; ")"]
  | _ as e ->
      raise (Utils.Syntax_error ("fix_clause botch: " ^ Printer.to_string e))

let read_atom token =
  match token with
  | "null" ->
      T.Nil
  | "#t" | "#true" ->
      T.Bool true
  | "#f" | "#false" ->
      T.Bool false
  | _ -> (
    match token.[0] with
    | '0' .. '9' ->
        Types.number (float_of_string token)
    | '#' -> (
      match (token.[1], token.[2]) with
      | '\\', '0' .. '9' | '\\', 'a' .. 'z' | '\\', 'A' .. 'Z' ->
          T.Char token.[2]
      | _ ->
          Types.symbol token )
    | '-' -> (
      match String.length token with
      | 1 ->
          Types.symbol token
      | _ -> (
        match token.[1] with '0' .. '9' -> Types.number (float_of_string token) | _ -> Types.symbol token ) )
    | '"' ->
        T.String (unescape_string token)
    | _ ->
        Types.symbol token )

let rec read_list eol list_reader =
  ( if List.length list_reader.tokens > 1 && List.hd list_reader.tokens = "(" then
    match try Env.get registered_macros (Types.symbol (List.nth list_reader.tokens 1)) with _ -> T.Nil with
    | T.List {T.value= _; T.meta} ->
        print_endline "XXXX MACRO FOUND" ;
        let rec collect_args tokens args =
          match tokens with
          | [t] ->
              args @ [t]
          | t :: ts ->
              if t = eol then args else collect_args ts args @ [t]
          | _ ->
              []
        in
        let args = collect_args (List.tl list_reader.tokens) [] in
        print_endline ("<><><> args: " ^ String.concat " " args) ;
        print_endline ("<><><><>: " ^ Macro.match_variant meta args)
    | _ ->
        () ) ;
  match list_reader.tokens with
  | [] ->
      print_endline ("ERROR: " ^ Printer.dump list_reader.list_form) ;
      raise (Utils.Syntax_error ("unterminated '" ^ eol ^ "'"))
  | [_] ->
      {list_form= list_reader.list_form; tokens= [")"]}
  | token :: tokens ->
      if Str.string_match (Str.regexp eol) token 0 then {list_form= list_reader.list_form; tokens}
      else
        let reader = read_form list_reader.tokens in
        read_list eol {list_form= list_reader.list_form @ [reader.form]; tokens= reader.tokens}

and read_quote sym tokens =
  let reader = read_form tokens in
  {form= Types.list [Types.symbol sym; reader.form]; tokens= reader.tokens}

and read_vector all_tokens =
  match all_tokens with
  | [] ->
      raise End_of_file
  | token :: tokens -> (
    match token with
    | "(" ->
        let list_reader = read_list ")" {list_form= []; tokens} in
        {form= Types.vector list_reader.list_form; tokens= list_reader.tokens}
    | _ ->
        read_form tokens )

and read_macro tokens =
  let macro = ref [] in
  let list_reader = read_list ")" {list_form= []; tokens} in
  print_endline ("MACRO: " ^ Printer.dump list_reader.list_form) ;
  ( match list_reader.list_form with
  | sym :: rest -> (
      print_endline ("  sym: " ^ Printer.print sym true) ;
      print_endline ("    rest: " ^ Printer.dump rest) ;
      match rest with
      | [T.List {T.value= T.Symbol {T.value= "syntax-rules"; meta= _} :: literals :: clauses; meta= _}] ->
          let sanitized_clauses = List.flatten (Macro.sanitize_clauses sym clauses) in
          print_endline ("   sanitized_clauses: " ^ String.concat "!" sanitized_clauses) ;
          let variants = Macro.generate_variants sym literals sanitized_clauses in
          Types.M9map.iter
            (fun k v ->
              print_endline ("   >>> " ^ Printer.print k true ^ ":  " ^ v) ;
              print_endline (" VARIANT ==> " ^ String.concat " " (fix_clause sym k (read_form (tokenize v)).form)) )
            variants ;
          let variant_map = ref Types.M9map.empty in
          Types.M9map.iter
            (fun k v -> variant_map := Types.M9map.add k (read_form (tokenize v)).form !variant_map)
            variants ;
          let macro_entry =
            Types.macro sym literals
              (Types.list (List.map (fun x -> (read_form (tokenize x)).form) sanitized_clauses))
              !variant_map
          in
          Env.set registered_macros sym macro_entry ;
          Types.M9map.iter
            (fun k v ->
              let fixed_clause = fix_clause sym k (read_form (tokenize v)).form in
              print_endline ("   >>> " ^ Printer.print k true ^ ":  " ^ String.concat " " fixed_clause) ;
              macro := !macro @ fixed_clause ;
              Env.set registered_macros k (read_form fixed_clause).form )
            variants
      (*   List.iter (fun x -> print_endline("<<<<< " ^ String.concat "." x)) (Macro.generate_patterns sym clauses);
       * let sanitized_clauses = List.map (fun x -> (read_form x).form) (Macro.generate_patterns sym clauses) in
       * (\* print_endline ("sanitized: " ^ String.concat " " (List.map (fun x -> String.concat " " x) sanitized_clauses)); *\)
       * print_endline ("sanitized: " ^ Printer.dump sanitized_clauses);
       * let variants = Macro.generate_variants sym literals sanitized_clauses in
       * let macro_entry = Types.macro sym literals (Types.list sanitized_clauses) variants in
       * Env.set registered_macros sym macro_entry;
       * Types.M9map.iter
       *   (fun k v ->
       *     print_endline
       *       ("   >>> " ^ Printer.print k true ^ ":  " ^ String.concat " " (fix_clause sym k v));
       *     macro := !macro @ fix_clause sym k v;
       *     Env.set registered_macros k (read_form (fix_clause sym k v)).form)
       *   variants *)
      | _ ->
          raise (Utils.Syntax_error "read_macro botch") )
  | _ as x ->
      print_endline ("  last rest: " ^ Printer.dump x) ) ;
  read_form !macro

and read_form all_tokens =
  (* print_endline ("READ_FORM: " ^ String.concat " " all_tokens); *)
  match all_tokens with
  | [] ->
      raise End_of_file
  | token :: tokens -> (
    match token with
    | "'" ->
        read_quote "quote" tokens
    | "`" ->
        read_quote "quasiquote" tokens
    | "#" ->
        read_vector tokens
    | "#|" ->
        let list_reader = read_list "|#" {list_form= []; tokens} in
        print_endline ("block comment: " ^ Printer.dump list_reader.list_form) ;
        {form= T.Unspecified; tokens= list_reader.tokens}
    | "(" ->
        let list_reader = read_list ")" {list_form= []; tokens} in
        {form= Types.list list_reader.list_form; tokens= list_reader.tokens}
    | "" | "\t" | "\n" ->
        read_form tokens
    | "define-syntax" ->
        read_macro tokens
    | _ ->
        if token.[0] = ';' then (
          let list_reader = read_list "\\n" {list_form= []; tokens} in
          print_endline ("line comment: " ^ String.concat " " list_reader.tokens) ;
          {form= T.Unspecified; tokens= list_reader.tokens} )
        else {form= read_atom token; tokens} )

let slurp filename =
  let chan = open_in filename in
  let b = Buffer.create 27 in
  Buffer.add_channel b chan (in_channel_length chan) ;
  close_in chan ;
  Buffer.contents b

let read str = (read_form (tokenize str)).form