shithub: martian9

ref: cc10399b3b05fc3fcda7f04afe9b34fffa7f743a
dir: /reader.ml/

View raw version
module T = Types.Types

(** Tokenizer pattern, written in [Str] regular-expression syntax.

    Its alternatives are, in order:
    - the two-character token made of a tilde followed by an at-sign;
    - one special character: a square bracket, brace, parenthesis, single
      quote, backquote, tilde, caret or at-sign;
    - a string literal: a double quote, then any mix of backslash escapes and
      characters other than a double quote, then an optional closing double
      quote (so an unterminated string still forms a single token);
    - a semicolon together with the rest of its line;
    - a possibly empty run of characters that are none of: square brackets,
      blanks, newline, braces, parentheses, single or double quote, backquote,
      comma, semicolon.

    Since the last alternative accepts an empty run, a match of this pattern
    can have length zero. *)
let token_re =
  Str.regexp "~@\\|[][{}()'`~^@]\\|\"\\(\\\\.\\|[^\"]\\)*\"?\\|;.*\\|[^][  \n{}('\"`,;)]*"
;;

(** Pattern for a complete string literal: an opening double quote, any number
    of backslash escapes or characters that are neither a backslash nor a
    double quote, and a closing double quote.

    NOTE(review): nothing in the visible part of this file refers to this
    value; check for users elsewhere before changing or removing it. *)
let string_re = Str.regexp "\"\\(\\\\.\\|[^\\\\\"]\\)*\""

(** Result of reading one form from a token stream. *)
type reader =
  { form : Types.m9type (** the form that was read *)
  ; tokens : string list (** tokens remaining after that form *)
  }

(** State threaded through [read_list] while the elements of a delimited
    sequence are being read, and also the value it returns. *)
type list_reader =
  { list_form : Types.m9type list (** elements read so far, in source order *)
  ; tokens : string list (** tokens not yet consumed *)
  }

(** [tokenize str] splits [str] into its tokens, in source order.

    Only the pieces of [str] matched by [token_re] are kept; the text between
    matches (blanks, newlines, commas) is dropped.  The last alternative of
    [token_re] can match the empty string, so [Str.full_split] may also report
    zero-length matches, for instance inside a run of several blanks.  Those
    are discarded as well: they are not tokens, and an empty token would make
    the readers fail when they inspect its first character. *)
let tokenize str =
  Str.full_split token_re str
  |> List.filter_map (function
       | Str.Delim "" | Str.Text _ -> None
       | Str.Delim token -> Some token)
;;

(** [read_atom token] converts a single non-list token into a form.

    - [null] yields [T.Nil]; [#t] / [#true] and [#f] / [#false] yield the
      corresponding [T.Bool].
    - A token starting with a digit, or with [-] immediately followed by a
      digit, is parsed with [float_of_string] and wrapped by [Types.number].
    - A token starting with a double quote yields [T.String] holding the token
      text as is.
    - Anything else, including a lone [-], is passed to [Types.symbol].

    The token must not be empty, because its first character is inspected.

    @raise Invalid_argument if [token] is empty.
    @raise Failure if a number-like token is rejected by [float_of_string]. *)
let read_atom token =
  let is_digit c = c >= '0' && c <= '9' in
  match token with
  | "null" -> T.Nil
  | "#t" | "#true" -> T.Bool true
  | "#f" | "#false" -> T.Bool false
  | _ ->
    let first = token.[0] in
    if first = '"'
    then T.String token (* TODO: unescape *)
    else (
      (* A leading minus only denotes a number when a digit follows it. *)
      let looks_numeric =
        is_digit first
        || (first = '-' && String.length token > 1 && is_digit token.[1])
      in
      if looks_numeric
      then Types.number (float_of_string token)
      else Types.symbol token)
;;

(* A token that carries no form: either empty or a line comment, i.e. a token
   whose first character is a semicolon. *)
let is_blank_or_comment token = String.length token = 0 || token.[0] = ';'

(** [read_list eol list_reader] reads forms from [list_reader.tokens] until a
    token matching [eol] is found, and returns the forms (appended to those
    already in [list_reader.list_form]) together with the tokens that follow
    the closing token.

    [eol] is compiled with [Str.regexp] and matched at the start of each
    token.  Empty tokens and line comments are skipped here rather than handed
    to [read_form]; otherwise a comment placed right before the closing token
    would make [read_form] consume the closing token as an ordinary atom.

    Prints a message on standard output before raising when the tokens run
    out.

    @raise End_of_file if the tokens run out before [eol] is seen. *)
let rec read_list eol list_reader =
  let closer = Str.regexp eol in
  (* Forms are consed onto a reversed accumulator and reversed once at the
     end, so reading a list is linear rather than quadratic in its length. *)
  let rec collect rev_forms = function
    | [] ->
      print_endline "unexpected EOF";
      raise End_of_file
    | token :: rest when Str.string_match closer token 0 ->
      { list_form = List.rev rev_forms; tokens = rest }
    | token :: rest when is_blank_or_comment token -> collect rev_forms rest
    | pending ->
      let { form; tokens = rest } = read_form pending in
      collect (form :: rev_forms) rest
  in
  collect (List.rev list_reader.list_form) list_reader.tokens

(** [read_form all_tokens] reads one form from the front of [all_tokens] and
    returns it with the tokens left over.

    - An opening parenthesis starts a list, read up to the closing parenthesis.
    - [#|] starts a block comment, read up to [|#]; its contents are discarded
      and the resulting form is [T.Comment].
    - Empty tokens and line comments are skipped.
    - Any other token is converted by [read_atom].

    @raise End_of_file if there is no form left to read. *)
and read_form all_tokens =
  match all_tokens with
  | [] -> raise End_of_file
  | "(" :: tokens ->
    let { list_form; tokens } = read_list ")" { list_form = []; tokens } in
    { form = Types.list list_form; tokens }
  | "#|" :: tokens ->
    let { list_form = _; tokens } = read_list "|#" { list_form = []; tokens } in
    { form = T.Comment; tokens }
  | token :: tokens when is_blank_or_comment token -> read_form tokens
  | token :: tokens -> { form = read_atom token; tokens }
;;

(** [read_str str] tokenizes [str] and returns the first form it contains.
    Tokens following that form are ignored.

    @raise End_of_file if [str] holds no form, or ends inside a list. *)
let read_str str =
  let { form; tokens = _ } = str |> tokenize |> read_form in
  form