ref: 489868c4e3e09955b41506806168e1283400f06c
dir: /desereter.ml/
let entry_rgx = Str.regexp "\\([.a-z'-]+\\)(?[0-9]?)? \\(.*\\)" type wordset = {prefix: string; word: string; suffix: string} type runesets = DESERET | FUTHORC let is_uppercase = function | 'A' | 'B' | 'C' | 'D' | 'E' | 'F' | 'G' | 'H' | 'I' | 'J' | 'K' | 'L' |'M' | 'N' | 'O' | 'P' | 'Q' | 'R' | 'S' | 'T' | 'U' | 'V' | 'W' | 'X' |'Y' | 'Z' -> true | _ -> false let get_vowel vowel vowels = if String.length vowel != 3 then ("", "") else let trimmed = String.sub vowel 0 2 in try Hashtbl.find vowels trimmed with Not_found -> ("", "") let get_char c runes = let vowels, consonants = match runes with | DESERET -> (Deseret.vowels, Deseret.consonants) | FUTHORC -> (Futhorc.vowels, Futhorc.consonants) in try Hashtbl.find consonants c with Not_found -> get_vowel c vowels let rec parse_arpabet line des uppercase runes = match line with | hd :: tl -> let u, l = get_char hd runes in (if uppercase then u else l) ^ parse_arpabet tl des false runes | [] -> des let unquoted word = String.sub word 1 (String.length word - 2) let consider word = let wrd = ref (String.lowercase_ascii word) in let prefix = ref "" in let suffix = ref "" in ( try let pos = Str.search_forward (Str.regexp "[({\"]") !wrd 0 + 1 in wrd := String.sub word pos (String.length !wrd - pos) ; prefix := String.sub word 0 pos with Not_found -> () ) ; ( try let pos = Str.search_backward (Str.regexp "[})\"\\.,!;:]") !wrd (String.length !wrd) in suffix := String.sub !wrd pos (String.length !wrd - pos) ; wrd := String.sub !wrd 0 pos with Not_found -> () ) ; {prefix= !prefix; word= !wrd; suffix= !suffix} let parse word dictionary runes = let uppercase = is_uppercase word.[0] in let wordparts = consider word in try let des = parse_arpabet (String.split_on_char ' ' (Hashtbl.find dictionary wordparts.word)) "" uppercase runes in wordparts.prefix ^ des ^ wordparts.suffix with Not_found -> word let sanitize line = Str.global_replace (Str.regexp "\\.\\.\\.") " ... " line let load_dictionary extra = let prefix = Unix.getenv "OPAM_SWITCH_PREFIX" in let dictionaries = [prefix ^ "/share/desereter/cmudict.dict"] @ String.split_on_char ';' extra in let dictionary = Hashtbl.create 150000 in let load file = if String.length file > 0 then let ic = open_in file in try while true do let entry = input_line ic in if Str.string_match entry_rgx entry 0 then let word = Str.matched_group 1 entry in let pronunciation = Str.matched_group 2 entry in Hashtbl.add dictionary word pronunciation done with End_of_file -> close_in ic in List.iter load dictionaries ; dictionary let translate runes dictionary line = let words = String.split_on_char ' ' (sanitize line) in let words = List.filter (fun x -> let w = String.trim x in String.length w > 0 ) words in print_endline (List.fold_left (fun acc word -> acc ^ parse word dictionary runes ^ " ") "" words ) let () = let line = ref "" in let extra = ref "" in let runes = ref "deseret" in Arg.parse [ (* ("-i", Arg.Set_string line, "input"); *) ("-d", Arg.Set_string extra, "dictionary") ; ("-r", Arg.Set_string runes, "runes (deseret|futhorc)") ] (fun x -> line := x) (Sys.argv.(0) ^ " [-d dictionary] <-i input>") ; let dictionary = load_dictionary !extra in let runeset = match !runes with | "deseret" -> DESERET | "futhorc" -> FUTHORC | _ -> raise Not_found in if String.length !line > 0 then translate runeset dictionary !line else try while true do read_line () |> translate runeset dictionary done with End_of_file -> ()