ref: 489868c4e3e09955b41506806168e1283400f06c
dir: /desereter.ml/
(** Matches one line of a CMUdict-style dictionary file.

    Group 1 captures the headword (lowercase letters, [.], ['] and [-]);
    an optional variant marker such as [(2)] is skipped; after a single
    space, group 2 captures the remainder of the line (the pronunciation). *)
let entry_rgx =
  "\\([.a-z'-]+\\)(?[0-9]?)? \\(.*\\)" |> Str.regexp
(** A token split into the three pieces produced by [consider]. *)
type wordset =
  { prefix: string
        (** leading text, up to and including an opening bracket or quote *)
  ; word: string  (** the core word, used as the dictionary key *)
  ; suffix: string
        (** trailing text, starting at a closing bracket, quote or
            punctuation character *) }
(** Output alphabets the translator can emit. *)
type runesets =
  | DESERET  (** use the tables of the [Deseret] module *)
  | FUTHORC  (** use the tables of the [Futhorc] module *)
(** [is_uppercase c] is [true] exactly when [c] is one of the ASCII capital
    letters ['A'] to ['Z'], and [false] for every other character. *)
let is_uppercase c =
  match c with
  | 'A' .. 'Z' -> true
  | _ -> false
(** [get_vowel vowel vowels] looks up a three-character phoneme token in the
    [vowels] table. The table is keyed by the token's first two characters;
    the third character is dropped before the lookup.

    Returns the pair bound in the table, or [("", "")] when [vowel] is not
    exactly three characters long or its two-character key is unbound. *)
let get_vowel vowel vowels =
  (* [<>] is structural inequality. [!=] compares physical identity and only
     happens to give the right answer on immediate integers. *)
  if String.length vowel <> 3 then ("", "")
  else
    match Hashtbl.find vowels (String.sub vowel 0 2) with
    | glyphs -> glyphs
    | exception Not_found -> ("", "")
(** [get_char c runes] resolves the phoneme token [c] for the alphabet
    selected by [runes].

    The consonant table of that alphabet is tried first with [c] as the key;
    when [c] is not bound there, the result of [get_vowel] on the alphabet's
    vowel table is returned instead. *)
let get_char c runes =
  let lookup consonants vowels =
    match Hashtbl.find consonants c with
    | glyphs -> glyphs
    | exception Not_found -> get_vowel c vowels in
  match runes with
  | DESERET -> lookup Deseret.consonants Deseret.vowels
  | FUTHORC -> lookup Futhorc.consonants Futhorc.vowels
(** [parse_arpabet line des uppercase runes] converts the list of phoneme
    tokens [line] into a string by resolving each token with [get_char] and
    concatenating the results, followed by [des].

    [get_char] yields a pair for every token: the first component is used for
    the first token when [uppercase] is [true]; the second component is used
    in every other case. *)
let parse_arpabet line des uppercase runes =
  let render idx phoneme =
    let upper, lower = get_char phoneme runes in
    (* Only the very first phoneme may take the capital form. *)
    if uppercase && idx = 0 then upper else lower in
  String.concat "" (List.mapi render line) ^ des
(** [unquoted word] drops the first and the last character of [word].
    @raise Invalid_argument if [word] has fewer than two characters
    (raised by [String.sub]). *)
let unquoted word =
  let inner_len = String.length word - 2 in
  String.sub word 1 inner_len
(** [consider word] splits a raw token into prefix, core word and suffix.

    - [prefix] is everything up to and including the first ['('], ['{'] or
      ['"'] in the token (empty when none occurs), copied verbatim.
    - [suffix] starts at the last closing bracket, quote or punctuation
      character found after the prefix and runs to the end of the token
      (empty when none occurs), copied verbatim.
    - [word] is what lies between the two, converted to lowercase ASCII so it
      can be used as a dictionary key.

    The core word is always taken from the lowercased copy. Slicing it from
    the original token would leave a capitalised word such as the one in
    ["\"Hello"] in its original case whenever a prefix is present, and the
    lowercase dictionary keys would then never match it. *)
let consider word =
  let lowered = String.lowercase_ascii word in
  (* [lowered] and [word] have the same length, so offsets are shared. *)
  let total = String.length word in
  let start =
    match Str.search_forward (Str.regexp "[({\"]") lowered 0 with
    | idx -> idx + 1
    | exception Not_found -> 0 in
  let core = String.sub lowered start (total - start) in
  let stop =
    match
      Str.search_backward
        (Str.regexp "[})\"\\.,!;:]")
        core (String.length core)
    with
    | pos -> start + pos
    | exception Not_found -> total in
  { prefix= String.sub word 0 start
  ; word= String.sub lowered start (stop - start)
  ; suffix= String.sub word stop (total - stop) }
(** [parse word dictionary runes] transliterates a single token.

    The token is split with [consider]; the core word is looked up in
    [dictionary], whose value is split on spaces into phoneme tokens and
    rendered by [parse_arpabet]. The prefix and suffix found by [consider]
    are re-attached around the rendered word.

    Returns [word] unchanged when the core word is not in [dictionary].

    Capitalisation is decided from the first character that follows the
    prefix, not from [word.[0]]: for a token that starts with a quote or
    bracket, [word.[0]] is that punctuation character. The bounds check also
    makes the function safe on an empty token or on a token that consists of
    a prefix only. *)
let parse word dictionary runes =
  let wordparts = consider word in
  let first = String.length wordparts.prefix in
  let uppercase = first < String.length word && is_uppercase word.[first] in
  try
    let pronunciation = Hashtbl.find dictionary wordparts.word in
    let des =
      parse_arpabet (String.split_on_char ' ' pronunciation) "" uppercase runes
    in
    wordparts.prefix ^ des ^ wordparts.suffix
  with Not_found -> word
(** [sanitize line] surrounds every run of three consecutive dots in [line]
    with spaces, so that splitting on spaces yields ["..."] as its own
    token. *)
let sanitize line =
  let ellipsis = Str.regexp "\\.\\.\\." in
  Str.global_replace ellipsis " ... " line
(** [load_dictionary extra] builds the pronunciation table.

    The file [$OPAM_SWITCH_PREFIX/share/desereter/cmudict.dict] is read
    first, followed by every non-empty path in the [';']-separated list
    [extra]. Each line matching [entry_rgx] binds its headword to its
    pronunciation with [Hashtbl.add], so for a headword that occurs several
    times the most recently read entry is the one [Hashtbl.find] returns.

    Every file is closed on all exit paths, including when reading fails
    with an exception other than [End_of_file].

    @raise Not_found if [OPAM_SWITCH_PREFIX] is not set in the environment.
    @raise Sys_error if a dictionary file cannot be opened or read. *)
let load_dictionary extra =
  let prefix = Unix.getenv "OPAM_SWITCH_PREFIX" in
  let dictionaries =
    (prefix ^ "/share/desereter/cmudict.dict")
    :: String.split_on_char ';' extra in
  let dictionary = Hashtbl.create 150000 in
  (* Records one dictionary line; lines that do not match are ignored. *)
  let add_entry entry =
    if Str.string_match entry_rgx entry 0 then begin
      let word = Str.matched_group 1 entry in
      let pronunciation = Str.matched_group 2 entry in
      Hashtbl.add dictionary word pronunciation
    end in
  let load file =
    if String.length file > 0 then begin
      let ic = open_in file in
      (* The recursive call is in tail position, so long files are fine. *)
      let rec read_all () =
        match input_line ic with
        | entry -> add_entry entry ; read_all ()
        | exception End_of_file -> () in
      match read_all () with
      | () -> close_in ic
      | exception e -> close_in_noerr ic ; raise e
    end in
  List.iter load dictionaries ;
  dictionary
(** [translate runes dictionary line] transliterates one line of text and
    prints the result on standard output, followed by a newline.

    The line is passed through [sanitize], split on spaces, and tokens that
    are empty after trimming are dropped. Each remaining token is converted
    with [parse] and written followed by a single space, so the printed line
    ends with a space whenever at least one token was present. *)
let translate runes dictionary line =
  let out = Buffer.create 256 in
  sanitize line
  |> String.split_on_char ' '
  |> List.iter (fun token ->
         if String.trim token <> "" then begin
           Buffer.add_string out (parse token dictionary runes) ;
           Buffer.add_char out ' '
         end ) ;
  print_endline (Buffer.contents out)
(** Program entry point.

    Command line: [-d] gives a [';']-separated list of extra dictionary
    files, [-r] selects the alphabet ([deseret], the default, or [futhorc]),
    and a positional argument is the text to translate. When several
    positional arguments are given, the last one is used. Without a
    positional argument, lines are read from standard input and translated
    one by one until end of file.

    The alphabet name is checked before the dictionary is loaded, so a bad
    [-r] value fails fast with a message on standard error and exit status 2
    instead of an uncaught [Not_found]. *)
let () =
  let line = ref "" in
  let extra = ref "" in
  let runes = ref "deseret" in
  Arg.parse
    [ (* ("-i", Arg.Set_string line, "input"); *)
      ("-d", Arg.Set_string extra, "dictionary")
    ; ("-r", Arg.Set_string runes, "runes (deseret|futhorc)") ]
    (fun x -> line := x)
    (* The usage text lists only options that are actually registered. *)
    (Sys.argv.(0) ^ " [-d dictionary] [-r deseret|futhorc] [input]") ;
  let runeset =
    match !runes with
    | "deseret" -> DESERET
    | "futhorc" -> FUTHORC
    | other ->
        prerr_endline
          ( Sys.argv.(0) ^ ": unknown runes \"" ^ other
          ^ "\" (expected deseret or futhorc)" ) ;
        exit 2 in
  let dictionary = load_dictionary !extra in
  if String.length !line > 0 then translate runeset dictionary !line
  else
    let rec interactive () =
      match read_line () with
      | input -> translate runeset dictionary input ; interactive ()
      | exception End_of_file -> () in
    interactive ()