shithub: flite

Download patch

ref: f15235c1d015f08c9278ac5f5fa6096da716a651
parent: 25477859abb169a6041bca8e318c2b7039f5189d
author: Alan W Black <awb@cs.cmu.edu>
date: Mon Nov 6 01:21:42 EST 2017

is_english flag and formatting

--- a/lang/cmu_indic_lang/cmu_indic_lang.c
+++ b/lang/cmu_indic_lang/cmu_indic_lang.c
@@ -373,6 +373,7 @@
     return r;
 }
 
+#if 0
 static int indic_nump_old(const char *number)
 {
     /* True if all (unicode) characters are in num_table's digit table */
@@ -406,8 +407,8 @@
     return flag;
 
 }
+#endif
 
-
 static int indic_nump(const char *number)
 {
     /* Check if non-empty string */
@@ -515,6 +516,7 @@
     cst_utterance *utt;
 
     /* printf("awb_debug token_name %s name %s\n",item_name(token),name); */
+    r = NULL;
 
     if (item_feat_present(token,"phones"))
 	return cons_val(string_val(name),NULL);
@@ -562,58 +564,58 @@
     else if (indic_nump(name))
             
     {   /* Its script specific digits (commas/dots) */
-	    if (indic_nump(name) == 2)
-	    {   /* All characters are digits */ 
-           // printf("nump is 2\n");
-	        p = indic_num_normalize(name,num_table);
-	        if (val_length(p) <= 9)
-		    r = indic_number(p,num_table);
-	        else
-		    r = indic_number_indiv(p,num_table);
-	        delete_val(p);
-	    }
-	    else if (indic_nump(name) == 1)
-	    {   /* Some characters are digits */
-	        int len = 1;
-	        int i = 0;
-	        char c0;
-                char *aaa;
-                char *bbb;
-	        while(name[i] != '\0')
-	        {
-		        /* Iterate over UTF-8 string */
-		        c0 = name[i];
-		        len = ts_utf8_sequence_length(c0);
-                        /* Check if char after this is comma */
-                        if (name[i+len] == ',')
-                        {   
-                          /* Skip commas */
-                        i += len;
-                        c0 = name[i];
-                        len = ts_utf8_sequence_length(c0);
-                        i += len;
-                        continue;
-                        }
-		        /* Find where character type switches to or from digits */
-		        if(indic_text_splitable(name, i, len))
-		            break;
-		        i +=len;
-	        }
-	        aaa = cst_strdup(name);
-	        aaa[i+len] = '\0';
-	        bbb = cst_strdup(&name[i+len]);
-	        r = val_append(cmu_indic_tokentowords_one(token, aaa),
-			        cmu_indic_tokentowords_one(token, bbb));
-	        cst_free(aaa);
-	        cst_free(bbb);
-	    }
+        if (indic_nump(name) == 2)
+        {   /* All characters are digits */ 
+            // printf("nump is 2\n");
+            p = indic_num_normalize(name,num_table);
+            if (val_length(p) <= 9)
+                r = indic_number(p,num_table);
+            else
+                r = indic_number_indiv(p,num_table);
+            delete_val(p);
+        }
+        else if (indic_nump(name) == 1)
+        {   /* Some characters are digits */
+            int len = 1;
+            int i = 0;
+            char c0;
+            char *aaa;
+            char *bbb;
+            while(name[i] != '\0')
+            {
+                /* Iterate over UTF-8 string */
+                c0 = name[i];
+                len = ts_utf8_sequence_length(c0);
+                /* Check if char after this is comma */
+                if (name[i+len] == ',')
+                {   
+                    /* Skip commas */
+                    i += len;
+                    c0 = name[i];
+                    len = ts_utf8_sequence_length(c0);
+                    i += len;
+                    continue;
+                }
+                /* Find where character type switches to or from digits */
+                if(indic_text_splitable(name, i, len))
+                    break;
+                i +=len;
+            }
+            aaa = cst_strdup(name);
+            aaa[i+len] = '\0';
+            bbb = cst_strdup(&name[i+len]);
+            r = val_append(cmu_indic_tokentowords_one(token, aaa),
+                           cmu_indic_tokentowords_one(token, bbb));
+            cst_free(aaa);
+            cst_free(bbb);
+        }
     }
     else if (indic_hyphenated(name))
     {	/* For numbers seeparated by - / , */
-            char *aaa;
-	    aaa = cst_strdup(&name[1]);
-	    r = cmu_indic_tokentowords_one(token, aaa);
-	    cst_free(aaa);
+        char *aaa;
+        aaa = cst_strdup(&name[1]);
+        r = cmu_indic_tokentowords_one(token, aaa);
+        cst_free(aaa);
     }
 
     else if (cst_regex_match(cst_rx_not_indic,name))
@@ -651,6 +653,18 @@
     return FALSE;
 }
 
+DEF_STATIC_CONST_VAL_STRING(val_string_zero,"0");
+DEF_STATIC_CONST_VAL_STRING(val_string_one,"1");
+
+const cst_val *is_english(const cst_item *p)
+{   /* Feature function: "1" if p's name matches cst_rx_not_indic, else "0" */
+    if (p && cst_regex_match(cst_rx_not_indic,
+                             flite_ffeature_string(p,"name")))
+        return (const cst_val *)&val_string_one;
+    else   /* NULL item, or name did not match the regex */
+        return (const cst_val *)&val_string_zero;
+}
+
 void cmu_indic_lang_init(cst_voice *v)
 {
     /* Set indic language stuff */
@@ -690,6 +704,9 @@
 
     /* Default ffunctions (required) */
     basic_ff_register(v->ffunctions);
+
+    /* Indic specific features */
+    ff_register(v->ffunctions, "lisp_is_english", is_english);
 
     return;
 }