shithub: opus

Download patch

ref: fc4f594e25ee608f6cf806809622b29bbf1eed2e
parent: 3fc183df5575ef4e50a570f324420635efbcd272
author: Jean-Marc Valin <jmvalin@jmvalin.ca>
date: Fri Mar 1 11:34:41 EST 2019

Better quantization

--- a/dnn/ceps_vq_train.c
+++ b/dnn/ceps_vq_train.c
@@ -237,13 +237,17 @@
   }
 
   float w2=0;
+  int min_count = 1000000000;
+  int small=0;
   for (i=0;i<nb_entries;i++)
   { 
     for (j=0;j<ndim;j++)
       codebook[i*ndim+j] *= (1./count[i]);
     w2 += (count[i]/(float)nb_vectors)*(count[i]/(float)nb_vectors);
+    if (count[i] < min_count) min_count = count[i];
+    small += (count[i] < 50);
   }
-  //fprintf(stderr, "%f / %d\n", 1./w2, nb_entries);
+  fprintf(stderr, "%f / %d, min = %d, small=%d\n", 1./w2, nb_entries, min_count, small);
 }
 
 void update_multi(float *data, int nb_vectors, float *codebook, int nb_entries, int ndim, int sign)
@@ -276,13 +280,17 @@
   }
 
   float w2=0;
+  int min_count = 1000000000;
+  int small=0;
   for (i=0;i<nb_entries;i++)
   {
     for (j=0;j<ndim;j++)
       codebook[i*ndim+j] *= (1./count[i]);
     w2 += (count[i]/(float)nb_vectors)*(count[i]/(float)nb_vectors);
+    if (count[i] < min_count) min_count = count[i];
+    small += (count[i] < 50);
   }
-  //fprintf(stderr, "%f / %d\n", 1./w2, nb_entries);
+  fprintf(stderr, "%f / %d, min = %d, small=%d\n", 1./w2, nb_entries, min_count, small);
 }
 
 
@@ -357,6 +365,7 @@
 void vq_train_multi(float *data, int nb_vectors, float *codebook, int nb_entries, int ndim, int sign)
 {
   int i, j, e;
+#if 1
   for (e=0;e<MULTI;e++) {
     for (j=0;j<ndim;j++)
       codebook[e*ndim+j] = 0;
@@ -369,6 +378,9 @@
       codebook[e*ndim+j] += delta;
     }
   }
+#else
+  for (i=0;i<MULTI*ndim;i++) codebook[i] = .01*(rand()/(float)RAND_MAX-.5);
+#endif
   e = MULTI;
   for (j=0;j<10;j++)
     update_multi(data, nb_vectors, codebook, e, ndim, sign);
@@ -420,7 +432,7 @@
   int i,j;
   int nb_vectors, nb_entries, nb_entries1, nb_entries2a, nb_entries2b, ndim, ndim0, total_dim;
   float *data, *pred, *multi_data, *multi_data2, *qdata;
-  float *codebook, *codebook2, *codebook_diff2, *codebook_diff4;
+  float *codebook, *codebook2, *codebook3, *codebook_diff2, *codebook_diff4;
   float *delta;
   double err;
   FILE *fout;
@@ -430,9 +442,9 @@
   total_dim = atoi(argv[2]);
   nb_vectors = atoi(argv[3]);
   nb_entries = 1<<atoi(argv[4]);
-  nb_entries1 = 256;
+  nb_entries1 = 1024;
   nb_entries2a = 2048;
-  nb_entries2b = 256;
+  nb_entries2b = 64;
   
   data = malloc((nb_vectors*ndim+total_dim)*sizeof(*data));
   qdata = malloc((nb_vectors*ndim+total_dim)*sizeof(*qdata));
@@ -441,6 +453,7 @@
   multi_data2 = malloc(MULTI*nb_vectors*ndim*sizeof(*multi_data));
   codebook = malloc(nb_entries*ndim0*sizeof(*codebook));
   codebook2 = malloc(nb_entries1*ndim0*sizeof(*codebook2));
+  codebook3 = malloc(nb_entries1*ndim0*sizeof(*codebook3));
   codebook_diff4 = malloc(nb_entries2a*ndim*sizeof(*codebook_diff4));
   codebook_diff2 = malloc(nb_entries2b*ndim*sizeof(*codebook_diff2));
   
@@ -472,7 +485,7 @@
   for (i=0;i<nb_vectors;i++)
   {
     int nearest = find_nearest(codebook, nb_entries, &pred[i*ndim0], ndim0, NULL);
-    qdata[i*ndim+j] = data[i*ndim+j];
+    qdata[i*ndim] = data[i*ndim];
     for (j=0;j<ndim0;j++)
     {
       qdata[i*ndim+j+1] = codebook[nearest*ndim0+j];
@@ -494,12 +507,28 @@
     {
       qdata[i*ndim+j+1] += codebook2[n1*ndim0+j];
       //delta[i*ndim0+j] = delta[i*ndim0+j] - codebook2[n1*ndim0+j];
-      delta[i*ndim0+j] = qdata[i*ndim+j+1] - data[i*ndim+j+1];
+      delta[i*ndim0+j] = data[i*ndim+j+1] - qdata[i*ndim+j+1];
       err += delta[i*ndim0+j]*delta[i*ndim0+j];
     }
   }
   fprintf(stderr, "Cepstrum RMS error after stage 2: %f)\n", sqrt(err/nb_vectors/ndim));
 
+  vq_train(delta, nb_vectors, codebook3, nb_entries1, ndim0);
+  err=0;
+  for (i=0;i<nb_vectors;i++)
+  {
+    int n1;
+    n1 = find_nearest(codebook3, nb_entries1, &delta[i*ndim0], ndim0, NULL);
+    for (j=0;j<ndim0;j++)
+    {
+      qdata[i*ndim+j+1] += codebook3[n1*ndim0+j];
+      //delta[i*ndim0+j] = delta[i*ndim0+j] - codebook2[n1*ndim0+j];
+      delta[i*ndim0+j] = data[i*ndim+j+1] - qdata[i*ndim+j+1];
+      err += delta[i*ndim0+j]*delta[i*ndim0+j];
+    }
+  }
+  fprintf(stderr, "Cepstrum RMS error after stage 3: %f)\n", sqrt(err/nb_vectors/ndim));
+
   for (i=0;i<nb_vectors-4;i++)
   {
     for (j=0;j<ndim;j++)
@@ -510,6 +539,8 @@
       multi_data[(MULTI*i+2)*ndim+j] = data[(i+1)*ndim+j] - qdata[i*ndim+j];
     for (j=0;j<ndim;j++)
       multi_data[(MULTI*i+3)*ndim+j] = data[(i+1)*ndim+j] - qdata[(i+2)*ndim+j];
+    //for (j=0;j<4*ndim;j++) printf("%f ", multi_data[MULTI*i*ndim + j]);
+    //printf("\n");
   }
 
   for (i=0;i<nb_vectors-4;i++)
@@ -547,6 +578,15 @@
   {
     for (j=0;j<ndim0;j++)
       fprintf(fout, "%f, ", codebook2[i*ndim0+j]);
+    fprintf(fout, "\n");
+  }
+  fprintf(fout, "};\n\n");
+
+  fprintf(fout, "float ceps_codebook3[%d*%d] = {\n",nb_entries1, ndim0);
+  for (i=0;i<nb_entries1;i++)
+  {
+    for (j=0;j<ndim0;j++)
+      fprintf(fout, "%f, ", codebook3[i*ndim0+j]);
     fprintf(fout, "\n");
   }
   fprintf(fout, "};\n\n");
--- a/dnn/dump_data.c
+++ b/dnn/dump_data.c
@@ -84,7 +84,7 @@
 int quantize_2stage(float *x)
 {
     int i;
-    int id, id2;
+    int id, id2, id3;
     float ref[NB_BANDS_1];
     RNN_COPY(ref, x, NB_BANDS_1);
     id = vq_quantize(ceps_codebook1, 1024, x, NB_BANDS_1, NULL);
@@ -91,12 +91,13 @@
     for (i=0;i<NB_BANDS_1;i++) {
         x[i] -= ceps_codebook1[id*NB_BANDS_1 + i];
     }
-    id2 = vq_quantize(ceps_codebook2, 256, x, NB_BANDS_1, NULL);
+    id2 = vq_quantize(ceps_codebook2, 1024, x, NB_BANDS_1, NULL);
     for (i=0;i<NB_BANDS_1;i++) {
-        x[i] = ceps_codebook2[id2*NB_BANDS_1 + i];
+        x[i] -= ceps_codebook2[id2*NB_BANDS_1 + i];
     }
+    id3 = vq_quantize(ceps_codebook3, 1024, x, NB_BANDS_1, NULL);
     for (i=0;i<NB_BANDS_1;i++) {
-        x[i] += ceps_codebook1[id*NB_BANDS_1 + i];
+        x[i] = ceps_codebook1[id*NB_BANDS_1 + i] + ceps_codebook2[id2*NB_BANDS_1 + i] + ceps_codebook3[id3*NB_BANDS_1 + i];
     }
     if (1) {
         float err = 0;
@@ -184,7 +185,42 @@
     return id;
 }
 
+void interp_diff(float *x, float *left, float *right, float *codebook, int bits, int sign)
+{ /* Overwrites x[0..NB_BANDS) with the closest of three candidates (mean of left/right, left, right) and prints the resulting RMS error. */
+    int i, k;
+    float min_dist = 1e15; /* smallest squared distance seen so far */
+    int best_pred = 0; /* slot 0 also holds the mean, so it is the fallback if no candidate beats min_dist */
+    float ref[NB_BANDS]; /* copy of the input x, used only for the error printout below */
+    float pred[4*NB_BANDS]; /* four candidate slots of NB_BANDS values each */
+    (void)sign; /* sign, codebook and bits are not used by this function */
+    (void)codebook;
+    (void)bits;
+    RNN_COPY(ref, x, NB_BANDS); /* presumably copies NB_BANDS floats from x into ref -- macro defined elsewhere, confirm */
+    for (i=0;i<NB_BANDS;i++) pred[i] = pred[NB_BANDS+i] = .5*(left[i] + right[i]); /* slots 0 and 1: average of left and right */
+    for (i=0;i<NB_BANDS;i++) pred[2*NB_BANDS+i] = left[i]; /* slot 2: left unchanged */
+    for (i=0;i<NB_BANDS;i++) pred[3*NB_BANDS+i] = right[i]; /* slot 3: right unchanged */

+    for (k=1;k<4;k++) { /* search slots 1..3 only; slot 0 duplicates slot 1 */
+      float dist = 0;
+      for (i=0;i<NB_BANDS;i++) dist += (x[i] - pred[k*NB_BANDS+i])*(x[i] - pred[k*NB_BANDS+i]); /* squared Euclidean distance to candidate k */
+      if (dist < min_dist) { /* strict '<': on a tie the lower-numbered slot is kept */
+        min_dist = dist;
+        best_pred = k;
+      }
+    }
+    for (i=0;i<NB_BANDS;i++) {
+      x[i] = pred[best_pred*NB_BANDS + i]; /* replace the input with the selected candidate */
+    }
+    if (1) { /* always-on diagnostic */
+        float err = 0;
+        for (i=0;i<NB_BANDS;i++) {
+            err += (x[i]-ref[i])*(x[i]-ref[i]); /* squared error between the new x and the saved input */
+        }
+        printf("%f\n", sqrt(err/NB_BANDS)); /* RMS error over NB_BANDS values, written to stdout */
+    }
+}
+
+
 typedef struct {
   float analysis_mem[OVERLAP_SIZE];
   float cepstral_mem[CEPS_MEM][NB_BANDS];
@@ -380,7 +416,7 @@
     sxy += w*sub*best[sub];
     sy += w*best[sub];
   }
-  voiced = frame_corr > .3;
+  voiced = frame_corr >= .3;
   /* Linear regression to figure out the pitch contour. */
   best_a = (sw*sxy - sx*sy)/(sw*sxx - sx*sx);
   if (voiced) {
@@ -389,8 +425,10 @@
     /* Allow a relative variation of up to 1/4 over 8 sub-frames. */
     max_a = mean_pitch/32;
     best_a = MIN16(max_a, MAX16(-max_a, best_a));
+    frame_corr = 0.3875f + .175f*floor((frame_corr-.3f)/.175f);
   } else {
     best_a = 0;
+    frame_corr = 0.0375f + .075f*floor(frame_corr/.075f);
   }
   //best_b = (sxx*sy - sx*sxy)/(sw*sxx - sx*sx);
   best_b = (sy - best_a*sx)/sw;
@@ -425,8 +463,8 @@
   quantize_2stage(&st->features[3][1]);
   quantize_diff(&st->features[1][0], vq_mem, &st->features[3][0], ceps_codebook_diff4, 11, 1);
   //quantize_2stage(&st->features[1][1]);
-  quantize_diff(&st->features[0][0], vq_mem, &st->features[1][0], ceps_codebook_diff2, 8, 0);
-  quantize_diff(&st->features[2][0], &st->features[1][0], &st->features[3][0], ceps_codebook_diff2, 8, 0);
+  interp_diff(&st->features[0][0], vq_mem, &st->features[1][0], ceps_codebook_diff2, 6, 0);
+  interp_diff(&st->features[2][0], &st->features[1][0], &st->features[3][0], ceps_codebook_diff2, 6, 0);
   RNN_COPY(vq_mem, &st->features[3][0], NB_BANDS);
   for (i=0;i<4;i++) {
     fwrite(st->features[i], sizeof(float), NB_FEATURES, ffeat);
--