ref: fc4f594e25ee608f6cf806809622b29bbf1eed2e
parent: 3fc183df5575ef4e50a570f324420635efbcd272
author: Jean-Marc Valin <jmvalin@jmvalin.ca>
date: Fri Mar 1 11:34:41 EST 2019
Better quantization
--- a/dnn/ceps_vq_train.c
+++ b/dnn/ceps_vq_train.c
@@ -237,13 +237,17 @@
}
float w2=0;
+ int min_count = 1000000000;
+ int small=0;
for (i=0;i<nb_entries;i++)
{
for (j=0;j<ndim;j++)
codebook[i*ndim+j] *= (1./count[i]);
w2 += (count[i]/(float)nb_vectors)*(count[i]/(float)nb_vectors);
+ if (count[i] < min_count) min_count = count[i];
+ small += (count[i] < 50);
}
- //fprintf(stderr, "%f / %d\n", 1./w2, nb_entries);
+ fprintf(stderr, "%f / %d, min = %d, small=%d\n", 1./w2, nb_entries, min_count, small);
}
void update_multi(float *data, int nb_vectors, float *codebook, int nb_entries, int ndim, int sign)
@@ -276,13 +280,17 @@
}
float w2=0;
+ int min_count = 1000000000;
+ int small=0;
for (i=0;i<nb_entries;i++)
{
for (j=0;j<ndim;j++)
codebook[i*ndim+j] *= (1./count[i]);
w2 += (count[i]/(float)nb_vectors)*(count[i]/(float)nb_vectors);
+ if (count[i] < min_count) min_count = count[i];
+ small += (count[i] < 50);
}
- //fprintf(stderr, "%f / %d\n", 1./w2, nb_entries);
+ fprintf(stderr, "%f / %d, min = %d, small=%d\n", 1./w2, nb_entries, min_count, small);
}
@@ -357,6 +365,7 @@
void vq_train_multi(float *data, int nb_vectors, float *codebook, int nb_entries, int ndim, int sign)
{
int i, j, e;
+#if 1
for (e=0;e<MULTI;e++) {
for (j=0;j<ndim;j++)
codebook[e*ndim+j] = 0;
@@ -369,6 +378,9 @@
codebook[e*ndim+j] += delta;
}
}
+#else
+ for (i=0;i<MULTI*ndim;i++) codebook[i] = .01*(rand()/(float)RAND_MAX-.5);
+#endif
e = MULTI;
for (j=0;j<10;j++)
update_multi(data, nb_vectors, codebook, e, ndim, sign);
@@ -420,7 +432,7 @@
int i,j;
int nb_vectors, nb_entries, nb_entries1, nb_entries2a, nb_entries2b, ndim, ndim0, total_dim;
float *data, *pred, *multi_data, *multi_data2, *qdata;
- float *codebook, *codebook2, *codebook_diff2, *codebook_diff4;
+ float *codebook, *codebook2, *codebook3, *codebook_diff2, *codebook_diff4;
float *delta;
double err;
FILE *fout;
@@ -430,9 +442,9 @@
total_dim = atoi(argv[2]);
nb_vectors = atoi(argv[3]);
nb_entries = 1<<atoi(argv[4]);
- nb_entries1 = 256;
+ nb_entries1 = 1024;
nb_entries2a = 2048;
- nb_entries2b = 256;
+ nb_entries2b = 64;
data = malloc((nb_vectors*ndim+total_dim)*sizeof(*data));
qdata = malloc((nb_vectors*ndim+total_dim)*sizeof(*qdata));
@@ -441,6 +453,7 @@
multi_data2 = malloc(MULTI*nb_vectors*ndim*sizeof(*multi_data));
codebook = malloc(nb_entries*ndim0*sizeof(*codebook));
codebook2 = malloc(nb_entries1*ndim0*sizeof(*codebook2));
+ codebook3 = malloc(nb_entries1*ndim0*sizeof(*codebook3));
codebook_diff4 = malloc(nb_entries2a*ndim*sizeof(*codebook_diff4));
codebook_diff2 = malloc(nb_entries2b*ndim*sizeof(*codebook_diff2));
@@ -472,7 +485,7 @@
for (i=0;i<nb_vectors;i++)
{
int nearest = find_nearest(codebook, nb_entries, &pred[i*ndim0], ndim0, NULL);
- qdata[i*ndim+j] = data[i*ndim+j];
+ qdata[i*ndim] = data[i*ndim];
for (j=0;j<ndim0;j++)
{
qdata[i*ndim+j+1] = codebook[nearest*ndim0+j];
@@ -494,12 +507,28 @@
{
qdata[i*ndim+j+1] += codebook2[n1*ndim0+j];
//delta[i*ndim0+j] = delta[i*ndim0+j] - codebook2[n1*ndim0+j];
- delta[i*ndim0+j] = qdata[i*ndim+j+1] - data[i*ndim+j+1];
+ delta[i*ndim0+j] = data[i*ndim+j+1] - qdata[i*ndim+j+1];
err += delta[i*ndim0+j]*delta[i*ndim0+j];
}
}
fprintf(stderr, "Cepstrum RMS error after stage 2: %f)\n", sqrt(err/nb_vectors/ndim));
+ vq_train(delta, nb_vectors, codebook3, nb_entries1, ndim0);
+ err=0;
+ for (i=0;i<nb_vectors;i++)
+ {
+ int n1;
+ n1 = find_nearest(codebook3, nb_entries1, &delta[i*ndim0], ndim0, NULL);
+ for (j=0;j<ndim0;j++)
+ {
+ qdata[i*ndim+j+1] += codebook3[n1*ndim0+j];
+ //delta[i*ndim0+j] = delta[i*ndim0+j] - codebook2[n1*ndim0+j];
+ delta[i*ndim0+j] = data[i*ndim+j+1] - qdata[i*ndim+j+1];
+ err += delta[i*ndim0+j]*delta[i*ndim0+j];
+ }
+ }
+ fprintf(stderr, "Cepstrum RMS error after stage 3: %f)\n", sqrt(err/nb_vectors/ndim));
+
for (i=0;i<nb_vectors-4;i++)
{
for (j=0;j<ndim;j++)
@@ -510,6 +539,8 @@
multi_data[(MULTI*i+2)*ndim+j] = data[(i+1)*ndim+j] - qdata[i*ndim+j];
for (j=0;j<ndim;j++)
multi_data[(MULTI*i+3)*ndim+j] = data[(i+1)*ndim+j] - qdata[(i+2)*ndim+j];
+ //for (j=0;j<4*ndim;j++) printf("%f ", multi_data[MULTI*i*ndim + j]);
+ //printf("\n");
}
for (i=0;i<nb_vectors-4;i++)
@@ -547,6 +578,15 @@
{
for (j=0;j<ndim0;j++)
fprintf(fout, "%f, ", codebook2[i*ndim0+j]);
+ fprintf(fout, "\n");
+ }
+ fprintf(fout, "};\n\n");
+
+ fprintf(fout, "float ceps_codebook3[%d*%d] = {\n",nb_entries1, ndim0);
+ for (i=0;i<nb_entries1;i++)
+ {
+ for (j=0;j<ndim0;j++)
+ fprintf(fout, "%f, ", codebook3[i*ndim0+j]);
fprintf(fout, "\n");
}
fprintf(fout, "};\n\n");
--- a/dnn/dump_data.c
+++ b/dnn/dump_data.c
@@ -84,7 +84,7 @@
int quantize_2stage(float *x)
{
int i;
- int id, id2;
+ int id, id2, id3;
float ref[NB_BANDS_1];
RNN_COPY(ref, x, NB_BANDS_1);
id = vq_quantize(ceps_codebook1, 1024, x, NB_BANDS_1, NULL);
@@ -91,12 +91,13 @@
for (i=0;i<NB_BANDS_1;i++) {
x[i] -= ceps_codebook1[id*NB_BANDS_1 + i];
}
- id2 = vq_quantize(ceps_codebook2, 256, x, NB_BANDS_1, NULL);
+ id2 = vq_quantize(ceps_codebook2, 1024, x, NB_BANDS_1, NULL);
for (i=0;i<NB_BANDS_1;i++) {
- x[i] = ceps_codebook2[id2*NB_BANDS_1 + i];
+ x[i] -= ceps_codebook2[id2*NB_BANDS_1 + i];
}
+ id3 = vq_quantize(ceps_codebook3, 1024, x, NB_BANDS_1, NULL);
for (i=0;i<NB_BANDS_1;i++) {
- x[i] += ceps_codebook1[id*NB_BANDS_1 + i];
+ x[i] = ceps_codebook1[id*NB_BANDS_1 + i] + ceps_codebook2[id2*NB_BANDS_1 + i] + ceps_codebook3[id3*NB_BANDS_1 + i];
}
if (1) {
float err = 0;
@@ -184,7 +185,42 @@
return id;
}
+void interp_diff(float *x, float *left, float *right, float *codebook, int bits, int sign)
+{ /* Overwrites x[0..NB_BANDS) with whichever candidate is closest in squared error: the left/right average, left, or right. */
+ int i, k;
+ float min_dist = 1e15; /* smallest squared distance found so far */
+ int best_pred = 0; /* stays 0 if no candidate beats min_dist; row 0 holds the same values as row 1 */
+ float ref[NB_BANDS]; /* presumably a snapshot of the incoming x (filled by RNN_COPY below); used only for the error printout */
+ float pred[4*NB_BANDS]; /* four candidate rows of NB_BANDS values each */
+ (void)sign; /* sign, codebook and bits are accepted but not used in this function */
+ (void)codebook;
+ (void)bits;
+ RNN_COPY(ref, x, NB_BANDS);
+ for (i=0;i<NB_BANDS;i++) pred[i] = pred[NB_BANDS+i] = .5*(left[i] + right[i]); /* rows 0 and 1: midpoint of left and right */
+ for (i=0;i<NB_BANDS;i++) pred[2*NB_BANDS+i] = left[i]; /* row 2: left as-is */
+ for (i=0;i<NB_BANDS;i++) pred[3*NB_BANDS+i] = right[i]; /* row 3: right as-is */
+ for (k=1;k<4;k++) { /* starts at 1: row 0 is never scored (it duplicates row 1) */
+ float dist = 0;
+ for (i=0;i<NB_BANDS;i++) dist += (x[i] - pred[k*NB_BANDS+i])*(x[i] - pred[k*NB_BANDS+i]); /* squared distance from x to row k */
+ if (dist < min_dist) { /* strict '<': on a tie the lower-numbered row wins */
+ min_dist = dist;
+ best_pred = k;
+ }
+ }
+ for (i=0;i<NB_BANDS;i++) {
+ x[i] = pred[best_pred*NB_BANDS + i]; /* replace x in place; the chosen row index is not returned or stored */
+ }
+ if (1) { /* always-on diagnostic */
+ float err = 0;
+ for (i=0;i<NB_BANDS;i++) {
+ err += (x[i]-ref[i])*(x[i]-ref[i]);
+ }
+ printf("%f\n", sqrt(err/NB_BANDS)); /* RMS difference between the new x and ref, written to stdout */
+ }
+}
+
+
typedef struct {
float analysis_mem[OVERLAP_SIZE];
float cepstral_mem[CEPS_MEM][NB_BANDS];
@@ -380,7 +416,7 @@
sxy += w*sub*best[sub];
sy += w*best[sub];
}
- voiced = frame_corr > .3;
+ voiced = frame_corr >= .3;
/* Linear regression to figure out the pitch contour. */
best_a = (sw*sxy - sx*sy)/(sw*sxx - sx*sx);
if (voiced) {
@@ -389,8 +425,10 @@
/* Allow a relative variation of up to 1/4 over 8 sub-frames. */
max_a = mean_pitch/32;
best_a = MIN16(max_a, MAX16(-max_a, best_a));
+ frame_corr = 0.3875f + .175f*floor((frame_corr-.3f)/.175f);
} else {
best_a = 0;
+ frame_corr = 0.0375f + .075f*floor(frame_corr/.075f);
}
//best_b = (sxx*sy - sx*sxy)/(sw*sxx - sx*sx);
best_b = (sy - best_a*sx)/sw;
@@ -425,8 +463,8 @@
quantize_2stage(&st->features[3][1]);
quantize_diff(&st->features[1][0], vq_mem, &st->features[3][0], ceps_codebook_diff4, 11, 1);
//quantize_2stage(&st->features[1][1]);
- quantize_diff(&st->features[0][0], vq_mem, &st->features[1][0], ceps_codebook_diff2, 8, 0);
- quantize_diff(&st->features[2][0], &st->features[1][0], &st->features[3][0], ceps_codebook_diff2, 8, 0);
+ interp_diff(&st->features[0][0], vq_mem, &st->features[1][0], ceps_codebook_diff2, 6, 0);
+ interp_diff(&st->features[2][0], &st->features[1][0], &st->features[3][0], ceps_codebook_diff2, 6, 0);
RNN_COPY(vq_mem, &st->features[3][0], NB_BANDS);
for (i=0;i<4;i++) {
fwrite(st->features[i], sizeof(float), NB_FEATURES, ffeat);
--
⑨