ref: c9b7efd274807043ab926d5067362f6b7fbf352b
parent: 6f8db9392902f156125442730cf93b7de5f9aec1
author: Jean-Marc Valin <jmvalin@jmvalin.ca>
date: Mon Mar 11 14:00:27 EDT 2019
dump_data: add -encode mode that dumps the quantized parameter indices as ASCII
--- a/dnn/dump_data.c
+++ b/dnn/dump_data.c
@@ -142,7 +142,7 @@
}
-int quantize_3stage_mbest(float *x)
+int quantize_3stage_mbest(float *x, int entry[3])
{
int i, k;
int id, id2, id3;
@@ -227,9 +227,9 @@
}
}
}
- id = index3[0][0];
- id2 = index3[0][1];
- id3 = index3[0][2];
+ entry[0] = id = index3[0][0];
+ entry[1] = id2 = index3[0][1];
+ entry[2] = id3 = index3[0][2];
//printf("%f ", glob_dist[0]);
for (i=0;i<NB_BANDS_1;i++) {
x[i] -= ceps_codebook1[id*NB_BANDS_1 + i];
@@ -292,7 +292,7 @@
}
-int quantize_diff(float *x, float *left, float *right, float *codebook, int bits, int sign)
+int quantize_diff(float *x, float *left, float *right, float *codebook, int bits, int sign, int *entry)
{
int i;
int nb_entries;
@@ -309,6 +309,7 @@
for (i=0;i<4*NB_BANDS;i++) target[i] = x[i%NB_BANDS] - pred[i];
id = find_nearest_multi(codebook, nb_entries, target, NB_BANDS, NULL, sign);
+ *entry = id;
if (id >= 1<<bits) {
s = -1;
id -= (1<<bits);
@@ -391,12 +392,11 @@
int double_interp_search(const float features[4][NB_FEATURES], const float *mem) {
int i, j;
- int id0, id1;
int best_id=0;
float min_dist = 1e15;
float dist[2][3];
- id0 = interp_search(features[0], mem, features[1], dist[0]);
- id1 = interp_search(features[2], features[1], features[3], dist[1]);
+ interp_search(features[0], mem, features[1], dist[0]);
+ interp_search(features[2], features[1], features[3], dist[1]);
for (i=0;i<3;i++) {
for (j=0;j<3;j++) {
float d;
@@ -591,7 +591,7 @@
}
}
-static void process_superframe(DenoiseState *st, FILE *ffeat) {
+static void process_superframe(DenoiseState *st, FILE *ffeat, int encode) {
int i;
int sub;
int best_i;
@@ -607,6 +607,10 @@
float center_pitch;
int main_pitch;
int modulation;
+ int c0_id;
+ int vq_end[3];
+ int vq_mid;
+ int corr_id = 0;
for(sub=0;sub<8;sub++) frame_weight_sum += st->frame_weight[2+sub];
for(sub=0;sub<8;sub++) st->frame_weight[2+sub] *= (8.f/frame_weight_sum);
for(sub=0;sub<8;sub++) {
@@ -650,6 +654,7 @@
best_i = pitch_prev[sub][best_i];
}
frame_corr /= 8;
+ if (frame_corr < 0) frame_corr = 0;
for (sub=0;sub<8;sub++) {
//printf("%d %f\n", best[2+sub], frame_corr);
}
@@ -671,10 +676,12 @@
/* Allow a relative variation of up to 1/4 over 8 sub-frames. */
max_a = mean_pitch/32;
best_a = MIN16(max_a, MAX16(-max_a, best_a));
- frame_corr = 0.3875f + .175f*floor((frame_corr-.3f)/.175f);
+ corr_id = (int)floor((frame_corr-.3f)/.175f);
+ frame_corr = 0.3875f + .175f*corr_id;
} else {
best_a = 0;
- frame_corr = 0.0375f + .075f*floor(frame_corr/.075f);
+ corr_id = (int)floor(frame_corr/.075f);
+ frame_corr = 0.0375f + .075f*corr_id;
}
//best_b = (sxx*sy - sx*sxy)/(sw*sxx - sx*sx);
best_b = (sy - best_a*sx)/sw;
@@ -705,10 +712,11 @@
RNN_COPY(&st->xc[0][0], &st->xc[8][0], PITCH_MAX_PERIOD);
RNN_COPY(&st->xc[1][0], &st->xc[9][0], PITCH_MAX_PERIOD);
//printf("%f\n", st->features[3][0]);
- st->features[3][0] = floor(.5 + st->features[3][0]*5)/5;
- quantize_3stage_mbest(&st->features[3][1]);
+ c0_id = (int)floor(.5 + st->features[3][0]*5);
+ st->features[3][0] = c0_id/5.;
+ quantize_3stage_mbest(&st->features[3][1], vq_end);
/*perform_interp_relaxation(st->features, vq_mem);*/
- quantize_diff(&st->features[1][0], vq_mem, &st->features[3][0], ceps_codebook_diff4, 11, 1);
+ quantize_diff(&st->features[1][0], vq_mem, &st->features[3][0], ceps_codebook_diff4, 11, 1, &vq_mid);
#if 0
interp_diff(&st->features[0][0], vq_mem, &st->features[1][0], ceps_codebook_diff2, 6, 0);
interp_diff(&st->features[2][0], &st->features[1][0], &st->features[3][0], ceps_codebook_diff2, 6, 0);
@@ -718,8 +726,12 @@
#endif
//printf("\n");
RNN_COPY(vq_mem, &st->features[3][0], NB_BANDS);
- for (i=0;i<4;i++) {
- fwrite(st->features[i], sizeof(float), NB_FEATURES, ffeat);
+ if (encode) {
+ fprintf(ffeat, "%d %d %d %d %d %d %d %d %d\n", c0_id, main_pitch, voiced ? modulation : -4, corr_id, vq_end[0], vq_end[1], vq_end[2], vq_mid, interp_id);
+ } else {
+ for (i=0;i<4;i++) {
+ fwrite(st->features[i], sizeof(float), NB_FEATURES, ffeat);
+ }
}
}
@@ -811,9 +823,14 @@
DenoiseState *st;
float noise_std=0;
int training = -1;
+ int encode = 0;
st = rnnoise_create();
if (argc == 5 && strcmp(argv[1], "-train")==0) training = 1;
if (argc == 4 && strcmp(argv[1], "-test")==0) training = 0;
+ if (argc == 4 && strcmp(argv[1], "-encode")==0) {
+ training = 0;
+ encode = 1;
+ }
if (training == -1) {
fprintf(stderr, "usage: %s -train <speech> <features out> <pcm out>\n", argv[0]);
fprintf(stderr, " or %s -test <speech> <features out>\n", argv[0]);
@@ -890,7 +907,7 @@
st->pcount++;
/* Running on groups of 4 frames. */
if (st->pcount == 4) {
- process_superframe(st, ffeat);
+ process_superframe(st, ffeat, encode);
st->pcount = 0;
}
--
⑨