ref: 6b4e3c56c87faa99407f989f2c7bd98643ddf1df
parent: 66c29fb6207e87c0e5ada178328db7a7a19bfe5e
author: Jean-Marc Valin <jmvalin@amazon.com>
date: Fri Aug 13 08:43:44 EDT 2021
WIP: single-frame inference
--- a/dnn/lpcnet_enc.c
+++ b/dnn/lpcnet_enc.c
@@ -710,6 +710,133 @@
}
}
+
+void process_multi_frame(LPCNetEncState *st, FILE *ffeat) {
+ int i;
+ int sub;
+ int best_i;
+ int best[10];
+ int pitch_prev[8][PITCH_MAX_PERIOD];
+ float frame_corr;
+ float frame_weight_sum = 1e-15;
+ for(sub=0;sub<8;sub++) frame_weight_sum += st->frame_weight[2+sub];
+ for(sub=0;sub<8;sub++) st->frame_weight[2+sub] *= (8.f/frame_weight_sum);
+ for(sub=0;sub<8;sub++) {
+ float max_path_all = -1e15;
+ best_i = 0;
+ for (i=0;i<PITCH_MAX_PERIOD-2*PITCH_MIN_PERIOD;i++) {
+ float xc_half = MAX16(MAX16(st->xc[2+sub][(PITCH_MAX_PERIOD+i)/2], st->xc[2+sub][(PITCH_MAX_PERIOD+i+2)/2]), st->xc[2+sub][(PITCH_MAX_PERIOD+i-1)/2]);
+ if (st->xc[2+sub][i] < xc_half*1.1) st->xc[2+sub][i] *= .8;
+ }
+ for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) {
+ int j;
+ float max_prev;
+ max_prev = st->pitch_max_path_all - 6.f;
+ pitch_prev[sub][i] = st->best_i;
+ for (j=IMIN(0, 4-i);j<=4 && i+j<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;j++) {
+ if (st->pitch_max_path[0][i+j] - .02f*abs(j)*abs(j) > max_prev) {
+ max_prev = st->pitch_max_path[0][i+j] - .02f*abs(j)*abs(j);
+ pitch_prev[sub][i] = i+j;
+ }
+ }
+ st->pitch_max_path[1][i] = max_prev + st->frame_weight[2+sub]*st->xc[2+sub][i];
+ if (st->pitch_max_path[1][i] > max_path_all) {
+ max_path_all = st->pitch_max_path[1][i];
+ best_i = i;
+ }
+ }
+ /* Renormalize. */
+ for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) st->pitch_max_path[1][i] -= max_path_all;
+ //for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) printf("%f ", st->pitch_max_path[1][i]);
+ //printf("\n");
+ RNN_COPY(&st->pitch_max_path[0][0], &st->pitch_max_path[1][0], PITCH_MAX_PERIOD);
+ st->pitch_max_path_all = max_path_all;
+ st->best_i = best_i;
+ }
+ best_i = st->best_i;
+ frame_corr = 0;
+ /* Backward pass. */
+ for (sub=7;sub>=0;sub--) {
+ best[2+sub] = PITCH_MAX_PERIOD-best_i;
+ frame_corr += st->frame_weight[2+sub]*st->xc[2+sub][best_i];
+ best_i = pitch_prev[sub][best_i];
+ }
+ frame_corr /= 8;
+ for (sub=0;sub<4;sub++) {
+ st->features[sub][NB_BANDS] = .01*(IMAX(66, IMIN(510, best[2+2*sub]+best[2+2*sub+1]))-200);
+ st->features[sub][NB_BANDS + 1] = frame_corr-.5;
+ //printf("%f %d %f\n", st->features[sub][NB_BANDS], best[2+2*sub], frame_corr);
+ }
+ //printf("%d %f %f %f\n", best_period, best_a, best_b, best_corr);
+ RNN_COPY(&st->xc[0][0], &st->xc[8][0], PITCH_MAX_PERIOD);
+ RNN_COPY(&st->xc[1][0], &st->xc[9][0], PITCH_MAX_PERIOD);
+ //printf("\n");
+ RNN_COPY(st->vq_mem, &st->features[3][0], NB_BANDS);
+ if (ffeat) {
+ for (i=0;i<4;i++) {
+ fwrite(st->features[i], sizeof(float), NB_TOTAL_FEATURES, ffeat);
+ }
+ }
+}
+
+void process_single_frame(LPCNetEncState *st, FILE *ffeat) {
+ int i;
+ int sub;
+ int best_i;
+ int best[4];
+ int pitch_prev[2][PITCH_MAX_PERIOD];
+ float frame_corr;
+ float frame_weight_sum = 1e-15;
+ for(sub=0;sub<2;sub++) frame_weight_sum += st->frame_weight[2+sub];
+ for(sub=0;sub<2;sub++) st->frame_weight[2+sub] *= (2.f/frame_weight_sum);
+ for(sub=0;sub<2;sub++) {
+ float max_path_all = -1e15;
+ best_i = 0;
+ for (i=0;i<PITCH_MAX_PERIOD-2*PITCH_MIN_PERIOD;i++) {
+ float xc_half = MAX16(MAX16(st->xc[2+sub][(PITCH_MAX_PERIOD+i)/2], st->xc[2+sub][(PITCH_MAX_PERIOD+i+2)/2]), st->xc[2+sub][(PITCH_MAX_PERIOD+i-1)/2]);
+ if (st->xc[2+sub][i] < xc_half*1.1) st->xc[2+sub][i] *= .8;
+ }
+ for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) {
+ int j;
+ float max_prev;
+ max_prev = st->pitch_max_path_all - 6.f;
+ pitch_prev[sub][i] = st->best_i;
+ for (j=IMIN(0, 4-i);j<=4 && i+j<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;j++) {
+ if (st->pitch_max_path[0][i+j] - .02f*abs(j)*abs(j) > max_prev) {
+ max_prev = st->pitch_max_path[0][i+j] - .02f*abs(j)*abs(j);
+ pitch_prev[sub][i] = i+j;
+ }
+ }
+ st->pitch_max_path[1][i] = max_prev + st->frame_weight[2+sub]*st->xc[2+sub][i];
+ if (st->pitch_max_path[1][i] > max_path_all) {
+ max_path_all = st->pitch_max_path[1][i];
+ best_i = i;
+ }
+ }
+ /* Renormalize. */
+ for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) st->pitch_max_path[1][i] -= max_path_all;
+ //for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) printf("%f ", st->pitch_max_path[1][i]);
+ //printf("\n");
+ RNN_COPY(&st->pitch_max_path[0][0], &st->pitch_max_path[1][0], PITCH_MAX_PERIOD);
+ st->pitch_max_path_all = max_path_all;
+ st->best_i = best_i;
+ }
+ best_i = st->best_i;
+ frame_corr = 0;
+ /* Backward pass. */
+ for (sub=1;sub>=0;sub--) {
+ best[2+sub] = PITCH_MAX_PERIOD-best_i;
+ frame_corr += st->frame_weight[2+sub]*st->xc[2+sub][best_i];
+ best_i = pitch_prev[sub][best_i];
+ }
+ frame_corr /= 2;
+ st->features[st->pcount][NB_BANDS] = .01*(IMAX(66, IMIN(510, 2*best[2]))-200);
+ st->features[st->pcount][NB_BANDS + 1] = frame_corr-.5;
+ if (ffeat) {
+ fwrite(st->features[st->pcount], sizeof(float), NB_TOTAL_FEATURES, ffeat);
+ }
+}
+
void preemphasis(float *y, float *mem, const float *x, float coef, int N) {
int i;
for (i=0;i<N;i++) {
@@ -733,6 +860,14 @@
return 0;
}
+void print_vec(float *x, int len) {
+ int i;
+ for (i=0;i<len;i++) {
+ printf("%f ", x[i]);
+ }
+ printf("\n");
+}
+
LPCNET_EXPORT int lpcnet_compute_features(LPCNetEncState *st, const short *pcm, float features[4][NB_TOTAL_FEATURES]) {
int i, k;
for (k=0;k<4;k++) {
@@ -741,10 +876,13 @@
preemphasis(x, &st->mem_preemph, x, PREEMPHASIS, FRAME_SIZE);
st->pcount = k;
compute_frame_features(st, x);
+ process_single_frame(st, NULL);
}
- process_superframe(st, NULL, NULL, 0, 0);
+ //process_superframe(st, NULL, NULL, 0, 0);
+ //process_multi_frame(st, NULL);
for (k=0;k<4;k++) {
RNN_COPY(&features[k][0], &st->features[k][0], NB_TOTAL_FEATURES);
+ //print_vec(&features[k][0], 20);
}
return 0;
}
--
⑨