shithub: opus

Download patch

ref: bd23d9115bb1fbb7e5b9a44c94586248d6a7a59a
parent: 6a184fc764e98eec8abef5e63790414e594ef94e
author: Jean-Marc Valin <jmvalin@amazon.com>
date: Thu Aug 3 21:10:49 EDT 2023

Continuation now working

Added the filtering, delay, and memory required to get continuation working.

--- a/dnn/fwgan.c
+++ b/dnn/fwgan.c
@@ -36,14 +36,6 @@
 #include "pitch.h"
 #include "nnet.h"
 
-#define NB_SUBFRAMES 4
-#define SUBFRAME_SIZE 40
-#define FWGAN_FRAME_SIZE (NB_SUBFRAMES*SUBFRAME_SIZE)
-#define CONT_PCM_INPUTS 320
-#define MAX_CONT_SIZE CONT_NET_0_OUT_SIZE
-#define FWGAN_GAMMA 0.92f
-#define FWGAN_DEEMPHASIS 0.85f
-
 #define FEAT_IN_SIZE (BFCC_WITH_CORR_UPSAMPLER_FC_OUT_SIZE/4 + FWGAN_FRAME_SIZE/2)
 
 #define FWGAN_FEATURES (NB_FEATURES-1)
@@ -98,19 +90,37 @@
   compute_generic_dense(&model->bfcc_with_corr_upsampler_fc, cond, features, ACTIVATION_TANH);
 }
 
+static void fwgan_synthesize_impl(FWGANState *st, float *pcm, const float *lpc, const float *features);
 void fwgan_cont(FWGANState *st, const float *pcm0, const float *features0)
 {
   int i;
   float norm2, norm_1;
+  float wpcm0[CONT_PCM_INPUTS];
   float cont_inputs[CONT_PCM_INPUTS+1];
   float tmp1[MAX_CONT_SIZE];
   float tmp2[MAX_CONT_SIZE];
+  float lpc[LPC_ORDER];
+  float new_pcm[FWGAN_FRAME_SIZE];
   FWGAN *model;
   st->embed_phase[0] = 1;
   model = &st->model;
-  norm2 = celt_inner_prod(pcm0, pcm0, CONT_PCM_INPUTS, st->arch);
+  compute_wlpc(lpc, features0);
+  st->deemph_mem = pcm0[CONT_PCM_INPUTS-1];
+
+  for (i=LPC_ORDER;i<CONT_PCM_INPUTS;i++) {
+    int j;
+    wpcm0[i] = pcm0[i];
+    for (j=0;j<LPC_ORDER;j++) wpcm0[i] += lpc[j]*pcm0[i-j-1];
+  }
+  /* FIXME: Make this less stupid. */
+  for (i=0;i<LPC_ORDER;i++) wpcm0[i] = wpcm0[LPC_ORDER];
+
+  st->preemph_mem = wpcm0[CONT_PCM_INPUTS-1];
+  for (i=0;i<LPC_ORDER;i++) st->syn_mem[i] = pcm0[CONT_PCM_INPUTS-1-i] - FWGAN_DEEMPHASIS*pcm0[CONT_PCM_INPUTS-2-i];
+
+  norm2 = celt_inner_prod(wpcm0, wpcm0, CONT_PCM_INPUTS, st->arch);
   norm_1 = 1.f/sqrt(1e-8f + norm2);
-  for (i=0;i<CONT_PCM_INPUTS;i++) cont_inputs[i+1] = norm_1*pcm0[i];
+  for (i=0;i<CONT_PCM_INPUTS;i++) cont_inputs[i+1] = norm_1*wpcm0[i];
   cont_inputs[0] = log(sqrt(norm2) + 1e-7f);
 
   compute_generic_dense(&model->cont_net_0, tmp1, cont_inputs, ACTIVATION_TANH);
@@ -140,6 +150,8 @@
   compute_generic_dense(&model->fwc7_cont_fc_0, st->fwc7_state, st->cont, ACTIVATION_TANH);
 
   st->cont_initialized = 1;
+  fwgan_synthesize_impl(st, new_pcm, lpc, features0);
+  OPUS_COPY(st->pcm_buf, &new_pcm[SUBFRAME_SIZE], FWGAN_FRAME_SIZE-SUBFRAME_SIZE);
 }
 
 static void apply_gain(float *pcm, float c0, float *last_gain) {
@@ -198,7 +210,6 @@
 
   if (st->cont_initialized == 1) {
     OPUS_CLEAR(pcm, SUBFRAME_SIZE);
-    /* FIXME: Do we need to handle initial features? How? */
     st->cont_initialized = 2;
     apply_gain(pcm, c0, &st->last_gain);
     OPUS_COPY(st->last_lpc, lpc, LPC_ORDER);
@@ -234,7 +245,6 @@
   fwgan_preemphasis(pcm, &st->preemph_mem);
   fwgan_lpc_syn(pcm, st->syn_mem, lpc, st->last_lpc);
   fwgan_deemphasis(pcm, &st->deemph_mem);
-
 }
 
 
@@ -272,6 +282,10 @@
 void fwgan_synthesize(FWGANState *st, float *pcm, const float *features)
 {
   float lpc[LPC_ORDER];
+  float new_pcm[FWGAN_FRAME_SIZE];
   compute_wlpc(lpc, features);
-  fwgan_synthesize_impl(st, pcm, lpc, features);
+  fwgan_synthesize_impl(st, new_pcm, lpc, features);
+  OPUS_COPY(pcm, st->pcm_buf, FWGAN_FRAME_SIZE-SUBFRAME_SIZE);
+  OPUS_COPY(&pcm[FWGAN_FRAME_SIZE-SUBFRAME_SIZE], new_pcm, SUBFRAME_SIZE);
+  OPUS_COPY(st->pcm_buf, &new_pcm[SUBFRAME_SIZE], FWGAN_FRAME_SIZE-SUBFRAME_SIZE);
 }
--- a/dnn/fwgan.h
+++ b/dnn/fwgan.h
@@ -31,6 +31,13 @@
 #include "fwgan_data.h"
 
 #define FWGAN_CONT_SAMPLES 320
+#define NB_SUBFRAMES 4
+#define SUBFRAME_SIZE 40
+#define FWGAN_FRAME_SIZE (NB_SUBFRAMES*SUBFRAME_SIZE)
+#define CONT_PCM_INPUTS 320
+#define MAX_CONT_SIZE CONT_NET_0_OUT_SIZE
+#define FWGAN_GAMMA 0.92f
+#define FWGAN_DEEMPHASIS 0.85f
 
 /* FIXME: Derive those from the model rather than hardcoding. */
 #define FWC1_STATE_SIZE 512
@@ -51,6 +58,7 @@
   float syn_mem[LPC_ORDER];
   float preemph_mem;
   float deemph_mem;
+  float pcm_buf[FWGAN_FRAME_SIZE];
   float cont[CONT_NET_10_OUT_SIZE];
   float cont_conv1_mem[FEAT_IN_CONV1_CONV_STATE_SIZE];
   float rnn_state[RNN_GRU_STATE_SIZE];
--- a/dnn/lpcnet_demo.c
+++ b/dnn/lpcnet_demo.c
@@ -189,19 +189,20 @@
         lpcnet_destroy(net);
     } else if (mode == MODE_FWGAN_SYNTHESIS) {
         FWGANState fwgan;
+        size_t ret;
+        float in_features[NB_TOTAL_FEATURES];
         float zeros[320] = {0};
         fwgan_init(&fwgan);
-        fwgan_cont(&fwgan, zeros, NULL);
 #ifdef USE_WEIGHTS_FILE
         fwgan_load_model(fwgan, data, len);
 #endif
+        ret = fread(in_features, sizeof(in_features[0]), NB_TOTAL_FEATURES, fin);
+        fwgan_cont(&fwgan, zeros, in_features);
         while (1) {
             int i;
-            float in_features[NB_TOTAL_FEATURES];
             float features[NB_FEATURES];
             float fpcm[LPCNET_FRAME_SIZE];
             opus_int16 pcm[LPCNET_FRAME_SIZE];
-            size_t ret;
             ret = fread(in_features, sizeof(features[0]), NB_TOTAL_FEATURES, fin);
             if (feof(fin) || ret != NB_TOTAL_FEATURES) break;
             OPUS_COPY(features, in_features, NB_FEATURES);
--