shithub: opus

--- a/dnn/lpcnet_plc.c

+++ b/dnn/lpcnet_plc.c

@@ -67,7 +67,7 @@

   _lpcnet_compute_dense(&plc_dense1, dense_out, in);

   compute_gruB(&plc_gru1, zeros, net->plc_gru1_state, dense_out);

   compute_gruB(&plc_gru2, zeros, net->plc_gru2_state, net->plc_gru1_state);

-  if (out != NULL) _lpcnet_compute_dense(&plc_out, out, net->plc_gru2_state);

+  _lpcnet_compute_dense(&plc_out, out, net->plc_gru2_state);

 LPCNET_EXPORT int lpcnet_plc_update(LPCNetPLCState *st, short *pcm) {

@@ -74,6 +74,9 @@

   int i;

   float x[FRAME_SIZE];

   short output[FRAME_SIZE];

+#if PLC_DNN_PRED

+  float plc_features[NB_FEATURES+1];

+#endif

   st->enc.pcount = 0;

   if (st->skip_analysis) {

     /*fprintf(stderr, "skip update\n");*/

@@ -100,6 +103,15 @@

   preemphasis(x, &st->enc.mem_preemph, x, PREEMPHASIS, FRAME_SIZE);

   compute_frame_features(&st->enc, x);

   process_single_frame(&st->enc, NULL);

+#if PLC_DNN_PRED

+  if (st->skip_analysis <= 1) {

+    RNN_COPY(plc_features, st->enc.features[0], NB_FEATURES);

+    plc_features[NB_FEATURES] = 1;

+    compute_plc_pred(&st->plc_net, st->features, plc_features);

+  }

+#else

+  RNN_COPY(st->features, st->enc.features[0], NB_TOTAL_FEATURES);

+#endif

   if (st->skip_analysis) {

     float lpc[LPC_ORDER];

     float gru_a_condition[3*GRU_A_STATE_SIZE];

@@ -108,17 +120,9 @@

     run_frame_network(&st->lpcnet, gru_a_condition, gru_b_condition, lpc, st->enc.features[0]);

     st->skip_analysis--;

   } else {

-#if PLC_DNN_PRED

-    float plc_features[NB_FEATURES+1];

-#endif

     for (i=0;i<FRAME_SIZE;i++) st->pcm[PLC_BUF_SIZE+i] = pcm[i];

     RNN_COPY(output, &st->pcm[0], FRAME_SIZE);

     lpcnet_synthesize_impl(&st->lpcnet, st->enc.features[0], output, FRAME_SIZE, FRAME_SIZE);

-#if PLC_DNN_PRED

-    RNN_COPY(plc_features, st->enc.features[0], NB_FEATURES);

-    plc_features[NB_FEATURES] = 1;

-    compute_plc_pred(&st->plc_net, NULL, plc_features);

-#endif

 #if PLC_READ_FEATURES

     for (i=0;i<NB_FEATURES;i++) scanf("%f", &st->features[i]);

 #endif

@@ -128,7 +132,6 @@

 #endif

     RNN_MOVE(st->pcm, &st->pcm[FRAME_SIZE], PLC_BUF_SIZE);

-  RNN_COPY(st->features, st->enc.features[0], NB_TOTAL_FEATURES);

   st->loss_count = 0;

   return 0;

@@ -149,7 +152,7 @@

     update_count = IMIN(st->pcm_fill, FRAME_SIZE);

     RNN_COPY(output, &st->pcm[0], update_count);

 #if PLC_DNN_PRED

-    compute_plc_pred(&st->plc_net, st->features, zeros);

+    if (st->pcm_fill > FRAME_SIZE) compute_plc_pred(&st->plc_net, st->features, zeros);

 #endif

 #if PLC_READ_FEATURES

     for (i=0;i<NB_FEATURES;i++) scanf("%f", &st->features[i]);

@@ -165,10 +168,10 @@

   lpcnet_synthesize_tail_impl(&st->lpcnet, pcm, FRAME_SIZE-TRAINING_OFFSET, 0);

 #if PLC_DNN_PRED

-    compute_plc_pred(&st->plc_net, st->features, zeros);

-    if (st->loss_count >= 10) st->features[0] = MAX16(-10, st->features[0]+att_table[9] - 2*(st->loss_count-9));

-    else st->features[0] = MAX16(-10, st->features[0]+att_table[st->loss_count]);

-    if (st->loss_count > 4) st->features[NB_FEATURES-1] = MAX16(-.5, st->features[NB_FEATURES-1]-.1*(st->loss_count-4));

+  compute_plc_pred(&st->plc_net, st->features, zeros);

+  if (st->loss_count >= 10) st->features[0] = MAX16(-10, st->features[0]+att_table[9] - 2*(st->loss_count-9));

+  else st->features[0] = MAX16(-10, st->features[0]+att_table[st->loss_count]);

+  if (st->loss_count > 4) st->features[NB_FEATURES-1] = MAX16(-.5, st->features[NB_FEATURES-1]-.1*(st->loss_count-4));

 #endif

 #if PLC_READ_FEATURES

   for (i=0;i<NB_FEATURES;i++) scanf("%f", &st->features[i]);

--- a/dnn/training_tf2/train_plc.py

+++ b/dnn/training_tf2/train_plc.py

@@ -99,8 +99,9 @@

 def plc_loss(alpha=1.0, bias=0.):

     def loss(y_true,y_pred):

-        mask = y_true[:,:,-1:]

-        y_true = y_true[:,:,:-1]

+        mask = .2 + .8*y_true[:,1:,-1:]

+        y_true = y_true[:,1:,:-1]

+        y_pred = y_pred[:,:-1,:]

         e = (y_pred - y_true)*mask

         e_bands = tf.signal.idct(e[:,:,:-2], norm='ortho')

         l1_loss = K.mean(K.abs(e)) + bias*K.mean(K.maximum(e[:,:,:1], 0.)) + alpha*K.mean(K.abs(e_bands) + bias*K.maximum(e_bands, 0.))

@@ -109,8 +110,9 @@

 def plc_l1_loss():

     def L1_loss(y_true,y_pred):

-        mask = y_true[:,:,-1:]

-        y_true = y_true[:,:,:-1]

+        mask = y_true[:,1:,-1:]

+        y_true = y_true[:,1:,:-1]

+        y_pred = y_pred[:,:-1,:]

         e = (y_pred - y_true)*mask

         l1_loss = K.mean(K.abs(e))

         return l1_loss

@@ -118,8 +120,9 @@

 def plc_band_loss():

     def L1_band_loss(y_true,y_pred):

-        mask = y_true[:,:,-1:]

-        y_true = y_true[:,:,:-1]

+        mask = y_true[:,1:,-1:]

+        y_true = y_true[:,1:,:-1]

+        y_pred = y_pred[:,:-1,:]

         e = (y_pred - y_true)*mask

         e_bands = tf.signal.idct(e[:,:,:-2], norm='ortho')

         l1_loss = K.mean(K.abs(e_bands))

--

⑨