shithub: opus

Download patch

ref: 3e2198c6e10d77df440371925577f04d55cb26c5
parent: d816477c58e7a229c74b910e426e2dbf14a03eb8
author: Jean-Marc Valin <jmvalin@amazon.com>
date: Wed Feb 2 10:00:24 EST 2022

Learning to predict time t+1 from time t

Instead of t from t

--- a/dnn/lpcnet_plc.c
+++ b/dnn/lpcnet_plc.c
@@ -67,7 +67,7 @@
   _lpcnet_compute_dense(&plc_dense1, dense_out, in);
   compute_gruB(&plc_gru1, zeros, net->plc_gru1_state, dense_out);
   compute_gruB(&plc_gru2, zeros, net->plc_gru2_state, net->plc_gru1_state);
-  if (out != NULL) _lpcnet_compute_dense(&plc_out, out, net->plc_gru2_state);
+  _lpcnet_compute_dense(&plc_out, out, net->plc_gru2_state);
 }
 
 LPCNET_EXPORT int lpcnet_plc_update(LPCNetPLCState *st, short *pcm) {
@@ -74,6 +74,9 @@
   int i;
   float x[FRAME_SIZE];
   short output[FRAME_SIZE];
+#if PLC_DNN_PRED
+  float plc_features[NB_FEATURES+1];
+#endif
   st->enc.pcount = 0;
   if (st->skip_analysis) {
     /*fprintf(stderr, "skip update\n");*/
@@ -100,6 +103,15 @@
   preemphasis(x, &st->enc.mem_preemph, x, PREEMPHASIS, FRAME_SIZE);
   compute_frame_features(&st->enc, x);
   process_single_frame(&st->enc, NULL);
+#if PLC_DNN_PRED
+  if (st->skip_analysis <= 1) {
+    RNN_COPY(plc_features, st->enc.features[0], NB_FEATURES);
+    plc_features[NB_FEATURES] = 1;
+    compute_plc_pred(&st->plc_net, st->features, plc_features);
+  }
+#else
+  RNN_COPY(st->features, st->enc.features[0], NB_TOTAL_FEATURES);
+#endif
   if (st->skip_analysis) {
     float lpc[LPC_ORDER];
     float gru_a_condition[3*GRU_A_STATE_SIZE];
@@ -108,17 +120,9 @@
     run_frame_network(&st->lpcnet, gru_a_condition, gru_b_condition, lpc, st->enc.features[0]);
     st->skip_analysis--;
   } else {
-#if PLC_DNN_PRED
-    float plc_features[NB_FEATURES+1];
-#endif
     for (i=0;i<FRAME_SIZE;i++) st->pcm[PLC_BUF_SIZE+i] = pcm[i];
     RNN_COPY(output, &st->pcm[0], FRAME_SIZE);
     lpcnet_synthesize_impl(&st->lpcnet, st->enc.features[0], output, FRAME_SIZE, FRAME_SIZE);
-#if PLC_DNN_PRED
-    RNN_COPY(plc_features, st->enc.features[0], NB_FEATURES);
-    plc_features[NB_FEATURES] = 1;
-    compute_plc_pred(&st->plc_net, NULL, plc_features);
-#endif
 #if PLC_READ_FEATURES
     for (i=0;i<NB_FEATURES;i++) scanf("%f", &st->features[i]);
 #endif
@@ -128,7 +132,6 @@
 #endif
     RNN_MOVE(st->pcm, &st->pcm[FRAME_SIZE], PLC_BUF_SIZE);
   }
-  RNN_COPY(st->features, st->enc.features[0], NB_TOTAL_FEATURES);
   st->loss_count = 0;
   return 0;
 }
@@ -149,7 +152,7 @@
     update_count = IMIN(st->pcm_fill, FRAME_SIZE);
     RNN_COPY(output, &st->pcm[0], update_count);
 #if PLC_DNN_PRED
-    compute_plc_pred(&st->plc_net, st->features, zeros);
+    if (st->pcm_fill > FRAME_SIZE) compute_plc_pred(&st->plc_net, st->features, zeros);
 #endif
 #if PLC_READ_FEATURES
     for (i=0;i<NB_FEATURES;i++) scanf("%f", &st->features[i]);
@@ -165,10 +168,10 @@
   }
   lpcnet_synthesize_tail_impl(&st->lpcnet, pcm, FRAME_SIZE-TRAINING_OFFSET, 0);
 #if PLC_DNN_PRED
-    compute_plc_pred(&st->plc_net, st->features, zeros);
-    if (st->loss_count >= 10) st->features[0] = MAX16(-10, st->features[0]+att_table[9] - 2*(st->loss_count-9));
-    else st->features[0] = MAX16(-10, st->features[0]+att_table[st->loss_count]);
-    if (st->loss_count > 4) st->features[NB_FEATURES-1] = MAX16(-.5, st->features[NB_FEATURES-1]-.1*(st->loss_count-4));
+  compute_plc_pred(&st->plc_net, st->features, zeros);
+  if (st->loss_count >= 10) st->features[0] = MAX16(-10, st->features[0]+att_table[9] - 2*(st->loss_count-9));
+  else st->features[0] = MAX16(-10, st->features[0]+att_table[st->loss_count]);
+  if (st->loss_count > 4) st->features[NB_FEATURES-1] = MAX16(-.5, st->features[NB_FEATURES-1]-.1*(st->loss_count-4));
 #endif
 #if PLC_READ_FEATURES
   for (i=0;i<NB_FEATURES;i++) scanf("%f", &st->features[i]);
--- a/dnn/training_tf2/train_plc.py
+++ b/dnn/training_tf2/train_plc.py
@@ -99,8 +99,9 @@
 
 def plc_loss(alpha=1.0, bias=0.):
     def loss(y_true,y_pred):
-        mask = y_true[:,:,-1:]
-        y_true = y_true[:,:,:-1]
+        mask = .2 + .8*y_true[:,1:,-1:]
+        y_true = y_true[:,1:,:-1]
+        y_pred = y_pred[:,:-1,:]
         e = (y_pred - y_true)*mask
         e_bands = tf.signal.idct(e[:,:,:-2], norm='ortho')
         l1_loss = K.mean(K.abs(e)) + bias*K.mean(K.maximum(e[:,:,:1], 0.)) + alpha*K.mean(K.abs(e_bands) + bias*K.maximum(e_bands, 0.))
@@ -109,8 +110,9 @@
 
 def plc_l1_loss():
     def L1_loss(y_true,y_pred):
-        mask = y_true[:,:,-1:]
-        y_true = y_true[:,:,:-1]
+        mask = y_true[:,1:,-1:]
+        y_true = y_true[:,1:,:-1]
+        y_pred = y_pred[:,:-1,:]
         e = (y_pred - y_true)*mask
         l1_loss = K.mean(K.abs(e))
         return l1_loss
@@ -118,8 +120,9 @@
 
 def plc_band_loss():
     def L1_band_loss(y_true,y_pred):
-        mask = y_true[:,:,-1:]
-        y_true = y_true[:,:,:-1]
+        mask = y_true[:,1:,-1:]
+        y_true = y_true[:,1:,:-1]
+        y_pred = y_pred[:,:-1,:]
         e = (y_pred - y_true)*mask
         e_bands = tf.signal.idct(e[:,:,:-2], norm='ortho')
         l1_loss = K.mean(K.abs(e_bands))
--