shithub: opus

Download patch

ref: ab9a09266f770e11eb780ebadcf01474661ee771
parent: c1532559a2fc7ccdf2e442d4257779849f59a52d
author: Jean-Marc Valin <jmvalin@amazon.com>
date: Sat Jul 24 14:09:20 EDT 2021

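Sharing conditioning network with LPC

In END2END builds the LPCs are now derived from reflection coefficients taken
from the first LPC_ORDER outputs of the feature_dense2 conditioning layer.
This replaces the separate f2rc network (lpc_from_features() in lpcnet.c and
difflpc.py in the training code), which is removed.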

--- a/dnn/lpcnet.c
+++ b/dnn/lpcnet.c
@@ -54,7 +54,7 @@
 }
 #endif
 
-void run_frame_network(LPCNetState *lpcnet, float *gru_a_condition, float *gru_b_condition, const float *features, int pitch)
+void run_frame_network(LPCNetState *lpcnet, float *rc, float *gru_a_condition, float *gru_b_condition, const float *features, int pitch)
 {
     NNetState *net;
     float condition[FEATURE_DENSE2_OUT_SIZE];
@@ -74,6 +74,7 @@
     memcpy(lpcnet->old_input[0], in, FRAME_INPUT_SIZE*sizeof(in[0]));
     compute_dense(&feature_dense1, dense1_out, conv2_out);
     compute_dense(&feature_dense2, condition, dense1_out);
+    RNN_COPY(rc, condition, LPC_ORDER);
     compute_dense(&gru_a_dense_feature, gru_a_condition, condition);
     compute_dense(&gru_b_dense_feature, gru_b_condition, condition);
     if (lpcnet->frame_count < 1000) lpcnet->frame_count++;
@@ -154,26 +155,6 @@
   }
 }
 
-void lpc_from_features(LPCNetState *lpcnet,const float *features)
-{
-  NNetState *net;
-  float in[NB_FEATURES];
-  float conv1_out[F2RC_CONV1_OUT_SIZE];
-  float conv2_out[F2RC_CONV2_OUT_SIZE];
-  float dense1_out[F2RC_DENSE3_OUT_SIZE];
-  float rc[LPC_ORDER];
-  net = &lpcnet->nnet;
-  RNN_COPY(in, features, NB_FEATURES);
-  compute_conv1d(&f2rc_conv1, conv1_out, net->f2rc_conv1_state, in);
-  if (lpcnet->frame_count < F2RC_CONV1_DELAY + 1) RNN_CLEAR(conv1_out, F2RC_CONV1_OUT_SIZE);
-  compute_conv1d(&f2rc_conv2, conv2_out, net->f2rc_conv2_state, conv1_out);
-  if (lpcnet->frame_count < (FEATURES_DELAY_F2RC + 1)) RNN_CLEAR(conv2_out, F2RC_CONV2_OUT_SIZE);
-  memmove(lpcnet->old_input_f2rc[1], lpcnet->old_input_f2rc[0], (FEATURES_DELAY_F2RC-1)*NB_FEATURES*sizeof(in[0]));
-  memcpy(lpcnet->old_input_f2rc[0], in, NB_FEATURES*sizeof(in[0]));
-  compute_dense(&f2rc_dense3, dense1_out, conv2_out);
-  compute_dense(&f2rc_dense4_outp_rc, rc, dense1_out);
-  rc2lpc(lpcnet->old_lpc[0], rc);
-}
 #endif
 
 LPCNET_EXPORT void lpcnet_synthesize(LPCNetState *lpcnet, const float *features, short *output, int N)
@@ -180,6 +161,7 @@
 {
     int i;
     float lpc[LPC_ORDER];
+    float rc[LPC_ORDER];
     float gru_a_condition[3*GRU_A_STATE_SIZE];
     float gru_b_condition[3*GRU_B_STATE_SIZE];
     int pitch;
@@ -188,10 +170,9 @@
     pitch = IMIN(255, IMAX(33, pitch));
     memmove(&lpcnet->old_gain[1], &lpcnet->old_gain[0], (FEATURES_DELAY-1)*sizeof(lpcnet->old_gain[0]));
     lpcnet->old_gain[0] = features[PITCH_GAIN_FEATURE];
-    run_frame_network(lpcnet, gru_a_condition, gru_b_condition, features, pitch);
+    run_frame_network(lpcnet, rc, gru_a_condition, gru_b_condition, features, pitch);
 #ifdef END2END
-    lpc_from_features(lpcnet,features);
-    memcpy(lpc, lpcnet->old_lpc[0], LPC_ORDER*sizeof(lpc[0]));
+    rc2lpc(lpc, rc);
 #else
     memcpy(lpc, lpcnet->old_lpc[FEATURES_DELAY-1], LPC_ORDER*sizeof(lpc[0]));
     memmove(lpcnet->old_lpc[1], lpcnet->old_lpc[0], (FEATURES_DELAY-1)*LPC_ORDER*sizeof(lpc[0]));
--- a/dnn/lpcnet_private.h
+++ b/dnn/lpcnet_private.h
@@ -22,18 +22,11 @@
 
 #define FEATURES_DELAY (FEATURE_CONV1_DELAY + FEATURE_CONV2_DELAY)
 
-#ifdef END2END
-  #define FEATURES_DELAY_F2RC (F2RC_CONV1_DELAY + F2RC_CONV2_DELAY)
-#endif
-
 struct LPCNetState {
     NNetState nnet;
     int last_exc;
     float last_sig[LPC_ORDER];
     float old_input[FEATURES_DELAY][FEATURE_CONV2_OUT_SIZE];
-#ifdef END2END
-    float old_input_f2rc[FEATURES_DELAY_F2RC][F2RC_CONV2_OUT_SIZE];
-#endif
     float old_lpc[FEATURES_DELAY][LPC_ORDER];
     float old_gain[FEATURES_DELAY];
     float sampling_logit_table[256];
--- a/dnn/training_tf2/difflpc.py
+++ /dev/null
@@ -1,27 +1,0 @@
-"""
-Tensorflow model (differentiable lpc) to learn the lpcs from the features
-"""
-
-from tensorflow.keras.models import Model
-from tensorflow.keras.layers import Input, Dense, Concatenate, Lambda, Conv1D, Multiply, Layer, LeakyReLU
-from tensorflow.keras import backend as K
-from tf_funcs import diff_rc2lpc
-
-frame_size = 160
-lpcoeffs_N = 16
-
-def difflpc(nb_used_features = 20, training=False):
-    feat = Input(shape=(None, nb_used_features)) # BFCC
-    padding = 'valid' if training else 'same'
-    L1 = Conv1D(100, 3, padding=padding, activation='tanh', name='f2rc_conv1')
-    L2 = Conv1D(75, 3, padding=padding, activation='tanh', name='f2rc_conv2')
-    L3 = Dense(50, activation='tanh',name = 'f2rc_dense3')
-    L4 = Dense(lpcoeffs_N, activation='tanh',name = "f2rc_dense4_outp_rc")
-    rc = L4(L3(L2(L1(feat))))
-    # Differentiable RC 2 LPC
-    lpcoeffs = diff_rc2lpc(name = "rc2lpc")(rc)
-
-    model = Model(feat,lpcoeffs,name = 'f2lpc')
-    model.nb_used_features = nb_used_features
-    model.frame_size = frame_size
-    return model
--- a/dnn/training_tf2/dump_lpcnet.py
+++ b/dnn/training_tf2/dump_lpcnet.py
@@ -291,12 +291,6 @@
     if layer.dump_layer(f, hf):
         layer_list.append(layer.name)
 
-if flag_e2e:
-    print("-- Weight Dumping for the Differentiable LPC Block --")
-    for i, layer in enumerate(model.get_layer("f2lpc").layers):
-        if layer.dump_layer(f, hf):
-            layer_list.append(layer.name)
-
 dump_sparse_gru(model.get_layer('gru_a'), f, hf)
 
 hf.write('#define MAX_RNN_NEURONS {}\n\n'.format(max_rnn_neurons))
--- a/dnn/training_tf2/lpcnet.py
+++ b/dnn/training_tf2/lpcnet.py
@@ -40,7 +40,6 @@
 import sys
 from tf_funcs import *
 from diffembed import diff_Embed
-import difflpc
 
 frame_size = 160
 pcm_bits = 8
@@ -226,7 +225,16 @@
     padding = 'valid' if training else 'same'
     fconv1 = Conv1D(128, 3, padding=padding, activation='tanh', name='feature_conv1')
     fconv2 = Conv1D(128, 3, padding=padding, activation='tanh', name='feature_conv2')
+    pembed = Embedding(256, 64, name='embed_pitch')
+    cat_feat = Concatenate()([feat, Reshape((-1, 64))(pembed(pitch))])
 
+    cfeat = fconv2(fconv1(cat_feat))
+
+    fdense1 = Dense(128, activation='tanh', name='feature_dense1')
+    fdense2 = Dense(128, activation='tanh', name='feature_dense2')
+
+    cfeat = fdense2(fdense1(cfeat))
+
     if not flag_e2e:
         embed = Embedding(256, embed_size, embeddings_initializer=PCMInit(), name='embed_sig')
         cpcm = Reshape((-1, embed_size*3))(embed(pcm))
@@ -233,8 +241,7 @@
     else:
         Input_extractor = Lambda(lambda x: K.expand_dims(x[0][:,:,x[1]],axis = -1))
         error_calc = Lambda(lambda x: tf_l2u(tf_u2l(x[0]) - tf.roll(tf_u2l(x[1]),1,axis = 1)))
-        feat2lpc = difflpc.difflpc(training = training)
-        lpcoeffs = feat2lpc(feat)
+        lpcoeffs = diff_rc2lpc(name = "rc2lpc")(cfeat)
         tensor_preds = diff_pred(name = "lpc2preds")([Input_extractor([pcm,0]),lpcoeffs])
         past_errors = error_calc([Input_extractor([pcm,0]),tensor_preds])
         embed = diff_Embed(name='embed_sig',initializer = PCMInit())
@@ -243,15 +250,6 @@
         cpcm_decoder = Concatenate()([Input_extractor([pcm,0]),Input_extractor([pcm,1]),Input_extractor([pcm,2])])
         cpcm_decoder = Reshape((-1, embed_size*3))(embed(cpcm_decoder))
 
-    pembed = Embedding(256, 64, name='embed_pitch')
-    cat_feat = Concatenate()([feat, Reshape((-1, 64))(pembed(pitch))])
-    
-    cfeat = fconv2(fconv1(cat_feat))
-
-    fdense1 = Dense(128, activation='tanh', name='feature_dense1')
-    fdense2 = Dense(128, activation='tanh', name='feature_dense2')
-
-    cfeat = fdense2(fdense1(cfeat))
     
     rep = Lambda(lambda x: K.repeat_elements(x, frame_size, 1))
 
--- a/dnn/training_tf2/tf_funcs.py
+++ b/dnn/training_tf2/tf_funcs.py
@@ -49,6 +49,7 @@
             temp = Concatenate(axis = 2)([temp,input[1]])
             return temp
         Llpc = Lambda(pred_lpc_recursive)
+        inputs = inputs[:,:,:lpcoeffs_N]
         lpc_init = inputs
         for i in range(1,lpcoeffs_N):
             lpc_init = Llpc([lpc_init[:,:,:i],K.expand_dims(inputs[:,:,i],axis = -1)])
@@ -66,4 +67,4 @@
         for i in range(1,lpcoeffs_N):
             j = (lpcoeffs_N - i + 1)
             rc_init = Lrc([rc_init[:,:,:(j - 1)],rc_init[:,:,(j - 1):]])
-        return rc_init
\ No newline at end of file
+        return rc_init
--