shithub: opus

ref: 237245f815fbdf402d75d239fecf19d70fe1956e
parent: ebc9483b4c981303b71fc92a95b80bbc7c7c5c7d
author: Jean-Marc Valin <jmvalin@amazon.com>
date: Sat Jun 12 23:50:51 EDT 2021

Support for multi-GPU training

Not sure why CuDNNGRU doesn't get used by default, but we need
to explicitly use it to get things to run fast.
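
For context, the multi-GPU change below follows the usual tf.distribute pattern: the model has to be built and compiled inside a strategy scope so that its variables are replicated across devices. A minimal single-host sketch of that pattern, using MirroredStrategy and a toy model rather than the MultiWorkerMirroredStrategy and LPCNet graph in the actual patch, could look like this:

    import numpy as np
    import tensorflow as tf

    # MirroredStrategy replicates variables across all visible GPUs on one host
    # and averages gradients between replicas (it falls back to CPU if no GPU).
    strategy = tf.distribute.MirroredStrategy()

    with strategy.scope():
        # Anything that creates variables (layers, optimizer) must live in the scope.
        model = tf.keras.Sequential([tf.keras.layers.Dense(16, activation='relu'),
                                     tf.keras.layers.Dense(1)])
        model.compile(optimizer='adam', loss='mse')

    # fit() itself can stay outside the scope; the global batch is split across
    # replicas, which is presumably why the patch also rounds nb_frames down to
    # a multiple of batch_size.
    x = np.random.randn(256, 8).astype('float32')
    y = np.random.randn(256, 1).astype('float32')
    model.fit(x, y, batch_size=64, epochs=1)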

--- a/dnn/training_tf2/lpcnet.py
+++ b/dnn/training_tf2/lpcnet.py
@@ -26,8 +26,10 @@
 '''
 
 import math
+import tensorflow as tf
 from tensorflow.keras.models import Model
 from tensorflow.keras.layers import Input, GRU, Dense, Embedding, Reshape, Concatenate, Lambda, Conv1D, Multiply, Add, Bidirectional, MaxPooling1D, Activation
+from tensorflow.compat.v1.keras.layers import CuDNNGRU
 from tensorflow.keras import backend as K
 from tensorflow.keras.constraints import Constraint
 from tensorflow.keras.initializers import Initializer
@@ -42,6 +44,12 @@
 embed_size = 128
 pcm_levels = 2**pcm_bits
 
+def quant_regularizer(x):
+    Q = 128
+    Q_1 = 1./Q
+    #return .01 * tf.reduce_mean(1 - tf.math.cos(2*3.1415926535897931*(Q*x-tf.round(Q*x))))
+    return .01 * tf.reduce_mean(K.sqrt(K.sqrt(1.0001 - tf.math.cos(2*3.1415926535897931*(Q*x-tf.round(Q*x))))))
+
 class Sparsify(Callback):
     def __init__(self, t_start, t_end, interval, density):
         super(Sparsify, self).__init__()
@@ -129,9 +137,9 @@
         return {'name': self.__class__.__name__,
             'c': self.c}
 
-constraint = WeightClip(0.999)
+constraint = WeightClip(0.992)
 
-def new_lpcnet_model(rnn_units1=384, rnn_units2=16, nb_used_features = 38, training=False, adaptation=False):
+def new_lpcnet_model(rnn_units1=384, rnn_units2=16, nb_used_features = 38, training=False, adaptation=False, quantize=False):
     pcm = Input(shape=(None, 3))
     feat = Input(shape=(None, nb_used_features))
     pitch = Input(shape=(None, 1))
@@ -158,10 +166,18 @@
     
     rep = Lambda(lambda x: K.repeat_elements(x, frame_size, 1))
 
-    rnn = GRU(rnn_units1, return_sequences=True, return_state=True, recurrent_activation="sigmoid", reset_after='true', name='gru_a',
-              recurrent_constraint = constraint)
-    rnn2 = GRU(rnn_units2, return_sequences=True, return_state=True, recurrent_activation="sigmoid", reset_after='true', name='gru_b',
-               kernel_constraint=constraint)
+    quant = quant_regularizer if quantize else None
+
+    if training:
+        rnn = CuDNNGRU(rnn_units1, return_sequences=True, return_state=True, name='gru_a',
+              recurrent_constraint = constraint, recurrent_regularizer=quant)
+        rnn2 = CuDNNGRU(rnn_units2, return_sequences=True, return_state=True, name='gru_b',
+               kernel_constraint=constraint, kernel_regularizer=quant)
+    else:
+        rnn = GRU(rnn_units1, return_sequences=True, return_state=True, recurrent_activation="sigmoid", reset_after='true', name='gru_a',
+              recurrent_constraint = constraint, recurrent_regularizer=quant)
+        rnn2 = GRU(rnn_units2, return_sequences=True, return_state=True, recurrent_activation="sigmoid", reset_after='true', name='gru_b',
+               kernel_constraint=constraint, kernel_regularizer=quant)
 
     rnn_in = Concatenate()([cpcm, rep(cfeat)])
     md = MDense(pcm_levels, activation='softmax', name='dual_fc')
--- a/dnn/training_tf2/train_lpcnet.py
+++ b/dnn/training_tf2/train_lpcnet.py
@@ -49,11 +49,24 @@
 # Try reducing batch_size if you run out of memory on your GPU
 batch_size = 128
 
-model, _, _ = lpcnet.new_lpcnet_model(training=True)
+#Set this to True to adapt an existing model (e.g. on new data)
+adaptation = False
 
-model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
-model.summary()
+if adaptation:
+    lr = 0.0001
+    decay = 0
+else:
+    lr = 0.001
+    decay = 2.5e-5
 
+opt = Adam(lr, decay=decay, beta_2=0.99)
+strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()
+
+with strategy.scope():
+    model, _, _ = lpcnet.new_lpcnet_model(training=True)
+    model.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
+    model.summary()
+
 feature_file = sys.argv[1]
 pcm_file = sys.argv[2]     # 16 bit unsigned short PCM samples
 frame_size = model.frame_size
@@ -65,7 +78,7 @@
 # u for unquantised, load 16 bit PCM samples and convert to mu-law
 
 data = np.fromfile(pcm_file, dtype='uint8')
-nb_frames = len(data)//(4*pcm_chunk_size)
+nb_frames = len(data)//(4*pcm_chunk_size)//batch_size*batch_size
 
 features = np.fromfile(feature_file, dtype='float32')
 
@@ -102,23 +115,15 @@
 del in_exc
 
 # dump models to disk as we go
-checkpoint = ModelCheckpoint('lpcnet33_384_{epoch:02d}.h5')
+checkpoint = ModelCheckpoint('lpcnet33e_384_{epoch:02d}.h5')
 
-#Set this to True to adapt an existing model (e.g. on new data)
-adaptation = False
-
 if adaptation:
     #Adapting from an existing model
-    model.load_weights('lpcnet32v_384_100.h5')
+    model.load_weights('lpcnet33a_384_100.h5')
     sparsify = lpcnet.Sparsify(0, 0, 1, (0.05, 0.05, 0.2))
-    lr = 0.0001
-    decay = 0
 else:
     #Training from scratch
     sparsify = lpcnet.Sparsify(2000, 40000, 400, (0.05, 0.05, 0.2))
-    lr = 0.001
-    decay = 5e-5
 
-model.compile(optimizer=Adam(lr, decay=decay, beta_2=0.99), loss='sparse_categorical_crossentropy')
-model.save_weights('lpcnet33_384_00.h5');
+model.save_weights('lpcnet33e_384_00.h5');
 model.fit([in_data, features, periods], out_exc, batch_size=batch_size, epochs=nb_epochs, validation_split=0.0, callbacks=[checkpoint, sparsify])
--
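
As a side note on the other change in lpcnet.py: quant_regularizer penalizes GRU weights that sit far from the nearest multiple of 1/Q with Q = 128, i.e. it nudges the weights toward the grid they will later be quantized to. A standalone numpy sketch of the same expression (an illustration only, not part of the patch) shows the behaviour at and between grid points:

    import numpy as np

    def quant_penalty(x, Q=128):
        # Distance to the nearest 1/Q grid point, in grid units (range [-0.5, 0.5]).
        frac = Q * x - np.round(Q * x)
        return .01 * np.mean(np.sqrt(np.sqrt(1.0001 - np.cos(2 * np.pi * frac))))

    print(quant_penalty(np.array([3 / 128, -5 / 128])))     # on-grid weights: ~0.001 (the floor of the penalty)
    print(quant_penalty(np.array([0.5 / 128, 1.5 / 128])))  # halfway between grid points: ~0.012 (the maximum)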