ref: 237245f815fbdf402d75d239fecf19d70fe1956e
parent: ebc9483b4c981303b71fc92a95b80bbc7c7c5c7d
author: Jean-Marc Valin <jmvalin@amazon.com>
date: Sat Jun 12 23:50:51 EDT 2021
Support for multi-GPU training

Not sure why CuDNNGRU doesn't get used by default, but we need to
explicitly use it to get things to run fast.
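
For the record, MultiWorkerMirroredStrategy picks up the cluster layout
from the TF_CONFIG environment variable, which has to be set before the
strategy is created. A rough sketch of a two-worker launch (the host
names, ports and worker index below are placeholders, not part of this
patch):

    import os, json
    # Placeholder cluster spec; use the real hosts/ports, and set
    # 'index' to 1 on the second machine.
    os.environ['TF_CONFIG'] = json.dumps({
        'cluster': {'worker': ['host1:12345', 'host2:12345']},
        'task': {'type': 'worker', 'index': 0},
    })
    import tensorflow as tf
    strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()
    # ...then build and compile the model under strategy.scope() and
    # call fit(), as train_lpcnet.py now does.

With no TF_CONFIG set, the strategy falls back to a single worker using
all local GPUs, so the script still runs on one machine.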
--- a/dnn/training_tf2/lpcnet.py
+++ b/dnn/training_tf2/lpcnet.py
@@ -26,8 +26,10 @@
'''
import math
+import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, GRU, Dense, Embedding, Reshape, Concatenate, Lambda, Conv1D, Multiply, Add, Bidirectional, MaxPooling1D, Activation
+from tensorflow.compat.v1.keras.layers import CuDNNGRU
from tensorflow.keras import backend as K
from tensorflow.keras.constraints import Constraint
from tensorflow.keras.initializers import Initializer
@@ -42,6 +44,12 @@
embed_size = 128
pcm_levels = 2**pcm_bits
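+# Pull weights toward multiples of 1/Q (Q=128): 1 - cos(2*pi*(Q*x - round(Q*x)))
+# is zero exactly when Q*x is an integer; the double square root sharpens the
+# pull near the grid and the extra 0.0001 keeps its gradient finite there.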
+def quant_regularizer(x):
+ Q = 128
+ Q_1 = 1./Q
+ #return .01 * tf.reduce_mean(1 - tf.math.cos(2*3.1415926535897931*(Q*x-tf.round(Q*x))))
+ return .01 * tf.reduce_mean(K.sqrt(K.sqrt(1.0001 - tf.math.cos(2*3.1415926535897931*(Q*x-tf.round(Q*x))))))
+
class Sparsify(Callback):
def __init__(self, t_start, t_end, interval, density):
super(Sparsify, self).__init__()
@@ -129,9 +137,9 @@
return {'name': self.__class__.__name__,
'c': self.c}
-constraint = WeightClip(0.999)
+constraint = WeightClip(0.992)
-def new_lpcnet_model(rnn_units1=384, rnn_units2=16, nb_used_features = 38, training=False, adaptation=False):
+def new_lpcnet_model(rnn_units1=384, rnn_units2=16, nb_used_features = 38, training=False, adaptation=False, quantize=False):
pcm = Input(shape=(None, 3))
feat = Input(shape=(None, nb_used_features))
pitch = Input(shape=(None, 1))
@@ -158,10 +166,18 @@
rep = Lambda(lambda x: K.repeat_elements(x, frame_size, 1))
- rnn = GRU(rnn_units1, return_sequences=True, return_state=True, recurrent_activation="sigmoid", reset_after='true', name='gru_a',
- recurrent_constraint = constraint)
- rnn2 = GRU(rnn_units2, return_sequences=True, return_state=True, recurrent_activation="sigmoid", reset_after='true', name='gru_b',
- kernel_constraint=constraint)
+ quant = quant_regularizer if quantize else None
+
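+    # CuDNNGRU is much faster to train; it hard-codes the configuration that
+    # the generic GRU below reproduces (sigmoid recurrent activation, reset_after).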
+ if training:
+ rnn = CuDNNGRU(rnn_units1, return_sequences=True, return_state=True, name='gru_a',
+ recurrent_constraint = constraint, recurrent_regularizer=quant)
+ rnn2 = CuDNNGRU(rnn_units2, return_sequences=True, return_state=True, name='gru_b',
+ kernel_constraint=constraint, kernel_regularizer=quant)
+ else:
+ rnn = GRU(rnn_units1, return_sequences=True, return_state=True, recurrent_activation="sigmoid", reset_after='true', name='gru_a',
+ recurrent_constraint = constraint, recurrent_regularizer=quant)
+ rnn2 = GRU(rnn_units2, return_sequences=True, return_state=True, recurrent_activation="sigmoid", reset_after='true', name='gru_b',
+ kernel_constraint=constraint, kernel_regularizer=quant)
rnn_in = Concatenate()([cpcm, rep(cfeat)])
md = MDense(pcm_levels, activation='softmax', name='dual_fc')
--- a/dnn/training_tf2/train_lpcnet.py
+++ b/dnn/training_tf2/train_lpcnet.py
@@ -49,11 +49,24 @@
# Try reducing batch_size if you run out of memory on your GPU
batch_size = 128
-model, _, _ = lpcnet.new_lpcnet_model(training=True)
+#Set this to True to adapt an existing model (e.g. on new data)
+adaptation = False
-model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
-model.summary()
+if adaptation:
+ lr = 0.0001
+ decay = 0
+else:
+ lr = 0.001
+ decay = 2.5e-5
+opt = Adam(lr, decay=decay, beta_2=0.99)
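+# The cluster layout (hosts, ports, worker index) comes from the
+# TF_CONFIG environment variable, read when the strategy is created.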
+strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()
+
+with strategy.scope():
+ model, _, _ = lpcnet.new_lpcnet_model(training=True)
+ model.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
+ model.summary()
+
feature_file = sys.argv[1]
pcm_file = sys.argv[2] # 16 bit unsigned short PCM samples
frame_size = model.frame_size
@@ -65,7 +78,7 @@
# u for unquantised, load 16 bit PCM samples and convert to mu-law
data = np.fromfile(pcm_file, dtype='uint8')
-nb_frames = len(data)//(4*pcm_chunk_size)
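+# Round the data down to a whole number of batches, since a partial final
+# batch would not split evenly across replicas.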
+nb_frames = len(data)//(4*pcm_chunk_size)//batch_size*batch_size
features = np.fromfile(feature_file, dtype='float32')
@@ -102,23 +115,15 @@
del in_exc
# dump models to disk as we go
-checkpoint = ModelCheckpoint('lpcnet33_384_{epoch:02d}.h5')
+checkpoint = ModelCheckpoint('lpcnet33e_384_{epoch:02d}.h5')
-#Set this to True to adapt an existing model (e.g. on new data)
-adaptation = False
-
if adaptation:
#Adapting from an existing model
- model.load_weights('lpcnet32v_384_100.h5')
+ model.load_weights('lpcnet33a_384_100.h5')
sparsify = lpcnet.Sparsify(0, 0, 1, (0.05, 0.05, 0.2))
- lr = 0.0001
- decay = 0
else:
#Training from scratch
sparsify = lpcnet.Sparsify(2000, 40000, 400, (0.05, 0.05, 0.2))
- lr = 0.001
- decay = 5e-5
-model.compile(optimizer=Adam(lr, decay=decay, beta_2=0.99), loss='sparse_categorical_crossentropy')
-model.save_weights('lpcnet33_384_00.h5');
+model.save_weights('lpcnet33e_384_00.h5');
model.fit([in_data, features, periods], out_exc, batch_size=batch_size, epochs=nb_epochs, validation_split=0.0, callbacks=[checkpoint, sparsify])
--