ref: dc082d7c1ce87a926c6ceda4f1c89c216347ca00
parent: 38cd5cf08f5422887257c3d5ae2415def1e8884e
author: Jean-Marc Valin <jmvalin@jmvalin.ca>
date: Fri Jan 18 10:08:06 EST 2019
Making it easier to change the frame size
--- a/dnn/dump_data.c
+++ b/dnn/dump_data.c
@@ -313,7 +313,7 @@
}
last_silent = silent;
}
- if (count>=5000000 && one_pass_completed) break;
+ if (count*FRAME_SIZE_5MS>=10000000 && one_pass_completed) break;
if (training && ++gain_change_count > 2821) {
float tmp;
speech_gain = pow(10., (-20+(rand()%40))/20.);
--- a/dnn/lpcnet.py
+++ b/dnn/lpcnet.py
@@ -36,6 +36,7 @@
import h5py
import sys
+frame_size = 160
pcm_bits = 8
embed_size = 128
pcm_levels = 2**pcm_bits
@@ -139,7 +140,7 @@
cfeat = fdense2(fdense1(cfeat))
- rep = Lambda(lambda x: K.repeat_elements(x, 160, 1))
+ rep = Lambda(lambda x: K.repeat_elements(x, frame_size, 1))
if use_gpu:
rnn = CuDNNGRU(rnn_units1, return_sequences=True, return_state=True, name='gru_a')
@@ -158,6 +159,7 @@
model.rnn_units1 = rnn_units1
model.rnn_units2 = rnn_units2
model.nb_used_features = nb_used_features
+ model.frame_size = frame_size
encoder = Model([feat, pitch], cfeat)
--- a/dnn/test_lpcnet.py
+++ b/dnn/test_lpcnet.py
@@ -47,7 +47,7 @@
feature_file = sys.argv[1]
out_file = sys.argv[2]
-frame_size = 160
+frame_size = model.frame_size
nb_features = 55
nb_used_features = model.nb_used_features
--- a/dnn/train_lpcnet.py
+++ b/dnn/train_lpcnet.py
@@ -58,7 +58,7 @@
feature_file = sys.argv[1]
pcm_file = sys.argv[2] # 16 bit unsigned short PCM samples
-frame_size = 160
+frame_size = model.frame_size
nb_features = 55
nb_used_features = model.nb_used_features
feature_chunk_size = 15
@@ -97,7 +97,7 @@
del pred
# dump models to disk as we go
-checkpoint = ModelCheckpoint('lpcnet20c_384_10_G16_{epoch:02d}.h5')
+checkpoint = ModelCheckpoint('lpcnet20g_384_10_G16_{epoch:02d}.h5')
#model.load_weights('lpcnet9b_384_10_G16_01.h5')
model.compile(optimizer=Adam(0.001, amsgrad=True, decay=5e-5), loss='sparse_categorical_crossentropy')
--