ref: 2275853ac408f1a229dca12f5f0a45c305b1e1e4
parent: 9776e8e828206b6a66bc9f41745068df27e0fcf5
parent: 444b4370d4f0c9c8a27d488dedba81a288114fb7
author: Jean-Marc Valin <jmvalin@amazon.com>
date: Sun Oct 3 23:14:06 EDT 2021
Merge branch 'exp_quant_grub2'
--- a/dnn/autogen.sh
+++ b/dnn/autogen.sh
@@ -6,7 +6,7 @@
test -n "$srcdir" && cd "$srcdir"
#SHA1 of the first commit compatible with the current model
-commit=2d22197
+commit=b7d25ac
if [ ! -f lpcnet_data-$commit.tar.gz ]; then
echo "Downloading latest model"
--- a/dnn/dump_data.c
+++ b/dnn/dump_data.c
@@ -75,9 +75,10 @@
}
-void write_audio(LPCNetEncState *st, const short *pcm, const int *noise, FILE *file) {
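+/* nframes is 1 for the unquantized frame-by-frame path and 4 for quantized packets. */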
+void write_audio(LPCNetEncState *st, const short *pcm, const int *noise, FILE *file, int nframes) {
int i, k;
- for (k=0;k<4;k++) {
+ for (k=0;k<nframes;k++) {
unsigned char data[4*FRAME_SIZE];
for (i=0;i<FRAME_SIZE;i++) {
float p=0;
@@ -250,7 +251,8 @@
rand_resp(a_sig, b_sig);
tmp = (float)rand()/RAND_MAX;
tmp2 = (float)rand()/RAND_MAX;
- noise_std = -log(tmp)-log(tmp2);
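+ /* Skewed mix of two exponential draws; the 1e-4 floors avoid log(0) and ABS16 guards the rare negative result. */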
+ noise_std = ABS16(-1.5*log(1e-4+tmp)-.5*log(1e-4+tmp2));
}
biquad(x, mem_hp_x, x, b_hp, a_hp, FRAME_SIZE);
biquad(x, mem_resp_x, x, b_sig, a_sig, FRAME_SIZE);
@@ -270,12 +272,20 @@
if (fpcm) {
compute_noise(&noisebuf[st->pcount*FRAME_SIZE], noise_std);
}
+
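+ /* Without quantization, features are computed and dumped one frame at a time. */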
+ if (!quantize) {
+ process_single_frame(st, ffeat);
+ if (fpcm) write_audio(st, pcm, &noisebuf[st->pcount*FRAME_SIZE], fpcm, 1);
+ }
st->pcount++;
/* Running on groups of 4 frames. */
if (st->pcount == 4) {
- unsigned char buf[8];
- process_superframe(st, buf, ffeat, encode, quantize);
- if (fpcm) write_audio(st, pcmbuf, noisebuf, fpcm);
+ if (quantize) {
+ unsigned char buf[8];
+ process_superframe(st, buf, ffeat, encode, quantize);
+ if (fpcm) write_audio(st, pcmbuf, noisebuf, fpcm, 4);
+ }
st->pcount = 0;
}
//if (fpcm) fwrite(pcm, sizeof(short), FRAME_SIZE, fpcm);
--- a/dnn/lpcnet_demo.c
+++ b/dnn/lpcnet_demo.c
@@ -99,13 +99,14 @@
LPCNetEncState *net;
net = lpcnet_encoder_create();
while (1) {
- float features[4][NB_TOTAL_FEATURES];
- short pcm[LPCNET_PACKET_SAMPLES];
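+ /* Operate on single frames rather than 4-frame packets. */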
+ float features[NB_TOTAL_FEATURES];
+ short pcm[LPCNET_FRAME_SIZE];
size_t ret;
- ret = fread(pcm, sizeof(pcm[0]), LPCNET_PACKET_SAMPLES, fin);
- if (feof(fin) || ret != LPCNET_PACKET_SAMPLES) break;
- lpcnet_compute_features(net, pcm, features);
- fwrite(features, sizeof(float), 4*NB_TOTAL_FEATURES, fout);
+ ret = fread(pcm, sizeof(pcm[0]), LPCNET_FRAME_SIZE, fin);
+ if (feof(fin) || ret != LPCNET_FRAME_SIZE) break;
+ lpcnet_compute_single_frame_features(net, pcm, features);
+ fwrite(features, sizeof(float), NB_TOTAL_FEATURES, fout);
}
lpcnet_encoder_destroy(net);
} else if (mode == MODE_SYNTHESIS) {
--- a/dnn/lpcnet_enc.c
+++ b/dnn/lpcnet_enc.c
@@ -710,6 +710,137 @@
}
}
+
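+/* Viterbi-style pitch search across the 8 sub-frames of a 4-frame packet. */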
+void process_multi_frame(LPCNetEncState *st, FILE *ffeat) {
+ int i;
+ int sub;
+ int best_i;
+ int best[10];
+ int pitch_prev[8][PITCH_MAX_PERIOD];
+ float frame_corr;
+ float frame_weight_sum = 1e-15;
+ for(sub=0;sub<8;sub++) frame_weight_sum += st->frame_weight[2+sub];
+ for(sub=0;sub<8;sub++) st->frame_weight[2+sub] *= (8.f/frame_weight_sum);
+ for(sub=0;sub<8;sub++) {
+ float max_path_all = -1e15;
+ best_i = 0;
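+ /* Down-weight a candidate unless it clearly beats the correlation at half its period, to avoid period-doubling errors. */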
+ for (i=0;i<PITCH_MAX_PERIOD-2*PITCH_MIN_PERIOD;i++) {
+ float xc_half = MAX16(MAX16(st->xc[2+sub][(PITCH_MAX_PERIOD+i)/2], st->xc[2+sub][(PITCH_MAX_PERIOD+i+2)/2]), st->xc[2+sub][(PITCH_MAX_PERIOD+i-1)/2]);
+ if (st->xc[2+sub][i] < xc_half*1.1) st->xc[2+sub][i] *= .8;
+ }
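+ /* Forward DP pass: continue a path within 4 lags (cost .02*j*j) or restart from the best path at a cost of 6. */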
+ for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) {
+ int j;
+ float max_prev;
+ max_prev = st->pitch_max_path_all - 6.f;
+ pitch_prev[sub][i] = st->best_i;
+ for (j=IMIN(0, 4-i);j<=4 && i+j<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;j++) {
+ if (st->pitch_max_path[0][i+j] - .02f*abs(j)*abs(j) > max_prev) {
+ max_prev = st->pitch_max_path[0][i+j] - .02f*abs(j)*abs(j);
+ pitch_prev[sub][i] = i+j;
+ }
+ }
+ st->pitch_max_path[1][i] = max_prev + st->frame_weight[2+sub]*st->xc[2+sub][i];
+ if (st->pitch_max_path[1][i] > max_path_all) {
+ max_path_all = st->pitch_max_path[1][i];
+ best_i = i;
+ }
+ }
+ /* Renormalize. */
+ for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) st->pitch_max_path[1][i] -= max_path_all;
+ //for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) printf("%f ", st->pitch_max_path[1][i]);
+ //printf("\n");
+ RNN_COPY(&st->pitch_max_path[0][0], &st->pitch_max_path[1][0], PITCH_MAX_PERIOD);
+ st->pitch_max_path_all = max_path_all;
+ st->best_i = best_i;
+ }
+ best_i = st->best_i;
+ frame_corr = 0;
+ /* Backward pass. */
+ for (sub=7;sub>=0;sub--) {
+ best[2+sub] = PITCH_MAX_PERIOD-best_i;
+ frame_corr += st->frame_weight[2+sub]*st->xc[2+sub][best_i];
+ best_i = pitch_prev[sub][best_i];
+ }
+ frame_corr /= 8;
+ for (sub=0;sub<4;sub++) {
+ st->features[sub][NB_BANDS] = .01*(IMAX(66, IMIN(510, best[2+2*sub]+best[2+2*sub+1]))-200);
+ st->features[sub][NB_BANDS + 1] = frame_corr-.5;
+ //printf("%f %d %f\n", st->features[sub][NB_BANDS], best[2+2*sub], frame_corr);
+ }
+ //printf("%d %f %f %f\n", best_period, best_a, best_b, best_corr);
+ RNN_COPY(&st->xc[0][0], &st->xc[8][0], PITCH_MAX_PERIOD);
+ RNN_COPY(&st->xc[1][0], &st->xc[9][0], PITCH_MAX_PERIOD);
+ //printf("\n");
+ RNN_COPY(st->vq_mem, &st->features[3][0], NB_BANDS);
+ if (ffeat) {
+ for (i=0;i<4;i++) {
+ fwrite(st->features[i], sizeof(float), NB_TOTAL_FEATURES, ffeat);
+ }
+ }
+}
+
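+/* Same pitch search as process_multi_frame(), but over the two sub-frames of a single 10-ms frame. */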
+void process_single_frame(LPCNetEncState *st, FILE *ffeat) {
+ int i;
+ int sub;
+ int best_i;
+ int best[4];
+ int pitch_prev[2][PITCH_MAX_PERIOD];
+ float frame_corr;
+ float frame_weight_sum = 1e-15;
+ for(sub=0;sub<2;sub++) frame_weight_sum += st->frame_weight[2+2*st->pcount+sub];
+ for(sub=0;sub<2;sub++) st->frame_weight[2+2*st->pcount+sub] *= (2.f/frame_weight_sum);
+ for(sub=0;sub<2;sub++) {
+ float max_path_all = -1e15;
+ best_i = 0;
+ for (i=0;i<PITCH_MAX_PERIOD-2*PITCH_MIN_PERIOD;i++) {
+ float xc_half = MAX16(MAX16(st->xc[2+2*st->pcount+sub][(PITCH_MAX_PERIOD+i)/2], st->xc[2+2*st->pcount+sub][(PITCH_MAX_PERIOD+i+2)/2]), st->xc[2+2*st->pcount+sub][(PITCH_MAX_PERIOD+i-1)/2]);
+ if (st->xc[2+2*st->pcount+sub][i] < xc_half*1.1) st->xc[2+2*st->pcount+sub][i] *= .8;
+ }
+ for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) {
+ int j;
+ float max_prev;
+ max_prev = st->pitch_max_path_all - 6.f;
+ pitch_prev[sub][i] = st->best_i;
+ for (j=IMIN(0, 4-i);j<=4 && i+j<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;j++) {
+ if (st->pitch_max_path[0][i+j] - .02f*abs(j)*abs(j) > max_prev) {
+ max_prev = st->pitch_max_path[0][i+j] - .02f*abs(j)*abs(j);
+ pitch_prev[sub][i] = i+j;
+ }
+ }
+ st->pitch_max_path[1][i] = max_prev + st->frame_weight[2+2*st->pcount+sub]*st->xc[2+2*st->pcount+sub][i];
+ if (st->pitch_max_path[1][i] > max_path_all) {
+ max_path_all = st->pitch_max_path[1][i];
+ best_i = i;
+ }
+ }
+ /* Renormalize. */
+ for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) st->pitch_max_path[1][i] -= max_path_all;
+ //for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) printf("%f ", st->pitch_max_path[1][i]);
+ //printf("\n");
+ RNN_COPY(&st->pitch_max_path[0][0], &st->pitch_max_path[1][0], PITCH_MAX_PERIOD);
+ st->pitch_max_path_all = max_path_all;
+ st->best_i = best_i;
+ }
+ best_i = st->best_i;
+ frame_corr = 0;
+ /* Backward pass. */
+ for (sub=1;sub>=0;sub--) {
+ best[2+sub] = PITCH_MAX_PERIOD-best_i;
+ frame_corr += st->frame_weight[2+2*st->pcount+sub]*st->xc[2+2*st->pcount+sub][best_i];
+ best_i = pitch_prev[sub][best_i];
+ }
+ frame_corr /= 2;
+ st->features[st->pcount][NB_BANDS] = .01*(IMAX(66, IMIN(510, best[2]+best[3]))-200);
+ st->features[st->pcount][NB_BANDS + 1] = frame_corr-.5;
+ if (ffeat) {
+ fwrite(st->features[st->pcount], sizeof(float), NB_TOTAL_FEATURES, ffeat);
+ }
+}
+
void preemphasis(float *y, float *mem, const float *x, float coef, int N) {
int i;
for (i=0;i<N;i++) {
@@ -746,5 +877,17 @@
for (k=0;k<4;k++) {
RNN_COPY(&features[k][0], &st->features[k][0], NB_TOTAL_FEATURES);
}
+ return 0;
+}
+
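+/* Single-frame API: same features as lpcnet_compute_features(), without 4-frame packet buffering. */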
+LPCNET_EXPORT int lpcnet_compute_single_frame_features(LPCNetEncState *st, const short *pcm, float features[NB_TOTAL_FEATURES]) {
+ int i;
+ float x[FRAME_SIZE];
+ for (i=0;i<FRAME_SIZE;i++) x[i] = pcm[i];
+ preemphasis(x, &st->mem_preemph, x, PREEMPHASIS, FRAME_SIZE);
+ compute_frame_features(st, x);
+ process_single_frame(st, NULL);
+ RNN_COPY(features, &st->features[0][0], NB_TOTAL_FEATURES);
return 0;
}
--- a/dnn/lpcnet_private.h
+++ b/dnn/lpcnet_private.h
@@ -1,6 +1,7 @@
#ifndef LPCNET_PRIVATE_H
#define LPCNET_PRIVATE_H
+#include <stdio.h>
#include "common.h"
#include "freq.h"
#include "lpcnet.h"
@@ -73,6 +74,8 @@
void compute_frame_features(LPCNetEncState *st, const float *in);
void decode_packet(float features[4][NB_TOTAL_FEATURES], float *vq_mem, const unsigned char buf[8]);
+
+void process_single_frame(LPCNetEncState *st, FILE *ffeat);
void run_frame_network(LPCNetState *lpcnet, float *gru_a_condition, float *gru_b_condition, float *lpc, const float *features);
#endif
--- a/dnn/nnet.c
+++ b/dnn/nnet.c
@@ -283,7 +283,8 @@
sgemv_accum8x4(zrh, gru->input_weights, 3*N, M, stride, input);
for (i=0;i<3*N;i++)
recur[i] = gru->bias[3*N + i];
- sgemv_accum(recur, gru->recurrent_weights, 3*N, N, stride, state);
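+ /* Recurrent weights are now quantized, so use the 8x4 dot-product kernel. */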
+ sgemv_accum8x4(recur, gru->recurrent_weights, 3*N, N, stride, state);
for (i=0;i<2*N;i++)
zrh[i] += recur[i];
compute_activation(zrh, zrh, 2*N, ACTIVATION_SIGMOID);
@@ -324,9 +325,15 @@
zrh[i] = gru->bias[i] + gru_b_condition[i];
#endif
sparse_sgemv_accum8x4(zrh, gru->input_weights, 3*N, M, gru->input_weights_idx, input);
+#ifdef USE_SU_BIAS
for (i=0;i<3*N;i++)
+ recur[i] = gru->subias[3*N + i];
+#else
+ for (i=0;i<3*N;i++)
recur[i] = gru->bias[3*N + i];
- sgemv_accum(recur, gru->recurrent_weights, 3*N, N, stride, state);
+#endif
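+ /* Quantized recurrent weights use the 8x4 kernel; subias corrects for the offset of the signed-unsigned path. */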
+ sgemv_accum8x4(recur, gru->recurrent_weights, 3*N, N, stride, state);
for (i=0;i<2*N;i++)
zrh[i] += recur[i];
compute_activation(zrh, zrh, 2*N, ACTIVATION_SIGMOID);
@@ -361,7 +368,7 @@
RNN_COPY(zrh, input, 3*N);
for (i=0;i<3*N;i++)
recur[i] = gru->bias[3*N + i];
- sgemv_accum(recur, gru->recurrent_weights, 3*N, N, stride, state);
+ sgemv_accum8x4(recur, gru->recurrent_weights, 3*N, N, stride, state);
for (i=0;i<2*N;i++)
zrh[i] += recur[i];
compute_activation(zrh, zrh, 2*N, ACTIVATION_SIGMOID);
--- a/dnn/nnet.h
+++ b/dnn/nnet.h
@@ -59,7 +59,8 @@
const float *subias;
const qweight *input_weights;
const int *input_weights_idx;
- const float *recurrent_weights;
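+ /* qweight is signed char (Q7, scale 1/128) when DOT_PROD is defined, float otherwise. */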
+ const qweight *recurrent_weights;
int nb_inputs;
int nb_neurons;
int activation;
--- /dev/null
+++ b/dnn/training_tf2/dataloader.py
@@ -1,0 +1,29 @@
+import numpy as np
+from tensorflow.keras.utils import Sequence
+
+class LPCNetLoader(Sequence):
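+ """Keras Sequence producing shuffled batches of signal, feature and pitch inputs with excitation targets."""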
+ def __init__(self, data, features, periods, batch_size):
+ self.batch_size = batch_size
+ self.nb_batches = np.minimum(np.minimum(data.shape[0], features.shape[0]), periods.shape[0])//self.batch_size
+ self.data = data[:self.nb_batches*self.batch_size, :]
+ self.features = features[:self.nb_batches*self.batch_size, :]
+ self.periods = periods[:self.nb_batches*self.batch_size, :]
+ self.on_epoch_end()
+
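+ # Reshuffle the sample order before each epoch.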
+ def on_epoch_end(self):
+ self.indices = np.arange(self.nb_batches*self.batch_size)
+ np.random.shuffle(self.indices)
+
+ def __getitem__(self, index):
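+ # Columns 0-2 of each chunk are the signal inputs; column 3 is the excitation target.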
+ data = self.data[self.indices[index*self.batch_size:(index+1)*self.batch_size], :, :]
+ in_data = data[: , :, :3]
+ out_data = data[: , :, 3:4]
+ features = self.features[self.indices[index*self.batch_size:(index+1)*self.batch_size], :, :]
+ periods = self.periods[self.indices[index*self.batch_size:(index+1)*self.batch_size], :, :]
+ return ([in_data, features, periods], out_data)
+
+ def __len__(self):
+ return self.nb_batches
--- a/dnn/training_tf2/dump_lpcnet.py
+++ b/dnn/training_tf2/dump_lpcnet.py
@@ -138,10 +138,20 @@
print("printing layer " + name + " of type " + self.__class__.__name__)
weights = self.get_weights()
qweight = printSparseVector(f, weights[0][:gru_a_size, :], name + '_weights', have_diag=False)
+
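+ # Emit the recurrent weights twice: Q7-quantized for DOT_PROD builds, float otherwise.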
+ f.write('#ifdef DOT_PROD\n')
+ qweight2 = np.clip(np.round(128.*weights[1]).astype('int'), -128, 127)
+ printVector(f, qweight2, name + '_recurrent_weights', dotp=True, dtype='qweight')
+ f.write('#else /*DOT_PROD*/\n')
printVector(f, weights[1], name + '_recurrent_weights')
+ f.write('#endif /*DOT_PROD*/\n')
+
printVector(f, weights[-1], name + '_bias')
subias = weights[-1].copy()
subias[0,:] = subias[0,:] - np.sum(qweight*(1./128.),axis=0)
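+ # The recurrent weights are quantized now too, so apply the same subias correction to them.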
+ subias[1,:] = subias[1,:] - np.sum(qweight2*(1./128.),axis=0)
printVector(f, subias, name + '_subias')
if hasattr(self, 'activation'):
activation = self.activation.__name__.upper()
--- a/dnn/training_tf2/lpcnet.py
+++ b/dnn/training_tf2/lpcnet.py
@@ -28,7 +28,7 @@
import math
import tensorflow as tf
from tensorflow.keras.models import Model
-from tensorflow.keras.layers import Input, GRU, Dense, Embedding, Reshape, Concatenate, Lambda, Conv1D, Multiply, Add, Bidirectional, MaxPooling1D, Activation
+from tensorflow.keras.layers import Input, GRU, Dense, Embedding, Reshape, Concatenate, Lambda, Conv1D, Multiply, Add, Bidirectional, MaxPooling1D, Activation, GaussianNoise
from tensorflow.compat.v1.keras.layers import CuDNNGRU
from tensorflow.keras import backend as K
from tensorflow.keras.constraints import Constraint
@@ -70,7 +70,7 @@
return .01 * tf.reduce_mean(K.sqrt(K.sqrt(1.0001 - tf.math.cos(2*3.1415926535897931*(Q*x-tf.round(Q*x))))))
class Sparsify(Callback):
- def __init__(self, t_start, t_end, interval, density):
+ def __init__(self, t_start, t_end, interval, density, quantize=False):
super(Sparsify, self).__init__()
self.batch = 0
self.t_start = t_start
@@ -77,14 +77,12 @@
self.t_end = t_end
self.interval = interval
self.final_density = density
+ self.quantize = quantize
def on_batch_end(self, batch, logs=None):
#print("batch number", self.batch)
self.batch += 1
- if self.batch < self.t_start or ((self.batch-self.t_start) % self.interval != 0 and self.batch < self.t_end):
- #print("don't constrain");
- pass
- else:
+ if self.quantize or (self.batch > self.t_start and (self.batch-self.t_start) % self.interval == 0) or self.batch >= self.t_end:
#print("constrain");
layer = self.model.get_layer('gru_a')
w = layer.get_weights()
@@ -96,7 +94,7 @@
#print ("density = ", density)
for k in range(nb):
density = self.final_density[k]
- if self.batch < self.t_end:
+ if self.batch < self.t_end and not self.quantize:
r = 1 - (self.batch-self.t_start)/(self.t_end - self.t_start)
density = 1 - (1-self.final_density[k])*(1 - r*r*r)
A = p[:, k*N:(k+1)*N]
@@ -108,7 +106,7 @@
S=np.sum(S, axis=1)
SS=np.sort(np.reshape(S, (-1,)))
thresh = SS[round(N*N//32*(1-density))]
- mask = (S>=thresh).astype('float32');
+ mask = (S>=thresh).astype('float32')
mask = np.repeat(mask, 4, axis=0)
mask = np.repeat(mask, 8, axis=1)
mask = np.minimum(1, mask + np.diag(np.ones((N,))))
@@ -116,11 +114,22 @@
mask = np.transpose(mask, (1, 0))
p[:, k*N:(k+1)*N] = p[:, k*N:(k+1)*N]*mask
#print(thresh, np.mean(mask))
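+ # Soft quantization: snap weights whose rounding error is within a threshold that grows to 0.5 by t_end.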
+ if self.quantize and ((self.batch > self.t_start and (self.batch-self.t_start) % self.interval == 0) or self.batch >= self.t_end):
+ if self.batch < self.t_end:
+ threshold = .5*(self.batch - self.t_start)/(self.t_end - self.t_start)
+ else:
+ threshold = .5
+ quant = np.round(p*128.)
+ res = p*128.-quant
+ mask = (np.abs(res) <= threshold).astype('float32')
+ p = mask/128.*quant + (1-mask)*p
+
w[1] = p
layer.set_weights(w)
class SparsifyGRUB(Callback):
- def __init__(self, t_start, t_end, interval, grua_units, density):
+ def __init__(self, t_start, t_end, interval, grua_units, density, quantize=False):
super(SparsifyGRUB, self).__init__()
self.batch = 0
self.t_start = t_start
@@ -128,14 +137,12 @@
self.interval = interval
self.final_density = density
self.grua_units = grua_units
+ self.quantize = quantize
def on_batch_end(self, batch, logs=None):
#print("batch number", self.batch)
self.batch += 1
- if self.batch < self.t_start or ((self.batch-self.t_start) % self.interval != 0 and self.batch < self.t_end):
- #print("don't constrain");
- pass
- else:
+ if self.quantize or (self.batch > self.t_start and (self.batch-self.t_start) % self.interval == 0) or self.batch >= self.t_end:
#print("constrain");
layer = self.model.get_layer('gru_b')
w = layer.get_weights()
@@ -144,7 +151,7 @@
M = p.shape[1]//3
for k in range(3):
density = self.final_density[k]
- if self.batch < self.t_end:
+ if self.batch < self.t_end and not self.quantize:
r = 1 - (self.batch-self.t_start)/(self.t_end - self.t_start)
density = 1 - (1-self.final_density[k])*(1 - r*r*r)
A = p[:, k*M:(k+1)*M]
@@ -158,7 +165,7 @@
S=np.sum(S, axis=1)
SS=np.sort(np.reshape(S, (-1,)))
thresh = SS[round(M*N2//32*(1-density))]
- mask = (S>=thresh).astype('float32');
+ mask = (S>=thresh).astype('float32')
mask = np.repeat(mask, 4, axis=0)
mask = np.repeat(mask, 8, axis=1)
A = np.concatenate([A2*mask, A[N2:,:]], axis=0)
@@ -167,6 +174,17 @@
A = np.reshape(A, (N, M))
p[:, k*M:(k+1)*M] = A
#print(thresh, np.mean(mask))
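+ # Same progressive snap-to-grid quantization as in Sparsify.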
+ if self.quantize and ((self.batch > self.t_start and (self.batch-self.t_start) % self.interval == 0) or self.batch >= self.t_end):
+ if self.batch < self.t_end:
+ threshold = .5*(self.batch - self.t_start)/(self.t_end - self.t_start)
+ else:
+ threshold = .5
+ quant = np.round(p*128.)
+ res = p*128.-quant
+ mask = (np.abs(res) <= threshold).astype('float32')
+ p = mask/128.*quant + (1-mask)*p
+
w[0] = p
layer.set_weights(w)
@@ -215,9 +233,10 @@
constraint = WeightClip(0.992)
def new_lpcnet_model(rnn_units1=384, rnn_units2=16, nb_used_features = 20, training=False, adaptation=False, quantize=False, flag_e2e = False):
- pcm = Input(shape=(None, 3))
- feat = Input(shape=(None, nb_used_features))
- pitch = Input(shape=(None, 1))
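+ # Stateful GRUs require a fixed batch size; 128 must match the training batch size.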
+ pcm = Input(shape=(None, 3), batch_size=128)
+ feat = Input(shape=(None, nb_used_features), batch_size=128)
+ pitch = Input(shape=(None, 1), batch_size=128)
dec_feat = Input(shape=(None, 128))
dec_state1 = Input(shape=(rnn_units1,))
dec_state2 = Input(shape=(rnn_units2,))
@@ -256,19 +275,21 @@
quant = quant_regularizer if quantize else None
if training:
- rnn = CuDNNGRU(rnn_units1, return_sequences=True, return_state=True, name='gru_a',
+ rnn = CuDNNGRU(rnn_units1, return_sequences=True, return_state=True, name='gru_a', stateful=True,
recurrent_constraint = constraint, recurrent_regularizer=quant)
- rnn2 = CuDNNGRU(rnn_units2, return_sequences=True, return_state=True, name='gru_b',
- kernel_constraint=constraint, kernel_regularizer=quant)
+ rnn2 = CuDNNGRU(rnn_units2, return_sequences=True, return_state=True, name='gru_b', stateful=True,
+ kernel_constraint=constraint, recurrent_constraint = constraint, kernel_regularizer=quant, recurrent_regularizer=quant)
else:
- rnn = GRU(rnn_units1, return_sequences=True, return_state=True, recurrent_activation="sigmoid", reset_after='true', name='gru_a',
+ rnn = GRU(rnn_units1, return_sequences=True, return_state=True, recurrent_activation="sigmoid", reset_after='true', name='gru_a', stateful=True,
recurrent_constraint = constraint, recurrent_regularizer=quant)
- rnn2 = GRU(rnn_units2, return_sequences=True, return_state=True, recurrent_activation="sigmoid", reset_after='true', name='gru_b',
- kernel_constraint=constraint, kernel_regularizer=quant)
+ rnn2 = GRU(rnn_units2, return_sequences=True, return_state=True, recurrent_activation="sigmoid", reset_after='true', name='gru_b', stateful=True,
+ kernel_constraint=constraint, recurrent_constraint = constraint, kernel_regularizer=quant, recurrent_regularizer=quant)
rnn_in = Concatenate()([cpcm, rep(cfeat)])
md = MDense(pcm_levels, activation='sigmoid', name='dual_fc')
gru_out1, _ = rnn(rnn_in)
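+ # Light noise injection on gru_a's output, so the following layers tolerate small state perturbations.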
+ gru_out1 = GaussianNoise(.005)(gru_out1)
gru_out2, _ = rnn2(Concatenate()([gru_out1, rep(cfeat)]))
ulaw_prob = Lambda(tree_to_pdf_train)(md(gru_out2))
--- a/dnn/training_tf2/train_lpcnet.py
+++ b/dnn/training_tf2/train_lpcnet.py
@@ -28,6 +28,7 @@
# Train an LPCNet model
import argparse
+from dataloader import LPCNetLoader
parser = argparse.ArgumentParser(description='Train an LPCNet model')
@@ -148,10 +149,10 @@
data = np.reshape(data, (nb_frames, pcm_chunk_size, 4))
-in_data = data[:,:,:3]
-out_exc = data[:,:,3:4]
+#in_data = data[:,:,:3]
+#out_exc = data[:,:,3:4]
-print("ulaw std = ", np.std(out_exc))
+#print("ulaw std = ", np.std(out_exc))
sizeof = features.strides[-1]
features = np.lib.stride_tricks.as_strided(features, shape=(nb_frames, feature_chunk_size+4, nb_features),
@@ -171,8 +172,13 @@
if quantize or retrain:
#Adapting from an existing model
model.load_weights(input_model)
- sparsify = lpcnet.Sparsify(0, 0, 1, density)
- grub_sparsify = lpcnet.SparsifyGRUB(0, 0, 1, args.grua_size, grub_density)
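+ # For quantization, anneal weights to the Q7 grid between batches 10000 and 30000, re-applying every 100 batches.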
+ if quantize:
+ sparsify = lpcnet.Sparsify(10000, 30000, 100, density, quantize=True)
+ grub_sparsify = lpcnet.SparsifyGRUB(10000, 30000, 100, args.grua_size, grub_density, quantize=True)
+ else:
+ sparsify = lpcnet.Sparsify(0, 0, 1, density)
+ grub_sparsify = lpcnet.SparsifyGRUB(0, 0, 1, args.grua_size, grub_density)
else:
#Training from scratch
sparsify = lpcnet.Sparsify(2000, 40000, 400, density)
@@ -180,4 +186,6 @@
model.save_weights('{}_{}_initial.h5'.format(args.output, args.grua_size))
csv_logger = CSVLogger('training_vals.log')
-model.fit([in_data, features, periods], out_exc, batch_size=batch_size, epochs=nb_epochs, validation_split=0.0, callbacks=[checkpoint, sparsify, grub_sparsify, csv_logger])
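+# The Sequence loader shuffles per epoch and feeds fixed-size batches, as the stateful GRUs require.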
+loader = LPCNetLoader(data, features, periods, batch_size)
+model.fit(loader, epochs=nb_epochs, validation_split=0.0, callbacks=[checkpoint, sparsify, grub_sparsify, csv_logger])
--