ref: 2275853ac408f1a229dca12f5f0a45c305b1e1e4
parent: 9776e8e828206b6a66bc9f41745068df27e0fcf5
parent: 444b4370d4f0c9c8a27d488dedba81a288114fb7
author: Jean-Marc Valin <jmvalin@amazon.com>
date: Sun Oct 3 23:14:06 EDT 2021
Merge branch 'exp_quant_grub2'
--- a/dnn/autogen.sh
+++ b/dnn/autogen.sh
@@ -6,7 +6,7 @@
test -n "$srcdir" && cd "$srcdir"
#SHA1 of the first commit compatible with the current model
-commit=2d22197
+commit=b7d25ac
if [ ! -f lpcnet_data-$commit.tar.gz ]; then
echo "Downloading latest model"
--- a/dnn/dump_data.c
+++ b/dnn/dump_data.c
@@ -75,9 +75,10 @@
}
-void write_audio(LPCNetEncState *st, const short *pcm, const int *noise, FILE *file) {
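+/* nframes is 1 for the unquantized frame-by-frame path and 4 for quantized packets. */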
+void write_audio(LPCNetEncState *st, const short *pcm, const int *noise, FILE *file, int nframes) {
int i, k;
- for (k=0;k<4;k++) {
+ for (k=0;k<nframes;k++) {
unsigned char data[4*FRAME_SIZE];
for (i=0;i<FRAME_SIZE;i++) {
float p=0;
@@ -250,7 +251,8 @@
rand_resp(a_sig, b_sig);
tmp = (float)rand()/RAND_MAX;
tmp2 = (float)rand()/RAND_MAX;
- noise_std = -log(tmp)-log(tmp2);
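+ /* Skewed mix of two exponential draws; the 1e-4 floors avoid log(0) and ABS16 guards the rare negative result. */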
+ noise_std = ABS16(-1.5*log(1e-4+tmp)-.5*log(1e-4+tmp2));
}
biquad(x, mem_hp_x, x, b_hp, a_hp, FRAME_SIZE);
biquad(x, mem_resp_x, x, b_sig, a_sig, FRAME_SIZE);
@@ -270,12 +272,20 @@
if (fpcm) {
compute_noise(&noisebuf[st->pcount*FRAME_SIZE], noise_std);
}
+
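+ /* Without quantization, features are computed and dumped one frame at a time. */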
+ if (!quantize) {
+ process_single_frame(st, ffeat);
+ if (fpcm) write_audio(st, pcm, &noisebuf[st->pcount*FRAME_SIZE], fpcm, 1);
+ }
st->pcount++;
/* Running on groups of 4 frames. */
if (st->pcount == 4) {
- unsigned char buf[8];
- process_superframe(st, buf, ffeat, encode, quantize);
- if (fpcm) write_audio(st, pcmbuf, noisebuf, fpcm);
+ if (quantize) {
+ unsigned char buf[8];
+ process_superframe(st, buf, ffeat, encode, quantize);
+ if (fpcm) write_audio(st, pcmbuf, noisebuf, fpcm, 4);
+ }
st->pcount = 0;
}
//if (fpcm) fwrite(pcm, sizeof(short), FRAME_SIZE, fpcm);
--- a/dnn/lpcnet_demo.c
+++ b/dnn/lpcnet_demo.c
@@ -99,13 +99,14 @@
LPCNetEncState *net;
net = lpcnet_encoder_create();
while (1) {
- float features[4][NB_TOTAL_FEATURES];
- short pcm[LPCNET_PACKET_SAMPLES];
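+ /* Operate on single frames rather than 4-frame packets. */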
+ float features[NB_TOTAL_FEATURES];
+ short pcm[LPCNET_FRAME_SIZE];
size_t ret;
- ret = fread(pcm, sizeof(pcm[0]), LPCNET_PACKET_SAMPLES, fin);
- if (feof(fin) || ret != LPCNET_PACKET_SAMPLES) break;
- lpcnet_compute_features(net, pcm, features);
- fwrite(features, sizeof(float), 4*NB_TOTAL_FEATURES, fout);
+ ret = fread(pcm, sizeof(pcm[0]), LPCNET_FRAME_SIZE, fin);
+ if (feof(fin) || ret != LPCNET_FRAME_SIZE) break;
+ lpcnet_compute_single_frame_features(net, pcm, features);
+ fwrite(features, sizeof(float), NB_TOTAL_FEATURES, fout);
}
lpcnet_encoder_destroy(net);
} else if (mode == MODE_SYNTHESIS) {
--- a/dnn/lpcnet_enc.c
+++ b/dnn/lpcnet_enc.c
@@ -710,6 +710,137 @@
}
}
+
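+/* Viterbi-style pitch search across the 8 sub-frames of a 4-frame packet. */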
+void process_multi_frame(LPCNetEncState *st, FILE *ffeat) {
+ int i;
+ int sub;
+ int best_i;
+ int best[10];
+ int pitch_prev[8][PITCH_MAX_PERIOD];
+ float frame_corr;
+ float frame_weight_sum = 1e-15;
+ for(sub=0;sub<8;sub++) frame_weight_sum += st->frame_weight[2+sub];
+ for(sub=0;sub<8;sub++) st->frame_weight[2+sub] *= (8.f/frame_weight_sum);
+ for(sub=0;sub<8;sub++) {
+ float max_path_all = -1e15;
+ best_i = 0;
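+ /* Down-weight a candidate unless it clearly beats the correlation at half its period, to avoid period-doubling errors. */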
+ for (i=0;i<PITCH_MAX_PERIOD-2*PITCH_MIN_PERIOD;i++) {
+ float xc_half = MAX16(MAX16(st->xc[2+sub][(PITCH_MAX_PERIOD+i)/2], st->xc[2+sub][(PITCH_MAX_PERIOD+i+2)/2]), st->xc[2+sub][(PITCH_MAX_PERIOD+i-1)/2]);
+ if (st->xc[2+sub][i] < xc_half*1.1) st->xc[2+sub][i] *= .8;
+ }
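+ /* Forward DP pass: continue a path within 4 lags (cost .02*j*j) or restart from the best path at a cost of 6. */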
+ for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) {
+ int j;
+ float max_prev;
+ max_prev = st->pitch_max_path_all - 6.f;
+ pitch_prev[sub][i] = st->best_i;
+ for (j=IMIN(0, 4-i);j<=4 && i+j<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;j++) {
+ if (st->pitch_max_path[0][i+j] - .02f*abs(j)*abs(j) > max_prev) {
+ max_prev = st->pitch_max_path[0][i+j] - .02f*abs(j)*abs(j);
+ pitch_prev[sub][i] = i+j;
+ }
+ }
+ st->pitch_max_path[1][i] = max_prev + st->frame_weight[2+sub]*st->xc[2+sub][i];
+ if (st->pitch_max_path[1][i] > max_path_all) {
+ max_path_all = st->pitch_max_path[1][i];
+ best_i = i;
+ }
+ }
+ /* Renormalize. */
+ for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) st->pitch_max_path[1][i] -= max_path_all;
+ //for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) printf("%f ", st->pitch_max_path[1][i]);
+ //printf("\n");
+ RNN_COPY(&st->pitch_max_path[0][0], &st->pitch_max_path[1][0], PITCH_MAX_PERIOD);
+ st->pitch_max_path_all = max_path_all;
+ st->best_i = best_i;
+ }
+ best_i = st->best_i;
+ frame_corr = 0;
+ /* Backward pass. */
+ for (sub=7;sub>=0;sub--) {
+ best[2+sub] = PITCH_MAX_PERIOD-best_i;
+ frame_corr += st->frame_weight[2+sub]*st->xc[2+sub][best_i];
+ best_i = pitch_prev[sub][best_i];
+ }
+ frame_corr /= 8;
+ for (sub=0;sub<4;sub++) {
+ st->features[sub][NB_BANDS] = .01*(IMAX(66, IMIN(510, best[2+2*sub]+best[2+2*sub+1]))-200);
+ st->features[sub][NB_BANDS + 1] = frame_corr-.5;
+ //printf("%f %d %f\n", st->features[sub][NB_BANDS], best[2+2*sub], frame_corr);
+ }
+ //printf("%d %f %f %f\n", best_period, best_a, best_b, best_corr);
+ RNN_COPY(&st->xc[0][0], &st->xc[8][0], PITCH_MAX_PERIOD);
+ RNN_COPY(&st->xc[1][0], &st->xc[9][0], PITCH_MAX_PERIOD);
+ //printf("\n");
+ RNN_COPY(st->vq_mem, &st->features[3][0], NB_BANDS);
+ if (ffeat) {
+ for (i=0;i<4;i++) {
+ fwrite(st->features[i], sizeof(float), NB_TOTAL_FEATURES, ffeat);
+ }
+ }
+}
+
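+/* Same pitch search as process_multi_frame(), but over the two sub-frames of a single 10-ms frame. */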
+void process_single_frame(LPCNetEncState *st, FILE *ffeat) {
+ int i;
+ int sub;
+ int best_i;
+ int best[4];
+ int pitch_prev[2][PITCH_MAX_PERIOD];
+ float frame_corr;
+ float frame_weight_sum = 1e-15;
+ for(sub=0;sub<2;sub++) frame_weight_sum += st->frame_weight[2+2*st->pcount+sub];
+ for(sub=0;sub<2;sub++) st->frame_weight[2+2*st->pcount+sub] *= (2.f/frame_weight_sum);
+ for(sub=0;sub<2;sub++) {
+ float max_path_all = -1e15;
+ best_i = 0;
+ for (i=0;i<PITCH_MAX_PERIOD-2*PITCH_MIN_PERIOD;i++) {
+ float xc_half = MAX16(MAX16(st->xc[2+2*st->pcount+sub][(PITCH_MAX_PERIOD+i)/2], st->xc[2+2*st->pcount+sub][(PITCH_MAX_PERIOD+i+2)/2]), st->xc[2+2*st->pcount+sub][(PITCH_MAX_PERIOD+i-1)/2]);
+ if (st->xc[2+2*st->pcount+sub][i] < xc_half*1.1) st->xc[2+2*st->pcount+sub][i] *= .8;
+ }
+ for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) {
+ int j;
+ float max_prev;
+ max_prev = st->pitch_max_path_all - 6.f;
+ pitch_prev[sub][i] = st->best_i;
+ for (j=IMIN(0, 4-i);j<=4 && i+j<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;j++) {
+ if (st->pitch_max_path[0][i+j] - .02f*abs(j)*abs(j) > max_prev) {
+ max_prev = st->pitch_max_path[0][i+j] - .02f*abs(j)*abs(j);
+ pitch_prev[sub][i] = i+j;
+ }
+ }
+ st->pitch_max_path[1][i] = max_prev + st->frame_weight[2+2*st->pcount+sub]*st->xc[2+2*st->pcount+sub][i];
+ if (st->pitch_max_path[1][i] > max_path_all) {
+ max_path_all = st->pitch_max_path[1][i];
+ best_i = i;
+ }
+ }
+ /* Renormalize. */
+ for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) st->pitch_max_path[1][i] -= max_path_all;
+ //for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) printf("%f ", st->pitch_max_path[1][i]);
+ //printf("\n");
+ RNN_COPY(&st->pitch_max_path[0][0], &st->pitch_max_path[1][0], PITCH_MAX_PERIOD);
+ st->pitch_max_path_all = max_path_all;
+ st->best_i = best_i;
+ }
+ best_i = st->best_i;
+ frame_corr = 0;
+ /* Backward pass. */
+ for (sub=1;sub>=0;sub--) {
+ best[2+sub] = PITCH_MAX_PERIOD-best_i;
+ frame_corr += st->frame_weight[2+2*st->pcount+sub]*st->xc[2+2*st->pcount+sub][best_i];
+ best_i = pitch_prev[sub][best_i];
+ }
+ frame_corr /= 2;
+ st->features[st->pcount][NB_BANDS] = .01*(IMAX(66, IMIN(510, best[2]+best[3]))-200);
+ st->features[st->pcount][NB_BANDS + 1] = frame_corr-.5;
+ if (ffeat) {
+ fwrite(st->features[st->pcount], sizeof(float), NB_TOTAL_FEATURES, ffeat);
+ }
+}
+
void preemphasis(float *y, float *mem, const float *x, float coef, int N) {
int i;
for (i=0;i<N;i++) {
@@ -746,5 +877,17 @@
for (k=0;k<4;k++) {
RNN_COPY(&features[k][0], &st->features[k][0], NB_TOTAL_FEATURES);
}
+ return 0;
+}
+
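+/* Single-frame API: same features as lpcnet_compute_features(), without 4-frame packet buffering. */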
+LPCNET_EXPORT int lpcnet_compute_single_frame_features(LPCNetEncState *st, const short *pcm, float features[NB_TOTAL_FEATURES]) {
+ int i;
+ float x[FRAME_SIZE];
+ for (i=0;i<FRAME_SIZE;i++) x[i] = pcm[i];
+ preemphasis(x, &st->mem_preemph, x, PREEMPHASIS, FRAME_SIZE);
+ compute_frame_features(st, x);
+ process_single_frame(st, NULL);
+ RNN_COPY(features, &st->features[0][0], NB_TOTAL_FEATURES);
return 0;
}
--- a/dnn/lpcnet_private.h
+++ b/dnn/lpcnet_private.h
@@ -1,6 +1,7 @@
#ifndef LPCNET_PRIVATE_H
#define LPCNET_PRIVATE_H
+#include <stdio.h>
#include "common.h"
#include "freq.h"
#include "lpcnet.h"
@@ -73,6 +74,8 @@
void compute_frame_features(LPCNetEncState *st, const float *in);
void decode_packet(float features[4][NB_TOTAL_FEATURES], float *vq_mem, const unsigned char buf[8]);
+
+void process_single_frame(LPCNetEncState *st, FILE *ffeat);
void run_frame_network(LPCNetState *lpcnet, float *gru_a_condition, float *gru_b_condition, float *lpc, const float *features);
#endif
--- a/dnn/nnet.c
+++ b/dnn/nnet.c
@@ -283,7 +283,8 @@
sgemv_accum8x4(zrh, gru->input_weights, 3*N, M, stride, input);
for (i=0;i<3*N;i++)
recur[i] = gru->bias[3*N + i];
- sgemv_accum(recur, gru->recurrent_weights, 3*N, N, stride, state);
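+ /* Recurrent weights are now quantized, so use the 8x4 dot-product kernel. */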
+ sgemv_accum8x4(recur, gru->recurrent_weights, 3*N, N, stride, state);
for (i=0;i<2*N;i++)
zrh[i] += recur[i];
compute_activation(zrh, zrh, 2*N, ACTIVATION_SIGMOID);
@@ -324,9 +325,15 @@
zrh[i] = gru->bias[i] + gru_b_condition[i];
#endif
sparse_sgemv_accum8x4(zrh, gru->input_weights, 3*N, M, gru->input_weights_idx, input);
+#ifdef USE_SU_BIAS
for (i=0;i<3*N;i++)
+ recur[i] = gru->subias[3*N + i];
+#else
+ for (i=0;i<3*N;i++)
recur[i] = gru->bias[3*N + i];
- sgemv_accum(recur, gru->recurrent_weights, 3*N, N, stride, state);
+#endif
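+ /* Quantized recurrent weights use the 8x4 kernel; subias corrects for the offset of the signed-unsigned path. */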
+ sgemv_accum8x4(recur, gru->recurrent_weights, 3*N, N, stride, state);
for (i=0;i<2*N;i++)
zrh[i] += recur[i];
compute_activation(zrh, zrh, 2*N, ACTIVATION_SIGMOID);
@@ -361,7 +368,7 @@
RNN_COPY(zrh, input, 3*N);
for (i=0;i<3*N;i++)
recur[i] = gru->bias[3*N + i];
- sgemv_accum(recur, gru->recurrent_weights, 3*N, N, stride, state);
+ sgemv_accum8x4(recur, gru->recurrent_weights, 3*N, N, stride, state);
for (i=0;i<2*N;i++)
zrh[i] += recur[i];
compute_activation(zrh, zrh, 2*N, ACTIVATION_SIGMOID);
--- a/dnn/nnet.h
+++ b/dnn/nnet.h
@@ -59,7 +59,8 @@
const float *subias;
const qweight *input_weights;
const int *input_weights_idx;
- const float *recurrent_weights;
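+ /* qweight is signed char (Q7, scale 1/128) when DOT_PROD is defined, float otherwise. */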
+ const qweight *recurrent_weights;
int nb_inputs;
int nb_neurons;
int activation;
--- /dev/null
+++ b/dnn/training_tf2/dataloader.py
@@ -1,0 +1,29 @@
+import numpy as np
+from tensorflow.keras.utils import Sequence
+
+class LPCNetLoader(Sequence):
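+ """Keras Sequence producing shuffled batches of signal, feature and pitch inputs with excitation targets."""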
+ def __init__(self, data, features, periods, batch_size):
+ self.batch_size = batch_size
+ self.nb_batches = np.minimum(np.minimum(data.shape[0], features.shape[0]), periods.shape[0])//self.batch_size
+ self.data = data[:self.nb_batches*self.batch_size, :]
+ self.features = features[:self.nb_batches*self.batch_size, :]
+ self.periods = periods[:self.nb_batches*self.batch_size, :]
+ self.on_epoch_end()
+
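+ # Reshuffle the sample order before each epoch.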
+ def on_epoch_end(self):
+ self.indices = np.arange(self.nb_batches*self.batch_size)
+ np.random.shuffle(self.indices)
+
+ def __getitem__(self, index):
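+ # Columns 0-2 of each chunk are the signal inputs; column 3 is the excitation target.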
+ data = self.data[self.indices[index*self.batch_size:(index+1)*self.batch_size], :, :]
+ in_data = data[: , :, :3]
+ out_data = data[: , :, 3:4]
+ features = self.features[self.indices[index*self.batch_size:(index+1)*self.batch_size], :, :]
+ periods = self.periods[self.indices[index*self.batch_size:(index+1)*self.batch_size], :, :]
+ return ([in_data, features, periods], out_data)
+
+ def __len__(self):
+ return self.nb_batches
--- a/dnn/training_tf2/dump_lpcnet.py
+++ b/dnn/training_tf2/dump_lpcnet.py
@@ -138,10 +138,20 @@
print("printing layer " + name + " of type " + self.__class__.__name__)
weights = self.get_weights()
qweight = printSparseVector(f, weights[0][:gru_a_size, :], name + '_weights', have_diag=False)
+
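+ # Emit the recurrent weights twice: Q7-quantized for DOT_PROD builds, float otherwise.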
+ f.write('#ifdef DOT_PROD\n')
+ qweight2 = np.clip(np.round(128.*weights[1]).astype('int'), -128, 127)
+ printVector(f, qweight2, name + '_recurrent_weights', dotp=True, dtype='qweight')
+ f.write('#else /*DOT_PROD*/\n')
printVector(f, weights[1], name + '_recurrent_weights')
+ f.write('#endif /*DOT_PROD*/\n')
+
printVector(f, weights[-1], name + '_bias')
subias = weights[-1].copy()
subias[0,:] = subias[0,:] - np.sum(qweight*(1./128.),axis=0)
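+ # The recurrent weights are quantized now too, so apply the same subias correction to them.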
+ subias[1,:] = subias[1,:] - np.sum(qweight2*(1./128.),axis=0)
printVector(f, subias, name + '_subias')
if hasattr(self, 'activation'):
activation = self.activation.__name__.upper()
--- a/dnn/training_tf2/lpcnet.py
+++ b/dnn/training_tf2/lpcnet.py
@@ -28,7 +28,7 @@
import math
import tensorflow as tf
from tensorflow.keras.models import Model
-from tensorflow.keras.layers import Input, GRU, Dense, Embedding, Reshape, Concatenate, Lambda, Conv1D, Multiply, Add, Bidirectional, MaxPooling1D, Activation
+from tensorflow.keras.layers import Input, GRU, Dense, Embedding, Reshape, Concatenate, Lambda, Conv1D, Multiply, Add, Bidirectional, MaxPooling1D, Activation, GaussianNoise
from tensorflow.compat.v1.keras.layers import CuDNNGRU
from tensorflow.keras import backend as K
from tensorflow.keras.constraints import Constraint
@@ -70,7 +70,7 @@
return .01 * tf.reduce_mean(K.sqrt(K.sqrt(1.0001 - tf.math.cos(2*3.1415926535897931*(Q*x-tf.round(Q*x))))))
class Sparsify(Callback):
- def __init__(self, t_start, t_end, interval, density):
+ def __init__(self, t_start, t_end, interval, density, quantize=False):
super(Sparsify, self).__init__()
self.batch = 0
self.t_start = t_start
@@ -77,14 +77,12 @@
self.t_end = t_end
self.interval = interval
self.final_density = density
+ self.quantize = quantize
def on_batch_end(self, batch, logs=None):
#print("batch number", self.batch)
self.batch += 1
- if self.batch < self.t_start or ((self.batch-self.t_start) % self.interval != 0 and self.batch < self.t_end):
- #print("don't constrain");
- pass
- else:
+ if self.quantize or (self.batch > self.t_start and (self.batch-self.t_start) % self.interval == 0) or self.batch >= self.t_end:
#print("constrain");
layer = self.model.get_layer('gru_a')
w = layer.get_weights()
@@ -96,7 +94,7 @@
#print ("density = ", density)
for k in range(nb):
density = self.final_density[k]
- if self.batch < self.t_end:
+ if self.batch < self.t_end and not self.quantize:
r = 1 - (self.batch-self.t_start)/(self.t_end - self.t_start)
density = 1 - (1-self.final_density[k])*(1 - r*r*r)
A = p[:, k*N:(k+1)*N]
@@ -108,7 +106,7 @@
S=np.sum(S, axis=1)
SS=np.sort(np.reshape(S, (-1,)))
thresh = SS[round(N*N//32*(1-density))]
- mask = (S>=thresh).astype('float32');
+ mask = (S>=thresh).astype('float32')
mask = np.repeat(mask, 4, axis=0)
mask = np.repeat(mask, 8, axis=1)
mask = np.minimum(1, mask + np.diag(np.ones((N,))))
@@ -116,11 +114,22 @@
mask = np.transpose(mask, (1, 0))
p[:, k*N:(k+1)*N] = p[:, k*N:(k+1)*N]*mask
#print(thresh, np.mean(mask))
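+ # Soft quantization: snap weights whose rounding error is within a threshold that grows to 0.5 by t_end.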
+ if self.quantize and ((self.batch > self.t_start and (self.batch-self.t_start) % self.interval == 0) or self.batch >= self.t_end):
+ if self.batch < self.t_end:
+ threshold = .5*(self.batch - self.t_start)/(self.t_end - self.t_start)
+ else:
+ threshold = .5
+ quant = np.round(p*128.)
+ res = p*128.-quant
+ mask = (np.abs(res) <= threshold).astype('float32')
+ p = mask/128.*quant + (1-mask)*p
+
w[1] = p
layer.set_weights(w)
class SparsifyGRUB(Callback):
- def __init__(self, t_start, t_end, interval, grua_units, density):
+ def __init__(self, t_start, t_end, interval, grua_units, density, quantize=False):
super(SparsifyGRUB, self).__init__()
self.batch = 0
self.t_start = t_start
@@ -128,14 +137,12 @@
self.interval = interval
self.final_density = density
self.grua_units = grua_units
+ self.quantize = quantize
def on_batch_end(self, batch, logs=None):
#print("batch number", self.batch)
self.batch += 1
- if self.batch < self.t_start or ((self.batch-self.t_start) % self.interval != 0 and self.batch < self.t_end):
- #print("don't constrain");
- pass
- else:
+ if self.quantize or (self.batch > self.t_start and (self.batch-self.t_start) % self.interval == 0) or self.batch >= self.t_end:
#print("constrain");
layer = self.model.get_layer('gru_b')
w = layer.get_weights()
@@ -144,7 +151,7 @@
M = p.shape[1]//3
for k in range(3):
density = self.final_density[k]
- if self.batch < self.t_end:
+ if self.batch < self.t_end and not self.quantize:
r = 1 - (self.batch-self.t_start)/(self.t_end - self.t_start)
density = 1 - (1-self.final_density[k])*(1 - r*r*r)
A = p[:, k*M:(k+1)*M]
@@ -158,7 +165,7 @@
S=np.sum(S, axis=1)
SS=np.sort(np.reshape(S, (-1,)))
thresh = SS[round(M*N2//32*(1-density))]
- mask = (S>=thresh).astype('float32');
+ mask = (S>=thresh).astype('float32')
mask = np.repeat(mask, 4, axis=0)
mask = np.repeat(mask, 8, axis=1)
A = np.concatenate([A2*mask, A[N2:,:]], axis=0)
@@ -167,6 +174,17 @@
A = np.reshape(A, (N, M))
p[:, k*M:(k+1)*M] = A
#print(thresh, np.mean(mask))
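+ # Same progressive snap-to-grid quantization as in Sparsify.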
+ if self.quantize and ((self.batch > self.t_start and (self.batch-self.t_start) % self.interval == 0) or self.batch >= self.t_end):
+ if self.batch < self.t_end:
+ threshold = .5*(self.batch - self.t_start)/(self.t_end - self.t_start)
+ else:
+ threshold = .5
+ quant = np.round(p*128.)
+ res = p*128.-quant
+ mask = (np.abs(res) <= threshold).astype('float32')
+ p = mask/128.*quant + (1-mask)*p
+
w[0] = p
layer.set_weights(w)
@@ -215,9 +233,10 @@
constraint = WeightClip(0.992)
def new_lpcnet_model(rnn_units1=384, rnn_units2=16, nb_used_features = 20, training=False, adaptation=False, quantize=False, flag_e2e = False):
- pcm = Input(shape=(None, 3))
- feat = Input(shape=(None, nb_used_features))
- pitch = Input(shape=(None, 1))
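+ # Stateful GRUs require a fixed batch size; 128 must match the training batch size.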
+ pcm = Input(shape=(None, 3), batch_size=128)
+ feat = Input(shape=(None, nb_used_features), batch_size=128)
+ pitch = Input(shape=(None, 1), batch_size=128)
dec_feat = Input(shape=(None, 128))
dec_state1 = Input(shape=(rnn_units1,))
dec_state2 = Input(shape=(rnn_units2,))
@@ -256,19 +275,21 @@
quant = quant_regularizer if quantize else None
if training:
- rnn = CuDNNGRU(rnn_units1, return_sequences=True, return_state=True, name='gru_a',
+ rnn = CuDNNGRU(rnn_units1, return_sequences=True, return_state=True, name='gru_a', stateful=True,
recurrent_constraint = constraint, recurrent_regularizer=quant)
- rnn2 = CuDNNGRU(rnn_units2, return_sequences=True, return_state=True, name='gru_b',
- kernel_constraint=constraint, kernel_regularizer=quant)
+ rnn2 = CuDNNGRU(rnn_units2, return_sequences=True, return_state=True, name='gru_b', stateful=True,
+ kernel_constraint=constraint, recurrent_constraint = constraint, kernel_regularizer=quant, recurrent_regularizer=quant)
else:
- rnn = GRU(rnn_units1, return_sequences=True, return_state=True, recurrent_activation="sigmoid", reset_after='true', name='gru_a',
+ rnn = GRU(rnn_units1, return_sequences=True, return_state=True, recurrent_activation="sigmoid", reset_after='true', name='gru_a', stateful=True,
recurrent_constraint = constraint, recurrent_regularizer=quant)
- rnn2 = GRU(rnn_units2, return_sequences=True, return_state=True, recurrent_activation="sigmoid", reset_after='true', name='gru_b',
- kernel_constraint=constraint, kernel_regularizer=quant)
+ rnn2 = GRU(rnn_units2, return_sequences=True, return_state=True, recurrent_activation="sigmoid", reset_after='true', name='gru_b', stateful=True,
+ kernel_constraint=constraint, recurrent_constraint = constraint, kernel_regularizer=quant, recurrent_regularizer=quant)
rnn_in = Concatenate()([cpcm, rep(cfeat)])
md = MDense(pcm_levels, activation='sigmoid', name='dual_fc')
gru_out1, _ = rnn(rnn_in)
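+ # Light noise injection on gru_a's output, so the following layers tolerate small state perturbations.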
+ gru_out1 = GaussianNoise(.005)(gru_out1)
gru_out2, _ = rnn2(Concatenate()([gru_out1, rep(cfeat)]))
ulaw_prob = Lambda(tree_to_pdf_train)(md(gru_out2))
--- a/dnn/training_tf2/train_lpcnet.py
+++ b/dnn/training_tf2/train_lpcnet.py
@@ -28,6 +28,7 @@
# Train an LPCNet model
import argparse
+from dataloader import LPCNetLoader
parser = argparse.ArgumentParser(description='Train an LPCNet model')
@@ -148,10 +149,10 @@
data = np.reshape(data, (nb_frames, pcm_chunk_size, 4))
-in_data = data[:,:,:3]
-out_exc = data[:,:,3:4]
+#in_data = data[:,:,:3]
+#out_exc = data[:,:,3:4]
-print("ulaw std = ", np.std(out_exc))
+#print("ulaw std = ", np.std(out_exc))
sizeof = features.strides[-1]
features = np.lib.stride_tricks.as_strided(features, shape=(nb_frames, feature_chunk_size+4, nb_features),
@@ -171,8 +172,13 @@
if quantize or retrain:
#Adapting from an existing model
model.load_weights(input_model)
- sparsify = lpcnet.Sparsify(0, 0, 1, density)
- grub_sparsify = lpcnet.SparsifyGRUB(0, 0, 1, args.grua_size, grub_density)
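+ # For quantization, anneal weights to the Q7 grid between batches 10000 and 30000, re-applying every 100 batches.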
+ if quantize:
+ sparsify = lpcnet.Sparsify(10000, 30000, 100, density, quantize=True)
+ grub_sparsify = lpcnet.SparsifyGRUB(10000, 30000, 100, args.grua_size, grub_density, quantize=True)
+ else:
+ sparsify = lpcnet.Sparsify(0, 0, 1, density)
+ grub_sparsify = lpcnet.SparsifyGRUB(0, 0, 1, args.grua_size, grub_density)
else:
#Training from scratch
sparsify = lpcnet.Sparsify(2000, 40000, 400, density)
@@ -180,4 +186,6 @@
model.save_weights('{}_{}_initial.h5'.format(args.output, args.grua_size))
csv_logger = CSVLogger('training_vals.log')
-model.fit([in_data, features, periods], out_exc, batch_size=batch_size, epochs=nb_epochs, validation_split=0.0, callbacks=[checkpoint, sparsify, grub_sparsify, csv_logger])
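+# The Sequence loader shuffles per epoch and feeds fixed-size batches, as the stateful GRUs require.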
+loader = LPCNetLoader(data, features, periods, batch_size)
+model.fit(loader, epochs=nb_epochs, validation_split=0.0, callbacks=[checkpoint, sparsify, grub_sparsify, csv_logger])
--