ref: f36685fc974394aa0d0f4db1bb601afc4780e3ed
parent: 26ab10d0c87f8cdd2472bd14da6a96e8191aaa6d
author: Marcus Asteborg <xnorpx@outlook.com>
date: Thu Jun 22 01:27:54 EDT 2023
Remove trailing whitespace in dnn
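For the record, a cleanup of this kind can be reproduced mechanically. The
snippet below is a minimal Python sketch (not necessarily the tool used for
this commit); it strips trailing spaces and tabs from the text files under
dnn/, but does not handle the trailing blank lines and missing final
newlines that this commit also fixes:

    # strip_ws.py - minimal sketch; run from the repository root.
    # Note: read_text()/write_text() normalize line endings to "\n".
    import pathlib

    EXTS = {".c", ".h", ".py", ".md", ".txt", ".bat"}
    for path in pathlib.Path("dnn").rglob("*"):
        if path.is_file() and path.suffix in EXTS:
            text = path.read_text()
            cleaned = "\n".join(line.rstrip() for line in text.split("\n"))
            if cleaned != text:
                path.write_text(cleaned)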
--- a/dnn/README.md
+++ b/dnn/README.md
@@ -115,7 +115,7 @@
and move the generated nnet\_data.\* files to the src/ directory.
Then you just need to rebuild the software and use lpcnet\_demo as explained above.
-# Speech Material for Training
+# Speech Material for Training
Suitable training material can be obtained from [Open Speech and Language Resources](https://www.openslr.org/). See the datasets.txt file for details on suitable training data.
@@ -123,5 +123,4 @@
1. [LPCNet: DSP-Boosted Neural Speech Synthesis](https://people.xiph.org/~jm/demo/lpcnet/)
1. [A Real-Time Wideband Neural Vocoder at 1.6 kb/s Using LPCNet](https://people.xiph.org/~jm/demo/lpcnet_codec/)
-1. Sample model files (check compatibility): https://media.xiph.org/lpcnet/data/
-
+1. Sample model files (check compatibility): https://media.xiph.org/lpcnet/data/
--- a/dnn/datasets.txt
+++ b/dnn/datasets.txt
@@ -171,4 +171,3 @@
journal={arXiv preprint arXiv:2104.01497},
year={2021}
}
-
--- a/dnn/download_model.bat
+++ b/dnn/download_model.bat
@@ -9,4 +9,3 @@
tar -xvzf %model%
move .\src\*.c .
move .\src\*.h .
-
--- a/dnn/dump_data.c
+++ b/dnn/dump_data.c
@@ -98,7 +98,7 @@
/* Simulate error on excitation. */
e += noise[k*FRAME_SIZE+i];
e = IMIN(255, IMAX(0, e));
-
+
RNN_MOVE(&st->sig_mem[1], &st->sig_mem[0], LPC_ORDER-1);
st->sig_mem[0] = p + ulaw2lin(e);
st->exc_mem = e;
@@ -241,7 +241,7 @@
if (fpcm) {
compute_noise(&noisebuf[st->pcount*FRAME_SIZE], noise_std);
}
-
+
process_single_frame(st, ffeat);
if (fpcm) write_audio(st, pcm, &noisebuf[st->pcount*FRAME_SIZE], fpcm, 1);
st->pcount++;
@@ -260,4 +260,3 @@
lpcnet_encoder_destroy(st);
return 0;
}
-
--- a/dnn/freq.c
+++ b/dnn/freq.c
@@ -326,4 +326,3 @@
x[WINDOW_SIZE - 1 - i] *= half_window[i];
}
}
-
--- a/dnn/lpcnet.c
+++ b/dnn/lpcnet.c
@@ -61,7 +61,7 @@
float ntmp[LPC_ORDER] = {0.0};
RNN_COPY(tmp, rc, LPC_ORDER);
for(i = 0; i < LPC_ORDER ; i++)
- {
+ {
for(j = 0; j <= i-1; j++)
{
ntmp[j] = tmp[j] + tmp[i]*tmp[i - j - 1];
@@ -106,7 +106,7 @@
_lpcnet_compute_dense(&lpcnet->model.gru_b_dense_feature, gru_b_condition, condition);
#ifdef END2END
rc2lpc(lpc, rc);
-#elif FEATURES_DELAY>0
+#elif FEATURES_DELAY>0
memcpy(lpc, lpcnet->old_lpc[FEATURES_DELAY-1], LPC_ORDER*sizeof(lpc[0]));
memmove(lpcnet->old_lpc[1], lpcnet->old_lpc[0], (FEATURES_DELAY-1)*LPC_ORDER*sizeof(lpc[0]));
lpc_from_cepstrum(lpcnet->old_lpc[0], features);
--- a/dnn/nnet.c
+++ b/dnn/nnet.c
@@ -170,7 +170,7 @@
C = layer->nb_channels;
celt_assert(N*C <= MAX_MDENSE_TMP);
stride = M*C;
-
+
celt_assert(N <= DUAL_FC_OUT_SIZE);
/* Computing all the random thresholds in advance. These thresholds are directly
@@ -188,7 +188,7 @@
int bit;
int i;
float sum1, sum2;
-
+
i = (1<<b) | val;
sum1 = layer->bias[i];
@@ -426,7 +426,7 @@
#ifdef USE_SU_BIAS
bias = &gru->subias[3*N];
#else
- bias = &gru->bias[3*N];
+ bias = &gru->bias[3*N];
#endif
for (k=0;k<2;k++)
{
@@ -478,7 +478,7 @@
for (i=0;i<layer->dim;i++)
{
output[i] = layer->embedding_weights[input*layer->dim + i];
- }
+ }
}
void compute_gru_a_input(float *output, const float *input, int N, const EmbeddingLayer *layer1, int val1, const EmbeddingLayer *layer2, int val2, const EmbeddingLayer *layer3, int val3) {
@@ -499,5 +499,5 @@
for (i=0;i<layer->dim;i++)
{
output[i] += layer->embedding_weights[input*layer->dim + i];
- }
+ }
}
--- a/dnn/parse_lpcnet_weights.c
+++ b/dnn/parse_lpcnet_weights.c
@@ -45,7 +45,7 @@
array->type = h->type;
array->size = h->size;
array->data = (*data)+WEIGHT_BLOCK_SIZE;
-
+
*data += h->block_size+WEIGHT_BLOCK_SIZE;
*len -= h->block_size+WEIGHT_BLOCK_SIZE;
return array->size;
@@ -103,7 +103,7 @@
if (remain < nb_blocks+1) return NULL;
for (i=0;i<nb_blocks;i++) {
int pos = *idx++;
- if (pos+3 >= nb_in || (pos&0x3)) return NULL;
+ if (pos+3 >= nb_in || (pos&0x3)) return NULL;
}
nb_out -= 8;
remain -= nb_blocks+1;
--- a/dnn/test_vec.c
+++ b/dnn/test_vec.c
@@ -63,7 +63,7 @@
out[i] = 0;
out_fast[i] = 0;
}
-
+
for(i=0; i<COLS; i++) {
x[i] = i+1;
}
@@ -101,7 +101,7 @@
out[i] = 0;
out_fast[i] = 0;
}
-
+
sparse_sgemv_accum16(out, w, rows, indx, x);
sparse_sgemv_accum16_fast(out_fast, w, rows, indx, x);
@@ -126,5 +126,3 @@
int test2 = test_sparse_sgemv_accum16();
return test1 || test2;
}
-
-
--- a/dnn/torch/rdovae/export_rdovae_weights.py
+++ b/dnn/torch/rdovae/export_rdovae_weights.py
@@ -80,14 +80,14 @@
def c_export(args, model):
-
+
message = f"Auto generated from checkpoint {os.path.basename(args.checkpoint)}"
-
+
enc_writer = CWriter(os.path.join(args.output_dir, "dred_rdovae_enc_data"), message=message)
dec_writer = CWriter(os.path.join(args.output_dir, "dred_rdovae_dec_data"), message=message)
stats_writer = CWriter(os.path.join(args.output_dir, "dred_rdovae_stats_data"), message=message)
constants_writer = CWriter(os.path.join(args.output_dir, "dred_rdovae_constants"), message=message, header_only=True)
-
+
# some custom includes
for writer in [enc_writer, dec_writer, stats_writer]:
writer.header.write(
@@ -99,10 +99,10 @@
#include "nnet.h"
"""
)
-
+
# encoder
encoder_dense_layers = [
- ('core_encoder.module.dense_1' , 'enc_dense1', 'TANH'),
+ ('core_encoder.module.dense_1' , 'enc_dense1', 'TANH'),
('core_encoder.module.dense_2' , 'enc_dense3', 'TANH'),
('core_encoder.module.dense_3' , 'enc_dense5', 'TANH'),
('core_encoder.module.dense_4' , 'enc_dense7', 'TANH'),
@@ -110,31 +110,31 @@
('core_encoder.module.state_dense_1' , 'gdense1' , 'TANH'),
('core_encoder.module.state_dense_2' , 'gdense2' , 'TANH')
]
-
+
for name, export_name, activation in encoder_dense_layers:
layer = model.get_submodule(name)
dump_torch_weights(enc_writer, layer, name=export_name, activation=activation, verbose=True)
-
-
- encoder_gru_layers = [
+
+
+ encoder_gru_layers = [
('core_encoder.module.gru_1' , 'enc_dense2', 'TANH'),
('core_encoder.module.gru_2' , 'enc_dense4', 'TANH'),
('core_encoder.module.gru_3' , 'enc_dense6', 'TANH')
]
-
+
enc_max_rnn_units = max([dump_torch_weights(enc_writer, model.get_submodule(name), export_name, activation, verbose=True, input_sparse=True, dotp=True)
for name, export_name, activation in encoder_gru_layers])
-
-
- encoder_conv_layers = [
- ('core_encoder.module.conv1' , 'bits_dense' , 'LINEAR')
+
+
+ encoder_conv_layers = [
+ ('core_encoder.module.conv1' , 'bits_dense' , 'LINEAR')
]
-
- enc_max_conv_inputs = max([dump_torch_weights(enc_writer, model.get_submodule(name), export_name, activation, verbose=True) for name, export_name, activation in encoder_conv_layers])
-
+ enc_max_conv_inputs = max([dump_torch_weights(enc_writer, model.get_submodule(name), export_name, activation, verbose=True) for name, export_name, activation in encoder_conv_layers])
+
+
del enc_writer
-
+
# decoder
decoder_dense_layers = [
('core_decoder.module.gru_1_init' , 'state1', 'TANH'),
@@ -151,25 +151,25 @@
for name, export_name, activation in decoder_dense_layers:
layer = model.get_submodule(name)
dump_torch_weights(dec_writer, layer, name=export_name, activation=activation, verbose=True)
-
+
decoder_gru_layers = [
('core_decoder.module.gru_1' , 'dec_dense2', 'TANH'),
('core_decoder.module.gru_2' , 'dec_dense4', 'TANH'),
('core_decoder.module.gru_3' , 'dec_dense6', 'TANH')
]
-
+
dec_max_rnn_units = max([dump_torch_weights(dec_writer, model.get_submodule(name), export_name, activation, verbose=True, input_sparse=True, dotp=True)
for name, export_name, activation in decoder_gru_layers])
-
+
del dec_writer
-
+
# statistical model
qembedding = model.statistical_model.quant_embedding
dump_statistical_model(stats_writer, qembedding)
-
+
del stats_writer
-
+
# constants
constants_writer.header.write(
f"""
@@ -193,12 +193,12 @@
"""
)
-
+
del constants_writer
def numpy_export(args, model):
-
+
exchange_name_to_name = {
'encoder_stack_layer1_dense' : 'core_encoder.module.dense_1',
'encoder_stack_layer3_dense' : 'core_encoder.module.dense_2',
@@ -225,9 +225,9 @@
'decoder_stack_layer4_gru' : 'core_decoder.module.gru_2',
'decoder_stack_layer6_gru' : 'core_decoder.module.gru_3'
}
-
+
name_to_exchange_name = {value : key for key, value in exchange_name_to_name.items()}
-
+
for name, exchange_name in name_to_exchange_name.items():
print(f"printing layer {name}...")
dump_torch_weights(os.path.join(args.output_dir, exchange_name), model.get_submodule(name))
@@ -234,11 +234,11 @@
if __name__ == "__main__":
-
-
+
+
os.makedirs(args.output_dir, exist_ok=True)
-
-
+
+
# load model from checkpoint
checkpoint = torch.load(args.checkpoint, map_location='cpu')
model = RDOVAE(*checkpoint['model_args'], **checkpoint['model_kwargs'])
@@ -249,7 +249,7 @@
if len(unmatched_keys) > 0:
print(f"warning: the following keys were unmatched {unmatched_keys}")
-
+
if args.format == 'C':
c_export(args, model)
elif args.format == 'numpy':
--- a/dnn/torch/rdovae/fec_encoder.py
+++ b/dnn/torch/rdovae/fec_encoder.py
@@ -84,7 +84,7 @@
# load signal
if args.input.endswith('.raw') or args.input.endswith('.pcm'):
signal = np.fromfile(args.input, dtype='int16')
-
+
elif args.input.endswith('.wav'):
fs, signal = wavfile.read(args.input)
else:
@@ -94,7 +94,7 @@
padded_signal_length = len(signal) + total_delay
tail = padded_signal_length % frame_size
right_padding = (frame_size - tail) % frame_size
-
+
signal = np.concatenate((np.zeros(total_delay, dtype=np.int16), signal, np.zeros(right_padding, dtype=np.int16)))
padded_signal_file = os.path.splitext(args.input)[0] + '_padded.raw'
@@ -152,7 +152,7 @@
zi = torch.clone(z[:, i - 2 * input_length + 2: i + 1 : 2, :])
zi, rates = model.quantize(zi, quant_ids)
zi = model.unquantize(zi, quant_ids)
-
+
features = model.decode(zi, states[:, i : i + 1, :])
packets.append(features.squeeze(0).numpy())
packet_size = 8 * int((torch.sum(rates) + 7 + state_size) / 8)
@@ -176,7 +176,7 @@
count = 2
for i in range(num_packets):
if (loss[i] == 0) or (i == num_packets - 1):
-
+
fec_out[ptr:ptr+count,:] = packets[i][foffset:, :]
ptr += count
@@ -190,14 +190,14 @@
fec_out_full[:, : fec_out.shape[-1]] = fec_out
fec_out_full.tofile(packet_file[:-4] + f'_fec.f32')
-
-
+
+
if args.debug_output:
import itertools
batches = [4]
offsets = [0, 2 * args.num_redundancy_frames - 4]
-
+
# sanity checks
# 1. concatenate features at offset 0
for batch, offset in itertools.product(batches, offsets):
@@ -210,4 +210,3 @@
print(f"writing debug output {packet_file[:-4] + f'_torch_batch{batch}_offset{offset}.f32'}")
test_features_full.tofile(packet_file[:-4] + f'_torch_batch{batch}_offset{offset}.f32')
-
--- a/dnn/torch/rdovae/import_rdovae_weights.py
+++ b/dnn/torch/rdovae/import_rdovae_weights.py
@@ -90,8 +90,8 @@
cond_size = args.cond_size
cond_size2 = args.cond_size2
state_dim = args.state_dim
-
+
# model
checkpoint['model_args'] = (num_features, latent_dim, quant_levels, cond_size, cond_size2)
checkpoint['model_kwargs'] = {'state_dim': state_dim}
@@ -105,9 +105,9 @@
'encoder_stack_layer8_dense',
'encoder_state_layer1_dense',
'encoder_state_layer2_dense',
- 'decoder_state1_dense',
- 'decoder_state2_dense',
- 'decoder_state3_dense',
+ 'decoder_state1_dense',
+ 'decoder_state2_dense',
+ 'decoder_state3_dense',
'decoder_stack_layer1_dense',
'decoder_stack_layer3_dense',
'decoder_stack_layer5_dense',
@@ -122,7 +122,7 @@
'encoder_stack_layer6_gru',
'decoder_stack_layer2_gru',
'decoder_stack_layer4_gru',
- 'decoder_stack_layer6_gru'
+ 'decoder_stack_layer6_gru'
]
conv1d_layer_names = [
--- a/dnn/torch/rdovae/packets/fec_packets.c
+++ b/dnn/torch/rdovae/packets/fec_packets.c
@@ -43,7 +43,7 @@
long offset;
FILE *fid = fopen(filename, "rb");
-
+
/* read header */
if (fread(&version, sizeof(version), 1, fid) != 1) goto error;
if (fread(&header_size, sizeof(header_size), 1, fid) != 1) goto error;
@@ -88,7 +88,7 @@
int16_t rate;
FILE *fid = fopen(filename, "rb");
-
+
/* read header */
if (fread(&version, sizeof(version), 1, fid) != 1) goto error;
if (fread(&header_size, sizeof(header_size), 1, fid) != 1) goto error;
--- a/dnn/torch/rdovae/packets/fec_packets.py
+++ b/dnn/torch/rdovae/packets/fec_packets.py
@@ -33,25 +33,25 @@
def write_fec_packets(filename, packets, rates=None):
""" writes packets in binary format """
-
+
assert np.dtype(np.float32).itemsize == 4
assert np.dtype(np.int16).itemsize == 2
-
- # derive some sizes
+
+ # derive some sizes
num_packets = len(packets)
subframes_per_packet = packets[0].shape[-2]
num_features = packets[0].shape[-1]
-
+
# size of float is 4
subframe_size = num_features * 4
packet_size = subframe_size * subframes_per_packet + 2 # two bytes for rate
-
+
version = 1
# header size (version, header_size, num_packets, packet_size, subframe_size, subframes_per_packet, num_features)
header_size = 14
-
+
with open(filename, 'wb') as f:
-
+
# header
f.write(np.int16(version).tobytes())
f.write(np.int16(header_size).tobytes())
@@ -60,7 +60,7 @@
f.write(np.int16(subframe_size).tobytes())
f.write(np.int16(subframes_per_packet).tobytes())
f.write(np.int16(num_features).tobytes())
-
+
# packets
for i, packet in enumerate(packets):
if type(rates) == type(None):
@@ -67,21 +67,21 @@
rate = 0
else:
rate = rates[i]
-
+
f.write(np.int16(rate).tobytes())
-
+
features = np.flip(packet, axis=-2)
f.write(features.astype(np.float32).tobytes())
-
-
+
+
def read_fec_packets(filename):
""" reads packets from binary format """
-
+
assert np.dtype(np.float32).itemsize == 4
assert np.dtype(np.int16).itemsize == 2
-
+
with open(filename, 'rb') as f:
-
+
# header
version = np.frombuffer(f.read(2), dtype=np.int16).item()
header_size = np.frombuffer(f.read(2), dtype=np.int16).item()
@@ -90,19 +90,19 @@
subframe_size = np.frombuffer(f.read(2), dtype=np.int16).item()
subframes_per_packet = np.frombuffer(f.read(2), dtype=np.int16).item()
num_features = np.frombuffer(f.read(2), dtype=np.int16).item()
-
+
dummy_features = np.zeros((subframes_per_packet, num_features), dtype=np.float32)
-
+
# packets
rates = []
packets = []
for i in range(num_packets):
-
+
rate = np.frombuffer(f.read(2), dtype=np.int16).item()
rates.append(rate)
-
+
features = np.reshape(np.frombuffer(f.read(subframe_size * subframes_per_packet), dtype=np.float32), dummy_features.shape)
packet = np.flip(features, axis=-2)
packets.append(packet)
-
+
return packets
\ No newline at end of file
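Aside: the header read field-by-field above is seven int16 values, 14 bytes in
total, matching the header_size comment in write_fec_packets. A sketch of the
same read in a single call, assuming little-endian data as written on the
usual x86 hosts (numpy's int16 is native-endian):

    import struct

    with open(filename, "rb") as f:
        (version, header_size, num_packets, packet_size, subframe_size,
         subframes_per_packet, num_features) = struct.unpack("<7h", f.read(14))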
--- a/dnn/torch/rdovae/rdovae/dataset.py
+++ b/dnn/torch/rdovae/rdovae/dataset.py
@@ -40,7 +40,7 @@
lambda_max=0.0135,
quant_levels=16,
enc_stride=2):
-
+
self.sequence_length = sequence_length
self.lambda_min = lambda_min
self.lambda_max = lambda_max
@@ -50,7 +50,7 @@
if sequence_length % enc_stride:
raise ValueError(f"RDOVAEDataset.__init__: enc_stride {enc_stride} does not divide sequence length {sequence_length}")
-
+
self.features = np.reshape(np.fromfile(feature_file, dtype=np.float32), (-1, num_features))
self.features = self.features[:, :num_used_features]
self.num_sequences = self.features.shape[0] // sequence_length
@@ -65,4 +65,3 @@
rate_lambda = self.lambda_min * np.exp(q_ids.astype(np.float32) / self.denominator).astype(np.float32)
return features, rate_lambda, q_ids
-
--- a/dnn/torch/rdovae/rdovae/rdovae.py
+++ b/dnn/torch/rdovae/rdovae/rdovae.py
@@ -42,8 +42,8 @@
# L2 normalization
x_norm2 = x / (1e-15 + torch.norm(x, dim=-1, keepdim=True))
-
+
with torch.no_grad():
# quantization loop, no need to track gradients here
x_norm1 = x / torch.sum(torch.abs(x), dim=-1, keepdim=True)
@@ -84,19 +84,19 @@
return cache[args]
else:
cache[args] = func(*args)
-
+
return cache[args]
return cached_func
-
+
@cache_parameters
def pvq_codebook_size(n, k):
-
+
if k == 0:
return 1
-
+
if n == 0:
return 0
-
+
return pvq_codebook_size(n - 1, k) + pvq_codebook_size(n, k - 1) + pvq_codebook_size(n - 1, k - 1)
@@ -121,7 +121,7 @@
p0 = 1 - r ** (0.5 + 0.5 * theta)
alpha = torch.relu(1 - torch.abs(z_q)) ** 2
rate = - torch.sum(
- (alpha * torch.log2(p0 * r ** torch.abs(z_q) + 1e-6)
+ (alpha * torch.log2(p0 * r ** torch.abs(z_q) + 1e-6)
+ (1 - alpha) * torch.log2(0.5 * (1 - p0) * (1 - r) * r ** (torch.abs(z_q) - 1) + 1e-6)),
dim=-1
)
@@ -154,7 +154,7 @@
def distortion_loss(y_true, y_pred, rate_lambda=None):
""" custom distortion loss for LPCNet features """
-
+
if y_true.size(-1) != 20:
raise ValueError('distortion loss is designed to work with 20 features')
@@ -169,7 +169,7 @@
loss = loss / torch.sqrt(rate_lambda)
loss = torch.mean(loss)
-
+
return loss
@@ -181,10 +181,10 @@
def random_split(start, stop, num_splits=3, min_len=3):
get_min_len = lambda x : min([x[i+1] - x[i] for i in range(len(x) - 1)])
candidate = [start] + sorted([random.randint(start, stop-1) for i in range(num_splits)]) + [stop]
-
- while get_min_len(candidate) < min_len:
+
+ while get_min_len(candidate) < min_len:
candidate = [start] + sorted([random.randint(start, stop-1) for i in range(num_splits)]) + [stop]
-
+
return candidate
@@ -191,13 +191,13 @@
# weight initialization and clipping
def init_weights(module):
-
+
if isinstance(module, nn.GRU):
for p in module.named_parameters():
if p[0].startswith('weight_hh_'):
nn.init.orthogonal_(p[1])
-
+
def weight_clip_factory(max_value):
""" weight clipping function concerning sum of abs values of adjecent weights """
def clip_weight_(w):
@@ -213,13 +213,13 @@
1))
with torch.no_grad():
w[:, :stop] *= factor
-
+
def clip_weights(module):
if isinstance(module, nn.GRU) or isinstance(module, nn.Linear):
for name, w in module.named_parameters():
if name.startswith('weight'):
clip_weight_(w)
-
+
return clip_weights
# RDOVAE module and submodules
@@ -229,12 +229,12 @@
STATE_HIDDEN = 128
FRAMES_PER_STEP = 2
CONV_KERNEL_SIZE = 4
-
+
def __init__(self, feature_dim, output_dim, cond_size, cond_size2, state_size=24):
""" core encoder for RDOVAE
-
+
Computes latents, initial states, and rate estimates from features and lambda parameter
-
+
"""
super(CoreEncoder, self).__init__()
@@ -289,7 +289,7 @@
# concatenation of all hidden layer outputs
x9 = torch.cat((x1, x2, x3, x4, x5, x6, x7, x8), dim=-1)
-
+
# init state for decoder
states = torch.tanh(self.state_dense_1(x9))
states = torch.tanh(self.state_dense_2(states))
@@ -309,9 +309,9 @@
def __init__(self, input_dim, output_dim, cond_size, cond_size2, state_size=24):
""" core decoder for RDOVAE
-
+
Computes features from latents, initial state, and quantization index
-
+
"""
super(CoreDecoder, self).__init__()
@@ -324,7 +324,7 @@
self.state_size = state_size
self.input_size = self.input_dim
-
+
self.concat_size = 4 * self.cond_size + 4 * self.cond_size2
# layers
@@ -348,7 +348,7 @@
self.apply(init_weights)
def forward(self, z, initial_state):
-
+
gru_1_state = torch.tanh(self.gru_1_init(initial_state).permute(1, 0, 2))
gru_2_state = torch.tanh(self.gru_2_init(initial_state).permute(1, 0, 2))
gru_3_state = torch.tanh(self.gru_3_init(initial_state).permute(1, 0, 2))
@@ -374,9 +374,9 @@
class StatisticalModel(nn.Module):
def __init__(self, quant_levels, latent_dim):
""" Statistical model for latent space
-
- Computes scaling, deadzone, r, and theta
-
+
+ Computes scaling, deadzone, r, and theta
+
"""
super(StatisticalModel, self).__init__()
@@ -388,7 +388,7 @@
# quantization embedding
self.quant_embedding = nn.Embedding(quant_levels, self.embedding_dim)
-
+
# initialize embedding to 0
with torch.no_grad():
self.quant_embedding.weight[:] = 0
@@ -406,8 +406,8 @@
r_soft = torch.sigmoid(x[..., 3 * self.latent_dim : 4 * self.latent_dim])
theta_hard = torch.sigmoid(x[..., 4 * self.latent_dim : 5 * self.latent_dim])
r_hard = torch.sigmoid(x[..., 5 * self.latent_dim : 6 * self.latent_dim])
-
+
return {
'quant_embedding' : x,
'quant_scale' : quant_scale,
@@ -443,34 +443,34 @@
self.state_dim = state_dim
self.pvq_num_pulses = pvq_num_pulses
self.state_dropout_rate = state_dropout_rate
-
+
# submodules encoder and decoder share the statistical model
self.statistical_model = StatisticalModel(quant_levels, latent_dim)
self.core_encoder = nn.DataParallel(CoreEncoder(feature_dim, latent_dim, cond_size, cond_size2, state_size=state_dim))
self.core_decoder = nn.DataParallel(CoreDecoder(latent_dim, feature_dim, cond_size, cond_size2, state_size=state_dim))
-
+
self.enc_stride = CoreEncoder.FRAMES_PER_STEP
self.dec_stride = CoreDecoder.FRAMES_PER_STEP
-
+
if clip_weights:
self.weight_clip_fn = weight_clip_factory(0.496)
else:
self.weight_clip_fn = None
-
+
if self.dec_stride % self.enc_stride != 0:
raise ValueError(f"get_decoder_chunks_generic: encoder stride does not divide decoder stride")
-
+
def clip_weights(self):
if not type(self.weight_clip_fn) == type(None):
self.apply(self.weight_clip_fn)
-
+
def get_decoder_chunks(self, z_frames, mode='split', chunks_per_offset = 4):
-
+
enc_stride = self.enc_stride
dec_stride = self.dec_stride
stride = dec_stride // enc_stride
-
+
chunks = []
for offset in range(stride):
@@ -529,7 +529,7 @@
z_q = hard_quantize(z) / statistical_model['quant_scale']
z_n = noise_quantize(z) / statistical_model['quant_scale']
states_q = soft_pvq(states, self.pvq_num_pulses)
-
+
if self.state_dropout_rate > 0:
drop = torch.rand(states_q.size(0)) < self.state_dropout_rate
mask = torch.ones_like(states_q)
@@ -552,7 +552,7 @@
# decoder with soft quantized input
z_dec_reverse = torch.flip(z_n[..., chunk['z_start'] : chunk['z_stop'] : chunk['z_stride'], :], [1])
features_reverse = self.core_decoder(z_dec_reverse, dec_initial_state)
- outputs_sq.append((torch.flip(features_reverse, [1]), chunk['features_start'], chunk['features_stop']))
+ outputs_sq.append((torch.flip(features_reverse, [1]), chunk['features_start'], chunk['features_stop']))
return {
'outputs_hard_quant' : outputs_hq,
@@ -563,24 +563,24 @@
def encode(self, features):
""" encoder with quantization and rate estimation """
-
+
z, states = self.core_encoder(features)
-
+
# quantization of initial states
- states = soft_pvq(states, self.pvq_num_pulses)
+ states = soft_pvq(states, self.pvq_num_pulses)
state_size = m.log2(pvq_codebook_size(self.state_dim, self.pvq_num_pulses))
-
+
return z, states, state_size
def decode(self, z, initial_state):
""" decoder (flips sequences by itself) """
-
+
z_reverse = torch.flip(z, [1])
features_reverse = self.core_decoder(z_reverse, initial_state)
features = torch.flip(features_reverse, [1])
-
+
return features
-
+
def quantize(self, z, q_ids):
""" quantization of latent vectors """
@@ -602,13 +602,12 @@
z = zq / stats['quant_scale']
return z
-
+
def freeze_model(self):
# freeze all parameters
for p in self.parameters():
p.requires_grad = False
-
+
for p in self.statistical_model.parameters():
p.requires_grad = True
-
--- a/dnn/torch/rdovae/train_rdovae.py
+++ b/dnn/torch/rdovae/train_rdovae.py
@@ -89,7 +89,7 @@
checkpoint['batch_size'] = batch_size
checkpoint['lr'] = lr
-checkpoint['lr_decay_factor'] = lr_decay_factor
+checkpoint['lr_decay_factor'] = lr_decay_factor
checkpoint['split_mode'] = split_mode
checkpoint['epochs'] = epochs
checkpoint['sequence_length'] = sequence_length
@@ -130,10 +130,10 @@
if args.train_decoder_only:
if args.initial_checkpoint is None:
print("warning: training decoder only without providing initial checkpoint")
-
+
for p in model.core_encoder.module.parameters():
p.requires_grad = False
-
+
for p in model.statistical_model.parameters():
p.requires_grad = False
@@ -180,15 +180,15 @@
# zero out gradients
optimizer.zero_grad()
-
+
# push inputs to device
features = features.to(device)
q_ids = q_ids.to(device)
rate_lambda = rate_lambda.to(device)
-
+
rate_lambda_upsamp = torch.repeat_interleave(rate_lambda, 2, 1)
-
+
# run model
model_output = model(features, q_ids)
@@ -224,17 +224,17 @@
# total loss
total_loss = rate_loss + (distortion_loss_hard_quant + distortion_loss_soft_quant) / 2
-
+
if args.enable_first_frame_loss:
total_loss = total_loss + 0.5 * torch.relu(first_frame_loss - distortion_loss_hard_quant)
-
+
total_loss.backward()
-
+
optimizer.step()
-
+
model.clip_weights()
-
+
scheduler.step()
# collect running stats
--- a/dnn/training_tf2/diffembed.py
+++ b/dnn/training_tf2/diffembed.py
@@ -3,7 +3,7 @@
1. Not restricted to be the first layer of a model
2. Differentiable (allows non-integer lookups)
- For non-integer lookups, this layer linearly interpolates between the adjacent embeddings in the following way to preserve gradient flow
- - E = (1 - frac(x))*embed(floor(x)) + frac(x)*embed(ceil(x))
+ - E = (1 - frac(x))*embed(floor(x)) + frac(x)*embed(ceil(x))
"""
import tensorflow as tf
@@ -26,13 +26,13 @@
self.pcm_init = pcm_init
self.initializer = initializer
- def build(self, input_shape):
+ def build(self, input_shape):
w_init = tf.random_normal_initializer()
- if self.pcm_init:
+ if self.pcm_init:
w_init = self.initializer
self.w = tf.Variable(initial_value=w_init(shape=(self.dict_size, self.units),dtype='float32'),trainable=True)
- def call(self, inputs):
+ def call(self, inputs):
alpha = inputs - tf.math.floor(inputs)
alpha = tf.expand_dims(alpha,axis = -1)
alpha = tf.tile(alpha,[1,1,1,self.units])
--- a/dnn/training_tf2/dump_lpcnet.py
+++ b/dnn/training_tf2/dump_lpcnet.py
@@ -309,13 +309,13 @@
else:
hf.write('/* This is *not* an end-to-end model */\n')
hf.write('/* #define END2END */\n\n')
-
+
# LPC weighting factor
if type(args.lpc_gamma) == type(None):
lpc_gamma = get_parameter(model, 'lpc_gamma', 1)
else:
lpc_gamma = args.lpc_gamma
-
+
hf.write('/* LPC weighting factor */\n')
hf.write('#define LPC_GAMMA ' + str(lpc_gamma) +'f\n\n')
@@ -376,7 +376,7 @@
hf.write('typedef struct {\n')
for i, name in enumerate(layer_list):
- hf.write(' float {}_state[{}_STATE_SIZE];\n'.format(name, name.upper()))
+ hf.write(' float {}_state[{}_STATE_SIZE];\n'.format(name, name.upper()))
hf.write('} NNetState;\n\n')
model_struct.write('} LPCNetModel;\n\n')
--- a/dnn/training_tf2/dump_plc.py
+++ b/dnn/training_tf2/dump_plc.py
@@ -283,7 +283,7 @@
hf.write('typedef struct {\n')
for i, name in enumerate(layer_list):
- hf.write(' float {}_state[{}_STATE_SIZE];\n'.format(name, name.upper()))
+ hf.write(' float {}_state[{}_STATE_SIZE];\n'.format(name, name.upper()))
hf.write('} PLCNetState;\n\n')
model_struct.write('} PLCModel;\n\n')
--- a/dnn/training_tf2/dump_rdovae.py
+++ b/dnn/training_tf2/dump_rdovae.py
@@ -173,7 +173,7 @@
[
dump_conv1d_layer(encoder.get_layer(name), source_fid, header_fid)
for name in encoder_conv1d_names
- ]
+ ]
)
# dump Dense layers
@@ -232,13 +232,13 @@
'dec_dense7',
'dec_dense8',
'dec_final'
- ]
+ ]
decoder_gru_names = [
'dec_dense2',
'dec_dense4',
'dec_dense6'
- ]
+ ]
source_fid = open("dred_rdovae_dec_data.c", 'w')
header_fid = open("dred_rdovae_dec_data.h", 'w')
--- a/dnn/training_tf2/fec_encoder.py
+++ b/dnn/training_tf2/fec_encoder.py
@@ -97,7 +97,7 @@
# load signal
if args.input.endswith('.raw') or args.input.endswith('.pcm') or args.input.endswith('.sw'):
signal = np.fromfile(args.input, dtype='int16')
-
+
elif args.input.endswith('.wav'):
fs, signal = wavfile.read(args.input)
else:
@@ -107,7 +107,7 @@
padded_signal_length = len(signal) + total_delay
tail = padded_signal_length % frame_size
right_padding = (frame_size - tail) % frame_size
-
+
signal = np.concatenate((np.zeros(total_delay, dtype=np.int16), signal, np.zeros(right_padding, dtype=np.int16)))
padded_signal_file = os.path.splitext(args.input)[0] + '_padded.raw'
@@ -228,8 +228,8 @@
fec_out_full[:, :nb_used_features] = fec_out
fec_out_full.tofile(packet_file[:-4] + f'_fec.f32')
-
+
#create packets array like in the original version for debugging purposes
for i in range(offset, num_frames):
packets.append(features[i-offset:i-offset+1, :, :])
@@ -254,4 +254,3 @@
print(f"writing debug output {packet_file[:-4] + f'_tf_batch{batch}_offset{offset}.f32'}")
test_features_full.tofile(packet_file[:-4] + f'_tf_batch{batch}_offset{offset}.f32')
-
--- a/dnn/training_tf2/fec_packets.c
+++ b/dnn/training_tf2/fec_packets.c
@@ -43,7 +43,7 @@
long offset;
FILE *fid = fopen(filename, "rb");
-
+
/* read header */
if (fread(&version, sizeof(version), 1, fid) != 1) goto error;
if (fread(&header_size, sizeof(header_size), 1, fid) != 1) goto error;
@@ -88,7 +88,7 @@
int16_t rate;
FILE *fid = fopen(filename, "rb");
-
+
/* read header */
if (fread(&version, sizeof(version), 1, fid) != 1) goto error;
if (fread(&header_size, sizeof(header_size), 1, fid) != 1) goto error;
--- a/dnn/training_tf2/fec_packets.py
+++ b/dnn/training_tf2/fec_packets.py
@@ -33,25 +33,25 @@
def write_fec_packets(filename, packets, rates=None):
""" writes packets in binary format """
-
+
assert np.dtype(np.float32).itemsize == 4
assert np.dtype(np.int16).itemsize == 2
-
- # derive some sizes
+
+ # derive some sizes
num_packets = len(packets)
subframes_per_packet = packets[0].shape[-2]
num_features = packets[0].shape[-1]
-
+
# size of float is 4
subframe_size = num_features * 4
packet_size = subframe_size * subframes_per_packet + 2 # two bytes for rate
-
+
version = 1
# header size (version, header_size, num_packets, packet_size, subframe_size, subframes_per_packet, num_features)
header_size = 14
-
+
with open(filename, 'wb') as f:
-
+
# header
f.write(np.int16(version).tobytes())
f.write(np.int16(header_size).tobytes())
@@ -60,7 +60,7 @@
f.write(np.int16(subframe_size).tobytes())
f.write(np.int16(subframes_per_packet).tobytes())
f.write(np.int16(num_features).tobytes())
-
+
# packets
for i, packet in enumerate(packets):
if type(rates) == type(None):
@@ -67,21 +67,21 @@
rate = 0
else:
rate = rates[i]
-
+
f.write(np.int16(rate).tobytes())
-
+
features = np.flip(packet, axis=-2)
f.write(features.astype(np.float32).tobytes())
-
-
+
+
def read_fec_packets(filename):
""" reads packets from binary format """
-
+
assert np.dtype(np.float32).itemsize == 4
assert np.dtype(np.int16).itemsize == 2
-
+
with open(filename, 'rb') as f:
-
+
# header
version = np.frombuffer(f.read(2), dtype=np.int16).item()
header_size = np.frombuffer(f.read(2), dtype=np.int16).item()
@@ -90,19 +90,19 @@
subframe_size = np.frombuffer(f.read(2), dtype=np.int16).item()
subframes_per_packet = np.frombuffer(f.read(2), dtype=np.int16).item()
num_features = np.frombuffer(f.read(2), dtype=np.int16).item()
-
+
dummy_features = np.zeros((1, subframes_per_packet, num_features), dtype=np.float32)
-
+
# packets
rates = []
packets = []
for i in range(num_packets):
-
+
rate = np.frombuffer(f.read(2), dtype=np.int16).item()
rates.append(rate)
-
+
features = np.reshape(np.frombuffer(f.read(subframe_size * subframes_per_packet), dtype=np.float32), dummy_features.shape)
packet = np.flip(features, axis=-2)
packets.append(packet)
-
+
return packets
\ No newline at end of file
--- a/dnn/training_tf2/lossfuncs.py
+++ b/dnn/training_tf2/lossfuncs.py
@@ -35,7 +35,7 @@
alpha = e_gt - tf.math.floor(e_gt)
alpha = tf.tile(alpha,[1,1,256])
e_gt = tf.cast(e_gt,'int32')
- e_gt = tf.clip_by_value(e_gt,0,254)
+ e_gt = tf.clip_by_value(e_gt,0,254)
interp_probab = (1 - alpha)*model_out + alpha*tf.roll(model_out,shift = -1,axis = -1)
sparse_cel = tf.keras.losses.SparseCategoricalCrossentropy(reduction=tf.keras.losses.Reduction.NONE)(e_gt,interp_probab)
loss_mod = sparse_cel + prob_compensation + gamma*regularization
@@ -51,7 +51,7 @@
alpha = e_gt - tf.math.floor(e_gt)
alpha = tf.tile(alpha,[1,1,256])
e_gt = tf.cast(e_gt,'int32')
- e_gt = tf.clip_by_value(e_gt,0,254)
+ e_gt = tf.clip_by_value(e_gt,0,254)
interp_probab = (1 - alpha)*model_out + alpha*tf.roll(model_out,shift = -1,axis = -1)
sparse_cel = tf.keras.losses.SparseCategoricalCrossentropy(reduction=tf.keras.losses.Reduction.NONE)(e_gt,interp_probab)
loss_mod = sparse_cel + prob_compensation
@@ -78,7 +78,7 @@
e_gt = tf_l2u(y_true - p)
e_gt = tf.round(e_gt)
e_gt = tf.cast(e_gt,'int32')
- e_gt = tf.clip_by_value(e_gt,0,255)
+ e_gt = tf.clip_by_value(e_gt,0,255)
sparse_cel = tf.keras.losses.SparseCategoricalCrossentropy(reduction=tf.keras.losses.Reduction.NONE)(e_gt,model_out)
return sparse_cel
@@ -97,4 +97,3 @@
loss_lar_diff = tf.square(loss_lar_diff)
return tf.reduce_mean(loss_lar_diff, axis=-1)
return loss
-
--- a/dnn/training_tf2/lpcnet.py
+++ b/dnn/training_tf2/lpcnet.py
@@ -186,8 +186,8 @@
w[0] = p
layer.set_weights(w)
-
+
class PCMInit(Initializer):
def __init__(self, gain=.1, seed=None):
self.gain = gain
@@ -264,13 +264,13 @@
lpcoeffs = diff_rc2lpc(name = "rc2lpc")(cfeat)
else:
lpcoeffs = Input(shape=(None, lpc_order), batch_size=batch_size)
-
+
real_preds = diff_pred(name = "real_lpc2preds")([pcm,lpcoeffs])
weighting = lpc_gamma ** np.arange(1, 17).astype('float32')
weighted_lpcoeffs = Lambda(lambda x: x[0]*x[1])([lpcoeffs, weighting])
tensor_preds = diff_pred(name = "lpc2preds")([pcm,weighted_lpcoeffs])
past_errors = error_calc([pcm,tensor_preds])
-
+
embed = diff_Embed(name='embed_sig',initializer = PCMInit())
cpcm = Concatenate()([tf_l2u(pcm),tf_l2u(tensor_preds),past_errors])
cpcm = GaussianNoise(.3)(cpcm)
@@ -277,7 +277,7 @@
cpcm = Reshape((-1, embed_size*3))(embed(cpcm))
cpcm_decoder = Reshape((-1, embed_size*3))(embed(dpcm))
-
+
rep = Lambda(lambda x: K.repeat_elements(x, frame_size, 1))
quant = quant_regularizer if quantize else None
@@ -305,7 +305,7 @@
rnn2.trainable=False
md.trainable=False
embed.Trainable=False
-
+
m_out = Concatenate(name='pdf')([tensor_preds,real_preds,ulaw_prob])
if not flag_e2e:
model = Model([pcm, feat, pitch, lpcoeffs], m_out)
@@ -315,7 +315,7 @@
model.rnn_units2 = rnn_units2
model.nb_used_features = nb_used_features
model.frame_size = frame_size
-
+
if not flag_e2e:
encoder = Model([feat, pitch], cfeat)
dec_rnn_in = Concatenate()([cpcm_decoder, dec_feat])
@@ -330,7 +330,7 @@
decoder = Model([dpcm, dec_feat, dec_state1, dec_state2], [dec_ulaw_prob, state1, state2])
else:
decoder = Model([dpcm, dec_feat, dec_state1, dec_state2], [dec_ulaw_prob, state1, state2])
-
+
# add parameters to model
set_parameter(model, 'lpc_gamma', lpc_gamma, dtype='float64')
set_parameter(model, 'flag_e2e', flag_e2e, dtype='bool')
--- a/dnn/training_tf2/lpcnet_plc.py
+++ b/dnn/training_tf2/lpcnet_plc.py
@@ -88,10 +88,10 @@
gru_out1, _ = rnn(cfeat)
gru_out1 = GaussianNoise(.005)(gru_out1)
gru_out2, _ = rnn2(gru_out1)
-
+
out_dense = Dense(nb_used_features, activation='linear', name='plc_out')
plc_out = out_dense(gru_out2)
-
+
model = Model([feat, lost], plc_out)
model.rnn_units = rnn_units
model.cond_size = cond_size
--- a/dnn/training_tf2/mdense.py
+++ b/dnn/training_tf2/mdense.py
@@ -6,7 +6,7 @@
import math
class MDense(Layer):
-
+
def __init__(self, outputs,
channels=2,
activation=None,
--- a/dnn/training_tf2/parameters.py
+++ b/dnn/training_tf2/parameters.py
@@ -5,9 +5,9 @@
def set_parameter(model, parameter_name, parameter_value, dtype='float32'):
""" stores parameter_value as non-trainable weight with name parameter_name:0 """
-
+
weights = [weight for weight in model.weights if weight.name == (parameter_name + ":0")]
-
+
if len(weights) == 0:
model.add_weight(parameter_name, trainable=False, initializer=tf.keras.initializers.Constant(parameter_value), dtype=dtype)
elif len(weights) == 1:
@@ -15,14 +15,14 @@
else:
raise ValueError(f"more than one weight starting with {parameter_name}:0 in model")
-
+
def get_parameter(model, parameter_name, default=None):
""" returns parameter value if parameter is present in model and otherwise default """
-
+
weights = [weight for weight in model.weights if weight.name == (parameter_name + ":0")]
-
+
if len(weights) == 0:
- return default
+ return default
elif len(weights) > 1:
raise ValueError(f"more than one weight starting with {parameter_name}:0 in model")
else:
--- a/dnn/training_tf2/plc_loader.py
+++ b/dnn/training_tf2/plc_loader.py
@@ -56,7 +56,7 @@
lost_mask = np.tile(lost, (1,1,features.shape[2]))
in_features = features*lost_mask
in_features[:,:,:self.nb_burg_features] = in_features[:,:,:self.nb_burg_features]*burg_mask
-
+
#For the first frame after a loss, we don't have valid features, but the Burg estimate is valid.
#in_features[:,1:,self.nb_burg_features:] = in_features[:,1:,self.nb_burg_features:]*lost_mask[:,:-1,self.nb_burg_features:]
out_lost = np.copy(lost)
--- a/dnn/training_tf2/rdovae.py
+++ b/dnn/training_tf2/rdovae.py
@@ -61,7 +61,7 @@
#x = 4*x
#x = x - (.25/np.math.pi)*tf.math.sin(2*np.math.pi*x)
#x = x - (.25/np.math.pi)*tf.math.sin(2*np.math.pi*x)
- #x = x - (.25/np.math.pi)*tf.math.sin(2*np.math.pi*x)
+ #x = x - (.25/np.math.pi)*tf.math.sin(2*np.math.pi*x)
return x
def noise_quantize(x):
@@ -237,7 +237,7 @@
bits_input = Input(shape=(None, nb_bits), batch_size=batch_size, name="dec_bits")
gru_state_input = Input(shape=(nb_state_dim,), batch_size=batch_size, name="dec_state")
-
+
gru = CuDNNGRU if training else GRU
dec_dense1 = Dense(cond_size2, activation='tanh', kernel_constraint=constraint, name='dec_dense1')
dec_dense2 = gru(cond_size, return_sequences=True, kernel_constraint=constraint, recurrent_constraint=constraint, name='dec_dense2')
@@ -300,7 +300,7 @@
y = []
for i in range(n-1):
offset = 2 * (n-1-i)
- tmp = K.concatenate([x[i][:, offset:, :], x[-1][:, -offset:, :]], axis=-2)
+ tmp = K.concatenate([x[i][:, offset:, :], x[-1][:, -offset:, :]], axis=-2)
y.append(tf.expand_dims(tmp, axis=0))
y.append(tf.expand_dims(x[-1], axis=0))
return Concatenate(axis=0)(y)
@@ -335,7 +335,7 @@
dze = dzone([ze,dead_zone])
ndze = noisequant(dze)
dze_quant = hardquant(dze)
-
+
div = Lambda(lambda x: x[0]/x[1])
dze_quant = div([dze_quant,quant_scale])
ndze_unquant = div([ndze,quant_scale])
@@ -355,13 +355,13 @@
combined_output.append(tmp)
tmp = split_decoder([ndze_select, state_select])
- tmp = cat([tmp, lambda_up])
+ tmp = cat([tmp, lambda_up])
unquantized_output.append(tmp)
concat = Lambda(tensor_concat, name="output")
combined_output = concat(combined_output)
unquantized_output = concat(unquantized_output)
-
+
e2 = Concatenate(name="hard_bits")([dze, hard_distr_embed, lambda_val])
e = Concatenate(name="soft_bits")([dze, soft_distr_embed, lambda_val])
@@ -370,4 +370,3 @@
model.nb_used_features = nb_used_features
return model, encoder, decoder, qembedding
-
--- a/dnn/training_tf2/rdovae_exchange.py
+++ b/dnn/training_tf2/rdovae_exchange.py
@@ -113,7 +113,7 @@
# qembedding
print(f"writing layer {exchange_name['qembedding']}...")
dump_tf_weights(os.path.join(args.output, exchange_name['qembedding']), qembedding)
-
+
# decoder
decoder_dense_names = [
'state1',
@@ -125,7 +125,7 @@
'dec_dense7',
'dec_dense8',
'dec_final'
- ]
+ ]
decoder_gru_names = [
'dec_dense2',
--- a/dnn/training_tf2/rdovae_import.py
+++ b/dnn/training_tf2/rdovae_import.py
@@ -79,7 +79,7 @@
if __name__ == "__main__":
model, encoder, decoder, qembedding = new_rdovae_model(20, args.latent_dim, cond_size=args.cond_size, nb_quant=args.quant_levels)
-
+
encoder_layers = [
'enc_dense1',
'enc_dense3',
@@ -93,7 +93,7 @@
'enc_dense6',
'bits_dense'
]
-
+
decoder_layers = [
'state1',
'state2',
@@ -108,16 +108,16 @@
'dec_dense4',
'dec_dense6'
]
-
+
for name in encoder_layers:
print(f"loading weight for layer {name}...")
load_tf_weights(os.path.join(args.input, exchange_name[name]), encoder.get_layer(name))
-
+
print(f"loading weight for layer qembedding...")
load_tf_weights(os.path.join(args.input, exchange_name['qembedding']), qembedding)
-
+
for name in decoder_layers:
print(f"loading weight for layer {name}...")
load_tf_weights(os.path.join(args.input, exchange_name[name]), decoder.get_layer(name))
-
- model.save(args.weights)
\ No newline at end of file
+
+ model.save(args.weights)
--- a/dnn/training_tf2/test_lpcnet.py
+++ b/dnn/training_tf2/test_lpcnet.py
@@ -118,5 +118,3 @@
#print(mem)
np.array([np.round(mem)], dtype='int16').tofile(fout)
skip = 0
-
-
--- a/dnn/training_tf2/tf_funcs.py
+++ b/dnn/training_tf2/tf_funcs.py
@@ -36,12 +36,12 @@
rept = Lambda(lambda x: K.repeat_elements(x , frame_size, 1))
zpX = Lambda(lambda x: K.concatenate([0*x[:,0:lpcoeffs_N,:], x],axis = 1))
cX = Lambda(lambda x: K.concatenate([x[:,(lpcoeffs_N - i):(lpcoeffs_N - i + 2400),:] for i in range(lpcoeffs_N)],axis = 2))
-
+
pred = -Multiply()([rept(lpc),cX(zpX(xt))])
return K.sum(pred,axis = 2,keepdims = True)
-# Differentiable Transformations (RC <-> LPC) computed using the Levinson Durbin Recursion
+# Differentiable Transformations (RC <-> LPC) computed using the Levinson Durbin Recursion
class diff_rc2lpc(Layer):
def call(self, inputs, lpcoeffs_N = 16):
def pred_lpc_recursive(input):
--- a/dnn/training_tf2/train_lpcnet.py
+++ b/dnn/training_tf2/train_lpcnet.py
@@ -134,7 +134,7 @@
with strategy.scope():
model, _, _ = lpcnet.new_lpcnet_model(rnn_units1=args.grua_size,
- rnn_units2=args.grub_size,
+ rnn_units2=args.grub_size,
batch_size=batch_size, training=True,
quantize=quantize,
flag_e2e=flag_e2e,
--- a/dnn/vec_neon.h
+++ b/dnn/vec_neon.h
@@ -200,14 +200,14 @@
for (i=0;i<rows;i+=16)
{
float * restrict y = &out[i];
-
+
/* keep y[0..15] in registers for duration of inner loop */
-
+
float32x4_t y0_3 = vld1q_f32(&y[0]);
float32x4_t y4_7 = vld1q_f32(&y[4]);
float32x4_t y8_11 = vld1q_f32(&y[8]);
float32x4_t y12_15 = vld1q_f32(&y[12]);
-
+
for (j=0;j<cols;j++)
{
const float * restrict w;
@@ -219,9 +219,9 @@
wvec4_7 = vld1q_f32(&w[4]);
wvec8_11 = vld1q_f32(&w[8]);
wvec12_15 = vld1q_f32(&w[12]);
-
+
xj = vld1q_dup_f32(&x[j]);
-
+
y0_3 = vmlaq_f32(y0_3, wvec0_3, xj);
y4_7 = vmlaq_f32(y4_7, wvec4_7, xj);
y8_11 = vmlaq_f32(y8_11, wvec8_11, xj);
@@ -229,12 +229,12 @@
}
/* save y[0..15] back to memory */
-
+
vst1q_f32(&y[0], y0_3);
vst1q_f32(&y[4], y4_7);
vst1q_f32(&y[8], y8_11);
vst1q_f32(&y[12], y12_15);
-
+
}
}
@@ -249,32 +249,32 @@
y = &out[i];
/* keep y[0..15] in registers for duration of inner loop */
-
+
float32x4_t y0_3 = vld1q_f32(&y[0]);
float32x4_t y4_7 = vld1q_f32(&y[4]);
float32x4_t y8_11 = vld1q_f32(&y[8]);
float32x4_t y12_15 = vld1q_f32(&y[12]);
-
+
for (j=0;j<cols;j++)
{
float32x4_t xj= vld1q_dup_f32(&x[*idx++]);
float32x4_t wvec;
-
+
wvec = vld1q_f32(&w[0]); y0_3 = vmlaq_f32(y0_3, wvec, xj);
wvec = vld1q_f32(&w[4]); y4_7 = vmlaq_f32(y4_7, wvec, xj);
wvec = vld1q_f32(&w[8]); y8_11 = vmlaq_f32(y8_11, wvec, xj);
wvec = vld1q_f32(&w[12]); y12_15 = vmlaq_f32(y12_15, wvec, xj);
-
+
w += 16;
}
/* save y[0..15] back to memory */
-
+
vst1q_f32(&y[0], y0_3);
vst1q_f32(&y[4], y4_7);
vst1q_f32(&y[8], y8_11);
vst1q_f32(&y[12], y12_15);
-
+
}
}
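The vec_neon.h hunks above are whitespace-only, but for context: the kernel
they touch accumulates a matrix-vector product 16 output rows at a time,
holding y[0..15] in four float32x4_t registers for the whole column loop. A
NumPy reference of that access pattern (a sketch over a plain 2-D weight
array, not the blocked layout the C code actually uses):

    import numpy as np

    def sgemv_accum16_ref(out, w, x):
        """out[i] += sum_j w[i, j] * x[j], processed in blocks of 16 rows."""
        for i in range(0, out.shape[0], 16):
            y = out[i:i+16]                 # kept in registers by the NEON code
            for j in range(x.shape[0]):
                y += w[i:i+16, j] * x[j]    # one vmlaq_f32 step per 4 lanes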
--