ref: 078d90cbdf162a8a637ceac768dddcc59693a644
parent: 171b1ba0ce0411695c8c7b285a8fe636ece3e2c2
parent: 90fec91b12e7114989a44ffc11a58c3684aedc62
author: Jean-Marc Valin <jmvalin@jmvalin.ca>
date: Fri Dec 18 19:20:23 EST 2020
Merge Tensorflow 2 code
--- a/dnn/lpcnet_enc.c
+++ b/dnn/lpcnet_enc.c
@@ -590,7 +590,7 @@
max_prev = st->pitch_max_path_all - 6.f;
pitch_prev[sub][i] = st->best_i;
for (j=IMIN(0, 4-i);j<=4 && i+j<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;j++) {
- if (st->pitch_max_path[0][i+j] > max_prev) {
+ if (st->pitch_max_path[0][i+j] - .02f*abs(j)*abs(j) > max_prev) {
max_prev = st->pitch_max_path[0][i+j] - .02f*abs(j)*abs(j);
pitch_prev[sub][i] = i+j;
}
@@ -662,10 +662,11 @@
if (quantize) {
float p = pow(2.f, main_pitch/21.)*PITCH_MIN_PERIOD;
p *= 1 + modulation/16./7.*(2*sub-3);
+ p = MIN16(255, MAX16(32, p));
st->features[sub][2*NB_BANDS] = .02*(p-100);
st->features[sub][2*NB_BANDS + 1] = frame_corr-.5;
} else {
- st->features[sub][2*NB_BANDS] = .01*(best[2+2*sub]+best[2+2*sub+1]-200);
+ st->features[sub][2*NB_BANDS] = .01*(IMAX(64, IMIN(510, best[2+2*sub]+best[2+2*sub+1]))-200);
st->features[sub][2*NB_BANDS + 1] = frame_corr-.5;
}
//printf("%f %d %f\n", st->features[sub][2*NB_BANDS], best[2+2*sub], frame_corr);
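Both clamps above keep the pitch feature in the same numerical range whether or not the pitch is quantized; a quick illustrative check (plain Python, not part of the patch):

    # quantized path:   p clamped to [32, 255],  feature = .02*(p - 100)
    # unquantized path: sum clamped to [64, 510], feature = .01*(sum - 200)
    for lo, hi, scale, off in [(32, 255, .02, 100), (64, 510, .01, 200)]:
        print(scale*(lo - off), scale*(hi - off))   # -1.36 ... 3.1 on both paths (up to float rounding)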
--- /dev/null
+++ b/dnn/training_tf2/dump_lpcnet.py
@@ -1,0 +1,267 @@
+#!/usr/bin/python3
+'''Copyright (c) 2017-2018 Mozilla
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+'''
+
+import lpcnet
+import sys
+import numpy as np
+from tensorflow.keras.optimizers import Adam
+from tensorflow.keras.layers import Layer, GRU, Dense, Conv1D, Embedding
+from ulaw import ulaw2lin, lin2ulaw
+from mdense import MDense
+import h5py
+import re
+
+max_rnn_neurons = 1
+max_conv_inputs = 1
+max_mdense_tmp = 1
+
+def printVector(f, vector, name, dtype='float'):
+ v = np.reshape(vector, (-1));
+ #print('static const float ', name, '[', len(v), '] = \n', file=f)
+ f.write('static const {} {}[{}] = {{\n '.format(dtype, name, len(v)))
+ for i in range(0, len(v)):
+ f.write('{}'.format(v[i]))
+ if (i!=len(v)-1):
+ f.write(',')
+ else:
+ break;
+ if (i%8==7):
+ f.write("\n ")
+ else:
+ f.write(" ")
+ #print(v, file=f)
+ f.write('\n};\n\n')
+ return;
+
+def printSparseVector(f, A, name):
+ N = A.shape[0]
+ W = np.zeros((0,))
+ diag = np.concatenate([np.diag(A[:,:N]), np.diag(A[:,N:2*N]), np.diag(A[:,2*N:])])
+ A[:,:N] = A[:,:N] - np.diag(np.diag(A[:,:N]))
+ A[:,N:2*N] = A[:,N:2*N] - np.diag(np.diag(A[:,N:2*N]))
+ A[:,2*N:] = A[:,2*N:] - np.diag(np.diag(A[:,2*N:]))
+ printVector(f, diag, name + '_diag')
+ idx = np.zeros((0,), dtype='int')
+ for i in range(3*N//16):
+ pos = idx.shape[0]
+ idx = np.append(idx, -1)
+ nb_nonzero = 0
+ for j in range(N):
+ if np.sum(np.abs(A[j, i*16:(i+1)*16])) > 1e-10:
+ nb_nonzero = nb_nonzero + 1
+ idx = np.append(idx, j)
+ W = np.concatenate([W, A[j, i*16:(i+1)*16]])
+ idx[pos] = nb_nonzero
+ printVector(f, W, name)
+ #idx = np.tile(np.concatenate([np.array([N]), np.arange(N)]), 3*N//16)
+ printVector(f, idx, name + '_idx', dtype='int')
+ return;
+
+def dump_layer_ignore(self, f, hf):
+ print("ignoring layer " + self.name + " of type " + self.__class__.__name__)
+ return False
+Layer.dump_layer = dump_layer_ignore
+
+def dump_sparse_gru(self, f, hf):
+ global max_rnn_neurons
+ name = 'sparse_' + self.name
+ print("printing layer " + name + " of type sparse " + self.__class__.__name__)
+ weights = self.get_weights()
+ printSparseVector(f, weights[1], name + '_recurrent_weights')
+ printVector(f, weights[-1], name + '_bias')
+ if hasattr(self, 'activation'):
+ activation = self.activation.__name__.upper()
+ else:
+ activation = 'TANH'
+ if hasattr(self, 'reset_after') and not self.reset_after:
+ reset_after = 0
+ else:
+ reset_after = 1
+ neurons = weights[0].shape[1]//3
+ max_rnn_neurons = max(max_rnn_neurons, neurons)
+ f.write('const SparseGRULayer {} = {{\n {}_bias,\n {}_recurrent_weights_diag,\n {}_recurrent_weights,\n {}_recurrent_weights_idx,\n {}, ACTIVATION_{}, {}\n}};\n\n'
+ .format(name, name, name, name, name, weights[0].shape[1]//3, activation, reset_after))
+ hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
+ hf.write('#define {}_STATE_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
+ hf.write('extern const SparseGRULayer {};\n\n'.format(name));
+ return True
+
+def dump_gru_layer(self, f, hf):
+ global max_rnn_neurons
+ name = self.name
+ print("printing layer " + name + " of type " + self.__class__.__name__)
+ weights = self.get_weights()
+ printVector(f, weights[0], name + '_weights')
+ printVector(f, weights[1], name + '_recurrent_weights')
+ printVector(f, weights[-1], name + '_bias')
+ if hasattr(self, 'activation'):
+ activation = self.activation.__name__.upper()
+ else:
+ activation = 'TANH'
+ if hasattr(self, 'reset_after') and not self.reset_after:
+ reset_after = 0
+ else:
+ reset_after = 1
+ neurons = weights[0].shape[1]//3
+ max_rnn_neurons = max(max_rnn_neurons, neurons)
+ f.write('const GRULayer {} = {{\n {}_bias,\n {}_weights,\n {}_recurrent_weights,\n {}, {}, ACTIVATION_{}, {}\n}};\n\n'
+ .format(name, name, name, name, weights[0].shape[0], weights[0].shape[1]//3, activation, reset_after))
+ hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
+ hf.write('#define {}_STATE_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
+ hf.write('extern const GRULayer {};\n\n'.format(name));
+ return True
+GRU.dump_layer = dump_gru_layer
+
+def dump_dense_layer_impl(name, weights, bias, activation, f, hf):
+ printVector(f, weights, name + '_weights')
+ printVector(f, bias, name + '_bias')
+ f.write('const DenseLayer {} = {{\n {}_bias,\n {}_weights,\n {}, {}, ACTIVATION_{}\n}};\n\n'
+ .format(name, name, name, weights.shape[0], weights.shape[1], activation))
+ hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights.shape[1]))
+ hf.write('extern const DenseLayer {};\n\n'.format(name));
+
+def dump_dense_layer(self, f, hf):
+ name = self.name
+ print("printing layer " + name + " of type " + self.__class__.__name__)
+ weights = self.get_weights()
+ activation = self.activation.__name__.upper()
+ dump_dense_layer_impl(name, weights[0], weights[1], activation, f, hf)
+ return False
+
+Dense.dump_layer = dump_dense_layer
+
+def dump_mdense_layer(self, f, hf):
+ global max_mdense_tmp
+ name = self.name
+ print("printing layer " + name + " of type " + self.__class__.__name__)
+ weights = self.get_weights()
+ printVector(f, np.transpose(weights[0], (1, 2, 0)), name + '_weights')
+ printVector(f, np.transpose(weights[1], (1, 0)), name + '_bias')
+ printVector(f, np.transpose(weights[2], (1, 0)), name + '_factor')
+ activation = self.activation.__name__.upper()
+ max_mdense_tmp = max(max_mdense_tmp, weights[0].shape[0]*weights[0].shape[2])
+ f.write('const MDenseLayer {} = {{\n {}_bias,\n {}_weights,\n {}_factor,\n {}, {}, {}, ACTIVATION_{}\n}};\n\n'
+ .format(name, name, name, name, weights[0].shape[1], weights[0].shape[0], weights[0].shape[2], activation))
+ hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[0]))
+ hf.write('extern const MDenseLayer {};\n\n'.format(name));
+ return False
+MDense.dump_layer = dump_mdense_layer
+
+def dump_conv1d_layer(self, f, hf):
+ global max_conv_inputs
+ name = self.name
+ print("printing layer " + name + " of type " + self.__class__.__name__)
+ weights = self.get_weights()
+ printVector(f, weights[0], name + '_weights')
+ printVector(f, weights[-1], name + '_bias')
+ activation = self.activation.__name__.upper()
+ max_conv_inputs = max(max_conv_inputs, weights[0].shape[1]*weights[0].shape[0])
+ f.write('const Conv1DLayer {} = {{\n {}_bias,\n {}_weights,\n {}, {}, {}, ACTIVATION_{}\n}};\n\n'
+ .format(name, name, name, weights[0].shape[1], weights[0].shape[0], weights[0].shape[2], activation))
+ hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[2]))
+ hf.write('#define {}_STATE_SIZE ({}*{})\n'.format(name.upper(), weights[0].shape[1], (weights[0].shape[0]-1)))
+ hf.write('#define {}_DELAY {}\n'.format(name.upper(), (weights[0].shape[0]-1)//2))
+ hf.write('extern const Conv1DLayer {};\n\n'.format(name));
+ return True
+Conv1D.dump_layer = dump_conv1d_layer
+
+
+def dump_embedding_layer_impl(name, weights, f, hf):
+ printVector(f, weights, name + '_weights')
+ f.write('const EmbeddingLayer {} = {{\n {}_weights,\n {}, {}\n}};\n\n'
+ .format(name, name, weights.shape[0], weights.shape[1]))
+ hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights.shape[1]))
+ hf.write('extern const EmbeddingLayer {};\n\n'.format(name));
+
+def dump_embedding_layer(self, f, hf):
+ name = self.name
+ print("printing layer " + name + " of type " + self.__class__.__name__)
+ weights = self.get_weights()[0]
+ dump_embedding_layer_impl(name, weights, f, hf)
+ return False
+Embedding.dump_layer = dump_embedding_layer
+
+
+model, _, _ = lpcnet.new_lpcnet_model(rnn_units1=384)
+model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
+#model.summary()
+
+model.load_weights(sys.argv[1])
+
+if len(sys.argv) > 2:
+ cfile = sys.argv[2];
+ hfile = sys.argv[3];
+else:
+ cfile = 'nnet_data.c'
+ hfile = 'nnet_data.h'
+
+
+f = open(cfile, 'w')
+hf = open(hfile, 'w')
+
+
+f.write('/*This file is automatically generated from a Keras model*/\n\n')
+f.write('#ifdef HAVE_CONFIG_H\n#include "config.h"\n#endif\n\n#include "nnet.h"\n#include "{}"\n\n'.format(hfile))
+
+hf.write('/*This file is automatically generated from a Keras model*/\n\n')
+hf.write('#ifndef RNN_DATA_H\n#define RNN_DATA_H\n\n#include "nnet.h"\n\n')
+
+embed_size = lpcnet.embed_size
+
+E = model.get_layer('embed_sig').get_weights()[0]
+W = model.get_layer('gru_a').get_weights()[0][:embed_size,:]
+dump_embedding_layer_impl('gru_a_embed_sig', np.dot(E, W), f, hf)
+W = model.get_layer('gru_a').get_weights()[0][embed_size:2*embed_size,:]
+dump_embedding_layer_impl('gru_a_embed_pred', np.dot(E, W), f, hf)
+W = model.get_layer('gru_a').get_weights()[0][2*embed_size:3*embed_size,:]
+dump_embedding_layer_impl('gru_a_embed_exc', np.dot(E, W), f, hf)
+W = model.get_layer('gru_a').get_weights()[0][3*embed_size:,:]
+#FIXME: dump only half the biases
+b = model.get_layer('gru_a').get_weights()[2]
+dump_dense_layer_impl('gru_a_dense_feature', W, b, 'LINEAR', f, hf)
+
+layer_list = []
+for i, layer in enumerate(model.layers):
+ if layer.dump_layer(f, hf):
+ layer_list.append(layer.name)
+
+dump_sparse_gru(model.get_layer('gru_a'), f, hf)
+
+hf.write('#define MAX_RNN_NEURONS {}\n\n'.format(max_rnn_neurons))
+hf.write('#define MAX_CONV_INPUTS {}\n\n'.format(max_conv_inputs))
+hf.write('#define MAX_MDENSE_TMP {}\n\n'.format(max_mdense_tmp))
+
+
+hf.write('typedef struct {\n')
+for i, name in enumerate(layer_list):
+ hf.write(' float {}_state[{}_STATE_SIZE];\n'.format(name, name.upper()))
+hf.write('} NNetState;\n')
+
+hf.write('\n\n#endif\n')
+
+f.close()
+hf.close()
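For reference, the layout written by printSparseVector() above (per 16-column block: a count, the non-zero row indices, and the corresponding 1x16 weight blocks, with the three gate diagonals stored separately) can be unpacked as follows. This is an illustrative sketch only, with a made-up helper name:

    import numpy as np

    def unpack_sparse_gru(diag, W, idx, N):
        # inverse of printSparseVector(): rebuild the dense (N, 3N) recurrent matrix
        A = np.zeros((N, 3*N))
        w_pos = i_pos = 0
        for blk in range(3*N//16):
            nb_nonzero = idx[i_pos]; i_pos += 1
            for _ in range(nb_nonzero):
                j = idx[i_pos]; i_pos += 1
                A[j, blk*16:(blk+1)*16] = W[w_pos:w_pos+16]
                w_pos += 16
        for k in range(3):     # restore the per-gate diagonals
            A[:, k*N:(k+1)*N] += np.diag(diag[k*N:(k+1)*N])
        return A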
--- /dev/null
+++ b/dnn/training_tf2/lpcnet.py
@@ -1,0 +1,172 @@
+#!/usr/bin/python3
+'''Copyright (c) 2018 Mozilla
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+'''
+
+import math
+from tensorflow.keras.models import Model
+from tensorflow.keras.layers import Input, GRU, Dense, Embedding, Reshape, Concatenate, Lambda, Conv1D, Multiply, Add, Bidirectional, MaxPooling1D, Activation
+from tensorflow.keras import backend as K
+from tensorflow.keras.initializers import Initializer
+from tensorflow.keras.callbacks import Callback
+from mdense import MDense
+import numpy as np
+import h5py
+import sys
+
+frame_size = 160
+pcm_bits = 8
+embed_size = 128
+pcm_levels = 2**pcm_bits
+
+class Sparsify(Callback):
+ def __init__(self, t_start, t_end, interval, density):
+ super(Sparsify, self).__init__()
+ self.batch = 0
+ self.t_start = t_start
+ self.t_end = t_end
+ self.interval = interval
+ self.final_density = density
+
+ def on_batch_end(self, batch, logs=None):
+ #print("batch number", self.batch)
+ self.batch += 1
+ if self.batch < self.t_start or ((self.batch-self.t_start) % self.interval != 0 and self.batch < self.t_end):
+ #print("don't constrain");
+ pass
+ else:
+ #print("constrain");
+ layer = self.model.get_layer('gru_a')
+ w = layer.get_weights()
+ p = w[1]
+ nb = p.shape[1]//p.shape[0]
+ N = p.shape[0]
+ #print("nb = ", nb, ", N = ", N);
+ #print(p.shape)
+ #print ("density = ", density)
+ for k in range(nb):
+ density = self.final_density[k]
+ if self.batch < self.t_end:
+ r = 1 - (self.batch-self.t_start)/(self.t_end - self.t_start)
+ density = 1 - (1-self.final_density[k])*(1 - r*r*r)
+ A = p[:, k*N:(k+1)*N]
+ A = A - np.diag(np.diag(A))
+ #A = np.transpose(A, (1, 0))
+ L=np.reshape(A, (N, N//16, 16))
+ S=np.sum(L*L, axis=-1)
+ SS=np.sort(np.reshape(S, (-1,)))
+ thresh = SS[round(N*N//16*(1-density))]
+ mask = (S>=thresh).astype('float32');
+ mask = np.repeat(mask, 16, axis=1)
+ mask = np.minimum(1, mask + np.diag(np.ones((N,))))
+ #mask = np.transpose(mask, (1, 0))
+ p[:, k*N:(k+1)*N] = p[:, k*N:(k+1)*N]*mask
+ #print(thresh, np.mean(mask))
+ w[1] = p
+ layer.set_weights(w)
+
+
+class PCMInit(Initializer):
+ def __init__(self, gain=.1, seed=None):
+ self.gain = gain
+ self.seed = seed
+
+ def __call__(self, shape, dtype=None):
+ num_rows = 1
+ for dim in shape[:-1]:
+ num_rows *= dim
+ num_cols = shape[-1]
+ flat_shape = (num_rows, num_cols)
+ if self.seed is not None:
+ np.random.seed(self.seed)
+ a = np.random.uniform(-1.7321, 1.7321, flat_shape)
+ #a[:,0] = math.sqrt(12)*np.arange(-.5*num_rows+.5,.5*num_rows-.4)/num_rows
+ #a[:,1] = .5*a[:,0]*a[:,0]*a[:,0]
+ a = a + np.reshape(math.sqrt(12)*np.arange(-.5*num_rows+.5,.5*num_rows-.4)/num_rows, (num_rows, 1))
+ return self.gain * a
+
+ def get_config(self):
+ return {
+ 'gain': self.gain,
+ 'seed': self.seed
+ }
+
+def new_lpcnet_model(rnn_units1=384, rnn_units2=16, nb_used_features = 38, training=False, adaptation=False):
+ pcm = Input(shape=(None, 3))
+ feat = Input(shape=(None, nb_used_features))
+ pitch = Input(shape=(None, 1))
+ dec_feat = Input(shape=(None, 128))
+ dec_state1 = Input(shape=(rnn_units1,))
+ dec_state2 = Input(shape=(rnn_units2,))
+
+ padding = 'valid' if training else 'same'
+ fconv1 = Conv1D(128, 3, padding=padding, activation='tanh', name='feature_conv1')
+ fconv2 = Conv1D(128, 3, padding=padding, activation='tanh', name='feature_conv2')
+
+ embed = Embedding(256, embed_size, embeddings_initializer=PCMInit(), name='embed_sig')
+ cpcm = Reshape((-1, embed_size*3))(embed(pcm))
+
+ pembed = Embedding(256, 64, name='embed_pitch')
+ cat_feat = Concatenate()([feat, Reshape((-1, 64))(pembed(pitch))])
+
+ cfeat = fconv2(fconv1(cat_feat))
+
+ fdense1 = Dense(128, activation='tanh', name='feature_dense1')
+ fdense2 = Dense(128, activation='tanh', name='feature_dense2')
+
+ cfeat = fdense2(fdense1(cfeat))
+
+ rep = Lambda(lambda x: K.repeat_elements(x, frame_size, 1))
+
+ rnn = GRU(rnn_units1, return_sequences=True, return_state=True, recurrent_activation="sigmoid", reset_after=True, name='gru_a')
+ rnn2 = GRU(rnn_units2, return_sequences=True, return_state=True, recurrent_activation="sigmoid", reset_after=True, name='gru_b')
+
+ rnn_in = Concatenate()([cpcm, rep(cfeat)])
+ md = MDense(pcm_levels, activation='softmax', name='dual_fc')
+ gru_out1, _ = rnn(rnn_in)
+ gru_out2, _ = rnn2(Concatenate()([gru_out1, rep(cfeat)]))
+ ulaw_prob = md(gru_out2)
+
+ if adaptation:
+ rnn.trainable=False
+ rnn2.trainable=False
+ md.trainable=False
+ embed.trainable=False
+
+ model = Model([pcm, feat, pitch], ulaw_prob)
+ model.rnn_units1 = rnn_units1
+ model.rnn_units2 = rnn_units2
+ model.nb_used_features = nb_used_features
+ model.frame_size = frame_size
+
+ encoder = Model([feat, pitch], cfeat)
+
+ dec_rnn_in = Concatenate()([cpcm, dec_feat])
+ dec_gru_out1, state1 = rnn(dec_rnn_in, initial_state=dec_state1)
+ dec_gru_out2, state2 = rnn2(Concatenate()([dec_gru_out1, dec_feat]), initial_state=dec_state2)
+ dec_ulaw_prob = md(dec_gru_out2)
+
+ decoder = Model([pcm, dec_feat, dec_state1, dec_state2], [dec_ulaw_prob, state1, state2])
+ return model, encoder, decoder
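The Sparsify callback above ramps the density of each recurrent gate from fully dense at t_start down to its target at t_end with a cubic curve; a small standalone sketch of that schedule (illustrative only, and taking a scalar density where the callback takes one value per gate):

    def density_at(batch, t_start, t_end, final_density):
        # mirrors the interpolation in Sparsify.on_batch_end
        if batch < t_start:
            return 1.0
        if batch >= t_end:
            return final_density
        r = 1 - (batch - t_start)/(t_end - t_start)
        return 1 - (1 - final_density)*(1 - r*r*r)

    print(density_at(2000, 2000, 40000, 0.05))    # 1.0, pruning just starting
    print(density_at(21000, 2000, 40000, 0.05))   # ~0.17, most pruning happens early
    print(density_at(40000, 2000, 40000, 0.05))   # 0.05, final density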
--- /dev/null
+++ b/dnn/training_tf2/mdense.py
@@ -1,0 +1,95 @@
+from tensorflow.keras import backend as K
+from tensorflow.keras.layers import Layer, InputSpec
+from tensorflow.keras import activations
+from tensorflow.keras import initializers, regularizers, constraints
+import numpy as np
+import math
+
+class MDense(Layer):
+
+ def __init__(self, outputs,
+ channels=2,
+ activation=None,
+ use_bias=True,
+ kernel_initializer='glorot_uniform',
+ bias_initializer='zeros',
+ kernel_regularizer=None,
+ bias_regularizer=None,
+ activity_regularizer=None,
+ kernel_constraint=None,
+ bias_constraint=None,
+ **kwargs):
+ if 'input_shape' not in kwargs and 'input_dim' in kwargs:
+ kwargs['input_shape'] = (kwargs.pop('input_dim'),)
+ super(MDense, self).__init__(**kwargs)
+ self.units = outputs
+ self.channels = channels
+ self.activation = activations.get(activation)
+ self.use_bias = use_bias
+ self.kernel_initializer = initializers.get(kernel_initializer)
+ self.bias_initializer = initializers.get(bias_initializer)
+ self.kernel_regularizer = regularizers.get(kernel_regularizer)
+ self.bias_regularizer = regularizers.get(bias_regularizer)
+ self.activity_regularizer = regularizers.get(activity_regularizer)
+ self.kernel_constraint = constraints.get(kernel_constraint)
+ self.bias_constraint = constraints.get(bias_constraint)
+ self.input_spec = InputSpec(min_ndim=2)
+ self.supports_masking = True
+
+ def build(self, input_shape):
+ assert len(input_shape) >= 2
+ input_dim = input_shape[-1]
+
+ self.kernel = self.add_weight(shape=(self.units, input_dim, self.channels),
+ initializer=self.kernel_initializer,
+ name='kernel',
+ regularizer=self.kernel_regularizer,
+ constraint=self.kernel_constraint)
+ if self.use_bias:
+ self.bias = self.add_weight(shape=(self.units, self.channels),
+ initializer=self.bias_initializer,
+ name='bias',
+ regularizer=self.bias_regularizer,
+ constraint=self.bias_constraint)
+ else:
+ self.bias = None
+ self.factor = self.add_weight(shape=(self.units, self.channels),
+ initializer='ones',
+ name='factor',
+ regularizer=self.bias_regularizer,
+ constraint=self.bias_constraint)
+ self.input_spec = InputSpec(min_ndim=2, axes={-1: input_dim})
+ self.built = True
+
+ def call(self, inputs):
+ output = K.dot(inputs, self.kernel)
+ if self.use_bias:
+ output = output + self.bias
+ output = K.tanh(output) * self.factor
+ output = K.sum(output, axis=-1)
+ if self.activation is not None:
+ output = self.activation(output)
+ return output
+
+ def compute_output_shape(self, input_shape):
+ assert input_shape and len(input_shape) >= 2
+ assert input_shape[-1]
+ output_shape = list(input_shape)
+ output_shape[-1] = self.units
+ return tuple(output_shape)
+
+ def get_config(self):
+ config = {
+ 'units': self.units,
+ 'activation': activations.serialize(self.activation),
+ 'use_bias': self.use_bias,
+ 'kernel_initializer': initializers.serialize(self.kernel_initializer),
+ 'bias_initializer': initializers.serialize(self.bias_initializer),
+ 'kernel_regularizer': regularizers.serialize(self.kernel_regularizer),
+ 'bias_regularizer': regularizers.serialize(self.bias_regularizer),
+ 'activity_regularizer': regularizers.serialize(self.activity_regularizer),
+ 'kernel_constraint': constraints.serialize(self.kernel_constraint),
+ 'bias_constraint': constraints.serialize(self.bias_constraint)
+ }
+ base_config = super(MDense, self).get_config()
+ return dict(list(base_config.items()) + list(config.items()))
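In plain numpy terms, the call() above computes, for each output unit, a sum over `channels` tanh-gated dense projections scaled by a learned factor; a self-contained sketch of the same forward pass (illustrative, not part of the patch):

    import numpy as np

    def mdense_forward(x, kernel, bias, factor, activation=lambda v: v):
        # x: (input_dim,); kernel: (units, input_dim, channels);
        # bias, factor: (units, channels) -- the shapes declared in build() above
        pre = np.einsum('i,uic->uc', x, kernel) + bias
        out = np.sum(np.tanh(pre) * factor, axis=-1)
        return activation(out)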
--- /dev/null
+++ b/dnn/training_tf2/train_lpcnet.py
@@ -1,0 +1,124 @@
+#!/usr/bin/python3
+'''Copyright (c) 2018 Mozilla
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+'''
+
+# Train an LPCNet model (note: not a WaveNet model)
+
+import lpcnet
+import sys
+import numpy as np
+from tensorflow.keras.optimizers import Adam
+from tensorflow.keras.callbacks import ModelCheckpoint
+from ulaw import ulaw2lin, lin2ulaw
+import tensorflow.keras.backend as K
+import h5py
+
+import tensorflow as tf
+gpus = tf.config.experimental.list_physical_devices('GPU')
+if gpus:
+ try:
+ tf.config.experimental.set_virtual_device_configuration(gpus[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=5120)])
+ except RuntimeError as e:
+ print(e)
+
+nb_epochs = 120
+
+# Try reducing batch_size if you run out of memory on your GPU
+batch_size = 64
+
+model, _, _ = lpcnet.new_lpcnet_model(training=True)
+
+model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
+model.summary()
+
+feature_file = sys.argv[1]
+pcm_file = sys.argv[2] # packed 8-bit mu-law data, 4 interleaved streams per sample
+frame_size = model.frame_size
+nb_features = 55
+nb_used_features = model.nb_used_features
+feature_chunk_size = 15
+pcm_chunk_size = frame_size*feature_chunk_size
+
+# load the packed 8-bit (mu-law) training data: sig, pred, in_exc, out_exc interleaved
+
+data = np.fromfile(pcm_file, dtype='uint8')
+nb_frames = len(data)//(4*pcm_chunk_size)
+
+features = np.fromfile(feature_file, dtype='float32')
+
+# limit to discrete number of frames
+data = data[:nb_frames*4*pcm_chunk_size]
+features = features[:nb_frames*feature_chunk_size*nb_features]
+
+features = np.reshape(features, (nb_frames*feature_chunk_size, nb_features))
+
+sig = np.reshape(data[0::4], (nb_frames, pcm_chunk_size, 1))
+pred = np.reshape(data[1::4], (nb_frames, pcm_chunk_size, 1))
+in_exc = np.reshape(data[2::4], (nb_frames, pcm_chunk_size, 1))
+out_exc = np.reshape(data[3::4], (nb_frames, pcm_chunk_size, 1))
+del data
+
+print("ulaw std = ", np.std(out_exc))
+
+features = np.reshape(features, (nb_frames, feature_chunk_size, nb_features))
+features = features[:, :, :nb_used_features]
+features[:,:,18:36] = 0
+
+fpad1 = np.concatenate([features[0:1, 0:2, :], features[:-1, -2:, :]], axis=0)
+fpad2 = np.concatenate([features[1:, :2, :], features[0:1, -2:, :]], axis=0)
+features = np.concatenate([fpad1, features, fpad2], axis=1)
+
+
+periods = (.1 + 50*features[:,:,36:37]+100).astype('int16')
+#periods = np.minimum(periods, 255)
+
+in_data = np.concatenate([sig, pred, in_exc], axis=-1)
+
+del sig
+del pred
+del in_exc
+
+# dump models to disk as we go
+checkpoint = ModelCheckpoint('lpcnet32c_384_10_G16_{epoch:02d}.h5')
+
+#Set this to True to adapt an existing model (e.g. on new data)
+adaptation = False
+
+if adaptation:
+ #Adapting from an existing model
+ model.load_weights('lpcnet24c_384_10_G16_120.h5')
+ sparsify = lpcnet.Sparsify(0, 0, 1, (0.05, 0.05, 0.2))
+ lr = 0.0001
+ decay = 0
+else:
+ #Training from scratch
+ sparsify = lpcnet.Sparsify(2000, 40000, 400, (0.05, 0.05, 0.2))
+ lr = 0.001
+ decay = 5e-5
+
+model.compile(optimizer=Adam(lr, decay=decay, beta_2=0.99), loss='sparse_categorical_crossentropy')
+model.save_weights('lpcnet32c_384_10_G16_00.h5');
+model.fit([in_data, features, periods], out_exc, batch_size=batch_size, epochs=nb_epochs, validation_split=0.0, callbacks=[checkpoint, sparsify])
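The slicing of `data` above assumes the training file stores four interleaved 8-bit streams per sample; a sketch of how such a buffer could be packed for a quick test (the helper name is made up, this is not the actual data-dump tool):

    import numpy as np

    def pack_training_bytes(sig, pred, in_exc, out_exc):
        # interleave four uint8 (mu-law) streams so that data[0::4] is sig,
        # data[1::4] is pred, data[2::4] is in_exc and data[3::4] is out_exc
        data = np.empty(4*len(sig), dtype='uint8')
        data[0::4] = sig
        data[1::4] = pred
        data[2::4] = in_exc
        data[3::4] = out_exc
        return data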
--- /dev/null
+++ b/dnn/training_tf2/ulaw.py
@@ -1,0 +1,19 @@
+
+import numpy as np
+import math
+
+scale = 255.0/32768.0
+scale_1 = 32768.0/255.0
+def ulaw2lin(u):
+ u = u - 128
+ s = np.sign(u)
+ u = np.abs(u)
+ return s*scale_1*(np.exp(u/128.*math.log(256))-1)
+
+
+def lin2ulaw(x):
+ s = np.sign(x)
+ x = np.abs(x)
+ u = (s*(128*np.log(1+scale*x)/math.log(256)))
+ u = np.clip(128 + np.round(u), 0, 255)
+ return u.astype('int16')
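A quick round-trip check of the two helpers above (illustrative only; companding is lossy, so the reconstruction is only approximate):

    import numpy as np
    from ulaw import ulaw2lin, lin2ulaw   # assumes this file is on the path

    x = np.array([-20000., -1000., 0., 1000., 20000.])
    u = lin2ulaw(x)            # 8-bit codes in [0, 255], 128 maps to zero
    y = ulaw2lin(u)
    print(u)
    print(np.round(y))         # roughly recovers x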
--