shithub: opus

--- /dev/null

+++ b/dnn/nfec_enc.c

@@ -1,0 +1,56 @@

+#include "nfec_enc.h"

+#include "nnet.h"

+#include "nfec_enc_data.h"

+/*
+ * Run the encoder network on one input frame.
+ *
+ * The outputs of the eight encoder layers are stored back to back in a local
+ * scratch buffer; the concatenation is then fed to the bits_dense conv layer
+ * (producing the latents) and to gdense1 -> gdense2 (producing the initial
+ * state).
+ *
+ *   enc_state      state/memory buffers handed to the GRU and conv1d layers
+ *   latents        receives the output of the bits_dense layer
+ *   initial_state  receives the output of the gdense2 layer
+ *   input          input of the first dense layer (enc_dense1)
+ */
+void nfec_encode_dframe(struct NFECEncState *enc_state, float *latents, float *initial_state, const float *input)
+{
+    float buffer[ENC_DENSE1_OUT_SIZE + ENC_DENSE2_OUT_SIZE + ENC_DENSE3_OUT_SIZE + ENC_DENSE4_OUT_SIZE + ENC_DENSE5_OUT_SIZE + ENC_DENSE6_OUT_SIZE + ENC_DENSE7_OUT_SIZE + ENC_DENSE8_OUT_SIZE + GDENSE1_OUT_SIZE];
+    float *prev;           /* output of the layer that ran last */
+    float *next = buffer;  /* slot for the output of the layer about to run */
+
+    /* layer 1 (dense) is the only one reading the caller's input */
+    compute_dense(&enc_dense1, next, input);
+    prev = next;
+    next += ENC_DENSE1_OUT_SIZE;
+
+    /* layer 2 (GRU): its output is taken from the state buffer */
+    compute_gru3(&enc_dense2, enc_state->dense2_state, prev);
+    memcpy(next, enc_state->dense2_state, ENC_DENSE2_OUT_SIZE * sizeof(float));
+    prev = next;
+    next += ENC_DENSE2_OUT_SIZE;
+
+    /* layer 3 (dense) */
+    compute_dense(&enc_dense3, next, prev);
+    prev = next;
+    next += ENC_DENSE3_OUT_SIZE;
+
+    /* layer 4 (GRU) */
+    compute_gru3(&enc_dense4, enc_state->dense4_state, prev);
+    memcpy(next, enc_state->dense4_state, ENC_DENSE4_OUT_SIZE * sizeof(float));
+    prev = next;
+    next += ENC_DENSE4_OUT_SIZE;
+
+    /* layer 5 (dense) */
+    compute_dense(&enc_dense5, next, prev);
+    prev = next;
+    next += ENC_DENSE5_OUT_SIZE;
+
+    /* layer 6 (GRU) */
+    compute_gru3(&enc_dense6, enc_state->dense6_state, prev);
+    memcpy(next, enc_state->dense6_state, ENC_DENSE6_OUT_SIZE * sizeof(float));
+    prev = next;
+    next += ENC_DENSE6_OUT_SIZE;
+
+    /* layers 7 and 8 (dense) */
+    compute_dense(&enc_dense7, next, prev);
+    prev = next;
+    next += ENC_DENSE7_OUT_SIZE;
+    compute_dense(&enc_dense8, next, prev);
+    next += ENC_DENSE8_OUT_SIZE;
+
+    /* latents are computed from the start of the concatenated buffer */
+    compute_conv1d(&bits_dense, latents, enc_state->bits_dense_state, buffer);
+
+    /* initial state: gdense1 writes into the tail of the buffer, gdense2 reads it */
+    compute_dense(&gdense1, next, buffer);
+    compute_dense(&gdense2, initial_state, next);
+}

\ No newline at end of file

--- /dev/null

+++ b/dnn/nfec_enc.h

@@ -1,0 +1,15 @@

+#ifndef _NFEC_ENC_H

+#define _NFEC_ENC_H

+#include "nfec_enc_data.h"

+/*
+ * Encoder memory used by nfec_encode_dframe(): one state buffer per GRU layer
+ * and one memory buffer for the bits_dense conv layer.
+ * NOTE(review): the GRU buffers are 3x the *_STATE_SIZE while only *_OUT_SIZE
+ * floats are copied out of them by the encoder -- confirm the factor 3.
+ */
+struct NFECEncState{

+    /* state passed to compute_gru3() for enc_dense2 */
+    float dense2_state[3 * ENC_DENSE2_STATE_SIZE];

+    /* state passed to compute_gru3() for enc_dense4 */
+    float dense4_state[3 * ENC_DENSE4_STATE_SIZE];

+    /* state passed to compute_gru3() for enc_dense6 */
+    float dense6_state[3 * ENC_DENSE6_STATE_SIZE];

+    /* memory passed to compute_conv1d() for bits_dense */
+    float bits_dense_state[BITS_DENSE_STATE_SIZE];

+};

+void nfec_encode_dframe(struct NFECEncState *enc_state, float *latents, float *initial_state, const float *input);

+#endif

\ No newline at end of file

--- /dev/null

+++ b/dnn/nfec_enc_demo.c

@@ -1,0 +1,46 @@

+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "nfec_enc.h"

+/* Print the command-line synopsis on stdout, then terminate with exit status 1. */
+void usage()
+{
+    fputs("nfec_enc_demo <features>", stdout);
+    exit(1);
+}

+/*
+ * Demo driver for the encoder.
+ *
+ * Reads records of 32 floats from the feature file named by argv[1], keeps the
+ * first 16 floats of each record and calls nfec_encode_dframe() once for every
+ * two records read.  The encoder outputs are computed but not written anywhere.
+ *
+ * Returns 0 when the input is exhausted.  A missing argument or an unreadable
+ * file ends in usage(), which exits with status 1.
+ */
+int main(int argc, char **argv)
+{
+    /* zero the layer state so the first frame does not read stack garbage */
+    struct NFECEncState enc_state = {0};
+    float feature_buffer[32];
+    /* NOTE(review): only 2*16 of these 2*20 entries are filled below; zeroed so
+       the rest is defined -- confirm the intended per-frame feature count. */
+    float dframe[2 * 20] = {0};
+    float latents[80];
+    float initial_state[24];
+    int index = 0;
+    FILE *fid;
+
+    if (argc < 2)
+    {
+        usage();
+    }
+
+    fid = fopen(argv[1], "rb");
+    if (fid == NULL)
+    {
+        fprintf(stderr, "could not open feature file %s\n", argv[1]);
+        usage();
+    }
+
+    while (fread(feature_buffer, sizeof(float), 32, fid) == 32)
+    {
+        /* memcpy needs the address of the destination slot, not the float stored in it */
+        memcpy(&dframe[16 * index++], feature_buffer, 16*sizeof(float));
+
+        if (index == 2)
+        {
+            nfec_encode_dframe(&enc_state, latents, initial_state, dframe);
+            index = 0;
+        }
+    }
+
+    fclose(fid);
+    return 0;
+}

+/* gcc -DDISABLE_DOT_PROD nfec_enc_demo.c nfec_enc.c nnet.c nfec_enc_data.c -o nfec_enc_demo */

\ No newline at end of file

--- a/dnn/nnet.c

+++ b/dnn/nnet.c

@@ -38,6 +38,7 @@

 #include "tansig_table.h"

 #include "nnet.h"

 #include "nnet_data.h"

+#include "nfec_enc_data.h"

 #include "plc_data.h"

 #ifdef NO_OPTIMIZATIONS

@@ -129,6 +130,11 @@

    compute_activation(output, output, N, layer->activation);

+/* Dense-layer entry point: forwards its three arguments unchanged to
+   _lpcnet_compute_dense(). */
+void compute_dense(const DenseLayer *layer, float *output, const float *input)
+{
+   /* plain call: ISO C does not allow `return <expr>;` in a void function */
+   _lpcnet_compute_dense(layer, output, input);
+}

 void compute_mdense(const MDenseLayer *layer, float *output, const float *input)

    int i, c;

@@ -316,7 +322,7 @@

       state[i] = h[i];

-#define MAX_RNN_NEURONS_ALL IMAX(MAX_RNN_NEURONS, PLC_MAX_RNN_NEURONS)

+#define MAX_RNN_NEURONS_ALL IMAX(IMAX(MAX_RNN_NEURONS, PLC_MAX_RNN_NEURONS), NFEC_ENC_MAX_RNN_NEURONS)

 void compute_gruB(const GRULayer *gru, const float* gru_b_condition, float *state, const float *input)

@@ -442,12 +448,14 @@

       state[i] = z[i]*state[i] + (1-z[i])*h[i];

+#define MAX_CONV_INPUTS_ALL IMAX(MAX_CONV_INPUTS, NFEC_ENC_MAX_CONV_INPUTS)

 void compute_conv1d(const Conv1DLayer *layer, float *output, float *mem, const float *input)

    int i;

    int N, M;

    int stride;

-   float tmp[MAX_CONV_INPUTS];

+   float tmp[MAX_CONV_INPUTS_ALL];

    celt_assert(input != output);

    celt_assert(layer->nb_inputs*layer->kernel_size <= MAX_CONV_INPUTS);

    RNN_COPY(tmp, mem, layer->nb_inputs*(layer->kernel_size-1));

--- a/dnn/nnet.h

+++ b/dnn/nnet.h

@@ -98,6 +98,8 @@

 void _lpcnet_compute_dense(const DenseLayer *layer, float *output, const float *input);

+void compute_dense(const DenseLayer *layer, float *output, const float *input);

 void compute_mdense(const MDenseLayer *layer, float *output, const float *input);

 int sample_mdense(const MDenseLayer *layer,  const float *input, const float *sampling_logit_table, kiss99_ctx *rng);

--- /dev/null

+++ b/dnn/training_tf2/dump_nfec_model.py

@@ -1,0 +1,123 @@

+import argparse

+import os

+parser = argparse.ArgumentParser()

+parser.add_argument('weights', metavar="<weight file>", type=str, help='model weight file in hdf5 format')

+parser.add_argument('--cond-size', type=int, help="conditioning size (default: 256)", default=256)

+parser.add_argument('--latent-dim', type=int, help="dimension of latent space (default: 80)", default=80)

+args = parser.parse_args()

+# now import the heavy stuff

+from keraslayerdump import dump_conv1d_layer, dump_dense_layer, dump_gru_layer

+from rdovae import new_rdovae_model

+def start_header(header_fid, header_name):

+    """Write the opening lines of the generated C header to header_fid.
+
+    The text consists of an include guard derived from header_name followed by
+    an include of nnet.h.  The guard is left open; finish_header() writes the
+    matching #endif.
+    """
+    # guard = "_" + upper-cased base name without its last two characters + "_H";
+    # the slice assumes the name ends in a two-character suffix such as ".h"
+    header_guard = "_" + os.path.basename(header_name)[:-2].upper() + "_H"

+    header_fid.write(

+f"""

+#ifndef {header_guard}

+#define {header_guard}

+#include "nnet.h"

+"""

+    )

+def finish_header(header_fid):

+    """Write the closing #endif of the generated header to header_fid."""
+    header_fid.write(

+"""

+#endif

+"""

+    )

+def start_source(source_fid, header_name, weight_file):

+    """Write the opening lines of the generated C source to source_fid.
+
+    The text is a comment naming weight_file as the origin of the data,
+    followed by an #include of header_name.
+    """
+    source_fid.write(

+f"""

+/* this source file was automatically generated from weight file {weight_file} */

+#include "{header_name}"

+"""

+    )

+def finish_source(source_fid):

+    pass

+if __name__ == "__main__":

+    model, encoder, decoder, qembedding = new_rdovae_model(20, args.latent_dim, cond_size=args.cond_size)

+    model.load_weights(args.weights)

+    # for the time being only dump encoder

+    encoder_dense_names = [

+        'enc_dense1',

+        'enc_dense3',

+        'enc_dense5',

+        'enc_dense7',

+        'enc_dense8',

+        'gdense1',

+        'gdense2'

+    ]

+    encoder_gru_names = [

+        'enc_dense2',

+        'enc_dense4',

+        'enc_dense6'

+    ]

+    encoder_conv1d_names = [

+        'bits_dense'

+    ]

+    source_fid = open("nfec_enc_data.c", 'w')

+    header_fid = open("nfec_enc_data.h", 'w')

+    start_header(header_fid, "nfec_enc_data.h")

+    start_source(source_fid, "nfec_enc_data.h", os.path.basename(args.weights))

+    # dump GRUs

+    max_rnn_neurons = max(

+        [

+            dump_gru_layer(encoder.get_layer(name), source_fid, header_fid)

+            for name in encoder_gru_names

+        ]

+    )

+    # dump conv layers

+    max_conv_inputs = max(

+        [

+            dump_conv1d_layer(encoder.get_layer(name), source_fid, header_fid)

+            for name in encoder_conv1d_names

+        ]

+    )

+    # dump Dense layers

+    for name in encoder_dense_names:

+        layer = encoder.get_layer(name)

+        dump_dense_layer(layer, source_fid, header_fid)

+    # some global constants

+    header_fid.write(

+f"""

+#define NFEC_NUM_FEATURES 20

+#define NFEC_LATENT_DIM {args.latent_dim}

+#define NFEC_ENC_MAX_RNN_NEURONS {max_rnn_neurons}

+#define NFEC_ENC_MAX_CONV_INPUTS {max_conv_inputs}

+"""

+    )

+    finish_header(header_fid)

+    finish_source(source_fid)

+    header_fid.close()

+    source_fid.close()

--- /dev/null

+++ b/dnn/training_tf2/keraslayerdump.py

@@ -1,0 +1,160 @@

+""" helper functions for dumping some Keras layers to C files """

+import numpy as np

+def printVector(f, vector, name, dtype='float', dotp=False):

+    """ prints vector as one-dimensional C array """

+    if dotp:

+        vector = vector.reshape((vector.shape[0]//4, 4, vector.shape[1]//8, 8))

+        vector = vector.transpose((2, 0, 3, 1))

+    v = np.reshape(vector, (-1))

+    f.write('static const {} {}[{}] = {{\n   '.format(dtype, name, len(v)))

+    for i in range(0, len(v)):

+        f.write('{}'.format(v[i]))

+        if (i!=len(v)-1):

+            f.write(',')

+        else:

+            break;

+        if (i%8==7):

+            f.write("\n   ")

+        else:

+            f.write(" ")

+    f.write('\n};\n\n')

+    return vector

+def printSparseVector(f, A, name, have_diag=True):

+    """Write matrix A to f in a block-sparse layout and return its quantised copy.
+
+    Three C arrays may be emitted through printVector(): `<name>_diag` (only when
+    have_diag is set), `<name>` (the kept 4x8 blocks, once inside `#ifdef DOT_PROD`
+    and once inside the `#else` branch) and `<name>_idx` (the block index).
+    When have_diag is set, A is modified in place: the diagonals of its three
+    N-column sub-matrices are removed.  Returns AQ, i.e. A*128 rounded and
+    clipped to [-128, 127].
+    """
+    N = A.shape[0]

+    M = A.shape[1]

+    # W collects the quantised blocks, W0 the unquantised ones
+    W = np.zeros((0,), dtype='int')

+    W0 = np.zeros((0,))

+    if have_diag:

+        # pull out the diagonals of columns [0:N], [N:2N] and [2N:], zero them
+        # in A and emit them as a separate array
+        diag = np.concatenate([np.diag(A[:,:N]), np.diag(A[:,N:2*N]), np.diag(A[:,2*N:])])

+        A[:,:N] = A[:,:N] - np.diag(np.diag(A[:,:N]))

+        A[:,N:2*N] = A[:,N:2*N] - np.diag(np.diag(A[:,N:2*N]))

+        A[:,2*N:] = A[:,2*N:] - np.diag(np.diag(A[:,2*N:]))

+        printVector(f, diag, name + '_diag')

+    AQ = np.minimum(127, np.maximum(-128, np.round(A*128))).astype('int')

+    idx = np.zeros((0,), dtype='int')

+    # one pass per group of 8 columns
+    for i in range(M//8):

+        # reserve a slot for this group's block count; filled in after the inner loop
+        pos = idx.shape[0]

+        idx = np.append(idx, -1)

+        nb_nonzero = 0

+        # scan the group in blocks of 4 rows
+        for j in range(N//4):

+            block = A[j*4:(j+1)*4, i*8:(i+1)*8]

+            qblock = AQ[j*4:(j+1)*4, i*8:(i+1)*8]

+            # keep only blocks that are not (numerically) all zero
+            if np.sum(np.abs(block)) > 1e-10:

+                nb_nonzero = nb_nonzero + 1

+                # record the first row of the kept block
+                idx = np.append(idx, j*4)

+                # the quantised block is stored transposed, the float block is not
+                vblock = qblock.transpose((1,0)).reshape((-1,))

+                W0 = np.concatenate([W0, block.reshape((-1,))])

+                W = np.concatenate([W, vblock])

+        idx[pos] = nb_nonzero

+    # both variants are declared with the C type qweight
+    f.write('#ifdef DOT_PROD\n')

+    printVector(f, W, name, dtype='qweight')

+    f.write('#else /*DOT_PROD*/\n')

+    printVector(f, W0, name, dtype='qweight')

+    f.write('#endif /*DOT_PROD*/\n')

+    printVector(f, idx, name + '_idx', dtype='int')

+    return AQ

+def dump_sparse_gru(self, f, hf):

+    name = 'sparse_' + self.name

+    print("printing layer " + name + " of type sparse " + self.__class__.__name__)

+    weights = self.get_weights()

+    qweights = printSparseVector(f, weights[1], name + '_recurrent_weights')

+    printVector(f, weights[-1], name + '_bias')

+    subias = weights[-1].copy()

+    subias[1,:] = subias[1,:] - np.sum(qweights*(1./128),axis=0)

+    printVector(f, subias, name + '_subias')

+    if hasattr(self, 'activation'):

+        activation = self.activation.__name__.upper()

+    else:

+        activation = 'TANH'

+    if hasattr(self, 'reset_after') and not self.reset_after:

+        reset_after = 0

+    else:

+        reset_after = 1

+    neurons = weights[0].shape[1]//3

+    max_rnn_neurons = neurons

+    f.write('const SparseGRULayer {} = {{\n   {}_bias,\n   {}_subias,\n   {}_recurrent_weights_diag,\n   {}_recurrent_weights,\n   {}_recurrent_weights_idx,\n   {}, ACTIVATION_{}, {}\n}};\n\n'

+            .format(name, name, name, name, name, name, weights[0].shape[1]//3, activation, reset_after))

+    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))

+    hf.write('#define {}_STATE_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))

+    hf.write('extern const SparseGRULayer {};\n\n'.format(name));

+    return max_rnn_neurons

+def dump_gru_layer(self, f, hf, dotp=False, sparse=False):

+    name = self.name

+    print("printing layer " + name + " of type " + self.__class__.__name__)

+    weights = self.get_weights()

+    if sparse:

+        qweight = printSparseVector(f, weights[0], name + '_weights', have_diag=False)

+    else:

+        qweight = printVector(f, weights[0], name + '_weights')

+    if dotp:

+        f.write('#ifdef DOT_PROD\n')

+        qweight2 = np.clip(np.round(128.*weights[1]).astype('int'), -128, 127)

+        printVector(f, qweight2, name + '_recurrent_weights', dotp=True, dtype='qweight')

+        f.write('#else /*DOT_PROD*/\n')

+    else:

+        qweight2 = weights[1]

+    printVector(f, weights[1], name + '_recurrent_weights')

+    if dotp:

+        f.write('#endif /*DOT_PROD*/\n')

+    printVector(f, weights[-1], name + '_bias')

+    subias = weights[-1].copy()

+    subias[0,:] = subias[0,:] - np.sum(qweight*(1./128.),axis=0)

+    subias[1,:] = subias[1,:] - np.sum(qweight2*(1./128.),axis=0)

+    printVector(f, subias, name + '_subias')

+    if hasattr(self, 'activation'):

+        activation = self.activation.__name__.upper()

+    else:

+        activation = 'TANH'

+    if hasattr(self, 'reset_after') and not self.reset_after:

+        reset_after = 0

+    else:

+        reset_after = 1

+    neurons = weights[0].shape[1]//3

+    max_rnn_neurons = neurons

+    f.write('const GRULayer {} = {{\n   {}_bias,\n   {}_subias,\n   {}_weights,\n   NULL,\n   {}_recurrent_weights,\n   {}, {}, ACTIVATION_{}, {}\n}};\n\n'

+            .format(name, name, name, name, name, weights[0].shape[0], weights[0].shape[1]//3, activation, reset_after))

+    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))

+    hf.write('#define {}_STATE_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))

+    hf.write('extern const GRULayer {};\n\n'.format(name));

+    return max_rnn_neurons

+def dump_dense_layer_impl(name, weights, bias, activation, f, hf):

+    printVector(f, weights, name + '_weights')

+    printVector(f, bias, name + '_bias')

+    f.write('const DenseLayer {} = {{\n   {}_bias,\n   {}_weights,\n   {}, {}, ACTIVATION_{}\n}};\n\n'

+            .format(name, name, name, weights.shape[0], weights.shape[1], activation))

+    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights.shape[1]))

+    hf.write('extern const DenseLayer {};\n\n'.format(name));

+def dump_dense_layer(self, f, hf):

+    name = self.name

+    print("printing layer " + name + " of type " + self.__class__.__name__)

+    weights = self.get_weights()

+    activation = self.activation.__name__.upper()

+    dump_dense_layer_impl(name, weights[0], weights[1], activation, f, hf)

+    return False

+def dump_conv1d_layer(self, f, hf):

+    name = self.name

+    print("printing layer " + name + " of type " + self.__class__.__name__)

+    weights = self.get_weights()

+    printVector(f, weights[0], name + '_weights')

+    printVector(f, weights[-1], name + '_bias')

+    activation = self.activation.__name__.upper()

+    max_conv_inputs = weights[0].shape[1]*weights[0].shape[0]

+    f.write('const Conv1DLayer {} = {{\n   {}_bias,\n   {}_weights,\n   {}, {}, {}, ACTIVATION_{}\n}};\n\n'

+            .format(name, name, name, weights[0].shape[1], weights[0].shape[0], weights[0].shape[2], activation))

+    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[2]))

+    hf.write('#define {}_STATE_SIZE ({}*{})\n'.format(name.upper(), weights[0].shape[1], (weights[0].shape[0]-1)))

+    hf.write('#define {}_DELAY {}\n'.format(name.upper(), (weights[0].shape[0]-1)//2))

+    hf.write('extern const Conv1DLayer {};\n\n'.format(name));

+    return max_conv_inputs

\ No newline at end of file

--- a/dnn/vec_neon.h

+++ b/dnn/vec_neon.h

@@ -33,7 +33,12 @@

 #ifndef DISABLE_DOT_PROD

 #define DOT_PROD

 #endif

+/* qweight is signed char when DOT_PROD is defined and float otherwise. */
+#ifdef DOT_PROD

 typedef signed char qweight;

+#else

+typedef float qweight;

+#endif

 #ifndef LPCNET_TEST

--

⑨