shithub: opus

--- /dev/null

+++ b/dnn/nfec_dec.c

@@ -1,0 +1,118 @@

+#include "nfec_dec.h"

+//#define DEBUG

+#ifdef DEBUG

+#include <stdio.h>

+#endif

+void nfec_dec_init_states(

+    NFECDecState *h,            /* io: decoder state; its three state buffers are written here */

+    const float *initial_state  /* i: initial state vector, fed unchanged to all three layers */

+    )

+{

+    /* derive each GRU state buffer from the same initial state via its own dense layer */

+    compute_dense(&state1, h->dense2_state, initial_state); /* state1 -> dense2_state */

+    compute_dense(&state2, h->dense4_state, initial_state); /* state2 -> dense4_state */

+    compute_dense(&state3, h->dense6_state, initial_state); /* state3 -> dense6_state */

+}

+void nfec_dec_unquantize_latent_vector(

+    float *z,       /* o: unquantized latent vector */

+    const int *zq,  /* i: quantized latent vector */

+    int quant_level /* i: quantization level */

+    )

+{

+    /* row of the scale table that belongs to the requested quantization level */

+    const float *level_scales = nfec_stats_quant_scales + quant_level * NFEC_STATS_NUM_LATENTS;

+    int k;

+    /* convert each integer latent to float and divide by its per-latent scale */

+    for (k = 0; k < NFEC_STATS_NUM_LATENTS; ++k)

+    {

+        z[k] = (float) zq[k] / level_scales[k];

+    }

+}

+void nfec_decode_qframe(

+    NFECDecState *dec_state,    /* io: decoder state (the GRU state buffers handed to compute_gru2) */

+    float *qframe,              /* o: quadruple feature frame (four concatenated frames) */

+    const float *input          /* i: latent vector */

+    )

+{

+    float buffer[DEC_DENSE1_OUT_SIZE + DEC_DENSE2_OUT_SIZE + DEC_DENSE3_OUT_SIZE + DEC_DENSE4_OUT_SIZE + DEC_DENSE5_OUT_SIZE + DEC_DENSE6_OUT_SIZE + DEC_DENSE7_OUT_SIZE + DEC_DENSE8_OUT_SIZE]; /* outputs of layers 1..8, stored back to back */

+    int output_index = 0; /* offset in buffer where the current layer writes */

+    int input_index = 0;  /* offset in buffer where the current layer reads (previous layer's output) */

+#ifdef DEBUG

+    static FILE *fids[8] = {NULL}; /* static: dump files are opened on the first call and reused; not closed in this function */

+    int i;

+    char filename[256];

+    for (i=0; i < 8; i ++)

+    {

+        if (fids[i] == NULL)

+        {

+            sprintf(filename, "y%d.f32", i + 1); /* y1.f32 .. y8.f32, one file per layer output */

+            fids[i] = fopen(filename, "wb"); /* NOTE(review): result is not checked before the fwrite calls below */

+        }

+    }

+#endif

+    /* run decoder stack and concatenate the layer outputs in buffer */

+    compute_dense(&dec_dense1, &buffer[output_index], input);

+#ifdef DEBUG

+    fwrite(&buffer[output_index], sizeof(buffer[0]), DEC_DENSE1_OUT_SIZE, fids[0]);

+#endif

+    input_index = output_index; /* the next layer reads what was just written */

+    output_index += DEC_DENSE1_OUT_SIZE;

+    compute_gru2(&dec_dense2, dec_state->dense2_state, &buffer[input_index]); /* presumably updates dense2_state in place -- confirm against compute_gru2 */

+    memcpy(&buffer[output_index], dec_state->dense2_state, DEC_DENSE2_OUT_SIZE * sizeof(float)); /* the GRU state is stored as this layer's output */

+#ifdef DEBUG

+    fwrite(&buffer[output_index], sizeof(buffer[0]), DEC_DENSE2_OUT_SIZE, fids[1]);

+#endif

+    input_index = output_index;

+    output_index += DEC_DENSE2_OUT_SIZE;

+    compute_dense(&dec_dense3, &buffer[output_index], &buffer[input_index]);

+#ifdef DEBUG

+    fwrite(&buffer[output_index], sizeof(buffer[0]), DEC_DENSE3_OUT_SIZE, fids[2]);

+#endif

+    input_index = output_index;

+    output_index += DEC_DENSE3_OUT_SIZE;

+    compute_gru2(&dec_dense4, dec_state->dense4_state, &buffer[input_index]); /* same pattern as dec_dense2, using dense4_state */

+    memcpy(&buffer[output_index], dec_state->dense4_state, DEC_DENSE4_OUT_SIZE * sizeof(float));

+#ifdef DEBUG

+    fwrite(&buffer[output_index], sizeof(buffer[0]), DEC_DENSE4_OUT_SIZE, fids[3]);

+#endif

+    input_index = output_index;

+    output_index += DEC_DENSE4_OUT_SIZE;

+    compute_dense(&dec_dense5, &buffer[output_index], &buffer[input_index]);

+#ifdef DEBUG

+    fwrite(&buffer[output_index], sizeof(buffer[0]), DEC_DENSE5_OUT_SIZE, fids[4]);

+#endif

+    input_index = output_index;

+    output_index += DEC_DENSE5_OUT_SIZE;

+    compute_gru2(&dec_dense6, dec_state->dense6_state, &buffer[input_index]); /* same pattern as dec_dense2, using dense6_state */

+    memcpy(&buffer[output_index], dec_state->dense6_state, DEC_DENSE6_OUT_SIZE * sizeof(float));

+#ifdef DEBUG

+    fwrite(&buffer[output_index], sizeof(buffer[0]), DEC_DENSE6_OUT_SIZE, fids[5]);

+#endif

+    input_index = output_index;

+    output_index += DEC_DENSE6_OUT_SIZE;

+    compute_dense(&dec_dense7, &buffer[output_index], &buffer[input_index]);

+#ifdef DEBUG

+    fwrite(&buffer[output_index], sizeof(buffer[0]), DEC_DENSE7_OUT_SIZE, fids[6]);

+#endif

+    input_index = output_index;

+    output_index += DEC_DENSE7_OUT_SIZE;

+    compute_dense(&dec_dense8, &buffer[output_index], &buffer[input_index]);

+#ifdef DEBUG

+    fwrite(&buffer[output_index], sizeof(buffer[0]), DEC_DENSE8_OUT_SIZE, fids[7]);

+#endif

+    output_index += DEC_DENSE8_OUT_SIZE; /* not read again after this point */

+    compute_dense(&dec_final, qframe, buffer); /* the final layer is given the start of buffer, i.e. the concatenated outputs */

+}

\ No newline at end of file

--- /dev/null

+++ b/dnn/nfec_dec.h

@@ -1,0 +1,17 @@

+#ifndef _NFEC_DEC_H

+#define _NFEC_DEC_H

+#include "nfec_dec_data.h"

+#include "nfec_stats_data.h"

+typedef struct { /* state buffers of the three decoder GRU layers */

+    float dense2_state[DEC_DENSE2_STATE_SIZE]; /* state used with GRU layer dec_dense2 */

+    float dense4_state[DEC_DENSE2_STATE_SIZE]; /* state used with dec_dense4; NOTE(review): sized with DEC_DENSE2_STATE_SIZE -- confirm all GRUs share this size */

+    float dense6_state[DEC_DENSE2_STATE_SIZE]; /* state used with dec_dense6; NOTE(review): same sizing assumption as dense4_state */

+} NFECDecState;

+void nfec_dec_init_states(NFECDecState *h, const float * initial_state); /* fill the decoder state buffers from an initial state vector */

+void nfec_dec_unquantize_latent_vector(float *z, const int *zq, int quant_level); /* integer latents -> float latents for the given quantization level */

+void nfec_decode_qframe(NFECDecState *h, float *qframe, const float * z); /* decode one latent vector z into a quadruple feature frame */

+#endif

\ No newline at end of file

--- /dev/null

+++ b/dnn/nfec_dec_demo.c

@@ -1,0 +1,68 @@

+#include <stdio.h>

+#include <stdlib.h>

+#include <string.h>

+#include "nfec_dec.h"

+#include "nfec_enc.h"

+/* Print the command-line synopsis on stdout and terminate with exit status 1. */

+void usage()

+{

+    fputs("nfec_dec_demo <input> <output>\n", stdout);

+    exit(1);

+}

+/* Decoder demo: reads an initial state followed by a sequence of latent vectors

+ * from <input>, decodes every latent vector and appends the resulting quadruple

+ * feature frame to <output>.

+ * Returns 0 on success and 1 if reading the initial state or writing fails;

+ * bad arguments or unopenable files end the program through usage(). */

+int main(int argc, char **argv)

+{

+    NFECDecState dec_state;

+    float qframe[4 * NFEC_DEC_NUM_FEATURES];

+    float latents[80];

+    float initial_state[24];

+    /* element counts are taken from the buffers themselves so that no read or

+     * write can ever run past the end of an array */

+    const size_t num_qframe = sizeof(qframe) / sizeof(qframe[0]);

+    const size_t num_latents = sizeof(latents) / sizeof(latents[0]);

+    const size_t num_state = sizeof(initial_state) / sizeof(initial_state[0]);

+    FILE *in_fid, *out_fid;

+    int ret = 0;

+    memset(&dec_state, 0, sizeof(dec_state));

+    if (argc < 3) usage();

+    in_fid = fopen(argv[1], "rb");

+    if (in_fid == NULL)

+    {

+        perror("Could not open input file");

+        usage();

+    }

+    out_fid = fopen(argv[2], "wb");

+    if (out_fid == NULL)

+    {

+        perror("Could not open output file");

+        fclose(in_fid);

+        usage();

+    }

+    /* read initial state from input stream */

+    if (fread(initial_state, sizeof(float), num_state, in_fid) != num_state)

+    {

+        /* a short read at end of file does not set errno, so perror is only

+         * meaningful when the stream reports a real error */

+        if (ferror(in_fid))

+        {

+            perror("error while reading initial state");

+        }

+        else

+        {

+            fprintf(stderr, "error while reading initial state: unexpected end of file\n");

+        }

+        fclose(in_fid);

+        fclose(out_fid);

+        return 1;

+    }

+    /* initialize GRU states */

+    nfec_dec_init_states(&dec_state, initial_state);

+    /* start decoding; a trailing partial latent vector ends the loop */

+    while (fread(latents, sizeof(float), num_latents, in_fid) == num_latents)

+    {

+        nfec_decode_qframe(&dec_state, qframe, latents);

+        if (fwrite(qframe, sizeof(float), num_qframe, out_fid) != num_qframe)

+        {

+            perror("error while writing output file");

+            ret = 1;

+            break;

+        }

+    }

+    fclose(in_fid);

+    /* fclose flushes buffered output, so a failure here means lost data */

+    if (fclose(out_fid) != 0)

+    {

+        perror("error while closing output file");

+        ret = 1;

+    }

+    return ret;

+}

+/* gcc -DDISABLE_DOT_PROD -DDISABLE_NEON nfec_dec_demo.c nfec_dec.c nnet.c nfec_dec_data.c nfec_stats_data.c kiss99.c -g -o nfec_dec_demo */

\ No newline at end of file

--- a/dnn/nfec_enc.c

+++ b/dnn/nfec_enc.c

@@ -1,6 +1,9 @@

+#include <math.h>

 #include "nfec_enc.h"

 #include "nnet.h"

 #include "nfec_enc_data.h"

+#include "nfec_stats_data.h"

 //#define DEBUG

@@ -8,7 +11,12 @@

 #include <stdio.h>

 #endif

-void nfec_encode_dframe(struct NFECEncState *enc_state, float *latents, float *initial_state, const float *input)

+void nfec_encode_dframe(

+    struct NFECEncState *enc_state, /* io: encoder state */

+    float *latents,                 /* o: latent vector */

+    float *initial_state,           /* o: initial state */

+    const float *input              /* i: double feature frame (concatenated) */

+    )

     float buffer[ENC_DENSE1_OUT_SIZE + ENC_DENSE2_OUT_SIZE + ENC_DENSE3_OUT_SIZE + ENC_DENSE4_OUT_SIZE + ENC_DENSE5_OUT_SIZE + ENC_DENSE6_OUT_SIZE + ENC_DENSE7_OUT_SIZE + ENC_DENSE8_OUT_SIZE + GDENSE1_OUT_SIZE];

     int output_index = 0;

@@ -105,4 +113,28 @@

     input_index = output_index;

     compute_dense(&gdense2, initial_state, &buffer[input_index]);

+}

+void nfec_quantize_latent_vector(

+    int *z_q,           /* o: quantized latent vector */

+    const float *z,     /* i: unquantized latent vector */

+    int quant_level     /* i: quantization level */

+    )

+{

+    int i;

+    float delta;

+    float tmp[NFEC_LATENT_DIM];

+    for (i = 0; i < NFEC_LATENT_DIM; i ++)

+    {

+        /* dead-zone transform */

+        delta = nfec_stats_dead_zone_theta[quant_level * NFEC_LATENT_DIM + i] - .5f;

+        tmp[i] = z[i] - delta * tanhf(z[i] / (delta + 0.1f));

+        /* scaling */

+        tmp[i] *= nfec_stats_quant_scales[quant_level * NFEC_LATENT_DIM + i];

+        /* quantization by rounding (CAVE: is there a quantization routine with overflow check available?) */

+        z_q[i] = (int) roundf(tmp[i]);

+    }

\ No newline at end of file

--- a/dnn/nfec_enc.h

+++ b/dnn/nfec_enc.h

@@ -11,5 +11,6 @@

};

 void nfec_encode_dframe(struct NFECEncState *enc_state, float *latents, float *initial_state, const float *input);

+void nfec_quantize_latent_vector(int *z_q, const float *z, int quant_level);

 #endif

\ No newline at end of file

--- a/dnn/nfec_enc_demo.c

+++ b/dnn/nfec_enc_demo.c

@@ -16,8 +16,9 @@

     float dframe[2 * NFEC_NUM_FEATURES];

     float latents[80];

     float initial_state[24];

+    int quantized_latents[NFEC_LATENT_DIM];

     int index = 0;

-    FILE *fid, *latents_fid, *states_fid;

+    FILE *fid, *latents_fid, *quantized_latents_fid, *states_fid;

     memset(&enc_state, 0, sizeof(enc_state));

@@ -40,6 +41,16 @@

         usage();

+    char filename[256];

+    strcpy(filename, argv[2]);

+    strcat(filename, ".quantized.f32");

+    quantized_latents_fid = fopen(filename, "wb");

+    if (latents_fid == NULL)

+    {

+        fprintf(stderr, "could not open latents file %s\n", filename);

+        usage();

+    }

     states_fid = fopen(argv[3], "wb");

     if (states_fid == NULL)

@@ -55,8 +66,10 @@

         if (index == 2)

             nfec_encode_dframe(&enc_state, latents, initial_state, dframe);

+            nfec_quantize_latent_vector(quantized_latents, latents, 0);

             index = 0;

             fwrite(latents, sizeof(float), NFEC_LATENT_DIM, latents_fid);

+            fwrite(quantized_latents, sizeof(int), NFEC_LATENT_DIM, quantized_latents_fid);

             fwrite(initial_state, sizeof(float), GDENSE2_OUT_SIZE, states_fid);

@@ -64,6 +77,9 @@

     fclose(fid);

     fclose(states_fid);

     fclose(latents_fid);

+    fclose(quantized_latents_fid);

+    return 0;

-/* gcc -DDISABLE_DOT_PROD -DDISABLE_NEON nfec_enc_demo.c nfec_enc.c nnet.c nfec_enc_data.c kiss99.c -g -o nfec_enc_demo */

\ No newline at end of file

+/* gcc -DDISABLE_DOT_PROD -DDISABLE_NEON nfec_enc_demo.c nfec_enc.c nnet.c nfec_enc_data.c nfec_stats_data.c kiss99.c -g -o nfec_enc_demo */

\ No newline at end of file

--- a/dnn/training_tf2/dump_nfec_model.py

+++ b/dnn/training_tf2/dump_nfec_model.py

@@ -1,6 +1,7 @@

 import argparse

 import os

+os.environ['CUDA_VISIBLE_DEVICES'] = ""

 parser = argparse.ArgumentParser()

@@ -59,17 +60,17 @@

     r               = 0.5 + 0.5 * tf.math.sigmoid(w[:, 4 * N : 5 * N]).numpy()

     theta           = tf.math.sigmoid(w[:, 5 * N : 6 * N]).numpy()

-    printVector(f, quant_scales[:], 'nfec_stats_quant_scales')

-    printVector(f, dead_zone_theta[:], 'nfec_stats_dead_zone_theta')

-    printVector(f, r, 'nfec_stats_r')

-    printVector(f, theta, 'nfec_stats_theta')

+    printVector(f, quant_scales[:], 'nfec_stats_quant_scales', static=False)

+    printVector(f, dead_zone_theta[:], 'nfec_stats_dead_zone_theta', static=False)

+    printVector(f, r, 'nfec_stats_r', static=False)

+    printVector(f, theta, 'nfec_stats_theta', static=False)

     fh.write(

 f"""

-extern float nfec_stats_quant_scales;

-extern float nfec_stats_dead_zone_theta;

-extern float nfec_stats_r;

-extern float nfec_stats_theta;

+extern const float nfec_stats_quant_scales[{levels * N}];

+extern const float nfec_stats_dead_zone_theta[{levels * N}];

+extern const float nfec_stats_r[{levels * N}];

+extern const float nfec_stats_theta[{levels * N}];

"""

@@ -159,6 +160,7 @@

     header_fid.write(

 f"""

 #define NFEC_STATS_NUM_LEVELS {num_levels}

+#define NFEC_STATS_NUM_LATENTS {args.latent_dim}

"""

@@ -171,3 +173,60 @@

     header_fid.close()

     source_fid.close()

+    # decoder

+    decoder_dense_names = [

+        'state1',

+        'state2',

+        'state3',

+        'dec_dense1',

+        'dec_dense3',

+        'dec_dense5',

+        'dec_dense7',

+        'dec_dense8',

+        'dec_final'

+    ]

+    decoder_gru_names = [

+        'dec_dense2',

+        'dec_dense4',

+        'dec_dense6'

+    ]

+    source_fid = open("nfec_dec_data.c", 'w')

+    header_fid = open("nfec_dec_data.h", 'w')

+    start_header(header_fid, "nfec_dec_data.h")

+    start_source(source_fid, "nfec_dec_data.h", os.path.basename(args.weights))

+    # some global constants

+    header_fid.write(

+f"""

+#define NFEC_DEC_NUM_FEATURES 20

+#define NFEC_DEC_LATENT_DIM {args.latent_dim}

+#define NFEC_DEC_MAX_RNN_NEURONS {max_rnn_neurons}

+"""

+    )

+    # dump GRUs

+    max_rnn_neurons = max(

+        [

+            dump_gru_layer(decoder.get_layer(name), source_fid, header_fid)

+            for name in decoder_gru_names

+        ]

+    )

+    # dump Dense layers

+    for name in decoder_dense_names:

+        layer = decoder.get_layer(name)

+        dump_dense_layer(layer, source_fid, header_fid)

+    finish_header(header_fid)

+    finish_source(source_fid)

+    header_fid.close()

+    source_fid.close()

\ No newline at end of file

--- a/dnn/training_tf2/keraslayerdump.py

+++ b/dnn/training_tf2/keraslayerdump.py

@@ -3,13 +3,16 @@

 import numpy as np

-def printVector(f, vector, name, dtype='float', dotp=False):

+def printVector(f, vector, name, dtype='float', dotp=False, static=True):

     """ prints vector as one-dimensional C array """

     if dotp:

         vector = vector.reshape((vector.shape[0]//4, 4, vector.shape[1]//8, 8))

         vector = vector.transpose((2, 0, 3, 1))

     v = np.reshape(vector, (-1))

-    f.write('static const {} {}[{}] = {{\n   '.format(dtype, name, len(v)))

+    if static:

+        f.write('static const {} {}[{}] = {{\n   '.format(dtype, name, len(v)))

+    else:

+        f.write('const {} {}[{}] = {{\n   '.format(dtype, name, len(v)))

     for i in range(0, len(v)):

         f.write('{}'.format(v[i]))

         if (i!=len(v)-1):

--

⑨