shithub: opus

Download patch

ref: a8673d0e253c9946b86c27ac95070f929501c775
parent: 5b9b4381eb410107a3b3783ec150f41ae4b535f4
author: jbuethe <jbuethe@amazon.de>
date: Thu Jan 12 09:15:39 EST 2023

gru2 -> gruB and dotp included in dump_rdovae

--- a/dnn/dred_rdovae_dec.c
+++ b/dnn/dred_rdovae_dec.c
@@ -55,6 +55,7 @@
     float buffer[DEC_DENSE1_OUT_SIZE + DEC_DENSE2_OUT_SIZE + DEC_DENSE3_OUT_SIZE + DEC_DENSE4_OUT_SIZE + DEC_DENSE5_OUT_SIZE + DEC_DENSE6_OUT_SIZE + DEC_DENSE7_OUT_SIZE + DEC_DENSE8_OUT_SIZE];
     int output_index = 0;
     int input_index = 0;
+    float zero_vector[1024] = {0};
 
     /* run encoder stack and concatenate output in buffer*/
     _lpcnet_compute_dense(&dec_dense1, &buffer[output_index], input);
@@ -61,7 +62,7 @@
     input_index = output_index;
     output_index += DEC_DENSE1_OUT_SIZE;
 
-    compute_gru2(&dec_dense2, dec_state->dense2_state, &buffer[input_index]);
+    compute_gruB(&dec_dense2, zero_vector, dec_state->dense2_state, &buffer[input_index]);
     memcpy(&buffer[output_index], dec_state->dense2_state, DEC_DENSE2_OUT_SIZE * sizeof(float));
     input_index = output_index;
     output_index += DEC_DENSE2_OUT_SIZE;
@@ -70,7 +71,7 @@
     input_index = output_index;
     output_index += DEC_DENSE3_OUT_SIZE;
 
-    compute_gru2(&dec_dense4, dec_state->dense4_state, &buffer[input_index]);
+    compute_gruB(&dec_dense4, zero_vector, dec_state->dense4_state, &buffer[input_index]);
     memcpy(&buffer[output_index], dec_state->dense4_state, DEC_DENSE4_OUT_SIZE * sizeof(float));
     input_index = output_index;
     output_index += DEC_DENSE4_OUT_SIZE;
@@ -79,7 +80,7 @@
     input_index = output_index;
     output_index += DEC_DENSE5_OUT_SIZE;
 
-    compute_gru2(&dec_dense6, dec_state->dense6_state, &buffer[input_index]);
+    compute_gruB(&dec_dense6, zero_vector, dec_state->dense6_state, &buffer[input_index]);
     memcpy(&buffer[output_index], dec_state->dense6_state, DEC_DENSE6_OUT_SIZE * sizeof(float));
     input_index = output_index;
     output_index += DEC_DENSE6_OUT_SIZE;
--- a/dnn/dred_rdovae_enc.c
+++ b/dnn/dred_rdovae_enc.c
@@ -45,6 +45,7 @@
     float buffer[ENC_DENSE1_OUT_SIZE + ENC_DENSE2_OUT_SIZE + ENC_DENSE3_OUT_SIZE + ENC_DENSE4_OUT_SIZE + ENC_DENSE5_OUT_SIZE + ENC_DENSE6_OUT_SIZE + ENC_DENSE7_OUT_SIZE + ENC_DENSE8_OUT_SIZE + GDENSE1_OUT_SIZE];
     int output_index = 0;
     int input_index = 0;
+    float zero_vector[1024] = {0};
 
     /* run encoder stack and concatenate output in buffer*/
     _lpcnet_compute_dense(&enc_dense1, &buffer[output_index], input);
@@ -51,7 +52,7 @@
     input_index = output_index;
     output_index += ENC_DENSE1_OUT_SIZE;
 
-    compute_gru2(&enc_dense2, enc_state->dense2_state, &buffer[input_index]);
+    compute_gruB(&enc_dense2, zero_vector, enc_state->dense2_state, &buffer[input_index]);
     memcpy(&buffer[output_index], enc_state->dense2_state, ENC_DENSE2_OUT_SIZE * sizeof(float));
     input_index = output_index;
     output_index += ENC_DENSE2_OUT_SIZE;
@@ -60,7 +61,7 @@
     input_index = output_index;
     output_index += ENC_DENSE3_OUT_SIZE;
 
-    compute_gru2(&enc_dense4, enc_state->dense4_state, &buffer[input_index]);
+    compute_gruB(&enc_dense4, zero_vector, enc_state->dense4_state, &buffer[input_index]);
     memcpy(&buffer[output_index], enc_state->dense4_state, ENC_DENSE4_OUT_SIZE * sizeof(float));
     input_index = output_index;
     output_index += ENC_DENSE4_OUT_SIZE;
@@ -69,7 +70,7 @@
     input_index = output_index;
     output_index += ENC_DENSE5_OUT_SIZE;
 
-    compute_gru2(&enc_dense6, enc_state->dense6_state, &buffer[input_index]);
+    compute_gruB(&enc_dense6, zero_vector, enc_state->dense6_state, &buffer[input_index]);
     memcpy(&buffer[output_index], enc_state->dense6_state, ENC_DENSE6_OUT_SIZE * sizeof(float));
     input_index = output_index;
     output_index += ENC_DENSE6_OUT_SIZE;
--- a/dnn/training_tf2/dump_rdovae.py
+++ b/dnn/training_tf2/dump_rdovae.py
@@ -50,7 +50,7 @@
 from rdovae import new_rdovae_model
 
 def start_header(header_fid, header_name):
-    header_guard = "_" + os.path.basename(header_name)[:-2].upper() + "_H"
+    header_guard = os.path.basename(header_name)[:-2].upper() + "_H"
     header_fid.write(
 f"""
 #ifndef {header_guard}
@@ -72,6 +72,10 @@
 f"""
 /* this source file was automatically generated from weight file {weight_file} */
 
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
 #include "{header_name}"
 
 """
@@ -159,7 +163,7 @@
     # dump GRUs
     max_rnn_neurons_enc = max(
         [
-            dump_gru_layer(encoder.get_layer(name), source_fid, header_fid)
+            dump_gru_layer(encoder.get_layer(name), source_fid, header_fid, dotp=True, sparse=True)
             for name in encoder_gru_names
         ]
     )
@@ -254,7 +258,7 @@
     # dump GRUs
     max_rnn_neurons_dec = max(
         [
-            dump_gru_layer(decoder.get_layer(name), source_fid, header_fid)
+            dump_gru_layer(decoder.get_layer(name), source_fid, header_fid, dotp=True, sparse=True)
             for name in decoder_gru_names
         ]
     )
--- a/dnn/training_tf2/keraslayerdump.py
+++ b/dnn/training_tf2/keraslayerdump.py
@@ -149,8 +149,8 @@
         reset_after = 1
     neurons = weights[0].shape[1]//3
     max_rnn_neurons = neurons
-    f.write('const GRULayer {} = {{\n   {}_bias,\n   {}_subias,\n   {}_weights,\n   NULL,\n   {}_recurrent_weights,\n   {}, {}, ACTIVATION_{}, {}\n}};\n\n'
-            .format(name, name, name, name, name, weights[0].shape[0], weights[0].shape[1]//3, activation, reset_after))
+    f.write('const GRULayer {} = {{\n   {}_bias,\n   {}_subias,\n   {}_weights,\n   {},\n   {}_recurrent_weights,\n   {}, {}, ACTIVATION_{}, {}\n}};\n\n'
+            .format(name, name, name, name, name + "_weights_idx" if sparse else "NULL", name, weights[0].shape[0], weights[0].shape[1]//3, activation, reset_after))
     hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
     hf.write('#define {}_STATE_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
     hf.write('extern const GRULayer {};\n\n'.format(name));
--