ref: 7cef98ec8c6757225e9aa5fcd977b728543acc9f
parent: 714380e71b969ba6b3eff3e1a37ca16792aba68d
author: Jean-Marc Valin <jmvalin@amazon.com>
date: Tue Jul 6 14:45:23 EDT 2021
Minor optimization: merge the condition copy and all 3 embedding lookups into a single pass
--- a/dnn/lpcnet.c
+++ b/dnn/lpcnet.c
@@ -81,10 +81,8 @@
{
float gru_a_input[3*GRU_A_STATE_SIZE];
float in_b[GRU_A_STATE_SIZE+FEATURE_DENSE2_OUT_SIZE];
-RNN_COPY(gru_a_input, gru_a_condition, 3*GRU_A_STATE_SIZE);
-accum_embedding(&gru_a_embed_sig, gru_a_input, last_sig);
-accum_embedding(&gru_a_embed_pred, gru_a_input, pred);
-accum_embedding(&gru_a_embed_exc, gru_a_input, last_exc);
+/* Single merged pass: condition vector plus all three embedding lookups. */
+compute_gru_a_input(gru_a_input, gru_a_condition, GRU_A_STATE_SIZE, &gru_a_embed_sig, last_sig, &gru_a_embed_pred, pred, &gru_a_embed_exc, last_exc);
/*compute_gru3(&gru_a, net->gru_a_state, gru_a_input);*/
compute_sparse_gru(&sparse_gru_a, net->gru_a_state, gru_a_input);
RNN_COPY(in_b, net->gru_a_state, GRU_A_STATE_SIZE);
--- a/dnn/nnet.c
+++ b/dnn/nnet.c
@@ -395,6 +395,25 @@
}
}
+/* Compute the input vector of GRU A in one pass: the per-frame
+   conditioning vector summed with three embedding lookups (last signal,
+   prediction, last excitation).  Replaces one RNN_COPY plus three
+   accum_embedding() calls, saving two extra passes over the buffer.
+   output      - receives the 3*N merged values
+   input       - conditioning vector of length 3*N (read only)
+   N           - GRU A state size (each of the three gate blocks is N wide)
+   layerK/valK - embedding table and row index for each of the three lookups.
+   NOTE(review): indexing valK*layerK->dim + i with i < 3*N assumes each
+   embedding's dim equals 3*N -- confirm against the generated model data. */
+void compute_gru_a_input(float *output, const float *input, int N, const EmbeddingLayer *layer1, int val1, const EmbeddingLayer *layer2, int val2, const EmbeddingLayer *layer3, int val3) {
+ int i;
+ for (i=0;i<3*N;i++) {
+ output[i] = input[i] + layer1->embedding_weights[val1*layer1->dim + i]
+ + layer2->embedding_weights[val2*layer2->dim + i]
+ + layer3->embedding_weights[val3*layer3->dim + i];
+ }
+}
+
void accum_embedding(const EmbeddingLayer *layer, float *output, int input)
{
int i;
--- a/dnn/nnet.h
+++ b/dnn/nnet.h
@@ -113,6 +113,8 @@
void accum_embedding(const EmbeddingLayer *layer, float *output, int input);
+void compute_gru_a_input(float *output, const float *input, int N, const EmbeddingLayer *layer1, int val1, const EmbeddingLayer *layer2, int val2, const EmbeddingLayer *layer3, int val3);
+
int sample_from_pdf(const float *pdf, int N, float exp_boost, float pdf_floor);
#endif /* _MLP_H_ */
--
⑨