ref: f867f61e8b024bc3f1684ff8c1b4fc0ac21af97b
parent: ba5dde539a21177ad91e84a00dfa0606c8f7149a
author: Jean-Marc Valin <jmvalin@amazon.com>
date: Thu Jun 1 19:06:45 EDT 2023
Convert RDOVAE to blob format
--- a/dnn/Makefile.am
+++ b/dnn/Makefile.am
@@ -1,6 +1,6 @@
ACLOCAL_AMFLAGS = -I m4
-AM_CFLAGS = -I$(top_srcdir)/include $(DEPS_CFLAGS)
+AM_CFLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/ $(DEPS_CFLAGS)
dist_doc_DATA = COPYING AUTHORS README README.md
--- a/dnn/dred_rdovae.c
+++ b/dnn/dred_rdovae.c
@@ -35,16 +35,17 @@
#include "dred_rdovae_dec.h"
#include "dred_rdovae_stats_data.h"
-void DRED_rdovae_decode_all(float *features, const float *state, const float *latents, int nb_latents)
+void DRED_rdovae_decode_all(const RDOVAEDec *model, float *features, const float *state, const float *latents, int nb_latents)
{
int i;
- RDOVAEDec dec;
+ RDOVAEDecState dec;
memset(&dec, 0, sizeof(dec));
- DRED_rdovae_dec_init_states(&dec, state);
+ DRED_rdovae_dec_init_states(&dec, model, state);
for (i = 0; i < 2*nb_latents; i += 2)
{
DRED_rdovae_decode_qframe(
&dec,
+ model,
&features[2*i*DRED_NUM_FEATURES],
&latents[(i/2)*DRED_LATENT_DIM]);
}
@@ -52,65 +53,65 @@
size_t DRED_rdovae_get_enc_size()
{
- return sizeof(RDOVAEEnc);
+ return sizeof(RDOVAEEncState);
}
size_t DRED_rdovae_get_dec_size()
{
- return sizeof(RDOVAEDec);
+ return sizeof(RDOVAEDecState);
}
-void DRED_rdovae_init_encoder(RDOVAEEnc *enc_state)
+void DRED_rdovae_init_encoder(RDOVAEEncState *enc_state)
{
memset(enc_state, 0, sizeof(*enc_state));
}
-void DRED_rdovae_init_decoder(RDOVAEDec *dec_state)
+void DRED_rdovae_init_decoder(RDOVAEDecState *dec_state)
{
memset(dec_state, 0, sizeof(*dec_state));
}
-RDOVAEEnc * DRED_rdovae_create_encoder()
+RDOVAEEncState * DRED_rdovae_create_encoder()
{
- RDOVAEEnc *enc;
- enc = (RDOVAEEnc*) calloc(sizeof(*enc), 1);
+ RDOVAEEncState *enc;
+ enc = (RDOVAEEncState*) calloc(sizeof(*enc), 1);
DRED_rdovae_init_encoder(enc);
return enc;
}
-RDOVAEDec * DRED_rdovae_create_decoder()
+RDOVAEDecState * DRED_rdovae_create_decoder()
{
- RDOVAEDec *dec;
- dec = (RDOVAEDec*) calloc(sizeof(*dec), 1);
+ RDOVAEDecState *dec;
+ dec = (RDOVAEDecState*) calloc(sizeof(*dec), 1);
DRED_rdovae_init_decoder(dec);
return dec;
}
-void DRED_rdovae_destroy_decoder(RDOVAEDec* dec)
+void DRED_rdovae_destroy_decoder(RDOVAEDecState* dec)
{
free(dec);
}
-void DRED_rdovae_destroy_encoder(RDOVAEEnc* enc)
+void DRED_rdovae_destroy_encoder(RDOVAEEncState* enc)
{
free(enc);
}
-void DRED_rdovae_encode_dframe(RDOVAEEnc *enc_state, float *latents, float *initial_state, const float *input)
+void DRED_rdovae_encode_dframe(RDOVAEEncState *enc_state, const RDOVAEEnc *model, float *latents, float *initial_state, const float *input)
{
- dred_rdovae_encode_dframe(enc_state, latents, initial_state, input);
+ dred_rdovae_encode_dframe(enc_state, model, latents, initial_state, input);
}
-void DRED_rdovae_dec_init_states(RDOVAEDec *h, const float * initial_state)
+void DRED_rdovae_dec_init_states(RDOVAEDecState *h, const RDOVAEDec *model, const float * initial_state)
{
- dred_rdovae_dec_init_states(h, initial_state);
+ dred_rdovae_dec_init_states(h, model, initial_state);
}
-void DRED_rdovae_decode_qframe(RDOVAEDec *h, float *qframe, const float *z)
+void DRED_rdovae_decode_qframe(RDOVAEDecState *h, const RDOVAEDec *model, float *qframe, const float *z)
{
- dred_rdovae_decode_qframe(h, qframe, z);
+ dred_rdovae_decode_qframe(h, model, qframe, z);
}
--- a/dnn/dred_rdovae_dec.c
+++ b/dnn/dred_rdovae_dec.c
@@ -35,19 +35,21 @@
void dred_rdovae_dec_init_states(
- RDOVAEDec *h, /* io: state buffer handle */
+ RDOVAEDecState *h, /* io: state buffer handle */
+ const RDOVAEDec *model,
const float *initial_state /* i: initial state */
)
{
/* initialize GRU states from initial state */
- _lpcnet_compute_dense(&state1, h->dense2_state, initial_state);
- _lpcnet_compute_dense(&state2, h->dense4_state, initial_state);
- _lpcnet_compute_dense(&state3, h->dense6_state, initial_state);
+ _lpcnet_compute_dense(&model->state1, h->dense2_state, initial_state);
+ _lpcnet_compute_dense(&model->state2, h->dense4_state, initial_state);
+ _lpcnet_compute_dense(&model->state3, h->dense6_state, initial_state);
}
void dred_rdovae_decode_qframe(
- RDOVAEDec *dec_state, /* io: state buffer handle */
+ RDOVAEDecState *dec_state, /* io: state buffer handle */
+ const RDOVAEDec *model,
float *qframe, /* o: quadruple feature frame (four concatenated frames in reverse order) */
const float *input /* i: latent vector */
)
@@ -58,39 +60,39 @@
float zero_vector[1024] = {0};
/* run encoder stack and concatenate output in buffer*/
- _lpcnet_compute_dense(&dec_dense1, &buffer[output_index], input);
+ _lpcnet_compute_dense(&model->dec_dense1, &buffer[output_index], input);
input_index = output_index;
output_index += DEC_DENSE1_OUT_SIZE;
- compute_gruB(&dec_dense2, zero_vector, dec_state->dense2_state, &buffer[input_index]);
+ compute_gruB(&model->dec_dense2, zero_vector, dec_state->dense2_state, &buffer[input_index]);
RNN_COPY(&buffer[output_index], dec_state->dense2_state, DEC_DENSE2_OUT_SIZE);
input_index = output_index;
output_index += DEC_DENSE2_OUT_SIZE;
- _lpcnet_compute_dense(&dec_dense3, &buffer[output_index], &buffer[input_index]);
+ _lpcnet_compute_dense(&model->dec_dense3, &buffer[output_index], &buffer[input_index]);
input_index = output_index;
output_index += DEC_DENSE3_OUT_SIZE;
- compute_gruB(&dec_dense4, zero_vector, dec_state->dense4_state, &buffer[input_index]);
+ compute_gruB(&model->dec_dense4, zero_vector, dec_state->dense4_state, &buffer[input_index]);
RNN_COPY(&buffer[output_index], dec_state->dense4_state, DEC_DENSE4_OUT_SIZE);
input_index = output_index;
output_index += DEC_DENSE4_OUT_SIZE;
- _lpcnet_compute_dense(&dec_dense5, &buffer[output_index], &buffer[input_index]);
+ _lpcnet_compute_dense(&model->dec_dense5, &buffer[output_index], &buffer[input_index]);
input_index = output_index;
output_index += DEC_DENSE5_OUT_SIZE;
- compute_gruB(&dec_dense6, zero_vector, dec_state->dense6_state, &buffer[input_index]);
+ compute_gruB(&model->dec_dense6, zero_vector, dec_state->dense6_state, &buffer[input_index]);
RNN_COPY(&buffer[output_index], dec_state->dense6_state, DEC_DENSE6_OUT_SIZE);
input_index = output_index;
output_index += DEC_DENSE6_OUT_SIZE;
- _lpcnet_compute_dense(&dec_dense7, &buffer[output_index], &buffer[input_index]);
+ _lpcnet_compute_dense(&model->dec_dense7, &buffer[output_index], &buffer[input_index]);
input_index = output_index;
output_index += DEC_DENSE7_OUT_SIZE;
- _lpcnet_compute_dense(&dec_dense8, &buffer[output_index], &buffer[input_index]);
+ _lpcnet_compute_dense(&model->dec_dense8, &buffer[output_index], &buffer[input_index]);
output_index += DEC_DENSE8_OUT_SIZE;
- _lpcnet_compute_dense(&dec_final, qframe, buffer);
+ _lpcnet_compute_dense(&model->dec_final, qframe, buffer);
}
--- a/dnn/dred_rdovae_dec.h
+++ b/dnn/dred_rdovae_dec.h
@@ -38,7 +38,7 @@
float dense6_state[DEC_DENSE2_STATE_SIZE];
};
-void dred_rdovae_dec_init_states(RDOVAEDec *h, const float * initial_state);
-void dred_rdovae_decode_qframe(RDOVAEDec *h, float *qframe, const float * z);
+void dred_rdovae_dec_init_states(RDOVAEDecState *h, const RDOVAEDec *model, const float * initial_state);
+void dred_rdovae_decode_qframe(RDOVAEDecState *h, const RDOVAEDec *model, float *qframe, const float * z);
-#endif
\ No newline at end of file
+#endif
--- a/dnn/dred_rdovae_enc.c
+++ b/dnn/dred_rdovae_enc.c
@@ -36,7 +36,8 @@
#include "common.h"
void dred_rdovae_encode_dframe(
- RDOVAEEnc *enc_state, /* io: encoder state */
+ RDOVAEEncState *enc_state, /* io: encoder state */
+ const RDOVAEEnc *model,
float *latents, /* o: latent vector */
float *initial_state, /* o: initial state */
const float *input /* i: double feature frame (concatenated) */
@@ -48,47 +49,47 @@
float zero_vector[1024] = {0};
/* run encoder stack and concatenate output in buffer*/
- _lpcnet_compute_dense(&enc_dense1, &buffer[output_index], input);
+ _lpcnet_compute_dense(&model->enc_dense1, &buffer[output_index], input);
input_index = output_index;
output_index += ENC_DENSE1_OUT_SIZE;
- compute_gruB(&enc_dense2, zero_vector, enc_state->dense2_state, &buffer[input_index]);
+ compute_gruB(&model->enc_dense2, zero_vector, enc_state->dense2_state, &buffer[input_index]);
RNN_COPY(&buffer[output_index], enc_state->dense2_state, ENC_DENSE2_OUT_SIZE);
input_index = output_index;
output_index += ENC_DENSE2_OUT_SIZE;
- _lpcnet_compute_dense(&enc_dense3, &buffer[output_index], &buffer[input_index]);
+ _lpcnet_compute_dense(&model->enc_dense3, &buffer[output_index], &buffer[input_index]);
input_index = output_index;
output_index += ENC_DENSE3_OUT_SIZE;
- compute_gruB(&enc_dense4, zero_vector, enc_state->dense4_state, &buffer[input_index]);
+ compute_gruB(&model->enc_dense4, zero_vector, enc_state->dense4_state, &buffer[input_index]);
RNN_COPY(&buffer[output_index], enc_state->dense4_state, ENC_DENSE4_OUT_SIZE);
input_index = output_index;
output_index += ENC_DENSE4_OUT_SIZE;
- _lpcnet_compute_dense(&enc_dense5, &buffer[output_index], &buffer[input_index]);
+ _lpcnet_compute_dense(&model->enc_dense5, &buffer[output_index], &buffer[input_index]);
input_index = output_index;
output_index += ENC_DENSE5_OUT_SIZE;
- compute_gruB(&enc_dense6, zero_vector, enc_state->dense6_state, &buffer[input_index]);
+ compute_gruB(&model->enc_dense6, zero_vector, enc_state->dense6_state, &buffer[input_index]);
RNN_COPY(&buffer[output_index], enc_state->dense6_state, ENC_DENSE6_OUT_SIZE);
input_index = output_index;
output_index += ENC_DENSE6_OUT_SIZE;
- _lpcnet_compute_dense(&enc_dense7, &buffer[output_index], &buffer[input_index]);
+ _lpcnet_compute_dense(&model->enc_dense7, &buffer[output_index], &buffer[input_index]);
input_index = output_index;
output_index += ENC_DENSE7_OUT_SIZE;
- _lpcnet_compute_dense(&enc_dense8, &buffer[output_index], &buffer[input_index]);
+ _lpcnet_compute_dense(&model->enc_dense8, &buffer[output_index], &buffer[input_index]);
output_index += ENC_DENSE8_OUT_SIZE;
/* compute latents from concatenated input buffer */
- compute_conv1d(&bits_dense, latents, enc_state->bits_dense_state, buffer);
+ compute_conv1d(&model->bits_dense, latents, enc_state->bits_dense_state, buffer);
/* next, calculate initial state */
- _lpcnet_compute_dense(&gdense1, &buffer[output_index], buffer);
+ _lpcnet_compute_dense(&model->gdense1, &buffer[output_index], buffer);
input_index = output_index;
- _lpcnet_compute_dense(&gdense2, initial_state, &buffer[input_index]);
+ _lpcnet_compute_dense(&model->gdense2, initial_state, &buffer[input_index]);
}
--- a/dnn/dred_rdovae_enc.h
+++ b/dnn/dred_rdovae_enc.h
@@ -39,7 +39,7 @@
float bits_dense_state[BITS_DENSE_STATE_SIZE];
};
-void dred_rdovae_encode_dframe(RDOVAEEnc *enc_state, float *latents, float *initial_state, const float *input);
+void dred_rdovae_encode_dframe(RDOVAEEncState *enc_state, const RDOVAEEnc *model, float *latents, float *initial_state, const float *input);
-#endif
\ No newline at end of file
+#endif
--- a/dnn/include/dred_rdovae.h
+++ b/dnn/include/dred_rdovae.h
@@ -32,10 +32,12 @@
#include "opus_types.h"
-typedef struct RDOVAEDecStruct RDOVAEDec;
-typedef struct RDOVAEEncStruct RDOVAEEnc;
+typedef struct RDOVAEDec RDOVAEDec;
+typedef struct RDOVAEEnc RDOVAEEnc;
+typedef struct RDOVAEDecStruct RDOVAEDecState;
+typedef struct RDOVAEEncStruct RDOVAEEncState;
-void DRED_rdovae_decode_all(float *features, const float *state, const float *latents, int nb_latents);
+void DRED_rdovae_decode_all(const RDOVAEDec *model, float *features, const float *state, const float *latents, int nb_latents);
size_t DRED_rdovae_get_enc_size(void);
@@ -42,19 +44,19 @@
size_t DRED_rdovae_get_dec_size(void);
-RDOVAEDec * DRED_rdovae_create_decoder(void);
-RDOVAEEnc * DRED_rdovae_create_encoder(void);
-void DRED_rdovae_destroy_decoder(RDOVAEDec* h);
-void DRED_rdovae_destroy_encoder(RDOVAEEnc* h);
+RDOVAEDecState * DRED_rdovae_create_decoder(void);
+RDOVAEEncState * DRED_rdovae_create_encoder(void);
+void DRED_rdovae_destroy_decoder(RDOVAEDecState* h);
+void DRED_rdovae_destroy_encoder(RDOVAEEncState* h);
-void DRED_rdovae_init_encoder(RDOVAEEnc *enc_state);
+void DRED_rdovae_init_encoder(RDOVAEEncState *enc_state);
-void DRED_rdovae_encode_dframe(RDOVAEEnc *enc_state, float *latents, float *initial_state, const float *input);
+void DRED_rdovae_encode_dframe(RDOVAEEncState *enc_state, const RDOVAEEnc *model, float *latents, float *initial_state, const float *input);
-void DRED_rdovae_dec_init_states(RDOVAEDec *h, const float * initial_state);
+void DRED_rdovae_dec_init_states(RDOVAEDecState *h, const RDOVAEDec *model, const float * initial_state);
-void DRED_rdovae_decode_qframe(RDOVAEDec *h, float *qframe, const float * z);
+void DRED_rdovae_decode_qframe(RDOVAEDecState *h, const RDOVAEDec *model, float *qframe, const float * z);
const opus_uint16 * DRED_rdovae_get_p0_pointer(void);
const opus_uint16 * DRED_rdovae_get_dead_zone_pointer(void);
--- a/dnn/nnet.h
+++ b/dnn/nnet.h
@@ -148,6 +148,8 @@
extern const WeightArray lpcnet_arrays[];
extern const WeightArray lpcnet_plc_arrays[];
+extern const WeightArray rdovae_enc_arrays[];
+extern const WeightArray rdovae_dec_arrays[];
int mdense_init(MDenseLayer *layer, const WeightArray *arrays,
const char *bias,
--- a/dnn/write_lpcnet_weights.c
+++ b/dnn/write_lpcnet_weights.c
@@ -41,6 +41,8 @@
#endif
#include "nnet_data.c"
#include "plc_data.c"
+#include "dred_rdovae_enc_data.c"
+#include "dred_rdovae_dec_data.c"
void write_weights(const WeightArray *list, FILE *fout)
{
@@ -69,6 +71,8 @@
FILE *fout = fopen("weights_blob.bin", "w");
write_weights(lpcnet_arrays, fout);
write_weights(lpcnet_plc_arrays, fout);
+ write_weights(rdovae_enc_arrays, fout);
+ write_weights(rdovae_dec_arrays, fout);
fclose(fout);
return 0;
}
--
⑨