shithub: opus

Download patch

ref: cfa714806d32b794c138146ba1dc3987b60d34f3
parent: 6ccab3bfbeac67b5707c8676085b4664ee51c321
author: Jean-Marc Valin <jeanmarcv@google.com>
date: Mon Jul 14 12:57:00 EDT 2025

Standalone decoding of DRED features

--- a/dnn/dred_decoder.c
+++ b/dnn/dred_decoder.c
@@ -39,7 +39,7 @@
 #include "dred_rdovae_stats_data.h"
 #include "dred_rdovae_constants.h"
 
-static void dred_decode_latents(ec_dec *dec, float *x, const opus_uint8 *scale, const opus_uint8 *r, const opus_uint8 *p0, int dim) {
+void dred_decode_latents(ec_dec *dec, float *x, const opus_uint8 *scale, const opus_uint8 *r, const opus_uint8 *p0, int dim) {
     int i;
     for (i=0;i<dim;i++) {
         int q;
--- a/dnn/fargan_demo.c
+++ b/dnn/fargan_demo.c
@@ -38,6 +38,10 @@
 #include "os_support.h"
 #include "fargan.h"
 #include "cpu_support.h"
+#include "dred_rdovae_dec.h"
+#include "dred_rdovae_dec_data.h"
+#include "dred_rdovae_stats_data.h"
+#include "entdec.h"
 
 #ifdef USE_WEIGHTS_FILE
 # if __unix__
@@ -111,7 +115,11 @@
 /*#define MODE_ADDLPC 5*/
 /*#define MODE_FWGAN_SYNTHESIS 6*/
 #define MODE_FARGAN_SYNTHESIS 7
+#define MODE_DRED_DECODING 8
 
+#define DRED_CHUNKS 50
+#define MAX_DRED_PACKET 100000
+
 void usage(void) {
     fprintf(stderr, "usage: lpcnet_demo -features <input.pcm> <features.f32>\n");
     fprintf(stderr, "       lpcnet_demo -fargan-synthesis <features.f32> <output.pcm>\n");
@@ -122,10 +130,19 @@
     exit(1);
 }
 
+void dred_decode_latents(ec_dec *dec, float *x, const opus_uint8 *scale, const opus_uint8 *r, const opus_uint8 *p0, int dim);
+
+static opus_uint32 char_to_int(unsigned char ch[4])
+{
+    /* Big-endian assembly: ch[0] ends up as the most significant byte. */
+    return ((((((opus_uint32)ch[0]<<8) | ch[1])<<8) | ch[2])<<8) | ch[3];
+}
+
 int main(int argc, char **argv) {
     int mode=0;
     int arch;
     FILE *fin, *fout;
+    int q0=-1;
 #ifdef USE_WEIGHTS_FILE
     int len;
     void *data;
@@ -135,7 +152,9 @@
     if (argc < 4) usage();
     if (strcmp(argv[1], "-features") == 0) mode=MODE_FEATURES;
     else if (strcmp(argv[1], "-fargan-synthesis") == 0) mode=MODE_FARGAN_SYNTHESIS;
-    else {
+    else if (strcmp(argv[1], "-dred-decoding") == 0) {
+       mode=MODE_DRED_DECODING;
+    } else {
         usage();
     }
     if (argc != 4) usage();
@@ -202,6 +221,111 @@
             fwrite(pcm+skip, sizeof(pcm[0]), LPCNET_FRAME_SIZE-skip, fout);
             skip=0;
         }
+#ifdef ENABLE_DRED
+    } else if (mode == MODE_DRED_DECODING) {
+        /* Standalone DRED decoding. The input is a sequence of packets, each
+           made of three big-endian 32-bit header words (quantizer q0, number
+           of chunks, payload size in bytes) followed by the entropy-coded
+           payload. Two frames of NB_FEATURES floats are written per chunk. */
+        size_t ret;
+        int i;
+        float features[2*DRED_CHUNKS*DRED_NUM_FEATURES];
+        float latents[DRED_CHUNKS*DRED_LATENT_DIM];
+        float initial_state[DRED_STATE_DIM];
+        ec_dec dec;
+        unsigned char bits[MAX_DRED_PACKET];
+        RDOVAEDecState rdovae_dec;
+        RDOVAEDec rdovae_dec_model;
+        init_rdovaedec(&rdovae_dec_model, rdovaedec_arrays);
+        while (1) {
+           unsigned char ch[4];
+           int nb_bytes;
+           int nb_chunks;
+           int state_qoffset;
+           ret = fread(ch, 4, 1, fin);
+           if (feof(fin) || ret != 1) break;
+           q0 = char_to_int(ch);
+           /* The header is untrusted input: a value above INT_MAX shows up
+              negative here and would index before the quantizer tables. */
+           if (q0 < 0) {
+              fprintf(stderr, "invalid quantizer: %d\n", q0);
+              exit(1);
+           }
+           /* NOTE(review): q0 should also be bounded above by the number of
+              quantization levels in the stats tables -- that limit is not
+              visible from this file, confirm and add the check. */
+           state_qoffset = q0*DRED_STATE_DIM;
+           ret = fread(ch, 4, 1, fin);
+           if (feof(fin) || ret != 1) break;
+           nb_chunks = char_to_int(ch);
+           /* latents[] and features[] are sized for DRED_CHUNKS chunks. */
+           if (nb_chunks < 0 || nb_chunks > DRED_CHUNKS) {
+              fprintf(stderr, "too many chunks: %d\n", nb_chunks);
+              exit(1);
+           }
+           ret = fread(ch, 4, 1, fin);
+           if (feof(fin) || ret != 1) break;
+           nb_bytes = char_to_int(ch);
+           /* Reject negative sizes too: converted to size_t for fread() they
+              would become huge and overflow bits[]. */
+           if (nb_bytes < 0 || nb_bytes > MAX_DRED_PACKET) {
+              fprintf(stderr, "packet too big: %d\n", nb_bytes);
+              exit(1);
+           }
+
+           ret = fread(bits, 1, nb_bytes, fin);
+           if (feof(fin) || (int)ret != nb_bytes) break;
+
+           ec_dec_init(&dec,  bits, nb_bytes);
+           memset(&rdovae_dec, 0, sizeof(rdovae_dec));
+           /* The decoder's initial state is coded first in the payload. */
+           dred_decode_latents(
+                 &dec,
+                 initial_state,
+                 dred_state_quant_scales_q8 + state_qoffset,
+                 dred_state_r_q8 + state_qoffset,
+                 dred_state_p0_q8 + state_qoffset,
+                 DRED_STATE_DIM);
+
+           dred_rdovae_dec_init_states(&rdovae_dec, &rdovae_dec_model, initial_state, arch);
+           /* One latent vector is decoded for every two chunks, starting
+              from the highest chunk index. */
+           for (i=nb_chunks-1;i>=0;i-=2) {
+              int k;
+              float dec_tmp[4*DRED_NUM_FEATURES];
+              int offset = q0 * DRED_LATENT_DIM;
+
+              dred_decode_latents(
+                    &dec,
+                    &latents[i*DRED_LATENT_DIM],
+                    dred_latent_quant_scales_q8 + offset,
+                    dred_latent_r_q8 + offset,
+                    dred_latent_p0_q8 + offset,
+                    DRED_LATENT_DIM
+              );
+
+              dred_rdovae_decode_qframe(
+                    &rdovae_dec,
+                    &rdovae_dec_model,
+                    dec_tmp,
+                    &latents[i*DRED_LATENT_DIM],
+                    arch);
+              /* dec_tmp holds four frames that are stored in reverse order
+                 at frames 2*i-2 .. 2*i+1 of features[]. */
+              for (k=0;k<4;k++) {
+                 int frame = 2*i-2+k;
+                 /* With an odd nb_chunks the last iteration has i==0 and the
+                    first two frames would land before features[]; skip them. */
+                 if (frame < 0) continue;
+                 OPUS_COPY(&features[frame*DRED_NUM_FEATURES], &dec_tmp[(3-k)*DRED_NUM_FEATURES], DRED_NUM_FEATURES);
+              }
+           }
+           for (i=0;i<nb_chunks;i++) {
+              fwrite(&features[2*i*DRED_NUM_FEATURES], sizeof(float), NB_FEATURES, fout);
+              fwrite(&features[(2*i+1)*DRED_NUM_FEATURES], sizeof(float), NB_FEATURES, fout);
+           }
+        }
+#endif
     } else {
         fprintf(stderr, "unknown action\n");
     }
--