shithub: opus

Download patch

ref: 634defacdca0e5b701ae3967154248fa886734b4
parent: 8623012b30035dae057ef91dc4438f18a2c9801d
author: Jean-Marc Valin <jmvalin@amazon.com>
date: Tue Dec 6 13:01:53 EST 2022

DRED cleanup, support for variable number of frames

--- a/silk/dred_decoder.c
+++ b/silk/dred_decoder.c
@@ -53,6 +53,7 @@
     const opus_uint16 *quant_scales    = DRED_rdovae_get_quant_scales_pointer();
     const opus_uint16 *r               = DRED_rdovae_get_r_pointer();
 
+    ec_dec ec;
     int q_level;
     int i;
     int offset;
@@ -64,17 +65,20 @@
     celt_assert(DRED_NUM_REDUNDANCY_FRAMES % 2 == 0);
 
     /* decode initial state and initialize RDOVAE decoder */
-    ec_dec_init(&dec->ec_dec, (unsigned char*)bytes, num_bytes);
-    dred_decode_state(&dec->ec_dec, state);
+    ec_dec_init(&ec, (unsigned char*)bytes, num_bytes);
+    dred_decode_state(&ec, state);
     DRED_rdovae_dec_init_states(dec->rdovae_dec, state);
 
     /* decode newest to oldest and store oldest to newest */
     for (i = 0; i < DRED_NUM_REDUNDANCY_FRAMES; i += 2)
     {
+        /* FIXME: Figure out how to avoid missing a last frame that would take up < 8 bits. */
+        if (8*num_bytes - ec_tell(&ec) <= 7)
+           break;
         q_level = (int) round(DRED_ENC_Q0 + 1.f * (DRED_ENC_Q1 - DRED_ENC_Q0) * i / (DRED_NUM_REDUNDANCY_FRAMES - 2));
         offset = q_level * DRED_LATENT_DIM;
         dred_decode_latents(
-            &dec->ec_dec,
+            &ec,
             latents,
             quant_scales + offset,
             r + offset,
--- a/silk/dred_decoder.h
+++ b/silk/dred_decoder.h
@@ -30,7 +30,6 @@
 #include "entcode.h"
 
 typedef struct {
-    ec_dec ec_dec;
     RDOVAEDec *rdovae_dec;
 } DREDDec;
 
--- a/silk/dred_encoder.c
+++ b/silk/dred_encoder.c
@@ -55,10 +55,11 @@
     const opus_uint16 *p0              = DRED_rdovae_get_p0_pointer();
     const opus_uint16 *quant_scales    = DRED_rdovae_get_quant_scales_pointer();
     const opus_uint16 *r               = DRED_rdovae_get_r_pointer();
-    
+    float feature_buffer[2 * 36];
+
     float input_buffer[2*DRED_NUM_FEATURES] = {0};
+    ec_enc ec_encoder;
 
-    int bytes;
     int q_level;
     int i;
     int offset;
@@ -71,27 +72,28 @@
     memmove(enc->latents_buffer + DRED_LATENT_DIM, enc->latents_buffer, (DRED_MAX_FRAMES - 1) * DRED_LATENT_DIM * sizeof(*enc->latents_buffer));
 
     /* calculate LPCNet features */
-    lpcnet_compute_single_frame_features(enc->lpcnet_enc_state, enc->input_buffer, enc->feature_buffer);
-    lpcnet_compute_single_frame_features(enc->lpcnet_enc_state, enc->input_buffer + DRED_FRAME_SIZE, enc->feature_buffer + 36);
+    lpcnet_compute_single_frame_features(enc->lpcnet_enc_state, enc->input_buffer, feature_buffer);
+    lpcnet_compute_single_frame_features(enc->lpcnet_enc_state, enc->input_buffer + DRED_FRAME_SIZE, feature_buffer + 36);
 
     /* prepare input buffer (discard LPC coefficients) */
-    memcpy(input_buffer, enc->feature_buffer, DRED_NUM_FEATURES * sizeof(input_buffer[0]));
-    memcpy(input_buffer + DRED_NUM_FEATURES, enc->feature_buffer + 36, DRED_NUM_FEATURES * sizeof(input_buffer[0]));
+    memcpy(input_buffer, feature_buffer, DRED_NUM_FEATURES * sizeof(input_buffer[0]));
+    memcpy(input_buffer + DRED_NUM_FEATURES, feature_buffer + 36, DRED_NUM_FEATURES * sizeof(input_buffer[0]));
 
     /* run RDOVAE encoder */
     DRED_rdovae_encode_dframe(enc->rdovae_enc, enc->latents_buffer, enc->state_buffer, input_buffer);
+    enc->latents_buffer_fill = IMIN(enc->latents_buffer_fill+1, DRED_NUM_REDUNDANCY_FRAMES);
 
     /* entropy coding of state and latents */
-    ec_enc_init(&enc->ec_encoder, enc->ec_buffer, DRED_MAX_DATA_SIZE);
-    dred_encode_state(&enc->ec_encoder, enc->state_buffer);   
+    ec_enc_init(&ec_encoder, enc->ec_buffer, DRED_MAX_DATA_SIZE);
+    dred_encode_state(&ec_encoder, enc->state_buffer);
 
-    for (i = 0; i < DRED_NUM_REDUNDANCY_FRAMES; i += 2)
+    for (i = 0; i < enc->latents_buffer_fill-1; i += 2)
     {
-        q_level = (int) round(DRED_ENC_Q0 + 1.f * (DRED_ENC_Q1 - DRED_ENC_Q0) * i / (DRED_NUM_REDUNDANCY_FRAMES - 2));
+        q_level = (int) floor(0.5f + DRED_ENC_Q0 + 1.f * (DRED_ENC_Q1 - DRED_ENC_Q0) * i / (DRED_NUM_REDUNDANCY_FRAMES - 2));
         offset = q_level * DRED_LATENT_DIM;
 
         dred_encode_latents(
-            &enc->ec_encoder,
+            &ec_encoder,
             enc->latents_buffer + i * DRED_LATENT_DIM,
             quant_scales + offset,
             dead_zone + offset,
@@ -100,12 +102,12 @@
         );
     }
 
-    bytes = (ec_tell(&enc->ec_encoder)+7)/8;
-    ec_enc_shrink(&enc->ec_encoder, bytes);
-    ec_enc_done(&enc->ec_encoder);
+    enc->ec_buffer_fill = (ec_tell(&ec_encoder)+7)/8;
+    ec_enc_shrink(&ec_encoder, enc->ec_buffer_fill);
+    ec_enc_done(&ec_encoder);
 
-#if 1
-    printf("packet size: %d\n", bytes*8);
+#if 0
+    printf("packet size: %d\n", enc->ec_buffer_fill*8);
 
     static FILE *fbs = NULL;
     if (fbs == NULL)
@@ -112,8 +114,8 @@
     {
         fbs = fopen("dred_bitstream.bin", "wb");
     }
-    fwrite(&bytes, sizeof(bytes), 1, fbs);
-    fwrite(ec_get_buffer(&enc->ec_encoder), 1, bytes, fbs);
+    fwrite(&enc->ec_buffer_fill, sizeof(enc->ec_buffer_fill), 1, fbs);
+    fwrite(ec_get_buffer(&ec_encoder), 1, enc->ec_buffer_fill, fbs);
 #endif
 
 #if 0
--- a/silk/dred_encoder.h
+++ b/silk/dred_encoder.h
@@ -37,11 +37,11 @@
 
 typedef struct {
     opus_int16 input_buffer[DRED_DFRAME_SIZE + DRED_SILK_ENCODER_DELAY];
-    float feature_buffer[2 * 36];
     float latents_buffer[DRED_MAX_FRAMES * DRED_LATENT_DIM];
+    int latents_buffer_fill;
     float state_buffer[24];
     unsigned char ec_buffer[DRED_MAX_DATA_SIZE];
-    ec_enc ec_encoder;
+    int ec_buffer_fill;
     LPCNetEncState *lpcnet_enc_state;
     RDOVAEEnc *rdovae_enc;
 } DREDEnc;
--- a/src/opus_decoder.c
+++ b/src/opus_decoder.c
@@ -150,6 +150,9 @@
 
    celt_decoder_ctl(celt_dec, CELT_SET_SIGNALLING(0));
 
+#ifdef ENABLE_NEURAL_FEC
+   init_dred_decoder(&((silk_decoder_state*)silk_dec)->sPLC.dred_decoder);
+#endif
    st->prev_mode = 0;
    st->frame_size = Fs/400;
    st->arch = opus_select_arch();
@@ -1089,6 +1092,7 @@
    {
       silk_decoder_state *silk_dec;
       silk_dec = (silk_decoder_state*)((char*)st+st->silk_dec_offset);
+      /*printf("Found: %p of size %d\n", payload, payload_len);*/
       dred_decode_redundancy_package(&silk_dec->sPLC.dred_decoder, silk_dec->sPLC.fec_features, payload, payload_len);
       /* Found something -- do the decoding. */
       return 1;
--- a/src/opus_encoder.c
+++ b/src/opus_encoder.c
@@ -2182,7 +2182,7 @@
 #ifdef ENABLE_NEURAL_FEC
     if (1) {
        DREDEnc *dred = &((silk_encoder*)silk_enc)->state_Fxx[0].sCmn.dred_encoder;
-       opus_extension_data extension = {127, 0, dred->ec_buffer, dred->ec_encoder.storage};
+       opus_extension_data extension = {127, 0, dred->ec_buffer, dred->ec_buffer_fill};
        ret = opus_packet_pad_impl(data, ret, max_data_bytes, !st->use_vbr, &extension, 1);
        if (ret < 0)
        {
--