shithub: opus

Download patch

ref: 1f53f1e0a9b1e055222b28a70b2e327787e50d09
parent: 183a820212381f6c447b5a7c9b92b34fa01c629b
author: Jean-Marc Valin <jmvalin@jmvalin.ca>
date: Mon Feb 5 09:24:02 EST 2024

Support for extra offset

Allows the encoder to exclude the most recent silence from DRED by signalling an extra offset in the bitstream.

--- a/silk/dred_config.h
+++ b/silk/dred_config.h
@@ -32,7 +32,7 @@
 #define DRED_EXTENSION_ID 126
 
 /* Remove these two completely once DRED gets an extension number assigned. */
-#define DRED_EXPERIMENTAL_VERSION 8
+#define DRED_EXPERIMENTAL_VERSION 9
 #define DRED_EXPERIMENTAL_BYTES 2
 
 
--- a/silk/dred_decoder.c
+++ b/silk/dred_decoder.c
@@ -39,12 +39,6 @@
 #include "dred_rdovae_stats_data.h"
 #include "dred_rdovae_constants.h"
 
-/* From http://graphics.stanford.edu/~seander/bithacks.html#FixedSignExtend */
-static int sign_extend(int x, int b) {
-  int m = 1U << (b - 1);
-  return (x ^ m) - m;
-}
-
 static void dred_decode_latents(ec_dec *dec, float *x, const opus_uint8 *scale, const opus_uint8 *r, const opus_uint8 *p0, int dim) {
     int i;
     for (i=0;i<dim;i++) {
@@ -64,17 +58,19 @@
   int q0;
   int dQ;
   int state_qoffset;
+  int extra_offset;
 
-
   /* since features are decoded in quadruples, it makes no sense to go with an uneven number of redundancy frames */
   celt_assert(DRED_NUM_REDUNDANCY_FRAMES % 2 == 0);
 
   /* decode initial state and initialize RDOVAE decoder */
   ec_dec_init(&ec, (unsigned char*)bytes, num_bytes);
-  /* Compute total offset, including DRED position in a multiframe packet. */
-  dec->dred_offset = sign_extend(ec_dec_uint(&ec, 32), 5) + dred_frame_offset;
   q0 = ec_dec_uint(&ec, 16);
   dQ = ec_dec_uint(&ec, 8);
+  if (ec_dec_uint(&ec, 2)) extra_offset = 32*ec_dec_uint(&ec, 256);
+  else extra_offset = 0;
+  /* Compute total offset, including DRED position in a multiframe packet. */
+  dec->dred_offset = 16 - ec_dec_uint(&ec, 32) - extra_offset + dred_frame_offset;
   /*printf("%d %d %d\n", dred_offset, q0, dQ);*/
 
   state_qoffset = q0*DRED_STATE_DIM;
--- a/silk/dred_encoder.c
+++ b/silk/dred_encoder.c
@@ -267,17 +267,35 @@
     int state_qoffset;
     ec_enc ec_bak;
     int prev_active=0;
+    int latent_offset;
+    int extra_dred_offset=0;
     int dred_encoded=0;
+    int total_offset;
 
+    latent_offset = enc->latent_offset;
+    while (latent_offset < enc->latents_buffer_fill && !dred_voice_active(activity_mem, latent_offset)) {
+       latent_offset++;
+       extra_dred_offset++;
+    }
+
     /* entropy coding of state and latents */
     ec_enc_init(&ec_encoder, buf, max_bytes);
-    ec_enc_uint(&ec_encoder, enc->dred_offset, 32);
     ec_enc_uint(&ec_encoder, q0, 16);
     ec_enc_uint(&ec_encoder, dQ, 8);
+    total_offset = 16 - (enc->dred_offset - extra_dred_offset*8);
+    celt_assert(total_offset>=0);
+    if (total_offset > 31) {
+       ec_enc_uint(&ec_encoder, 1, 2);
+       ec_enc_uint(&ec_encoder, total_offset>>5, 256);
+       ec_enc_uint(&ec_encoder, total_offset&31, 32);
+    } else {
+       ec_enc_uint(&ec_encoder, 0, 2);
+       ec_enc_uint(&ec_encoder, total_offset, 32);
+    }
     state_qoffset = q0*DRED_STATE_DIM;
     dred_encode_latents(
         &ec_encoder,
-        &enc->state_buffer[enc->latent_offset*DRED_STATE_DIM],
+        &enc->state_buffer[latent_offset*DRED_STATE_DIM],
         dred_state_quant_scales_q8 + state_qoffset,
         dred_state_dead_zone_q8 + state_qoffset,
         dred_state_r_q8 + state_qoffset,
@@ -288,7 +306,7 @@
       return 0;
     }
     ec_bak = ec_encoder;
-    for (i = 0; i < IMIN(2*max_chunks, enc->latents_buffer_fill-enc->latent_offset-1); i += 2)
+    for (i = 0; i < IMIN(2*max_chunks, enc->latents_buffer_fill-latent_offset-1); i += 2)
     {
         int active;
         q_level = compute_quantizer(q0, dQ, i/2);
@@ -296,7 +314,7 @@
 
         dred_encode_latents(
             &ec_encoder,
-            enc->latents_buffer + (i+enc->latent_offset) * DRED_LATENT_DIM,
+            enc->latents_buffer + (i+latent_offset) * DRED_LATENT_DIM,
             dred_latent_quant_scales_q8 + offset,
             dred_latent_dead_zone_q8 + offset,
             dred_latent_r_q8 + offset,
@@ -309,7 +327,7 @@
           if (i==0) return 0;
           break;
         }
-        active = dred_voice_active(activity_mem, i+enc->latent_offset);
+        active = dred_voice_active(activity_mem, i+latent_offset);
         if (active || prev_active) {
            ec_bak = ec_encoder;
            dred_encoded = i+2;
@@ -317,8 +335,7 @@
         prev_active = active;
     }
     /* Avoid sending empty DRED packets. */
-    if (dred_encoded==0) return 0;
-
+    if (dred_encoded==0 || (dred_encoded<=2 && extra_dred_offset)) return 0;
     ec_encoder = ec_bak;
 
     ec_buffer_fill = (ec_tell(&ec_encoder)+7)/8;
--- a/src/opus_decoder.c
+++ b/src/opus_decoder.c
@@ -703,7 +703,7 @@
          if (feature_offset <= 4*dred->nb_latents-1 && feature_offset >= 0) {
            lpcnet_plc_fec_add(&st->lpcnet, dred->fec_features+feature_offset*DRED_NUM_FEATURES);
          } else {
-           lpcnet_plc_fec_add(&st->lpcnet, NULL);
+           if (feature_offset >= 0) lpcnet_plc_fec_add(&st->lpcnet, NULL);
          }
 
       }
@@ -1417,7 +1417,7 @@
       dred_ec_decode(dred, payload, payload_len, min_feature_frames, dred_frame_offset);
       if (!defer_processing)
          opus_dred_process(dred_dec, dred, dred);
-      return dred->nb_latents*sampling_rate/25 - sampling_rate/50;
+      return IMAX(0, dred->nb_latents*sampling_rate/25 - dred->dred_offset* sampling_rate/400);
    }
    return 0;
 #else
--