shithub: opus

Download patch

ref: cfc118d52e8b6611a54676d4cc6c6d1b7e867d91
parent: 62cd1c963baf1fdccb990260bb5dc53b780e45eb
parent: 4600e77583fc29738fd5c2aa6cb5ceb30596cc7d
author: Jean-Marc Valin <jmvalin@amazon.com>
date: Wed Jul 19 21:42:07 EDT 2023

Merge branch 'dred_bitrate4' into opus-ng

--- a/celt/celt_encoder.c
+++ b/celt/celt_encoder.c
@@ -1565,10 +1565,13 @@
       vbr_rate = 0;
       tmp = st->bitrate*frame_size;
       if (tell>1)
-         tmp += tell;
+         tmp += tell*mode->Fs;
       if (st->bitrate!=OPUS_BITRATE_MAX)
+      {
          nbCompressedBytes = IMAX(2, IMIN(nbCompressedBytes,
                (tmp+4*mode->Fs)/(8*mode->Fs)-!!st->signalling));
+         ec_enc_shrink(enc, nbCompressedBytes);
+      }
       effectiveBytes = nbCompressedBytes - nbFilledBytes;
    }
    equiv_rate = ((opus_int32)nbCompressedBytes*8*50 << (3-LM)) - (40*C+20)*((400>>LM) - 50);
--- a/silk/fixed/encode_frame_FIX.c
+++ b/silk/fixed/encode_frame_FIX.c
@@ -105,8 +105,11 @@
     opus_int     gain_lock[ MAX_NB_SUBFR ] = {0};
     opus_int16   best_gain_mult[ MAX_NB_SUBFR ];
     opus_int     best_sum[ MAX_NB_SUBFR ];
+    opus_int     bits_margin;
     SAVE_STACK;
 
+    /* For CBR, 5 bits below budget is close enough. For VBR, allow up to 25% below the cap if we initially busted the budget. */
+    bits_margin = useCBR ? 5 : maxBits/4;
     /* This is totally unnecessary but many compilers (including gcc) are too dumb to realise it */
     LastGainIndex_copy2 = nBits_lower = nBits_upper = gainMult_lower = gainMult_upper = 0;
 
@@ -282,7 +285,7 @@
                     gainMult_upper = gainMult_Q8;
                     gainsID_upper = gainsID;
                 }
-            } else if( nBits < maxBits - 5 ) {
+            } else if( nBits < maxBits - bits_margin ) {
                 found_lower = 1;
                 nBits_lower = nBits;
                 gainMult_lower = gainMult_Q8;
@@ -296,7 +299,7 @@
                     LastGainIndex_copy2 = psEnc->sShape.LastGainIndex;
                 }
             } else {
-                /* Within 5 bits of budget: close enough */
+                /* Close enough */
                 break;
             }
 
@@ -318,17 +321,10 @@
             if( ( found_lower & found_upper ) == 0 ) {
                 /* Adjust gain according to high-rate rate/distortion curve */
                 if( nBits > maxBits ) {
-                    if (gainMult_Q8 < 16384) {
-                        gainMult_Q8 *= 2;
-                    } else {
-                        gainMult_Q8 = 32767;
-                    }
+                    gainMult_Q8 = silk_min_32( 1024, gainMult_Q8*3/2 );
                 } else {
-                    opus_int32 gain_factor_Q16;
-                    gain_factor_Q16 = silk_log2lin( silk_LSHIFT( nBits - maxBits, 7 ) / psEnc->sCmn.frame_length + SILK_FIX_CONST( 16, 7 ) );
-                    gainMult_Q8 = silk_SMULWB( gain_factor_Q16, gainMult_Q8 );
+                    gainMult_Q8 = silk_max_32( 64, gainMult_Q8*4/5 );
                 }
-
             } else {
                 /* Adjust gain by interpolating */
                 gainMult_Q8 = gainMult_lower + silk_DIV32_16( silk_MUL( gainMult_upper - gainMult_lower, maxBits - nBits_lower ), nBits_upper - nBits_lower );
--- a/silk/float/encode_frame_FLP.c
+++ b/silk/float/encode_frame_FLP.c
@@ -107,7 +107,10 @@
     opus_int     gain_lock[ MAX_NB_SUBFR ] = {0};
     opus_int16   best_gain_mult[ MAX_NB_SUBFR ];
     opus_int     best_sum[ MAX_NB_SUBFR ];
+    opus_int     bits_margin;
 
+    /* For CBR, 5 bits below budget is close enough. For VBR, allow up to 25% below the cap if we initially busted the budget. */
+    bits_margin = useCBR ? 5 : maxBits/4;
     /* This is totally unnecessary but many compilers (including gcc) are too dumb to realise it */
     LastGainIndex_copy2 = nBits_lower = nBits_upper = gainMult_lower = gainMult_upper = 0;
 
@@ -270,7 +273,7 @@
                     gainMult_upper = gainMult_Q8;
                     gainsID_upper = gainsID;
                 }
-            } else if( nBits < maxBits - 5 ) {
+            } else if( nBits < maxBits - bits_margin ) {
                 found_lower = 1;
                 nBits_lower = nBits;
                 gainMult_lower = gainMult_Q8;
@@ -284,7 +287,7 @@
                     LastGainIndex_copy2 = psEnc->sShape.LastGainIndex;
                 }
             } else {
-                /* Within 5 bits of budget: close enough */
+                /* Close enough */
                 break;
             }
 
@@ -306,15 +309,9 @@
             if( ( found_lower & found_upper ) == 0 ) {
                 /* Adjust gain according to high-rate rate/distortion curve */
                 if( nBits > maxBits ) {
-                    if (gainMult_Q8 < 16384) {
-                        gainMult_Q8 *= 2;
-                    } else {
-                        gainMult_Q8 = 32767;
-                    }
+                    gainMult_Q8 = silk_min_32( 1024, gainMult_Q8*3/2 );
                 } else {
-                    opus_int32 gain_factor_Q16;
-                    gain_factor_Q16 = silk_log2lin( silk_LSHIFT( nBits - maxBits, 7 ) / psEnc->sCmn.frame_length + SILK_FIX_CONST( 16, 7 ) );
-                    gainMult_Q8 = silk_SMULWB( gain_factor_Q16, gainMult_Q8 );
+                    gainMult_Q8 = silk_max_32( 64, gainMult_Q8*4/5 );
                 }
             } else {
                 /* Adjust gain by interpolating */
--- a/src/opus_encoder.c
+++ b/src/opus_encoder.c
@@ -556,6 +556,24 @@
    return st;
 }
 
+#ifdef ENABLE_NEURAL_FEC
+static opus_int32 compute_dred_bitrate(OpusEncoder *st, opus_int32 bitrate_bps, int frame_size)
+{
+   float dred_frac;
+   int bitrate_offset;
+   opus_int32 dred_bitrate;
+   opus_int32 target_dred_bitrate;
+   opus_int32 max_dred_bitrate;
+   if (st->dred_duration > 0) max_dred_bitrate = (120 + 6*st->dred_duration)*st->Fs/frame_size;
+   else max_dred_bitrate = 0;
+   dred_frac = MIN16(.75f, 3.f*st->silk_mode.packetLossPercentage/100.f);
+   bitrate_offset = st->silk_mode.useInBandFEC ? 18000 : 12000;
+   target_dred_bitrate = IMAX(0, (int)(dred_frac*(bitrate_bps-bitrate_offset)));
+   dred_bitrate = IMIN(target_dred_bitrate, max_dred_bitrate);
+   return dred_bitrate;
+}
+#endif
+
 static opus_int32 user_bitrate_to_bitrate(OpusEncoder *st, int frame_size, int max_data_bytes)
 {
   if(!frame_size)frame_size=st->Fs/400;
@@ -1101,6 +1119,9 @@
     int analysis_read_subframe_bak=-1;
     int is_silence = 0;
 #endif
+#ifdef ENABLE_NEURAL_FEC
+    opus_int32 dred_bitrate_bps;
+#endif
     opus_int activity = VAD_NO_DECISION;
 
     VARDECL(opus_val16, tmp_prefill);
@@ -1235,6 +1256,11 @@
        /* Make sure we provide at least one byte to avoid failing. */
        max_data_bytes = IMAX(1, cbrBytes);
     }
+#ifdef ENABLE_NEURAL_FEC
+    /* Allocate some of the bits to DRED if needed. */
+    dred_bitrate_bps = compute_dred_bitrate(st, st->bitrate_bps, frame_size);
+    st->bitrate_bps -= dred_bitrate_bps;
+#endif
     if (max_data_bytes<3 || st->bitrate_bps < 3*frame_rate*8
        || (frame_rate<50 && (max_data_bytes*frame_rate<300 || st->bitrate_bps < 2400)))
     {
@@ -1816,7 +1842,10 @@
         {
            if (st->mode == MODE_HYBRID)
            {
-              st->silk_mode.maxBits = IMIN(st->silk_mode.maxBits, st->silk_mode.bitRate * frame_size / st->Fs);
+              /* Allow SILK to steal up to 25% of the remaining bits */
+              opus_int16 other_bits = IMAX(0, st->silk_mode.maxBits - st->silk_mode.bitRate * frame_size / st->Fs);
+              st->silk_mode.maxBits = IMAX(0, st->silk_mode.maxBits - other_bits*3/4);
+              st->silk_mode.useCBR = 0;
            }
         } else {
            /* Constrained VBR. */
@@ -1929,26 +1958,10 @@
     if (st->mode != MODE_SILK_ONLY)
     {
         opus_val32 celt_pred=2;
-        celt_encoder_ctl(celt_enc, OPUS_SET_VBR(0));
         /* We may still decide to disable prediction later */
         if (st->silk_mode.reducedDependency)
            celt_pred = 0;
         celt_encoder_ctl(celt_enc, CELT_SET_PREDICTION(celt_pred));
-
-        if (st->mode == MODE_HYBRID)
-        {
-            if( st->use_vbr ) {
-                celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(st->bitrate_bps-st->silk_mode.bitRate));
-                celt_encoder_ctl(celt_enc, OPUS_SET_VBR_CONSTRAINT(0));
-            }
-        } else {
-            if (st->use_vbr)
-            {
-                celt_encoder_ctl(celt_enc, OPUS_SET_VBR(1));
-                celt_encoder_ctl(celt_enc, OPUS_SET_VBR_CONSTRAINT(st->vbr_constraint));
-                celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(st->bitrate_bps));
-            }
-        }
     }
 
     ALLOC(tmp_prefill, st->channels*st->Fs/400, opus_val16);
@@ -2042,8 +2055,21 @@
         ec_enc_done(&enc);
         nb_compr_bytes = ret;
     } else {
-       nb_compr_bytes = (max_data_bytes-1)-redundancy_bytes;
-       ec_enc_shrink(&enc, nb_compr_bytes);
+        nb_compr_bytes = (max_data_bytes-1)-redundancy_bytes;
+#ifdef ENABLE_NEURAL_FEC
+        if (st->dred_duration > 0)
+        {
+            int max_celt_bytes;
+            opus_int32 dred_bytes = dred_bitrate_bps/(frame_rate*8);
+            /* Allow CELT to steal up to 25% of the remaining bits. */
+            max_celt_bytes = nb_compr_bytes - dred_bytes*3/4;
+            /* But try to give CELT at least 4 bytes */
+            max_celt_bytes = IMAX(ec_tell(&enc)/8 + 4, max_celt_bytes);
+            /* Subject to the original max. */
+            nb_compr_bytes = IMIN(nb_compr_bytes, max_celt_bytes);
+        }
+#endif
+        ec_enc_shrink(&enc, nb_compr_bytes);
     }
 
 #ifndef DISABLE_FLOAT_API
@@ -2078,6 +2104,34 @@
 
     if (st->mode != MODE_SILK_ONLY)
     {
+        celt_encoder_ctl(celt_enc, OPUS_SET_VBR(st->use_vbr));
+        if (st->mode == MODE_HYBRID)
+        {
+            if( st->use_vbr ) {
+                celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(st->bitrate_bps-st->silk_mode.bitRate));
+                celt_encoder_ctl(celt_enc, OPUS_SET_VBR_CONSTRAINT(0));
+            }
+        } else {
+            if (st->use_vbr)
+            {
+                celt_encoder_ctl(celt_enc, OPUS_SET_VBR(1));
+                celt_encoder_ctl(celt_enc, OPUS_SET_VBR_CONSTRAINT(st->vbr_constraint));
+                celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(st->bitrate_bps));
+            }
+        }
+#ifdef ENABLE_NEURAL_FEC
+        /* When Using DRED CBR, we can actually make the CELT part VBR and have DRED pick up the slack. */
+        if (!st->use_vbr && st->dred_duration > 0)
+        {
+            opus_int32 celt_bitrate = st->bitrate_bps;
+            celt_encoder_ctl(celt_enc, OPUS_SET_VBR(1));
+            celt_encoder_ctl(celt_enc, OPUS_SET_VBR_CONSTRAINT(0));
+            if (st->mode == MODE_HYBRID) {
+                celt_bitrate -= st->silk_mode.bitRate;
+            }
+            celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(celt_bitrate));
+        }
+#endif
         if (st->mode != st->prev_mode && st->prev_mode > 0)
         {
            unsigned char dummy[2];
@@ -2090,10 +2144,6 @@
         /* If false, we already busted the budget and we'll end up with a "PLC frame" */
         if (ec_tell(&enc) <= 8*nb_compr_bytes)
         {
-           /* Set the bitrate again if it was overridden in the redundancy code above*/
-           if (redundancy && celt_to_silk && st->mode==MODE_HYBRID && st->use_vbr)
-              celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(st->bitrate_bps-st->silk_mode.bitRate));
-           celt_encoder_ctl(celt_enc, OPUS_SET_VBR(st->use_vbr));
            ret = celt_encode_with_ec(celt_enc, pcm_buf, frame_size, NULL, nb_compr_bytes, &enc);
            if (ret < 0)
            {
@@ -2104,7 +2154,7 @@
            if (redundancy && celt_to_silk && st->mode==MODE_HYBRID && st->use_vbr)
            {
               OPUS_MOVE(data+ret, data+nb_compr_bytes, redundancy_bytes);
-              nb_compr_bytes = nb_compr_bytes+redundancy_bytes;
+              nb_compr_bytes = ret+redundancy_bytes;
            }
         }
     }
@@ -2206,7 +2256,8 @@
        int dred_chunks;
        int dred_bytes_left;
        dred_chunks = IMIN((st->dred_duration+5)/4, DRED_NUM_REDUNDANCY_FRAMES/2);
-       dred_bytes_left = IMIN(DRED_MAX_DATA_SIZE, max_data_bytes-ret-2);
+       /* Remaining space for DRED, accounting for cost the 3 extra bytes for code 3, padding length, and extension number. */
+       dred_bytes_left = IMIN(DRED_MAX_DATA_SIZE, max_data_bytes-ret-3);
        /* Check whether we actually have something to encode. */
        if (dred_chunks >= 1 && dred_bytes_left >= DRED_MIN_BYTES+2) {
            int dred_bytes;
--