shithub: opus

Download patch

ref: 0f7085439681d3709a2dc447cf2b3db14bf154e6
parent: 5b547e0d2e95c49f332e8605f2dd57273b5d5664
author: Jean-Marc Valin <jmvalin@amazon.com>
date: Mon Jun 5 12:12:06 EDT 2023

Handle DRED frames != 20 ms

--- a/silk/dred_encoder.c
+++ b/silk/dred_encoder.c
@@ -49,11 +49,12 @@
     RNN_CLEAR((char*)&enc->DREDENC_RESET_START,
               sizeof(DREDEnc)-
               ((char*)&enc->DREDENC_RESET_START - (char*)enc));
+    enc->input_buffer_fill = DRED_SILK_ENCODER_DELAY;
     lpcnet_encoder_init(&enc->lpcnet_enc_state);
     DRED_rdovae_init_encoder(&enc->rdovae_enc);
 }
 
-void dred_encoder_init(DREDEnc* enc, int Fs, int channels)
+void dred_encoder_init(DREDEnc* enc, opus_int32 Fs, int channels)
 {
     enc->Fs = Fs;
     enc->channels = channels;
@@ -63,15 +64,10 @@
     dred_encoder_reset(enc);
 }
 
-void dred_process_silk_frame(DREDEnc *enc, const float *silk_frame)
+static void dred_process_frame(DREDEnc *enc)
 {
-    int i;
     float feature_buffer[2 * 36];
-
     float input_buffer[2*DRED_NUM_FEATURES] = {0};
-    /* delay signal by 79 samples */
-    OPUS_MOVE(enc->input_buffer, enc->input_buffer + DRED_DFRAME_SIZE, DRED_SILK_ENCODER_DELAY);
-    for (i=0;i<DRED_DFRAME_SIZE;i++) enc->input_buffer[DRED_SILK_ENCODER_DELAY+i] = FLOAT2INT16(silk_frame[i]);
 
     /* shift latents buffer */
     OPUS_MOVE(enc->latents_buffer + DRED_LATENT_DIM, enc->latents_buffer, (DRED_MAX_FRAMES - 1) * DRED_LATENT_DIM);
@@ -87,6 +83,28 @@
     /* run RDOVAE encoder */
     DRED_rdovae_encode_dframe(&enc->rdovae_enc, &enc->model, enc->latents_buffer, enc->state_buffer, input_buffer);
     enc->latents_buffer_fill = IMIN(enc->latents_buffer_fill+1, DRED_NUM_REDUNDANCY_FRAMES);
+}
+
+void dred_compute_latents(DREDEnc *enc, const float *pcm, int frame_size)
+{
+    int frame_size16k = frame_size * 16000 / enc->Fs;
+    while (frame_size16k > 0) {
+        int i;
+        int process_size16k;
+        int process_size;
+        process_size16k = IMIN(2*DRED_FRAME_SIZE - enc->input_buffer_fill, frame_size16k);
+        process_size = process_size16k * enc->Fs / 16000;
+        for (i=0;i<process_size16k;i++) enc->input_buffer[enc->input_buffer_fill+i] = FLOAT2INT16(pcm[i]);
+        enc->input_buffer_fill += process_size16k;
+        if (enc->input_buffer_fill == 2*DRED_FRAME_SIZE)
+        {
+          dred_process_frame(enc);
+          enc->input_buffer_fill = 0;
+        }
+
+        pcm += process_size;
+        frame_size16k -= process_size;
+    }
 }
 
 int dred_encode_silk_frame(DREDEnc *enc, unsigned char *buf, int max_chunks, int max_bytes) {
--- a/silk/dred_encoder.h
+++ b/silk/dred_encoder.h
@@ -38,11 +38,12 @@
 
 typedef struct {
     RDOVAEEnc model;
-    int Fs;
+    opus_int32 Fs;
     int channels;
 
 #define DREDENC_RESET_START input_buffer
-    float input_buffer[DRED_DFRAME_SIZE + DRED_SILK_ENCODER_DELAY];
+    float input_buffer[DRED_DFRAME_SIZE];
+    int input_buffer_fill;
     float latents_buffer[DRED_MAX_FRAMES * DRED_LATENT_DIM];
     int latents_buffer_fill;
     float state_buffer[24];
@@ -51,12 +52,12 @@
 } DREDEnc;
 
 
-void dred_encoder_init(DREDEnc* enc, int Fs, int channels);
+void dred_encoder_init(DREDEnc* enc, opus_int32 Fs, int channels);
 void dred_encoder_reset(DREDEnc* enc);
 
 void dred_deinit_encoder(DREDEnc *enc);
 
-void dred_process_silk_frame(DREDEnc *enc, const float *silk_frame);
+void dred_compute_latents(DREDEnc *enc, const float *pcm, int frame_size);
 
 int dred_encode_silk_frame(DREDEnc *enc, unsigned char *buf, int max_chunks, int max_bytes);
 
--- a/src/opus_encoder.c
+++ b/src/opus_encoder.c
@@ -1829,7 +1829,7 @@
 #ifdef ENABLE_NEURAL_FEC
             if ( st->dred_duration > 0 ) {
                 /* DRED Encoder */
-                dred_process_silk_frame( &st->dred_encoder, &pcm_buf[total_buffer*st->channels] );
+                dred_compute_latents( &st->dred_encoder, &pcm_buf[total_buffer*st->channels], frame_size );
             } else {
                 st->dred_encoder.latents_buffer_fill = 0;
             }
--