shithub: opus

Download patch

ref: 969bd7662fd83ff1b0ad7d546449d7dcbbc0bb6c
parent: b2b2e226c3af2547e769731517c3ff7389e65026
author: Jean-Marc Valin <jmvalin@amazon.com>
date: Fri Aug 13 23:58:11 EDT 2021

Adding API for PLC

Packet loss concealment based on LPCNet (work in progress)

--- a/dnn/Makefile.am
+++ b/dnn/Makefile.am
@@ -36,7 +36,8 @@
 	pitch.c \
 	freq.c \
 	kiss_fft.c \
-	celt_lpc.c
+	celt_lpc.c \
+	lpcnet_plc.c
 
 liblpcnet_la_LIBADD = $(DEPS_LIBS) $(lrintf_lib) $(LIBM)
 liblpcnet_la_LDFLAGS = -no-undefined \
--- a/dnn/include/lpcnet.h
+++ b/dnn/include/lpcnet.h
@@ -58,7 +58,9 @@
 
 typedef struct LPCNetEncState LPCNetEncState;
 
+typedef struct LPCNetPLCState LPCNetPLCState;
 
+
 /** Gets the size of an <code>LPCNetDecState</code> structure.
   * @returns The size in bytes.
   */
@@ -173,5 +175,16 @@
   * @retval 0 Success
   */
 LPCNET_EXPORT void lpcnet_synthesize(LPCNetState *st, const float *features, short *output, int N);
+
+/** Initializes (resets) an <code>LPCNetPLCState</code>. */
+LPCNET_EXPORT void lpcnet_plc_init(LPCNetPLCState *st);
+/** Allocates a PLC state and initializes it with lpcnet_plc_init(). */
+LPCNET_EXPORT LPCNetPLCState *lpcnet_plc_create();
+/** Frees a PLC state allocated by lpcnet_plc_create(). */
+LPCNET_EXPORT void lpcnet_plc_destroy(LPCNetPLCState *st);
+/** Feeds one received frame to the PLC; pcm may be modified in place after a concealed frame. @retval 0 Always */
+LPCNET_EXPORT int lpcnet_plc_update(LPCNetPLCState *st, short *pcm);
+/** Writes one synthesized frame to pcm in place of a lost frame. @retval 0 Always */
+LPCNET_EXPORT int lpcnet_plc_conceal(LPCNetPLCState *st, short *pcm);
 
 #endif
--- a/dnn/lpcnet.c
+++ b/dnn/lpcnet.c
@@ -171,13 +171,9 @@
 }
 
 
-LPCNET_EXPORT void lpcnet_synthesize(LPCNetState *lpcnet, const float *features, short *output, int N)
+void lpcnet_synthesize_tail_impl(LPCNetState *lpcnet, short *output, int N, int preload)
 {
     int i;
-    float lpc[LPC_ORDER];
-    float gru_a_condition[3*GRU_A_STATE_SIZE];
-    float gru_b_condition[3*GRU_B_STATE_SIZE];
-    run_frame_network(lpcnet, gru_a_condition, gru_b_condition, lpc, features);
 
     if (lpcnet->frame_count <= FEATURES_DELAY)
     {
@@ -192,10 +188,11 @@
         int last_sig_ulaw;
         int pred_ulaw;
         float pred = 0;
-        for (j=0;j<LPC_ORDER;j++) pred -= lpcnet->last_sig[j]*lpc[j];
+        for (j=0;j<LPC_ORDER;j++) pred -= lpcnet->last_sig[j]*lpcnet->lpc[j];
         last_sig_ulaw = lin2ulaw(lpcnet->last_sig[0]);
         pred_ulaw = lin2ulaw(pred);
-        exc = run_sample_network(&lpcnet->nnet, gru_a_condition, gru_b_condition, lpcnet->last_exc, last_sig_ulaw, pred_ulaw, lpcnet->sampling_logit_table, &lpcnet->rng);
+        exc = run_sample_network(&lpcnet->nnet, lpcnet->gru_a_condition, lpcnet->gru_b_condition, lpcnet->last_exc, last_sig_ulaw, pred_ulaw, lpcnet->sampling_logit_table, &lpcnet->rng);
+        if (i < preload) exc = lin2ulaw(output[i]-PREEMPH*lpcnet->deemph_mem - pred);
         pcm = pred + ulaw2lin(exc);
         RNN_MOVE(&lpcnet->last_sig[1], &lpcnet->last_sig[0], LPC_ORDER-1);
         lpcnet->last_sig[0] = pcm;
@@ -208,6 +205,15 @@
     }
 }
 
+void lpcnet_synthesize_impl(LPCNetState *lpcnet, const float *features, short *output, int N, int preload)
+{ /* Runs the frame network on features, then the per-sample loop for N samples. */
+    run_frame_network(lpcnet, lpcnet->gru_a_condition, lpcnet->gru_b_condition, lpcnet->lpc, features); /* results are stored in *lpcnet */
+    lpcnet_synthesize_tail_impl(lpcnet, output, N, preload); /* for i < preload the excitation is derived from output[i] */
+}
+
+LPCNET_EXPORT void lpcnet_synthesize(LPCNetState *lpcnet, const float *features, short *output, int N) {
+    lpcnet_synthesize_impl(lpcnet, features, output, N, 0); /* public entry point: preload is 0, so no sample is derived from output */
+}
 
 LPCNET_EXPORT int lpcnet_decoder_get_size()
 {
--- a/dnn/lpcnet_demo.c
+++ b/dnn/lpcnet_demo.c
@@ -38,16 +38,19 @@
 #define MODE_DECODE 1
 #define MODE_FEATURES 2
 #define MODE_SYNTHESIS 3
+#define MODE_PLC 4
 
 int main(int argc, char **argv) {
     int mode;
+    int plc_percent=0;
     FILE *fin, *fout;
-    if (argc != 4)
+    if (argc != 4 && !(argc == 5 && strcmp(argv[1], "-plc") == 0))
     {
         fprintf(stderr, "usage: lpcnet_demo -encode <input.pcm> <compressed.lpcnet>\n");
         fprintf(stderr, "       lpcnet_demo -decode <compressed.lpcnet> <output.pcm>\n");
         fprintf(stderr, "       lpcnet_demo -features <input.pcm> <features.f32>\n");
         fprintf(stderr, "       lpcnet_demo -synthesis <features.f32> <output.pcm>\n");
+        fprintf(stderr, "       lpcnet_demo -plc <percent> <input.pcm> <output.pcm>\n");
         return 0;
     }
     if (strcmp(argv[1], "-encode") == 0) mode=MODE_ENCODE;
@@ -54,7 +57,11 @@
     else if (strcmp(argv[1], "-decode") == 0) mode=MODE_DECODE;
     else if (strcmp(argv[1], "-features") == 0) mode=MODE_FEATURES;
     else if (strcmp(argv[1], "-synthesis") == 0) mode=MODE_SYNTHESIS;
-    else {
+    else if (strcmp(argv[1], "-plc") == 0) {
+        mode=MODE_PLC;
+        plc_percent = atoi(argv[2]);
+        argv++;
+    } else {
         exit(1);
     }
     fin = fopen(argv[2], "rb");
@@ -123,6 +130,23 @@
             fwrite(pcm, sizeof(pcm[0]), LPCNET_FRAME_SIZE, fout);
         }
         lpcnet_destroy(net);
+    } else if (mode == MODE_PLC) {
+        int count=0;
+        int loss=0;
+        LPCNetPLCState *net;
+        net = lpcnet_plc_create();
+        while (1) {
+            short pcm[FRAME_SIZE];
+            size_t ret;
+            ret = fread(pcm, sizeof(pcm[0]), FRAME_SIZE, fin);
+            if (feof(fin) || ret != FRAME_SIZE) break;
+            if (count % 2 == 0) loss = rand() < RAND_MAX*(float)plc_percent/100.f;
+            if (loss) lpcnet_plc_conceal(net, pcm);
+            else lpcnet_plc_update(net, pcm);
+            fwrite(pcm, sizeof(pcm[0]), FRAME_SIZE, fout);
+            count++;
+        }
+        lpcnet_plc_destroy(net);
     } else {
         fprintf(stderr, "unknown action\n");
     }
--- a/dnn/lpcnet_enc.c
+++ b/dnn/lpcnet_enc.c
@@ -894,7 +894,7 @@
   return 0;
 }
 
-LPCNET_EXPORT int lpcnet_compute_single_frame_features(LPCNetEncState *st, const short *pcm, float features[NB_TOTAL_FEATURES]) {
+int lpcnet_compute_single_frame_features(LPCNetEncState *st, const short *pcm, float features[NB_TOTAL_FEATURES]) {
   int i;
   float x[FRAME_SIZE];
   for (i=0;i<FRAME_SIZE;i++) x[i] = pcm[i];
--- /dev/null
+++ b/dnn/lpcnet_plc.c
@@ -1,0 +1,131 @@
+/* Copyright (c) 2021 Amazon */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "lpcnet_private.h"
+#include "lpcnet.h"
+
+/* Resets a PLC state: synthesis and analysis states re-initialized, history cleared, no concealment pending. */
+LPCNET_EXPORT void lpcnet_plc_init(LPCNetPLCState *st) {
+  lpcnet_init(&st->lpcnet);
+  lpcnet_encoder_init(&st->enc);
+  st->blend = st->skip_analysis = 0;
+  st->pcm_fill = PLC_BUF_SIZE;       /* the cleared history counts as buffered samples */
+  RNN_CLEAR(st->pcm, PLC_BUF_SIZE);
+}
+
+/* Allocates and initializes a PLC state. Returns NULL if the allocation fails; free with lpcnet_plc_destroy(). */
+LPCNET_EXPORT LPCNetPLCState *lpcnet_plc_create() {
+  LPCNetPLCState *st = malloc(sizeof(*st));
+  if (st != NULL) lpcnet_plc_init(st); /* never initialize through a failed allocation */
+  return st;
+}
+
+LPCNET_EXPORT void lpcnet_plc_destroy(LPCNetPLCState *st) {
+  free(st); /* releases the block allocated by lpcnet_plc_create() */
+}
+
+LPCNET_EXPORT int lpcnet_plc_update(LPCNetPLCState *st, short *pcm) { /* Feed one received frame (FRAME_SIZE samples) to the PLC; always returns 0. */
+  int i;
+  float x[FRAME_SIZE];
+  short output[FRAME_SIZE];
+  st->enc.pcount = 0;
+  if (st->skip_analysis) {
+    /* skip_analysis is only raised by lpcnet_plc_conceal(): buffer (part of) this frame in st->pcm. */
+    if (st->blend) { /* set by lpcnet_plc_conceal(): the previous frame was concealed */
+      short tmp[FRAME_SIZE-TRAINING_OFFSET];
+      lpcnet_synthesize_tail_impl(&st->lpcnet, tmp, FRAME_SIZE-TRAINING_OFFSET, 0); /* no new frame-network pass */
+      for (i=0;i<FRAME_SIZE-TRAINING_OFFSET;i++) {
+        float w;
+        w = .5 - .5*cos(M_PI*i/(FRAME_SIZE-TRAINING_OFFSET)); /* ramps from 0 at i=0 towards 1 */
+        pcm[i] = (int)floor(.5 + w*pcm[i] + (1-w)*tmp[i]); /* cross-fade from tmp to the received pcm, rounded */
+      }
+      st->blend = 0;
+      RNN_COPY(st->pcm, &pcm[FRAME_SIZE-TRAINING_OFFSET], TRAINING_OFFSET); /* buffer the un-blended tail of the frame */
+      st->pcm_fill = TRAINING_OFFSET;
+    } else {
+      RNN_COPY(&st->pcm[st->pcm_fill], pcm, FRAME_SIZE); /* append the whole frame to the buffer */
+      st->pcm_fill += FRAME_SIZE;
+    }
+    /* st->pcm_fill is the number of buffered samples at the start of st->pcm. */
+  }
+  /* Update the analysis state with the (possibly blended) received frame. */
+  /* x is pre-emphasized in place; the features are then read from st->enc.features[0]. */
+  for (i=0;i<FRAME_SIZE;i++) x[i] = pcm[i];
+  preemphasis(x, &st->enc.mem_preemph, x, PREEMPHASIS, FRAME_SIZE);
+  compute_frame_features(&st->enc, x);
+  process_single_frame(&st->enc, NULL);
+  if (st->skip_analysis) {
+    float lpc[LPC_ORDER];
+    float gru_a_condition[3*GRU_A_STATE_SIZE];
+    float gru_b_condition[3*GRU_B_STATE_SIZE];
+    /* FIXME: backtrack state, replace features. */
+    run_frame_network(&st->lpcnet, gru_a_condition, gru_b_condition, lpc, st->enc.features[0]); /* outputs go to scratch locals and are discarded */
+    st->skip_analysis--;
+  } else {
+    for (i=0;i<FRAME_SIZE;i++) st->pcm[PLC_BUF_SIZE+i] = pcm[i]; /* append the new frame after the history */
+    RNN_COPY(output, &st->pcm[0], FRAME_SIZE); /* oldest buffered frame */
+    lpcnet_synthesize_impl(&st->lpcnet, st->enc.features[0], output, FRAME_SIZE, FRAME_SIZE); /* preload == N: every excitation is derived from output */
+    /* Drop the frame just consumed; PLC_BUF_SIZE samples of history remain. */
+    RNN_MOVE(st->pcm, &st->pcm[FRAME_SIZE], PLC_BUF_SIZE);
+  }
+  RNN_COPY(st->features, st->enc.features[0], NB_TOTAL_FEATURES); /* saved for lpcnet_plc_conceal() */
+  return 0;
+}
+
+LPCNET_EXPORT int lpcnet_plc_conceal(LPCNetPLCState *st, short *pcm) { /* Write one synthesized frame (FRAME_SIZE samples) to pcm in place of a lost one; always returns 0. */
+  short output[FRAME_SIZE];
+  st->enc.pcount = 0;
+  /* If we concealed the previous frame, finish synthesizing the rest of the samples. */
+  /* FIXME: Copy/predict features. */
+  while (st->pcm_fill > 0) { /* first run the still-buffered received samples through the synthesizer */
+    /* Each pass consumes up to FRAME_SIZE buffered samples and adds one to st->skip_analysis. */
+    int update_count;
+    update_count = IMIN(st->pcm_fill, FRAME_SIZE);
+    RNN_COPY(output, &st->pcm[0], update_count);
+
+    lpcnet_synthesize_impl(&st->lpcnet, &st->features[0], output, update_count, update_count); /* preload == N, with the last saved features */
+    RNN_MOVE(st->pcm, &st->pcm[FRAME_SIZE], PLC_BUF_SIZE);
+    st->pcm_fill -= update_count;
+    st->skip_analysis++;
+  }
+  lpcnet_synthesize_tail_impl(&st->lpcnet, pcm, FRAME_SIZE-TRAINING_OFFSET, 0); /* first part of the frame, no new frame-network pass */
+  lpcnet_synthesize_impl(&st->lpcnet, &st->features[0], &pcm[FRAME_SIZE-TRAINING_OFFSET], TRAINING_OFFSET, 0); /* last TRAINING_OFFSET samples, after a new pass on the saved features */
+  {
+    int i;
+    float x[FRAME_SIZE];
+    /* FIXME: Can we do better? */
+    for (i=0;i<FRAME_SIZE;i++) x[i] = pcm[i]; /* the analysis state is updated from the concealed output */
+    preemphasis(x, &st->enc.mem_preemph, x, PREEMPHASIS, FRAME_SIZE);
+    compute_frame_features(&st->enc, x);
+    process_single_frame(&st->enc, NULL);
+  }
+  st->blend = 1; /* makes the next lpcnet_plc_update() cross-fade into the received audio */
+  return 0;
+}
--- a/dnn/lpcnet_private.h
+++ b/dnn/lpcnet_private.h
@@ -32,8 +32,11 @@
     float old_lpc[FEATURES_DELAY][LPC_ORDER];
 #endif
     float sampling_logit_table[256];
+    float gru_a_condition[3*GRU_A_STATE_SIZE];
+    float gru_b_condition[3*GRU_B_STATE_SIZE];
     int frame_count;
     float deemph_mem;
+    float lpc[LPC_ORDER];
     kiss99_ctx rng;
 };
 
@@ -63,6 +66,16 @@
   int exc_mem;
 };
 
+#define PLC_BUF_SIZE (FEATURES_DELAY*FRAME_SIZE + TRAINING_OFFSET)
+struct LPCNetPLCState {
+  LPCNetState lpcnet; /* synthesis state driven by lpcnet_synthesize_impl()/lpcnet_synthesize_tail_impl() */
+  LPCNetEncState enc; /* analysis state used to compute features from PCM */
+  short pcm[PLC_BUF_SIZE+FRAME_SIZE]; /* PLC_BUF_SIZE samples of history plus room for one incoming frame */
+  int pcm_fill; /* number of buffered samples at the start of pcm */
+  int skip_analysis; /* incremented per catch-up pass in lpcnet_plc_conceal(), decremented per lpcnet_plc_update() */
+  int blend; /* set by lpcnet_plc_conceal(); the next lpcnet_plc_update() cross-fades and clears it */
+  float features[NB_TOTAL_FEATURES]; /* copy of enc.features[0] saved by the last lpcnet_plc_update() */
+};
 
 extern float ceps_codebook1[];
 extern float ceps_codebook2[];
@@ -78,6 +91,13 @@
 void compute_frame_features(LPCNetEncState *st, const float *in);
 
 void decode_packet(float features[4][NB_TOTAL_FEATURES], float *vq_mem, const unsigned char buf[8]);
+
+/* Internal entry points used by lpcnet_plc.c; process_single_frame() is already declared further down. */
+void run_frame_network(LPCNetState *lpcnet, float *gru_a_condition, float *gru_b_condition, float *lpc, const float *features);
+void lpcnet_synthesize_tail_impl(LPCNetState *lpcnet, short *output, int N, int preload);
+void lpcnet_synthesize_impl(LPCNetState *lpcnet, const float *features, short *output, int N, int preload);
+void lpcnet_synthesize_blend_impl(LPCNetState *lpcnet, const short *pcm_in, short *output, int N); /* NOTE(review): no definition visible in this change -- confirm */
+int lpcnet_compute_single_frame_features(LPCNetEncState *st, const short *pcm, float features[NB_TOTAL_FEATURES]);
 
 void process_single_frame(LPCNetEncState *st, FILE *ffeat);
 
--