shithub: opus

Download patch

ref: 4ccfbdff04cf8071084884174915d77996b4a3ad
parent: 538f25565a03989540f49e7a9a97a07cb29ee55a
author: Jean-Marc Valin <jmvalin@jmvalin.ca>
date: Mon Nov 26 13:41:54 EST 2018

Frame network seems to be working

--- a/dnn/dump_lpcnet.py
+++ b/dnn/dump_lpcnet.py
@@ -135,6 +135,7 @@
             .format(name, name, name, weights[0].shape[1], weights[0].shape[0], weights[0].shape[2], activation))
     hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[2]))
     hf.write('#define {}_STATE_SIZE ({}*{})\n'.format(name.upper(), weights[0].shape[1], (weights[0].shape[0]-1)))
+    hf.write('#define {}_DELAY {}\n'.format(name.upper(), (weights[0].shape[0]-1)//2))
     hf.write('extern const Conv1DLayer {};\n\n'.format(name));
     return True
 Conv1D.dump_layer = dump_conv1d_layer
--- a/dnn/lpcnet.c
+++ b/dnn/lpcnet.c
@@ -33,6 +33,8 @@
 #include "lpcnet.h"
 
 #define NB_FEATURES 38
+#define NB_TOTAL_FEATURES 55
+
 #define LPC_ORDER 16
 
 
@@ -43,10 +45,12 @@
 
 #define SAMPLE_INPUT_SIZE (2*EMBED_SIG_OUT_SIZE + EMBED_EXC_OUT_SIZE + FEATURE_DENSE2_OUT_SIZE)
 
+#define FEATURES_DELAY (FEATURE_CONV1_DELAY + FEATURE_CONV2_DELAY)
 struct LPCNetState {
     NNetState nnet;
     int last_exc;
     short last_sig[LPC_ORDER];
+    float old_input[FEATURES_DELAY][FEATURE_CONV2_OUT_SIZE];
 };
 
 
@@ -73,19 +77,23 @@
     return (int)floor(.5 + u);
 }
 
-void run_frame_network(NNetState *net, float *condition, float *lpc, const float *features, int pitch)
+void run_frame_network(LPCNetState *lpcnet, float *condition, float *lpc, const float *features, int pitch)
 {
     int i;
+    NNetState *net;
     float in[FRAME_INPUT_SIZE];
     float conv1_out[FEATURE_CONV1_OUT_SIZE];
     float conv2_out[FEATURE_CONV2_OUT_SIZE];
     float dense1_out[FEATURE_DENSE1_OUT_SIZE];
+    net = &lpcnet->nnet;
     RNN_COPY(in, features, NB_FEATURES);
     compute_embedding(&embed_pitch, &in[NB_FEATURES], pitch);
     compute_conv1d(&feature_conv1, conv1_out, net->feature_conv1_state, in);
     compute_conv1d(&feature_conv2, conv2_out, net->feature_conv2_state, conv1_out);
     celt_assert(FRAME_INPUT_SIZE == FEATURE_CONV2_OUT_SIZE);
-    for (i=0;i<FEATURE_CONV2_OUT_SIZE;i++) conv2_out[i] += in[i];
+    for (i=0;i<FEATURE_CONV2_OUT_SIZE;i++) conv2_out[i] += lpcnet->old_input[FEATURES_DELAY-1][i];
+    memmove(lpcnet->old_input[1], lpcnet->old_input[0], (FEATURES_DELAY-1)*FRAME_INPUT_SIZE*sizeof(in[0]));
+    memcpy(lpcnet->old_input[0], in, FRAME_INPUT_SIZE*sizeof(in[0]));
     compute_dense(&feature_dense1, dense1_out, conv2_out);
     compute_dense(&feature_dense2, condition, dense1_out);
     /* FIXME: Actually compute the LPC on the middle frame. */
@@ -127,10 +135,11 @@
     float pdf[DUAL_FC_OUT_SIZE];
     int pitch;
     float pitch_gain;
-    pitch = (int)floor(.5 + 50*features[36]+100);
+    /* FIXME: Do proper rounding once the Python code rounds properly. */
+    pitch = (int)floor(50*features[36]+100);
     /* FIXME: get the pitch gain from 2 frames in the past. */
     pitch_gain = features[PITCH_GAIN_FEATURE];
-    run_frame_network(&lpcnet->nnet, condition, lpc, features, pitch);
+    run_frame_network(lpcnet, condition, lpc, features, pitch);
     for (i=0;i<N;i++)
     {
         int j;
@@ -154,13 +163,16 @@
 
 #if 1
 #define FRAME_SIZE 160
-int main(int argc, char **argv) {
+int main() {
     LPCNetState *net;
     net = lpcnet_create();
     while (1) {
+        float in_features[NB_TOTAL_FEATURES];
         float features[NB_FEATURES];
         short pcm[FRAME_SIZE];
-        fread(features, sizeof(features[0]), NB_FEATURES, stdin);
+        fread(in_features, sizeof(features[0]), NB_TOTAL_FEATURES, stdin);
+        RNN_COPY(features, in_features, NB_FEATURES);
+        RNN_CLEAR(&features[18], 18);
         if (feof(stdin)) break;
         lpcnet_synthesize(net, pcm, features, FRAME_SIZE);
         fwrite(pcm, sizeof(pcm[0]), FRAME_SIZE, stdout);
--- a/dnn/nnet.c
+++ b/dnn/nnet.c
@@ -109,7 +109,7 @@
       for (i=0;i<N;i++)
          output[i] = sum*output[i];
    } else {
-      celt_assert(layer->activation == ACTIVATION_LINEAR);
+      celt_assert(activation == ACTIVATION_LINEAR);
       for (i=0;i<N;i++)
          output[i] = input[i];
    }
@@ -231,6 +231,9 @@
 void compute_embedding(const EmbeddingLayer *layer, float *output, int input)
 {
    int i;
+   celt_assert(input >= 0);
+   celt_assert(input < layer->nb_inputs);
+   /*if (layer->dim == 64) printf("%d\n", input);*/
    for (i=0;i<layer->dim;i++)
    {
       output[i] = layer->embedding_weights[input*layer->dim + i];
--