shithub: opus

Download patch

ref: 91d90676e19e815ab10265f9b64c2fc3688f54b5
parent: b05f950e38d22b6f4f097f67834b04c38d19a943
author: Jean-Marc Valin <jmvalin@jmvalin.ca>
date: Sat Dec 1 07:05:23 EST 2018

Remove the need for useless exc and pred files

--- a/dnn/README.md
+++ b/dnn/README.md
@@ -19,7 +19,7 @@
 
 1. Then, run the resulting executable:
    ```
-   ./dump_data input.s16 exc.s8 features.f32 pred.s16 pcm.s16
+   ./dump_data input.s16 features.f32 pcm.s16
    ```
 
    where the first file contains 16 kHz 16-bit raw PCM audio (no header)
@@ -29,7 +29,7 @@
 
 1. Now that you have your files, you can do the training with:
    ```
-   ./train_lpcnet.py exc.s8 features.f32 pred.s16 pcm.s16
+   ./train_lpcnet.py features.f32 pcm.s16
    ```
    and it will generate a wavenet*.h5 file for each iteration. If it stops with a 
    "Failed to allocate RNN reserve space" message try reducing the *batch\_size* variable in train_wavenet_audio.py.
--- a/dnn/denoise.c
+++ b/dnn/denoise.c
@@ -579,9 +579,7 @@
   float mem_preemph=0;
   float x[FRAME_SIZE];
   FILE *f1;
-  FILE *fexc;
   FILE *ffeat;
-  FILE *fpred;
   FILE *fpcm;
   signed char iexc[FRAME_SIZE];
   short pred[FRAME_SIZE];
@@ -588,15 +586,13 @@
   short pcm[FRAME_SIZE];
   DenoiseState *st;
   st = rnnoise_create();
-  if (argc!=6) {
-    fprintf(stderr, "usage: %s <speech> <exc out> <features out> <prediction out> <pcm out> \n", argv[0]);
+  if (argc!=4) {
+    fprintf(stderr, "usage: %s <speech> <features out> <pcm out>\n", argv[0]);
     return 1;
   }
   f1 = fopen(argv[1], "r");
-  fexc = fopen(argv[2], "w");
-  ffeat = fopen(argv[3], "w");
-  fpred = fopen(argv[4], "w");
-  fpcm = fopen(argv[5], "w");
+  ffeat = fopen(argv[2], "w");
+  fpcm = fopen(argv[3], "w");
   while (1) {
     kiss_fft_cpx X[FREQ_SIZE], P[WINDOW_SIZE];
     float Ex[NB_BANDS], Ep[NB_BANDS];
@@ -617,17 +613,14 @@
     preemphasis(x, &mem_preemph, x, PREEMPHASIS, FRAME_SIZE);
 
     compute_frame_features(st, iexc, pred, pcm, X, P, Ex, Ep, Exp, features, x);
-#if 1
-    fwrite(iexc, sizeof(signed char), FRAME_SIZE, fexc);
     fwrite(features, sizeof(float), NB_FEATURES, ffeat);
-    fwrite(pred, sizeof(short), FRAME_SIZE, fpred);
     fwrite(pcm, sizeof(short), FRAME_SIZE, fpcm);
-#endif
     count++;
   }
   //fprintf(stderr, "matrix size: %d x %d\n", count, NB_FEATURES + 2*NB_BANDS + 1);
   fclose(f1);
-  fclose(fexc);
+  fclose(ffeat);
+  fclose(fpcm);
   return 0;
 }
 
--- a/dnn/train_lpcnet.py
+++ b/dnn/train_lpcnet.py
@@ -56,10 +56,8 @@
 model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
 model.summary()
 
-exc_file = sys.argv[1]     # not used at present
-feature_file = sys.argv[2]
-pred_file = sys.argv[3]    # LPC predictor samples. Not used at present, see below
-pcm_file = sys.argv[4]     # 16 bit unsigned short PCM samples
+feature_file = sys.argv[1]
+pcm_file = sys.argv[2]     # 16 bit signed short PCM samples (as written by dump_data)
 frame_size = 160
 nb_features = 55
 nb_used_features = model.nb_used_features
@@ -96,8 +94,7 @@
 # Note: the LPC predictor output is now calculated by the loop below, this code was
 # for an ealier version that implemented the prediction filter in C
 
-upred = np.fromfile(pred_file, dtype='int16')
-upred = upred[:nb_frames*pcm_chunk_size]
+upred = np.zeros((nb_frames*pcm_chunk_size,), dtype='int16')
 
 # Use 16th order LPC to generate LPC prediction output upred[] and (in
 # mu-law form) pred[]
--