ref: 91d90676e19e815ab10265f9b64c2fc3688f54b5
parent: b05f950e38d22b6f4f097f67834b04c38d19a943
author: Jean-Marc Valin <jmvalin@jmvalin.ca>
date: Sat Dec 1 07:05:23 EST 2018
Remove the need for useless exc and pred files
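
The excitation and prediction data are now computed inside train_lpcnet.py
(the predictor loop already fills upred[] from the features), so dump_data
only has to write the feature and PCM files, and the intermediate exc.s8 and
pred.s16 files no longer need to hit the disk. The new invocations are:

    ./dump_data input.s16 features.f32 pcm.s16
    ./train_lpcnet.py features.f32 pcm.s16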
--- a/dnn/README.md
+++ b/dnn/README.md
@@ -19,7 +19,7 @@
1. Then, run the resulting executable:
```
- ./dump_data input.s16 exc.s8 features.f32 pred.s16 pcm.s16
+ ./dump_data input.s16 features.f32 pcm.s16
```
where the first file contains 16 kHz 16-bit raw PCM audio (no header)
@@ -29,7 +29,7 @@
1. Now that you have your files, you can do the training with:
```
- ./train_lpcnet.py exc.s8 features.f32 pred.s16 pcm.s16
+ ./train_lpcnet.py features.f32 pcm.s16
```
and it will generate a wavenet*.h5 file for each iteration. If it stops with a
"Failed to allocate RNN reserve space" message try reducing the *batch\_size* variable in train_wavenet_audio.py.
--- a/dnn/denoise.c
+++ b/dnn/denoise.c
@@ -579,9 +579,7 @@
float mem_preemph=0;
float x[FRAME_SIZE];
FILE *f1;
- FILE *fexc;
FILE *ffeat;
- FILE *fpred;
FILE *fpcm;
signed char iexc[FRAME_SIZE];
short pred[FRAME_SIZE];
@@ -588,15 +586,13 @@
short pcm[FRAME_SIZE];
DenoiseState *st;
st = rnnoise_create();
- if (argc!=6) {
- fprintf(stderr, "usage: %s <speech> <exc out> <features out> <prediction out> <pcm out> \n", argv[0]);
+ if (argc!=4) {
+ fprintf(stderr, "usage: %s <speech> <features out> <pcm out>\n", argv[0]);
return 1;
}
f1 = fopen(argv[1], "r");
- fexc = fopen(argv[2], "w");
- ffeat = fopen(argv[3], "w");
- fpred = fopen(argv[4], "w");
- fpcm = fopen(argv[5], "w");
+ ffeat = fopen(argv[2], "w");
+ fpcm = fopen(argv[3], "w");
while (1) {
kiss_fft_cpx X[FREQ_SIZE], P[WINDOW_SIZE];
float Ex[NB_BANDS], Ep[NB_BANDS];
@@ -617,17 +613,14 @@
preemphasis(x, &mem_preemph, x, PREEMPHASIS, FRAME_SIZE);
compute_frame_features(st, iexc, pred, pcm, X, P, Ex, Ep, Exp, features, x);
-#if 1
- fwrite(iexc, sizeof(signed char), FRAME_SIZE, fexc);
fwrite(features, sizeof(float), NB_FEATURES, ffeat);
- fwrite(pred, sizeof(short), FRAME_SIZE, fpred);
fwrite(pcm, sizeof(short), FRAME_SIZE, fpcm);
-#endif
count++;
}
//fprintf(stderr, "matrix size: %d x %d\n", count, NB_FEATURES + 2*NB_BANDS + 1);
fclose(f1);
- fclose(fexc);
+ fclose(ffeat);
+ fclose(fpcm);
return 0;
}
--- a/dnn/train_lpcnet.py
+++ b/dnn/train_lpcnet.py
@@ -56,10 +56,8 @@
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
model.summary()
-exc_file = sys.argv[1] # not used at present
-feature_file = sys.argv[2]
-pred_file = sys.argv[3] # LPC predictor samples. Not used at present, see below
-pcm_file = sys.argv[4] # 16 bit unsigned short PCM samples
+feature_file = sys.argv[1]
+pcm_file = sys.argv[2] # 16 bit signed short PCM samples
frame_size = 160
nb_features = 55
nb_used_features = model.nb_used_features
@@ -96,8 +94,7 @@
# Note: the LPC predictor output is now calculated by the loop below, this code was
# for an earlier version that implemented the prediction filter in C
-upred = np.fromfile(pred_file, dtype='int16')
-upred = upred[:nb_frames*pcm_chunk_size]
+upred = np.zeros((nb_frames*pcm_chunk_size,), dtype='int16')
# Use 16th order LPC to generate LPC prediction output upred[] and (in
# mu-law form) pred[]
--
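The zero-initialized upred above is filled in by the predictor loop that
follows it in train_lpcnet.py. As a rough illustration of what that 16th-order
prediction amounts to, here is a minimal self-contained sketch; it assumes
per-frame LPC coefficients are already available (e.g. extracted from the
feature file), and all names are illustrative, not the script's actual
variables:

```python
import numpy as np

def lpc_predict(pcm, lpc, frame_size=160, order=16):
    """Recompute the prediction samples that pred.s16 used to carry.

    pcm: int16 samples; lpc: per-frame LPC coefficients of shape
    (n_frames, order). Assumed sign convention: the predicted sample is
    x_hat[n] = -sum_k lpc[frame, k] * x[n-1-k].
    """
    upred = np.zeros(len(pcm), dtype='int16')
    for i in range(order, len(pcm)):
        frame = min(i // frame_size, lpc.shape[0] - 1)
        # Previous `order` samples, most recent first.
        hist = pcm[i - order:i][::-1].astype(np.float32)
        upred[i] = np.clip(-np.dot(lpc[frame], hist), -32768, 32767)
    return upred
```

With the prediction recomputed this way, the excitation that exc.s8 used to
hold is essentially the residual pcm[n] - upred[n], which is why neither file
has to be dumped anymore.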