shithub: opus

Download patch

ref: 37ddc0a8b48e89318f02b736fa7f09c230a2c37d
parent: 74b98437bad697c863fb3e764820ec730a89d957
author: Jean-Marc Valin <jmvalin@jmvalin.ca>
date: Mon Dec 10 21:00:51 EST 2018

Add -test and -train options

-train performs data augmentation, cuts silence, and also outputs the preemphasis (written to the <pcm out> file)
-test only outputs the features for resynthesis (no PCM file is written)

--- a/dnn/denoise.c
+++ b/dnn/denoise.c
@@ -633,7 +633,7 @@
   int gain_change_count=0;
   FILE *f1;
   FILE *ffeat;
-  FILE *fpcm;
+  FILE *fpcm=NULL;
   signed char iexc[FRAME_SIZE];
   short pred[FRAME_SIZE];
   short pcm[FRAME_SIZE];
@@ -644,25 +644,31 @@
   float old_speech_gain = 1;
   int one_pass_completed = 0;
   DenoiseState *st;
+  int training = -1;
   st = rnnoise_create();
-  if (argc!=4) {
-    fprintf(stderr, "usage: %s <speech> <features out>\n", argv[0]);
+  if (argc == 5 && strcmp(argv[1], "-train")==0) training = 1;
+  if (argc == 4 && strcmp(argv[1], "-test")==0) training = 0;
+  if (training == -1) {
+    fprintf(stderr, "usage: %s -train <speech> <features out> <pcm out>\n", argv[0]);
+    fprintf(stderr, "  or   %s -test <speech> <features out>\n", argv[0]);
     return 1;
   }
-  f1 = fopen(argv[1], "r");
+  f1 = fopen(argv[2], "r");
   if (f1 == NULL) {
-      fprintf(stderr,"Error opening input .s16 16kHz speech input file: %s\n", argv[1]);
-      exit(1);
+    fprintf(stderr,"Error opening input .s16 16kHz speech input file: %s\n", argv[2]);
+    exit(1);
   }
-  ffeat = fopen(argv[2], "w");
+  ffeat = fopen(argv[3], "w");
   if (ffeat == NULL) {
-      fprintf(stderr,"Error opening output feature file: %s\n", argv[2]);
-      exit(1);
+    fprintf(stderr,"Error opening output feature file: %s\n", argv[3]);
+    exit(1);
   }
-  fpcm = fopen(argv[3], "w");
-  if (ffeat == NULL) {
-      fprintf(stderr,"Error opening output PCM file: %s\n", argv[2]);
+  if (training) {
+    fpcm = fopen(argv[4], "w");
+    if (fpcm == NULL) {
+      fprintf(stderr,"Error opening output PCM file: %s\n", argv[4]);
       exit(1);
+    }
   }
   while (1) {
     kiss_fft_cpx X[FREQ_SIZE], P[WINDOW_SIZE];
@@ -676,28 +682,31 @@
     for (i=0;i<FRAME_SIZE;i++) x[i] = tmp[i];
     fread(tmp, sizeof(short), FRAME_SIZE, f1);
     if (feof(f1)) {
+      if (!training) break;
       rewind(f1);
       fread(tmp, sizeof(short), FRAME_SIZE, f1);
       one_pass_completed = 1;
     }
     for (i=0;i<FRAME_SIZE;i++) E += tmp[i]*(float)tmp[i];
-    silent = E < 5000 || (last_silent && E < 20000);
-    if (!last_silent && silent) {
-      for (i=0;i<FRAME_SIZE;i++) savedX[i] = x[i];
-    }
-    if (last_silent && !silent) {
-        for (i=0;i<FRAME_SIZE;i++) {
-          float f = (float)i/FRAME_SIZE;
-          tmp[i] = (int)floor(.5 + f*tmp[i] + (1-f)*savedX[i]);
-        }
-    }
-    if (last_silent) {
+    if (training) {
+      silent = E < 5000 || (last_silent && E < 20000);
+      if (!last_silent && silent) {
+        for (i=0;i<FRAME_SIZE;i++) savedX[i] = x[i];
+      }
+      if (last_silent && !silent) {
+          for (i=0;i<FRAME_SIZE;i++) {
+            float f = (float)i/FRAME_SIZE;
+            tmp[i] = (int)floor(.5 + f*tmp[i] + (1-f)*savedX[i]);
+          }
+      }
+      if (last_silent) {
+        last_silent = silent;
+        continue;
+      }
       last_silent = silent;
-      continue;
     }
-    last_silent = silent;
     if (count==5000000 && one_pass_completed) break;
-    if (++gain_change_count > 2821) {
+    if (training && ++gain_change_count > 2821) {
       speech_gain = pow(10., (-20+(rand()%40))/20.);
       if (rand()%20==0) speech_gain *= .01;
       if (rand()%100==0) speech_gain = 0;
@@ -716,7 +725,7 @@
     for (i=0;i<FRAME_SIZE;i++) x[i] += rand()/(float)RAND_MAX - .5;
     compute_frame_features(st, iexc, pred, pcm, X, P, Ex, Ep, Exp, features, x);
     fwrite(features, sizeof(float), NB_FEATURES, ffeat);
-    fwrite(pcm, sizeof(short), FRAME_SIZE, fpcm);
+    if (fpcm) fwrite(pcm, sizeof(short), FRAME_SIZE, fpcm);
     old_speech_gain = speech_gain;
     count++;
   }
@@ -723,7 +732,7 @@
   //fprintf(stderr, "matrix size: %d x %d\n", count, NB_FEATURES + 2*NB_BANDS + 1);
   fclose(f1);
   fclose(ffeat);
-  fclose(fpcm);
+  if (fpcm) fclose(fpcm);
   return 0;
 }
 
--