shithub: opus

--- a/dnn/dump_data.c

+++ b/dnn/dump_data.c

@@ -83,7 +83,7 @@

     float p=0;

     float e;

     int j;

-    for (j=0;j<LPC_ORDER;j++) p -= st->features[k][2*NB_BANDS+3+j]*st->sig_mem[j];

+    for (j=0;j<LPC_ORDER;j++) p -= st->features[k][NB_BANDS+2+j]*st->sig_mem[j];

     e = lin2ulaw(pcm[k*FRAME_SIZE+i] - p);

     /* Signal. */

     data[4*i] = lin2ulaw(st->sig_mem[0]);

--- a/dnn/include/lpcnet.h

+++ b/dnn/include/lpcnet.h

@@ -42,8 +42,8 @@

 #endif

-#define NB_FEATURES 38

-#define NB_TOTAL_FEATURES 55

+#define NB_FEATURES 20

+#define NB_TOTAL_FEATURES 36

 /** Number of bytes in a compressed packet. */

 #define LPCNET_COMPRESSED_SIZE 8

--- a/dnn/lpcnet.c

+++ b/dnn/lpcnet.c

@@ -139,7 +139,7 @@

     float gru_b_condition[3*GRU_B_STATE_SIZE];

     int pitch;

     /* Matches the Python code -- the 0.1 avoids rounding issues. */

-    pitch = (int)floor(.1 + 50*features[36]+100);

+    pitch = (int)floor(.1 + 50*features[18]+100);

     pitch = IMIN(255, IMAX(33, pitch));

     memmove(&lpcnet->old_gain[1], &lpcnet->old_gain[0], (FEATURES_DELAY-1)*sizeof(lpcnet->old_gain[0]));

     lpcnet->old_gain[0] = features[PITCH_GAIN_FEATURE];

--- a/dnn/lpcnet_dec.c

+++ b/dnn/lpcnet_dec.c

@@ -124,8 +124,8 @@

     float p = pow(2.f, main_pitch/21.)*PITCH_MIN_PERIOD;

     p *= 1 + modulation/16./7.*(2*sub-3);

     p = MIN16(255, MAX16(33, p));

-    features[sub][2*NB_BANDS] = .02*(p-100);

-    features[sub][2*NB_BANDS + 1] = frame_corr-.5;

+    features[sub][NB_BANDS] = .02*(p-100);

+    features[sub][NB_BANDS + 1] = frame_corr-.5;

   features[3][0] = (c0_id-64)/4.;

--- a/dnn/lpcnet_demo.c

+++ b/dnn/lpcnet_demo.c

@@ -115,7 +115,6 @@

             fread(in_features, sizeof(features[0]), NB_TOTAL_FEATURES, fin);

             if (feof(fin)) break;

             RNN_COPY(features, in_features, NB_FEATURES);

-            RNN_CLEAR(&features[18], 18);

             lpcnet_synthesize(net, features, pcm, LPCNET_FRAME_SIZE);

             fwrite(pcm, sizeof(pcm[0]), LPCNET_FRAME_SIZE, fout);

--- a/dnn/lpcnet_enc.c

+++ b/dnn/lpcnet_enc.c

@@ -43,7 +43,7 @@

 #include "lpcnet.h"

-//#define NB_FEATURES (2*NB_BANDS+3+LPC_ORDER)

+//#define NB_FEATURES (NB_BANDS+2+LPC_ORDER)

 #define SURVIVORS 5

@@ -499,7 +499,6 @@

   float E = 0;

   float Ly[NB_BANDS];

   float follow, logMax;

-  float g;

   kiss_fft_cpx X[FREQ_SIZE];

   float Ex[NB_BANDS];

   float xcorr[PITCH_MAX_PERIOD];

@@ -519,9 +518,8 @@

   dct(st->features[st->pcount], Ly);

   st->features[st->pcount][0] -= 4;

-  g = lpc_from_cepstrum(st->lpc, st->features[st->pcount]);

-  st->features[st->pcount][2*NB_BANDS+2] = log10(g);

-  for (i=0;i<LPC_ORDER;i++) st->features[st->pcount][2*NB_BANDS+3+i] = st->lpc[i];

+  lpc_from_cepstrum(st->lpc, st->features[st->pcount]);

+  for (i=0;i<LPC_ORDER;i++) st->features[st->pcount][NB_BANDS+2+i] = st->lpc[i];

   RNN_MOVE(st->exc_buf, &st->exc_buf[FRAME_SIZE], PITCH_MAX_PERIOD);

   RNN_COPY(&aligned_in[TRAINING_OFFSET], in, FRAME_SIZE-TRAINING_OFFSET);

   for (i=0;i<FRAME_SIZE;i++) {

@@ -663,13 +661,13 @@

       float p = pow(2.f, main_pitch/21.)*PITCH_MIN_PERIOD;

       p *= 1 + modulation/16./7.*(2*sub-3);

       p = MIN16(255, MAX16(33, p));

-      st->features[sub][2*NB_BANDS] = .02*(p-100);

-      st->features[sub][2*NB_BANDS + 1] = frame_corr-.5;

+      st->features[sub][NB_BANDS] = .02*(p-100);

+      st->features[sub][NB_BANDS + 1] = frame_corr-.5;

     } else {

-      st->features[sub][2*NB_BANDS] = .01*(IMAX(66, IMIN(510, best[2+2*sub]+best[2+2*sub+1]))-200);

-      st->features[sub][2*NB_BANDS + 1] = frame_corr-.5;

+      st->features[sub][NB_BANDS] = .01*(IMAX(66, IMIN(510, best[2+2*sub]+best[2+2*sub+1]))-200);

+      st->features[sub][NB_BANDS + 1] = frame_corr-.5;

-    //printf("%f %d %f\n", st->features[sub][2*NB_BANDS], best[2+2*sub], frame_corr);

+    //printf("%f %d %f\n", st->features[sub][NB_BANDS], best[2+2*sub], frame_corr);

   //printf("%d %f %f %f\n", best_period, best_a, best_b, best_corr);

   RNN_COPY(&st->xc[0][0], &st->xc[8][0], PITCH_MAX_PERIOD);

@@ -686,9 +684,8 @@

     perform_double_interp(st->features, st->vq_mem, interp_id);

   for (sub=0;sub<4;sub++) {

-    float g = lpc_from_cepstrum(st->lpc, st->features[sub]);

-    st->features[sub][2*NB_BANDS+2] = log10(g);

-    for (i=0;i<LPC_ORDER;i++) st->features[sub][2*NB_BANDS+3+i] = st->lpc[i];

+    lpc_from_cepstrum(st->lpc, st->features[sub]);

+    for (i=0;i<LPC_ORDER;i++) st->features[sub][NB_BANDS+2+i] = st->lpc[i];

   //printf("\n");

   RNN_COPY(st->vq_mem, &st->features[3][0], NB_BANDS);

--- a/dnn/test_lpcnet.c

+++ b/dnn/test_lpcnet.c

@@ -59,7 +59,6 @@

         fread(in_features, sizeof(features[0]), NB_TOTAL_FEATURES, fin);

         if (feof(fin)) break;

         RNN_COPY(features, in_features, NB_FEATURES);

-        RNN_CLEAR(&features[18], 18);

         lpcnet_synthesize(net, features, pcm, FRAME_SIZE);

         fwrite(pcm, sizeof(pcm[0]), FRAME_SIZE, fout);

--- a/dnn/training_tf2/lpcnet.py

+++ b/dnn/training_tf2/lpcnet.py

@@ -212,7 +212,7 @@

 constraint = WeightClip(0.992)

-def new_lpcnet_model(rnn_units1=384, rnn_units2=16, nb_used_features = 38, training=False, adaptation=False, quantize=False):

+def new_lpcnet_model(rnn_units1=384, rnn_units2=16, nb_used_features = 20, training=False, adaptation=False, quantize=False):

     pcm = Input(shape=(None, 3))

     feat = Input(shape=(None, nb_used_features))

     pitch = Input(shape=(None, 1))

--- a/dnn/training_tf2/test_lpcnet.py

+++ b/dnn/training_tf2/test_lpcnet.py

@@ -40,7 +40,7 @@

 feature_file = sys.argv[1]

 out_file = sys.argv[2]

 frame_size = model.frame_size

-nb_features = 55

+nb_features = 36

 nb_used_features = model.nb_used_features

 features = np.fromfile(feature_file, dtype='float32')

@@ -50,12 +50,11 @@

 pcm_chunk_size = frame_size*feature_chunk_size

 features = np.reshape(features, (nb_frames, feature_chunk_size, nb_features))

-features[:,:,18:36] = 0

-periods = (.1 + 50*features[:,:,36:37]+100).astype('int16')

+periods = (.1 + 50*features[:,:,18:19]+100).astype('int16')

-model.load_weights('lpcnet34bq17_384_01.h5')

+model.load_weights('lpcnet38Sn_384_02.h5');

 order = 16

@@ -81,7 +80,7 @@

             p, state1, state2 = dec.predict([fexc, cfeat[:, fr:fr+1, :], state1, state2])

             #Lower the temperature for voiced frames to reduce noisiness

-            p *= np.power(p, np.maximum(0, 1.5*features[c, fr, 37] - .5))

+            p *= np.power(p, np.maximum(0, 1.5*features[c, fr, 19] - .5))

             p = p/(1e-18 + np.sum(p))

             #Cut off the tail of the remaining distribution

             p = np.maximum(p-0.002, 0).astype('float64')

--- a/dnn/training_tf2/train_lpcnet.py

+++ b/dnn/training_tf2/train_lpcnet.py

@@ -104,7 +104,7 @@

 feature_file = args.features

 pcm_file = args.data     # 16 bit unsigned short PCM samples

 frame_size = model.frame_size

-nb_features = 55

+nb_features = 36

 nb_used_features = model.nb_used_features

 feature_chunk_size = 15

 pcm_chunk_size = frame_size*feature_chunk_size

@@ -130,7 +130,6 @@

 features = np.reshape(features, (nb_frames, feature_chunk_size, nb_features))

 features = features[:, :, :nb_used_features]

-features[:,:,18:36] = 0

 fpad1 = np.concatenate([features[0:1, 0:2, :], features[:-1, -2:, :]], axis=0)

 fpad2 = np.concatenate([features[1:, :2, :], features[0:1, -2:, :]], axis=0)

@@ -137,7 +136,7 @@

 features = np.concatenate([fpad1, features, fpad2], axis=1)

-periods = (.1 + 50*features[:,:,36:37]+100).astype('int16')

+periods = (.1 + 50*features[:,:,18:19]+100).astype('int16')

 #periods = np.minimum(periods, 255)

 # dump models to disk as we go

--

⑨