shithub: opus

Download patch

ref: 7d8b00f11d2ae57128ce8f235f341da36bc6f7bf
parent: e8f70128d50de17bff79bf169162c03ff86eb2cb
author: Jean-Marc Valin <jmvalin@amazon.com>
date: Wed Jul 7 23:33:44 EDT 2021

Sampling directly from the logit

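Avoids having to compute a sigmoid for each sampled bit: the logit is compared
directly against a random threshold drawn from a precomputed table of logit values.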

--- a/dnn/lpcnet.c
+++ b/dnn/lpcnet.c
@@ -77,7 +77,7 @@
     if (lpcnet->frame_count < 1000) lpcnet->frame_count++;
 }
 
-int run_sample_network(NNetState *net, const float *condition, const float *gru_a_condition, int last_exc, int last_sig, int pred)
+int run_sample_network(NNetState *net, const float *condition, const float *gru_a_condition, int last_exc, int last_sig, int pred, const float *sampling_logit_table)
 {
     float gru_a_input[3*GRU_A_STATE_SIZE];
     float in_b[GRU_A_STATE_SIZE+FEATURE_DENSE2_OUT_SIZE];
@@ -94,7 +94,7 @@
     RNN_COPY(in_b, net->gru_a_state, GRU_A_STATE_SIZE);
     RNN_COPY(&in_b[GRU_A_STATE_SIZE], condition, FEATURE_DENSE2_OUT_SIZE);
     compute_gru2(&gru_b, net->gru_b_state, in_b);
-    return sample_mdense(&dual_fc, net->gru_b_state);
+    return sample_mdense(&dual_fc, net->gru_b_state, sampling_logit_table);
 }
 
 LPCNET_EXPORT int lpcnet_get_size()
@@ -104,8 +104,13 @@
 
 LPCNET_EXPORT int lpcnet_init(LPCNetState *lpcnet)
 {
+    int i;
     memset(lpcnet, 0, lpcnet_get_size());
     lpcnet->last_exc = lin2ulaw(0.f);
+    for (i=0;i<256;i++) {
+        float prob = .025+.95*i/255.;
+        lpcnet->sampling_logit_table[i] = -log((1-prob)/prob);
+    }
     return 0;
 }
 
@@ -155,7 +160,7 @@
         for (j=0;j<LPC_ORDER;j++) pred -= lpcnet->last_sig[j]*lpc[j];
         last_sig_ulaw = lin2ulaw(lpcnet->last_sig[0]);
         pred_ulaw = lin2ulaw(pred);
-        exc = run_sample_network(&lpcnet->nnet, condition, gru_a_condition, lpcnet->last_exc, last_sig_ulaw, pred_ulaw);
+        exc = run_sample_network(&lpcnet->nnet, condition, gru_a_condition, lpcnet->last_exc, last_sig_ulaw, pred_ulaw, lpcnet->sampling_logit_table);
         pcm = pred + ulaw2lin(exc);
         RNN_MOVE(&lpcnet->last_sig[1], &lpcnet->last_sig[0], LPC_ORDER-1);
         lpcnet->last_sig[0] = pcm;
--- a/dnn/lpcnet_private.h
+++ b/dnn/lpcnet_private.h
@@ -29,6 +29,7 @@
     float old_input[FEATURES_DELAY][FEATURE_CONV2_OUT_SIZE];
     float old_lpc[FEATURES_DELAY][LPC_ORDER];
     float old_gain[FEATURES_DELAY];
+    float sampling_logit_table[256];
     int frame_count;
     float deemph_mem;
 };
--- a/dnn/nnet.c
+++ b/dnn/nnet.c
@@ -141,7 +141,7 @@
    compute_activation(output, output, N, layer->activation);
 }
 
-int sample_mdense(const MDenseLayer *layer, const float *input)
+int sample_mdense(const MDenseLayer *layer, const float *input, const float *sampling_logit_table)
 {
    int b, j, N, M, C, stride;
    M = layer->nb_inputs;
@@ -152,7 +152,12 @@
    
    celt_assert(N <= DUAL_FC_OUT_SIZE);
    int val=0;
-    
+   float thresholds[8];
+
+   /* Computing all the random thresholds in advance. These thresholds are directly
+      based on the logit to avoid computing the sigmoid.*/
+   for (b=0;b<8;b++) thresholds[b] = sampling_logit_table[rand()&0xFF];
+
    for (b=0;b<8;b++)
    {
       int bit;
@@ -171,9 +176,12 @@
       sum2 = layer->factor[N + i]*tanh_approx(sum2);
       sum1 += sum2;
       //sum1 = 1.f/(1 + exp(-sum1));
+#if 1 /* Sample the decision based on the logit. */
+      bit = thresholds[b] < sum1;
+#else
       sum1 = sigmoid_approx(sum1);
-      
       bit = .025+.95*((rand()+.5f)/(RAND_MAX+1.f)) < sum1;
+#endif
       val = (val << 1) | bit;
    }
    return val;
--- a/dnn/nnet.h
+++ b/dnn/nnet.h
@@ -97,7 +97,7 @@
 
 void compute_mdense(const MDenseLayer *layer, float *output, const float *input);
 
-int sample_mdense(const MDenseLayer *layer,  const float *input);
+int sample_mdense(const MDenseLayer *layer,  const float *input, const float *sampling_logit_table);
 
 void compute_gru(const GRULayer *gru, float *state, const float *input);
 
--