ref: 82f48d368b41d8bc4286e1375419daacbd10dbca
parent: e7beaec3fb49df389b077799c5d1778ccb68610e
author: Jan Buethe <jbuethe@amazon.de>
date: Wed Sep 13 12:57:28 EDT 2023
removed trailing whitespace in fargan

Signed-off-by: Jan Buethe <jbuethe@amazon.de>
--- a/dnn/torch/fargan/fargan.py
+++ b/dnn/torch/fargan/fargan.py
@@ -81,7 +81,7 @@
class GLU(nn.Module):
def __init__(self, feat_size):
super(GLU, self).__init__()
-
+
torch.manual_seed(5)
self.gate = weight_norm(nn.Linear(feat_size, feat_size, bias=False))
@@ -89,7 +89,7 @@
self.init_weights()
def init_weights(self):
-
+
for m in self.modules():
if isinstance(m, nn.Conv1d) or isinstance(m, nn.ConvTranspose1d)\
or isinstance(m, nn.Linear) or isinstance(m, nn.Embedding):
@@ -96,9 +96,9 @@
nn.init.orthogonal_(m.weight.data)
def forward(self, x):
-
- out = x * torch.sigmoid(self.gate(x))
-
+
+ out = x * torch.sigmoid(self.gate(x))
+
return out
class FWConv(nn.Module):
@@ -160,7 +160,7 @@
self.subframe_size = subframe_size
self.nb_subframes = nb_subframes
self.cond_size = cond_size
-
+
#self.sig_dense1 = nn.Linear(4*self.subframe_size+self.passthrough_size+self.cond_size, self.cond_size, bias=False)
self.fwc0 = FWConv(4*self.subframe_size+80, self.cond_size)
self.sig_dense2 = nn.Linear(self.cond_size, self.cond_size, bias=False)
@@ -167,7 +167,7 @@
self.gru1 = nn.GRUCell(self.cond_size, self.cond_size, bias=False)
self.gru2 = nn.GRUCell(self.cond_size, self.cond_size, bias=False)
self.gru3 = nn.GRUCell(self.cond_size, self.cond_size, bias=False)
-
+
self.dense1_glu = GLU(self.cond_size)
self.dense2_glu = GLU(self.cond_size)
self.gru1_glu = GLU(self.cond_size)
@@ -174,7 +174,7 @@
self.gru2_glu = GLU(self.cond_size)
self.gru3_glu = GLU(self.cond_size)
self.ptaps_dense = nn.Linear(4*self.cond_size, 5)
-
+
self.sig_dense_out = nn.Linear(4*self.cond_size, self.subframe_size, bias=False)
self.gain_dense_out = nn.Linear(4*self.cond_size, 1)
@@ -184,7 +184,7 @@
def forward(self, cond, prev, exc_mem, phase, period, states, gain=None):
device = exc_mem.device
#print(cond.shape, prev.shape)
-
+
dump_signal(prev, 'prev_in.f32')
idx = 256-torch.clamp(period[:,None], min=self.subframe_size+2, max=254)
@@ -283,4 +283,3 @@
prev = out
states = [s.detach() for s in states]
return sig, states
-
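
For context, the GLU class whose whitespace is cleaned above computes a gated linear unit: the input is multiplied elementwise by a sigmoid gate over a linear projection of itself, so the gate learns how much of each feature to pass through. A minimal standalone sketch mirroring the forward pass shown in this hunk (weight_norm and the seeded orthogonal init from the original class are omitted for brevity):

import torch
import torch.nn as nn

class GLU(nn.Module):
    # out = x * sigmoid(W x): the sigmoid gate scales each feature
    # of x by a learned value in (0, 1).
    def __init__(self, feat_size):
        super().__init__()
        self.gate = nn.Linear(feat_size, feat_size, bias=False)

    def forward(self, x):
        return x * torch.sigmoid(self.gate(x))

glu = GLU(128)
y = glu(torch.randn(4, 128))   # shape preserved: (4, 128)
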
--- a/dnn/torch/fargan/filters.py
+++ b/dnn/torch/fargan/filters.py
@@ -41,6 +41,6 @@
A = toeplitz_from_filter(a)
#print(A)
R = filter_iir_response(a, 5)
-
+
RA = toeplitz_from_filter(R)
print(RA)
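
The filters.py hunk only touches whitespace around debug code that builds Toeplitz matrices from filters. The underlying idea: convolution with a filter can be written as multiplication by a lower-triangular Toeplitz matrix of its taps, and an IIR filter 1/A(z) can be probed by filtering an impulse. The helper names toeplitz_from_filter and filter_iir_response come from this file; the bodies below are plausible reimplementations for illustration, not the project's own code:

import numpy as np
from scipy.linalg import toeplitz
from scipy.signal import lfilter

def toeplitz_from_filter(f, n=8):
    # Lower-triangular Toeplitz matrix whose product with a signal
    # equals convolution with the taps f (truncated to length n).
    col = np.zeros(n)
    k = min(n, len(f))
    col[:k] = f[:k]
    return toeplitz(col, np.zeros(n))

def filter_iir_response(a, n):
    # First n samples of the impulse response of the all-pole
    # filter 1/A(z), with A's taps given in a.
    impulse = np.zeros(n)
    impulse[0] = 1.0
    return lfilter([1.0], a, impulse)

a = np.array([1.0, -0.9])   # simple one-pole example
print(toeplitz_from_filter(filter_iir_response(a, 5), 5))
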
--- a/dnn/torch/fargan/stft_loss.py
+++ b/dnn/torch/fargan/stft_loss.py
@@ -17,7 +17,7 @@
Returns:
Tensor: Magnitude spectrogram (B, #frames, fft_size // 2 + 1).
"""
-
+
#x_stft = torch.stft(x, fft_size, hop_size, win_length, window, return_complex=False)
#real = x_stft[..., 0]
#imag = x_stft[..., 1]
@@ -83,26 +83,26 @@
var_x = torch.var(x, dim=1, keepdim=True)
var_y = torch.var(y, dim=1, keepdim=True)
-
+
std_x = torch.std(x, dim=1, keepdim=True)
std_y = torch.std(y, dim=1, keepdim=True)
-
+
x_minus_mean = x - mean_x
y_minus_mean = y - mean_y
-
+
pearson_corr = torch.sum(x_minus_mean * y_minus_mean, dim=1, keepdim=True) / \
(torch.sqrt(torch.sum(x_minus_mean ** 2, dim=1, keepdim=True) + 1e-7) * \
torch.sqrt(torch.sum(y_minus_mean ** 2, dim=1, keepdim=True) + 1e-7))
-
+
numerator = 2.0 * pearson_corr * std_x * std_y
denominator = var_x + var_y + (mean_y - mean_x)**2
-
+
ccc = numerator/denominator
-
+
ccc_loss = F.l1_loss(1.0 - ccc, torch.zeros_like(ccc))'''
return error_loss #+ ccc_loss#+ ccc_loss
-
+
class STFTLoss(torch.nn.Module):
"""STFT loss module."""
--- a/dnn/torch/fargan/test_fargan.py
+++ b/dnn/torch/fargan/test_fargan.py
@@ -55,35 +55,35 @@
gamma = checkpoint['model_kwargs']['gamma']
def lpc_synthesis_one_frame(frame, filt, buffer, weighting_vector=np.ones(16)):
-
+
out = np.zeros_like(frame)
filt = np.flip(filt)
-
+
inp = frame[:]
-
-
+
+
for i in range(0, inp.shape[0]):
-
+
s = inp[i] - np.dot(buffer*weighting_vector, filt)
-
+
buffer[0] = s
-
+
buffer = np.roll(buffer, -1)
-
+
out[i] = s
-
+
return out
def inverse_perceptual_weighting (pw_signal, filters, weighting_vector):
-
+
#inverse perceptual weighting= H_preemph / W(z/gamma)
-
+
signal = np.zeros_like(pw_signal)
buffer = np.zeros(16)
num_frames = pw_signal.shape[0] //160
assert num_frames == filters.shape[0]
for frame_idx in range(0, num_frames):
-
+
in_frame = pw_signal[frame_idx*160: (frame_idx+1)*160][:]
out_sig_frame = lpc_synthesis_one_frame(in_frame, filters[frame_idx, :], buffer, weighting_vector)
signal[frame_idx*160: (frame_idx+1)*160] = out_sig_frame[:]
@@ -97,11 +97,11 @@
features = torch.tensor(features).to(device)
#lpc = torch.tensor(lpc).to(device)
periods = torch.tensor(periods).to(device)
-
+
sig, _ = model(features, periods, nb_frames - 4)
weighting_vector = np.array([gamma**i for i in range(16,0,-1)])
sig = sig.detach().numpy().flatten()
sig = inverse_perceptual_weighting(sig, lpc[0,:,:], weighting_vector)
-
+
pcm = np.round(32768*np.clip(sig, a_max=.99, a_min=-.99)).astype('int16')
pcm.tofile(signal_file)
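
For context, the loop cleaned up above is direct-form all-pole LPC synthesis: each output sample is the input minus the dot product of the last 16 outputs with the bandwidth-expanded predictor taps, which inverts the perceptual weighting W(z/gamma) applied during training. A condensed, runnable restatement of the per-frame routine (same arithmetic as in this file; gamma = 0.92 below is an example value, not the checkpoint's):

import numpy as np

def lpc_synthesis_one_frame(frame, filt, buffer, weighting_vector):
    # 1/A(z/gamma) synthesis: buffer holds the most recent outputs,
    # newest last; weighting_vector applies the gamma**i expansion.
    out = np.zeros_like(frame)
    filt = np.flip(filt)
    for i in range(frame.shape[0]):
        s = frame[i] - np.dot(buffer * weighting_vector, filt)
        buffer[0] = s
        buffer = np.roll(buffer, -1)
        out[i] = s
    return out

gamma = 0.92   # example value
w = np.array([gamma ** i for i in range(16, 0, -1)])
frame = np.random.randn(160).astype(np.float32)
filt = np.zeros(16, dtype=np.float32)   # zero predictor: output == input
print(np.allclose(lpc_synthesis_one_frame(frame, filt, np.zeros(16), w), frame))
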
--- a/dnn/torch/fargan/train_fargan.py
+++ b/dnn/torch/fargan/train_fargan.py
@@ -141,9 +141,9 @@
loss.backward()
optimizer.step()
-
+
#model.clip_weights()
-
+
scheduler.step()
running_specc += specc_loss.detach().cpu().item()
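
The train_fargan.py hunk sits inside the standard per-batch update, where scheduler.step() runs every batch rather than every epoch. A minimal sketch of that pattern with a generic model and loss (the LambdaLR decay below is an assumption for illustration, not the script's actual schedule):

import torch

model = torch.nn.Linear(10, 1)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.LambdaLR(
    optimizer, lambda step: 1.0 / (1.0 + 1e-4 * step))  # assumed decay

running_loss = 0.0
for step in range(100):
    x, y = torch.randn(8, 10), torch.randn(8, 1)
    loss = torch.nn.functional.mse_loss(model(x), y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    scheduler.step()          # per-batch LR update, as in this script
    running_loss += loss.detach().cpu().item()
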
--