ref: 82f48d368b41d8bc4286e1375419daacbd10dbca
parent: e7beaec3fb49df389b077799c5d1778ccb68610e
author: Jan Buethe <jbuethe@amazon.de>
date: Wed Sep 13 12:57:28 EDT 2023
removed trailing whitespace in fargan

Signed-off-by: Jan Buethe <jbuethe@amazon.de>
--- a/dnn/torch/fargan/fargan.py
+++ b/dnn/torch/fargan/fargan.py
@@ -81,7 +81,7 @@
class GLU(nn.Module):
def __init__(self, feat_size):
super(GLU, self).__init__()
-
+
torch.manual_seed(5)
self.gate = weight_norm(nn.Linear(feat_size, feat_size, bias=False))
@@ -89,7 +89,7 @@
self.init_weights()
def init_weights(self):
-
+
for m in self.modules():
if isinstance(m, nn.Conv1d) or isinstance(m, nn.ConvTranspose1d)\
or isinstance(m, nn.Linear) or isinstance(m, nn.Embedding):
@@ -96,9 +96,9 @@
nn.init.orthogonal_(m.weight.data)
def forward(self, x):
-
- out = x * torch.sigmoid(self.gate(x))
-
+
+ out = x * torch.sigmoid(self.gate(x))
+
return out
class FWConv(nn.Module):
@@ -160,7 +160,7 @@
self.subframe_size = subframe_size
self.nb_subframes = nb_subframes
self.cond_size = cond_size
-
+
#self.sig_dense1 = nn.Linear(4*self.subframe_size+self.passthrough_size+self.cond_size, self.cond_size, bias=False)
self.fwc0 = FWConv(4*self.subframe_size+80, self.cond_size)
self.sig_dense2 = nn.Linear(self.cond_size, self.cond_size, bias=False)
@@ -167,7 +167,7 @@
self.gru1 = nn.GRUCell(self.cond_size, self.cond_size, bias=False)
self.gru2 = nn.GRUCell(self.cond_size, self.cond_size, bias=False)
self.gru3 = nn.GRUCell(self.cond_size, self.cond_size, bias=False)
-
+
self.dense1_glu = GLU(self.cond_size)
self.dense2_glu = GLU(self.cond_size)
self.gru1_glu = GLU(self.cond_size)
@@ -174,7 +174,7 @@
self.gru2_glu = GLU(self.cond_size)
self.gru3_glu = GLU(self.cond_size)
self.ptaps_dense = nn.Linear(4*self.cond_size, 5)
-
+
self.sig_dense_out = nn.Linear(4*self.cond_size, self.subframe_size, bias=False)
self.gain_dense_out = nn.Linear(4*self.cond_size, 1)
@@ -184,7 +184,7 @@
def forward(self, cond, prev, exc_mem, phase, period, states, gain=None):
device = exc_mem.device
#print(cond.shape, prev.shape)
-
+
dump_signal(prev, 'prev_in.f32')
idx = 256-torch.clamp(period[:,None], min=self.subframe_size+2, max=254)
@@ -283,4 +283,3 @@
prev = out
states = [s.detach() for s in states]
return sig, states
-
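
For context, the GLU class whose whitespace is cleaned above computes a gated linear unit: the input is multiplied elementwise by a sigmoid gate over a linear projection of itself, so the gate learns how much of each feature to pass through. A minimal standalone sketch mirroring the forward pass shown in this hunk (weight_norm and the seeded orthogonal init from the original class are omitted for brevity):

import torch
import torch.nn as nn

class GLU(nn.Module):
    # out = x * sigmoid(W x): the sigmoid gate scales each feature
    # of x by a learned value in (0, 1).
    def __init__(self, feat_size):
        super().__init__()
        self.gate = nn.Linear(feat_size, feat_size, bias=False)

    def forward(self, x):
        return x * torch.sigmoid(self.gate(x))

glu = GLU(128)
y = glu(torch.randn(4, 128))   # shape preserved: (4, 128)
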
--- a/dnn/torch/fargan/filters.py
+++ b/dnn/torch/fargan/filters.py
@@ -41,6 +41,6 @@
A = toeplitz_from_filter(a)
#print(A)
R = filter_iir_response(a, 5)
-
+
RA = toeplitz_from_filter(R)
print(RA)
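
The filters.py hunk only touches whitespace around debug code that builds Toeplitz matrices from filters. The underlying idea: convolution with a filter can be written as multiplication by a lower-triangular Toeplitz matrix of its taps, and an IIR filter 1/A(z) can be probed by filtering an impulse. The helper names toeplitz_from_filter and filter_iir_response come from this file; the bodies below are plausible reimplementations for illustration, not the project's own code:

import numpy as np
from scipy.linalg import toeplitz
from scipy.signal import lfilter

def toeplitz_from_filter(f, n=8):
    # Lower-triangular Toeplitz matrix whose product with a signal
    # equals convolution with the taps f (truncated to length n).
    col = np.zeros(n)
    k = min(n, len(f))
    col[:k] = f[:k]
    return toeplitz(col, np.zeros(n))

def filter_iir_response(a, n):
    # First n samples of the impulse response of the all-pole
    # filter 1/A(z), with A's taps given in a.
    impulse = np.zeros(n)
    impulse[0] = 1.0
    return lfilter([1.0], a, impulse)

a = np.array([1.0, -0.9])   # simple one-pole example
print(toeplitz_from_filter(filter_iir_response(a, 5), 5))
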
--- a/dnn/torch/fargan/stft_loss.py
+++ b/dnn/torch/fargan/stft_loss.py
@@ -17,7 +17,7 @@
Returns:
Tensor: Magnitude spectrogram (B, #frames, fft_size // 2 + 1).
"""
-
+
#x_stft = torch.stft(x, fft_size, hop_size, win_length, window, return_complex=False)
#real = x_stft[..., 0]
#imag = x_stft[..., 1]
@@ -83,26 +83,26 @@
var_x = torch.var(x, dim=1, keepdim=True)
var_y = torch.var(y, dim=1, keepdim=True)
-
+
std_x = torch.std(x, dim=1, keepdim=True)
std_y = torch.std(y, dim=1, keepdim=True)
-
+
x_minus_mean = x - mean_x
y_minus_mean = y - mean_y
-
+
pearson_corr = torch.sum(x_minus_mean * y_minus_mean, dim=1, keepdim=True) / \
(torch.sqrt(torch.sum(x_minus_mean ** 2, dim=1, keepdim=True) + 1e-7) * \
torch.sqrt(torch.sum(y_minus_mean ** 2, dim=1, keepdim=True) + 1e-7))
-
+
numerator = 2.0 * pearson_corr * std_x * std_y
denominator = var_x + var_y + (mean_y - mean_x)**2
-
+
ccc = numerator/denominator
-
+
ccc_loss = F.l1_loss(1.0 - ccc, torch.zeros_like(ccc))'''
return error_loss #+ ccc_loss#+ ccc_loss
-
+
class STFTLoss(torch.nn.Module):
"""STFT loss module."""
--- a/dnn/torch/fargan/test_fargan.py
+++ b/dnn/torch/fargan/test_fargan.py
@@ -55,35 +55,35 @@
gamma = checkpoint['model_kwargs']['gamma']
def lpc_synthesis_one_frame(frame, filt, buffer, weighting_vector=np.ones(16)):
-
+
out = np.zeros_like(frame)
filt = np.flip(filt)
-
+
inp = frame[:]
-
-
+
+
for i in range(0, inp.shape[0]):
-
+
s = inp[i] - np.dot(buffer*weighting_vector, filt)
-
+
buffer[0] = s
-
+
buffer = np.roll(buffer, -1)
-
+
out[i] = s
-
+
return out
def inverse_perceptual_weighting (pw_signal, filters, weighting_vector):
-
+
#inverse perceptual weighting= H_preemph / W(z/gamma)
-
+
signal = np.zeros_like(pw_signal)
buffer = np.zeros(16)
num_frames = pw_signal.shape[0] //160
assert num_frames == filters.shape[0]
for frame_idx in range(0, num_frames):
-
+
in_frame = pw_signal[frame_idx*160: (frame_idx+1)*160][:]
out_sig_frame = lpc_synthesis_one_frame(in_frame, filters[frame_idx, :], buffer, weighting_vector)
signal[frame_idx*160: (frame_idx+1)*160] = out_sig_frame[:]
@@ -97,11 +97,11 @@
features = torch.tensor(features).to(device)
#lpc = torch.tensor(lpc).to(device)
periods = torch.tensor(periods).to(device)
-
+
sig, _ = model(features, periods, nb_frames - 4)
weighting_vector = np.array([gamma**i for i in range(16,0,-1)])
sig = sig.detach().numpy().flatten()
sig = inverse_perceptual_weighting(sig, lpc[0,:,:], weighting_vector)
-
+
pcm = np.round(32768*np.clip(sig, a_max=.99, a_min=-.99)).astype('int16')
pcm.tofile(signal_file)
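
For context, the loop cleaned up above is direct-form all-pole LPC synthesis: each output sample is the input minus the dot product of the last 16 outputs with the bandwidth-expanded predictor taps, which inverts the perceptual weighting W(z/gamma) applied during training. A condensed, runnable restatement of the per-frame routine (same arithmetic as in this file; gamma = 0.92 below is an example value, not the checkpoint's):

import numpy as np

def lpc_synthesis_one_frame(frame, filt, buffer, weighting_vector):
    # 1/A(z/gamma) synthesis: buffer holds the most recent outputs,
    # newest last; weighting_vector applies the gamma**i expansion.
    out = np.zeros_like(frame)
    filt = np.flip(filt)
    for i in range(frame.shape[0]):
        s = frame[i] - np.dot(buffer * weighting_vector, filt)
        buffer[0] = s
        buffer = np.roll(buffer, -1)
        out[i] = s
    return out

gamma = 0.92   # example value
w = np.array([gamma ** i for i in range(16, 0, -1)])
frame = np.random.randn(160).astype(np.float32)
filt = np.zeros(16, dtype=np.float32)   # zero predictor: output == input
print(np.allclose(lpc_synthesis_one_frame(frame, filt, np.zeros(16), w), frame))
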
--- a/dnn/torch/fargan/train_fargan.py
+++ b/dnn/torch/fargan/train_fargan.py
@@ -141,9 +141,9 @@
loss.backward()
optimizer.step()
-
+
#model.clip_weights()
-
+
scheduler.step()
running_specc += specc_loss.detach().cpu().item()
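
The train_fargan.py hunk sits inside the standard per-batch update, where scheduler.step() runs every batch rather than every epoch. A minimal sketch of that pattern with a generic model and loss (the LambdaLR decay below is an assumption for illustration, not the script's actual schedule):

import torch

model = torch.nn.Linear(10, 1)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.LambdaLR(
    optimizer, lambda step: 1.0 / (1.0 + 1e-4 * step))  # assumed decay

running_loss = 0.0
for step in range(100):
    x, y = torch.randn(8, 10), torch.randn(8, 1)
    loss = torch.nn.functional.mse_loss(model(x), y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    scheduler.step()          # per-batch LR update, as in this script
    running_loss += loss.detach().cpu().item()
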
--