ref: 8bdbbfa18d4697be7ba9fc47176809d669d33f77
parent: 4c0e2248655c3042c4cd93b3dd663cc798e5aa22
author: Jean-Marc Valin <jmvalin@amazon.com>
date: Thu Jul 15 23:07:26 EDT 2021
Support for sparse GRU B input matrices

Only on the C side, no sparse GRU B training yet.
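For context (not part of the patch): below is a minimal, unoptimized sketch of the block-sparse matrix-vector product that the new sparse_sgemv_accum8x4() call relies on, assuming the index layout emitted by printSparseVector(), i.e. for every block of 8 output rows, idx holds the number of non-zero 8x4 blocks followed by the input offsets of those blocks, with the 32 weights of each block stored contiguously. The real kernels in dnn/vec*.h also cover the quantized DOT_PROD weight ordering; the function body and the in-block weight ordering shown here are illustrative assumptions, not the actual implementation.

/* Reference sketch of a block-sparse GEMV with 8x4 blocks (float weights,
 * quantization ignored). Layout assumption: for each group of 8 output rows,
 * idx[] stores the count of non-zero 8x4 blocks, then the input offset of
 * each such block; weights[] stores 32 values per block, row-major. */
static void sparse_sgemv_accum8x4_ref(float *out, const float *weights,
                                      int rows, int cols,
                                      const int *idx, const float *x)
{
   int i, j, k;
   (void)cols;  /* number of inputs; implied by the offsets stored in idx */
   for (i = 0; i < rows; i += 8)
   {
      int nb_blocks = *idx++;            /* non-zero 8x4 blocks in this row group */
      for (j = 0; j < nb_blocks; j++)
      {
         int pos = *idx++;               /* offset of the 4 input values */
         float x0 = x[pos + 0];
         float x1 = x[pos + 1];
         float x2 = x[pos + 2];
         float x3 = x[pos + 3];
         for (k = 0; k < 8; k++)
         {
            out[i + k] += weights[4*k + 0]*x0 + weights[4*k + 1]*x1
                        + weights[4*k + 2]*x2 + weights[4*k + 3]*x3;
         }
         weights += 32;                  /* advance to the next 8x4 block */
      }
   }
}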
--- a/dnn/nnet.c
+++ b/dnn/nnet.c
@@ -323,7 +323,7 @@
for (i=0;i<3*N;i++)
zrh[i] = gru->bias[i] + gru_b_condition[i];
#endif
- sgemv_accum8x4(zrh, gru->input_weights, 3*N, M, stride, input);
+ sparse_sgemv_accum8x4(zrh, gru->input_weights, 3*N, M, gru->input_weights_idx, input);
for (i=0;i<3*N;i++)
recur[i] = gru->bias[3*N + i];
sgemv_accum(recur, gru->recurrent_weights, 3*N, N, stride, state);
--- a/dnn/nnet.h
+++ b/dnn/nnet.h
@@ -58,6 +58,7 @@
const float *bias;
const float *subias;
const qweight *input_weights;
+ const int *input_weights_idx;
const float *recurrent_weights;
int nb_inputs;
int nb_neurons;
--- a/dnn/training_tf2/dump_lpcnet.py
+++ b/dnn/training_tf2/dump_lpcnet.py
@@ -60,18 +60,20 @@
f.write('\n};\n\n')
return;
-def printSparseVector(f, A, name):
+def printSparseVector(f, A, name, have_diag=True):
N = A.shape[0]
+ M = A.shape[1]
W = np.zeros((0,), dtype='int')
W0 = np.zeros((0,))
- diag = np.concatenate([np.diag(A[:,:N]), np.diag(A[:,N:2*N]), np.diag(A[:,2*N:])])
- A[:,:N] = A[:,:N] - np.diag(np.diag(A[:,:N]))
- A[:,N:2*N] = A[:,N:2*N] - np.diag(np.diag(A[:,N:2*N]))
- A[:,2*N:] = A[:,2*N:] - np.diag(np.diag(A[:,2*N:]))
+ if have_diag:
+ diag = np.concatenate([np.diag(A[:,:N]), np.diag(A[:,N:2*N]), np.diag(A[:,2*N:])])
+ A[:,:N] = A[:,:N] - np.diag(np.diag(A[:,:N]))
+ A[:,N:2*N] = A[:,N:2*N] - np.diag(np.diag(A[:,N:2*N]))
+ A[:,2*N:] = A[:,2*N:] - np.diag(np.diag(A[:,2*N:]))
+ printVector(f, diag, name + '_diag')
AQ = np.minimum(127, np.maximum(-128, np.round(A*128))).astype('int')
- printVector(f, diag, name + '_diag')
idx = np.zeros((0,), dtype='int')
- for i in range(3*N//8):
+ for i in range(M//8):
pos = idx.shape[0]
idx = np.append(idx, -1)
nb_nonzero = 0
@@ -131,12 +133,7 @@
name = self.name
print("printing layer " + name + " of type " + self.__class__.__name__)
weights = self.get_weights()
- f.write('#ifdef DOT_PROD\n')
- qweight = np.clip(np.round(128.*weights[0][:gru_a_size, :]).astype('int'), -128, 127)
- printVector(f, qweight, name + '_weights', dotp=True, dtype='qweight')
- f.write('#else /*DOT_PROD*/\n')
- printVector(f, weights[0][:gru_a_size, :], name + '_weights')
- f.write('#endif /*DOT_PROD*/\n')
+ qweight = printSparseVector(f, weights[0][:gru_a_size, :], name + '_weights', have_diag=False)
printVector(f, weights[1], name + '_recurrent_weights')
printVector(f, weights[-1], name + '_bias')
subias = weights[-1].copy()
@@ -152,8 +149,8 @@
reset_after = 1
neurons = weights[0].shape[1]//3
max_rnn_neurons = max(max_rnn_neurons, neurons)
- f.write('const GRULayer {} = {{\n {}_bias,\n {}_subias,\n {}_weights,\n {}_recurrent_weights,\n {}, {}, ACTIVATION_{}, {}\n}};\n\n'
- .format(name, name, name, name, name, gru_a_size, weights[0].shape[1]//3, activation, reset_after))
+ f.write('const GRULayer {} = {{\n {}_bias,\n {}_subias,\n {}_weights,\n {}_weights_idx,\n {}_recurrent_weights,\n {}, {}, ACTIVATION_{}, {}\n}};\n\n'
+ .format(name, name, name, name, name, name, gru_a_size, weights[0].shape[1]//3, activation, reset_after))
hf.write('extern const GRULayer {};\n\n'.format(name));
return True
--