ref: 0b9f6bab8114131dc70a9474d552072682a1593a
parent: ae2ae5ead6be8ddbc40ecf7c31aae373c878ea92
author: Jean-Marc Valin <jmvalin@jmvalin.ca>
date: Sun Jun 20 21:34:38 EDT 2021
Remove unnecessary mask in exp() approximation. This isn't necessary since valid exponents can't flip the sign bit.
--- a/dnn/vec_avx.h
+++ b/dnn/vec_avx.h
@@ -47,7 +47,6 @@
const __m256 log2_E = _mm256_set1_ps(1.44269504);
const __m256 max_in = _mm256_set1_ps(50.f);
const __m256 min_in = _mm256_set1_ps(-50.f);
- const __m256i mask = _mm256_set1_epi32(0x7fffffff);
__m256 XF, Y;
__m256i I;
X = _mm256_mul_ps(X, log2_E);
@@ -57,7 +56,7 @@
X = _mm256_sub_ps(X, XF);
Y = _mm256_fmadd_ps(_mm256_fmadd_ps(_mm256_fmadd_ps(K3, X, K2), X, K1), X, K0);
I = _mm256_slli_epi32(I, 23);
- Y = _mm256_castsi256_ps(_mm256_and_si256(mask, _mm256_add_epi32(I, _mm256_castps_si256(Y))));
+ Y = _mm256_castsi256_ps(_mm256_add_epi32(I, _mm256_castps_si256(Y)));
return Y;
}
#else
--
⑨