ref: deadda3deacf9a2d1664bce4d5f52fc591b36c01
parent: a914ffad978a6961189d6ffccefac04ab1c679e9
parent: fdd1186f9775cec72bcaaf98739b4d5f2e08b0ac
author: James Zern <jzern@google.com>
date: Thu Sep 22 22:49:26 EDT 2016
Merge "vpx_idct32x32_34_add_sse2: rm unneeded transposes"
--- a/vpx_dsp/x86/inv_txfm_sse2.c
+++ b/vpx_dsp/x86/inv_txfm_sse2.c
@@ -3066,17 +3066,7 @@
in[6] = load_input_data(input + 192);
in[7] = load_input_data(input + 224);
- for (i = 8; i < 32; ++i) {
- in[i] = _mm_setzero_si128();
- }
-
array_transpose_8x8(in, in);
- // TODO(hkuang): Following transposes are unnecessary. But remove them will
- // lead to performance drop on some devices.
- array_transpose_8x8(in + 8, in + 8);
- array_transpose_8x8(in + 16, in + 16);
- array_transpose_8x8(in + 24, in + 24);
-
IDCT32_34
// 1_D: Store 32 intermediate results for each 8x32 block.