ref: 5c22224e9e43befc414cce2cf163616c9d56b0d3
parent: 26a0721268cfbb921f8b2320ccecab2c3d8982f2
author: Peter de Rivaz <peter.derivaz@gmail.com>
date: Thu Dec 11 10:54:23 EST 2014
Corrected optimization of 8x8 DCT code. The 8x8 DCT uses a fast version whenever possible. There was a mistake in the checking code, which meant that the fast version was sometimes used when it was not safe to do so. Change-Id: I154c84c9e2d836764768a11082947ca30f4b5ab7 (cherry picked from commit fd05fb0c21e253b4d6f92d7e0b752850ff8ab188)
--- a/vp9/common/x86/vp9_idct_intrin_sse2.c
+++ b/vp9/common/x86/vp9_idct_intrin_sse2.c
@@ -4260,7 +4260,7 @@
// N.B. Only first 4 cols contain non-zero coeffs
max_input = _mm_max_epi16(inptr[0], inptr[1]);
min_input = _mm_min_epi16(inptr[0], inptr[1]);
- for (i = 2; i < 4; i++) {
+ for (i = 2; i < 8; i++) {
max_input = _mm_max_epi16(max_input, inptr[i]);
min_input = _mm_min_epi16(min_input, inptr[i]);
}
--
⑨