shithub: libvpx

Download patch

ref: c77b1f5acd09852aff1ba09d7f371728a60634d7
parent: 778845da05b2d92e9802839bd5b438ea6112de16
author: James Zern <jzern@google.com>
date: Sat Apr 25 11:59:58 EDT 2015

vp9: RECON_AND_STORE4X4: remove dest offset

Offsetting dest by a variable stride inside the macro prevents instruction
reordering, resulting in poor assembly. Callers now pass the row address
(dest + n * stride) explicitly instead.

Change-Id: Id62d6b3299cdd23f8c44f97b630abf4fea241446

--- a/vp9/common/x86/vp9_idct_intrin_sse2.c
+++ b/vp9/common/x86/vp9_idct_intrin_sse2.c
@@ -17,8 +17,7 @@
   d0 = _mm_unpacklo_epi8(d0, zero); \
   d0 = _mm_add_epi16(in_x, d0); \
   d0 = _mm_packus_epi16(d0, d0); \
-  *(int *)dest = _mm_cvtsi128_si32(d0); \
-  dest += stride; \
+  *(int *)(dest) = _mm_cvtsi128_si32(d0); \
 }
 
 void vp9_idct4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
@@ -162,10 +161,10 @@
 
   dc_value = _mm_set1_epi16(a);
 
-  RECON_AND_STORE4X4(dest, dc_value);
-  RECON_AND_STORE4X4(dest, dc_value);
-  RECON_AND_STORE4X4(dest, dc_value);
-  RECON_AND_STORE4X4(dest, dc_value);
+  RECON_AND_STORE4X4(dest + 0 * stride, dc_value);
+  RECON_AND_STORE4X4(dest + 1 * stride, dc_value);
+  RECON_AND_STORE4X4(dest + 2 * stride, dc_value);
+  RECON_AND_STORE4X4(dest + 3 * stride, dc_value);
 }
 
 static INLINE void transpose_4x4(__m128i *res) {