shithub: libvpx

Download patch

ref: 540d910350121895597dab8c9c219aa28c0f64e4
parent: 5c2696c378fbbfe8ab894506e23fb79d596d0c43
author: Jingning Han <jingning@google.com>
date: Mon Jun 2 14:48:33 EDT 2014

Fix potential overflow issue in SSSE3 forward 8x8 2D-DCT

The SSSE3 implementation might find a potential overflow issue in
its second 1-D transform, if all input residual pixels are close to
255. This commit fixes the issue and re-enables the unit test on
the SSSE3 version.

Change-Id: I0520478abdab7afd3ff2842516bec951111e9b3c

--- a/vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm
+++ b/vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm
@@ -23,6 +23,7 @@
 pw_%2_m%1:  dw  %2, -%1,  %2, -%1,  %2, -%1,  %2, -%1
 %endmacro
 
+TRANSFORM_COEFFS 11585,  11585
 TRANSFORM_COEFFS 15137,   6270
 TRANSFORM_COEFFS 16069,   3196
 TRANSFORM_COEFFS  9102,  13623
@@ -83,7 +84,7 @@
 %endmacro
 
 ; 1D forward 8x8 DCT transform
-%macro FDCT8_1D 0
+%macro FDCT8_1D 1
   SUM_SUB            0,  7,  9
   SUM_SUB            1,  6,  9
   SUM_SUB            2,  5,  9
@@ -92,14 +93,21 @@
   SUM_SUB            0,  3,  9
   SUM_SUB            1,  2,  9
   SUM_SUB            6,  5,  9
+%if %1 == 0
   SUM_SUB            0,  1,  9
+%endif
 
   BUTTERFLY_4X       2,  3,  6270,  15137,  m8,  9,  10
 
   pmulhrsw           m6, m12
   pmulhrsw           m5, m12
+%if %1 == 0
   pmulhrsw           m0, m12
   pmulhrsw           m1, m12
+%else
+  BUTTERFLY_4X       1,  0,  11585, 11585,  m8,  9,  10
+  SWAP               0,  1
+%endif
 
   SUM_SUB            4,  5,  9
   SUM_SUB            7,  6,  9
@@ -150,10 +158,10 @@
   psllw              m7, 2
 
   ; column transform
-  FDCT8_1D
+  FDCT8_1D  0
   TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9
 
-  FDCT8_1D
+  FDCT8_1D  1
   TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9
 
   DIVIDE_ROUND_2X   0, 1, 9, 10