shithub: libvpx

--- a/test/predict_test.cc

+++ b/test/predict_test.cc

@@ -146,9 +146,15 @@

   void TestWithRandomData(PredictFunc reference) {

     ACMRandom rnd(ACMRandom::DeterministicSeed());

-    // Run tests for all possible offsets.

+    // Run tests for almost all possible offsets.

     for (int xoffset = 0; xoffset < 8; ++xoffset) {

       for (int yoffset = 0; yoffset < 8; ++yoffset) {

+        if (xoffset == 0 && yoffset == 0) {

+          // This represents a copy which is not required to be handled by this

+          // module.

+          continue;

+        }

         for (int i = 0; i < kSrcSize; ++i) {

           src_[i] = rnd.Rand8();

@@ -172,6 +178,9 @@

     if (width_ == 4 && height_ == 4) {

       for (int xoffset = 0; xoffset < 8; ++xoffset) {

         for (int yoffset = 0; yoffset < 8; ++yoffset) {

+          if (xoffset == 0 && yoffset == 0) {

+            continue;

+          }

           for (int i = 0; i < kSrcSize; ++i) {

             src_[i] = rnd.Rand8();

@@ -354,7 +363,7 @@

 #endif

 #if HAVE_SSSE3

 INSTANTIATE_TEST_CASE_P(

-    DISABLED_SSSE3, BilinearPredictTest,

+    SSSE3, BilinearPredictTest,

     ::testing::Values(make_tuple(16, 16, &vp8_bilinear_predict16x16_ssse3),

                       make_tuple(8, 8, &vp8_bilinear_predict8x8_ssse3)));

 #endif

--- a/vp8/common/filter.c

+++ b/vp8/common/filter.c

@@ -8,8 +8,9 @@

  *  be found in the AUTHORS file in the root of the source tree.

*/

-#include "filter.h"

+#include <assert.h>

 #include "./vp8_rtcd.h"

+#include "vp8/common/filter.h"

 DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]) = {

   { 128, 0 }, { 112, 16 }, { 96, 32 }, { 80, 48 },

@@ -324,27 +325,11 @@

   const short *HFilter;

   const short *VFilter;

+  // This represents a copy and is not required to be handled by optimizations.

+  assert((xoffset | yoffset) != 0);

   HFilter = vp8_bilinear_filters[xoffset];

   VFilter = vp8_bilinear_filters[yoffset];

-#if 0

-    {

-        int i;

-        unsigned char temp1[16];

-        unsigned char temp2[16];

-        bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4);

-        filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4);

-        for (i = 0; i < 16; ++i)

-        {

-            if (temp1[i] != temp2[i])

-            {

-                bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4);

-                filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4);

-            }

-        }

-    }

-#endif

   filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter,

                      VFilter, 4, 4);

@@ -355,6 +340,8 @@

   const short *HFilter;

   const short *VFilter;

+  assert((xoffset | yoffset) != 0);

   HFilter = vp8_bilinear_filters[xoffset];

   VFilter = vp8_bilinear_filters[yoffset];

@@ -368,6 +355,8 @@

   const short *HFilter;

   const short *VFilter;

+  assert((xoffset | yoffset) != 0);

   HFilter = vp8_bilinear_filters[xoffset];

   VFilter = vp8_bilinear_filters[yoffset];

@@ -381,6 +370,8 @@

                                  int dst_pitch) {

   const short *HFilter;

   const short *VFilter;

+  assert((xoffset | yoffset) != 0);

   HFilter = vp8_bilinear_filters[xoffset];

   VFilter = vp8_bilinear_filters[yoffset];

--- a/vp8/common/x86/subpixel_ssse3.asm

+++ b/vp8/common/x86/subpixel_ssse3.asm

@@ -1291,6 +1291,8 @@

         movq        xmm7,       XMMWORD PTR [rsp+96]

         punpcklbw   xmm5,       xmm6

+        ; Because the source register (xmm0) is always treated as signed by

+        ; pmaddubsw, the constant '128' is treated as '-128'.

         pmaddubsw   xmm1,       xmm0

         pmaddubsw   xmm2,       xmm0

@@ -1319,6 +1321,10 @@

         psraw       xmm5,       VP8_FILTER_SHIFT

         psraw       xmm6,       VP8_FILTER_SHIFT

+        ; Having multiplied everything by '-128' and obtained negative

+        ; numbers, the unsigned saturation truncates those values to 0,

+        ; resulting in incorrect handling of xoffset == 0 && yoffset == 0

         packuswb    xmm1,       xmm1

         packuswb    xmm2,       xmm2

--

⑨