ref: 59e065b6edfa4f62edf23b7c0365b99f5ef86b5e
parent: bff5aa982708c5d471671c4c8edbfdb607018960
author: Shiyou Yin <yinshiyou-hf@loongson.cn>
date: Tue Aug 22 04:44:36 EDT 2017
vpx_dsp: loongson: optimize vpx_mseWxH_c (16x16, 16x8, 8x16, 8x8) with MMI.

Change-Id: I2c782d18d9004414ba61b77238e0caf3e022d8f2
--- a/test/variance_test.cc
+++ b/test/variance_test.cc
@@ -1540,4 +1540,12 @@
::testing::Values(SseParams(2, 2,
&vpx_get4x4sse_cs_vsx)));
#endif // HAVE_VSX
+
+#if HAVE_MMI
+INSTANTIATE_TEST_CASE_P(MMI, VpxMseTest,
+ ::testing::Values(MseParams(4, 4, &vpx_mse16x16_mmi),
+ MseParams(4, 3, &vpx_mse16x8_mmi),
+ MseParams(3, 4, &vpx_mse8x16_mmi),
+ MseParams(3, 3, &vpx_mse8x8_mmi)));
+#endif // HAVE_MMI
} // namespace
--- /dev/null
+++ b/vpx_dsp/mips/variance_mmi.c
@@ -1,0 +1,146 @@
+/*
+ * Copyright (c) 2017 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vpx_dsp_rtcd.h"
+#include "vpx_ports/mem.h"
+#include "vpx/vpx_integer.h"
+#include "vpx_ports/asmdefs_mmi.h"
+
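+/* Accumulate the sum of squared differences for one 8-pixel row:
+ * load 8 bytes each from a and b, take per-byte absolute differences
+ * (pasubub), widen them to 16 bits against the zero register ftmp0,
+ * then square and pairwise-add with pmaddhw, accumulating two 32-bit
+ * partial sums in ftmp8. */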
+#define VARIANCE_SSE_8 \
+ "gsldlc1 %[ftmp1], 0x07(%[a]) \n\t" \
+ "gsldrc1 %[ftmp1], 0x00(%[a]) \n\t" \
+ "gsldlc1 %[ftmp2], 0x07(%[b]) \n\t" \
+ "gsldrc1 %[ftmp2], 0x00(%[b]) \n\t" \
+ "pasubub %[ftmp3], %[ftmp1], %[ftmp2] \n\t" \
+ "punpcklbh %[ftmp4], %[ftmp3], %[ftmp0] \n\t" \
+ "punpckhbh %[ftmp5], %[ftmp3], %[ftmp0] \n\t" \
+ "pmaddhw %[ftmp6], %[ftmp4], %[ftmp4] \n\t" \
+ "pmaddhw %[ftmp7], %[ftmp5], %[ftmp5] \n\t" \
+ "paddw %[ftmp8], %[ftmp8], %[ftmp6] \n\t" \
+ "paddw %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
+
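+/* Same as VARIANCE_SSE_8, extended to a 16-pixel row by also
+ * processing bytes 8..15 of a and b. */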
+#define VARIANCE_SSE_16 \
+ VARIANCE_SSE_8 \
+ "gsldlc1 %[ftmp1], 0x0f(%[a]) \n\t" \
+ "gsldrc1 %[ftmp1], 0x08(%[a]) \n\t" \
+ "gsldlc1 %[ftmp2], 0x0f(%[b]) \n\t" \
+ "gsldrc1 %[ftmp2], 0x08(%[b]) \n\t" \
+ "pasubub %[ftmp3], %[ftmp1], %[ftmp2] \n\t" \
+ "punpcklbh %[ftmp4], %[ftmp3], %[ftmp0] \n\t" \
+ "punpckhbh %[ftmp5], %[ftmp3], %[ftmp0] \n\t" \
+ "pmaddhw %[ftmp6], %[ftmp4], %[ftmp4] \n\t" \
+ "pmaddhw %[ftmp7], %[ftmp5], %[ftmp5] \n\t" \
+ "paddw %[ftmp8], %[ftmp8], %[ftmp6] \n\t" \
+ "paddw %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
+
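+/* Sum of squared errors over `high` rows of 16 pixels. As with the C
+ * version, the vpx_mse* functions return the raw SSE (no mean is
+ * subtracted); the result is stored in *sse and also returned. */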
+static inline uint32_t vpx_mse16x(const uint8_t *a, int a_stride,
+ const uint8_t *b, int b_stride, uint32_t *sse,
+ uint64_t high) {
+ double ftmp[12];
+ uint32_t tmp[1];
+
+ *sse = 0;
+
+ __asm__ volatile (
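+ /* tmp0 = 32, moved into ftmp11 as the lane-fold shift amount; tmp0 is
+    then reloaded with the row count from *high. ftmp0 is the zero
+    register, ftmp8 the SSE accumulator. */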
+ "li %[tmp0], 0x20 \n\t"
+ "mtc1 %[tmp0], %[ftmp11] \n\t"
+ MMI_L(%[tmp0], %[high], 0x00)
+ "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
+ "xor %[ftmp8], %[ftmp8], %[ftmp8] \n\t"
+ "xor %[ftmp9], %[ftmp9], %[ftmp9] \n\t"
+
+ "1: \n\t"
+ VARIANCE_SSE_16
+
+ "addiu %[tmp0], %[tmp0], -0x01 \n\t"
+ MMI_ADDU(%[a], %[a], %[a_stride])
+ MMI_ADDU(%[b], %[b], %[b_stride])
+ "bnez %[tmp0], 1b \n\t"
+
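+ /* Fold the two 32-bit lanes of ftmp8 (logical shift right by 32, then
+    add) and store the low word to *sse. */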
+ "dsrl %[ftmp9], %[ftmp8], %[ftmp11] \n\t"
+ "paddw %[ftmp9], %[ftmp9], %[ftmp8] \n\t"
+ "swc1 %[ftmp9], 0x00(%[sse]) \n\t"
+ : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
+ [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
+ [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
+ [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
+ [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
+ [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
+ [tmp0]"=&r"(tmp[0]),
+ [a]"+&r"(a), [b]"+&r"(b)
+ : [a_stride]"r"((mips_reg)a_stride), [b_stride]"r"((mips_reg)b_stride),
+ [high]"r"(&high), [sse]"r"(sse)
+ : "memory"
+ );
+
+ return *sse;
+}
+
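+/* Instantiate vpx_mse16x16_mmi and vpx_mse16x8_mmi from the shared
+ * helper; n is the number of rows. */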
+#define vpx_mse16xN(n) \
+ uint32_t vpx_mse16x##n##_mmi(const uint8_t *a, int a_stride, \
+ const uint8_t *b, int b_stride, \
+ uint32_t *sse) { \
+ return vpx_mse16x(a, a_stride, b, b_stride, sse, n); \
+ }
+
+vpx_mse16xN(16);
+vpx_mse16xN(8);
+
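+/* 8-pixel-wide variant of vpx_mse16x: the same flow, but each loop
+ * iteration consumes one 8-pixel row via VARIANCE_SSE_8. */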
+static inline uint32_t vpx_mse8x(const uint8_t *a, int a_stride,
+ const uint8_t *b, int b_stride, uint32_t *sse,
+ uint64_t high) {
+ double ftmp[12];
+ uint32_t tmp[1];
+
+ *sse = 0;
+
+ __asm__ volatile (
+ "li %[tmp0], 0x20 \n\t"
+ "mtc1 %[tmp0], %[ftmp11] \n\t"
+ MMI_L(%[tmp0], %[high], 0x00)
+ "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
+ "xor %[ftmp8], %[ftmp8], %[ftmp8] \n\t"
+ "xor %[ftmp9], %[ftmp9], %[ftmp9] \n\t"
+
+ "1: \n\t"
+ VARIANCE_SSE_8
+
+ "addiu %[tmp0], %[tmp0], -0x01 \n\t"
+ MMI_ADDU(%[a], %[a], %[a_stride])
+ MMI_ADDU(%[b], %[b], %[b_stride])
+ "bnez %[tmp0], 1b \n\t"
+
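+ /* Fold the two 32-bit partial sums and store the low word, as in
+    vpx_mse16x above. */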
+ "dsrl %[ftmp9], %[ftmp8], %[ftmp11] \n\t"
+ "paddw %[ftmp9], %[ftmp9], %[ftmp8] \n\t"
+ "swc1 %[ftmp9], 0x00(%[sse]) \n\t"
+ : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
+ [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
+ [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
+ [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
+ [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
+ [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
+ [tmp0]"=&r"(tmp[0]),
+ [a]"+&r"(a), [b]"+&r"(b)
+ : [a_stride]"r"((mips_reg)a_stride), [b_stride]"r"((mips_reg)b_stride),
+ [high]"r"(&high), [sse]"r"(sse)
+ : "memory"
+ );
+
+ return *sse;
+}
+
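+/* Instantiate vpx_mse8x16_mmi and vpx_mse8x8_mmi. */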
+#define vpx_mse8xN(n) \
+ uint32_t vpx_mse8x##n##_mmi(const uint8_t *a, int a_stride, \
+ const uint8_t *b, int b_stride, uint32_t *sse) { \
+ return vpx_mse8x(a, a_stride, b, b_stride, sse, n); \
+ }
+
+vpx_mse8xN(16);
+vpx_mse8xN(8);
--- a/vpx_dsp/vpx_dsp.mk
+++ b/vpx_dsp/vpx_dsp.mk
@@ -351,6 +351,8 @@
DSP_SRCS-$(HAVE_MSA) += mips/variance_msa.c
DSP_SRCS-$(HAVE_MSA) += mips/sub_pixel_variance_msa.c
+DSP_SRCS-$(HAVE_MMI) += mips/variance_mmi.c
+
DSP_SRCS-$(HAVE_SSE) += x86/variance_sse2.c
DSP_SRCS-$(HAVE_SSE2) += x86/avg_pred_sse2.c
DSP_SRCS-$(HAVE_SSE2) += x86/variance_sse2.c # Contains SSE2 and SSSE3
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -1101,16 +1101,16 @@
specialize qw/vpx_get8x8var sse2 neon msa/;
add_proto qw/unsigned int vpx_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/vpx_mse16x16 sse2 avx2 neon msa/;
+ specialize qw/vpx_mse16x16 sse2 avx2 neon msa mmi/;
add_proto qw/unsigned int vpx_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/vpx_mse16x8 sse2 msa/;
+ specialize qw/vpx_mse16x8 sse2 msa mmi/;
add_proto qw/unsigned int vpx_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/vpx_mse8x16 sse2 msa/;
+ specialize qw/vpx_mse8x16 sse2 msa mmi/;
add_proto qw/unsigned int vpx_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/vpx_mse8x8 sse2 msa/;
+ specialize qw/vpx_mse8x8 sse2 msa mmi/;
add_proto qw/unsigned int vpx_get_mb_ss/, "const int16_t *";
specialize qw/vpx_get_mb_ss sse2 msa vsx/;
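
For reference (not part of this change), the scalar result the MMI paths
must reproduce is equivalent to the following sketch of the generic
vpx_mseWxH_c pattern; the helper name mse_ref is hypothetical:

#include <stdint.h>

/* Sum of squared errors over a w x h block; vpx_mse* both stores the
 * SSE in *sse and returns it (the mean is not subtracted). */
static uint32_t mse_ref(const uint8_t *a, int a_stride, const uint8_t *b,
                        int b_stride, int w, int h, uint32_t *sse) {
  uint32_t sum = 0;
  int i, j;
  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      const int diff = a[j] - b[j];
      sum += (uint32_t)(diff * diff);
    }
    a += a_stride;
    b += b_stride;
  }
  *sse = sum;
  return sum;
}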