shithub: libvpx

--- a/vpx_dsp/mips/fwd_txfm_msa.c

+++ b/vpx_dsp/mips/fwd_txfm_msa.c

@@ -8,8 +8,23 @@

  *  be found in the AUTHORS file in the root of the source tree.

*/

+#include "./vpx_dsp_rtcd.h"

 #include "vpx_dsp/mips/fwd_txfm_msa.h"

+void vpx_fdct8x8_1_msa(const int16_t *input, tran_low_t *out, int32_t stride) {

+  v8i16 in0, in1, in2, in3, in4, in5, in6, in7;  // one vector per loaded line

+  v4i32 vec_w;  // running sum, accumulated as 32-bit words

+  LD_SH8(input, stride, in0, in1, in2, in3, in4, in5, in6, in7);

+  ADD4(in0, in1, in2, in3, in4, in5, in6, in7, in0, in2, in4, in6);  // presumably pairwise adds, 8 -> 4 vectors

+  ADD2(in0, in2, in4, in6, in0, in4);  // presumably pairwise adds, 4 -> 2 vectors

+  vec_w = __msa_hadd_s_w(in0, in0);  // signed horizontal add: halfword lanes widened to words

+  vec_w += __msa_hadd_s_w(in4, in4);

+  out[0] = HADD_SW_S32(vec_w);  // presumably folds the word lanes into one scalar -- confirm in macros_msa.h

+  out[1] = 0;  // second output coefficient is explicitly cleared

+}

+#if !CONFIG_VP9_HIGHBITDEPTH

 void fdct8x16_1d_column(const int16_t *input, int16_t *tmp_ptr,

                         int32_t src_stride) {

   v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;

@@ -215,19 +230,6 @@

   ST_SH8(in0, in1, in2, in3, in4, in5, in6, in7, output, 8);

-void vpx_fdct8x8_1_msa(const int16_t *input, tran_low_t *out, int32_t stride) {

-  v8i16 in0, in1, in2, in3, in4, in5, in6, in7;

-  v4i32 vec_w;

-  LD_SH8(input, stride, in0, in1, in2, in3, in4, in5, in6, in7);

-  ADD4(in0, in1, in2, in3, in4, in5, in6, in7, in0, in2, in4, in6);

-  ADD2(in0, in2, in4, in6, in0, in4);

-  vec_w = __msa_hadd_s_w(in0, in0);

-  vec_w += __msa_hadd_s_w(in4, in4);

-  out[0] = HADD_SW_S32(vec_w);

-  out[1] = 0;

-}

 void vpx_fdct16x16_msa(const int16_t *input, int16_t *output,

                        int32_t src_stride) {

   int32_t i;

@@ -267,3 +269,4 @@

   sum = HADD_SW_S32(vec_w);

   out[0] = (int16_t)(sum >> 1);

+#endif  // !CONFIG_VP9_HIGHBITDEPTH

--- a/vpx_dsp/mips/loopfilter_16_msa.c

+++ b/vpx_dsp/mips/loopfilter_16_msa.c

@@ -8,13 +8,15 @@

  *  be found in the AUTHORS file in the root of the source tree.

*/

-#include "vpx_ports/mem.h"

+#include "./vpx_dsp_rtcd.h"

 #include "vpx_dsp/mips/loopfilter_msa.h"

+#include "vpx_ports/mem.h"

-int32_t vpx_hz_lpf_t4_and_t8_16w(uint8_t *src, int32_t pitch, uint8_t *filter48,

-                                 const uint8_t *b_limit_ptr,

-                                 const uint8_t *limit_ptr,

-                                 const uint8_t *thresh_ptr) {

+static int32_t hz_lpf_t4_and_t8_16w(uint8_t *src, int32_t pitch,

+                                    uint8_t *filter48,

+                                    const uint8_t *b_limit_ptr,

+                                    const uint8_t *limit_ptr,

+                                    const uint8_t *thresh_ptr) {

   v16u8 p3, p2, p1, p0, q3, q2, q1, q0;

   v16u8 p2_out, p1_out, p0_out, q0_out, q1_out, q2_out;

   v16u8 flat, mask, hev, thresh, b_limit, limit;

@@ -77,7 +79,7 @@

-void vpx_hz_lpf_t16_16w(uint8_t *src, int32_t pitch, uint8_t *filter48) {

+static void hz_lpf_t16_16w(uint8_t *src, int32_t pitch, uint8_t *filter48) {

   v16u8 flat, flat2, filter8;

   v16i8 zero = { 0 };

   v16u8 p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7;

@@ -413,11 +415,11 @@

   (void)count;

-  early_exit = vpx_hz_lpf_t4_and_t8_16w(src, pitch, &filter48[0], b_limit_ptr,

-                                        limit_ptr, thresh_ptr);

+  early_exit = hz_lpf_t4_and_t8_16w(src, pitch, &filter48[0], b_limit_ptr,

+                                    limit_ptr, thresh_ptr);

   if (0 == early_exit) {

-    vpx_hz_lpf_t16_16w(src, pitch, filter48);

+    hz_lpf_t16_16w(src, pitch, filter48);

@@ -753,11 +755,11 @@

   ST_UB8(q0, q1, q2, q3, q4, q5, q6, q7, output, out_pitch);

-int32_t vpx_vt_lpf_t4_and_t8_8w(uint8_t *src, uint8_t *filter48,

-                                uint8_t *src_org, int32_t pitch_org,

-                                const uint8_t *b_limit_ptr,

-                                const uint8_t *limit_ptr,

-                                const uint8_t *thresh_ptr) {

+static int32_t vt_lpf_t4_and_t8_8w(uint8_t *src, uint8_t *filter48,

+                                   uint8_t *src_org, int32_t pitch_org,

+                                   const uint8_t *b_limit_ptr,

+                                   const uint8_t *limit_ptr,

+                                   const uint8_t *thresh_ptr) {

   v16u8 p3, p2, p1, p0, q3, q2, q1, q0;

   v16u8 p2_out, p1_out, p0_out, q0_out, q1_out, q2_out;

   v16u8 flat, mask, hev, thresh, b_limit, limit;

@@ -820,8 +822,8 @@

-int32_t vpx_vt_lpf_t16_8w(uint8_t *src, uint8_t *src_org, int32_t pitch,

-                          uint8_t *filter48) {

+static int32_t vt_lpf_t16_8w(uint8_t *src, uint8_t *src_org, int32_t pitch,

+                             uint8_t *filter48) {

   v16i8 zero = { 0 };

   v16u8 filter8, flat, flat2;

   v16u8 p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7;

@@ -1051,12 +1053,12 @@

   transpose_16x8_to_8x16(src - 8, pitch, transposed_input, 16);

   early_exit =

-      vpx_vt_lpf_t4_and_t8_8w((transposed_input + 16 * 8), &filter48[0], src,

-                              pitch, b_limit_ptr, limit_ptr, thresh_ptr);

+      vt_lpf_t4_and_t8_8w((transposed_input + 16 * 8), &filter48[0], src, pitch,

+                          b_limit_ptr, limit_ptr, thresh_ptr);

   if (0 == early_exit) {

-    early_exit = vpx_vt_lpf_t16_8w((transposed_input + 16 * 8), src, pitch,

-                                   &filter48[0]);

+    early_exit =

+        vt_lpf_t16_8w((transposed_input + 16 * 8), src, pitch, &filter48[0]);

     if (0 == early_exit) {

       transpose_8x16_to_16x8(transposed_input, 16, src - 8, pitch);

@@ -1064,11 +1066,11 @@

-int32_t vpx_vt_lpf_t4_and_t8_16w(uint8_t *src, uint8_t *filter48,

-                                 uint8_t *src_org, int32_t pitch,

-                                 const uint8_t *b_limit_ptr,

-                                 const uint8_t *limit_ptr,

-                                 const uint8_t *thresh_ptr) {

+static int32_t vt_lpf_t4_and_t8_16w(uint8_t *src, uint8_t *filter48,

+                                    uint8_t *src_org, int32_t pitch,

+                                    const uint8_t *b_limit_ptr,

+                                    const uint8_t *limit_ptr,

+                                    const uint8_t *thresh_ptr) {

   v16u8 p3, p2, p1, p0, q3, q2, q1, q0;

   v16u8 p2_out, p1_out, p0_out, q0_out, q1_out, q2_out;

   v16u8 flat, mask, hev, thresh, b_limit, limit;

@@ -1141,8 +1143,8 @@

-int32_t vpx_vt_lpf_t16_16w(uint8_t *src, uint8_t *src_org, int32_t pitch,

-                           uint8_t *filter48) {

+static int32_t vt_lpf_t16_16w(uint8_t *src, uint8_t *src_org, int32_t pitch,

+                              uint8_t *filter48) {

   v16u8 flat, flat2, filter8;

   v16i8 zero = { 0 };

   v16u8 p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7;

@@ -1473,12 +1475,12 @@

   transpose_16x16((src - 8), pitch, &transposed_input[0], 16);

   early_exit =

-      vpx_vt_lpf_t4_and_t8_16w((transposed_input + 16 * 8), &filter48[0], src,

-                               pitch, b_limit_ptr, limit_ptr, thresh_ptr);

+      vt_lpf_t4_and_t8_16w((transposed_input + 16 * 8), &filter48[0], src,

+                           pitch, b_limit_ptr, limit_ptr, thresh_ptr);

   if (0 == early_exit) {

-    early_exit = vpx_vt_lpf_t16_16w((transposed_input + 16 * 8), src, pitch,

-                                    &filter48[0]);

+    early_exit =

+        vt_lpf_t16_16w((transposed_input + 16 * 8), src, pitch, &filter48[0]);

     if (0 == early_exit) {

       transpose_16x16(transposed_input, 16, (src - 8), pitch);

--- a/vpx_dsp/mips/loopfilter_4_msa.c

+++ b/vpx_dsp/mips/loopfilter_4_msa.c

@@ -8,6 +8,7 @@

  *  be found in the AUTHORS file in the root of the source tree.

*/

+#include "./vpx_dsp_rtcd.h"

 #include "vpx_dsp/mips/loopfilter_msa.h"

 void vpx_lpf_horizontal_4_msa(uint8_t *src, int32_t pitch,

--- a/vpx_dsp/mips/loopfilter_8_msa.c

+++ b/vpx_dsp/mips/loopfilter_8_msa.c

@@ -8,6 +8,7 @@

  *  be found in the AUTHORS file in the root of the source tree.

*/

+#include "./vpx_dsp_rtcd.h"

 #include "vpx_dsp/mips/loopfilter_msa.h"

 void vpx_lpf_horizontal_8_msa(uint8_t *src, int32_t pitch,

--- a/vpx_dsp/mips/vpx_convolve_avg_msa.c

+++ b/vpx_dsp/mips/vpx_convolve_avg_msa.c

@@ -8,6 +8,7 @@

  *  be found in the AUTHORS file in the root of the source tree.

*/

+#include "./vpx_dsp_rtcd.h"

 #include "vpx_dsp/mips/macros_msa.h"

 static void avg_width4_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst,

--- a/vpx_dsp/mips/vpx_convolve_copy_msa.c

+++ b/vpx_dsp/mips/vpx_convolve_copy_msa.c

@@ -9,6 +9,7 @@

*/

 #include <string.h>

+#include "./vpx_dsp_rtcd.h"

 #include "vpx_dsp/mips/macros_msa.h"

 static void copy_width8_msa(const uint8_t *src, int32_t src_stride,

--

⑨