ref: df7e1fecc11f9459282e722dcbd9d9dfb94df6da
parent: 280ad355532a61df1bb6f4e7918f120d47f3fc55
author: Kaustubh Raste <kaustubh.raste@imgtec.com>
date: Tue Jan 31 05:00:43 EST 2017
Add mips msa vpx_minmax_8x8 function average improvement ~4x-5x Change-Id: I83aee9977534fddb8a9b80d31af646c0b6b1a8c3
--- a/test/minmax_test.cc
+++ b/test/minmax_test.cc
@@ -127,4 +127,9 @@
::testing::Values(&vpx_minmax_8x8_neon));
#endif
+#if HAVE_MSA
+INSTANTIATE_TEST_CASE_P(MSA, MinMaxTest,
+ ::testing::Values(&vpx_minmax_8x8_msa));
+#endif
+
} // namespace
--- a/vpx_dsp/mips/avg_msa.c
+++ b/vpx_dsp/mips/avg_msa.c
@@ -677,3 +677,50 @@
return var;
}
+
+void vpx_minmax_8x8_msa(const uint8_t *s, int p, const uint8_t *d, int dp,
+ int *min, int *max) {
+ v16u8 s0, s1, s2, s3, s4, s5, s6, s7, d0, d1, d2, d3, d4, d5, d6, d7;
+ v16u8 diff0, diff1, diff2, diff3, min0, min1, max0, max1;
+
+ LD_UB8(s, p, s0, s1, s2, s3, s4, s5, s6, s7);
+ LD_UB8(d, dp, d0, d1, d2, d3, d4, d5, d6, d7);
+ PCKEV_D4_UB(s1, s0, s3, s2, s5, s4, s7, s6, s0, s1, s2, s3);
+ PCKEV_D4_UB(d1, d0, d3, d2, d5, d4, d7, d6, d0, d1, d2, d3);
+
+ diff0 = __msa_asub_u_b(s0, d0);
+ diff1 = __msa_asub_u_b(s1, d1);
+ diff2 = __msa_asub_u_b(s2, d2);
+ diff3 = __msa_asub_u_b(s3, d3);
+
+ min0 = __msa_min_u_b(diff0, diff1);
+ min1 = __msa_min_u_b(diff2, diff3);
+ min0 = __msa_min_u_b(min0, min1);
+
+ max0 = __msa_max_u_b(diff0, diff1);
+ max1 = __msa_max_u_b(diff2, diff3);
+ max0 = __msa_max_u_b(max0, max1);
+
+ min1 = (v16u8)__msa_sldi_b((v16i8)min1, (v16i8)min0, 8);
+ min0 = __msa_min_u_b(min0, min1);
+ max1 = (v16u8)__msa_sldi_b((v16i8)max1, (v16i8)max0, 8);
+ max0 = __msa_max_u_b(max0, max1);
+
+ min1 = (v16u8)__msa_sldi_b((v16i8)min1, (v16i8)min0, 4);
+ min0 = __msa_min_u_b(min0, min1);
+ max1 = (v16u8)__msa_sldi_b((v16i8)max1, (v16i8)max0, 4);
+ max0 = __msa_max_u_b(max0, max1);
+
+ min1 = (v16u8)__msa_sldi_b((v16i8)min1, (v16i8)min0, 2);
+ min0 = __msa_min_u_b(min0, min1);
+ max1 = (v16u8)__msa_sldi_b((v16i8)max1, (v16i8)max0, 2);
+ max0 = __msa_max_u_b(max0, max1);
+
+ min1 = (v16u8)__msa_sldi_b((v16i8)min1, (v16i8)min0, 1);
+ min0 = __msa_min_u_b(min0, min1);
+ max1 = (v16u8)__msa_sldi_b((v16i8)max1, (v16i8)max0, 1);
+ max0 = __msa_max_u_b(max0, max1);
+
+ *min = min0[0];
+ *max = max0[0];
+}
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -883,7 +883,7 @@
specialize qw/vpx_avg_4x4 sse2 neon msa/;
add_proto qw/void vpx_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
- specialize qw/vpx_minmax_8x8 sse2 neon/;
+ specialize qw/vpx_minmax_8x8 sse2 neon msa/;
add_proto qw/void vpx_hadamard_8x8/, "const int16_t *src_diff, int src_stride, int16_t *coeff";
specialize qw/vpx_hadamard_8x8 sse2 neon msa/, "$ssse3_x86_64";