shithub: libvpx

Download patch

ref: 3da752fe0083d2acf2c8436a5da4805883df086f
parent: 8b5eddf709b5ecd09c2cec98c5418a2e3b0cfe14
author: Kaustubh Raste <kaustubh.raste@imgtec.com>
date: Mon Oct 10 14:03:28 EDT 2016

Optimize vpx_mbpost_proc_across_ip_msa function

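Removed one of the two HADD_SW_S32 calculations: the two dot-product
vectors are now added together first, so a single horizontal add
produces sum_sq.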

Change-Id: I7384dc881451d197404d09beb7c27b222e1d6875

--- a/vpx_dsp/mips/deblock_msa.c
+++ b/vpx_dsp/mips/deblock_msa.c
@@ -454,7 +454,7 @@
   v16u8 tmp = { 0 };
   v16i8 zero = { 0 };
   v8u16 sum_h, src_r_h, src_l_h;
-  v4u32 src_r_w, src_l_w;
+  v4u32 src_r_w;
   v4i32 flimit_vec;
 
   flimit_vec = __msa_fill_w(flimit);
@@ -473,9 +473,8 @@
     src[15] = 0;
     ILVRL_B2_UH(zero, src, src_r_h, src_l_h);
     src_r_w = __msa_dotp_u_w(src_r_h, src_r_h);
-    src_l_w = __msa_dotp_u_w(src_l_h, src_l_h);
+    src_r_w += __msa_dotp_u_w(src_l_h, src_l_h);
     sum_sq = HADD_SW_S32(src_r_w);
-    sum_sq += HADD_SW_S32(src_l_w);
     sum_h = __msa_hadd_u_h(src, src);
     sum = HADD_UH_U32(sum_h);
     {