ref: 3da752fe0083d2acf2c8436a5da4805883df086f
parent: 8b5eddf709b5ecd09c2cec98c5418a2e3b0cfe14
author: Kaustubh Raste <kaustubh.raste@imgtec.com>
date: Mon Oct 10 14:03:28 EDT 2016
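Optimize vpx_mbpost_proc_across_ip_msa function. Removed one of the two HADD_SW_S32 calculations by accumulating both __msa_dotp_u_w results into src_r_w before the single remaining HADD_SW_S32 call. Change-Id: I7384dc881451d197404d09beb7c27b222e1d6875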
--- a/vpx_dsp/mips/deblock_msa.c
+++ b/vpx_dsp/mips/deblock_msa.c
@@ -454,7 +454,7 @@
v16u8 tmp = { 0 };
v16i8 zero = { 0 };
v8u16 sum_h, src_r_h, src_l_h;
- v4u32 src_r_w, src_l_w;
+ v4u32 src_r_w;
v4i32 flimit_vec;
flimit_vec = __msa_fill_w(flimit);
@@ -473,9 +473,8 @@
src[15] = 0;
ILVRL_B2_UH(zero, src, src_r_h, src_l_h);
src_r_w = __msa_dotp_u_w(src_r_h, src_r_h);
- src_l_w = __msa_dotp_u_w(src_l_h, src_l_h);
+ src_r_w += __msa_dotp_u_w(src_l_h, src_l_h);
sum_sq = HADD_SW_S32(src_r_w);
- sum_sq += HADD_SW_S32(src_l_w);
sum_h = __msa_hadd_u_h(src, src);
sum = HADD_UH_U32(sum_h);
{