shithub: dav1d

Download patch

ref: 632b4876e3869aea085427cc79f5d08487d848de
parent: 65ba279b5393382a98ddd6844e0c0753f63e749f
author: B Krishnan Iyer <krishnaniyer97@gmail.com>
date: Mon Jul 1 17:17:35 EDT 2019

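arm: mc: neon: Improvement in blend_v function

In the w4, w8 and w16 loops, the post-incrementing stores advance r0 and
r12 by 3, 6 and 12 bytes respectively. Instead of subtracting that amount
from both pointers on every iteration, subtract it once from the step in
r1 before the loop. This removes two instructions from each loop iteration.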

	                     A73             A53
	                Earlier	Now	Earlier	Now

blend_v_w2_8bpc_neon:	122.1	121.3	195.5	195.5
blend_v_w4_8bpc_neon:	248.2	247.5	375.6	358.5
blend_v_w8_8bpc_neon:	210.3	205.2	375.6	358.5
blend_v_w16_8bpc_neon:	252.7	237.1	579.2	590.5
blend_v_w32_8bpc_neon:	347	345.8	997.4	994.1

--- a/src/arm/32/mc.S
+++ b/src/arm/32/mc.S
@@ -527,6 +527,7 @@
         add             r12, r0,  r1
         lsl             r1,  r1,  #1
         vsub.i8         d5,  d22, d4
+        sub             r1,  r1,  #3
 4:
         vld1.32         {d2[]},  [r2],  r3
         vld1.32         {d0[]},  [r0]
@@ -540,8 +541,6 @@
         vst1.16         {d20[2]}, [r12]!
         vst1.8          {d20[2]}, [r0]!
         vst1.8          {d20[6]}, [r12]!
-        sub             r0,  r0,  #3
-        sub             r12, r12, #3
         add             r0,  r0,  r1
         add             r12, r12, r1
         bgt             4b
@@ -552,6 +551,7 @@
         add             r12, r0,  r1
         lsl             r1,  r1,  #1
         vsub.i8         d17, d16, d2
+        sub             r1,  r1,  #6
 8:
         vld1.u8         {d4},  [r2],  r3
         vld1.u8         {d0},  [r0]
@@ -568,8 +568,6 @@
         vst1.32         {d23[0]}, [r12]!
         vst1.16         {d22[2]}, [r0]!
         vst1.16         {d23[2]}, [r12]!
-        sub             r0,  r0,  #6
-        sub             r12, r12, #6
         add             r0,  r0,  r1
         add             r12, r12, r1
         bgt             8b
@@ -580,6 +578,7 @@
         add             r12, r0,  r1
         lsl             r1,  r1,  #1
         vsub.i8         q11, q12, q2
+        sub             r1,  r1,  #12
 16:
         vld1.u8         {q1},  [r2],  r3
         vld1.u8         {q0},  [r0]
@@ -602,8 +601,6 @@
         vst1.u8         {d20},    [r12]!
         vst1.32         {d19[0]}, [r0]!
         vst1.32         {d21[0]}, [r12]!
-        sub             r0,  r0,  #12
-        sub             r12, r12, #12
         add             r0,  r0,  r1
         add             r12, r12, r1
         bgt             16b