shithub: dav1d

Download patch

ref: d00a0227d6ce86d262ee142e68d6794c9f6917df
parent: 74d5cf57599f423c3d38d091b9a95ae245d89235
author: Martin Storsjö <martin@martin.st>
date: Fri Jun 26 20:13:13 EDT 2020

arm32: ipred: Remove unnecessary operations in ipred_dc_w4

The removed instructions were left over from matching the arm64 version
too closely (there, the summation can be done efficiently with uaddlv
after zeroing the upper half of the register).

Before:                  Cortex A7     A8     A9    A53   A72    A73
intra_pred_dc_w4_8bpc_neon:  124.5   65.1   90.2  100.4  48.1   50.4
After:
intra_pred_dc_w4_8bpc_neon:  120.3   60.7   83.6   94.0  44.1   47.9

--- a/src/arm/32/ipred.S
+++ b/src/arm/32/ipred.S
@@ -568,7 +568,6 @@
         clz             r3,  r3
         clz             r12, r4
         vdup.16         q15, lr             // width + height
-        mov             r6,  #0
         adr             r5,  L(ipred_dc_tbl)
         rbit            lr,  lr             // rbit(width + height)
         sub             r3,  r3,  #20       // 25 leading bits, minus table offset 5
@@ -606,10 +605,8 @@
 L(ipred_dc_w4):
         add             r2,  r2,  #1
         vld1.32         {d1[]},  [r2]
-        vmov.32         d1[1],  r6
         vadd.s16        d0,  d0,  d30
         vpaddl.u8       d1,  d1
-        vpadd.u16       d1,  d1
         vpadd.u16       d1,  d1
         cmp             r4,  #4
         vadd.s16        d0,  d0,  d1