shithub: dav1d

Download patch

ref: 36a2d2caa090cbcbedbca33dd207ccd9f9bee170
parent: e214351b0e485f68d0c7a03f726e8a694f0a8cbf
author: Janne Grunau <janne-vlc@jannau.net>
date: Thu Oct 25 17:13:18 EDT 2018

arm: reverse jump tables

Instead of bit-reversing the register before the clz, apply clz to it
directly and subtract 24 from the result. This avoids the two empty
filler entries in the jump table while keeping the instruction count
the same.

--- a/src/arm/32/mc.S
+++ b/src/arm/32/mc.S
@@ -72,6 +72,7 @@
         push            {r4-r6,lr}
         ldr             r4, [sp, #16]
         ldr             r5, [sp, #20]
+        clz             r4,  r4
 .ifnc \type, avg
         ldr             lr, [sp, #24]
 .endif
@@ -83,9 +84,8 @@
 .ifc \type, mask
         vmov.i8         q15, #256-2
 .endif
-        rbit            r4,  r4
         adr             r12, L(\type\()_tbl)
-        clz             r4,  r4
+        sub             r4,  r4,  #24
         ldr             r4,  [r12, r4, lsl #2]
         \type           d16, d17, q0,  q1,  q2,  q3
         add             r12, r12, r4
@@ -92,13 +92,12 @@
         bx              r12
         .align 2
 L(\type\()_tbl):
-        .word 0, 0
-        .word 4f    - L(\type\()_tbl) + CONFIG_THUMB
-        .word 80f   - L(\type\()_tbl) + CONFIG_THUMB
-        .word 160f  - L(\type\()_tbl) + CONFIG_THUMB
-        .word 320f  - L(\type\()_tbl) + CONFIG_THUMB
-        .word 640f  - L(\type\()_tbl) + CONFIG_THUMB
         .word 1280f - L(\type\()_tbl) + CONFIG_THUMB
+        .word 640f  - L(\type\()_tbl) + CONFIG_THUMB
+        .word 320f  - L(\type\()_tbl) + CONFIG_THUMB
+        .word 160f  - L(\type\()_tbl) + CONFIG_THUMB
+        .word 80f   - L(\type\()_tbl) + CONFIG_THUMB
+        .word 4f    - L(\type\()_tbl) + CONFIG_THUMB
 4:
         add             r6,  r0,  r1
         lsl             r1,  r1,  #1
--- a/src/arm/64/mc.S
+++ b/src/arm/64/mc.S
@@ -96,6 +96,7 @@
 
 .macro bidir_fn type
 function \type\()_8bpc_neon, export=1
+        clz             w4,  w4
 .ifc \type, w_avg
         dup             v30.8h, w6
         neg             v30.8h, v30.8h
@@ -104,9 +105,8 @@
 .ifc \type, mask
         movi            v31.16b, #256-2
 .endif
-        rbit            w4,  w4
         adr             x7,  L(\type\()_tbl)
-        clz             w4,  w4
+        sub             w4,  w4,  #24
         \type           v4,  v0,  v1
         ldrh            w4,  [x7, x4, lsl #1]
         \type           v5,  v2,  v3
@@ -218,13 +218,12 @@
 0:
         ret
 L(\type\()_tbl):
-        .hword 0, 0
-        .hword L(\type\()_tbl) -    4b
-        .hword L(\type\()_tbl) -    8b
-        .hword L(\type\()_tbl) -  160b
-        .hword L(\type\()_tbl) -  320b
-        .hword L(\type\()_tbl) -  640b
         .hword L(\type\()_tbl) - 1280b
+        .hword L(\type\()_tbl) -  640b
+        .hword L(\type\()_tbl) -  320b
+        .hword L(\type\()_tbl) -  160b
+        .hword L(\type\()_tbl) -    8b
+        .hword L(\type\()_tbl) -    4b
 endfunc
 .endm