ref: d4002c88a3c253342cd5437662c9db652158bdf0
parent: 5f4e28fe77a77682ac52841308582e74d09ca6e7
author: Martin Storsjö <martin@martin.st>
date: Thu Apr 9 06:40:09 EDT 2020
arm64: itx: Prepare for other bitdepths
--- a/src/arm/64/itx.S
+++ b/src/arm/64/itx.S
@@ -600,7 +600,7 @@
.endr
.endm
-function inv_txfm_add_wht_wht_4x4_neon, export=1
+function inv_txfm_add_wht_wht_4x4_8bpc_neon, export=1
mov x15, x30
movi v31.8h, #0
ld1 {v16.4h,v17.4h,v18.4h,v19.4h}, [x2]
@@ -664,7 +664,7 @@
endfunc
.macro def_fn_4x4 txfm1, txfm2
-function inv_txfm_add_\txfm1\()_\txfm2\()_4x4_neon, export=1
+function inv_txfm_add_\txfm1\()_\txfm2\()_4x4_8bpc_neon, export=1
mov x15, x30
.ifc \txfm1\()_\txfm2, dct_dct
@@ -905,7 +905,7 @@
def_fn_8x8_base identity_
.macro def_fn_8x8 txfm1, txfm2
-function inv_txfm_add_\txfm1\()_\txfm2\()_8x8_neon, export=1
+function inv_txfm_add_\txfm1\()_\txfm2\()_8x8_8bpc_neon, export=1
mov x15, x30
.ifc \txfm1\()_\txfm2, dct_dct
@@ -992,7 +992,7 @@
endfunc
.macro def_fn_48 w, h, txfm1, txfm2
-function inv_txfm_add_\txfm1\()_\txfm2\()_\w\()x\h\()_neon, export=1
+function inv_txfm_add_\txfm1\()_\txfm2\()_\w\()x\h\()_8bpc_neon, export=1
mov x15, x30
.ifc \txfm1\()_\txfm2, dct_dct
@@ -1457,7 +1457,7 @@
endfunc
.macro def_fn_16x16 txfm1, txfm2, eob_half
-function inv_txfm_add_\txfm1\()_\txfm2\()_16x16_neon, export=1
+function inv_txfm_add_\txfm1\()_\txfm2\()_16x16_8bpc_neon, export=1
.ifc \txfm1\()_\txfm2, dct_dct
idct_dc 16, 16, 2
.endif
@@ -1629,7 +1629,7 @@
def_fn_416_base identity_
.macro def_fn_416 w, h, txfm1, txfm2, eob_half
-function inv_txfm_add_\txfm1\()_\txfm2\()_\w\()x\h\()_neon, export=1
+function inv_txfm_add_\txfm1\()_\txfm2\()_\w\()x\h\()_8bpc_neon, export=1
.ifc \txfm1\()_\txfm2, dct_dct
idct_dc \w, \h, 1
.endif
@@ -1812,7 +1812,7 @@
def_fn_816_base identity_
.macro def_fn_816 w, h, txfm1, txfm2, eob_half
-function inv_txfm_add_\txfm1\()_\txfm2\()_\w\()x\h\()_neon, export=1
+function inv_txfm_add_\txfm1\()_\txfm2\()_\w\()x\h\()_8bpc_neon, export=1
.ifc \txfm1\()_\txfm2, dct_dct
idct_dc \w, \h, 1
.endif
@@ -2182,7 +2182,7 @@
.short 43, 107, 171, 256
endconst
-function inv_txfm_add_identity_identity_32x32_neon, export=1
+function inv_txfm_add_identity_identity_32x32_8bpc_neon, export=1
movi v0.8h, #0
movrel x13, eob_32x32
@@ -2225,7 +2225,7 @@
.endm
.macro def_identity_1632 w, h, wshort, hshort
-function inv_txfm_add_identity_identity_\w\()x\h\()_neon, export=1
+function inv_txfm_add_identity_identity_\w\()x\h\()_8bpc_neon, export=1
mov w16, #2896*8
mov w17, #2*(5793-4096)*8
dup v1.4h, w16
@@ -2285,7 +2285,7 @@
def_identity_1632 32, 16, , _shortside
.macro def_identity_832 w, h
-function inv_txfm_add_identity_identity_\w\()x\h\()_neon, export=1
+function inv_txfm_add_identity_identity_\w\()x\h\()_8bpc_neon, export=1
movi v0.8h, #0
movrel x13, eob_8x32
@@ -2329,7 +2329,7 @@
def_identity_832 8, 32
def_identity_832 32, 8
-function inv_txfm_add_dct_dct_32x32_neon, export=1
+function inv_txfm_add_dct_dct_32x32_8bpc_neon, export=1
idct_dc 32, 32, 2
mov x15, x30
@@ -2377,7 +2377,7 @@
br x15
endfunc
-function inv_txfm_add_dct_dct_16x32_neon, export=1
+function inv_txfm_add_dct_dct_16x32_8bpc_neon, export=1
idct_dc 16, 32, 1
mov x15, x30
@@ -2426,7 +2426,7 @@
br x15
endfunc
-function inv_txfm_add_dct_dct_32x16_neon, export=1
+function inv_txfm_add_dct_dct_32x16_8bpc_neon, export=1
idct_dc 32, 16, 1
mov x15, x30
@@ -2471,7 +2471,7 @@
br x15
endfunc
-function inv_txfm_add_dct_dct_8x32_neon, export=1
+function inv_txfm_add_dct_dct_8x32_8bpc_neon, export=1
idct_dc 8, 32, 2
mov x15, x30
@@ -2528,7 +2528,7 @@
br x15
endfunc
-function inv_txfm_add_dct_dct_32x8_neon, export=1
+function inv_txfm_add_dct_dct_32x8_8bpc_neon, export=1
idct_dc 32, 8, 2
mov x15, x30
@@ -3023,7 +3023,7 @@
#endif
.endm
-function inv_txfm_add_dct_dct_64x64_neon, export=1
+function inv_txfm_add_dct_dct_64x64_8bpc_neon, export=1
idct_dc 64, 64, 2
mov x15, x30
@@ -3077,7 +3077,7 @@
br x15
endfunc
-function inv_txfm_add_dct_dct_64x32_neon, export=1
+function inv_txfm_add_dct_dct_64x32_8bpc_neon, export=1
idct_dc 64, 32, 1
mov x15, x30
@@ -3130,7 +3130,7 @@
br x15
endfunc
-function inv_txfm_add_dct_dct_32x64_neon, export=1
+function inv_txfm_add_dct_dct_32x64_8bpc_neon, export=1
idct_dc 32, 64, 1
mov x15, x30
@@ -3180,7 +3180,7 @@
br x15
endfunc
-function inv_txfm_add_dct_dct_64x16_neon, export=1
+function inv_txfm_add_dct_dct_64x16_8bpc_neon, export=1
idct_dc 64, 16, 2
mov x15, x30
@@ -3234,7 +3234,7 @@
br x15
endfunc
-function inv_txfm_add_dct_dct_16x64_neon, export=1
+function inv_txfm_add_dct_dct_16x64_8bpc_neon, export=1
idct_dc 16, 64, 2
mov x15, x30
--- a/src/arm/itx_init_tmpl.c
+++ b/src/arm/itx_init_tmpl.c
@@ -29,32 +29,32 @@
#include "src/itx.h"
#define decl_itx2_fns(w, h, opt) \
-decl_itx_fn(dav1d_inv_txfm_add_dct_dct_##w##x##h##_##opt); \
-decl_itx_fn(dav1d_inv_txfm_add_identity_identity_##w##x##h##_##opt)
+decl_itx_fn(BF(dav1d_inv_txfm_add_dct_dct_##w##x##h, opt)); \
+decl_itx_fn(BF(dav1d_inv_txfm_add_identity_identity_##w##x##h, opt))
#define decl_itx12_fns(w, h, opt) \
decl_itx2_fns(w, h, opt); \
-decl_itx_fn(dav1d_inv_txfm_add_dct_adst_##w##x##h##_##opt); \
-decl_itx_fn(dav1d_inv_txfm_add_dct_flipadst_##w##x##h##_##opt); \
-decl_itx_fn(dav1d_inv_txfm_add_dct_identity_##w##x##h##_##opt); \
-decl_itx_fn(dav1d_inv_txfm_add_adst_dct_##w##x##h##_##opt); \
-decl_itx_fn(dav1d_inv_txfm_add_adst_adst_##w##x##h##_##opt); \
-decl_itx_fn(dav1d_inv_txfm_add_adst_flipadst_##w##x##h##_##opt); \
-decl_itx_fn(dav1d_inv_txfm_add_flipadst_dct_##w##x##h##_##opt); \
-decl_itx_fn(dav1d_inv_txfm_add_flipadst_adst_##w##x##h##_##opt); \
-decl_itx_fn(dav1d_inv_txfm_add_flipadst_flipadst_##w##x##h##_##opt); \
-decl_itx_fn(dav1d_inv_txfm_add_identity_dct_##w##x##h##_##opt)
+decl_itx_fn(BF(dav1d_inv_txfm_add_dct_adst_##w##x##h, opt)); \
+decl_itx_fn(BF(dav1d_inv_txfm_add_dct_flipadst_##w##x##h, opt)); \
+decl_itx_fn(BF(dav1d_inv_txfm_add_dct_identity_##w##x##h, opt)); \
+decl_itx_fn(BF(dav1d_inv_txfm_add_adst_dct_##w##x##h, opt)); \
+decl_itx_fn(BF(dav1d_inv_txfm_add_adst_adst_##w##x##h, opt)); \
+decl_itx_fn(BF(dav1d_inv_txfm_add_adst_flipadst_##w##x##h, opt)); \
+decl_itx_fn(BF(dav1d_inv_txfm_add_flipadst_dct_##w##x##h, opt)); \
+decl_itx_fn(BF(dav1d_inv_txfm_add_flipadst_adst_##w##x##h, opt)); \
+decl_itx_fn(BF(dav1d_inv_txfm_add_flipadst_flipadst_##w##x##h, opt)); \
+decl_itx_fn(BF(dav1d_inv_txfm_add_identity_dct_##w##x##h, opt))
#define decl_itx16_fns(w, h, opt) \
decl_itx12_fns(w, h, opt); \
-decl_itx_fn(dav1d_inv_txfm_add_adst_identity_##w##x##h##_##opt); \
-decl_itx_fn(dav1d_inv_txfm_add_flipadst_identity_##w##x##h##_##opt); \
-decl_itx_fn(dav1d_inv_txfm_add_identity_adst_##w##x##h##_##opt); \
-decl_itx_fn(dav1d_inv_txfm_add_identity_flipadst_##w##x##h##_##opt)
+decl_itx_fn(BF(dav1d_inv_txfm_add_adst_identity_##w##x##h, opt)); \
+decl_itx_fn(BF(dav1d_inv_txfm_add_flipadst_identity_##w##x##h, opt)); \
+decl_itx_fn(BF(dav1d_inv_txfm_add_identity_adst_##w##x##h, opt)); \
+decl_itx_fn(BF(dav1d_inv_txfm_add_identity_flipadst_##w##x##h, opt))
#define decl_itx17_fns(w, h, opt) \
decl_itx16_fns(w, h, opt); \
-decl_itx_fn(dav1d_inv_txfm_add_wht_wht_##w##x##h##_##opt)
+decl_itx_fn(BF(dav1d_inv_txfm_add_wht_wht_##w##x##h, opt))
decl_itx17_fns( 4, 4, neon);
decl_itx16_fns( 4, 8, neon);
@@ -71,16 +71,16 @@
decl_itx2_fns (32, 16, neon);
decl_itx2_fns (32, 32, neon);
-decl_itx_fn(dav1d_inv_txfm_add_dct_dct_16x64_neon);
-decl_itx_fn(dav1d_inv_txfm_add_dct_dct_32x64_neon);
-decl_itx_fn(dav1d_inv_txfm_add_dct_dct_64x16_neon);
-decl_itx_fn(dav1d_inv_txfm_add_dct_dct_64x32_neon);
-decl_itx_fn(dav1d_inv_txfm_add_dct_dct_64x64_neon);
+decl_itx_fn(BF(dav1d_inv_txfm_add_dct_dct_16x64, neon));
+decl_itx_fn(BF(dav1d_inv_txfm_add_dct_dct_32x64, neon));
+decl_itx_fn(BF(dav1d_inv_txfm_add_dct_dct_64x16, neon));
+decl_itx_fn(BF(dav1d_inv_txfm_add_dct_dct_64x32, neon));
+decl_itx_fn(BF(dav1d_inv_txfm_add_dct_dct_64x64, neon));
COLD void bitfn(dav1d_itx_dsp_init_arm)(Dav1dInvTxfmDSPContext *const c, int bpc) {
#define assign_itx_fn(pfx, w, h, type, type_enum, ext) \
c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
- dav1d_inv_txfm_add_##type##_##w##x##h##_##ext
+ BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
#define assign_itx1_fn(pfx, w, h, ext) \
assign_itx_fn(pfx, w, h, dct_dct, DCT_DCT, ext)