ref: a9323ef58df2c0713e4115965df10c76818aadb9
parent: e0f28d45be2a99e0f56ffe0f7f94754ce6c83ab8
author: Martin Storsjö <martin@martin.st>
date: Sun Mar 15 20:04:57 EDT 2020
arm: ipred: Prepare for 16 bpc
--- a/src/arm/32/ipred.S
+++ b/src/arm/32/ipred.S
@@ -29,11 +29,11 @@
#include "src/arm/asm.S"
#include "util.S"
-// void ipred_dc_128_neon(pixel *dst, const ptrdiff_t stride,
-// const pixel *const topleft,
-// const int width, const int height, const int a,
-// const int max_width, const int max_height);
-function ipred_dc_128_neon, export=1
+// void ipred_dc_128_8bpc_neon(pixel *dst, const ptrdiff_t stride,
+// const pixel *const topleft,
+// const int width, const int height, const int a,
+// const int max_width, const int max_height);
+function ipred_dc_128_8bpc_neon, export=1
push {r4, lr}
ldr r4, [sp, #8]
clz r3, r3
@@ -107,11 +107,11 @@
pop {r4, pc}
endfunc
-// void ipred_v_neon(pixel *dst, const ptrdiff_t stride,
-// const pixel *const topleft,
-// const int width, const int height, const int a,
-// const int max_width, const int max_height);
-function ipred_v_neon, export=1
+// void ipred_v_8bpc_neon(pixel *dst, const ptrdiff_t stride,
+// const pixel *const topleft,
+// const int width, const int height, const int a,
+// const int max_width, const int max_height);
+function ipred_v_8bpc_neon, export=1
push {r4, lr}
ldr lr, [sp, #8]
clz r3, r3
@@ -189,11 +189,11 @@
pop {r4, pc}
endfunc
-// void ipred_h_neon(pixel *dst, const ptrdiff_t stride,
-// const pixel *const topleft,
-// const int width, const int height, const int a,
-// const int max_width, const int max_height);
-function ipred_h_neon, export=1
+// void ipred_h_8bpc_neon(pixel *dst, const ptrdiff_t stride,
+// const pixel *const topleft,
+// const int width, const int height, const int a,
+// const int max_width, const int max_height);
+function ipred_h_8bpc_neon, export=1
push {r4-r5, lr}
ldr r4, [sp, #12]
clz r3, r3
@@ -297,11 +297,11 @@
pop {r4-r5, pc}
endfunc
-// void ipred_dc_top_neon(pixel *dst, const ptrdiff_t stride,
-// const pixel *const topleft,
-// const int width, const int height, const int a,
-// const int max_width, const int max_height);
-function ipred_dc_top_neon, export=1
+// void ipred_dc_top_8bpc_neon(pixel *dst, const ptrdiff_t stride,
+// const pixel *const topleft,
+// const int width, const int height, const int a,
+// const int max_width, const int max_height);
+function ipred_dc_top_8bpc_neon, export=1
push {r4-r5, lr}
ldr r4, [sp, #12]
clz r3, r3
@@ -418,11 +418,11 @@
pop {r4-r5, pc}
endfunc
-// void ipred_dc_left_neon(pixel *dst, const ptrdiff_t stride,
-// const pixel *const topleft,
-// const int width, const int height, const int a,
-// const int max_width, const int max_height);
-function ipred_dc_left_neon, export=1
+// void ipred_dc_left_8bpc_neon(pixel *dst, const ptrdiff_t stride,
+// const pixel *const topleft,
+// const int width, const int height, const int a,
+// const int max_width, const int max_height);
+function ipred_dc_left_8bpc_neon, export=1
push {r4-r5, lr}
ldr r4, [sp, #12]
sub r2, r2, r4
@@ -556,11 +556,11 @@
pop {r4-r5, pc}
endfunc
-// void ipred_dc_neon(pixel *dst, const ptrdiff_t stride,
-// const pixel *const topleft,
-// const int width, const int height, const int a,
-// const int max_width, const int max_height);
-function ipred_dc_neon, export=1
+// void ipred_dc_8bpc_neon(pixel *dst, const ptrdiff_t stride,
+// const pixel *const topleft,
+// const int width, const int height, const int a,
+// const int max_width, const int max_height);
+function ipred_dc_8bpc_neon, export=1
push {r4-r6, lr}
ldr r4, [sp, #16]
sub r2, r2, r4
--- a/src/arm/64/ipred.S
+++ b/src/arm/64/ipred.S
@@ -28,11 +28,11 @@
#include "src/arm/asm.S"
#include "util.S"
-// void ipred_dc_128_neon(pixel *dst, const ptrdiff_t stride,
-// const pixel *const topleft,
-// const int width, const int height, const int a,
-// const int max_width, const int max_height);
-function ipred_dc_128_neon, export=1
+// void ipred_dc_128_8bpc_neon(pixel *dst, const ptrdiff_t stride,
+// const pixel *const topleft,
+// const int width, const int height, const int a,
+// const int max_width, const int max_height);
+function ipred_dc_128_8bpc_neon, export=1
clz w3, w3
adr x5, L(ipred_dc_128_tbl)
sub w3, w3, #25
@@ -97,11 +97,11 @@
.hword L(ipred_dc_128_tbl) - 4b
endfunc
-// void ipred_v_neon(pixel *dst, const ptrdiff_t stride,
-// const pixel *const topleft,
-// const int width, const int height, const int a,
-// const int max_width, const int max_height);
-function ipred_v_neon, export=1
+// void ipred_v_8bpc_neon(pixel *dst, const ptrdiff_t stride,
+// const pixel *const topleft,
+// const int width, const int height, const int a,
+// const int max_width, const int max_height);
+function ipred_v_8bpc_neon, export=1
clz w3, w3
adr x5, L(ipred_v_tbl)
sub w3, w3, #25
@@ -170,11 +170,11 @@
.hword L(ipred_v_tbl) - 40b
endfunc
-// void ipred_h_neon(pixel *dst, const ptrdiff_t stride,
-// const pixel *const topleft,
-// const int width, const int height, const int a,
-// const int max_width, const int max_height);
-function ipred_h_neon, export=1
+// void ipred_h_8bpc_neon(pixel *dst, const ptrdiff_t stride,
+// const pixel *const topleft,
+// const int width, const int height, const int a,
+// const int max_width, const int max_height);
+function ipred_h_8bpc_neon, export=1
clz w3, w3
adr x5, L(ipred_h_tbl)
sub w3, w3, #25
@@ -251,11 +251,11 @@
.hword L(ipred_h_tbl) - 4b
endfunc
-// void ipred_dc_top_neon(pixel *dst, const ptrdiff_t stride,
-// const pixel *const topleft,
-// const int width, const int height, const int a,
-// const int max_width, const int max_height);
-function ipred_dc_top_neon, export=1
+// void ipred_dc_top_8bpc_neon(pixel *dst, const ptrdiff_t stride,
+// const pixel *const topleft,
+// const int width, const int height, const int a,
+// const int max_width, const int max_height);
+function ipred_dc_top_8bpc_neon, export=1
clz w3, w3
adr x5, L(ipred_dc_top_tbl)
sub w3, w3, #25
@@ -351,11 +351,11 @@
.hword L(ipred_dc_top_tbl) - 40b
endfunc
-// void ipred_dc_left_neon(pixel *dst, const ptrdiff_t stride,
-// const pixel *const topleft,
-// const int width, const int height, const int a,
-// const int max_width, const int max_height);
-function ipred_dc_left_neon, export=1
+// void ipred_dc_left_8bpc_neon(pixel *dst, const ptrdiff_t stride,
+// const pixel *const topleft,
+// const int width, const int height, const int a,
+// const int max_width, const int max_height);
+function ipred_dc_left_8bpc_neon, export=1
sub x2, x2, w4, uxtw
clz w3, w3
clz w7, w4
@@ -472,11 +472,11 @@
.hword L(ipred_dc_left_tbl) - L(ipred_dc_left_w4)
endfunc
-// void ipred_dc_neon(pixel *dst, const ptrdiff_t stride,
-// const pixel *const topleft,
-// const int width, const int height, const int a,
-// const int max_width, const int max_height);
-function ipred_dc_neon, export=1
+// void ipred_dc_8bpc_neon(pixel *dst, const ptrdiff_t stride,
+// const pixel *const topleft,
+// const int width, const int height, const int a,
+// const int max_width, const int max_height);
+function ipred_dc_8bpc_neon, export=1
sub x2, x2, w4, uxtw
add w7, w3, w4 // width + height
clz w3, w3
@@ -687,11 +687,11 @@
.hword L(ipred_dc_tbl) - L(ipred_dc_w4)
endfunc
-// void ipred_paeth_neon(pixel *dst, const ptrdiff_t stride,
-// const pixel *const topleft,
-// const int width, const int height, const int a,
-// const int max_width, const int max_height);
-function ipred_paeth_neon, export=1
+// void ipred_paeth_8bpc_neon(pixel *dst, const ptrdiff_t stride,
+// const pixel *const topleft,
+// const int width, const int height, const int a,
+// const int max_width, const int max_height);
+function ipred_paeth_8bpc_neon, export=1
clz w9, w3
adr x5, L(ipred_paeth_tbl)
sub w9, w9, #25
@@ -864,11 +864,11 @@
.hword L(ipred_paeth_tbl) - 40b
endfunc
-// void ipred_smooth_neon(pixel *dst, const ptrdiff_t stride,
-// const pixel *const topleft,
-// const int width, const int height, const int a,
-// const int max_width, const int max_height);
-function ipred_smooth_neon, export=1
+// void ipred_smooth_8bpc_neon(pixel *dst, const ptrdiff_t stride,
+// const pixel *const topleft,
+// const int width, const int height, const int a,
+// const int max_width, const int max_height);
+function ipred_smooth_8bpc_neon, export=1
movrel x10, X(sm_weights)
add x11, x10, w4, uxtw
add x10, x10, w3, uxtw
@@ -1042,11 +1042,11 @@
.hword L(ipred_smooth_tbl) - 40b
endfunc
-// void ipred_smooth_v_neon(pixel *dst, const ptrdiff_t stride,
-// const pixel *const topleft,
-// const int width, const int height, const int a,
-// const int max_width, const int max_height);
-function ipred_smooth_v_neon, export=1
+// void ipred_smooth_v_8bpc_neon(pixel *dst, const ptrdiff_t stride,
+// const pixel *const topleft,
+// const int width, const int height, const int a,
+// const int max_width, const int max_height);
+function ipred_smooth_v_8bpc_neon, export=1
movrel x7, X(sm_weights)
add x7, x7, w4, uxtw
clz w9, w3
@@ -1180,11 +1180,11 @@
.hword L(ipred_smooth_v_tbl) - 40b
endfunc
-// void ipred_smooth_h_neon(pixel *dst, const ptrdiff_t stride,
-// const pixel *const topleft,
-// const int width, const int height, const int a,
-// const int max_width, const int max_height);
-function ipred_smooth_h_neon, export=1
+// void ipred_smooth_h_8bpc_neon(pixel *dst, const ptrdiff_t stride,
+// const pixel *const topleft,
+// const int width, const int height, const int a,
+// const int max_width, const int max_height);
+function ipred_smooth_h_8bpc_neon, export=1
movrel x8, X(sm_weights)
add x8, x8, w3, uxtw
clz w9, w3
@@ -1323,11 +1323,11 @@
.hword L(ipred_smooth_h_tbl) - 40b
endfunc
-// void ipred_filter_neon(pixel *dst, const ptrdiff_t stride,
-// const pixel *const topleft,
-// const int width, const int height, const int filt_idx,
-// const int max_width, const int max_height);
-function ipred_filter_neon, export=1
+// void ipred_filter_8bpc_neon(pixel *dst, const ptrdiff_t stride,
+// const pixel *const topleft,
+// const int width, const int height, const int filt_idx,
+// const int max_width, const int max_height);
+function ipred_filter_8bpc_neon, export=1
and w5, w5, #511
movrel x6, X(filter_intra_taps)
lsl w5, w5, #6
@@ -1483,10 +1483,10 @@
.hword L(ipred_filter_tbl) - 40b
endfunc
-// void pal_pred_neon(pixel *dst, const ptrdiff_t stride,
-// const uint16_t *const pal, const uint8_t *idx,
-// const int w, const int h);
-function pal_pred_neon, export=1
+// void pal_pred_8bpc_neon(pixel *dst, const ptrdiff_t stride,
+// const uint16_t *const pal, const uint8_t *idx,
+// const int w, const int h);
+function pal_pred_8bpc_neon, export=1
ld1 {v0.8h}, [x2]
clz w9, w4
adr x6, L(pal_pred_tbl)
@@ -1574,11 +1574,11 @@
.hword L(pal_pred_tbl) - 4b
endfunc
-// void ipred_cfl_128_neon(pixel *dst, const ptrdiff_t stride,
-// const pixel *const topleft,
-// const int width, const int height,
-// const int16_t *ac, const int alpha);
-function ipred_cfl_128_neon, export=1
+// void ipred_cfl_128_8bpc_neon(pixel *dst, const ptrdiff_t stride,
+// const pixel *const topleft,
+// const int width, const int height,
+// const int16_t *ac, const int alpha);
+function ipred_cfl_128_8bpc_neon, export=1
clz w9, w3
adr x7, L(ipred_cfl_128_tbl)
sub w9, w9, #26
@@ -1695,11 +1695,11 @@
.hword L(ipred_cfl_128_tbl) - L(ipred_cfl_splat_w4)
endfunc
-// void ipred_cfl_top_neon(pixel *dst, const ptrdiff_t stride,
-// const pixel *const topleft,
-// const int width, const int height,
-// const int16_t *ac, const int alpha);
-function ipred_cfl_top_neon, export=1
+// void ipred_cfl_top_8bpc_neon(pixel *dst, const ptrdiff_t stride,
+// const pixel *const topleft,
+// const int width, const int height,
+// const int16_t *ac, const int alpha);
+function ipred_cfl_top_8bpc_neon, export=1
clz w9, w3
adr x7, L(ipred_cfl_top_tbl)
sub w9, w9, #26
@@ -1744,11 +1744,11 @@
.hword L(ipred_cfl_top_tbl) - 4b
endfunc
-// void ipred_cfl_left_neon(pixel *dst, const ptrdiff_t stride,
-// const pixel *const topleft,
-// const int width, const int height,
-// const int16_t *ac, const int alpha);
-function ipred_cfl_left_neon, export=1
+// void ipred_cfl_left_8bpc_neon(pixel *dst, const ptrdiff_t stride,
+// const pixel *const topleft,
+// const int width, const int height,
+// const int16_t *ac, const int alpha);
+function ipred_cfl_left_8bpc_neon, export=1
sub x2, x2, w4, uxtw
clz w9, w3
clz w8, w4
@@ -1802,11 +1802,11 @@
.hword L(ipred_cfl_left_tbl) - L(ipred_cfl_left_h4)
endfunc
-// void ipred_cfl_neon(pixel *dst, const ptrdiff_t stride,
-// const pixel *const topleft,
-// const int width, const int height,
-// const int16_t *ac, const int alpha);
-function ipred_cfl_neon, export=1
+// void ipred_cfl_8bpc_neon(pixel *dst, const ptrdiff_t stride,
+// const pixel *const topleft,
+// const int width, const int height,
+// const int16_t *ac, const int alpha);
+function ipred_cfl_8bpc_neon, export=1
sub x2, x2, w4, uxtw
add w8, w3, w4 // width + height
dup v1.8h, w6 // alpha
@@ -1942,10 +1942,10 @@
.hword L(ipred_cfl_tbl) - L(ipred_cfl_w4)
endfunc
-// void cfl_ac_420_neon(int16_t *const ac, const pixel *const ypx,
-// const ptrdiff_t stride, const int w_pad,
-// const int h_pad, const int cw, const int ch);
-function ipred_cfl_ac_420_neon, export=1
+// void ipred_cfl_ac_420_8bpc_neon(int16_t *const ac, const pixel *const ypx,
+// const ptrdiff_t stride, const int w_pad,
+// const int h_pad, const int cw, const int ch);
+function ipred_cfl_ac_420_8bpc_neon, export=1
clz w8, w5
lsl w4, w4, #2
adr x7, L(ipred_cfl_ac_420_tbl)
@@ -2260,10 +2260,10 @@
.hword L(ipred_cfl_ac_420_w16_tbl) - L(ipred_cfl_ac_420_w16_wpad3)
endfunc
-// void cfl_ac_422_neon(int16_t *const ac, const pixel *const ypx,
-// const ptrdiff_t stride, const int w_pad,
-// const int h_pad, const int cw, const int ch);
-function ipred_cfl_ac_422_neon, export=1
+// void ipred_cfl_ac_422_8bpc_neon(int16_t *const ac, const pixel *const ypx,
+// const ptrdiff_t stride, const int w_pad,
+// const int h_pad, const int cw, const int ch);
+function ipred_cfl_ac_422_8bpc_neon, export=1
clz w8, w5
lsl w4, w4, #2
adr x7, L(ipred_cfl_ac_422_tbl)
--- a/src/arm/ipred_init_tmpl.c
+++ b/src/arm/ipred_init_tmpl.c
@@ -27,27 +27,27 @@
#include "src/cpu.h"
#include "src/ipred.h"
-decl_angular_ipred_fn(dav1d_ipred_dc_neon);
-decl_angular_ipred_fn(dav1d_ipred_dc_128_neon);
-decl_angular_ipred_fn(dav1d_ipred_dc_top_neon);
-decl_angular_ipred_fn(dav1d_ipred_dc_left_neon);
-decl_angular_ipred_fn(dav1d_ipred_h_neon);
-decl_angular_ipred_fn(dav1d_ipred_v_neon);
-decl_angular_ipred_fn(dav1d_ipred_paeth_neon);
-decl_angular_ipred_fn(dav1d_ipred_smooth_neon);
-decl_angular_ipred_fn(dav1d_ipred_smooth_v_neon);
-decl_angular_ipred_fn(dav1d_ipred_smooth_h_neon);
-decl_angular_ipred_fn(dav1d_ipred_filter_neon);
+decl_angular_ipred_fn(BF(dav1d_ipred_dc, neon));
+decl_angular_ipred_fn(BF(dav1d_ipred_dc_128, neon));
+decl_angular_ipred_fn(BF(dav1d_ipred_dc_top, neon));
+decl_angular_ipred_fn(BF(dav1d_ipred_dc_left, neon));
+decl_angular_ipred_fn(BF(dav1d_ipred_h, neon));
+decl_angular_ipred_fn(BF(dav1d_ipred_v, neon));
+decl_angular_ipred_fn(BF(dav1d_ipred_paeth, neon));
+decl_angular_ipred_fn(BF(dav1d_ipred_smooth, neon));
+decl_angular_ipred_fn(BF(dav1d_ipred_smooth_v, neon));
+decl_angular_ipred_fn(BF(dav1d_ipred_smooth_h, neon));
+decl_angular_ipred_fn(BF(dav1d_ipred_filter, neon));
-decl_cfl_pred_fn(dav1d_ipred_cfl_neon);
-decl_cfl_pred_fn(dav1d_ipred_cfl_128_neon);
-decl_cfl_pred_fn(dav1d_ipred_cfl_top_neon);
-decl_cfl_pred_fn(dav1d_ipred_cfl_left_neon);
+decl_cfl_pred_fn(BF(dav1d_ipred_cfl, neon));
+decl_cfl_pred_fn(BF(dav1d_ipred_cfl_128, neon));
+decl_cfl_pred_fn(BF(dav1d_ipred_cfl_top, neon));
+decl_cfl_pred_fn(BF(dav1d_ipred_cfl_left, neon));
-decl_cfl_ac_fn(dav1d_ipred_cfl_ac_420_neon);
-decl_cfl_ac_fn(dav1d_ipred_cfl_ac_422_neon);
+decl_cfl_ac_fn(BF(dav1d_ipred_cfl_ac_420, neon));
+decl_cfl_ac_fn(BF(dav1d_ipred_cfl_ac_422, neon));
-decl_pal_pred_fn(dav1d_pal_pred_neon);
+decl_pal_pred_fn(BF(dav1d_pal_pred, neon));
COLD void bitfn(dav1d_intra_pred_dsp_init_arm)(Dav1dIntraPredDSPContext *const c) {
const unsigned flags = dav1d_get_cpu_flags();
@@ -55,28 +55,28 @@
if (!(flags & DAV1D_ARM_CPU_FLAG_NEON)) return;
#if BITDEPTH == 8
- c->intra_pred[DC_PRED] = dav1d_ipred_dc_neon;
- c->intra_pred[DC_128_PRED] = dav1d_ipred_dc_128_neon;
- c->intra_pred[TOP_DC_PRED] = dav1d_ipred_dc_top_neon;
- c->intra_pred[LEFT_DC_PRED] = dav1d_ipred_dc_left_neon;
- c->intra_pred[HOR_PRED] = dav1d_ipred_h_neon;
- c->intra_pred[VERT_PRED] = dav1d_ipred_v_neon;
+ c->intra_pred[DC_PRED] = BF(dav1d_ipred_dc, neon);
+ c->intra_pred[DC_128_PRED] = BF(dav1d_ipred_dc_128, neon);
+ c->intra_pred[TOP_DC_PRED] = BF(dav1d_ipred_dc_top, neon);
+ c->intra_pred[LEFT_DC_PRED] = BF(dav1d_ipred_dc_left, neon);
+ c->intra_pred[HOR_PRED] = BF(dav1d_ipred_h, neon);
+ c->intra_pred[VERT_PRED] = BF(dav1d_ipred_v, neon);
#if ARCH_AARCH64
- c->intra_pred[PAETH_PRED] = dav1d_ipred_paeth_neon;
- c->intra_pred[SMOOTH_PRED] = dav1d_ipred_smooth_neon;
- c->intra_pred[SMOOTH_V_PRED] = dav1d_ipred_smooth_v_neon;
- c->intra_pred[SMOOTH_H_PRED] = dav1d_ipred_smooth_h_neon;
- c->intra_pred[FILTER_PRED] = dav1d_ipred_filter_neon;
+ c->intra_pred[PAETH_PRED] = BF(dav1d_ipred_paeth, neon);
+ c->intra_pred[SMOOTH_PRED] = BF(dav1d_ipred_smooth, neon);
+ c->intra_pred[SMOOTH_V_PRED] = BF(dav1d_ipred_smooth_v, neon);
+ c->intra_pred[SMOOTH_H_PRED] = BF(dav1d_ipred_smooth_h, neon);
+ c->intra_pred[FILTER_PRED] = BF(dav1d_ipred_filter, neon);
- c->cfl_pred[DC_PRED] = dav1d_ipred_cfl_neon;
- c->cfl_pred[DC_128_PRED] = dav1d_ipred_cfl_128_neon;
- c->cfl_pred[TOP_DC_PRED] = dav1d_ipred_cfl_top_neon;
- c->cfl_pred[LEFT_DC_PRED] = dav1d_ipred_cfl_left_neon;
+ c->cfl_pred[DC_PRED] = BF(dav1d_ipred_cfl, neon);
+ c->cfl_pred[DC_128_PRED] = BF(dav1d_ipred_cfl_128, neon);
+ c->cfl_pred[TOP_DC_PRED] = BF(dav1d_ipred_cfl_top, neon);
+ c->cfl_pred[LEFT_DC_PRED] = BF(dav1d_ipred_cfl_left, neon);
- c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1] = dav1d_ipred_cfl_ac_420_neon;
- c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1] = dav1d_ipred_cfl_ac_422_neon;
+ c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1] = BF(dav1d_ipred_cfl_ac_420, neon);
+ c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1] = BF(dav1d_ipred_cfl_ac_422, neon);
- c->pal_pred = dav1d_pal_pred_neon;
+ c->pal_pred = BF(dav1d_pal_pred, neon);
#endif
#endif
}