shithub: dav1d

Download patch

ref: a9323ef58df2c0713e4115965df10c76818aadb9
parent: e0f28d45be2a99e0f56ffe0f7f94754ce6c83ab8
author: Martin Storsjö <martin@martin.st>
date: Sun Mar 15 20:04:57 EDT 2020

arm: ipred: Prepare for 16 bpc

--- a/src/arm/32/ipred.S
+++ b/src/arm/32/ipred.S
@@ -29,11 +29,11 @@
 #include "src/arm/asm.S"
 #include "util.S"
 
-// void ipred_dc_128_neon(pixel *dst, const ptrdiff_t stride,
-//                        const pixel *const topleft,
-//                        const int width, const int height, const int a,
-//                        const int max_width, const int max_height);
-function ipred_dc_128_neon, export=1
+// void ipred_dc_128_8bpc_neon(pixel *dst, const ptrdiff_t stride,
+//                             const pixel *const topleft,
+//                             const int width, const int height, const int a,
+//                             const int max_width, const int max_height);
+function ipred_dc_128_8bpc_neon, export=1
         push            {r4, lr}
         ldr             r4,  [sp, #8]
         clz             r3,  r3
@@ -107,11 +107,11 @@
         pop             {r4, pc}
 endfunc
 
-// void ipred_v_neon(pixel *dst, const ptrdiff_t stride,
-//                   const pixel *const topleft,
-//                   const int width, const int height, const int a,
-//                   const int max_width, const int max_height);
-function ipred_v_neon, export=1
+// void ipred_v_8bpc_neon(pixel *dst, const ptrdiff_t stride,
+//                        const pixel *const topleft,
+//                        const int width, const int height, const int a,
+//                        const int max_width, const int max_height);
+function ipred_v_8bpc_neon, export=1
         push            {r4, lr}
         ldr             lr,  [sp, #8]
         clz             r3,  r3
@@ -189,11 +189,11 @@
         pop             {r4, pc}
 endfunc
 
-// void ipred_h_neon(pixel *dst, const ptrdiff_t stride,
-//                   const pixel *const topleft,
-//                   const int width, const int height, const int a,
-//                   const int max_width, const int max_height);
-function ipred_h_neon, export=1
+// void ipred_h_8bpc_neon(pixel *dst, const ptrdiff_t stride,
+//                        const pixel *const topleft,
+//                        const int width, const int height, const int a,
+//                        const int max_width, const int max_height);
+function ipred_h_8bpc_neon, export=1
         push            {r4-r5, lr}
         ldr             r4,  [sp, #12]
         clz             r3,  r3
@@ -297,11 +297,11 @@
         pop             {r4-r5, pc}
 endfunc
 
-// void ipred_dc_top_neon(pixel *dst, const ptrdiff_t stride,
-//                        const pixel *const topleft,
-//                        const int width, const int height, const int a,
-//                        const int max_width, const int max_height);
-function ipred_dc_top_neon, export=1
+// void ipred_dc_top_8bpc_neon(pixel *dst, const ptrdiff_t stride,
+//                             const pixel *const topleft,
+//                             const int width, const int height, const int a,
+//                             const int max_width, const int max_height);
+function ipred_dc_top_8bpc_neon, export=1
         push            {r4-r5, lr}
         ldr             r4,  [sp, #12]
         clz             r3,  r3
@@ -418,11 +418,11 @@
         pop             {r4-r5, pc}
 endfunc
 
-// void ipred_dc_left_neon(pixel *dst, const ptrdiff_t stride,
-//                         const pixel *const topleft,
-//                         const int width, const int height, const int a,
-//                         const int max_width, const int max_height);
-function ipred_dc_left_neon, export=1
+// void ipred_dc_left_8bpc_neon(pixel *dst, const ptrdiff_t stride,
+//                              const pixel *const topleft,
+//                              const int width, const int height, const int a,
+//                              const int max_width, const int max_height);
+function ipred_dc_left_8bpc_neon, export=1
         push            {r4-r5, lr}
         ldr             r4,  [sp, #12]
         sub             r2,  r2,  r4
@@ -556,11 +556,11 @@
         pop             {r4-r5, pc}
 endfunc
 
-// void ipred_dc_neon(pixel *dst, const ptrdiff_t stride,
-//                    const pixel *const topleft,
-//                    const int width, const int height, const int a,
-//                    const int max_width, const int max_height);
-function ipred_dc_neon, export=1
+// void ipred_dc_8bpc_neon(pixel *dst, const ptrdiff_t stride,
+//                         const pixel *const topleft,
+//                         const int width, const int height, const int a,
+//                         const int max_width, const int max_height);
+function ipred_dc_8bpc_neon, export=1
         push            {r4-r6, lr}
         ldr             r4,  [sp, #16]
         sub             r2,  r2,  r4
--- a/src/arm/64/ipred.S
+++ b/src/arm/64/ipred.S
@@ -28,11 +28,11 @@
 #include "src/arm/asm.S"
 #include "util.S"
 
-// void ipred_dc_128_neon(pixel *dst, const ptrdiff_t stride,
-//                        const pixel *const topleft,
-//                        const int width, const int height, const int a,
-//                        const int max_width, const int max_height);
-function ipred_dc_128_neon, export=1
+// void ipred_dc_128_8bpc_neon(pixel *dst, const ptrdiff_t stride,
+//                             const pixel *const topleft,
+//                             const int width, const int height, const int a,
+//                             const int max_width, const int max_height);
+function ipred_dc_128_8bpc_neon, export=1
         clz             w3,  w3
         adr             x5,  L(ipred_dc_128_tbl)
         sub             w3,  w3,  #25
@@ -97,11 +97,11 @@
         .hword L(ipred_dc_128_tbl) -   4b
 endfunc
 
-// void ipred_v_neon(pixel *dst, const ptrdiff_t stride,
-//                   const pixel *const topleft,
-//                   const int width, const int height, const int a,
-//                   const int max_width, const int max_height);
-function ipred_v_neon, export=1
+// void ipred_v_8bpc_neon(pixel *dst, const ptrdiff_t stride,
+//                        const pixel *const topleft,
+//                        const int width, const int height, const int a,
+//                        const int max_width, const int max_height);
+function ipred_v_8bpc_neon, export=1
         clz             w3,  w3
         adr             x5,  L(ipred_v_tbl)
         sub             w3,  w3,  #25
@@ -170,11 +170,11 @@
         .hword L(ipred_v_tbl) -  40b
 endfunc
 
-// void ipred_h_neon(pixel *dst, const ptrdiff_t stride,
-//                   const pixel *const topleft,
-//                   const int width, const int height, const int a,
-//                   const int max_width, const int max_height);
-function ipred_h_neon, export=1
+// void ipred_h_8bpc_neon(pixel *dst, const ptrdiff_t stride,
+//                        const pixel *const topleft,
+//                        const int width, const int height, const int a,
+//                        const int max_width, const int max_height);
+function ipred_h_8bpc_neon, export=1
         clz             w3,  w3
         adr             x5,  L(ipred_h_tbl)
         sub             w3,  w3,  #25
@@ -251,11 +251,11 @@
         .hword L(ipred_h_tbl) -  4b
 endfunc
 
-// void ipred_dc_top_neon(pixel *dst, const ptrdiff_t stride,
-//                        const pixel *const topleft,
-//                        const int width, const int height, const int a,
-//                        const int max_width, const int max_height);
-function ipred_dc_top_neon, export=1
+// void ipred_dc_top_8bpc_neon(pixel *dst, const ptrdiff_t stride,
+//                             const pixel *const topleft,
+//                             const int width, const int height, const int a,
+//                             const int max_width, const int max_height);
+function ipred_dc_top_8bpc_neon, export=1
         clz             w3,  w3
         adr             x5,  L(ipred_dc_top_tbl)
         sub             w3,  w3,  #25
@@ -351,11 +351,11 @@
         .hword L(ipred_dc_top_tbl) -  40b
 endfunc
 
-// void ipred_dc_left_neon(pixel *dst, const ptrdiff_t stride,
-//                         const pixel *const topleft,
-//                         const int width, const int height, const int a,
-//                         const int max_width, const int max_height);
-function ipred_dc_left_neon, export=1
+// void ipred_dc_left_8bpc_neon(pixel *dst, const ptrdiff_t stride,
+//                              const pixel *const topleft,
+//                              const int width, const int height, const int a,
+//                              const int max_width, const int max_height);
+function ipred_dc_left_8bpc_neon, export=1
         sub             x2,  x2,  w4, uxtw
         clz             w3,  w3
         clz             w7,  w4
@@ -472,11 +472,11 @@
         .hword L(ipred_dc_left_tbl) - L(ipred_dc_left_w4)
 endfunc
 
-// void ipred_dc_neon(pixel *dst, const ptrdiff_t stride,
-//                    const pixel *const topleft,
-//                    const int width, const int height, const int a,
-//                    const int max_width, const int max_height);
-function ipred_dc_neon, export=1
+// void ipred_dc_8bpc_neon(pixel *dst, const ptrdiff_t stride,
+//                         const pixel *const topleft,
+//                         const int width, const int height, const int a,
+//                         const int max_width, const int max_height);
+function ipred_dc_8bpc_neon, export=1
         sub             x2,  x2,  w4, uxtw
         add             w7,  w3,  w4             // width + height
         clz             w3,  w3
@@ -687,11 +687,11 @@
         .hword L(ipred_dc_tbl) - L(ipred_dc_w4)
 endfunc
 
-// void ipred_paeth_neon(pixel *dst, const ptrdiff_t stride,
-//                       const pixel *const topleft,
-//                       const int width, const int height, const int a,
-//                       const int max_width, const int max_height);
-function ipred_paeth_neon, export=1
+// void ipred_paeth_8bpc_neon(pixel *dst, const ptrdiff_t stride,
+//                            const pixel *const topleft,
+//                            const int width, const int height, const int a,
+//                            const int max_width, const int max_height);
+function ipred_paeth_8bpc_neon, export=1
         clz             w9,  w3
         adr             x5,  L(ipred_paeth_tbl)
         sub             w9,  w9,  #25
@@ -864,11 +864,11 @@
         .hword L(ipred_paeth_tbl) -  40b
 endfunc
 
-// void ipred_smooth_neon(pixel *dst, const ptrdiff_t stride,
-//                        const pixel *const topleft,
-//                        const int width, const int height, const int a,
-//                        const int max_width, const int max_height);
-function ipred_smooth_neon, export=1
+// void ipred_smooth_8bpc_neon(pixel *dst, const ptrdiff_t stride,
+//                             const pixel *const topleft,
+//                             const int width, const int height, const int a,
+//                             const int max_width, const int max_height);
+function ipred_smooth_8bpc_neon, export=1
         movrel          x10, X(sm_weights)
         add             x11, x10, w4, uxtw
         add             x10, x10, w3, uxtw
@@ -1042,11 +1042,11 @@
         .hword L(ipred_smooth_tbl) -  40b
 endfunc
 
-// void ipred_smooth_v_neon(pixel *dst, const ptrdiff_t stride,
-//                          const pixel *const topleft,
-//                          const int width, const int height, const int a,
-//                          const int max_width, const int max_height);
-function ipred_smooth_v_neon, export=1
+// void ipred_smooth_v_8bpc_neon(pixel *dst, const ptrdiff_t stride,
+//                               const pixel *const topleft,
+//                               const int width, const int height, const int a,
+//                               const int max_width, const int max_height);
+function ipred_smooth_v_8bpc_neon, export=1
         movrel          x7,  X(sm_weights)
         add             x7,  x7,  w4, uxtw
         clz             w9,  w3
@@ -1180,11 +1180,11 @@
         .hword L(ipred_smooth_v_tbl) -  40b
 endfunc
 
-// void ipred_smooth_h_neon(pixel *dst, const ptrdiff_t stride,
-//                          const pixel *const topleft,
-//                          const int width, const int height, const int a,
-//                          const int max_width, const int max_height);
-function ipred_smooth_h_neon, export=1
+// void ipred_smooth_h_8bpc_neon(pixel *dst, const ptrdiff_t stride,
+//                               const pixel *const topleft,
+//                               const int width, const int height, const int a,
+//                               const int max_width, const int max_height);
+function ipred_smooth_h_8bpc_neon, export=1
         movrel          x8,  X(sm_weights)
         add             x8,  x8,  w3, uxtw
         clz             w9,  w3
@@ -1323,11 +1323,11 @@
         .hword L(ipred_smooth_h_tbl) -  40b
 endfunc
 
-// void ipred_filter_neon(pixel *dst, const ptrdiff_t stride,
-//                        const pixel *const topleft,
-//                        const int width, const int height, const int filt_idx,
-//                        const int max_width, const int max_height);
-function ipred_filter_neon, export=1
+// void ipred_filter_8bpc_neon(pixel *dst, const ptrdiff_t stride,
+//                             const pixel *const topleft,
+//                             const int width, const int height, const int filt_idx,
+//                             const int max_width, const int max_height);
+function ipred_filter_8bpc_neon, export=1
         and             w5,  w5,  #511
         movrel          x6,  X(filter_intra_taps)
         lsl             w5,  w5,  #6
@@ -1483,10 +1483,10 @@
         .hword L(ipred_filter_tbl) -  40b
 endfunc
 
-// void pal_pred_neon(pixel *dst, const ptrdiff_t stride,
-//                    const uint16_t *const pal, const uint8_t *idx,
-//                    const int w, const int h);
-function pal_pred_neon, export=1
+// void pal_pred_8bpc_neon(pixel *dst, const ptrdiff_t stride,
+//                         const uint16_t *const pal, const uint8_t *idx,
+//                         const int w, const int h);
+function pal_pred_8bpc_neon, export=1
         ld1             {v0.8h}, [x2]
         clz             w9,  w4
         adr             x6,  L(pal_pred_tbl)
@@ -1574,11 +1574,11 @@
         .hword L(pal_pred_tbl) -  4b
 endfunc
 
-// void ipred_cfl_128_neon(pixel *dst, const ptrdiff_t stride,
-//                         const pixel *const topleft,
-//                         const int width, const int height,
-//                         const int16_t *ac, const int alpha);
-function ipred_cfl_128_neon, export=1
+// void ipred_cfl_128_8bpc_neon(pixel *dst, const ptrdiff_t stride,
+//                              const pixel *const topleft,
+//                              const int width, const int height,
+//                              const int16_t *ac, const int alpha);
+function ipred_cfl_128_8bpc_neon, export=1
         clz             w9,  w3
         adr             x7,  L(ipred_cfl_128_tbl)
         sub             w9,  w9,  #26
@@ -1695,11 +1695,11 @@
         .hword L(ipred_cfl_128_tbl) - L(ipred_cfl_splat_w4)
 endfunc
 
-// void ipred_cfl_top_neon(pixel *dst, const ptrdiff_t stride,
-//                         const pixel *const topleft,
-//                         const int width, const int height,
-//                         const int16_t *ac, const int alpha);
-function ipred_cfl_top_neon, export=1
+// void ipred_cfl_top_8bpc_neon(pixel *dst, const ptrdiff_t stride,
+//                              const pixel *const topleft,
+//                              const int width, const int height,
+//                              const int16_t *ac, const int alpha);
+function ipred_cfl_top_8bpc_neon, export=1
         clz             w9,  w3
         adr             x7,  L(ipred_cfl_top_tbl)
         sub             w9,  w9,  #26
@@ -1744,11 +1744,11 @@
         .hword L(ipred_cfl_top_tbl) -  4b
 endfunc
 
-// void ipred_cfl_left_neon(pixel *dst, const ptrdiff_t stride,
-//                          const pixel *const topleft,
-//                          const int width, const int height,
-//                          const int16_t *ac, const int alpha);
-function ipred_cfl_left_neon, export=1
+// void ipred_cfl_left_8bpc_neon(pixel *dst, const ptrdiff_t stride,
+//                               const pixel *const topleft,
+//                               const int width, const int height,
+//                               const int16_t *ac, const int alpha);
+function ipred_cfl_left_8bpc_neon, export=1
         sub             x2,  x2,  w4, uxtw
         clz             w9,  w3
         clz             w8,  w4
@@ -1802,11 +1802,11 @@
         .hword L(ipred_cfl_left_tbl) - L(ipred_cfl_left_h4)
 endfunc
 
-// void ipred_cfl_neon(pixel *dst, const ptrdiff_t stride,
-//                     const pixel *const topleft,
-//                     const int width, const int height,
-//                     const int16_t *ac, const int alpha);
-function ipred_cfl_neon, export=1
+// void ipred_cfl_8bpc_neon(pixel *dst, const ptrdiff_t stride,
+//                          const pixel *const topleft,
+//                          const int width, const int height,
+//                          const int16_t *ac, const int alpha);
+function ipred_cfl_8bpc_neon, export=1
         sub             x2,  x2,  w4, uxtw
         add             w8,  w3,  w4             // width + height
         dup             v1.8h,   w6              // alpha
@@ -1942,10 +1942,10 @@
         .hword L(ipred_cfl_tbl) - L(ipred_cfl_w4)
 endfunc
 
-// void cfl_ac_420_neon(int16_t *const ac, const pixel *const ypx,
-//                      const ptrdiff_t stride, const int w_pad,
-//                      const int h_pad, const int cw, const int ch);
-function ipred_cfl_ac_420_neon, export=1
+// void ipred_cfl_ac_420_8bpc_neon(int16_t *const ac, const pixel *const ypx,
+//                                 const ptrdiff_t stride, const int w_pad,
+//                                 const int h_pad, const int cw, const int ch);
+function ipred_cfl_ac_420_8bpc_neon, export=1
         clz             w8,  w5
         lsl             w4,  w4,  #2
         adr             x7,  L(ipred_cfl_ac_420_tbl)
@@ -2260,10 +2260,10 @@
         .hword L(ipred_cfl_ac_420_w16_tbl) - L(ipred_cfl_ac_420_w16_wpad3)
 endfunc
 
-// void cfl_ac_422_neon(int16_t *const ac, const pixel *const ypx,
-//                      const ptrdiff_t stride, const int w_pad,
-//                      const int h_pad, const int cw, const int ch);
-function ipred_cfl_ac_422_neon, export=1
+// void ipred_cfl_ac_422_8bpc_neon(int16_t *const ac, const pixel *const ypx,
+//                                 const ptrdiff_t stride, const int w_pad,
+//                                 const int h_pad, const int cw, const int ch);
+function ipred_cfl_ac_422_8bpc_neon, export=1
         clz             w8,  w5
         lsl             w4,  w4,  #2
         adr             x7,  L(ipred_cfl_ac_422_tbl)
--- a/src/arm/ipred_init_tmpl.c
+++ b/src/arm/ipred_init_tmpl.c
@@ -27,27 +27,27 @@
 #include "src/cpu.h"
 #include "src/ipred.h"
 
-decl_angular_ipred_fn(dav1d_ipred_dc_neon);
-decl_angular_ipred_fn(dav1d_ipred_dc_128_neon);
-decl_angular_ipred_fn(dav1d_ipred_dc_top_neon);
-decl_angular_ipred_fn(dav1d_ipred_dc_left_neon);
-decl_angular_ipred_fn(dav1d_ipred_h_neon);
-decl_angular_ipred_fn(dav1d_ipred_v_neon);
-decl_angular_ipred_fn(dav1d_ipred_paeth_neon);
-decl_angular_ipred_fn(dav1d_ipred_smooth_neon);
-decl_angular_ipred_fn(dav1d_ipred_smooth_v_neon);
-decl_angular_ipred_fn(dav1d_ipred_smooth_h_neon);
-decl_angular_ipred_fn(dav1d_ipred_filter_neon);
+decl_angular_ipred_fn(BF(dav1d_ipred_dc, neon));
+decl_angular_ipred_fn(BF(dav1d_ipred_dc_128, neon));
+decl_angular_ipred_fn(BF(dav1d_ipred_dc_top, neon));
+decl_angular_ipred_fn(BF(dav1d_ipred_dc_left, neon));
+decl_angular_ipred_fn(BF(dav1d_ipred_h, neon));
+decl_angular_ipred_fn(BF(dav1d_ipred_v, neon));
+decl_angular_ipred_fn(BF(dav1d_ipred_paeth, neon));
+decl_angular_ipred_fn(BF(dav1d_ipred_smooth, neon));
+decl_angular_ipred_fn(BF(dav1d_ipred_smooth_v, neon));
+decl_angular_ipred_fn(BF(dav1d_ipred_smooth_h, neon));
+decl_angular_ipred_fn(BF(dav1d_ipred_filter, neon));
 
-decl_cfl_pred_fn(dav1d_ipred_cfl_neon);
-decl_cfl_pred_fn(dav1d_ipred_cfl_128_neon);
-decl_cfl_pred_fn(dav1d_ipred_cfl_top_neon);
-decl_cfl_pred_fn(dav1d_ipred_cfl_left_neon);
+decl_cfl_pred_fn(BF(dav1d_ipred_cfl, neon));
+decl_cfl_pred_fn(BF(dav1d_ipred_cfl_128, neon));
+decl_cfl_pred_fn(BF(dav1d_ipred_cfl_top, neon));
+decl_cfl_pred_fn(BF(dav1d_ipred_cfl_left, neon));
 
-decl_cfl_ac_fn(dav1d_ipred_cfl_ac_420_neon);
-decl_cfl_ac_fn(dav1d_ipred_cfl_ac_422_neon);
+decl_cfl_ac_fn(BF(dav1d_ipred_cfl_ac_420, neon));
+decl_cfl_ac_fn(BF(dav1d_ipred_cfl_ac_422, neon));
 
-decl_pal_pred_fn(dav1d_pal_pred_neon);
+decl_pal_pred_fn(BF(dav1d_pal_pred, neon));
 
 COLD void bitfn(dav1d_intra_pred_dsp_init_arm)(Dav1dIntraPredDSPContext *const c) {
     const unsigned flags = dav1d_get_cpu_flags();
@@ -55,28 +55,28 @@
     if (!(flags & DAV1D_ARM_CPU_FLAG_NEON)) return;
 
 #if BITDEPTH == 8
-    c->intra_pred[DC_PRED]       = dav1d_ipred_dc_neon;
-    c->intra_pred[DC_128_PRED]   = dav1d_ipred_dc_128_neon;
-    c->intra_pred[TOP_DC_PRED]   = dav1d_ipred_dc_top_neon;
-    c->intra_pred[LEFT_DC_PRED]  = dav1d_ipred_dc_left_neon;
-    c->intra_pred[HOR_PRED]      = dav1d_ipred_h_neon;
-    c->intra_pred[VERT_PRED]     = dav1d_ipred_v_neon;
+    c->intra_pred[DC_PRED]       = BF(dav1d_ipred_dc, neon);
+    c->intra_pred[DC_128_PRED]   = BF(dav1d_ipred_dc_128, neon);
+    c->intra_pred[TOP_DC_PRED]   = BF(dav1d_ipred_dc_top, neon);
+    c->intra_pred[LEFT_DC_PRED]  = BF(dav1d_ipred_dc_left, neon);
+    c->intra_pred[HOR_PRED]      = BF(dav1d_ipred_h, neon);
+    c->intra_pred[VERT_PRED]     = BF(dav1d_ipred_v, neon);
 #if ARCH_AARCH64
-    c->intra_pred[PAETH_PRED]    = dav1d_ipred_paeth_neon;
-    c->intra_pred[SMOOTH_PRED]   = dav1d_ipred_smooth_neon;
-    c->intra_pred[SMOOTH_V_PRED] = dav1d_ipred_smooth_v_neon;
-    c->intra_pred[SMOOTH_H_PRED] = dav1d_ipred_smooth_h_neon;
-    c->intra_pred[FILTER_PRED]   = dav1d_ipred_filter_neon;
+    c->intra_pred[PAETH_PRED]    = BF(dav1d_ipred_paeth, neon);
+    c->intra_pred[SMOOTH_PRED]   = BF(dav1d_ipred_smooth, neon);
+    c->intra_pred[SMOOTH_V_PRED] = BF(dav1d_ipred_smooth_v, neon);
+    c->intra_pred[SMOOTH_H_PRED] = BF(dav1d_ipred_smooth_h, neon);
+    c->intra_pred[FILTER_PRED]   = BF(dav1d_ipred_filter, neon);
 
-    c->cfl_pred[DC_PRED]         = dav1d_ipred_cfl_neon;
-    c->cfl_pred[DC_128_PRED]     = dav1d_ipred_cfl_128_neon;
-    c->cfl_pred[TOP_DC_PRED]     = dav1d_ipred_cfl_top_neon;
-    c->cfl_pred[LEFT_DC_PRED]    = dav1d_ipred_cfl_left_neon;
+    c->cfl_pred[DC_PRED]         = BF(dav1d_ipred_cfl, neon);
+    c->cfl_pred[DC_128_PRED]     = BF(dav1d_ipred_cfl_128, neon);
+    c->cfl_pred[TOP_DC_PRED]     = BF(dav1d_ipred_cfl_top, neon);
+    c->cfl_pred[LEFT_DC_PRED]    = BF(dav1d_ipred_cfl_left, neon);
 
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1] = dav1d_ipred_cfl_ac_420_neon;
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1] = dav1d_ipred_cfl_ac_422_neon;
+    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1] = BF(dav1d_ipred_cfl_ac_420, neon);
+    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1] = BF(dav1d_ipred_cfl_ac_422, neon);
 
-    c->pal_pred                  = dav1d_pal_pred_neon;
+    c->pal_pred                  = BF(dav1d_pal_pred, neon);
 #endif
 #endif
 }