shithub: dav1d

Download patch

ref: c2a2e6ee187f854a03973318cd00edbf0269725d
parent: 5bc8a5002beadaa195fa99df46f10a793055a533
author: Martin Storsjö <martin@martin.st>
date: Wed Feb 5 05:43:14 EST 2020

arm: looprestoration: Fix the wiener C wrapper function for high bitdepths

Use HIGHBD_DECL_SUFFIX and HIGHBD_TAIL_SUFFIX where necessary, add
a missing sizeof(pixel).

--- a/src/arm/looprestoration_init_tmpl.c
+++ b/src/arm/looprestoration_init_tmpl.c
@@ -44,7 +44,8 @@
 void BF(dav1d_wiener_filter_h, neon)(int16_t *dst, const pixel (*left)[4],
                                      const pixel *src, ptrdiff_t stride,
                                      const int16_t fh[7], const intptr_t w,
-                                     int h, enum LrEdgeFlags edges);
+                                     int h, enum LrEdgeFlags edges
+                                     HIGHBD_DECL_SUFFIX);
 // This calculates things slightly differently than the reference C version.
 // This version calculates roughly this:
 // fv[3] += 128;
@@ -56,7 +57,7 @@
 void BF(dav1d_wiener_filter_v, neon)(pixel *dst, ptrdiff_t stride,
                                      const int16_t *mid, int w, int h,
                                      const int16_t fv[7], enum LrEdgeFlags edges,
-                                     ptrdiff_t mid_stride);
+                                     ptrdiff_t mid_stride HIGHBD_DECL_SUFFIX);
 void BF(dav1d_copy_narrow, neon)(pixel *dst, ptrdiff_t stride,
                                  const pixel *src, int w, int h);
 
@@ -64,7 +65,8 @@
                                const pixel (*const left)[4],
                                const pixel *lpf, const ptrdiff_t lpf_stride,
                                const int w, const int h, const int16_t fh[7],
-                               const int16_t fv[7], const enum LrEdgeFlags edges)
+                               const int16_t fv[7], const enum LrEdgeFlags edges
+                               HIGHBD_DECL_SUFFIX)
 {
     ALIGN_STK_16(int16_t, mid, 68 * 384,);
     int mid_stride = (w + 7) & ~7;
@@ -71,27 +73,31 @@
 
     // Horizontal filter
     BF(dav1d_wiener_filter_h, neon)(&mid[2 * mid_stride], left, dst, dst_stride,
-                                    fh, w, h, edges);
+                                    fh, w, h, edges HIGHBD_TAIL_SUFFIX);
     if (edges & LR_HAVE_TOP)
         BF(dav1d_wiener_filter_h, neon)(mid, NULL, lpf, lpf_stride,
-                                        fh, w, 2, edges);
+                                        fh, w, 2, edges HIGHBD_TAIL_SUFFIX);
     if (edges & LR_HAVE_BOTTOM)
         BF(dav1d_wiener_filter_h, neon)(&mid[(2 + h) * mid_stride], NULL,
                                         lpf + 6 * PXSTRIDE(lpf_stride),
-                                        lpf_stride, fh, w, 2, edges);
+                                        lpf_stride, fh, w, 2, edges
+                                        HIGHBD_TAIL_SUFFIX);
 
     // Vertical filter
     if (w >= 8)
         BF(dav1d_wiener_filter_v, neon)(dst, dst_stride, &mid[2*mid_stride],
                                         w & ~7, h, fv, edges,
-                                        mid_stride * sizeof(*mid));
+                                        mid_stride * sizeof(*mid)
+                                        HIGHBD_TAIL_SUFFIX);
     if (w & 7) {
         // For uneven widths, do a full 8 pixel wide filtering into a temp
         // buffer and copy out the narrow slice of pixels separately into dest.
         ALIGN_STK_16(pixel, tmp, 64 * 8,);
-        BF(dav1d_wiener_filter_v, neon)(tmp, w & 7, &mid[2*mid_stride + (w & ~7)],
+        BF(dav1d_wiener_filter_v, neon)(tmp, (w & 7) * sizeof(pixel),
+                                        &mid[2*mid_stride + (w & ~7)],
                                         w & 7, h, fv, edges,
-                                        mid_stride * sizeof(*mid));
+                                        mid_stride * sizeof(*mid)
+                                        HIGHBD_TAIL_SUFFIX);
         BF(dav1d_copy_narrow, neon)(dst + (w & ~7), dst_stride, tmp, w & 7, h);
     }
 }