shithub: dav1d

Download patch

ref: 74c9802f42842d458165f5acedc5307fda383f7b
parent: b865e6c7141f3cb7d56f8868a307fe24905b17a4
author: David Michael Barr <b@rr-dav.id.au>
date: Sat Sep 29 18:35:58 EDT 2018

Remove DC row buffer for CfL prediction

--- a/src/ipred.c
+++ b/src/ipred.c
@@ -881,14 +881,13 @@
 
 static NOINLINE void
 cfl_pred_1_c(pixel *dst, const ptrdiff_t stride, const int16_t *ac,
-             const pixel *const dc_pred, const int8_t alpha,
-             const int width, const int height)
+             const int8_t alpha, const int width, const int height)
 {
+    const pixel dc = *dst;
     for (int y = 0; y < height; y++) {
         for (int x = 0; x < width; x++) {
             const int diff = alpha * ac[x];
-            dst[x] = iclip_pixel(dc_pred[0] + apply_sign((abs(diff) + 32) >> 6,
-                                                         diff));
+            dst[x] = iclip_pixel(dc + apply_sign((abs(diff) + 32) >> 6, diff));
         }
         ac += width;
         dst += PXSTRIDE(stride);
@@ -899,11 +898,10 @@
 static void cfl_pred_1_##width##xN_c(pixel *const dst, \
                                      const ptrdiff_t stride, \
                                      const int16_t *const ac, \
-                                     const pixel *const dc_pred, \
                                      const int8_t alpha, \
                                      const int height) \
 { \
-    cfl_pred_1_c(dst, stride, ac, dc_pred, alpha, width, height); \
+    cfl_pred_1_c(dst, stride, ac, alpha, width, height); \
 }
 
 cfl_pred_1_fn( 4)
@@ -912,18 +910,16 @@
 cfl_pred_1_fn(32)
 
 static NOINLINE void
-cfl_pred_c(pixel *dstU, pixel *dstV, const ptrdiff_t stride,
-           const int16_t *ac, const pixel *const dc_pred,
+cfl_pred_c(pixel *dstU, pixel *dstV, const ptrdiff_t stride, const int16_t *ac,
            const int8_t *const alphas, const int width, const int height)
 {
+    const pixel dcU = *dstU, dcV = *dstV;
     for (int y = 0; y < height; y++) {
         for (int x = 0; x < width; x++) {
             const int diff1 = alphas[0] * ac[x];
-            dstU[x] = iclip_pixel(dc_pred[ 0] + apply_sign((abs(diff1) + 32) >> 6,
-                                                           diff1));
+            dstU[x] = iclip_pixel(dcU + apply_sign((abs(diff1) + 32) >> 6, diff1));
             const int diff2 = alphas[1] * ac[x];
-            dstV[x] = iclip_pixel(dc_pred[32] + apply_sign((abs(diff2) + 32) >> 6,
-                                                           diff2));
+            dstV[x] = iclip_pixel(dcV + apply_sign((abs(diff2) + 32) >> 6, diff2));
         }
         ac += width;
         dstU += PXSTRIDE(stride);
@@ -936,11 +932,10 @@
                                    pixel *const dstV, \
                                    const ptrdiff_t stride, \
                                    const int16_t *const ac, \
-                                   const pixel *const dc_pred, \
                                    const int8_t *const alphas, \
                                    const int height) \
 { \
-    cfl_pred_c(dstU, dstV, stride, ac, dc_pred, alphas, width, height); \
+    cfl_pred_c(dstU, dstV, stride, ac, alphas, width, height); \
 }
 
 cfl_pred_fn( 4)
--- a/src/ipred.h
+++ b/src/ipred.h
@@ -55,26 +55,23 @@
 typedef decl_cfl_ac_fn(*cfl_ac_fn);
 
 /*
- * dst[x,y] = dc + alpha * ac[x,y]
+ * dst[x,y] += alpha * ac[x,y]
  * - alpha contains a q3 scalar in [-16,16] range;
- * - dc_pred[] is the first line of the plane's DC prediction
  */
 #define decl_cfl_pred_1_fn(name) \
 void (name)(pixel *dst, ptrdiff_t stride, \
-            const int16_t *ac, const pixel *dc_pred, \
-            const int8_t alpha, const int height)
+            const int16_t *ac, const int8_t alpha, \
+            const int height)
 typedef decl_cfl_pred_1_fn(*cfl_pred_1_fn);
 
 /*
- * dst[plane][x,y] = dc[plane] + alpha[plane] * ac[x,y]
+ * dst[plane][x,y] += alpha[plane] * ac[x,y]
  * - alphas contains two q3 scalars (one for each plane) in [-16,16] range;
- * - dc_pred[] is the first line of each plane's DC prediction, the second plane
- *   starting at an offset of 16 * sizeof(pixel) bytes.
  */
 #define decl_cfl_pred_fn(name) \
 void (name)(pixel *u_dst, pixel *v_dst, ptrdiff_t stride, \
-            const int16_t *ac, const pixel *dc_pred, \
-            const int8_t *const alphas, const int height)
+            const int16_t *ac, const int8_t *const alphas, \
+            const int height)
 typedef decl_cfl_pred_fn(*cfl_pred_fn);
 
 /*
--- a/src/recon.c
+++ b/src/recon.c
@@ -836,7 +836,6 @@
                 assert(!init_x && !init_y);
 
                 int16_t *const ac = t->scratch.ac;
-                ALIGN_STK_32(pixel, uv_pred, 2 * 32,);
                 pixel *y_src = ((pixel *) f->cur.p.data[0]) + 4 * (t->bx & ~ss_hor) +
                                  4 * (t->by & ~ss_ver) * PXSTRIDE(f->cur.p.stride[0]);
                 const ptrdiff_t uv_off = 4 * ((t->bx >> ss_hor) +
@@ -870,13 +869,7 @@
                                                     top_sb_edge, DC_PRED, &angle,
                                                     cfl_uv_t_dim->w,
                                                     cfl_uv_t_dim->h, edge);
-                    if (b->cfl_alpha[pl]) {
-                      dsp->ipred.intra_pred[cfl_uvtx][m](&uv_pred[32 * pl],
-                                                         0, edge, 0);
-                    } else {
-                      dsp->ipred.intra_pred[cfl_uvtx][m](uv_dst[pl], stride,
-                                                         edge, 0);
-                    }
+                    dsp->ipred.intra_pred[cfl_uvtx][m](uv_dst[pl], stride, edge, 0);
                 }
                 const int furthest_r =
                     ((cw4 << ss_hor) + t_dim->w - 1) & ~(t_dim->w - 1);
@@ -889,14 +882,12 @@
                 if (b->cfl_alpha[0] && b->cfl_alpha[1]) {
                   dsp->ipred.cfl_pred[cfl_uv_t_dim->lw](uv_dst[0],
                                                         uv_dst[1], stride,
-                                                        ac, uv_pred,
-                                                        b->cfl_alpha,
+                                                        ac, b->cfl_alpha,
                                                         cbh4 * 4);
                 } else {
                   const int pl = !b->cfl_alpha[0];
                   dsp->ipred.cfl_pred_1[cfl_uv_t_dim->lw](uv_dst[pl],
                                                           stride, ac,
-                                                          &uv_pred[32 * pl],
                                                           b->cfl_alpha[pl],
                                                           cbh4 * 4);
                 }