shithub: libvpx

ref: 25e55526301eba7d6e5c68e25402e9b2102976d8
parent: c2634188ceb2c0ce9256569004f985a5e7e76666
author: hkuang <hkuang@google.com>
date: Thu Dec 12 15:33:06 EST 2013

Remove border extension in intra frame prediction.

Change-Id: Id677df4d3dbbed6fdf7319ca6464f19cf32c8176
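
This change drops the extend_for_intra() pass that pre-filled the UMV border of the reconstruction buffer before every intra block. build_intra_predictors() is made boundary-aware instead: it reads the frame size from the new xd->cur_buf pointer, derives the block's (x0, y0) position in the frame, and replicates the last in-frame pixel whenever the above row or left column would reach past the frame edge.

A minimal sketch of the clamping idea for the left column (not the libvpx code; fill_left_col and the toy frame below are illustrative):

    #include <stdint.h>
    #include <stdio.h>

    /* Fill the left neighbor column of a bs x bs intra block.  ref points at
     * the block's top-left pixel in the reconstruction buffer, y0 is the
     * block's vertical position in the frame.  If the block hangs past the
     * bottom of the frame, replicate the last in-frame left pixel instead of
     * relying on a pre-extended border. */
    static void fill_left_col(uint8_t *left_col, const uint8_t *ref,
                              int ref_stride, int bs, int y0, int frame_height) {
      int i;
      const int in_frame = (y0 + bs <= frame_height) ? bs : frame_height - y0;
      for (i = 0; i < in_frame; ++i)
        left_col[i] = ref[i * ref_stride - 1];               /* real neighbors */
      for (; i < bs; ++i)
        left_col[i] = ref[(in_frame - 1) * ref_stride - 1];  /* replicate last */
    }

    int main(void) {
      /* 4x6 toy frame (stride 4); the 4x4 block at (x, y) = (1, 4) has its
       * bottom two rows below the frame. */
      enum { W = 4, H = 6, BS = 4 };
      uint8_t frame[H * W], left[BS];
      int i;
      for (i = 0; i < H * W; ++i)
        frame[i] = (uint8_t)i;
      fill_left_col(left, &frame[4 * W + 1], W, BS, 4, H);
      for (i = 0; i < BS; ++i)
        printf("%d ", left[i]);    /* prints: 16 20 20 20 */
      printf("\n");
      return 0;
    }

The encoder sets cur_buf when it sets up the source planes and the decoder sets it on the newly allocated frame buffer, so the same clamped prediction runs on both sides and the per-block memset extension into the border is no longer needed.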

--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -238,6 +238,9 @@
   /* pointers to reference frames */
   const YV12_BUFFER_CONFIG *ref_buf[2];
 
+  /* pointer to current frame */
+  const YV12_BUFFER_CONFIG *cur_buf;
+
   int lossless;
   /* Inverse transform function pointers. */
   void (*itxm_add)(const int16_t *input, uint8_t *dest, int stride, int eob);
@@ -407,44 +410,6 @@
   const int raster_mb = block >> (tx_size << 1);
   *x = (raster_mb & (tx_cols - 1)) << tx_size;
   *y = (raster_mb >> tx_cols_log2) << tx_size;
-}
-
-static void extend_for_intra(MACROBLOCKD *xd, BLOCK_SIZE plane_bsize,
-                             int plane, int aoff, int loff) {
-  struct macroblockd_plane *const pd = &xd->plane[plane];
-  uint8_t *const buf = pd->dst.buf;
-  const int stride = pd->dst.stride;
-  const int x = aoff * 4 - 1;
-  const int y = loff * 4 - 1;
-  // Copy a pixel into the umv if we are in a situation where the block size
-  // extends into the UMV.
-  // TODO(JBB): Should be able to do the full extend in place so we don't have
-  // to do this multiple times.
-  if (xd->mb_to_right_edge < 0) {
-    const int bw = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
-    const int umv_border_start = bw + (xd->mb_to_right_edge >>
-                                       (3 + pd->subsampling_x));
-
-    if (x + bw > umv_border_start)
-      vpx_memset(&buf[y * stride + umv_border_start],
-                 buf[y * stride + umv_border_start - 1], bw);
-  }
-
-  if (xd->mb_to_bottom_edge < 0) {
-    if (xd->left_available || x >= 0) {
-      const int bh = 4 * num_4x4_blocks_high_lookup[plane_bsize];
-      const int umv_border_start =
-          bh + (xd->mb_to_bottom_edge >> (3 + pd->subsampling_y));
-
-      if (y + bh > umv_border_start) {
-        const uint8_t c = buf[(umv_border_start - 1) * stride + x];
-        uint8_t *d = &buf[umv_border_start * stride + x];
-        int i;
-        for (i = 0; i < bh; ++i, d += stride)
-          *d = c;
-      }
-    }
-  }
 }
 
 static void set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd,
--- a/vp9/common/vp9_reconintra.c
+++ b/vp9/common/vp9_reconintra.c
@@ -313,11 +313,12 @@
 #undef intra_pred_allsizes
 }
 
-static void build_intra_predictors(const uint8_t *ref, int ref_stride,
-                                   uint8_t *dst, int dst_stride,
+static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref,
+                                   int ref_stride, uint8_t *dst, int dst_stride,
                                    MB_PREDICTION_MODE mode, TX_SIZE tx_size,
                                    int up_available, int left_available,
-                                   int right_available) {
+                                   int right_available, int x, int y,
+                                   int plane) {
   int i;
   DECLARE_ALIGNED_ARRAY(16, uint8_t, left_col, 64);
   DECLARE_ALIGNED_ARRAY(16, uint8_t, above_data, 128 + 16);
@@ -324,6 +325,9 @@
   uint8_t *above_row = above_data + 16;
   const uint8_t *const_above_row = above_row;
   const int bs = 4 << tx_size;
+  int frame_width, frame_height;
+  int x0, y0;
+  const struct macroblockd_plane *const pd = &xd->plane[plane];
 
   // 127 127 127 .. 127 127 127 127 127 127
   // 129  A   B  ..  Y   Z
@@ -334,26 +338,90 @@
 
   once(init_intra_pred_fn_ptrs);
 
+  // Get current frame pointer, width and height.
+  if (plane == 0) {
+    frame_width = xd->cur_buf->y_width;
+    frame_height = xd->cur_buf->y_height;
+  } else {
+    frame_width = xd->cur_buf->uv_width;
+    frame_height = xd->cur_buf->uv_height;
+  }
+
+  // Get block position in current frame.
+  x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x;
+  y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y;
+
   // left
   if (left_available) {
-    for (i = 0; i < bs; i++)
-      left_col[i] = ref[i * ref_stride - 1];
+    if (xd->mb_to_bottom_edge < 0) {
+      /* slower path if the block needs border extension */
+      if (y0 + bs <= frame_height) {
+        for (i = 0; i < bs; ++i)
+          left_col[i] = ref[i * ref_stride - 1];
+      } else {
+        const int extend_bottom = frame_height - y0;
+        for (i = 0; i < extend_bottom; ++i)
+          left_col[i] = ref[i * ref_stride - 1];
+        for (; i < bs; ++i)
+          left_col[i] = ref[(extend_bottom - 1) * ref_stride - 1];
+      }
+    } else {
+      /* faster path if the block does not need extension */
+      for (i = 0; i < bs; ++i)
+        left_col[i] = ref[i * ref_stride - 1];
+    }
   } else {
     vpx_memset(left_col, 129, bs);
   }
 
+  // TODO(hkuang) do not extend 2*bs pixels for all modes.
   // above
   if (up_available) {
     const uint8_t *above_ref = ref - ref_stride;
-    if (bs == 4 && right_available && left_available) {
-      const_above_row = above_ref;
+    if (xd->mb_to_right_edge < 0) {
+      /* slower path if the block needs border extension */
+      if (x0 + 2 * bs <= frame_width) {
+        if (right_available && bs == 4) {
+          vpx_memcpy(above_row - 1, above_ref - 1, 2 * bs + 1);
+        } else {
+          vpx_memcpy(above_row - 1, above_ref - 1, bs + 1);
+          vpx_memset(above_row + bs, above_row[bs - 1], bs);
+        }
+      } else if (x0 + bs <= frame_width) {
+        const int r = frame_width - x0;
+        if (right_available && bs == 4) {
+          vpx_memcpy(above_row - 1, above_ref - 1, r + 1);
+          vpx_memset(above_row + r, above_row[r - 1],
+                     x0 + 2 * bs - frame_width);
+        } else {
+          vpx_memcpy(above_row - 1, above_ref - 1, bs + 1);
+          vpx_memset(above_row + bs, above_row[bs - 1], bs);
+        }
+      } else if (x0 <= frame_width) {
+        const int r = frame_width - x0;
+        if (right_available && bs == 4) {
+          vpx_memcpy(above_row - 1, above_ref - 1, r + 1);
+          vpx_memset(above_row + r, above_row[r - 1],
+                     x0 + 2 * bs - frame_width);
+        } else {
+          vpx_memcpy(above_row - 1, above_ref - 1, r + 1);
+          vpx_memset(above_row + r, above_row[r - 1],
+                     x0 + 2 * bs - frame_width);
+        }
+        above_row[-1] = left_available ? above_ref[-1] : 129;
+      }
     } else {
-      vpx_memcpy(above_row, above_ref, bs);
-      if (bs == 4 && right_available)
-        vpx_memcpy(above_row + bs, above_ref + bs, bs);
-      else
-        vpx_memset(above_row + bs, above_row[bs - 1], bs);
-      above_row[-1] = left_available ? above_ref[-1] : 129;
+      /* faster path if the block does not need extension */
+      if (bs == 4 && right_available && left_available) {
+        const_above_row = above_ref;
+      } else {
+        vpx_memcpy(above_row, above_ref, bs);
+        if (bs == 4 && right_available)
+          vpx_memcpy(above_row + bs, above_ref + bs, bs);
+        else
+          vpx_memset(above_row + bs, above_row[bs - 1], bs);
+        above_row[-1] = left_available ? above_ref[-1] : 129;
+      }
     }
   } else {
     vpx_memset(above_row, 127, bs * 2);
@@ -370,16 +438,19 @@
 }
 
 void vp9_predict_intra_block(const MACROBLOCKD *xd, int block_idx, int bwl_in,
-                            TX_SIZE tx_size, int mode,
-                            const uint8_t *ref, int ref_stride,
-                            uint8_t *dst, int dst_stride) {
+                             TX_SIZE tx_size, int mode,
+                             const uint8_t *ref, int ref_stride,
+                             uint8_t *dst, int dst_stride,
+                             int aoff, int loff, int plane) {
   const int bwl = bwl_in - tx_size;
   const int wmask = (1 << bwl) - 1;
   const int have_top = (block_idx >> bwl) || xd->up_available;
   const int have_left = (block_idx & wmask) || xd->left_available;
   const int have_right = ((block_idx & wmask) != wmask);
+  const int x = aoff * 4;
+  const int y = loff * 4;
 
   assert(bwl >= 0);
-  build_intra_predictors(ref, ref_stride, dst, dst_stride, mode, tx_size,
-                         have_top, have_left, have_right);
+  build_intra_predictors(xd, ref, ref_stride, dst, dst_stride, mode, tx_size,
+                         have_top, have_left, have_right, x, y, plane);
 }
--- a/vp9/common/vp9_reconintra.h
+++ b/vp9/common/vp9_reconintra.h
@@ -17,5 +17,6 @@
 void vp9_predict_intra_block(const MACROBLOCKD *xd, int block_idx, int bwl_in,
                              TX_SIZE tx_size, int mode,
                              const uint8_t *ref, int ref_stride,
-                             uint8_t *dst, int dst_stride);
+                             uint8_t *dst, int dst_stride,
+                             int aoff, int loff, int plane);
 #endif  // VP9_COMMON_VP9_RECONINTRA_H_
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -305,12 +305,10 @@
   txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x, &y);
   dst = &pd->dst.buf[4 * y * pd->dst.stride + 4 * x];
 
-  if (xd->mb_to_right_edge < 0 || xd->mb_to_bottom_edge < 0)
-    extend_for_intra(xd, plane_bsize, plane, x, y);
-
   vp9_predict_intra_block(xd, block >> (tx_size << 1),
                           b_width_log2(plane_bsize), tx_size, mode,
-                          dst, pd->dst.stride, dst, pd->dst.stride);
+                          dst, pd->dst.stride, dst, pd->dst.stride,
+                          x, y, plane);
 
   if (!mi->mbmi.skip_coeff) {
     const int eob = vp9_decode_block_tokens(cm, xd, plane, block,
@@ -1333,6 +1331,7 @@
   const int tile_rows = 1 << cm->log2_tile_rows;
   const int tile_cols = 1 << cm->log2_tile_cols;
   YV12_BUFFER_CONFIG *const new_fb = get_frame_new_buffer(cm);
+  xd->cur_buf = new_fb;
 
   if (!first_partition_size) {
       // showing a frame directly
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -548,6 +548,9 @@
                           src->alpha_stride};
   int i;
 
+  // Set current frame pointer.
+  x->e_mbd.cur_buf = src;
+
   for (i = 0; i < MAX_MB_PLANE; i++)
     setup_pred_plane(&x->plane[i].src, buffers[i], strides[i], mi_row, mi_col,
                      NULL, x->e_mbd.plane[i].subsampling_x,
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -547,9 +547,6 @@
   src = &p->src.buf[4 * (j * p->src.stride + i)];
   src_diff = &p->src_diff[4 * (j * diff_stride + i)];
 
-  if (xd->mb_to_right_edge < 0 || xd->mb_to_bottom_edge < 0)
-    extend_for_intra(xd, plane_bsize, plane, i, j);
-
   // if (x->optimize)
   // vp9_optimize_b(plane, block, plane_bsize, tx_size, x, args->ctx);
 
@@ -560,7 +557,7 @@
       vp9_predict_intra_block(xd, block >> 6, bwl, TX_32X32, mode,
                               x->skip_encode ? src : dst,
                               x->skip_encode ? p->src.stride : pd->dst.stride,
-                              dst, pd->dst.stride);
+                              dst, pd->dst.stride, i, j, plane);
       if (!x->skip_recode) {
         vp9_subtract_block(32, 32, src_diff, diff_stride,
                            src, p->src.stride, dst, pd->dst.stride);
@@ -583,7 +580,7 @@
       vp9_predict_intra_block(xd, block >> 4, bwl, TX_16X16, mode,
                               x->skip_encode ? src : dst,
                               x->skip_encode ? p->src.stride : pd->dst.stride,
-                              dst, pd->dst.stride);
+                              dst, pd->dst.stride, i, j, plane);
       if (!x->skip_recode) {
         vp9_subtract_block(16, 16, src_diff, diff_stride,
                            src, p->src.stride, dst, pd->dst.stride);
@@ -602,7 +599,7 @@
       vp9_predict_intra_block(xd, block >> 2, bwl, TX_8X8, mode,
                               x->skip_encode ? src : dst,
                               x->skip_encode ? p->src.stride : pd->dst.stride,
-                              dst, pd->dst.stride);
+                              dst, pd->dst.stride, i, j, plane);
       if (!x->skip_recode) {
         vp9_subtract_block(8, 8, src_diff, diff_stride,
                            src, p->src.stride, dst, pd->dst.stride);
@@ -625,7 +622,7 @@
       vp9_predict_intra_block(xd, block, bwl, TX_4X4, mode,
                               x->skip_encode ? src : dst,
                               x->skip_encode ? p->src.stride : pd->dst.stride,
-                              dst, pd->dst.stride);
+                              dst, pd->dst.stride, i, j, plane);
 
       if (!x->skip_recode) {
         vp9_subtract_block(4, 4, src_diff, diff_stride,
--- a/vp9/encoder/vp9_mbgraph.c
+++ b/vp9/encoder/vp9_mbgraph.c
@@ -152,7 +152,8 @@
     xd->mi_8x8[0]->mbmi.mode = mode;
     vp9_predict_intra_block(xd, 0, 2, TX_16X16, mode,
                             x->plane[0].src.buf, x->plane[0].src.stride,
-                            xd->plane[0].dst.buf, xd->plane[0].dst.stride);
+                            xd->plane[0].dst.buf, xd->plane[0].dst.stride,
+                            0, 0, 0);
     err = vp9_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride,
                        xd->plane[0].dst.buf, xd->plane[0].dst.stride, best_err);
 
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -1042,7 +1042,7 @@
                                 TX_4X4, mode,
                                 x->skip_encode ? src : dst,
                                 x->skip_encode ? src_stride : dst_stride,
-                                dst, dst_stride);
+                                dst, dst_stride, idx, idy, 0);
         vp9_subtract_block(4, 4, src_diff, 8,
                            src, src_stride,
                            dst, dst_stride);