shithub: libvpx

Download patch

ref: 8783a8a97c6caa6ca5be81aab66fe83726b13077
parent: 8bf791e7ef59ed55252b9dc9e6b44f672147404d
author: Jingning Han <jingning@google.com>
date: Wed Jul 8 05:15:39 EDT 2015

Refactor transform block loop for inter mode decoding

Rework the inter mode transform block decoding loop. Replace the
block index with the row and col indices as the input arguments. This
saves the function call that computes the row and col indices from
the block index and overall block size, as well as many if statements
associated with the transform block position relative to the coding
block. For the test bit-stream pedestrian_area 1080p at 5 Mbps,
the decoding speed goes up from 81.13 fps to 81.92 fps.

Note that intra coded block decoding needs more refactoring
work than inter coded block decoding, so keep it using
vp9_foreach_transformed_block for now.

Change-Id: I5622bdae7be28ed5af96693274057f55ba9b4fb4

--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -406,20 +406,20 @@
   int seg_id;
 };
 
-static void reconstruct_inter_block(int plane, int block,
+static void reconstruct_inter_block(int plane, int row, int col,
                                     BLOCK_SIZE plane_bsize,
-                                    TX_SIZE tx_size, void *arg) {
+                                    TX_SIZE tx_size, struct inter_args *arg) {
   struct inter_args *args = (struct inter_args *)arg;
   MACROBLOCKD *const xd = args->xd;
   struct macroblockd_plane *const pd = &xd->plane[plane];
-  int x, y, eob;
+  int eob;
   const scan_order *sc = &vp9_default_scan_orders[tx_size];
-  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x, &y);
   eob = vp9_decode_block_tokens(xd, plane, sc, plane_bsize,
-                                x, y, tx_size, args->r, args->seg_id);
+                                col, row, tx_size, args->r, args->seg_id);
+
   inverse_transform_block_inter(xd, plane, tx_size,
-                                &pd->dst.buf[4 * y * pd->dst.stride + 4 * x],
-                                pd->dst.stride, eob);
+                            &pd->dst.buf[4 * row * pd->dst.stride + 4 * col],
+                            pd->dst.stride, eob);
   *args->eobtotal += eob;
 }
 
@@ -838,7 +838,27 @@
     if (!mbmi->skip) {
       int eobtotal = 0;
       struct inter_args arg = {xd, r, &eobtotal, mbmi->segment_id};
-      vp9_foreach_transformed_block(xd, bsize, reconstruct_inter_block, &arg);
+      int plane;
+
+      for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+        const struct macroblockd_plane *const pd = &xd->plane[plane];
+        const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi, pd)
+                                      : mbmi->tx_size;
+        const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
+        const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
+        const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
+        const int step = (1 << tx_size);
+        int r, c;
+        const int max_blocks_wide = num_4x4_w + (xd->mb_to_right_edge >= 0 ?
+            0 : xd->mb_to_right_edge >> (5 + pd->subsampling_x));
+        const int max_blocks_high = num_4x4_h + (xd->mb_to_bottom_edge >= 0 ?
+            0 : xd->mb_to_bottom_edge >> (5 + pd->subsampling_y));
+
+        for (r = 0; r < max_blocks_high; r += step)
+          for (c = 0; c < max_blocks_wide; c += step)
+            reconstruct_inter_block(plane, r, c, plane_bsize, tx_size, &arg);
+      }
+
       if (!less8x8 && eobtotal == 0)
         mbmi->skip = 1;  // skip loopfilter
     }