shithub: libvpx

Download patch

ref: a64a192c90e45aafe77f02c84930a9495ac04c3b
parent: 5576a4e1cbef87394d1251eda9520619051c0a1e
parent: 70ffd5d0558d69f89d92168fcab337ed252df028
author: Jim Bankoski <jimbankoski@google.com>
date: Fri Nov 22 03:16:17 EST 2013

Merge changes Id1698a35,Idcabd0b9

* changes:
  detokenization speedups
  Don't write 0's to token_cache

--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -241,7 +241,8 @@
 }
 
 static void inverse_transform_block(MACROBLOCKD* xd, int plane, int block,
-                                    TX_SIZE tx_size, uint8_t *dst, int stride) {
+                                    TX_SIZE tx_size, uint8_t *dst, int stride,
+                                    uint8_t *token_cache) {
   struct macroblockd_plane *const pd = &xd->plane[plane];
   const int eob = pd->eobs[block];
   if (eob > 0) {
@@ -274,13 +275,20 @@
 
     if (eob == 1) {
       vpx_memset(dqcoeff, 0, 2 * sizeof(dqcoeff[0]));
+      vpx_memset(token_cache, 0, 2 * sizeof(token_cache[0]));
     } else {
-      if (tx_type == DCT_DCT && tx_size <= TX_16X16 && eob <= 10)
+      if (tx_type == DCT_DCT && tx_size <= TX_16X16 && eob <= 10) {
         vpx_memset(dqcoeff, 0, 4 * (4 << tx_size) * sizeof(dqcoeff[0]));
-      else if (tx_size == TX_32X32 && eob <= 34)
+        vpx_memset(token_cache, 0,
+                   4 * (4 << tx_size) * sizeof(token_cache[0]));
+      } else if (tx_size == TX_32X32 && eob <= 34) {
         vpx_memset(dqcoeff, 0, 256 * sizeof(dqcoeff[0]));
-      else
+        vpx_memset(token_cache, 0, 256 * sizeof(token_cache[0]));
+      } else {
         vpx_memset(dqcoeff, 0, (16 << (tx_size << 1)) * sizeof(dqcoeff[0]));
+        vpx_memset(token_cache, 0,
+                   (16 << (tx_size << 1)) * sizeof(token_cache[0]));
+      }
     }
   }
 }
@@ -319,7 +327,8 @@
   if (!mi->mbmi.skip_coeff) {
     vp9_decode_block_tokens(cm, xd, plane, block, plane_bsize, x, y, tx_size,
                             args->r, args->token_cache);
-    inverse_transform_block(xd, plane, block, tx_size, dst, pd->dst.stride);
+    inverse_transform_block(xd, plane, block, tx_size, dst, pd->dst.stride,
+                            args->token_cache);
   }
 }
 
@@ -345,7 +354,7 @@
                                              args->r, args->token_cache);
   inverse_transform_block(xd, plane, block, tx_size,
                           &pd->dst.buf[4 * y * pd->dst.stride + 4 * x],
-                          pd->dst.stride);
+                          pd->dst.stride, args->token_cache);
 }
 
 static void set_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd,
@@ -946,6 +955,7 @@
     pd[i].dqcoeff = tile_data->dqcoeff[i];
     pd[i].eobs    = tile_data->eobs[i];
     vpx_memset(xd->plane[i].dqcoeff, 0, 64 * 64 * sizeof(int16_t));
+    vpx_memset(tile_data->token_cache, 0, sizeof(tile_data->token_cache));
   }
 }
 
--- a/vp9/decoder/vp9_detokenize.c
+++ b/vp9/decoder/vp9_detokenize.c
@@ -81,6 +81,7 @@
     INCREMENT_COUNT(token);                              \
     token_cache[scan[c]] = vp9_pt_energy_class[token];   \
     ++c;                                                 \
+    pt = get_coef_context(nb, token_cache, c);           \
     dqv = dq[1];                                          \
     continue;                                            \
   }
@@ -118,8 +119,6 @@
 
   while (c < seg_eob) {
     int val;
-    if (c)
-      pt = get_coef_context(nb, token_cache, c);
     band = *band_translate++;
     prob = coef_probs[band][pt];
     if (!cm->frame_parallel_decoding_mode)
@@ -126,23 +125,18 @@
       ++eob_branch_count[band][pt];
     if (!vp9_read(r, prob[EOB_CONTEXT_NODE]))
       break;
-    goto DECODE_ZERO;
 
-  SKIP_START:
-    if (c >= seg_eob)
-      break;
-    if (c)
-      pt = get_coef_context(nb, token_cache, c);
-    band = *band_translate++;
-    prob = coef_probs[band][pt];
-
   DECODE_ZERO:
     if (!vp9_read(r, prob[ZERO_CONTEXT_NODE])) {
       INCREMENT_COUNT(ZERO_TOKEN);
-      token_cache[scan[c]] = vp9_pt_energy_class[ZERO_TOKEN];
-      dqv = dq[1];                                          \
+      dqv = dq[1];
       ++c;
-      goto SKIP_START;
+      if (c >= seg_eob)
+        break;
+      pt = get_coef_context(nb, token_cache, c);
+      band = *band_translate++;
+      prob = coef_probs[band][pt];
+      goto DECODE_ZERO;
     }
 
     // ONE_CONTEXT_NODE_0_