shithub: libvpx

Download patch

ref: cccad1c5de41e29d6cc64c6e11a1d91c8486d090
parent: 0ede9f52b796b6d8e02046b24f68a3db8b9f5920
author: Jingning Han <jingning@google.com>
date: Tue Jul 7 07:36:05 EDT 2015

Reduce dqcoeff array size in decoder

The decoding process handles detokenization and reconstruction per
transform block sequentially. There is no need to offset the dqcoeff
buffer according to the transform block index. This allows to
reduce the memory spill and improve cache performance.

Change-Id: Ibb8bfe532a7a08fcabaf6d42cbec1e986901d32d

--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -188,8 +188,11 @@
 #endif
 
   /* dqcoeff are shared by all the planes. So planes must be decoded serially */
+#if CONFIG_VP9_ENCODER
   DECLARE_ALIGNED(16, tran_low_t, dqcoeff[64 * 64]);
-
+#else
+  DECLARE_ALIGNED(16, tran_low_t, dqcoeff[32 * 32]);
+#endif
   int lossless;
   int corrupted;
 
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -188,7 +188,7 @@
   struct macroblockd_plane *const pd = &xd->plane[plane];
   if (eob > 0) {
     TX_TYPE tx_type = DCT_DCT;
-    tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+    tran_low_t *const dqcoeff = pd->dqcoeff;
 #if CONFIG_VP9_HIGHBITDEPTH
     if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
       if (xd->lossless) {
--- a/vp9/decoder/vp9_detokenize.c
+++ b/vp9/decoder/vp9_detokenize.c
@@ -217,7 +217,7 @@
                                                pd->left_context + y);
   const scan_order *so = get_scan(xd, tx_size, pd->plane_type, block);
   const int eob = decode_coefs(xd, pd->plane_type,
-                               BLOCK_OFFSET(pd->dqcoeff, block), tx_size,
+                               pd->dqcoeff, tx_size,
                                dequant, ctx, so->scan, so->neighbors, r);
   vp9_set_contexts(xd, pd, plane_bsize, tx_size, eob > 0, x, y);
   return eob;