shithub: libvpx

--- a/vp9/encoder/vp9_block.h

+++ b/vp9/encoder/vp9_block.h

@@ -26,7 +26,8 @@

 // Structure to hold snapshot of coding context during the mode picking process

 typedef struct {

   MODE_INFO mic;

-  uint8_t zcoeff_blk[256];

+  uint8_t *zcoeff_blk;

+  int num_4x4_blk;

   int skip;

   int_mv best_ref_mv;

   int_mv second_best_ref_mv;

@@ -176,6 +177,45 @@

   void (*quantize_b_4x4)(MACROBLOCK *x, int b_idx, TX_TYPE tx_type,

                          int y_blocks);

};

+// Maps bsize (plus the current xd->sb_index/mb_index/b_index) to its context.

+// TODO(jingning): the variables used here are a little complicated; refactor

+// the temporary buffers when recursive partition down to 4x4 is enabled.

+static PICK_MODE_CONTEXT *get_block_context(MACROBLOCK *x, BLOCK_SIZE bsize) {

+  MACROBLOCKD *const xd = &x->e_mbd;

+  switch (bsize) {

+    case BLOCK_64X64:

+      return &x->sb64_context;  // single context, no index needed

+    case BLOCK_64X32:

+      return &x->sb64x32_context[xd->sb_index];  // indexed by sb_index only

+    case BLOCK_32X64:

+      return &x->sb32x64_context[xd->sb_index];

+    case BLOCK_32X32:

+      return &x->sb32_context[xd->sb_index];

+    case BLOCK_32X16:

+      return &x->sb32x16_context[xd->sb_index][xd->mb_index];  // sb_index, then mb_index

+    case BLOCK_16X32:

+      return &x->sb16x32_context[xd->sb_index][xd->mb_index];

+    case BLOCK_16X16:

+      return &x->mb_context[xd->sb_index][xd->mb_index];

+    case BLOCK_16X8:

+      return &x->sb16x8_context[xd->sb_index][xd->mb_index][xd->b_index];  // sb_index, mb_index, then b_index

+    case BLOCK_8X16:

+      return &x->sb8x16_context[xd->sb_index][xd->mb_index][xd->b_index];

+    case BLOCK_8X8:

+      return &x->sb8x8_context[xd->sb_index][xd->mb_index][xd->b_index];

+    case BLOCK_8X4:

+      return &x->sb8x4_context[xd->sb_index][xd->mb_index][xd->b_index];

+    case BLOCK_4X8:

+      return &x->sb4x8_context[xd->sb_index][xd->mb_index][xd->b_index];

+    case BLOCK_4X4:

+      return &x->ab4x4_context[xd->sb_index][xd->mb_index][xd->b_index];

+    default:  // any other bsize has no context here: assert, then return NULL

+      assert(0);

+      return NULL;

+  }

+}

 struct rdcost_block_args {

   MACROBLOCK *x;

--- a/vp9/encoder/vp9_encodeframe.c

+++ b/vp9/encoder/vp9_encodeframe.c

@@ -419,7 +419,7 @@

   x->skip = ctx->skip;

   vpx_memcpy(x->zcoeff_blk[mbmi->tx_size], ctx->zcoeff_blk,

-             sizeof(ctx->zcoeff_blk));

+             sizeof(uint8_t) * ctx->num_4x4_blk);

   if (!output_enabled)

     return;

@@ -696,45 +696,6 @@

     // Count of last ref frame 0,0 usage

     if (mbmi->mode == ZEROMV && mbmi->ref_frame[0] == LAST_FRAME)

       cpi->inter_zz_count++;

-  }

-}

-// TODO(jingning): the variables used here are little complicated. need further

-// refactoring on organizing the temporary buffers, when recursive

-// partition down to 4x4 block size is enabled.

-static PICK_MODE_CONTEXT *get_block_context(MACROBLOCK *x, BLOCK_SIZE bsize) {

-  MACROBLOCKD *const xd = &x->e_mbd;

-  switch (bsize) {

-    case BLOCK_64X64:

-      return &x->sb64_context;

-    case BLOCK_64X32:

-      return &x->sb64x32_context[xd->sb_index];

-    case BLOCK_32X64:

-      return &x->sb32x64_context[xd->sb_index];

-    case BLOCK_32X32:

-      return &x->sb32_context[xd->sb_index];

-    case BLOCK_32X16:

-      return &x->sb32x16_context[xd->sb_index][xd->mb_index];

-    case BLOCK_16X32:

-      return &x->sb16x32_context[xd->sb_index][xd->mb_index];

-    case BLOCK_16X16:

-      return &x->mb_context[xd->sb_index][xd->mb_index];

-    case BLOCK_16X8:

-      return &x->sb16x8_context[xd->sb_index][xd->mb_index][xd->b_index];

-    case BLOCK_8X16:

-      return &x->sb8x16_context[xd->sb_index][xd->mb_index][xd->b_index];

-    case BLOCK_8X8:

-      return &x->sb8x8_context[xd->sb_index][xd->mb_index][xd->b_index];

-    case BLOCK_8X4:

-      return &x->sb8x4_context[xd->sb_index][xd->mb_index][xd->b_index];

-    case BLOCK_4X8:

-      return &x->sb4x8_context[xd->sb_index][xd->mb_index][xd->b_index];

-    case BLOCK_4X4:

-      return &x->ab4x4_context[xd->sb_index][xd->mb_index][xd->b_index];

-    default:

-      assert(0);

-      return NULL;

--- a/vp9/encoder/vp9_onyx_if.c

+++ b/vp9/encoder/vp9_onyx_if.c

@@ -1414,6 +1414,94 @@

   } while (++i <= MV_MAX);

+static void init_pick_mode_context(VP9_COMP *cpi) {  // allocates zcoeff_blk and sets num_4x4_blk on every context reachable via get_block_context()

+  int i;

+  MACROBLOCK  *x  = &cpi->mb;

+  MACROBLOCKD *xd = &x->e_mbd;  // NOTE(review): sb_index/mb_index/b_index are used as loop counters below and are left at their final values

+  VP9_COMMON  *cm = &cpi->common;  // handed to CHECK_MEM_ERROR; presumably where an allocation failure is reported -- confirm

+  for (i = 0; i < BLOCK_SIZES; ++i) {

+    const int num_4x4_w = num_4x4_blocks_wide_lookup[i];

+    const int num_4x4_h = num_4x4_blocks_high_lookup[i];

+    const int num_4x4_blk = MAX(4, num_4x4_w * num_4x4_h);  // element count of the buffer; presumably floored at 4 (assumes MAX is the usual maximum macro)

+    if (i < BLOCK_16X16) {  // walk sb_index, mb_index and b_index

+      for (xd->sb_index = 0; xd->sb_index < 4; ++xd->sb_index) {

+        for (xd->mb_index = 0; xd->mb_index < 4; ++xd->mb_index) {

+          for (xd->b_index = 0; xd->b_index < 16 / num_4x4_blk; ++xd->b_index) {

+            PICK_MODE_CONTEXT *ctx = get_block_context(x, i);

+            ctx->num_4x4_blk = num_4x4_blk;  // recorded so the memcpy call sites can size their copies from it

+            CHECK_MEM_ERROR(cm, ctx->zcoeff_blk,

+                            vpx_calloc(num_4x4_blk, sizeof(uint8_t)));

+          }

+        }

+      }

+    } else if (i < BLOCK_32X32) {  // walk sb_index and mb_index only

+      for (xd->sb_index = 0; xd->sb_index < 4; ++xd->sb_index) {

+        for (xd->mb_index = 0; xd->mb_index < 64 / num_4x4_blk;

+                               ++xd->mb_index) {

+          PICK_MODE_CONTEXT *ctx = get_block_context(x, i);

+          ctx->num_4x4_blk = num_4x4_blk;

+          CHECK_MEM_ERROR(cm, ctx->zcoeff_blk,

+                          vpx_calloc(num_4x4_blk, sizeof(uint8_t)));

+        }

+      }

+    } else if (i < BLOCK_64X64) {  // walk sb_index only

+      for (xd->sb_index = 0; xd->sb_index < 256 / num_4x4_blk; ++xd->sb_index) {

+        PICK_MODE_CONTEXT *ctx = get_block_context(x, i);

+        ctx->num_4x4_blk = num_4x4_blk;

+        CHECK_MEM_ERROR(cm, ctx->zcoeff_blk,

+                        vpx_calloc(num_4x4_blk, sizeof(uint8_t)));

+      }

+    } else {  // remaining sizes: one lookup, no index loop

+      PICK_MODE_CONTEXT *ctx = get_block_context(x, i);

+      ctx->num_4x4_blk = num_4x4_blk;

+      CHECK_MEM_ERROR(cm, ctx->zcoeff_blk,

+                      vpx_calloc(num_4x4_blk, sizeof(uint8_t)));

+    }

+  }

+}

+static void free_pick_mode_context(MACROBLOCK *x) {  // frees zcoeff_blk on every context, using the same loop bounds as init_pick_mode_context()

+  int i;

+  MACROBLOCKD *xd = &x->e_mbd;  // NOTE(review): sb_index/mb_index/b_index are used as loop counters below and are left at their final values

+  for (i = 0; i < BLOCK_SIZES; ++i) {

+    const int num_4x4_w = num_4x4_blocks_wide_lookup[i];

+    const int num_4x4_h = num_4x4_blocks_high_lookup[i];

+    const int num_4x4_blk = MAX(4, num_4x4_w * num_4x4_h);  // only used here to derive the loop bounds

+    if (i < BLOCK_16X16) {  // walk sb_index, mb_index and b_index

+      for (xd->sb_index = 0; xd->sb_index < 4; ++xd->sb_index) {

+        for (xd->mb_index = 0; xd->mb_index < 4; ++xd->mb_index) {

+          for (xd->b_index = 0; xd->b_index < 16 / num_4x4_blk; ++xd->b_index) {

+            PICK_MODE_CONTEXT *ctx = get_block_context(x, i);

+            vpx_free(ctx->zcoeff_blk);

+            ctx->zcoeff_blk = 0;  // clear the pointer once its buffer has been freed

+          }

+        }

+      }

+    } else if (i < BLOCK_32X32) {  // walk sb_index and mb_index only

+      for (xd->sb_index = 0; xd->sb_index < 4; ++xd->sb_index) {

+        for (xd->mb_index = 0; xd->mb_index < 64 / num_4x4_blk;

+                               ++xd->mb_index) {

+          PICK_MODE_CONTEXT *ctx = get_block_context(x, i);

+          vpx_free(ctx->zcoeff_blk);

+          ctx->zcoeff_blk = 0;

+        }

+      }

+    } else if (i < BLOCK_64X64) {  // walk sb_index only

+      for (xd->sb_index = 0; xd->sb_index < 256 / num_4x4_blk; ++xd->sb_index) {

+        PICK_MODE_CONTEXT *ctx = get_block_context(x, i);

+        vpx_free(ctx->zcoeff_blk);

+        ctx->zcoeff_blk = 0;

+      }

+    } else {  // remaining sizes: one lookup, no index loop

+      PICK_MODE_CONTEXT *ctx = get_block_context(x, i);

+      vpx_free(ctx->zcoeff_blk);

+      ctx->zcoeff_blk = 0;

+    }

+  }

+}

 VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) {

   int i, j;

   volatile union {

@@ -1450,6 +1538,8 @@

   init_config((VP9_PTR)cpi, oxcf);

+  init_pick_mode_context(cpi);

   cm->current_video_frame   = 0;

   cpi->kf_overspend_bits            = 0;

   cpi->kf_bitrate_adjustment        = 0;

@@ -1913,6 +2003,7 @@

 #endif

+  free_pick_mode_context(&cpi->mb);

   dealloc_compressor_data(cpi);

   vpx_free(cpi->mb.ss);

   vpx_free(cpi->tok);

--- a/vp9/encoder/vp9_rdopt.c

+++ b/vp9/encoder/vp9_rdopt.c

@@ -3587,7 +3587,7 @@

         best_mbmode = *mbmi;

         best_skip2 = this_skip2;

         vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],

-                   sizeof(ctx->zcoeff_blk));

+                   sizeof(uint8_t) * ctx->num_4x4_blk);

         // TODO(debargha): enhance this test with a better distortion prediction

         // based on qp, activity mask and history

@@ -4327,7 +4327,7 @@

         best_mbmode = *mbmi;

         best_skip2 = this_skip2;

         vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],

-                   sizeof(ctx->zcoeff_blk));

+                   sizeof(uint8_t) * ctx->num_4x4_blk);

         for (i = 0; i < 4; i++)

           best_bmodes[i] = xd->mi_8x8[0]->bmi[i];

--

⑨