shithub: libvpx

Download patch

ref: fe9b5143ba5192b5dc488bb3c86ffeea3dc481f0
parent: 9ba8aed179f796f90b80b44c1e4453dc2468a6cb
author: Deb Mukherjee <debargha@google.com>
date: Tue Mar 26 11:23:30 EDT 2013

Framework changes in nzc to allow more flexibility

The patch adds the flexibility to use standard EOB-based coding
on smaller block sizes and nzc-based coding on larger block sizes.
The tx-sizes that use nzc-based coding and those that use EOB-based
coding are selected by the function get_nzc_used().
By default, this function selects nzc-based coding for 16x16 and 32x32
transform blocks, which seems to bridge the performance gap
substantially.

All sets are now lower by 0.5% to 0.7%, as opposed to ~1.8% before.

Change-Id: I06abed3df57b52d241ea1f51b0d571c71e38fd0b

--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -672,4 +672,10 @@
   }
   return tx_size_uv;
 }
+
+#if CONFIG_CODE_NONZEROCOUNT  // get_nzc_used(): does tx_size use nzc coding?
+static int get_nzc_used(TX_SIZE tx_size) {  // returns 1 (used) or 0 (not)
+  return (tx_size >= TX_16X16);  // TX_16X16 and every larger TX_SIZE value
+}
+#endif
 #endif  // VP9_COMMON_VP9_BLOCKD_H_
--- a/vp9/common/vp9_default_coef_probs.h
+++ b/vp9/common/vp9_default_coef_probs.h
@@ -790,10 +790,10 @@
     }
   }, {
     {
-      { 29408, 11758, 8023, 10123, 6705, 2468, 369, 17, 10, 5 },
+      { 19408, 31758, 16023, 10123, 6705, 2468, 369, 17, 10, 5 },
       { 9612, 13874, 13329, 13022, 6500, 3250, 300, 12, 6, 3 },
     }, {
-      { 29408, 11758, 8023, 10123, 6705, 2468, 369, 17, 10, 5 },
+      { 22408, 11758, 8023, 10123, 6705, 2468, 369, 17, 10, 5 },
       { 9612, 13874, 13329, 13022, 6500, 3250, 300, 12, 6, 3 },
     }
   }
@@ -821,7 +821,7 @@
     }
   }, {
     {
-      { 29408, 11758, 8023, 10123, 6705, 2468, 369, 17, 10, 5, 2, 1 },
+      { 19408, 11758, 8023, 10123, 6705, 2468, 369, 17, 10, 5, 2, 1 },
       { 9612, 13874, 13329, 13022, 6500, 3250, 300, 12, 6, 3, 2, 1 },
     }, {
       { 29408, 11758, 8023, 10123, 6705, 2468, 369, 17, 10, 5, 2, 1 },
--- a/vp9/common/vp9_entropy.c
+++ b/vp9/common/vp9_entropy.c
@@ -2647,8 +2647,8 @@
     if (m->mbmi.mb_skip_coeff || !mb_in_cur_tile(cm, mb_row - r, mb_col - c))
       return 0;
     else
-    return get_nzc_4x4_uv_sb32(
-        &m->mbmi, mb16_to_sb32_index(mb_row, mb_col, block));
+      return get_nzc_4x4_uv_sb32(
+          &m->mbmi, mb16_to_sb32_index(mb_row, mb_col, block));
   } else {
     return get_nzc_4x4_uv_mb16(mi, block);
   }
@@ -3292,6 +3292,7 @@
                        int ref,
                        int type) {
   int e, c;
+  if (!get_nzc_used(tx_size)) return;
   c = codenzc(nzc);
   if (tx_size == TX_32X32)
     cm->fc.nzc_counts_32x32[nzc_context][ref][type][c]++;
@@ -3605,10 +3606,10 @@
 }
 
 #if CONFIG_CODE_NONZEROCOUNT
-static void adapt_nzc_probs(VP9_COMMON *cm,
-                            int block_size,
-                            int count_sat,
-                            int update_factor) {
+static void adapt_nzc_probs_common(VP9_COMMON *cm,
+                                   TX_SIZE tx_size,
+                                   int count_sat,
+                                   int update_factor) {
   int c, r, b, n;
   int count, factor;
   unsigned int nzc_branch_ct[NZC32X32_NODES][2];
@@ -3619,19 +3620,20 @@
   vp9_prob *pre_nzc_probs;
   unsigned int *nzc_counts;
 
-  if (block_size == 32) {
+  if (!get_nzc_used(tx_size)) return;
+  if (tx_size == TX_32X32) {
     tokens = NZC32X32_TOKENS;
     nzc_tree = vp9_nzc32x32_tree;
     dst_nzc_probs = cm->fc.nzc_probs_32x32[0][0][0];
     pre_nzc_probs = cm->fc.pre_nzc_probs_32x32[0][0][0];
     nzc_counts = cm->fc.nzc_counts_32x32[0][0][0];
-  } else if (block_size == 16) {
+  } else if (tx_size == TX_16X16) {
     tokens = NZC16X16_TOKENS;
     nzc_tree = vp9_nzc16x16_tree;
     dst_nzc_probs = cm->fc.nzc_probs_16x16[0][0][0];
     pre_nzc_probs = cm->fc.pre_nzc_probs_16x16[0][0][0];
     nzc_counts = cm->fc.nzc_counts_16x16[0][0][0];
-  } else if (block_size == 8) {
+  } else if (tx_size == TX_8X8) {
     tokens = NZC8X8_TOKENS;
     nzc_tree = vp9_nzc8x8_tree;
     dst_nzc_probs = cm->fc.nzc_probs_8x8[0][0][0];
@@ -3668,6 +3670,9 @@
 static void adapt_nzc_pcat(VP9_COMMON *cm, int count_sat, int update_factor) {
   int c, t;
   int count, factor;
+  if (!(get_nzc_used(TX_4X4) || get_nzc_used(TX_8X8) ||
+        get_nzc_used(TX_16X16) || get_nzc_used(TX_32X32)))
+    return;
   for (c = 0; c < MAX_NZC_CONTEXTS; ++c) {
     for (t = 0; t < NZC_TOKENS_EXTRA; ++t) {
       int bits = vp9_extranzcbits[t + NZC_TOKENS_NOEXTRA];
@@ -3717,10 +3722,10 @@
     count_sat = COEF_COUNT_SAT;
   }
 
-  adapt_nzc_probs(cm, 4, count_sat, update_factor);
-  adapt_nzc_probs(cm, 8, count_sat, update_factor);
-  adapt_nzc_probs(cm, 16, count_sat, update_factor);
-  adapt_nzc_probs(cm, 32, count_sat, update_factor);
+  adapt_nzc_probs_common(cm, TX_4X4, count_sat, update_factor);
+  adapt_nzc_probs_common(cm, TX_8X8, count_sat, update_factor);
+  adapt_nzc_probs_common(cm, TX_16X16, count_sat, update_factor);
+  adapt_nzc_probs_common(cm, TX_32X32, count_sat, update_factor);
   adapt_nzc_pcat(cm, count_sat, update_factor);
 }
 #endif  // CONFIG_CODE_NONZEROCOUNT
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -1157,6 +1157,7 @@
                          BOOL_DECODER* const bc) {
   int c, e;
   uint16_t nzc;
+  if (!get_nzc_used(tx_size)) return 0;
   if (tx_size == TX_32X32) {
     c = treed_read(bc, vp9_nzc32x32_tree,
                    cm->fc.nzc_probs_32x32[nzc_context][ref][type]);
--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@@ -151,12 +151,39 @@
   }
 }
 
+#if CONFIG_CODE_NONZEROCOUNT  // propagate_nzcs(): replicate nzcs over an SB
+static void propagate_nzcs(VP9_COMMON *cm, MACROBLOCKD *xd) {
+  MODE_INFO *m = xd->mode_info_context;  // source entry; its nzcs are copied
+  BLOCK_SIZE_TYPE sb_type = m->mbmi.sb_type;
+  const int mis = cm->mode_info_stride;  // step between mode-info rows
+  int n;
+  if (sb_type == BLOCK_SIZE_SB64X64) {  // 4x4 grid of mode-info entries
+    for (n = 0; n < 16; ++n) {  // NOTE(review): no frame-edge clipping; confirm
+      int i = n >> 2;  // row offset inside the superblock
+      int j = n & 3;  // column offset inside the superblock
+      if (i == 0 && j == 0) continue;  // (0,0) is the source entry itself
+      vpx_memcpy((m + j + mis * i)->mbmi.nzcs, m->mbmi.nzcs,
+                 384 * sizeof(m->mbmi.nzcs[0]));  // presumably the whole array
+    }
+  } else if (sb_type == BLOCK_SIZE_SB32X32) {  // 2x2 grid of mode-info entries
+    for (n = 0; n < 4; ++n) {  // NOTE(review): no frame-edge clipping; confirm
+      int i = n >> 1;  // row offset inside the superblock
+      int j = n & 1;  // column offset inside the superblock
+      if (i == 0 && j == 0) continue;  // (0,0) is the source entry itself
+      vpx_memcpy((m + j + mis * i)->mbmi.nzcs, m->mbmi.nzcs,
+                 384 * sizeof(m->mbmi.nzcs[0]));  // presumably the whole array
+    }
+  }  // any other sb_type: nothing is copied
+}
+#endif
+
 /* skip_recon_mb() is Modified: Instead of writing the result to predictor buffer and then copying it
  *  to dst buffer, we can write the result directly to dst buffer. This eliminates unnecessary copy.
  */
 static void skip_recon_mb(VP9D_COMP *pbi, MACROBLOCKD *xd,
                           int mb_row, int mb_col) {
-  BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
+  MODE_INFO *m = xd->mode_info_context;
+  BLOCK_SIZE_TYPE sb_type = m->mbmi.sb_type;
 
   if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) {
     if (sb_type == BLOCK_SIZE_SB64X64) {
@@ -196,6 +223,10 @@
                                          mb_row, mb_col);
     }
   }
+#if CONFIG_CODE_NONZEROCOUNT
+  vpx_memset(m->mbmi.nzcs, 0, 384 * sizeof(m->mbmi.nzcs[0]));
+  propagate_nzcs(&pbi->common, xd);
+#endif
 }
 
 static void decode_16x16(VP9D_COMP *pbi, MACROBLOCKD *xd,
@@ -468,16 +499,16 @@
   MODE_INFO *mi = xd->mode_info_context;
   const int mis = pc->mode_info_stride;
 
-  assert(xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB64X64);
+  assert(mi->mbmi.sb_type == BLOCK_SIZE_SB64X64);
 
   if (pbi->common.frame_type != KEY_FRAME)
-    vp9_setup_interp_filters(xd, xd->mode_info_context->mbmi.interp_filter, pc);
+    vp9_setup_interp_filters(xd, mi->mbmi.interp_filter, pc);
 
   // re-initialize macroblock dequantizer before detokenization
   if (xd->segmentation_enabled)
     mb_init_dequantizer(pbi, xd);
 
-  if (xd->mode_info_context->mbmi.mb_skip_coeff) {
+  if (mi->mbmi.mb_skip_coeff) {
     vp9_reset_sb64_tokens_context(xd);
 
     /* Special case:  Force the loopfilter to skip when eobtotal and
@@ -632,6 +663,9 @@
       default: assert(0);
     }
   }
+#if CONFIG_CODE_NONZEROCOUNT
+  propagate_nzcs(&pbi->common, xd);
+#endif
 }
 
 static void decode_superblock32(VP9D_COMP *pbi, MACROBLOCKD *xd,
@@ -639,18 +673,19 @@
                                 BOOL_DECODER* const bc) {
   int n, eobtotal;
   VP9_COMMON *const pc = &pbi->common;
+  MODE_INFO *mi = xd->mode_info_context;
   const int mis = pc->mode_info_stride;
 
-  assert(xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB32X32);
+  assert(mi->mbmi.sb_type == BLOCK_SIZE_SB32X32);
 
   if (pbi->common.frame_type != KEY_FRAME)
-    vp9_setup_interp_filters(xd, xd->mode_info_context->mbmi.interp_filter, pc);
+    vp9_setup_interp_filters(xd, mi->mbmi.interp_filter, pc);
 
   // re-initialize macroblock dequantizer before detokenization
   if (xd->segmentation_enabled)
     mb_init_dequantizer(pbi, xd);
 
-  if (xd->mode_info_context->mbmi.mb_skip_coeff) {
+  if (mi->mbmi.mb_skip_coeff) {
     vp9_reset_sb_tokens_context(xd);
 
     /* Special case:  Force the loopfilter to skip when eobtotal and
@@ -661,7 +696,7 @@
   }
 
   /* do prediction */
-  if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) {
+  if (mi->mbmi.ref_frame == INTRA_FRAME) {
     vp9_build_intra_predictors_sby_s(xd);
     vp9_build_intra_predictors_sbuv_s(xd);
   } else {
@@ -674,13 +709,13 @@
   /* dequantization and idct */
   eobtotal = vp9_decode_sb_tokens(pbi, xd, bc);
   if (eobtotal == 0) {  // skip loopfilter
-    xd->mode_info_context->mbmi.mb_skip_coeff = 1;
+    mi->mbmi.mb_skip_coeff = 1;
     if (mb_col + 1 < pc->mb_cols)
-      xd->mode_info_context[1].mbmi.mb_skip_coeff = 1;
+      mi[1].mbmi.mb_skip_coeff = 1;
     if (mb_row + 1 < pc->mb_rows) {
-      xd->mode_info_context[mis].mbmi.mb_skip_coeff = 1;
+      mi[mis].mbmi.mb_skip_coeff = 1;
       if (mb_col + 1 < pc->mb_cols)
-        xd->mode_info_context[mis + 1].mbmi.mb_skip_coeff = 1;
+        mi[mis + 1].mbmi.mb_skip_coeff = 1;
     }
   } else {
     switch (xd->mode_info_context->mbmi.txfm_size) {
@@ -793,6 +828,9 @@
       default: assert(0);
     }
   }
+#if CONFIG_CODE_NONZEROCOUNT
+  propagate_nzcs(&pbi->common, xd);
+#endif
 }
 
 static void decode_macroblock(VP9D_COMP *pbi, MACROBLOCKD *xd,
@@ -1130,23 +1168,24 @@
 #if CONFIG_CODE_NONZEROCOUNT
 static void read_nzc_probs_common(VP9_COMMON *cm,
                                   BOOL_DECODER* const bc,
-                                  int block_size) {
+                                  TX_SIZE tx_size) {
   int c, r, b, t;
   int tokens, nodes;
   vp9_prob *nzc_probs;
   vp9_prob upd;
 
+  if (!get_nzc_used(tx_size)) return;
   if (!vp9_read_bit(bc)) return;
 
-  if (block_size == 32) {
+  if (tx_size == TX_32X32) {
     tokens = NZC32X32_TOKENS;
     nzc_probs = cm->fc.nzc_probs_32x32[0][0][0];
     upd = NZC_UPDATE_PROB_32X32;
-  } else if (block_size == 16) {
+  } else if (tx_size == TX_16X16) {
     tokens = NZC16X16_TOKENS;
     nzc_probs = cm->fc.nzc_probs_16x16[0][0][0];
     upd = NZC_UPDATE_PROB_16X16;
-  } else if (block_size == 8) {
+  } else if (tx_size == TX_8X8) {
     tokens = NZC8X8_TOKENS;
     nzc_probs = cm->fc.nzc_probs_8x8[0][0][0];
     upd = NZC_UPDATE_PROB_8X8;
@@ -1175,6 +1214,9 @@
 static void read_nzc_pcat_probs(VP9_COMMON *cm, BOOL_DECODER* const bc) {
   int c, t, b;
   vp9_prob upd = NZC_UPDATE_PROB_PCAT;
+  if (!(get_nzc_used(TX_4X4) || get_nzc_used(TX_8X8) ||
+        get_nzc_used(TX_16X16) || get_nzc_used(TX_32X32)))
+    return;
   if (!vp9_read_bit(bc)) {
     return;
   }
@@ -1193,13 +1235,13 @@
 
 static void read_nzc_probs(VP9_COMMON *cm,
                            BOOL_DECODER* const bc) {
-  read_nzc_probs_common(cm, bc, 4);
+  read_nzc_probs_common(cm, bc, TX_4X4);
   if (cm->txfm_mode != ONLY_4X4)
-    read_nzc_probs_common(cm, bc, 8);
+    read_nzc_probs_common(cm, bc, TX_8X8);
   if (cm->txfm_mode > ALLOW_8X8)
-    read_nzc_probs_common(cm, bc, 16);
+    read_nzc_probs_common(cm, bc, TX_16X16);
   if (cm->txfm_mode > ALLOW_16X16)
-    read_nzc_probs_common(cm, bc, 32);
+    read_nzc_probs_common(cm, bc, TX_32X32);
 #ifdef NZC_PCAT_UPDATE
   read_nzc_pcat_probs(cm, bc);
 #endif
@@ -1208,7 +1250,7 @@
 
 static void read_coef_probs_common(BOOL_DECODER* const bc,
                                    vp9_coeff_probs *coef_probs,
-                                   int block_types) {
+                                   TX_SIZE tx_size) {
 #if CONFIG_MODELCOEFPROB && MODEL_BASED_UPDATE
   const int entropy_nodes_update = UNCONSTRAINED_UPDATE_NODES;
 #else
@@ -1218,13 +1260,19 @@
   int i, j, k, l, m;
 
   if (vp9_read_bit(bc)) {
-    for (i = 0; i < block_types; i++) {
+    for (i = 0; i < BLOCK_TYPES; i++) {
       for (j = 0; j < REF_TYPES; j++) {
         for (k = 0; k < COEF_BANDS; k++) {
           for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
+#if CONFIG_CODE_NONZEROCOUNT
+            const int mstart = get_nzc_used(tx_size);
+#else
+            const int mstart = 0;
+#endif
             if (l >= 3 && k == 0)
               continue;
-            for (m = CONFIG_CODE_NONZEROCOUNT; m < entropy_nodes_update; m++) {
+
+            for (m = mstart; m < entropy_nodes_update; m++) {
               vp9_prob *const p = coef_probs[i][j][k][l] + m;
 
               if (vp9_read(bc, vp9_coef_update_prob[m])) {
@@ -1245,16 +1293,16 @@
 static void read_coef_probs(VP9D_COMP *pbi, BOOL_DECODER* const bc) {
   VP9_COMMON *const pc = &pbi->common;
 
-  read_coef_probs_common(bc, pc->fc.coef_probs_4x4, BLOCK_TYPES);
+  read_coef_probs_common(bc, pc->fc.coef_probs_4x4, TX_4X4);
 
   if (pbi->common.txfm_mode != ONLY_4X4)
-    read_coef_probs_common(bc, pc->fc.coef_probs_8x8, BLOCK_TYPES);
+    read_coef_probs_common(bc, pc->fc.coef_probs_8x8, TX_8X8);
 
   if (pbi->common.txfm_mode > ALLOW_8X8)
-    read_coef_probs_common(bc, pc->fc.coef_probs_16x16, BLOCK_TYPES);
+    read_coef_probs_common(bc, pc->fc.coef_probs_16x16, TX_16X16);
 
   if (pbi->common.txfm_mode > ALLOW_16X16)
-    read_coef_probs_common(bc, pc->fc.coef_probs_32x32, BLOCK_TYPES);
+    read_coef_probs_common(bc, pc->fc.coef_probs_32x32, TX_32X32);
 }
 
 static void update_frame_size(VP9D_COMP *pbi) {
--- a/vp9/decoder/vp9_detokenize.c
+++ b/vp9/decoder/vp9_detokenize.c
@@ -112,8 +112,10 @@
   vp9_coeff_count *coef_counts;
   const int ref = xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME;
 #if CONFIG_CODE_NONZEROCOUNT
+  const int nzc_used = get_nzc_used(txfm_size);
   uint16_t nzc = 0;
-  uint16_t nzc_expected = xd->mode_info_context->mbmi.nzcs[block_idx];
+  uint16_t nzc_expected =
+      nzc_used ? xd->mode_info_context->mbmi.nzcs[block_idx] : 0;
 #endif
   const int *scan, *nb;
   uint8_t token_cache[1024];
@@ -244,24 +246,25 @@
     if (c >= seg_eob)
       break;
 #if CONFIG_CODE_NONZEROCOUNT
-    if (nzc == nzc_expected)
+    if (nzc_used && nzc == nzc_expected)
       break;
 #endif
     prob = coef_probs[type][ref][get_coef_band(scan, txfm_size, c)][pt];
-#if CONFIG_CODE_NONZEROCOUNT == 0
     fc->eob_branch_counts[txfm_size][type][ref]
                          [get_coef_band(scan, txfm_size, c)][pt]++;
-    if (!vp9_read(br, prob[EOB_CONTEXT_NODE]))
-      break;
+#if CONFIG_CODE_NONZEROCOUNT
+    if (!nzc_used)
 #endif
+      if (!vp9_read(br, prob[EOB_CONTEXT_NODE]))
+        break;
 SKIP_START:
     if (c >= seg_eob)
       break;
 #if CONFIG_CODE_NONZEROCOUNT
-    if (nzc == nzc_expected)
+    if (nzc_used && nzc == nzc_expected)
       break;
     // decode zero node only if there are zeros left
-    if (seg_eob - nzc_expected - c + nzc > 0)
+    if (!nzc_used || seg_eob - nzc_expected - c + nzc > 0)
 #endif
     if (!vp9_read(br, prob[ZERO_CONTEXT_NODE])) {
       INCREMENT_COUNT(ZERO_TOKEN);
@@ -329,10 +332,17 @@
     WRITE_COEF_CONTINUE(val, DCT_VAL_CATEGORY6);
   }
 
-#if CONFIG_CODE_NONZEROCOUNT == 0
-  if (c < seg_eob)
-    coef_counts[type][ref][get_coef_band(scan, txfm_size, c)]
-               [pt][DCT_EOB_TOKEN]++;
+#if CONFIG_CODE_NONZEROCOUNT
+  if (!nzc_used)
+#endif
+    if (c < seg_eob)
+      coef_counts[type][ref][get_coef_band(scan, txfm_size, c)]
+                 [pt][DCT_EOB_TOKEN]++;
+#if CONFIG_CODE_NONZEROCOUNT
+  if (!nzc_used)
+    xd->mode_info_context->mbmi.nzcs[block_idx] = nzc;
+  else
+    assert(nzc == nzc_expected);
 #endif
 
   A0[aidx] = L0[lidx] = c > 0;
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -1087,7 +1087,7 @@
 }
 
 #if CONFIG_CODE_NONZEROCOUNT
-static void write_nzc(VP9_COMMON *const cm,
+static void write_nzc(VP9_COMP *const cpi,
                       uint16_t nzc,
                       int nzc_context,
                       TX_SIZE tx_size,
@@ -1094,7 +1094,11 @@
                       int ref,
                       int type,
                       vp9_writer* const bc) {
+  VP9_COMMON *const cm = &cpi->common;
   int c, e;
+  // if (!cpi->dummy_packing && cm->current_video_frame == 27)
+  //   printf("nzc: %d, tx_size: %d\n", nzc, tx_size);
+  if (!get_nzc_used(tx_size)) return;
   c = codenzc(nzc);
   if (tx_size == TX_32X32) {
     write_token(bc, vp9_nzc32x32_tree,
@@ -1152,11 +1156,11 @@
     case TX_32X32:
       for (j = 0; j < 256; j += 64) {
         nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j);
-        write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_32X32, ref, 0, bc);
+        write_nzc(cpi, m->mbmi.nzcs[j], nzc_context, TX_32X32, ref, 0, bc);
       }
       for (j = 256; j < 384; j += 64) {
         nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j);
-        write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_32X32, ref, 1, bc);
+        write_nzc(cpi, m->mbmi.nzcs[j], nzc_context, TX_32X32, ref, 1, bc);
       }
       break;
 
@@ -1163,11 +1167,11 @@
     case TX_16X16:
       for (j = 0; j < 256; j += 16) {
         nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j);
-        write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 0, bc);
+        write_nzc(cpi, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 0, bc);
       }
       for (j = 256; j < 384; j += 16) {
         nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j);
-        write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 1, bc);
+        write_nzc(cpi, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 1, bc);
       }
       break;
 
@@ -1174,11 +1178,11 @@
     case TX_8X8:
       for (j = 0; j < 256; j += 4) {
         nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j);
-        write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 0, bc);
+        write_nzc(cpi, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 0, bc);
       }
       for (j = 256; j < 384; j += 4) {
         nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j);
-        write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 1, bc);
+        write_nzc(cpi, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 1, bc);
       }
       break;
 
@@ -1185,11 +1189,11 @@
     case TX_4X4:
       for (j = 0; j < 256; ++j) {
         nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j);
-        write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 0, bc);
+        write_nzc(cpi, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 0, bc);
       }
       for (j = 256; j < 384; ++j) {
         nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j);
-        write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 1, bc);
+        write_nzc(cpi, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 1, bc);
       }
       break;
 
@@ -1219,11 +1223,11 @@
     case TX_32X32:
       for (j = 0; j < 64; j += 64) {
         nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, j);
-        write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_32X32, ref, 0, bc);
+        write_nzc(cpi, m->mbmi.nzcs[j], nzc_context, TX_32X32, ref, 0, bc);
       }
       for (j = 64; j < 96; j += 16) {
         nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j);
-        write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 1, bc);
+        write_nzc(cpi, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 1, bc);
       }
       break;
 
@@ -1230,11 +1234,11 @@
     case TX_16X16:
       for (j = 0; j < 64; j += 16) {
         nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, j);
-        write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 0, bc);
+        write_nzc(cpi, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 0, bc);
       }
       for (j = 64; j < 96; j += 16) {
         nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j);
-        write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 1, bc);
+        write_nzc(cpi, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 1, bc);
       }
       break;
 
@@ -1241,11 +1245,11 @@
     case TX_8X8:
       for (j = 0; j < 64; j += 4) {
         nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, j);
-        write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 0, bc);
+        write_nzc(cpi, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 0, bc);
       }
       for (j = 64; j < 96; j += 4) {
         nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j);
-        write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 1, bc);
+        write_nzc(cpi, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 1, bc);
       }
       break;
 
@@ -1252,11 +1256,11 @@
     case TX_4X4:
       for (j = 0; j < 64; ++j) {
         nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, j);
-        write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 0, bc);
+        write_nzc(cpi, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 0, bc);
       }
       for (j = 64; j < 96; ++j) {
         nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j);
-        write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 1, bc);
+        write_nzc(cpi, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 1, bc);
       }
       break;
 
@@ -1286,11 +1290,11 @@
     case TX_16X16:
       for (j = 0; j < 16; j += 16) {
         nzc_context = vp9_get_nzc_context_y_mb16(cm, m, mb_row, mb_col, j);
-        write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 0, bc);
+        write_nzc(cpi, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 0, bc);
       }
       for (j = 16; j < 24; j += 4) {
         nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, mb_col, j);
-        write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 1, bc);
+        write_nzc(cpi, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 1, bc);
       }
       break;
 
@@ -1297,17 +1301,17 @@
     case TX_8X8:
       for (j = 0; j < 16; j += 4) {
         nzc_context = vp9_get_nzc_context_y_mb16(cm, m, mb_row, mb_col, j);
-        write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 0, bc);
+        write_nzc(cpi, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 0, bc);
       }
       if (mi->mode == I8X8_PRED || mi->mode == SPLITMV) {
         for (j = 16; j < 24; ++j) {
           nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, mb_col, j);
-          write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 1, bc);
+          write_nzc(cpi, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 1, bc);
         }
       } else {
         for (j = 16; j < 24; j += 4) {
           nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, mb_col, j);
-          write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 1, bc);
+          write_nzc(cpi, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 1, bc);
         }
       }
       break;
@@ -1315,11 +1319,11 @@
     case TX_4X4:
       for (j = 0; j < 16; ++j) {
         nzc_context = vp9_get_nzc_context_y_mb16(cm, m, mb_row, mb_col, j);
-        write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 0, bc);
+        write_nzc(cpi, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 0, bc);
       }
       for (j = 16; j < 24; ++j) {
         nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, mb_col, j);
-        write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 1, bc);
+        write_nzc(cpi, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 1, bc);
       }
       break;
 
@@ -1835,7 +1839,7 @@
 #if CONFIG_CODE_NONZEROCOUNT
 static void update_nzc_probs_common(VP9_COMP* cpi,
                                     vp9_writer* const bc,
-                                    int block_size) {
+                                    TX_SIZE tx_size) {
   VP9_COMMON *cm = &cpi->common;
   int c, r, b, t;
   int update[2] = {0, 0};
@@ -1848,7 +1852,8 @@
   unsigned int (*nzc_branch_ct)[2];
   vp9_prob upd;
 
-  if (block_size == 32) {
+  if (!get_nzc_used(tx_size)) return;
+  if (tx_size == TX_32X32) {
     tokens = NZC32X32_TOKENS;
     nzc_tree = vp9_nzc32x32_tree;
     old_nzc_probs = cm->fc.nzc_probs_32x32[0][0][0];
@@ -1856,7 +1861,7 @@
     nzc_counts = cm->fc.nzc_counts_32x32[0][0][0];
     nzc_branch_ct = cpi->frame_nzc_branch_ct_32x32[0][0][0];
     upd = NZC_UPDATE_PROB_32X32;
-  } else if (block_size == 16) {
+  } else if (tx_size == TX_16X16) {
     tokens = NZC16X16_TOKENS;
     nzc_tree = vp9_nzc16x16_tree;
     old_nzc_probs = cm->fc.nzc_probs_16x16[0][0][0];
@@ -1864,7 +1869,7 @@
     nzc_counts = cm->fc.nzc_counts_16x16[0][0][0];
     nzc_branch_ct = cpi->frame_nzc_branch_ct_16x16[0][0][0];
     upd = NZC_UPDATE_PROB_16X16;
-  } else if (block_size == 8) {
+  } else if (tx_size == TX_8X8) {
     tokens = NZC8X8_TOKENS;
     nzc_tree = vp9_nzc8x8_tree;
     old_nzc_probs = cm->fc.nzc_probs_8x8[0][0][0];
@@ -1971,6 +1976,9 @@
   int update[2] = {0, 0};
   int savings = 0;
   vp9_prob upd = NZC_UPDATE_PROB_PCAT;
+  if (!(get_nzc_used(TX_4X4) || get_nzc_used(TX_8X8) ||
+        get_nzc_used(TX_16X16) || get_nzc_used(TX_32X32)))
+    return;
   for (c = 0; c < MAX_NZC_CONTEXTS; ++c) {
     for (t = 0; t < NZC_TOKENS_EXTRA; ++t) {
       int bits = vp9_extranzcbits[t + NZC_TOKENS_NOEXTRA];
@@ -2037,13 +2045,13 @@
 
 static void update_nzc_probs(VP9_COMP* cpi,
                              vp9_writer* const bc) {
-  update_nzc_probs_common(cpi, bc, 4);
+  update_nzc_probs_common(cpi, bc, TX_4X4);
   if (cpi->common.txfm_mode != ONLY_4X4)
-    update_nzc_probs_common(cpi, bc, 8);
+    update_nzc_probs_common(cpi, bc, TX_8X8);
   if (cpi->common.txfm_mode > ALLOW_8X8)
-    update_nzc_probs_common(cpi, bc, 16);
+    update_nzc_probs_common(cpi, bc, TX_16X16);
   if (cpi->common.txfm_mode > ALLOW_16X16)
-    update_nzc_probs_common(cpi, bc, 32);
+    update_nzc_probs_common(cpi, bc, TX_32X32);
 #ifdef NZC_PCAT_UPDATE
   update_nzc_pcat_probs(cpi, bc);
 #endif
@@ -2062,7 +2070,7 @@
                                      vp9_coeff_probs *new_frame_coef_probs,
                                      vp9_coeff_probs *old_frame_coef_probs,
                                      vp9_coeff_stats *frame_branch_ct,
-                                     int block_types) {
+                                     TX_SIZE tx_size) {
   int i, j, k, l, t;
   int update[2] = {0, 0};
   int savings;
@@ -2073,14 +2081,19 @@
 #endif
   // vp9_prob bestupd = find_coef_update_prob(cpi);
 
+#if CONFIG_CODE_NONZEROCOUNT
+  const int tstart = get_nzc_used(tx_size);
+#else
+  const int tstart = 0;
+#endif
   /* dry run to see if there is any udpate at all needed */
   savings = 0;
-  for (i = 0; i < block_types; ++i) {
+  for (i = 0; i < BLOCK_TYPES; ++i) {
     for (j = 0; j < REF_TYPES; ++j) {
       for (k = 0; k < COEF_BANDS; ++k) {
         // int prev_coef_savings[ENTROPY_NODES] = {0};
         for (l = 0; l < PREV_COEF_CONTEXTS; ++l) {
-          for (t = CONFIG_CODE_NONZEROCOUNT; t < entropy_nodes_update; ++t) {
+          for (t = tstart; t < entropy_nodes_update; ++t) {
             vp9_prob newp = new_frame_coef_probs[i][j][k][l][t];
             const vp9_prob oldp = old_frame_coef_probs[i][j][k][l][t];
             const vp9_prob upd = vp9_coef_update_prob[t];
@@ -2128,13 +2141,13 @@
     return;
   }
   vp9_write_bit(bc, 1);
-  for (i = 0; i < block_types; ++i) {
+  for (i = 0; i < BLOCK_TYPES; ++i) {
     for (j = 0; j < REF_TYPES; ++j) {
       for (k = 0; k < COEF_BANDS; ++k) {
         // int prev_coef_savings[ENTROPY_NODES] = {0};
         for (l = 0; l < PREV_COEF_CONTEXTS; ++l) {
           // calc probs and branch cts for this frame only
-          for (t = CONFIG_CODE_NONZEROCOUNT; t < entropy_nodes_update; ++t) {
+          for (t = tstart; t < entropy_nodes_update; ++t) {
             vp9_prob newp = new_frame_coef_probs[i][j][k][l][t];
             vp9_prob *oldp = old_frame_coef_probs[i][j][k][l] + t;
             const vp9_prob upd = vp9_coef_update_prob[t];
@@ -2198,7 +2211,7 @@
                            cpi->frame_coef_probs_4x4,
                            cpi->common.fc.coef_probs_4x4,
                            cpi->frame_branch_ct_4x4,
-                           BLOCK_TYPES);
+                           TX_4X4);
 
   /* do not do this if not even allowed */
   if (cpi->common.txfm_mode != ONLY_4X4) {
@@ -2210,7 +2223,7 @@
                              cpi->frame_coef_probs_8x8,
                              cpi->common.fc.coef_probs_8x8,
                              cpi->frame_branch_ct_8x8,
-                             BLOCK_TYPES);
+                             TX_8X8);
   }
 
   if (cpi->common.txfm_mode > ALLOW_8X8) {
@@ -2222,7 +2235,7 @@
                              cpi->frame_coef_probs_16x16,
                              cpi->common.fc.coef_probs_16x16,
                              cpi->frame_branch_ct_16x16,
-                             BLOCK_TYPES);
+                             TX_16X16);
   }
 
   if (cpi->common.txfm_mode > ALLOW_16X16) {
@@ -2234,7 +2247,7 @@
                              cpi->frame_coef_probs_32x32,
                              cpi->common.fc.coef_probs_32x32,
                              cpi->frame_branch_ct_32x32,
-                             BLOCK_TYPES);
+                             TX_32X32);
   }
 }
 
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -1958,9 +1958,10 @@
 
 static void gather_nzcs_sb32(VP9_COMMON *const cm,
                              MACROBLOCKD *xd) {
-  int i, j;
   MODE_INFO *m = xd->mode_info_context;
   int mis = cm->mode_info_stride;
+  int i, j;
+
   vpx_memset(m->mbmi.nzcs, 0,
              384 * sizeof(xd->mode_info_context->mbmi.nzcs[0]));
   switch (xd->mode_info_context->mbmi.txfm_size) {
@@ -2002,9 +2003,10 @@
 
 static void gather_nzcs_sb64(VP9_COMMON *const cm,
                              MACROBLOCKD *xd) {
-  int i, j;
   MODE_INFO *m = xd->mode_info_context;
   int mis = cm->mode_info_stride;
+  int i, j;
+
   vpx_memset(xd->mode_info_context->mbmi.nzcs, 0,
              384 * sizeof(xd->mode_info_context->mbmi.nzcs[0]));
   switch (xd->mode_info_context->mbmi.txfm_size) {
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -573,6 +573,7 @@
   // The current implementation uses a suboptimal approach to account for
   // the nzc rates somewhat, but in reality the optimization approach needs
   // to change substantially.
+  const int nzc_used = get_nzc_used(tx_size);
   uint16_t nzc = xd->nzcs[ib];
   uint16_t nzc0, nzc1;
   uint16_t final_nzc = 0, final_nzc_exp;
@@ -649,7 +650,7 @@
   memset(best_index, 0, sizeof(best_index));
   /* Initialize the sentinel node of the trellis. */
 #if CONFIG_CODE_NONZEROCOUNT
-  tokens[eob][0].rate = nzc_cost[nzc];
+  tokens[eob][0].rate = nzc_used ? nzc_cost[nzc] : 0;
 #else
   tokens[eob][0].rate = 0;
 #endif
@@ -734,8 +735,10 @@
 #if CONFIG_CODE_NONZEROCOUNT
         // Account for rate drop because of the nzc change.
         // TODO(debargha): Find a better solution
-        rate0 -= nzc_cost[nzc0] - nzc_cost[nzc0 - 1];
-        rate1 -= nzc_cost[nzc1] - nzc_cost[nzc1 - 1];
+        if (nzc_used) {
+          rate0 -= nzc_cost[nzc0] - nzc_cost[nzc0 - 1];
+          rate1 -= nzc_cost[nzc1] - nzc_cost[nzc1 - 1];
+        }
 #endif
       } else {
         t0 = t1 = (vp9_dct_value_tokens_ptr + x)->Token;
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -149,10 +149,10 @@
 
 static void fill_token_costs(vp9_coeff_count *c,
                              vp9_coeff_probs *p,
-                             int block_type_counts) {
+                             TX_SIZE tx_size) {
   int i, j, k, l;
 
-  for (i = 0; i < block_type_counts; i++)
+  for (i = 0; i < BLOCK_TYPES; i++)
     for (j = 0; j < REF_TYPES; j++)
       for (k = 0; k < COEF_BANDS; k++)
         for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
@@ -163,26 +163,26 @@
 }
 
 #if CONFIG_CODE_NONZEROCOUNT
-static void fill_nzc_costs(VP9_COMP *cpi, int block_size) {
+static void fill_nzc_costs(VP9_COMP *cpi, TX_SIZE tx_size) {
   int nzc_context, r, b, nzc, values;
   int cost[16];
-  values = block_size * block_size + 1;
+  values = (16 << (2 * tx_size)) + 1;
 
   for (nzc_context = 0; nzc_context < MAX_NZC_CONTEXTS; ++nzc_context) {
     for (r = 0; r < REF_TYPES; ++r) {
       for (b = 0; b < BLOCK_TYPES; ++b) {
         unsigned int *nzc_costs;
-        if (block_size == 4) {
+        if (tx_size == TX_4X4) {
           vp9_cost_tokens(cost,
                           cpi->common.fc.nzc_probs_4x4[nzc_context][r][b],
                           vp9_nzc4x4_tree);
           nzc_costs = cpi->mb.nzc_costs_4x4[nzc_context][r][b];
-        } else if (block_size == 8) {
+        } else if (tx_size == TX_8X8) {
           vp9_cost_tokens(cost,
                           cpi->common.fc.nzc_probs_8x8[nzc_context][r][b],
                           vp9_nzc8x8_tree);
           nzc_costs = cpi->mb.nzc_costs_8x8[nzc_context][r][b];
-        } else if (block_size == 16) {
+        } else if (tx_size == TX_16X16) {
           vp9_cost_tokens(cost,
                           cpi->common.fc.nzc_probs_16x16[nzc_context][r][b],
                           vp9_nzc16x16_tree);
@@ -308,18 +308,18 @@
   }
 
   fill_token_costs(cpi->mb.token_costs[TX_4X4],
-                   cpi->common.fc.coef_probs_4x4, BLOCK_TYPES);
+                   cpi->common.fc.coef_probs_4x4, TX_4X4);
   fill_token_costs(cpi->mb.token_costs[TX_8X8],
-                   cpi->common.fc.coef_probs_8x8, BLOCK_TYPES);
+                   cpi->common.fc.coef_probs_8x8, TX_8X8);
   fill_token_costs(cpi->mb.token_costs[TX_16X16],
-                   cpi->common.fc.coef_probs_16x16, BLOCK_TYPES);
+                   cpi->common.fc.coef_probs_16x16, TX_16X16);
   fill_token_costs(cpi->mb.token_costs[TX_32X32],
-                   cpi->common.fc.coef_probs_32x32, BLOCK_TYPES);
+                   cpi->common.fc.coef_probs_32x32, TX_32X32);
 #if CONFIG_CODE_NONZEROCOUNT
-  fill_nzc_costs(cpi, 4);
-  fill_nzc_costs(cpi, 8);
-  fill_nzc_costs(cpi, 16);
-  fill_nzc_costs(cpi, 32);
+  fill_nzc_costs(cpi, TX_4X4);
+  fill_nzc_costs(cpi, TX_8X8);
+  fill_nzc_costs(cpi, TX_16X16);
+  fill_nzc_costs(cpi, TX_32X32);
 #endif
 
   /*rough estimate for costing*/
@@ -449,13 +449,13 @@
       sizeof(ENTROPY_CONTEXT_PLANES)/sizeof(ENTROPY_CONTEXT);
 
 #if CONFIG_CODE_NONZEROCOUNT
+  const int nzc_used = get_nzc_used(tx_size);
   int nzc_context = vp9_get_nzc_context(cm, xd, ib);
   unsigned int *nzc_cost;
-#else
+#endif
   const int segment_id = xd->mode_info_context->mbmi.segment_id;
   vp9_prob (*coef_probs)[REF_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS]
                         [ENTROPY_NODES];
-#endif
   int seg_eob, default_eob;
   uint8_t token_cache[1024];
 
@@ -475,9 +475,8 @@
       l_ec = *l;
 #if CONFIG_CODE_NONZEROCOUNT
       nzc_cost = mb->nzc_costs_4x4[nzc_context][ref][type];
-#else
-      coef_probs = cm->fc.coef_probs_4x4;
 #endif
+      coef_probs = cm->fc.coef_probs_4x4;
       seg_eob = 16;
       if (tx_type == ADST_DCT) {
         scan = vp9_row_scan_4x4;
@@ -504,9 +503,8 @@
       }
 #if CONFIG_CODE_NONZEROCOUNT
       nzc_cost = mb->nzc_costs_8x8[nzc_context][ref][type];
-#else
-      coef_probs = cm->fc.coef_probs_8x8;
 #endif
+      coef_probs = cm->fc.coef_probs_8x8;
       seg_eob = 64;
       break;
     }
@@ -524,9 +522,8 @@
       }
 #if CONFIG_CODE_NONZEROCOUNT
       nzc_cost = mb->nzc_costs_16x16[nzc_context][ref][type];
-#else
-      coef_probs = cm->fc.coef_probs_16x16;
 #endif
+      coef_probs = cm->fc.coef_probs_16x16;
       seg_eob = 256;
       if (type == PLANE_TYPE_UV) {
         a_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0;
@@ -541,9 +538,8 @@
       scan = vp9_default_zig_zag1d_32x32;
 #if CONFIG_CODE_NONZEROCOUNT
       nzc_cost = mb->nzc_costs_32x32[nzc_context][ref][type];
-#else
-      coef_probs = cm->fc.coef_probs_32x32;
 #endif
+      coef_probs = cm->fc.coef_probs_32x32;
       seg_eob = 1024;
       if (type == PLANE_TYPE_UV) {
         ENTROPY_CONTEXT *a2, *a3, *l2, *l3;
@@ -571,10 +567,11 @@
   nb = vp9_get_coef_neighbors_handle(scan, &pad);
   default_eob = seg_eob;
 
-#if CONFIG_CODE_NONZEROCOUNT == 0
-  if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))
-    seg_eob = 0;
+#if CONFIG_CODE_NONZEROCOUNT
+  if (!nzc_used)
 #endif
+    if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))
+      seg_eob = 0;
 
   {
 #if CONFIG_CODE_NONZEROCOUNT
@@ -598,13 +595,14 @@
       pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);
     }
 #if CONFIG_CODE_NONZEROCOUNT
-    cost += nzc_cost[nzc];
-#else
-    if (c < seg_eob)
-      cost += mb->token_costs[tx_size][type][ref]
-                             [get_coef_band(scan, tx_size, c)]
-                             [pt][DCT_EOB_TOKEN];
+    if (nzc_used)
+      cost += nzc_cost[nzc];
+    else
 #endif
+      if (c < seg_eob)
+        cost += mb->token_costs[tx_size][type][ref]
+                               [get_coef_band(scan, tx_size, c)]
+                               [pt][DCT_EOB_TOKEN];
   }
 
   // is eob first coefficient;
--- a/vp9/encoder/vp9_tokenize.c
+++ b/vp9/encoder/vp9_tokenize.c
@@ -134,7 +134,8 @@
   ENTROPY_CONTEXT *a, *l, *a1, *l1, *a2, *l2, *a3, *l3, a_ec, l_ec;
   uint8_t token_cache[1024];
 #if CONFIG_CODE_NONZEROCOUNT
-  int zerosleft, nzc = 0;
+  const int nzc_used = get_nzc_used(tx_size);
+  int zerosleft = 0, nzc = 0;
   if (eob == 0)
     assert(xd->nzcs[ib] == 0);
 #endif
@@ -255,7 +256,8 @@
     int token;
     int v = 0;
 #if CONFIG_CODE_NONZEROCOUNT
-    zerosleft = seg_eob - xd->nzcs[ib] - c + nzc;
+    if (nzc_used)
+      zerosleft = seg_eob - xd->nzcs[ib] - c + nzc;
 #endif
     if (c < eob) {
       const int rc = scan[c];
@@ -266,10 +268,11 @@
       token    = vp9_dct_value_tokens_ptr[v].Token;
     } else {
 #if CONFIG_CODE_NONZEROCOUNT
-      break;
-#else
-      token = DCT_EOB_TOKEN;
+      if (nzc_used)
+        break;
+      else
 #endif
+        token = DCT_EOB_TOKEN;
     }
 
     t->Token = token;
@@ -276,10 +279,11 @@
     t->context_tree = probs[type][ref][band][pt];
 #if CONFIG_CODE_NONZEROCOUNT
     // Skip zero node if there are no zeros left
-    t->skip_eob_node = 1 + (zerosleft == 0);
-#else
-    t->skip_eob_node = (c > 0) && (token_cache[c - 1] == 0);
+    if (nzc_used)
+      t->skip_eob_node = 1 + (zerosleft == 0);
+    else
 #endif
+      t->skip_eob_node = (c > 0) && (token_cache[c - 1] == 0);
     assert(vp9_coef_encodings[t->Token].Len - t->skip_eob_node > 0);
     if (!dry_run) {
       ++counts[type][ref][band][pt][token];
@@ -975,14 +979,15 @@
                     int dry_run) {
   MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
   const BLOCK_SIZE_TYPE sb_type = mbmi->sb_type;
-#if CONFIG_CODE_NONZEROCOUNT == 0
   vp9_coeff_count *counts;
   vp9_coeff_probs *probs;
   int pt, band;
   TOKENEXTRA *t = *tp;
   const int ref = mbmi->ref_frame != INTRA_FRAME;
-#endif
   ENTROPY_CONTEXT *a, *l, *a1, *l1, *a2, *l2, *a3, *l3, a_ec, l_ec;
+#if CONFIG_CODE_NONZEROCOUNT
+  const int nzc_used = get_nzc_used(tx_size);
+#endif
 
   if (sb_type == BLOCK_SIZE_SB32X32) {
     a = (ENTROPY_CONTEXT *)xd->above_context +
@@ -1011,18 +1016,14 @@
     case TX_4X4:
       a_ec = a[0];
       l_ec = l[0];
-#if CONFIG_CODE_NONZEROCOUNT == 0
       counts = cpi->coef_counts_4x4;
       probs = cpi->common.fc.coef_probs_4x4;
-#endif
       break;
     case TX_8X8:
       a_ec = (a[0] + a[1]) != 0;
       l_ec = (l[0] + l[1]) != 0;
-#if CONFIG_CODE_NONZEROCOUNT == 0
       counts = cpi->coef_counts_8x8;
       probs = cpi->common.fc.coef_probs_8x8;
-#endif
       break;
     case TX_16X16:
       if (type != PLANE_TYPE_UV) {
@@ -1032,10 +1033,8 @@
         a_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0;
         l_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0;
       }
-#if CONFIG_CODE_NONZEROCOUNT == 0
       counts = cpi->coef_counts_16x16;
       probs = cpi->common.fc.coef_probs_16x16;
-#endif
       break;
     case TX_32X32:
       if (type != PLANE_TYPE_UV) {
@@ -1049,26 +1048,28 @@
         l_ec = (l[0] + l[1] + l1[0] + l1[1] +
                 l2[0] + l2[1] + l3[0] + l3[1]) != 0;
       }
-#if CONFIG_CODE_NONZEROCOUNT == 0
       counts = cpi->coef_counts_32x32;
       probs = cpi->common.fc.coef_probs_32x32;
-#endif
       break;
   }
 
-#if CONFIG_CODE_NONZEROCOUNT == 0
-  VP9_COMBINEENTROPYCONTEXTS(pt, a_ec, l_ec);
-  band = 0;
-  t->Token = DCT_EOB_TOKEN;
-  t->context_tree = probs[type][ref][band][pt];
-  t->skip_eob_node = 0;
-  ++t;
-  *tp = t;
-  if (!dry_run) {
-    ++counts[type][ref][band][pt][DCT_EOB_TOKEN];
+#if CONFIG_CODE_NONZEROCOUNT
+  if (!nzc_used) {
+#endif
+    VP9_COMBINEENTROPYCONTEXTS(pt, a_ec, l_ec);
+    band = 0;
+    t->Token = DCT_EOB_TOKEN;
+    t->context_tree = probs[type][ref][band][pt];
+    t->skip_eob_node = 0;
+    ++t;
+    *tp = t;
+    if (!dry_run) {
+      ++counts[type][ref][band][pt][DCT_EOB_TOKEN];
+    }
+#if CONFIG_CODE_NONZEROCOUNT
   }
 #endif
-  *a = *l = 0;
+  *a = *l = 0;
   if (tx_size == TX_8X8) {
     a[1] = 0;
     l[1] = 0;
--