shithub: libvpx

Download patch

ref: 21401942b03d4859a957f56faa5dc161c5950a2d
parent: 869a39ba60379b031573ed5ba1911088d353a3c1
author: Deb Mukherjee <debargha@google.com>
date: Thu Jun 6 07:14:04 EDT 2013

Coding tx-size selection by use of spatial context

Adds context-based coding of the transform size within a frame, where the
context is derived from the transform sizes selected in the left and above blocks.

Also incorporates code for generating stats.

TODO: generate and incorporate new default stats

Change-Id: I6a7af099f6ad61d448521d9a51167aedaf638ed6

--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -124,12 +124,10 @@
 
 #define TX_SIZE_PROBS  6  // (TX_SIZE_MAX_SB * (TX_SIZE_MAX_SB - 1) / 2)
 
-#if TX_SIZE_PROBS == 6
-#define get_tx_probs_offset(b) ((b) < BLOCK_SIZE_MB16X16 ? 0 : \
-                                (b) < BLOCK_SIZE_SB32X32 ? 1 : 3)
-#else
-#define get_tx_probs_offset(b) 0
-#endif
+#define get_tx_probs(c, b) ((b) < BLOCK_SIZE_MB16X16 ? \
+                            (c)->fc.tx_probs_8x8p :    \
+                            (b) < BLOCK_SIZE_SB32X32 ? \
+                            (c)->fc.tx_probs_16x16p : (c)->fc.tx_probs_32x32p)  // Picks the table by block size. NOTE(review): the arms are arrays of different row width -- confirm any use compiles.
 
 /* For keyframes, intra block modes are predicted by the (already decoded)
    modes for the Y blocks to the left and above us; for interframes, there
--- a/vp9/common/vp9_entropymode.c
+++ b/vp9/common/vp9_entropymode.c
@@ -149,54 +149,53 @@
   { 235, 248 },
 };
 
-void tx_counts_to_branch_counts(unsigned int *tx_count_32x32p,
-                                unsigned int *tx_count_16x16p,
-                                unsigned int *tx_count_8x8p,
-                                unsigned int (*ct)[2]) {
-#if TX_SIZE_PROBS == 6
-  ct[0][0] = tx_count_8x8p[TX_4X4];
-  ct[0][1] = tx_count_8x8p[TX_8X8];
-  ct[1][0] = tx_count_16x16p[TX_4X4];
-  ct[1][1] = tx_count_16x16p[TX_8X8] + tx_count_16x16p[TX_16X16];
-  ct[2][0] = tx_count_16x16p[TX_8X8];
-  ct[2][1] = tx_count_16x16p[TX_16X16];
-  ct[3][0] = tx_count_32x32p[TX_4X4];
-  ct[3][1] = tx_count_32x32p[TX_8X8] + tx_count_32x32p[TX_16X16] +
-             tx_count_32x32p[TX_32X32];
-  ct[4][0] = tx_count_32x32p[TX_8X8];
-  ct[4][1] = tx_count_32x32p[TX_16X16] + tx_count_32x32p[TX_32X32];
-  ct[5][0] = tx_count_32x32p[TX_16X16];
-  ct[5][1] = tx_count_32x32p[TX_32X32];
-#else
-  ct[0][0] = tx_count_32x32p[TX_4X4] +
-             tx_count_16x16p[TX_4X4] +
-             tx_count_8x8p[TX_4X4];
-  ct[0][1] = tx_count_32x32p[TX_8X8] +
-             tx_count_32x32p[TX_16X16] +
-             tx_count_32x32p[TX_32X32] +
-             tx_count_16x16p[TX_8X8] +
-             tx_count_16x16p[TX_16X16] +
-             tx_count_8x8p[TX_8X8];
-  ct[1][0] = tx_count_32x32p[TX_8X8] +
-             tx_count_16x16p[TX_8X8];
-  ct[1][1] = tx_count_32x32p[TX_16X16] +
-             tx_count_32x32p[TX_32X32] +
-             tx_count_16x16p[TX_16X16];
-  ct[2][0] = tx_count_32x32p[TX_16X16];
-  ct[2][1] = tx_count_32x32p[TX_32X32];
-#endif
-}
-
-#if TX_SIZE_PROBS == 6
-const vp9_prob vp9_default_tx_probs[TX_SIZE_PROBS] = {
-  96, 96, 96, 96, 96, 96
+const vp9_prob vp9_default_tx_probs_32x32p[TX_SIZE_MAX_SB]  // row: PRED_TX_SIZE context
+                                          [TX_SIZE_MAX_SB - 1] = {  // column: index of the binary tx-size decision
+  { 16, 64, 96, },  // context 0
+  { 32, 64, 96, },  // context 1
+  { 32, 64, 96, },  // context 2
+  { 32, 64, 96, },  // context 3
 };
-#else
-const vp9_prob vp9_default_tx_probs[TX_SIZE_PROBS] = {
-  96, 96, 96
+const vp9_prob vp9_default_tx_probs_16x16p[TX_SIZE_MAX_SB - 1]  // row: PRED_TX_SIZE context
+                                          [TX_SIZE_MAX_SB - 2] = {  // column: index of the binary tx-size decision
+  { 32, 96, },  // context 0
+  { 64, 96, },  // context 1
+  { 64, 96, },  // context 2
 };
-#endif
+const vp9_prob vp9_default_tx_probs_8x8p[TX_SIZE_MAX_SB - 2]  // row: PRED_TX_SIZE context
+                                        [TX_SIZE_MAX_SB - 3] = {  // column: the single binary tx-size decision
+  { 96, },  // context 0
+  { 96, },  // context 1
+};
 
+void tx_counts_to_branch_counts_32x32(unsigned int *tx_count_32x32p,  // in: counts indexed by tx size (one context)
+                                      unsigned int (*ct_32x32p)[2]) {  // out: three pairs of branch counts
+  ct_32x32p[0][0] = tx_count_32x32p[TX_4X4];  // decision 0, side 0: TX_4X4
+  ct_32x32p[0][1] = tx_count_32x32p[TX_8X8] +  // decision 0, side 1: any of the other three sizes
+                    tx_count_32x32p[TX_16X16] +
+                    tx_count_32x32p[TX_32X32];
+  ct_32x32p[1][0] = tx_count_32x32p[TX_8X8];  // decision 1, side 0: TX_8X8
+  ct_32x32p[1][1] = tx_count_32x32p[TX_16X16] +  // decision 1, side 1: TX_16X16 or TX_32X32
+                    tx_count_32x32p[TX_32X32];
+  ct_32x32p[2][0] = tx_count_32x32p[TX_16X16];  // decision 2, side 0: TX_16X16
+  ct_32x32p[2][1] = tx_count_32x32p[TX_32X32];  // decision 2, side 1: TX_32X32
+}
+
+void tx_counts_to_branch_counts_16x16(unsigned int *tx_count_16x16p,  // in: counts indexed by tx size (one context)
+                                      unsigned int (*ct_16x16p)[2]) {  // out: two pairs of branch counts
+  ct_16x16p[0][0] = tx_count_16x16p[TX_4X4];  // decision 0, side 0: TX_4X4
+  ct_16x16p[0][1] = tx_count_16x16p[TX_8X8] +  // decision 0, side 1: TX_8X8 or TX_16X16
+                    tx_count_16x16p[TX_16X16];
+  ct_16x16p[1][0] = tx_count_16x16p[TX_8X8];  // decision 1, side 0: TX_8X8
+  ct_16x16p[1][1] = tx_count_16x16p[TX_16X16];  // decision 1, side 1: TX_16X16
+}
+
+void tx_counts_to_branch_counts_8x8(unsigned int *tx_count_8x8p,  // in: counts indexed by tx size (one context)
+                                    unsigned int (*ct_8x8p)[2]) {  // out: one pair of branch counts
+  ct_8x8p[0][0] =   tx_count_8x8p[TX_4X4];  // side 0: TX_4X4
+  ct_8x8p[0][1] =   tx_count_8x8p[TX_8X8];  // side 1: TX_8X8
+}
+
 const vp9_prob vp9_default_mbskip_probs[MBSKIP_CONTEXTS] = {
   192, 128, 64
 };
@@ -223,8 +222,12 @@
              sizeof(default_comp_ref_p));
   vpx_memcpy(x->fc.single_ref_prob, default_single_ref_p,
              sizeof(default_single_ref_p));
-  vpx_memcpy(x->fc.tx_probs, vp9_default_tx_probs,
-             sizeof(vp9_default_tx_probs));
+  vpx_memcpy(x->fc.tx_probs_32x32p, vp9_default_tx_probs_32x32p,
+             sizeof(vp9_default_tx_probs_32x32p));
+  vpx_memcpy(x->fc.tx_probs_16x16p, vp9_default_tx_probs_16x16p,
+             sizeof(vp9_default_tx_probs_16x16p));
+  vpx_memcpy(x->fc.tx_probs_8x8p, vp9_default_tx_probs_8x8p,
+             sizeof(vp9_default_tx_probs_8x8p));
   vpx_memcpy(x->fc.mbskip_probs, vp9_default_mbskip_probs,
              sizeof(vp9_default_mbskip_probs));
 }
@@ -431,17 +434,51 @@
     }
   }
   if (cm->txfm_mode == TX_MODE_SELECT) {
-    unsigned int branch_ct[TX_SIZE_PROBS][2];
-    tx_counts_to_branch_counts(cm->fc.tx_count_32x32p,
-                               cm->fc.tx_count_16x16p,
-                               cm->fc.tx_count_8x8p, branch_ct);
-    for (i = 0; i < TX_SIZE_PROBS; ++i) {
-      int factor;
-      int count = branch_ct[i][0] + branch_ct[i][1];
-      vp9_prob prob = get_binary_prob(branch_ct[i][0], branch_ct[i][1]);
-      count = count > MODE_COUNT_SAT ? MODE_COUNT_SAT : count;
-      factor = (MODE_MAX_UPDATE_FACTOR * count / MODE_COUNT_SAT);
-      cm->fc.tx_probs[i] = weighted_prob(cm->fc.pre_tx_probs[i], prob, factor);
+    int j;
+    unsigned int branch_ct_8x8p[TX_SIZE_MAX_SB - 3][2];
+    unsigned int branch_ct_16x16p[TX_SIZE_MAX_SB - 2][2];
+    unsigned int branch_ct_32x32p[TX_SIZE_MAX_SB - 1][2];
+    for (i = 0; i < TX_SIZE_MAX_SB - 2; ++i) {
+      tx_counts_to_branch_counts_8x8(cm->fc.tx_count_8x8p[i],
+                                     branch_ct_8x8p);
+      for (j = 0; j < TX_SIZE_MAX_SB - 3; ++j) {
+        int factor;
+        int count = branch_ct_8x8p[j][0] + branch_ct_8x8p[j][1];
+        vp9_prob prob = get_binary_prob(branch_ct_8x8p[j][0],
+                                        branch_ct_8x8p[j][1]);
+        count = count > MODE_COUNT_SAT ? MODE_COUNT_SAT : count;
+        factor = (MODE_MAX_UPDATE_FACTOR * count / MODE_COUNT_SAT);
+        cm->fc.tx_probs_8x8p[i][j] = weighted_prob(
+            cm->fc.pre_tx_probs_8x8p[i][j], prob, factor);
+      }
+    }
+    for (i = 0; i < TX_SIZE_MAX_SB - 1; ++i) {
+      tx_counts_to_branch_counts_16x16(cm->fc.tx_count_16x16p[i],
+                                       branch_ct_16x16p);
+      for (j = 0; j < TX_SIZE_MAX_SB - 2; ++j) {
+        int factor;
+        int count = branch_ct_16x16p[j][0] + branch_ct_16x16p[j][1];
+        vp9_prob prob = get_binary_prob(branch_ct_16x16p[j][0],
+                                        branch_ct_16x16p[j][1]);
+        count = count > MODE_COUNT_SAT ? MODE_COUNT_SAT : count;
+        factor = (MODE_MAX_UPDATE_FACTOR * count / MODE_COUNT_SAT);
+        cm->fc.tx_probs_16x16p[i][j] = weighted_prob(
+            cm->fc.pre_tx_probs_16x16p[i][j], prob, factor);
+      }
+    }
+    for (i = 0; i < TX_SIZE_MAX_SB; ++i) {
+      tx_counts_to_branch_counts_32x32(cm->fc.tx_count_32x32p[i],
+                                       branch_ct_32x32p);
+      for (j = 0; j < TX_SIZE_MAX_SB - 1; ++j) {
+        int factor;
+        int count = branch_ct_32x32p[j][0] + branch_ct_32x32p[j][1];
+        vp9_prob prob = get_binary_prob(branch_ct_32x32p[j][0],
+                                        branch_ct_32x32p[j][1]);
+        count = count > MODE_COUNT_SAT ? MODE_COUNT_SAT : count;
+        factor = (MODE_MAX_UPDATE_FACTOR * count / MODE_COUNT_SAT);
+        cm->fc.tx_probs_32x32p[i][j] = weighted_prob(
+            cm->fc.pre_tx_probs_32x32p[i][j], prob, factor);
+      }
     }
   }
   for (i = 0; i < MBSKIP_CONTEXTS; ++i)
--- a/vp9/common/vp9_entropymode.h
+++ b/vp9/common/vp9_entropymode.h
@@ -16,6 +16,8 @@
 
 #define SUBMVREF_COUNT 5
 
+// #define MODE_STATS  // Uncomment to build the tx_count statistics collection.
+
 extern int vp9_mv_cont(const int_mv *l, const int_mv *a);
 
 
@@ -75,11 +77,17 @@
 extern const  vp9_prob vp9_switchable_interp_prob[VP9_SWITCHABLE_FILTERS + 1]
                                                  [VP9_SWITCHABLE_FILTERS - 1];
 
-extern const vp9_prob vp9_default_tx_probs[TX_SIZE_PROBS];
+extern const vp9_prob vp9_default_tx_probs_32x32p[TX_SIZE_MAX_SB]
+                                                 [TX_SIZE_MAX_SB - 1];
+extern const vp9_prob vp9_default_tx_probs_16x16p[TX_SIZE_MAX_SB - 1]
+                                                 [TX_SIZE_MAX_SB - 2];
+extern const vp9_prob vp9_default_tx_probs_8x8p[TX_SIZE_MAX_SB - 2]
+                                               [TX_SIZE_MAX_SB - 3];
 
-extern void tx_counts_to_branch_counts(unsigned int *tx_count_32x32p,
-                                       unsigned int *tx_count_16x16p,
-                                       unsigned int *tx_count_8x8p,
-                                       unsigned int (*ct)[2]);
-
+extern void tx_counts_to_branch_counts_32x32(unsigned int *tx_count_32x32p,
+                                             unsigned int (*ct_32x32p)[2]);
+extern void tx_counts_to_branch_counts_16x16(unsigned int *tx_count_16x16p,
+                                             unsigned int (*ct_16x16p)[2]);
+extern void tx_counts_to_branch_counts_8x8(unsigned int *tx_count_8x8p,
+                                           unsigned int (*ct_8x8p)[2]);
 #endif  // VP9_COMMON_VP9_ENTROPYMODE_H_
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -89,11 +89,16 @@
   unsigned int comp_inter_count[COMP_INTER_CONTEXTS][2];
   unsigned int single_ref_count[REF_CONTEXTS][2][2];
   unsigned int comp_ref_count[REF_CONTEXTS][2];
-  vp9_prob tx_probs[TX_SIZE_PROBS];
-  vp9_prob pre_tx_probs[TX_SIZE_PROBS];
-  unsigned int tx_count_32x32p[TX_SIZE_MAX_SB];
-  unsigned int tx_count_16x16p[TX_SIZE_MAX_SB - 1];
-  unsigned int tx_count_8x8p[TX_SIZE_MAX_SB - 2];
+
+  vp9_prob tx_probs_32x32p[TX_SIZE_MAX_SB][TX_SIZE_MAX_SB - 1];
+  vp9_prob tx_probs_16x16p[TX_SIZE_MAX_SB - 1][TX_SIZE_MAX_SB - 2];
+  vp9_prob tx_probs_8x8p[TX_SIZE_MAX_SB - 2][TX_SIZE_MAX_SB - 3];
+  vp9_prob pre_tx_probs_32x32p[TX_SIZE_MAX_SB][TX_SIZE_MAX_SB - 1];
+  vp9_prob pre_tx_probs_16x16p[TX_SIZE_MAX_SB - 1][TX_SIZE_MAX_SB - 2];
+  vp9_prob pre_tx_probs_8x8p[TX_SIZE_MAX_SB - 2][TX_SIZE_MAX_SB - 3];
+  unsigned int tx_count_32x32p[TX_SIZE_MAX_SB][TX_SIZE_MAX_SB];
+  unsigned int tx_count_16x16p[TX_SIZE_MAX_SB - 1][TX_SIZE_MAX_SB - 1];
+  unsigned int tx_count_8x8p[TX_SIZE_MAX_SB - 2][TX_SIZE_MAX_SB - 2];
 
   vp9_prob mbskip_probs[MBSKIP_CONTEXTS];
   vp9_prob pre_mbskip_probs[MBSKIP_CONTEXTS];
--- a/vp9/common/vp9_pred_common.c
+++ b/vp9/common/vp9_pred_common.c
@@ -348,6 +348,37 @@
       break;
     }
 
+    case PRED_TX_SIZE: {
+      int above_context = TX_16X16, left_context = TX_16X16;
+      int max_tx_size;
+      if (mi->mbmi.sb_type < BLOCK_SIZE_SB8X8)
+        max_tx_size = TX_4X4;
+      else if (mi->mbmi.sb_type < BLOCK_SIZE_MB16X16)
+        max_tx_size = TX_8X8;
+      else if (mi->mbmi.sb_type < BLOCK_SIZE_SB32X32)
+        max_tx_size = TX_16X16;
+      else
+        max_tx_size = TX_32X32;
+      if (xd->up_available) {
+        above_context = (above_mi->mbmi.mb_skip_coeff ?
+                         max_tx_size : above_mi->mbmi.txfm_size);
+      }
+      if (xd->left_available) {
+        left_context = (left_mi->mbmi.mb_skip_coeff ?
+                        max_tx_size : left_mi->mbmi.txfm_size);
+      }
+      if (!xd->left_available) {
+        left_context = above_context;
+      }
+      if (!xd->up_available) {
+        above_context = left_context;
+      }
+      pred_context = (above_context + left_context + 1) >> 1;
+      if (pred_context > max_tx_size)
+        pred_context = max_tx_size;
+      break;
+    }
+
     default:
       assert(0);
       pred_context = 0;  // *** add error trap code.
@@ -390,11 +421,21 @@
 const vp9_prob *vp9_get_pred_probs(const VP9_COMMON *const cm,
                                    const MACROBLOCKD *const xd,
                                    PRED_ID pred_id) {
+  const MODE_INFO *const mi = xd->mode_info_context;
   const int pred_context = vp9_get_pred_context(cm, xd, pred_id);
 
   switch (pred_id) {
     case PRED_SWITCHABLE_INTERP:
       return &cm->fc.switchable_interp_prob[pred_context][0];
+
+    case PRED_TX_SIZE:
+      if (mi->mbmi.sb_type < BLOCK_SIZE_MB16X16)
+        return cm->fc.tx_probs_8x8p[pred_context];
+      else if (mi->mbmi.sb_type < BLOCK_SIZE_SB32X32)
+        return cm->fc.tx_probs_16x16p[pred_context];
+      else
+        return cm->fc.tx_probs_32x32p[pred_context];
+
     default:
       assert(0);
       return NULL;  // *** add error trap code.
--- a/vp9/common/vp9_pred_common.h
+++ b/vp9/common/vp9_pred_common.h
@@ -24,6 +24,7 @@
   PRED_SINGLE_REF_P1 = 5,
   PRED_SINGLE_REF_P2 = 6,
   PRED_COMP_REF_P = 7,
+  PRED_TX_SIZE = 8
 } PRED_ID;
 
 unsigned char vp9_get_pred_context(const VP9_COMMON *const cm,
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -63,21 +63,22 @@
   }
 }
 
-static TX_SIZE select_txfm_size(VP9_COMMON *cm, vp9_reader *r,
-                                BLOCK_SIZE_TYPE bsize) {
-  int tx_probs_offset = get_tx_probs_offset(bsize);
-  TX_SIZE txfm_size = vp9_read(r, cm->fc.tx_probs[tx_probs_offset]);
+static TX_SIZE select_txfm_size(VP9_COMMON *cm, MACROBLOCKD *xd,  // reads a block's tx size from r and counts it
+                                vp9_reader *r, BLOCK_SIZE_TYPE bsize) {
+  const int context = vp9_get_pred_context(cm, xd, PRED_TX_SIZE);  // kept for indexing the counters below
+  const vp9_prob *tx_probs = vp9_get_pred_probs(cm, xd, PRED_TX_SIZE);  // derives the same context again internally
+  TX_SIZE txfm_size = vp9_read(r, tx_probs[0]);  // first decision: stays TX_4X4 or moves on
   if (txfm_size != TX_4X4 && bsize >= BLOCK_SIZE_MB16X16) {
-    txfm_size += vp9_read(r, cm->fc.tx_probs[tx_probs_offset + 1]);
+    txfm_size += vp9_read(r, tx_probs[1]);  // second decision, only for blocks >= BLOCK_SIZE_MB16X16
     if (txfm_size != TX_8X8 && bsize >= BLOCK_SIZE_SB32X32)
-      txfm_size += vp9_read(r, cm->fc.tx_probs[tx_probs_offset + 2]);
+      txfm_size += vp9_read(r, tx_probs[2]);  // third decision, only for blocks >= BLOCK_SIZE_SB32X32
   }
   if (bsize >= BLOCK_SIZE_SB32X32) {
-    cm->fc.tx_count_32x32p[txfm_size]++;
+    cm->fc.tx_count_32x32p[context][txfm_size]++;  // counters are now kept per context
   } else if (bsize >= BLOCK_SIZE_MB16X16) {
-    cm->fc.tx_count_16x16p[txfm_size]++;
+    cm->fc.tx_count_16x16p[context][txfm_size]++;  // counters are now kept per context
   } else {
-    cm->fc.tx_count_8x8p[txfm_size]++;
+    cm->fc.tx_count_8x8p[context][txfm_size]++;  // counters are now kept per context
   }
   return txfm_size;
 }
@@ -107,7 +108,7 @@
 
   if (cm->txfm_mode == TX_MODE_SELECT &&
       m->mbmi.sb_type >= BLOCK_SIZE_SB8X8) {
-    m->mbmi.txfm_size = select_txfm_size(cm, r,  m->mbmi.sb_type);
+    m->mbmi.txfm_size = select_txfm_size(cm, xd, r, m->mbmi.sb_type);
   } else if (cm->txfm_mode >= ALLOW_32X32 &&
              m->mbmi.sb_type >= BLOCK_SIZE_SB32X32) {
     m->mbmi.txfm_size = TX_32X32;
@@ -544,7 +545,7 @@
   if (cm->txfm_mode == TX_MODE_SELECT &&
       (mbmi->mb_skip_coeff == 0 || mbmi->ref_frame[0] == INTRA_FRAME) &&
       bsize >= BLOCK_SIZE_SB8X8) {
-    mbmi->txfm_size = select_txfm_size(cm, r, bsize);
+    mbmi->txfm_size = select_txfm_size(cm, xd, r, bsize);
   } else if (bsize >= BLOCK_SIZE_SB32X32 &&
              cm->txfm_mode >= ALLOW_32X32) {
     mbmi->txfm_size = TX_32X32;
--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@@ -58,15 +58,35 @@
     if (pc->txfm_mode == ALLOW_32X32)
       pc->txfm_mode += vp9_read_bit(r);
     if (pc->txfm_mode == TX_MODE_SELECT) {
-      int i;
-      for (i = 0; i < TX_SIZE_PROBS; ++i) {
-        if (vp9_read(r, VP9_DEF_UPDATE_PROB))
-           pc->fc.tx_probs[i] =
-               vp9_read_prob_diff_update(r, pc->fc.tx_probs[i]);
+      int i, j;
+      for (i = 0; i < TX_SIZE_MAX_SB - 2; ++i) {
+        for (j = 0; j < TX_SIZE_MAX_SB - 3; ++j) {
+          if (vp9_read(r, VP9_DEF_UPDATE_PROB))
+            pc->fc.tx_probs_8x8p[i][j] =
+                vp9_read_prob_diff_update(r, pc->fc.tx_probs_8x8p[i][j]);
+        }
       }
+      for (i = 0; i < TX_SIZE_MAX_SB - 1; ++i) {
+        for (j = 0; j < TX_SIZE_MAX_SB - 2; ++j) {
+          if (vp9_read(r, VP9_DEF_UPDATE_PROB))
+            pc->fc.tx_probs_16x16p[i][j] =
+                vp9_read_prob_diff_update(r, pc->fc.tx_probs_16x16p[i][j]);
+        }
+      }
+      for (i = 0; i < TX_SIZE_MAX_SB; ++i) {
+        for (j = 0; j < TX_SIZE_MAX_SB - 1; ++j) {
+          if (vp9_read(r, VP9_DEF_UPDATE_PROB))
+            pc->fc.tx_probs_32x32p[i][j] =
+                vp9_read_prob_diff_update(r, pc->fc.tx_probs_32x32p[i][j]);
+        }
+      }
     } else {
-      vpx_memcpy(pc->fc.tx_probs, vp9_default_tx_probs,
-                 sizeof(vp9_default_tx_probs));
+      vpx_memcpy(pc->fc.tx_probs_8x8p, vp9_default_tx_probs_8x8p,
+                 sizeof(vp9_default_tx_probs_8x8p));
+      vpx_memcpy(pc->fc.tx_probs_16x16p, vp9_default_tx_probs_16x16p,
+                 sizeof(vp9_default_tx_probs_16x16p));
+      vpx_memcpy(pc->fc.tx_probs_32x32p, vp9_default_tx_probs_32x32p,
+                 sizeof(vp9_default_tx_probs_32x32p));
     }
   }
 }
@@ -793,7 +813,9 @@
   fc->pre_nmvc = fc->nmvc;
   vp9_copy(fc->pre_switchable_interp_prob, fc->switchable_interp_prob);
   vp9_copy(fc->pre_inter_mode_probs, fc->inter_mode_probs);
-  vp9_copy(fc->pre_tx_probs, fc->tx_probs);
+  vp9_copy(fc->pre_tx_probs_8x8p, fc->tx_probs_8x8p);
+  vp9_copy(fc->pre_tx_probs_16x16p, fc->tx_probs_16x16p);
+  vp9_copy(fc->pre_tx_probs_32x32p, fc->tx_probs_32x32p);
   vp9_copy(fc->pre_mbskip_probs, fc->mbskip_probs);
 
   vp9_zero(fc->coef_counts);
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -51,6 +51,74 @@
 #define vp9_cost_upd  ((int)(vp9_cost_one(upd) - vp9_cost_zero(upd)) >> 8)
 #define vp9_cost_upd256  ((int)(vp9_cost_one(upd) - vp9_cost_zero(upd)))
 
+#ifdef MODE_STATS
+int64_t tx_count_32x32p_stats[TX_SIZE_MAX_SB][TX_SIZE_MAX_SB];  // running totals of cm->fc.tx_count_32x32p, [context][tx size]
+int64_t tx_count_16x16p_stats[TX_SIZE_MAX_SB - 1][TX_SIZE_MAX_SB - 1];  // running totals of cm->fc.tx_count_16x16p
+int64_t tx_count_8x8p_stats[TX_SIZE_MAX_SB - 2][TX_SIZE_MAX_SB - 2];  // running totals of cm->fc.tx_count_8x8p
+
+void init_tx_count_stats() {  // resets the three MODE_STATS accumulators
+  vp9_zero(tx_count_32x32p_stats);
+  vp9_zero(tx_count_16x16p_stats);
+  vp9_zero(tx_count_8x8p_stats);
+}
+
+static void update_tx_count_stats(VP9_COMMON *cm) {  // adds cm->fc's tx-size counts into the accumulators
+  int i, j;
+  for (i = 0; i < TX_SIZE_MAX_SB; i++) {  // i: context
+    for (j = 0; j < TX_SIZE_MAX_SB; j++) {  // j: tx size
+      tx_count_32x32p_stats[i][j] += cm->fc.tx_count_32x32p[i][j];
+    }
+  }
+  for (i = 0; i < TX_SIZE_MAX_SB - 1; i++) {  // 16x16p table has one fewer context and size
+    for (j = 0; j < TX_SIZE_MAX_SB - 1; j++) {
+      tx_count_16x16p_stats[i][j] += cm->fc.tx_count_16x16p[i][j];
+    }
+  }
+  for (i = 0; i < TX_SIZE_MAX_SB - 2; i++) {  // 8x8p table has two fewer contexts and sizes
+    for (j = 0; j < TX_SIZE_MAX_SB - 2; j++) {
+      tx_count_8x8p_stats[i][j] += cm->fc.tx_count_8x8p[i][j];
+    }
+  }
+}
+
+// Dumps the accumulated counts to "tx_count.bin" (when it can be opened) and
+// prints them to stdout as C-style tables, indexed [context][tx size].
+void write_tx_count_stats() {
+  int i, j;
+  FILE *fp = fopen("tx_count.bin", "wb");
+  if (fp != NULL) {  // fopen() can fail; fwrite/fclose on NULL is undefined.
+    fwrite(tx_count_32x32p_stats, sizeof(tx_count_32x32p_stats), 1, fp);
+    fwrite(tx_count_16x16p_stats, sizeof(tx_count_16x16p_stats), 1, fp);
+    fwrite(tx_count_8x8p_stats, sizeof(tx_count_8x8p_stats), 1, fp);
+    fclose(fp);
+  }
+  printf("vp9_default_tx_count_32x32p[TX_SIZE_MAX_SB][TX_SIZE_MAX_SB] = {\n");
+  for (i = 0; i < TX_SIZE_MAX_SB; i++) {
+    printf("{ ");
+    for (j = 0; j < TX_SIZE_MAX_SB; j++)
+      printf("%"PRId64", ", tx_count_32x32p_stats[i][j]);
+    printf("},\n");
+  }
+  printf("};\n");
+  printf("vp9_default_tx_count_16x16p[TX_SIZE_MAX_SB-1][TX_SIZE_MAX_SB-1] = {\n");
+  for (i = 0; i < TX_SIZE_MAX_SB - 1; i++) {
+    printf("{ ");
+    for (j = 0; j < TX_SIZE_MAX_SB - 1; j++)
+      printf("%"PRId64", ", tx_count_16x16p_stats[i][j]);
+    printf("},\n");
+  }
+  printf("};\n");
+  printf("vp9_default_tx_count_8x8p[TX_SIZE_MAX_SB-2][TX_SIZE_MAX_SB-2] = {\n");
+  for (i = 0; i < TX_SIZE_MAX_SB - 2; i++) {
+    printf("{ ");
+    for (j = 0; j < TX_SIZE_MAX_SB - 2; j++)
+      printf("%"PRId64", ", tx_count_8x8p_stats[i][j]);
+    printf("},\n");
+  }
+  printf("};\n");
+}
+#endif
+
 static int update_bits[255];
 
 static INLINE void write_le32(uint8_t *p, int value) {
@@ -574,12 +642,12 @@
       !(rf != INTRA_FRAME &&
         (skip_coeff || vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)))) {
     TX_SIZE sz = mi->txfm_size;
-    int tx_probs_offset = get_tx_probs_offset(mi->sb_type);
-    vp9_write(bc, sz != TX_4X4, pc->fc.tx_probs[tx_probs_offset]);
+    const vp9_prob *tx_probs = vp9_get_pred_probs(pc, xd, PRED_TX_SIZE);
+    vp9_write(bc, sz != TX_4X4, tx_probs[0]);
     if (mi->sb_type >= BLOCK_SIZE_MB16X16 && sz != TX_4X4) {
-      vp9_write(bc, sz != TX_8X8, pc->fc.tx_probs[tx_probs_offset + 1]);
+      vp9_write(bc, sz != TX_8X8, tx_probs[1]);
       if (mi->sb_type >= BLOCK_SIZE_SB32X32 && sz != TX_8X8)
-        vp9_write(bc, sz != TX_16X16, pc->fc.tx_probs[tx_probs_offset + 2]);
+        vp9_write(bc, sz != TX_16X16, tx_probs[2]);
     }
   }
 
@@ -663,10 +731,6 @@
           }
         }
       }
-
-#ifdef MODE_STATS
-      ++count_mb_seg[mi->partitioning];
-#endif
     } else if (mode == NEWMV) {
 #ifdef ENTROPY_STATS
       active_section = 5;
@@ -705,12 +769,12 @@
 
   if (m->mbmi.sb_type >= BLOCK_SIZE_SB8X8 && c->txfm_mode == TX_MODE_SELECT) {
     TX_SIZE sz = m->mbmi.txfm_size;
-    int tx_probs_offset = get_tx_probs_offset(m->mbmi.sb_type);
-    vp9_write(bc, sz != TX_4X4, c->fc.tx_probs[tx_probs_offset]);
+    const vp9_prob *tx_probs = vp9_get_pred_probs(c, xd, PRED_TX_SIZE);
+    vp9_write(bc, sz != TX_4X4, tx_probs[0]);
     if (m->mbmi.sb_type >= BLOCK_SIZE_MB16X16 && sz != TX_4X4) {
-      vp9_write(bc, sz != TX_8X8, c->fc.tx_probs[tx_probs_offset + 1]);
+      vp9_write(bc, sz != TX_8X8, tx_probs[1]);
       if (m->mbmi.sb_type >= BLOCK_SIZE_SB32X32 && sz != TX_8X8)
-        vp9_write(bc, sz != TX_16X16, c->fc.tx_probs[tx_probs_offset + 2]);
+        vp9_write(bc, sz != TX_16X16, tx_probs[2]);
     }
   }
 
@@ -1226,19 +1290,46 @@
 
   // Probabilities
   if (cm->txfm_mode == TX_MODE_SELECT) {
-    int i;
-    unsigned int ct[TX_SIZE_PROBS][2];
-    tx_counts_to_branch_counts(cm->fc.tx_count_32x32p,
-                               cm->fc.tx_count_16x16p,
-                               cm->fc.tx_count_8x8p, ct);
+    int i, j;
+    unsigned int ct_8x8p[TX_SIZE_MAX_SB - 3][2];
+    unsigned int ct_16x16p[TX_SIZE_MAX_SB - 2][2];
+    unsigned int ct_32x32p[TX_SIZE_MAX_SB - 1][2];
 
-    for (i = 0; i < TX_SIZE_PROBS; i++) {
-      vp9_cond_prob_diff_update(w, &cm->fc.tx_probs[i],
-                                VP9_DEF_UPDATE_PROB, ct[i]);
+
+    for (i = 0; i < TX_SIZE_MAX_SB - 2; i++) {
+      tx_counts_to_branch_counts_8x8(cm->fc.tx_count_8x8p[i],
+                                     ct_8x8p);
+      for (j = 0; j < TX_SIZE_MAX_SB - 3; j++) {
+        vp9_cond_prob_diff_update(w, &cm->fc.tx_probs_8x8p[i][j],
+                                  VP9_DEF_UPDATE_PROB, ct_8x8p[j]);
+      }
     }
+    for (i = 0; i < TX_SIZE_MAX_SB - 1; i++) {
+      tx_counts_to_branch_counts_16x16(cm->fc.tx_count_16x16p[i],
+                                       ct_16x16p);
+      for (j = 0; j < TX_SIZE_MAX_SB - 2; j++) {
+        vp9_cond_prob_diff_update(w, &cm->fc.tx_probs_16x16p[i][j],
+                                  VP9_DEF_UPDATE_PROB, ct_16x16p[j]);
+      }
+    }
+    for (i = 0; i < TX_SIZE_MAX_SB; i++) {
+      tx_counts_to_branch_counts_32x32(cm->fc.tx_count_32x32p[i],
+                                       ct_32x32p);
+      for (j = 0; j < TX_SIZE_MAX_SB - 1; j++) {
+        vp9_cond_prob_diff_update(w, &cm->fc.tx_probs_32x32p[i][j],
+                                  VP9_DEF_UPDATE_PROB, ct_32x32p[j]);
+      }
+    }
+#ifdef MODE_STATS
+    update_tx_count_stats(cm);
+#endif
   } else {
-    vpx_memcpy(cm->fc.tx_probs, vp9_default_tx_probs,
-               sizeof(vp9_default_tx_probs));
+    vpx_memcpy(cm->fc.tx_probs_32x32p, vp9_default_tx_probs_32x32p,
+               sizeof(vp9_default_tx_probs_32x32p));
+    vpx_memcpy(cm->fc.tx_probs_16x16p, vp9_default_tx_probs_16x16p,
+               sizeof(vp9_default_tx_probs_16x16p));
+    vpx_memcpy(cm->fc.tx_probs_8x8p, vp9_default_tx_probs_8x8p,
+               sizeof(vp9_default_tx_probs_8x8p));
   }
 }
 
@@ -1475,7 +1566,9 @@
   vp9_copy(pc->fc.pre_comp_inter_prob, pc->fc.comp_inter_prob);
   vp9_copy(pc->fc.pre_comp_ref_prob, pc->fc.comp_ref_prob);
   vp9_copy(pc->fc.pre_single_ref_prob, pc->fc.single_ref_prob);
-  vp9_copy(pc->fc.pre_tx_probs, pc->fc.tx_probs);
+  vp9_copy(pc->fc.pre_tx_probs_8x8p, pc->fc.tx_probs_8x8p);
+  vp9_copy(pc->fc.pre_tx_probs_16x16p, pc->fc.tx_probs_16x16p);
+  vp9_copy(pc->fc.pre_tx_probs_32x32p, pc->fc.tx_probs_32x32p);
   vp9_copy(pc->fc.pre_mbskip_probs, pc->fc.mbskip_probs);
 
   if (xd->lossless) {
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -1866,16 +1866,35 @@
     }
 
     if (cpi->common.txfm_mode == TX_MODE_SELECT) {
-      const int count4x4 = cm->fc.tx_count_16x16p[TX_4X4] +
-                           cm->fc.tx_count_32x32p[TX_4X4] +
-                           cm->fc.tx_count_8x8p[TX_4X4];
-      const int count8x8_lp = cm->fc.tx_count_32x32p[TX_8X8] +
-                              cm->fc.tx_count_16x16p[TX_8X8];
-      const int count8x8_8x8p = cm->fc.tx_count_8x8p[TX_8X8];
-      const int count16x16_16x16p = cm->fc.tx_count_16x16p[TX_16X16];
-      const int count16x16_lp = cm->fc.tx_count_32x32p[TX_16X16];
-      const int count32x32 = cm->fc.tx_count_32x32p[TX_32X32];
+      int count4x4 = 0;
+      int count8x8_lp = 0, count8x8_8x8p = 0;
+      int count16x16_16x16p = 0, count16x16_lp = 0;
+      int count32x32 = 0;
 
+      for (i = 0; i < TX_SIZE_MAX_SB; i++)
+        count4x4 += cm->fc.tx_count_32x32p[i][TX_4X4];
+      for (i = 0; i < TX_SIZE_MAX_SB - 1; i++)
+        count4x4 += cm->fc.tx_count_16x16p[i][TX_4X4];
+      for (i = 0; i < TX_SIZE_MAX_SB - 2; i++)
+        count4x4 += cm->fc.tx_count_8x8p[i][TX_4X4];
+
+      for (i = 0; i < TX_SIZE_MAX_SB; i++)
+        count8x8_lp += cm->fc.tx_count_32x32p[i][TX_8X8];
+      for (i = 0; i < TX_SIZE_MAX_SB - 1; i++)
+        count8x8_lp += cm->fc.tx_count_16x16p[i][TX_8X8];
+
+      for (i = 0; i < TX_SIZE_MAX_SB - 2; i++)
+        count8x8_8x8p += cm->fc.tx_count_8x8p[i][TX_8X8];
+
+      for (i = 0; i < TX_SIZE_MAX_SB - 1; i++)
+        count16x16_16x16p += cm->fc.tx_count_16x16p[i][TX_16X16];
+
+      for (i = 0; i < TX_SIZE_MAX_SB; i++)
+        count16x16_lp += cm->fc.tx_count_32x32p[i][TX_16X16];
+
+      for (i = 0; i < TX_SIZE_MAX_SB; i++)
+        count32x32 += cm->fc.tx_count_32x32p[i][TX_32X32];
+
       if (count4x4 == 0 && count16x16_lp == 0 && count16x16_16x16p == 0 &&
           count32x32 == 0) {
         cpi->common.txfm_mode = ALLOW_8X8;
@@ -2057,12 +2076,13 @@
         mbmi->sb_type >= BLOCK_SIZE_SB8X8 &&
         !(mbmi->ref_frame[0] != INTRA_FRAME && (mbmi->mb_skip_coeff ||
           vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)))) {
+      const int context = vp9_get_pred_context(cm, xd, PRED_TX_SIZE);
       if (bsize >= BLOCK_SIZE_SB32X32) {
-        cm->fc.tx_count_32x32p[mbmi->txfm_size]++;
+        cm->fc.tx_count_32x32p[context][mbmi->txfm_size]++;
       } else if (bsize >= BLOCK_SIZE_MB16X16) {
-        cm->fc.tx_count_16x16p[mbmi->txfm_size]++;
+        cm->fc.tx_count_16x16p[context][mbmi->txfm_size]++;
       } else {
-        cm->fc.tx_count_8x8p[mbmi->txfm_size]++;
+        cm->fc.tx_count_8x8p[context][mbmi->txfm_size]++;
       }
     } else {
       int x, y;
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -107,6 +107,10 @@
 extern void init_nmvstats();
 extern void print_nmvstats();
 #endif
+#ifdef MODE_STATS
+extern void init_tx_count_stats();
+extern void write_tx_count_stats();
+#endif
 
 #ifdef SPEEDSTATS
 unsigned int frames_at_speed[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
@@ -1287,6 +1291,9 @@
 #ifdef NMV_STATS
   init_nmvstats();
 #endif
+#ifdef MODE_STATS
+  init_tx_count_stats();
+#endif
 
   /*Initialize the feed-forward activity masking.*/
   cpi->activity_avg = 90 << 12;
@@ -1534,6 +1541,10 @@
 #ifdef NMV_STATS
     if (cpi->pass != 1)
       print_nmvstats();
+#endif
+#ifdef MODE_STATS
+    if (cpi->pass != 1)
+      write_tx_count_stats();
 #endif
 
 #if CONFIG_INTERNAL_STATS
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -89,7 +89,9 @@
   int inter_mode_counts[INTER_MODE_CONTEXTS][VP9_INTER_MODES - 1][2];
   vp9_prob inter_mode_probs[INTER_MODE_CONTEXTS][VP9_INTER_MODES - 1];
 
-  vp9_prob tx_probs[TX_SIZE_PROBS];
+  vp9_prob tx_probs_8x8p[TX_SIZE_MAX_SB - 2][TX_SIZE_MAX_SB - 3];
+  vp9_prob tx_probs_16x16p[TX_SIZE_MAX_SB - 1][TX_SIZE_MAX_SB - 2];
+  vp9_prob tx_probs_32x32p[TX_SIZE_MAX_SB][TX_SIZE_MAX_SB - 1];
   vp9_prob mbskip_probs[MBSKIP_CONTEXTS];
 } CODING_CONTEXT;
 
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -143,7 +143,9 @@
 
   vp9_copy(cc->coef_probs, cm->fc.coef_probs);
   vp9_copy(cc->switchable_interp_prob, cm->fc.switchable_interp_prob);
-  vp9_copy(cc->tx_probs, cm->fc.tx_probs);
+  vp9_copy(cc->tx_probs_8x8p, cm->fc.tx_probs_8x8p);
+  vp9_copy(cc->tx_probs_16x16p, cm->fc.tx_probs_16x16p);
+  vp9_copy(cc->tx_probs_32x32p, cm->fc.tx_probs_32x32p);
   vp9_copy(cc->mbskip_probs, cm->fc.mbskip_probs);
 }
 
@@ -182,7 +184,9 @@
 
   vp9_copy(cm->fc.coef_probs, cc->coef_probs);
   vp9_copy(cm->fc.switchable_interp_prob, cc->switchable_interp_prob);
-  vp9_copy(cm->fc.tx_probs, cc->tx_probs);
+  vp9_copy(cm->fc.tx_probs_8x8p, cc->tx_probs_8x8p);
+  vp9_copy(cm->fc.tx_probs_16x16p, cc->tx_probs_16x16p);
+  vp9_copy(cm->fc.tx_probs_32x32p, cc->tx_probs_32x32p);
   vp9_copy(cm->fc.mbskip_probs, cc->mbskip_probs);
 }
 
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -420,7 +420,6 @@
                                      int *d, int *distortion,
                                      int *s, int *skip,
                                      int64_t txfm_cache[NB_TXFM_MODES],
-                                     BLOCK_SIZE_TYPE bs,
                                      TX_SIZE max_txfm_size) {
   VP9_COMMON *const cm = &cpi->common;
   MACROBLOCKD *const xd = &x->e_mbd;
@@ -430,15 +429,15 @@
   int n, m;
   int s0, s1;
 
-  int tx_probs_offset = get_tx_probs_offset(bs);
+  const vp9_prob *tx_probs = vp9_get_pred_probs(cm, xd, PRED_TX_SIZE);
 
   for (n = TX_4X4; n <= max_txfm_size; n++) {
     r[n][1] = r[n][0];
     for (m = 0; m <= n - (n == max_txfm_size); m++) {
       if (m == n)
-        r[n][1] += vp9_cost_zero(cm->fc.tx_probs[tx_probs_offset + m]);
+        r[n][1] += vp9_cost_zero(tx_probs[m]);
       else
-        r[n][1] += vp9_cost_one(cm->fc.tx_probs[tx_probs_offset + m]);
+        r[n][1] += vp9_cost_one(tx_probs[m]);
     }
   }
 
@@ -612,7 +611,6 @@
   MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
 
   assert(bs == mbmi->sb_type);
-
   if (mbmi->ref_frame[0] > INTRA_FRAME)
     vp9_subtract_sby(x, bs);
 
@@ -643,7 +641,7 @@
                            TX_4X4);
 
   choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s,
-                           skip, txfm_cache, bs,
+                           skip, txfm_cache,
                            TX_32X32 - (bs < BLOCK_SIZE_SB32X32)
                            - (bs < BLOCK_SIZE_MB16X16));
 }