shithub: libvpx

Download patch

ref: de6dfa6bb0cd680ca446a15b52b2a026859eb1e6
parent: c59e36fc76ce3c5dd0b9a40e8f95a225248ce018
author: Jingning Han <jingning@google.com>
date: Wed Aug 29 07:25:38 EDT 2012

hybrid transform of 16x16 dimension

Enable ADST/DCT of dimension 16x16 for I16X16 modes. This change provides
benefits mostly for hd sequences.

Set up the framework for selectable transform dimension.

Also allow a quantization parameter threshold to control the use
of the hybrid transform. (The thresholding is currently inactive: the
threshold is always set above the quantization parameter, so the hybrid
transform is always enabled. Adaptive thresholding can be built upon
this, which will further improve the coding performance.)

The coding performance gains (with respect to the codec that has all
other configuration settings turned on) are

derf:   0.013
yt:     0.086
hd:     0.198
std-hd: 0.501

Change-Id: Ibb4263a61fc74e0b3c345f54d73e8c73552bf926

--- a/configure
+++ b/configure
@@ -227,6 +227,7 @@
     tx16x16
     newbestrefmv
     new_mvref
+    hybridtransform16x16
 "
 CONFIG_LIST="
     external_build
--- a/vp8/common/blockd.h
+++ b/vp8/common/blockd.h
@@ -133,13 +133,13 @@
 typedef enum {
   TX_4X4,                      // 4x4 dct transform
   TX_8X8,                      // 8x8 dct transform
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   TX_16X16,                    // 16x16 dct transform
 #endif
   TX_SIZE_MAX                  // Number of different transforms available
 } TX_SIZE;
 
-#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM
+#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16
 typedef enum {
   DCT_DCT   = 0,                      // DCT  in both horizontal and vertical
   ADST_DCT  = 1,                      // ADST in horizontal, DCT in vertical
@@ -155,10 +155,14 @@
 
 #define VP8_MVREFS (1 + SPLITMV - NEARESTMV)
 
-#if CONFIG_HYBRIDTRANSFORM
+#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16
 #define ACTIVE_HT 110                // quantization stepsize threshold
 #endif
 
+#if CONFIG_HYBRIDTRANSFORM16X16
+#define ACTIVE_HT16 300
+#endif
+
 typedef enum {
   B_DC_PRED,          /* average of above and left pixels */
   B_TM_PRED,
@@ -190,7 +194,7 @@
 } MV_REF_TYPE;
 #endif
 
-#if CONFIG_HYBRIDTRANSFORM8X8
+#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM16X16
 // convert MB_PREDICTION_MODE to B_PREDICTION_MODE
 static B_PREDICTION_MODE pred_mode_conv(MB_PREDICTION_MODE mode) {
   B_PREDICTION_MODE b_mode;
@@ -244,7 +248,7 @@
 union b_mode_info {
   struct {
     B_PREDICTION_MODE first;
-#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM
+#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16
     B_PREDICTION_MODE test;
     TX_TYPE           tx_type;
 #endif
@@ -269,7 +273,7 @@
 
 typedef struct {
   MB_PREDICTION_MODE mode, uv_mode;
-#if CONFIG_HYBRIDTRANSFORM
+#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM16X16
   MB_PREDICTION_MODE mode_rdopt;
 #endif
 
@@ -457,15 +461,16 @@
 #endif
 #endif
 
-#if CONFIG_HYBRIDTRANSFORM
+#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM16X16
   int q_index;
 #endif
 
 } MACROBLOCKD;
 
-#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM
+#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM16X16
 // transform mapping
 static void txfm_map(BLOCKD *b, B_PREDICTION_MODE bmode) {
+  // map transform type
   switch (bmode) {
     case B_TM_PRED :
     case B_RD_PRED :
--- a/vp8/common/coefupdateprobs.h
+++ b/vp8/common/coefupdateprobs.h
@@ -13,7 +13,7 @@
    Generated file included by entropy.c */
 #define COEF_UPDATE_PROB 252
 #define COEF_UPDATE_PROB_8X8 252
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 #define COEF_UPDATE_PROB_16X16 252
 #endif
 
--- a/vp8/common/default_coef_probs.h
+++ b/vp8/common/default_coef_probs.h
@@ -488,7 +488,7 @@
   }
 #endif
 };
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 static const vp8_prob
 vp8_default_coef_probs_16x16[BLOCK_TYPES_16X16]
                             [COEF_BANDS]
@@ -546,103 +546,104 @@
     }
   },
   { /* block Type 1 */
-    { /* Coeff Band 0 */
-      { 134, 152, 233, 224, 234, 52, 255, 166, 128, 128, 128},
-      { 97, 132, 185, 234, 186, 189, 197, 171, 255, 212, 128},
-      { 84, 110, 185, 237, 182, 182, 145, 145, 255, 255, 128}
-    },
-    { /* Coeff Band 1 */
-      { 1, 124, 213, 247, 192, 212, 255, 255, 128, 128, 128},
-      { 88, 111, 178, 254, 189, 211, 255, 255, 128, 128, 128},
-      { 12, 59, 129, 236, 150, 179, 239, 195, 255, 255, 128},
-      { 12, 59, 129, 236, 150, 179, 239, 195, 255, 255, 128}
-    },
-    { /* Coeff Band 2 */
-      { 1, 102, 225, 255, 210, 240, 128, 128, 128, 128, 128},
-      { 110, 78, 195, 254, 200, 191, 255, 255, 128, 128, 128},
-      { 37, 63, 177, 255, 194, 195, 128, 128, 128, 128, 128},
-      { 37, 63, 177, 255, 194, 195, 128, 128, 128, 128, 128}
-    },
-    { /* Coeff Band 3 */
-      { 1, 1, 229, 255, 202, 224, 128, 128, 128, 128, 128},
-      { 150, 1, 192, 255, 206, 226, 128, 128, 128, 128, 128},
-      { 75, 1, 138, 255, 172, 228, 128, 128, 128, 128, 128},
-      { 75, 1, 138, 255, 172, 228, 128, 128, 128, 128, 128}
-    },
-    { /* Coeff Band 4 */
-      { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
-      { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
-      { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
-      { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}
-    },
-    { /* Coeff Band 5 */
-      { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
-      { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
-      { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
-      { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}
-    },
-    { /* Coeff Band 6 */
-      { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
-      { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
-      { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
-      { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}
-    },
-    { /* Coeff Band 7 */
-      { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
-      { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
-      { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
-      { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}
-    }
+      { /* Coeff Band 0 */
+        { 1, 30, 103, 204, 142, 168, 235, 161, 255, 228, 128},
+        { 1, 35, 90, 192, 130, 161, 227, 158, 255, 226, 255},
+        { 1, 36, 78, 180, 122, 156, 221, 153, 255, 222, 255},
+        { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}
+      },
+      { /* Coeff Band 1 */
+        { 1, 163, 228, 253, 212, 194, 255, 205, 128, 128, 128},
+        { 67, 160, 226, 253, 210, 202, 245, 172, 255, 255, 128},
+        { 51, 147, 219, 251, 207, 207, 255, 217, 128, 128, 128},
+        { 25, 107, 175, 245, 183, 190, 254, 209, 255, 255, 128}
+      },
+      { /* Coeff Band 2 */
+        { 1, 66, 170, 240, 177, 186, 252, 203, 255, 245, 128},
+        { 23, 64, 145, 230, 161, 177, 252, 198, 255, 255, 128},
+        { 6, 51, 99, 208, 135, 163, 249, 178, 255, 248, 128},
+        { 1, 33, 59, 161, 104, 151, 238, 164, 255, 237, 128}
+      },
+      { /* Coeff Band 3 */
+        { 1, 76, 216, 250, 198, 199, 255, 226, 255, 255, 128},
+        { 86, 83, 200, 247, 189, 193, 255, 224, 255, 255, 128},
+        { 30, 75, 164, 242, 172, 184, 254, 218, 255, 255, 128},
+        { 3, 54, 103, 227, 140, 172, 253, 201, 255, 255, 128}
+      },
+      { /* Coeff Band 4 */
+        { 1, 241, 247, 255, 233, 223, 255, 255, 128, 128, 128},
+        { 78, 212, 242, 255, 226, 230, 255, 255, 128, 128, 128},
+        { 10, 167, 224, 255, 217, 225, 255, 128, 128, 128, 128},
+        { 1, 104, 176, 250, 166, 219, 255, 255, 128, 128, 128}
+      },
+      { /* Coeff Band 5 */
+        { 1, 194, 241, 254, 228, 214, 248, 237, 255, 255, 128},
+        { 95, 133, 228, 254, 218, 215, 255, 229, 128, 128, 128},
+        { 24, 119, 201, 252, 202, 205, 255, 229, 128, 128, 128},
+        { 1, 88, 155, 246, 183, 193, 255, 205, 128, 128, 128}
+      },
+      { /* Coeff Band 6 */
+        { 1, 204, 236, 255, 222, 220, 255, 239, 128, 128, 128},
+        { 126, 105, 225, 254, 214, 217, 255, 254, 128, 128, 128},
+        { 44, 86, 190, 251, 197, 204, 255, 233, 128, 128, 128},
+        { 6, 71, 130, 240, 164, 188, 255, 246, 128, 128, 128}
+      },
+      { /* Coeff Band 7 */
+        { 1, 195, 250, 255, 239, 197, 128, 128, 128, 128, 128},
+        { 167, 102, 249, 255, 234, 255, 128, 128, 128, 128, 128},
+        { 65, 91, 222, 255, 217, 255, 128, 128, 128, 128, 128},
+        { 1, 59, 128, 255, 154, 255, 128, 128, 128, 128, 128}
+      }
   },
   { /* block Type 2 */
-    { /* Coeff Band 0 */
-      { 11, 181, 226, 199, 183, 255, 255, 255, 128, 128, 128},
-      { 2, 147, 185, 248, 163, 180, 255, 236, 128, 128, 128},
-      { 1, 123, 157, 238, 154, 176, 255, 226, 255, 255, 128},
-      { 1, 123, 157, 238, 154, 176, 255, 226, 255, 255, 128}
-    },
-    { /* Coeff Band 1 */
-      { 1, 150, 191, 246, 174, 188, 255, 235, 128, 128, 128},
-      { 1, 125, 166, 245, 165, 185, 255, 234, 128, 128, 128},
-      { 1, 79, 125, 240, 148, 179, 255, 234, 255, 255, 128},
-      { 1, 79, 125, 240, 148, 179, 255, 234, 255, 255, 128}
-    },
-    { /* Coeff Band 2 */
-      { 1, 146, 184, 242, 167, 183, 255, 230, 255, 255, 128},
-      { 1, 119, 160, 239, 156, 178, 255, 231, 255, 255, 128},
-      { 1, 75, 115, 234, 142, 173, 255, 225, 255, 255, 128},
-      { 1, 75, 115, 234, 142, 173, 255, 225, 255, 255, 128}
-    },
-    { /* Coeff Band 3 */
-      { 1, 150, 188, 244, 169, 183, 255, 233, 255, 255, 128},
-      { 1, 123, 162, 243, 161, 180, 255, 233, 128, 128, 128},
-      { 1, 76, 120, 238, 148, 178, 255, 230, 255, 255, 128},
-      { 1, 76, 120, 238, 148, 178, 255, 230, 255, 255, 128}
-    },
-    { /* Coeff Band 4 */
-      { 1, 163, 202, 252, 188, 204, 255, 248, 128, 128, 128},
-      { 1, 136, 180, 251, 181, 201, 255, 246, 128, 128, 128},
-      { 1, 92, 146, 249, 170, 197, 255, 245, 128, 128, 128},
-      { 1, 92, 146, 249, 170, 197, 255, 245, 128, 128, 128}
-    },
-    { /* Coeff Band 5 */
-      { 1, 156, 195, 249, 179, 193, 255, 241, 255, 255, 128},
-      { 1, 128, 169, 248, 171, 192, 255, 242, 255, 255, 128},
-      { 1, 84, 132, 245, 158, 187, 255, 240, 255, 255, 128},
-      { 1, 84, 132, 245, 158, 187, 255, 240, 255, 255, 128}
-    },
-    { /* Coeff Band 6 */
-      { 1, 36, 71, 251, 192, 201, 255, 243, 255, 255, 128},
-      { 1, 49, 185, 250, 184, 199, 255, 242, 128, 128, 128},
-      { 1, 95, 147, 247, 168, 190, 255, 239, 255, 255, 128},
-      { 1, 95, 147, 247, 168, 190, 255, 239, 255, 255, 128}
-    },
-    { /* Coeff Band 7 */
-      { 1, 19, 98, 255, 218, 222, 255, 255, 128, 128, 128},
-      { 36, 50, 210, 255, 212, 221, 255, 255, 128, 128, 128},
-      { 6, 117, 180, 254, 199, 216, 255, 251, 128, 128, 128},
-      { 6, 117, 180, 254, 199, 216, 255, 251, 128, 128, 128}
-    }
+      { /* Coeff Band 0 */
+        { 1, 30, 103, 204, 142, 168, 235, 161, 255, 228, 128},
+        { 1, 35, 90, 192, 130, 161, 227, 158, 255, 226, 255},
+        { 1, 36, 78, 180, 122, 156, 221, 153, 255, 222, 255},
+        { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}
+      },
+      { /* Coeff Band 1 */
+        { 1, 163, 228, 253, 212, 194, 255, 205, 128, 128, 128},
+        { 67, 160, 226, 253, 210, 202, 245, 172, 255, 255, 128},
+        { 51, 147, 219, 251, 207, 207, 255, 217, 128, 128, 128},
+        { 25, 107, 175, 245, 183, 190, 254, 209, 255, 255, 128}
+      },
+      { /* Coeff Band 2 */
+        { 1, 66, 170, 240, 177, 186, 252, 203, 255, 245, 128},
+        { 23, 64, 145, 230, 161, 177, 252, 198, 255, 255, 128},
+        { 6, 51, 99, 208, 135, 163, 249, 178, 255, 248, 128},
+        { 1, 33, 59, 161, 104, 151, 238, 164, 255, 237, 128}
+      },
+      { /* Coeff Band 3 */
+        { 1, 76, 216, 250, 198, 199, 255, 226, 255, 255, 128},
+        { 86, 83, 200, 247, 189, 193, 255, 224, 255, 255, 128},
+        { 30, 75, 164, 242, 172, 184, 254, 218, 255, 255, 128},
+        { 3, 54, 103, 227, 140, 172, 253, 201, 255, 255, 128}
+      },
+      { /* Coeff Band 4 */
+        { 1, 241, 247, 255, 233, 223, 255, 255, 128, 128, 128},
+        { 78, 212, 242, 255, 226, 230, 255, 255, 128, 128, 128},
+        { 10, 167, 224, 255, 217, 225, 255, 128, 128, 128, 128},
+        { 1, 104, 176, 250, 166, 219, 255, 255, 128, 128, 128}
+      },
+      { /* Coeff Band 5 */
+        { 1, 194, 241, 254, 228, 214, 248, 237, 255, 255, 128},
+        { 95, 133, 228, 254, 218, 215, 255, 229, 128, 128, 128},
+        { 24, 119, 201, 252, 202, 205, 255, 229, 128, 128, 128},
+        { 1, 88, 155, 246, 183, 193, 255, 205, 128, 128, 128}
+      },
+      { /* Coeff Band 6 */
+        { 1, 204, 236, 255, 222, 220, 255, 239, 128, 128, 128},
+        { 126, 105, 225, 254, 214, 217, 255, 254, 128, 128, 128},
+        { 44, 86, 190, 251, 197, 204, 255, 233, 128, 128, 128},
+        { 6, 71, 130, 240, 164, 188, 255, 246, 128, 128, 128}
+      },
+      { /* Coeff Band 7 */
+        { 1, 195, 250, 255, 239, 197, 128, 128, 128, 128, 128},
+        { 167, 102, 249, 255, 234, 255, 128, 128, 128, 128, 128},
+        { 65, 91, 222, 255, 217, 255, 128, 128, 128, 128, 128},
+        { 1, 59, 128, 255, 154, 255, 128, 128, 128, 128, 128}
+      }
   },
   { /* block Type 3 */
     { /* Coeff Band 0 */
--- a/vp8/common/entropy.c
+++ b/vp8/common/entropy.c
@@ -95,7 +95,7 @@
   58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63,
 };
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 // Table can be optimized.
 DECLARE_ALIGNED(16, const int, vp8_coef_bands_16x16[256]) = {
     0, 1, 2, 3, 5, 4, 4, 5, 5, 3, 6, 3, 5, 4, 6, 6,
@@ -212,7 +212,7 @@
   vpx_memcpy(pc->fc.coef_probs_8x8, vp8_default_coef_probs_8x8,
              sizeof(pc->fc.coef_probs_8x8));
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   vpx_memcpy(pc->fc.coef_probs_16x16, vp8_default_coef_probs_16x16,
              sizeof(pc->fc.coef_probs_16x16));
 #endif
@@ -335,7 +335,7 @@
         }
       }
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   for (i = 0; i < BLOCK_TYPES_16X16; ++i)
     for (j = 0; j < COEF_BANDS; ++j)
       for (k = 0; k < PREV_COEF_CONTEXTS; ++k) {
--- a/vp8/common/entropy.h
+++ b/vp8/common/entropy.h
@@ -67,6 +67,7 @@
 #else
 #define BLOCK_TYPES_8X8 3
 #endif
+
 #define BLOCK_TYPES_16X16 4
 
 /* Middle dimension is a coarsening of the coefficient's
@@ -75,7 +76,7 @@
 #define COEF_BANDS 8
 extern DECLARE_ALIGNED(16, const int, vp8_coef_bands[16]);
 extern DECLARE_ALIGNED(64, const int, vp8_coef_bands_8x8[64]);
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 extern DECLARE_ALIGNED(16, const int, vp8_coef_bands_16x16[256]);
 #endif
 
@@ -118,7 +119,7 @@
 extern DECLARE_ALIGNED(64, const int, vp8_default_zig_zag1d_8x8[64]);
 void vp8_coef_tree_initialize(void);
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 extern DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d_16x16[256]);
 #endif
 void vp8_adapt_coef_probs(struct VP8Common *);
--- a/vp8/common/generic/systemdependent.c
+++ b/vp8/common/generic/systemdependent.c
@@ -33,7 +33,7 @@
   rtcd->idct.idct8        = vp8_short_idct8x8_c;
   rtcd->idct.idct1_scalar_add_8x8 = vp8_dc_only_idct_add_8x8_c;
   rtcd->idct.ihaar2       = vp8_short_ihaar2x2_c;
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   rtcd->idct.idct16x16    = vp8_short_idct16x16_c;
 #endif
   rtcd->recon.copy16x16   = vp8_copy_mem16x16_c;
--- a/vp8/common/idct.h
+++ b/vp8/common/idct.h
@@ -43,7 +43,7 @@
 #define Y2_WHT_UPSCALE_FACTOR 2
 #endif
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 #ifndef vp8_idct_idct16x16
 #define vp8_idct_idct16x16 vp8_short_idct16x16_c
 #endif
@@ -111,7 +111,7 @@
 extern prototype_second_order(vp8_short_inv_walsh4x4_1_lossless_c);
 #endif
 
-#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM
+#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16
 #include "vp8/common/blockd.h"
 void vp8_ihtllm_c(short *input, short *output, int pitch,
                   TX_TYPE tx_type, int tx_dim);
@@ -136,7 +136,7 @@
   vp8_idct_fn_t ihaar2;
   vp8_idct_fn_t ihaar2_1;
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   vp8_idct_fn_t            idct16x16;
 #endif
 } vp8_idct_rtcd_vtable_t;
--- a/vp8/common/idctllm.c
+++ b/vp8/common/idctllm.c
@@ -37,7 +37,7 @@
 
 // TODO: these transforms can be further converted into integer forms
 //       for complexity optimization
-#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8
+#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM16X16
 float idct_4[16] = {
   0.500000000000000,   0.653281482438188,   0.500000000000000,   0.270598050073099,
   0.500000000000000,   0.270598050073099,  -0.500000000000000,  -0.653281482438188,
@@ -89,11 +89,85 @@
   0.483002021635509,  -0.466553967085785,   0.434217976756762,  -0.387095214016348,
   0.326790388032145,  -0.255357107325375,   0.175227946595736,  -0.089131608307532
 };
+#endif
 
+#if CONFIG_HYBRIDTRANSFORM16X16 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8
+float idct_16[256] = {
+  0.250000,  0.351851,  0.346760,  0.338330,  0.326641,  0.311806,  0.293969,  0.273300,
+  0.250000,  0.224292,  0.196424,  0.166664,  0.135299,  0.102631,  0.068975,  0.034654,
+  0.250000,  0.338330,  0.293969,  0.224292,  0.135299,  0.034654, -0.068975, -0.166664,
+ -0.250000, -0.311806, -0.346760, -0.351851, -0.326641, -0.273300, -0.196424, -0.102631,
+  0.250000,  0.311806,  0.196424,  0.034654, -0.135299, -0.273300, -0.346760, -0.338330,
+ -0.250000, -0.102631,  0.068975,  0.224292,  0.326641,  0.351851,  0.293969,  0.166664,
+  0.250000,  0.273300,  0.068975, -0.166664, -0.326641, -0.338330, -0.196424,  0.034654,
+  0.250000,  0.351851,  0.293969,  0.102631, -0.135299, -0.311806, -0.346760, -0.224292,
+  0.250000,  0.224292, -0.068975, -0.311806, -0.326641, -0.102631,  0.196424,  0.351851,
+  0.250000, -0.034654, -0.293969, -0.338330, -0.135299,  0.166664,  0.346760,  0.273300,
+  0.250000,  0.166664, -0.196424, -0.351851, -0.135299,  0.224292,  0.346760,  0.102631,
+ -0.250000, -0.338330, -0.068975,  0.273300,  0.326641,  0.034654, -0.293969, -0.311806,
+  0.250000,  0.102631, -0.293969, -0.273300,  0.135299,  0.351851,  0.068975, -0.311806,
+ -0.250000,  0.166664,  0.346760,  0.034654, -0.326641, -0.224292,  0.196424,  0.338330,
+  0.250000,  0.034654, -0.346760, -0.102631,  0.326641,  0.166664, -0.293969, -0.224292,
+  0.250000,  0.273300, -0.196424, -0.311806,  0.135299,  0.338330, -0.068975, -0.351851,
+  0.250000, -0.034654, -0.346760,  0.102631,  0.326641, -0.166664, -0.293969,  0.224292,
+  0.250000, -0.273300, -0.196424,  0.311806,  0.135299, -0.338330, -0.068975,  0.351851,
+  0.250000, -0.102631, -0.293969,  0.273300,  0.135299, -0.351851,  0.068975,  0.311806,
+ -0.250000, -0.166664,  0.346760, -0.034654, -0.326641,  0.224292,  0.196424, -0.338330,
+  0.250000, -0.166664, -0.196424,  0.351851, -0.135299, -0.224292,  0.346760, -0.102631,
+ -0.250000,  0.338330, -0.068975, -0.273300,  0.326641, -0.034654, -0.293969,  0.311806,
+  0.250000, -0.224292, -0.068975,  0.311806, -0.326641,  0.102631,  0.196424, -0.351851,
+  0.250000,  0.034654, -0.293969,  0.338330, -0.135299, -0.166664,  0.346760, -0.273300,
+  0.250000, -0.273300,  0.068975,  0.166664, -0.326641,  0.338330, -0.196424, -0.034654,
+  0.250000, -0.351851,  0.293969, -0.102631, -0.135299,  0.311806, -0.346760,  0.224292,
+  0.250000, -0.311806,  0.196424, -0.034654, -0.135299,  0.273300, -0.346760,  0.338330,
+ -0.250000,  0.102631,  0.068975, -0.224292,  0.326641, -0.351851,  0.293969, -0.166664,
+  0.250000, -0.338330,  0.293969, -0.224292,  0.135299, -0.034654, -0.068975,  0.166664,
+ -0.250000,  0.311806, -0.346760,  0.351851, -0.326641,  0.273300, -0.196424,  0.102631,
+  0.250000, -0.351851,  0.346760, -0.338330,  0.326641, -0.311806,  0.293969, -0.273300,
+  0.250000, -0.224292,  0.196424, -0.166664,  0.135299, -0.102631,  0.068975, -0.034654
+};
+
+float iadst_16[256] = {
+  0.033094,  0.098087,  0.159534,  0.215215,  0.263118,  0.301511,  0.329007,  0.344612,
+  0.347761,  0.338341,  0.316693,  0.283599,  0.240255,  0.188227,  0.129396,  0.065889,
+  0.065889,  0.188227,  0.283599,  0.338341,  0.344612,  0.301511,  0.215215,  0.098087,
+ -0.033094, -0.159534, -0.263118, -0.329007, -0.347761, -0.316693, -0.240255, -0.129396,
+  0.098087,  0.263118,  0.344612,  0.316693,  0.188227,  0.000000, -0.188227, -0.316693,
+ -0.344612, -0.263118, -0.098087,  0.098087,  0.263118,  0.344612,  0.316693,  0.188227,
+  0.129396,  0.316693,  0.329007,  0.159534, -0.098087, -0.301511, -0.338341, -0.188227,
+  0.065889,  0.283599,  0.344612,  0.215215, -0.033094, -0.263118, -0.347761, -0.240255,
+  0.159534,  0.344612,  0.240255, -0.065889, -0.316693, -0.301511, -0.033094,  0.263118,
+  0.338341,  0.129396, -0.188227, -0.347761, -0.215215,  0.098087,  0.329007,  0.283599,
+  0.188227,  0.344612,  0.098087, -0.263118, -0.316693, -0.000000,  0.316693,  0.263118,
+ -0.098087, -0.344612, -0.188227,  0.188227,  0.344612,  0.098087, -0.263118, -0.316693,
+  0.215215,  0.316693, -0.065889, -0.347761, -0.098087,  0.301511,  0.240255, -0.188227,
+ -0.329007,  0.033094,  0.344612,  0.129396, -0.283599, -0.263118,  0.159534,  0.338341,
+  0.240255,  0.263118, -0.215215, -0.283599,  0.188227,  0.301511, -0.159534, -0.316693,
+  0.129396,  0.329007, -0.098087, -0.338341,  0.065889,  0.344612, -0.033094, -0.347761,
+  0.263118,  0.188227, -0.316693, -0.098087,  0.344612,  0.000000, -0.344612,  0.098087,
+  0.316693, -0.188227, -0.263118,  0.263118,  0.188227, -0.316693, -0.098087,  0.344612,
+  0.283599,  0.098087, -0.347761,  0.129396,  0.263118, -0.301511, -0.065889,  0.344612,
+ -0.159534, -0.240255,  0.316693,  0.033094, -0.338341,  0.188227,  0.215215, -0.329007,
+  0.301511,  0.000000, -0.301511,  0.301511,  0.000000, -0.301511,  0.301511,  0.000000,
+ -0.301511,  0.301511,  0.000000, -0.301511,  0.301511,  0.000000, -0.301511,  0.301511,
+  0.316693, -0.098087, -0.188227,  0.344612, -0.263118, -0.000000,  0.263118, -0.344612,
+  0.188227,  0.098087, -0.316693,  0.316693, -0.098087, -0.188227,  0.344612, -0.263118,
+  0.329007, -0.188227, -0.033094,  0.240255, -0.344612,  0.301511, -0.129396, -0.098087,
+  0.283599, -0.347761,  0.263118, -0.065889, -0.159534,  0.316693, -0.338341,  0.215215,
+  0.338341, -0.263118,  0.129396,  0.033094, -0.188227,  0.301511, -0.347761,  0.316693,
+ -0.215215,  0.065889,  0.098087, -0.240255,  0.329007, -0.344612,  0.283599, -0.159534,
+  0.344612, -0.316693,  0.263118, -0.188227,  0.098087,  0.000000, -0.098087,  0.188227,
+ -0.263118,  0.316693, -0.344612,  0.344612, -0.316693,  0.263118, -0.188227,  0.098087,
+  0.347761, -0.344612,  0.338341, -0.329007,  0.316693, -0.301511,  0.283599, -0.263118,
+  0.240255, -0.215215,  0.188227, -0.159534,  0.129396, -0.098087,  0.065889, -0.033094
+};
+#endif
+
+#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16
 void vp8_ihtllm_c(short *input, short *output, int pitch,
                   TX_TYPE tx_type, int tx_dim) {
   int i, j, k;
-  float bufa[64], bufb[64]; // buffers are for floating-point test purpose
+  float bufa[256], bufb[256]; // buffers are for floating-point test purpose
                             // the implementation could be simplified in
                             // conjunction with integer transform
 
@@ -126,11 +200,13 @@
   switch(tx_type) {
     case ADST_ADST :
     case ADST_DCT  :
-      ptv = (tx_dim == 4) ? &iadst_4[0] : &iadst_8[0];
+      ptv = (tx_dim == 4) ? &iadst_4[0] :
+                            ((tx_dim == 8) ? &iadst_8[0] : &iadst_16[0]);
       break;
 
     default :
-      ptv = (tx_dim == 4) ? &idct_4[0] : &idct_8[0];
+      ptv = (tx_dim == 4) ? &idct_4[0] :
+                            ((tx_dim == 8) ? &idct_8[0] : &idct_16[0]);
       break;
   }
 
@@ -155,11 +231,13 @@
   switch(tx_type) {
     case ADST_ADST :
     case  DCT_ADST :
-      pth = (tx_dim == 4) ? &iadst_4[0] : &iadst_8[0];
+      pth = (tx_dim == 4) ? &iadst_4[0] :
+                            ((tx_dim == 8) ? &iadst_8[0] : &iadst_16[0]);
       break;
 
     default :
-      pth = (tx_dim == 4) ? &idct_4[0] : &idct_8[0];
+      pth = (tx_dim == 4) ? &idct_4[0] :
+                            ((tx_dim == 8) ? &idct_8[0] : &idct_16[0]);
       break;
   }
 
@@ -178,11 +256,13 @@
     switch(tx_type) {
       case ADST_ADST :
       case  DCT_ADST :
-        pth = (tx_dim == 4) ? &iadst_4[0] : &iadst_8[0];
+        pth = (tx_dim == 4) ? &iadst_4[0] :
+                              ((tx_dim == 8) ? &iadst_8[0] : &iadst_16[0]);
         break;
 
       default :
-        pth = (tx_dim == 4) ? &idct_4[0] : &idct_8[0];
+        pth = (tx_dim == 4) ? &idct_4[0] :
+                              ((tx_dim == 8) ? &idct_8[0] : &idct_16[0]);
         break;
     }
   }
@@ -692,7 +772,7 @@
 }
 
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 #if 0
 // Keep a really bad float version as reference for now.
 void vp8_short_idct16x16_c(short *input, short *output, int pitch) {
--- a/vp8/common/invtrans.c
+++ b/vp8/common/invtrans.c
@@ -171,7 +171,7 @@
 
 }
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 void vp8_inverse_transform_b_16x16(const vp8_idct_rtcd_vtable_t *rtcd,
                                    short *input_dqcoeff,
                                    short *output_coeff, int pitch) {
--- a/vp8/common/invtrans.h
+++ b/vp8/common/invtrans.h
@@ -30,7 +30,7 @@
 extern void vp8_inverse_transform_mby_8x8(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *xd);
 extern void vp8_inverse_transform_mbuv_8x8(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *xd);
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 extern void vp8_inverse_transform_b_16x16(const vp8_idct_rtcd_vtable_t *rtcd,
                                           short *input_dqcoeff, short *output_coeff,
                                           int pitch);
--- a/vp8/common/loopfilter.c
+++ b/vp8/common/loopfilter.c
@@ -336,7 +336,7 @@
             (y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi);
 
           if (!skip_lf
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
               && tx_type != TX_16X16
 #endif
               ) {
@@ -361,7 +361,7 @@
             (y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi);
 
           if (!skip_lf
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
               && tx_type != TX_16X16
 #endif
               ) {
@@ -479,7 +479,7 @@
             (y_ptr, 0, 0, post->y_stride, 0, &lfi);
 
           if (!skip_lf
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
               && tx_type != TX_16X16
 #endif
               ) {
@@ -497,7 +497,7 @@
             (y_ptr, 0, 0, post->y_stride, 0, &lfi);
 
           if (!skip_lf
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
               && tx_type != TX_16X16
 #endif
               ) {
--- a/vp8/common/onyxc_int.h
+++ b/vp8/common/onyxc_int.h
@@ -52,7 +52,7 @@
   vp8_prob mbsplit_prob [VP8_NUMMBSPLITS - 1];
   vp8_prob coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
   vp8_prob coef_probs_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   vp8_prob coef_probs_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
 #endif
   MV_CONTEXT mvc[2];
@@ -76,7 +76,7 @@
       [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
   vp8_prob pre_coef_probs_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS]
       [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   vp8_prob pre_coef_probs_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS]
       [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
 #endif
@@ -84,7 +84,7 @@
       [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];
   unsigned int coef_counts_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS]
       [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   unsigned int coef_counts_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS]
       [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];
 #endif
--- a/vp8/common/reconinter.c
+++ b/vp8/common/reconinter.c
@@ -750,7 +750,6 @@
 }
 
 
-
 void vp8_build_1st_inter16x16_predictors_mb(MACROBLOCKD *xd,
                                             unsigned char *dst_y,
                                             unsigned char *dst_u,
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -125,7 +125,7 @@
     xd->block[i].dequant = pc->Y1dequant[QIndex];
   }
 
-#if CONFIG_HYBRIDTRANSFORM
+#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM16X16
   xd->q_index = QIndex;
 #endif
 
@@ -234,17 +234,21 @@
   int orig_skip_flag = xd->mode_info_context->mbmi.mb_skip_coeff;
 #endif
 
-#if CONFIG_HYBRIDTRANSFORM
+#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16
   int QIndex;
   int active_ht;
 #endif
 
+#if CONFIG_HYBRIDTRANSFORM16X16
+  int active_ht16;
+#endif
+
   // re-initialize macroblock dequantizer before detokenization
   if (xd->segmentation_enabled)
     mb_init_dequantizer(pbi, xd);
 
   if (pbi->common.frame_type == KEY_FRAME) {
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
     if (xd->mode_info_context->mbmi.mode <= TM_PRED ||
         xd->mode_info_context->mbmi.mode == NEWMV ||
         xd->mode_info_context->mbmi.mode == ZEROMV ||
@@ -263,7 +267,7 @@
     else
       xd->mode_info_context->mbmi.txfm_size = TX_4X4;
   } else {
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
     if (xd->mode_info_context->mbmi.mode <= TM_PRED ||
         xd->mode_info_context->mbmi.mode == NEWMV ||
         xd->mode_info_context->mbmi.mode == ZEROMV ||
@@ -316,7 +320,7 @@
       xd->block[i].eob = 0;
       xd->eobs[i] = 0;
     }
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
     if (tx_type == TX_16X16)
       eobtotal = vp8_decode_mb_tokens_16x16(pbi, xd);
     else
@@ -377,7 +381,7 @@
 //  if (xd->segmentation_enabled)
 //    mb_init_dequantizer(pbi, xd);
 
-#if CONFIG_HYBRIDTRANSFORM
+#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16
   // parse transform types for intra 4x4 mode
   QIndex = xd->q_index;
   active_ht = (QIndex < ACTIVE_HT);
@@ -391,6 +395,10 @@
   }
 #endif
 
+#if CONFIG_HYBRIDTRANSFORM16X16
+  active_ht16 = (QIndex < ACTIVE_HT16);
+#endif
+
   /* do prediction */
   if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) {
 #if CONFIG_SUPERBLOCKS
@@ -537,11 +545,28 @@
   } else {
     BLOCKD *b = &xd->block[24];
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
     if (tx_type == TX_16X16) {
+#if CONFIG_HYBRIDTRANSFORM16X16
+      if (mode < I8X8_PRED && active_ht16) {
+        BLOCKD *bd = &xd->block[0];
+        TX_TYPE txfm;
+        txfm_map(bd, pred_mode_conv(mode));
+        txfm = bd->bmi.as_mode.tx_type;
+
+        vp8_ht_dequant_idct_add_16x16_c(txfm, xd->qcoeff,
+                                        xd->block[0].dequant, xd->predictor,
+                                        xd->dst.y_buffer, 16, xd->dst.y_stride);
+      } else {
+        vp8_dequant_idct_add_16x16_c(xd->qcoeff, xd->block[0].dequant,
+                                     xd->predictor, xd->dst.y_buffer,
+                                     16, xd->dst.y_stride);
+      }
+#else
       vp8_dequant_idct_add_16x16_c(xd->qcoeff, xd->block[0].dequant,
                                    xd->predictor, xd->dst.y_buffer,
                                    16, xd->dst.y_stride);
+#endif
     }
     else
 #endif
@@ -641,7 +666,7 @@
     if (!xd->mode_info_context->mbmi.encoded_as_sb) {
 #endif
       if (tx_type == TX_8X8
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
       || tx_type == TX_16X16
 #endif
       )
@@ -1047,7 +1072,7 @@
         }
   }
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   // 16x16
   if (vp8_read_bit(bc)) {
     // read coef probability tree
@@ -1430,7 +1455,7 @@
 
   vp8_copy(pbi->common.fc.pre_coef_probs, pbi->common.fc.coef_probs);
   vp8_copy(pbi->common.fc.pre_coef_probs_8x8, pbi->common.fc.coef_probs_8x8);
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   vp8_copy(pbi->common.fc.pre_coef_probs_16x16, pbi->common.fc.coef_probs_16x16);
 #endif
   vp8_copy(pbi->common.fc.pre_ymode_prob, pbi->common.fc.ymode_prob);
@@ -1443,7 +1468,7 @@
   vp8_copy(pbi->common.fc.pre_mvc_hp, pbi->common.fc.mvc_hp);
   vp8_zero(pbi->common.fc.coef_counts);
   vp8_zero(pbi->common.fc.coef_counts_8x8);
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   vp8_zero(pbi->common.fc.coef_counts_16x16);
 #endif
   vp8_zero(pbi->common.fc.ymode_counts);
--- a/vp8/decoder/dequantize.c
+++ b/vp8/decoder/dequantize.c
@@ -468,7 +468,48 @@
 #endif
 }
 
-#if CONFIG_TX16X16
+#if CONFIG_HYBRIDTRANSFORM16X16
+void vp8_ht_dequant_idct_add_16x16_c(TX_TYPE tx_type, short *input, short *dq,
+                                     unsigned char *pred, unsigned char *dest,
+                                     int pitch, int stride) {
+  short output[256];
+  short *diff_ptr = output;
+  int r, c, i;
+  // Dequantize, inverse hybrid-transform, add to pred, clamp, write to dest.
+  input[0]= input[0] * dq[0];
+
+  // dequantize the other 255 coefficients with dq[1] (dq[0] was used above)
+  for (i = 1; i < 256; i++)
+    input[i] = input[i] * dq[1];
+
+  // inverse hybrid transform
+  vp8_ihtllm_c(input, output, 32, tx_type, 16);
+
+  // the idct halves ( >> 1) the pitch
+  // vp8_short_idct16x16_c(input, output, 32);
+  // zero 512 bytes of input (256 coefficients when sizeof(short) == 2)
+  vpx_memset(input, 0, 512);
+  // add the residual to the prediction, clamp to [0, 255], store in dest
+  for (r = 0; r < 16; r++) {
+    for (c = 0; c < 16; c++) {
+      int a = diff_ptr[c] + pred[c];
+
+      if (a < 0)
+        a = 0;
+      else if (a > 255)
+        a = 255;
+
+      dest[c] = (unsigned char) a;
+    }
+    // next row: diff_ptr is 16 wide, pred steps by pitch, dest by stride
+    dest += stride;
+    diff_ptr += 16;
+    pred += pitch;
+  }
+}
+#endif
+
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 void vp8_dequant_idct_add_16x16_c(short *input, short *dq, unsigned char *pred,
                                   unsigned char *dest, int pitch, int stride) {
   short output[256];
--- a/vp8/decoder/dequantize.h
+++ b/vp8/decoder/dequantize.h
@@ -145,7 +145,7 @@
 #endif
 extern prototype_dequant_idct_add_uv_block_8x8(vp8_dequant_idct_add_uv_block_8x8);
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 #ifndef vp8_dequant_idct_add_16x16
 #define vp8_dequant_idct_add_16x16 vp8_dequant_idct_add_16x16_c
 #endif
@@ -184,7 +184,7 @@
   vp8_dequant_dc_idct_add_y_block_fn_t_8x8 dc_idct_add_y_block_8x8;
   vp8_dequant_idct_add_y_block_fn_t_8x8    idct_add_y_block_8x8;
   vp8_dequant_idct_add_uv_block_fn_t_8x8   idct_add_uv_block_8x8;
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   vp8_dequant_idct_add_fn_t            idct_add_16x16;
 #endif
 } vp8_dequant_rtcd_vtable_t;
@@ -199,6 +199,12 @@
 void vp8_ht_dequant_idct_add_8x8_c(TX_TYPE tx_type, short *input, short *dq,
                                    unsigned char *pred, unsigned char *dest,
                                    int pitch, int stride);
+#endif
+
+#if CONFIG_HYBRIDTRANSFORM16X16
+void vp8_ht_dequant_idct_add_16x16_c(TX_TYPE tx_type, short *input, short *dq,
+                                     unsigned char *pred, unsigned char *dest,
+                                     int pitch, int stride);
 #endif
 
 #if CONFIG_SUPERBLOCKS
--- a/vp8/decoder/detokenize.c
+++ b/vp8/decoder/detokenize.c
@@ -39,7 +39,7 @@
   7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X,
 };
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 DECLARE_ALIGNED(16, const int, coef_bands_x_16x16[256]) = {
   0 * OCB_X, 1 * OCB_X, 2 * OCB_X, 3 * OCB_X, 5 * OCB_X, 4 * OCB_X, 4 * OCB_X, 5 * OCB_X, 5 * OCB_X, 3 * OCB_X, 6 * OCB_X, 3 * OCB_X, 5 * OCB_X, 4 * OCB_X, 6 * OCB_X, 6 * OCB_X,
   6 * OCB_X, 5 * OCB_X, 5 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X,
@@ -105,7 +105,7 @@
   if ((xd->mode_info_context->mbmi.mode != B_PRED &&
       xd->mode_info_context->mbmi.mode != I8X8_PRED &&
       xd->mode_info_context->mbmi.mode != SPLITMV)
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
       || xd->mode_info_context->mbmi.txfm_size == TX_16X16
 #endif
       ) {
@@ -225,7 +225,7 @@
   }
 }
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 void static count_tokens_16x16(INT16 *qcoeff_ptr, int block, int type,
                                ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                                int eob, int seg_eob, FRAME_CONTEXT *fc) {
@@ -302,7 +302,7 @@
     case TX_8X8:
       coef_probs = fc->coef_probs_8x8[type][0][0];
       break;
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
     case TX_16X16:
       coef_probs = fc->coef_probs_16x16[type][0][0];
       break;
@@ -398,7 +398,7 @@
   }
   else if (block_type == TX_8X8)
     count_tokens_8x8(qcoeff_ptr, i, type, a, l, c, seg_eob, fc);
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   else
     count_tokens_16x16(qcoeff_ptr, i, type, a, l, c, seg_eob, fc);
 #endif
@@ -405,7 +405,7 @@
   return c;
 }
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 int vp8_decode_mb_tokens_16x16(VP8D_COMP *pbi, MACROBLOCKD *xd) {
   ENTROPY_CONTEXT* const A = (ENTROPY_CONTEXT *)xd->above_context;
   ENTROPY_CONTEXT* const L = (ENTROPY_CONTEXT *)xd->left_context;
@@ -417,6 +417,7 @@
   INT16 *qcoeff_ptr = &xd->qcoeff[0];
 
   type = PLANE_TYPE_Y_WITH_DC;
+
   if (seg_active)
       seg_eob = get_segdata(xd, segment_id, SEG_LVL_EOB);
   else
--- a/vp8/decoder/detokenize.h
+++ b/vp8/decoder/detokenize.h
@@ -17,7 +17,7 @@
 void vp8_reset_mb_tokens_context(MACROBLOCKD *xd);
 int vp8_decode_mb_tokens(VP8D_COMP *, MACROBLOCKD *);
 int vp8_decode_mb_tokens_8x8(VP8D_COMP *, MACROBLOCKD *);
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 int vp8_decode_mb_tokens_16x16(VP8D_COMP *, MACROBLOCKD *);
 #endif
 
--- a/vp8/decoder/generic/dsystemdependent.c
+++ b/vp8/decoder/generic/dsystemdependent.c
@@ -22,7 +22,7 @@
   pbi->mb.rtcd                     = &pbi->common.rtcd;
   pbi->dequant.block_2x2           = vp8_dequantize_b_2x2_c;
   pbi->dequant.idct_add_8x8        = vp8_dequant_idct_add_8x8_c;
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   pbi->dequant.idct_add_16x16      = vp8_dequant_idct_add_16x16_c;
 #endif
   pbi->dequant.dc_idct_add_8x8     = vp8_dequant_dc_idct_add_8x8_c;
--- a/vp8/decoder/onyxd_int.h
+++ b/vp8/decoder/onyxd_int.h
@@ -54,7 +54,7 @@
 
   vp8_prob const *coef_probs[BLOCK_TYPES];
   vp8_prob const *coef_probs_8x8[BLOCK_TYPES_8X8];
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   vp8_prob const *coef_probs_16X16[BLOCK_TYPES_16X16];
 #endif
 
--- a/vp8/encoder/bitstream.c
+++ b/vp8/encoder/bitstream.c
@@ -46,7 +46,7 @@
                                   [COEF_BANDS]
                                   [PREV_COEF_CONTEXTS]
                                   [ENTROPY_NODES] [2];
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 unsigned int tree_update_hist_16x16 [BLOCK_TYPES_16X16]
                                     [COEF_BANDS]
                                     [PREV_COEF_CONTEXTS]
@@ -1422,7 +1422,7 @@
     }
   }
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   //16x16
   for (i = 0; i < BLOCK_TYPES_16X16; ++i) {
     for (j = 0; j < COEF_BANDS; ++j) {
@@ -1794,7 +1794,7 @@
     }
   }
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   // 16x16
   /* dry run to see if update is necessary */
   update[0] = update[1] = 0;
@@ -2304,7 +2304,7 @@
 
   vp8_copy(cpi->common.fc.pre_coef_probs, cpi->common.fc.coef_probs);
   vp8_copy(cpi->common.fc.pre_coef_probs_8x8, cpi->common.fc.coef_probs_8x8);
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   vp8_copy(cpi->common.fc.pre_coef_probs_16x16, cpi->common.fc.coef_probs_16x16);
 #endif
   vp8_copy(cpi->common.fc.pre_ymode_prob, cpi->common.fc.ymode_prob);
@@ -2440,7 +2440,7 @@
     fprintf(f, "  },\n");
   }
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   fprintf(f, "const vp8_prob\n"
           "vp8_coef_update_probs_16x16[BLOCK_TYPES_16X16]\n"
           "                           [COEF_BANDS]\n"
@@ -2474,7 +2474,7 @@
   f = fopen("treeupdate.bin", "wb");
   fwrite(tree_update_hist, sizeof(tree_update_hist), 1, f);
   fwrite(tree_update_hist_8x8, sizeof(tree_update_hist_8x8), 1, f);
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   fwrite(tree_update_hist_16x16, sizeof(tree_update_hist_16x16), 1, f);
 #endif
   fclose(f);
--- a/vp8/encoder/block.h
+++ b/vp8/encoder/block.h
@@ -35,12 +35,12 @@
   unsigned char *quant_shift;
   short *zbin;
   short *zbin_8x8;
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   short *zbin_16x16;
 #endif
   short *zrun_zbin_boost;
   short *zrun_zbin_boost_8x8;
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   short *zrun_zbin_boost_16x16;
 #endif
   short *round;
@@ -55,7 +55,7 @@
 
   int eob_max_offset;
   int eob_max_offset_8x8;
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   int eob_max_offset_16x16;
 #endif
 } BLOCK;
@@ -173,11 +173,11 @@
   void (*quantize_b)(BLOCK *b, BLOCKD *d);
   void (*quantize_b_pair)(BLOCK *b1, BLOCK *b2, BLOCKD *d0, BLOCKD *d1);
   void (*vp8_short_fdct8x8)(short *input, short *output, int pitch);
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   void (*vp8_short_fdct16x16)(short *input, short *output, int pitch);
 #endif
   void (*short_fhaar2x2)(short *input, short *output, int pitch);
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   void (*quantize_b_16x16)(BLOCK *b, BLOCKD *d);
 #endif
   void (*quantize_b_8x8)(BLOCK *b, BLOCKD *d);
--- a/vp8/encoder/dct.c
+++ b/vp8/encoder/dct.c
@@ -13,7 +13,7 @@
 #include "vpx_ports/config.h"
 #include "vp8/common/idct.h"
 
-#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8
+#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM16X16
 
 #include "vp8/common/blockd.h"
 
@@ -72,6 +72,78 @@
 };
 #endif
 
+#if CONFIG_HYBRIDTRANSFORM16X16 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8
+float dct_16[256] = {  // DCT kernel vp8_fht_c picks for tx_dim 16
+  0.250000,  0.250000,  0.250000,  0.250000,  0.250000,  0.250000,  0.250000,  0.250000,
+  0.250000,  0.250000,  0.250000,  0.250000,  0.250000,  0.250000,  0.250000,  0.250000,
+  0.351851,  0.338330,  0.311806,  0.273300,  0.224292,  0.166664,  0.102631,  0.034654,
+ -0.034654, -0.102631, -0.166664, -0.224292, -0.273300, -0.311806, -0.338330, -0.351851,
+  0.346760,  0.293969,  0.196424,  0.068975, -0.068975, -0.196424, -0.293969, -0.346760,
+ -0.346760, -0.293969, -0.196424, -0.068975,  0.068975,  0.196424,  0.293969,  0.346760,
+  0.338330,  0.224292,  0.034654, -0.166664, -0.311806, -0.351851, -0.273300, -0.102631,
+  0.102631,  0.273300,  0.351851,  0.311806,  0.166664, -0.034654, -0.224292, -0.338330,
+  0.326641,  0.135299, -0.135299, -0.326641, -0.326641, -0.135299,  0.135299,  0.326641,
+  0.326641,  0.135299, -0.135299, -0.326641, -0.326641, -0.135299,  0.135299,  0.326641,
+  0.311806,  0.034654, -0.273300, -0.338330, -0.102631,  0.224292,  0.351851,  0.166664,
+ -0.166664, -0.351851, -0.224292,  0.102631,  0.338330,  0.273300, -0.034654, -0.311806,
+  0.293969, -0.068975, -0.346760, -0.196424,  0.196424,  0.346760,  0.068975, -0.293969,
+ -0.293969,  0.068975,  0.346760,  0.196424, -0.196424, -0.346760, -0.068975,  0.293969,
+  0.273300, -0.166664, -0.338330,  0.034654,  0.351851,  0.102631, -0.311806, -0.224292,
+  0.224292,  0.311806, -0.102631, -0.351851, -0.034654,  0.338330,  0.166664, -0.273300,
+  0.250000, -0.250000, -0.250000,  0.250000,  0.250000, -0.250000, -0.250000,  0.250000,
+  0.250000, -0.250000, -0.250000,  0.250000,  0.250000, -0.250000, -0.250000,  0.250000,
+  0.224292, -0.311806, -0.102631,  0.351851, -0.034654, -0.338330,  0.166664,  0.273300,
+ -0.273300, -0.166664,  0.338330,  0.034654, -0.351851,  0.102631,  0.311806, -0.224292,
+  0.196424, -0.346760,  0.068975,  0.293969, -0.293969, -0.068975,  0.346760, -0.196424,
+ -0.196424,  0.346760, -0.068975, -0.293969,  0.293969,  0.068975, -0.346760,  0.196424,
+  0.166664, -0.351851,  0.224292,  0.102631, -0.338330,  0.273300,  0.034654, -0.311806,
+  0.311806, -0.034654, -0.273300,  0.338330, -0.102631, -0.224292,  0.351851, -0.166664,
+  0.135299, -0.326641,  0.326641, -0.135299, -0.135299,  0.326641, -0.326641,  0.135299,
+  0.135299, -0.326641,  0.326641, -0.135299, -0.135299,  0.326641, -0.326641,  0.135299,
+  0.102631, -0.273300,  0.351851, -0.311806,  0.166664,  0.034654, -0.224292,  0.338330,
+ -0.338330,  0.224292, -0.034654, -0.166664,  0.311806, -0.351851,  0.273300, -0.102631,
+  0.068975, -0.196424,  0.293969, -0.346760,  0.346760, -0.293969,  0.196424, -0.068975,
+ -0.068975,  0.196424, -0.293969,  0.346760, -0.346760,  0.293969, -0.196424,  0.068975,
+  0.034654, -0.102631,  0.166664, -0.224292,  0.273300, -0.311806,  0.338330, -0.351851,
+  0.351851, -0.338330,  0.311806, -0.273300,  0.224292, -0.166664,  0.102631, -0.034654
+};
+
+float adst_16[256] = {  // ADST kernel vp8_fht_c picks for tx_dim 16
+  0.033094,  0.065889,  0.098087,  0.129396,  0.159534,  0.188227,  0.215215,  0.240255,
+  0.263118,  0.283599,  0.301511,  0.316693,  0.329007,  0.338341,  0.344612,  0.347761,
+  0.098087,  0.188227,  0.263118,  0.316693,  0.344612,  0.344612,  0.316693,  0.263118,
+  0.188227,  0.098087,  0.000000, -0.098087, -0.188227, -0.263118, -0.316693, -0.344612,
+  0.159534,  0.283599,  0.344612,  0.329007,  0.240255,  0.098087, -0.065889, -0.215215,
+ -0.316693, -0.347761, -0.301511, -0.188227, -0.033094,  0.129396,  0.263118,  0.338341,
+  0.215215,  0.338341,  0.316693,  0.159534, -0.065889, -0.263118, -0.347761, -0.283599,
+ -0.098087,  0.129396,  0.301511,  0.344612,  0.240255,  0.033094, -0.188227, -0.329007,
+  0.263118,  0.344612,  0.188227, -0.098087, -0.316693, -0.316693, -0.098087,  0.188227,
+  0.344612,  0.263118,  0.000000, -0.263118, -0.344612, -0.188227,  0.098087,  0.316693,
+  0.301511,  0.301511,  0.000000, -0.301511, -0.301511, -0.000000,  0.301511,  0.301511,
+  0.000000, -0.301511, -0.301511, -0.000000,  0.301511,  0.301511,  0.000000, -0.301511,
+  0.329007,  0.215215, -0.188227, -0.338341, -0.033094,  0.316693,  0.240255, -0.159534,
+ -0.344612, -0.065889,  0.301511,  0.263118, -0.129396, -0.347761, -0.098087,  0.283599,
+  0.344612,  0.098087, -0.316693, -0.188227,  0.263118,  0.263118, -0.188227, -0.316693,
+  0.098087,  0.344612,  0.000000, -0.344612, -0.098087,  0.316693,  0.188227, -0.263118,
+  0.347761, -0.033094, -0.344612,  0.065889,  0.338341, -0.098087, -0.329007,  0.129396,
+  0.316693, -0.159534, -0.301511,  0.188227,  0.283599, -0.215215, -0.263118,  0.240255,
+  0.338341, -0.159534, -0.263118,  0.283599,  0.129396, -0.344612,  0.033094,  0.329007,
+ -0.188227, -0.240255,  0.301511,  0.098087, -0.347761,  0.065889,  0.316693, -0.215215,
+  0.316693, -0.263118, -0.098087,  0.344612, -0.188227, -0.188227,  0.344612, -0.098087,
+ -0.263118,  0.316693,  0.000000, -0.316693,  0.263118,  0.098087, -0.344612,  0.188227,
+  0.283599, -0.329007,  0.098087,  0.215215, -0.347761,  0.188227,  0.129396, -0.338341,
+  0.263118,  0.033094, -0.301511,  0.316693, -0.065889, -0.240255,  0.344612, -0.159534,
+  0.240255, -0.347761,  0.263118, -0.033094, -0.215215,  0.344612, -0.283599,  0.065889,
+  0.188227, -0.338341,  0.301511, -0.098087, -0.159534,  0.329007, -0.316693,  0.129396,
+  0.188227, -0.316693,  0.344612, -0.263118,  0.098087,  0.098087, -0.263118,  0.344612,
+ -0.316693,  0.188227,  0.000000, -0.188227,  0.316693, -0.344612,  0.263118, -0.098087,
+  0.129396, -0.240255,  0.316693, -0.347761,  0.329007, -0.263118,  0.159534, -0.033094,
+ -0.098087,  0.215215, -0.301511,  0.344612, -0.338341,  0.283599, -0.188227,  0.065889,
+  0.065889, -0.129396,  0.188227, -0.240255,  0.283599, -0.316693,  0.338341, -0.347761,
+  0.344612, -0.329007,  0.301511, -0.263118,  0.215215, -0.159534,  0.098087, -0.033094
+};
+#endif
+
 static const int xC1S7 = 16069;
 static const int xC2S6 = 15137;
 static const int xC3S5 = 13623;
@@ -327,11 +399,11 @@
 
 }
 
-#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM
+#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16
 void vp8_fht_c(short *input, short *output, int pitch,
                TX_TYPE tx_type, int tx_dim) {
   int i, j, k;
-  float bufa[64], bufb[64]; // buffers are for floating-point test purpose
+  float bufa[256], bufb[256]; // buffers are for floating-point test purpose
                              // the implementation could be simplified in
                              // conjunction with integer transform
   short *ip = input;
@@ -359,11 +431,13 @@
   switch(tx_type) {
     case ADST_ADST :
     case ADST_DCT  :
-      ptv = (tx_dim == 4) ? &adst_4[0] : &adst_8[0];
+      ptv = (tx_dim == 4) ? &adst_4[0] :
+                            ((tx_dim == 8) ? &adst_8[0] : &adst_16[0]);
       break;
 
     default :
-      ptv = (tx_dim == 4) ? &dct_4[0] : &dct_8[0];
+      ptv = (tx_dim == 4) ? &dct_4[0] :
+                            ((tx_dim == 8) ? &dct_8[0] : &dct_16[0]);
       break;
   }
 
@@ -387,11 +461,13 @@
   switch(tx_type) {
     case ADST_ADST :
     case  DCT_ADST :
-      pth = (tx_dim == 4) ? &adst_4[0] : &adst_8[0];
+      pth = (tx_dim == 4) ? &adst_4[0] :
+                            ((tx_dim == 8) ? &adst_8[0] : &adst_16[0]);
       break;
 
     default :
-      pth = (tx_dim == 4) ? &dct_4[0] : &dct_8[0];
+      pth = (tx_dim == 4) ? &dct_4[0] :
+                            ((tx_dim == 8) ? &dct_8[0] : &dct_16[0]);
       break;
   }
 
@@ -410,11 +486,13 @@
     switch(tx_type) {
       case ADST_ADST :
       case  DCT_ADST :
-        pth = (tx_dim == 4) ? &adst_4[0] : &adst_8[0];
+        pth = (tx_dim == 4) ? &adst_4[0] :
+                              ((tx_dim == 8) ? &adst_8[0] : &adst_16[0]);
         break;
 
       default :
-        pth = (tx_dim == 4) ? &dct_4[0] : &dct_8[0];
+        pth = (tx_dim == 4) ? &dct_4[0] :
+                              ((tx_dim == 8) ? &dct_8[0] : &dct_16[0]);
         break;
     }
   }
@@ -608,7 +686,7 @@
 }
 #endif
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 static const double C1 = 0.995184726672197;
 static const double C2 = 0.98078528040323;
 static const double C3 = 0.956940335732209;
--- a/vp8/encoder/dct.h
+++ b/vp8/encoder/dct.h
@@ -26,12 +26,12 @@
 #endif
 
 
-#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM
+#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM16X16
 void vp8_fht_c(short *input, short *output, int pitch,
                TX_TYPE tx_type, int tx_dim);
 #endif
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 #ifndef vp8_fdct_short16x16
 #define vp8_fdct_short16x16 vp8_short_fdct16x16_c
 #endif
@@ -81,7 +81,7 @@
 
 typedef prototype_fdct(*vp8_fdct_fn_t);
 typedef struct {
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   vp8_fdct_fn_t    short16x16;
 #endif
   vp8_fdct_fn_t    short8x8;
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -1361,7 +1361,7 @@
   vp8_zero(cpi->MVcount_hp);
   vp8_zero(cpi->coef_counts);
   vp8_zero(cpi->coef_counts_8x8);
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   vp8_zero(cpi->coef_counts_16x16);
 #endif
 
@@ -1825,7 +1825,7 @@
   }
 
   /* test code: set transform size based on mode selection */
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   if (mbmi->mode <= TM_PRED) {
     mbmi->txfm_size = TX_16X16;
     cpi->t16x16_count++;
@@ -1922,7 +1922,7 @@
   set_pred_flag(xd, PRED_REF, ref_pred_flag);
 
   /* test code: set transform size based on mode selection */
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   if (mbmi->mode <= TM_PRED || mbmi->mode == NEWMV || mbmi->mode == ZEROMV ||
       mbmi->mode == NEARMV ||  mbmi->mode == NEARESTMV) {
     mbmi->txfm_size = TX_16X16;
--- a/vp8/encoder/encodeintra.c
+++ b/vp8/encoder/encodeintra.c
@@ -127,6 +127,9 @@
   BLOCK *b = &x->block[0];
 
   int tx_type = x->e_mbd.mode_info_context->mbmi.txfm_size;
+#if CONFIG_HYBRIDTRANSFORM16X16
+  TX_TYPE txfm_type = x->e_mbd.mode_info_context->bmi[0].as_mode.tx_type;
+#endif
 
 #if CONFIG_COMP_INTRA_PRED
   if (x->e_mbd.mode_info_context->mbmi.second_mode == (MB_PREDICTION_MODE)(DC_PRED - 1))
@@ -139,9 +142,22 @@
 
   ENCODEMB_INVOKE(&rtcd->encodemb, submby)(x->src_diff, *(b->base_src), x->e_mbd.predictor, b->src_stride);
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   if (tx_type == TX_16X16)
+#if CONFIG_HYBRIDTRANSFORM16X16
+  {
+    if ((x->e_mbd.mode_info_context->mbmi.mode < I8X8_PRED) &&
+        (x->q_index < ACTIVE_HT16)) {
+      BLOCKD  *bd = &x->e_mbd.block[0];
+      txfm_map(bd, pred_mode_conv(x->e_mbd.mode_info_context->mbmi.mode));
+      txfm_type = bd->bmi.as_mode.tx_type;
+      vp8_fht_c(b->src_diff, b->coeff, 32, txfm_type, 16);
+    } else
+      vp8_transform_intra_mby_16x16(x);
+  }
+#else
     vp8_transform_intra_mby_16x16(x);
+#endif
   else
 #endif
   if (tx_type == TX_8X8)
@@ -149,7 +165,7 @@
   else
     vp8_transform_intra_mby(x);
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   if (tx_type == TX_16X16)
     vp8_quantize_mby_16x16(x);
   else
@@ -160,7 +176,7 @@
     vp8_quantize_mby(x);
 
   if (x->optimize) {
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
     if (tx_type == TX_16X16)
       vp8_optimize_mby_16x16(x, rtcd);
     else
@@ -171,9 +187,20 @@
       vp8_optimize_mby(x, rtcd);
   }
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   if (tx_type == TX_16X16)
+#if CONFIG_HYBRIDTRANSFORM16X16
+  {
+    if ((x->e_mbd.mode_info_context->mbmi.mode < I8X8_PRED) &&
+        (x->q_index < ACTIVE_HT16)) {
+      BLOCKD *bd = &x->e_mbd.block[0];
+      vp8_ihtllm_c(bd->dqcoeff, bd->diff, 32, txfm_type, 16);
+    } else
+      vp8_inverse_transform_mby_16x16(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
+  }
+#else
     vp8_inverse_transform_mby_16x16(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
+#endif
   else
 #endif
   if (tx_type == TX_8X8)
@@ -219,7 +246,7 @@
 
 void vp8_encode_intra16x16mbuv(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) {
   int tx_type = x->e_mbd.mode_info_context->mbmi.txfm_size;
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   if (tx_type == TX_16X16) tx_type = TX_8X8; // 16x16 for U and V should default to 8x8 behavior.
 #endif
 #if CONFIG_COMP_INTRA_PRED
--- a/vp8/encoder/encodemb.c
+++ b/vp8/encoder/encodemb.c
@@ -300,7 +300,7 @@
   }
 }
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 void vp8_transform_mbuv_16x16(MACROBLOCK *x) {
   int i;
 
@@ -880,7 +880,7 @@
 
 
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 void optimize_b_16x16(MACROBLOCK *mb, int i, int type,
                       ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                       const VP8_ENCODER_RTCD *rtcd) {
@@ -1105,7 +1105,7 @@
 
   vp8_subtract_mb(rtcd, x);
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   if (tx_type == TX_16X16)
     vp8_transform_mb_16x16(x);
   else
@@ -1115,7 +1115,7 @@
   else
     transform_mb(x);
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   if (tx_type == TX_16X16)
     vp8_quantize_mb_16x16(x);
   else
@@ -1126,7 +1126,7 @@
     vp8_quantize_mb(x);
 
   if (x->optimize) {
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
     if (tx_type == TX_16X16)
       optimize_mb_16x16(x, rtcd);
     else
@@ -1137,7 +1137,7 @@
       optimize_mb(x, rtcd);
   }
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   if (tx_type == TX_16X16)
     vp8_inverse_transform_mb_16x16(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
   else
@@ -1214,7 +1214,7 @@
 
   ENCODEMB_INVOKE(&rtcd->encodemb, submby)(x->src_diff, *(b->base_src), x->e_mbd.predictor, b->src_stride);
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   if (tx_type == TX_16X16)
     vp8_transform_mby_16x16(x);
   else
@@ -1226,7 +1226,7 @@
 
   vp8_quantize_mby(x);
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   if (tx_type == TX_16X16)
     vp8_inverse_transform_mby_16x16(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
   else
--- a/vp8/encoder/encodemb.h
+++ b/vp8/encoder/encodemb.h
@@ -121,7 +121,7 @@
 void vp8_optimize_mby_8x8(MACROBLOCK *x, const struct VP8_ENCODER_RTCD *rtcd);
 void vp8_optimize_mbuv_8x8(MACROBLOCK *x, const struct VP8_ENCODER_RTCD *rtcd);
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 void vp8_transform_mb_16x16(MACROBLOCK *mb);
 void vp8_transform_mby_16x16(MACROBLOCK *x);
 void vp8_transform_mbuv_16x16(MACROBLOCK *x);
--- a/vp8/encoder/generic/csystemdependent.c
+++ b/vp8/encoder/generic/csystemdependent.c
@@ -99,7 +99,7 @@
   cpi->rtcd.variance.getmbss               = vp8_get_mb_ss_c;
 
   cpi->rtcd.fdct.short8x8                  = vp8_short_fdct8x8_c;
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   cpi->rtcd.fdct.short16x16                = vp8_short_fdct16x16_c;
 #endif
   cpi->rtcd.fdct.haar_short2x2             = vp8_short_fhaar2x2_c;
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -1196,7 +1196,7 @@
   }
 
   if (cpi->sf.improved_dct) {
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
     cpi->mb.vp8_short_fdct16x16 = FDCT_INVOKE(&cpi->rtcd.fdct, short16x16);
 #endif
     cpi->mb.vp8_short_fdct8x8 = FDCT_INVOKE(&cpi->rtcd.fdct, short8x8);
@@ -1203,7 +1203,7 @@
     cpi->mb.vp8_short_fdct8x4 = FDCT_INVOKE(&cpi->rtcd.fdct, short8x4);
     cpi->mb.vp8_short_fdct4x4 = FDCT_INVOKE(&cpi->rtcd.fdct, short4x4);
   } else {
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
     cpi->mb.vp8_short_fdct16x16 = FDCT_INVOKE(&cpi->rtcd.fdct, short16x16);
 #endif
     cpi->mb.vp8_short_fdct8x8 = FDCT_INVOKE(&cpi->rtcd.fdct, short8x8);
@@ -1218,7 +1218,7 @@
   cpi->mb.quantize_b      = vp8_regular_quantize_b;
   cpi->mb.quantize_b_pair = vp8_regular_quantize_b_pair;
   cpi->mb.quantize_b_8x8  = vp8_regular_quantize_b_8x8;
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   cpi->mb.quantize_b_16x16= vp8_regular_quantize_b_16x16;
 #endif
   cpi->mb.quantize_b_2x2  = vp8_regular_quantize_b_2x2;
@@ -3684,7 +3684,7 @@
   update_reference_frames(cm);
   vp8_copy(cpi->common.fc.coef_counts, cpi->coef_counts);
   vp8_copy(cpi->common.fc.coef_counts_8x8, cpi->coef_counts_8x8);
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   vp8_copy(cpi->common.fc.coef_counts_16x16, cpi->coef_counts_16x16);
 #endif
   vp8_adapt_coef_probs(&cpi->common);
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -99,7 +99,7 @@
       [COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES];
   vp8_prob coef_probs_8x8[BLOCK_TYPES_8X8]
       [COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES];
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   vp8_prob coef_probs_16x16[BLOCK_TYPES_16X16]
       [COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES];
 #endif
@@ -399,7 +399,7 @@
   DECLARE_ALIGNED(64, short, zrun_zbin_boost_y2_8x8[QINDEX_RANGE][64]);
   DECLARE_ALIGNED(64, short, zrun_zbin_boost_uv_8x8[QINDEX_RANGE][64]);
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   DECLARE_ALIGNED(16, short, Y1zbin_16x16[QINDEX_RANGE][256]);
   DECLARE_ALIGNED(16, short, Y2zbin_16x16[QINDEX_RANGE][256]);
   DECLARE_ALIGNED(16, short, UVzbin_16x16[QINDEX_RANGE][256]);
@@ -560,7 +560,7 @@
   unsigned int coef_counts_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];  /* for this frame */
   vp8_prob frame_coef_probs_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
   unsigned int frame_branch_ct_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES][2];
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   unsigned int coef_counts_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];  /* for this frame */
   vp8_prob frame_coef_probs_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
   unsigned int frame_branch_ct_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES][2];
@@ -623,7 +623,7 @@
   int skip_false_count[3];
   int t4x4_count;
   int t8x8_count;
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   int t16x16_count;
 #endif
 
--- a/vp8/encoder/quantize.c
+++ b/vp8/encoder/quantize.c
@@ -311,7 +311,7 @@
 
 
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 void vp8_quantize_mby_16x16(MACROBLOCK *x) {
   int i;
   for (i = 0; i < 16; i++)
@@ -428,7 +428,7 @@
                                           48, 48, 48, 48, 48, 48, 48, 48,
                                           48, 48, 48, 48, 48, 48, 48, 48
                                         };
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   static const int zbin_boost_16x16[256] = {
      0,  0,  0,  8,  8,  8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28,
     30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 48, 48, 48, 48, 48, 48,
@@ -469,7 +469,7 @@
                  cpi->Y1quant_shift[Q] + 0, quant_val);
     cpi->Y1zbin[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7;
     cpi->Y1zbin_8x8[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7;
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
     cpi->Y1zbin_16x16[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7;
 #endif
     cpi->Y1round[Q][0] = (qrounding_factor * quant_val) >> 7;
@@ -477,7 +477,7 @@
     cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;
     cpi->zrun_zbin_boost_y1_8x8[Q][0] =
       ((quant_val * zbin_boost_8x8[0]) + 64) >> 7;
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
     cpi->zrun_zbin_boost_y1_16x16[Q][0] = ((quant_val * zbin_boost_16x16[0]) + 64) >> 7;
 #endif
 
@@ -487,7 +487,7 @@
                  cpi->Y2quant_shift[Q] + 0, quant_val);
     cpi->Y2zbin[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7;
     cpi->Y2zbin_8x8[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7;
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
     cpi->Y2zbin_16x16[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7;
 #endif
     cpi->Y2round[Q][0] = (qrounding_factor * quant_val) >> 7;
@@ -495,7 +495,7 @@
     cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;
     cpi->zrun_zbin_boost_y2_8x8[Q][0] =
       ((quant_val * zbin_boost_8x8[0]) + 64) >> 7;
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
     cpi->zrun_zbin_boost_y2_16x16[Q][0] = ((quant_val * zbin_boost_16x16[0]) + 64) >> 7;
 #endif
 
@@ -504,7 +504,7 @@
                  cpi->UVquant_shift[Q] + 0, quant_val);
     cpi->UVzbin[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7;
     cpi->UVzbin_8x8[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7;
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
     cpi->UVzbin_16x16[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7;
 #endif
     cpi->UVround[Q][0] = (qrounding_factor * quant_val) >> 7;
@@ -512,7 +512,7 @@
     cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7;
     cpi->zrun_zbin_boost_uv_8x8[Q][0] =
       ((quant_val * zbin_boost_8x8[0]) + 64) >> 7;
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
     cpi->zrun_zbin_boost_uv_16x16[Q][0] = ((quant_val * zbin_boost_16x16[0]) + 64) >> 7;
 #endif
 
@@ -570,7 +570,7 @@
         ((quant_val * zbin_boost_8x8[i]) + 64) >> 7;
     }
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
     // 16x16 structures. Same comment above applies.
     for (i = 1; i < 256; i++) {
       int rc = vp8_default_zig_zag1d_16x16[i];
@@ -626,7 +626,7 @@
     x->block[i].quant_shift = cpi->Y1quant_shift[QIndex];
     x->block[i].zbin = cpi->Y1zbin[QIndex];
     x->block[i].zbin_8x8 = cpi->Y1zbin_8x8[QIndex];
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
     x->block[i].zbin_16x16 = cpi->Y1zbin_16x16[QIndex];
 #endif
     x->block[i].round = cpi->Y1round[QIndex];
@@ -633,7 +633,7 @@
     x->e_mbd.block[i].dequant = cpi->common.Y1dequant[QIndex];
     x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_y1[QIndex];
     x->block[i].zrun_zbin_boost_8x8 = cpi->zrun_zbin_boost_y1_8x8[QIndex];
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
     x->block[i].zrun_zbin_boost_16x16 = cpi->zrun_zbin_boost_y1_16x16[QIndex];
 #endif
     x->block[i].zbin_extra = (short)zbin_extra;
@@ -644,7 +644,7 @@
         get_segdata(xd, segment_id, SEG_LVL_EOB);
       x->block[i].eob_max_offset_8x8 =
         get_segdata(xd, segment_id, SEG_LVL_EOB);
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
       x->block[i].eob_max_offset_16x16 =
         get_segdata(xd, segment_id, SEG_LVL_EOB);
 #endif
@@ -651,7 +651,7 @@
     } else {
       x->block[i].eob_max_offset = 16;
       x->block[i].eob_max_offset_8x8 = 64;
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
       x->block[i].eob_max_offset_16x16 = 256;
 #endif
     }
@@ -668,7 +668,7 @@
     x->block[i].quant_shift = cpi->UVquant_shift[QIndex];
     x->block[i].zbin = cpi->UVzbin[QIndex];
     x->block[i].zbin_8x8 = cpi->UVzbin_8x8[QIndex];
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
     x->block[i].zbin_16x16 = cpi->UVzbin_16x16[QIndex];
 #endif
     x->block[i].round = cpi->UVround[QIndex];
@@ -675,7 +675,7 @@
     x->e_mbd.block[i].dequant = cpi->common.UVdequant[QIndex];
     x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_uv[QIndex];
     x->block[i].zrun_zbin_boost_8x8 = cpi->zrun_zbin_boost_uv_8x8[QIndex];
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
     x->block[i].zrun_zbin_boost_16x16 = cpi->zrun_zbin_boost_uv_16x16[QIndex];
 #endif
 
@@ -703,7 +703,7 @@
   x->block[24].quant_shift = cpi->Y2quant_shift[QIndex];
   x->block[24].zbin = cpi->Y2zbin[QIndex];
   x->block[24].zbin_8x8 = cpi->Y2zbin_8x8[QIndex];
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   x->block[24].zbin_16x16 = cpi->Y2zbin_16x16[QIndex];
 #endif
   x->block[24].round = cpi->Y2round[QIndex];
@@ -710,7 +710,7 @@
   x->e_mbd.block[24].dequant = cpi->common.Y2dequant[QIndex];
   x->block[24].zrun_zbin_boost = cpi->zrun_zbin_boost_y2[QIndex];
   x->block[24].zrun_zbin_boost_8x8 = cpi->zrun_zbin_boost_y2_8x8[QIndex];
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   x->block[24].zrun_zbin_boost_16x16 = cpi->zrun_zbin_boost_y2_16x16[QIndex];
 #endif
   x->block[24].zbin_extra = (short)zbin_extra;
--- a/vp8/encoder/quantize.h
+++ b/vp8/encoder/quantize.h
@@ -46,7 +46,7 @@
 #endif
 extern prototype_quantize_block(vp8_quantize_quantb_8x8);
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 #ifndef vp8_quantize_quantb_16x16
 #define vp8_quantize_quantb_16x16 vp8_regular_quantize_b_16x16
 #endif
@@ -77,7 +77,7 @@
 extern prototype_quantize_mb(vp8_quantize_mby_8x8);
 extern prototype_quantize_mb(vp8_quantize_mbuv_8x8);
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 void vp8_quantize_mb_16x16(MACROBLOCK *x);
 extern prototype_quantize_block(vp8_quantize_quantb_16x16);
 extern prototype_quantize_mb(vp8_quantize_mby_16x16);
--- a/vp8/encoder/ratectrl.c
+++ b/vp8/encoder/ratectrl.c
@@ -175,7 +175,7 @@
 #if CONFIG_SWITCHABLE_INTERP
   vp8_copy(cc->switchable_interp_prob, cm->fc.switchable_interp_prob);
 #endif
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   vp8_copy(cc->coef_probs_16x16, cm->fc.coef_probs_16x16);
 #endif
 }
@@ -232,7 +232,7 @@
 #if CONFIG_SWITCHABLE_INTERP
   vp8_copy(cm->fc.switchable_interp_prob, cc->switchable_interp_prob);
 #endif
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   vp8_copy(cm->fc.coef_probs_16x16, cc->coef_probs_16x16);
 #endif
 }
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -365,7 +365,7 @@
     (const vp8_prob( *)[8][PREV_COEF_CONTEXTS][11]) cpi->common.fc.coef_probs_8x8,
     BLOCK_TYPES_8X8);
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   fill_token_costs(
     cpi->mb.token_costs[TX_16X16],
     (const vp8_prob(*)[8][PREV_COEF_CONTEXTS][11]) cpi->common.fc.coef_probs_16x16,
@@ -615,7 +615,7 @@
       band = vp8_coef_bands_8x8;
       default_eob = 64;
       break;
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
     case TX_16X16:
       scan = vp8_default_zig_zag1d_16x16;
       band = vp8_coef_bands_16x16;
@@ -787,7 +787,7 @@
   *Rate = vp8_rdcost_mby_8x8(mb, 1);
 }
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 static int vp8_rdcost_mby_16x16(MACROBLOCK *mb) {
   int cost;
   MACROBLOCKD *xd = &mb->e_mbd;
@@ -813,8 +813,28 @@
     mb->e_mbd.predictor,
     mb->block[0].src_stride);
 
+#if CONFIG_HYBRIDTRANSFORM16X16
+  if ((mb->e_mbd.mode_info_context->mbmi.mode_rdopt < I8X8_PRED) &&
+      (mb->q_index < ACTIVE_HT16)) {
+    BLOCKD *b  = &mb->e_mbd.block[0];
+    BLOCK  *be = &mb->block[0];
+    txfm_map(b, pred_mode_conv(mb->e_mbd.mode_info_context->mbmi.mode_rdopt));
+    vp8_fht_c(be->src_diff, be->coeff, 32, b->bmi.as_mode.tx_type, 16);
+  } else
+    vp8_transform_mby_16x16(mb);
+#else
   vp8_transform_mby_16x16(mb);
+#endif
+
   vp8_quantize_mby_16x16(mb);
+#if CONFIG_HYBRIDTRANSFORM16X16
+  // TODO(jingning) is it possible to quickly determine whether to force
+  //                trailing coefficients to be zero, instead of running trellis
+  //                optimization in the rate-distortion optimization loop?
+  if (mb->e_mbd.mode_info_context->mbmi.mode_rdopt < I8X8_PRED)
+    vp8_optimize_mby_16x16(mb, rtcd);
+#endif
+
   d = ENCODEMB_INVOKE(&rtcd->encodemb, mberr)(mb, 0);
 
   *Distortion = (d >> 2);
@@ -1193,9 +1213,17 @@
   int UNINITIALIZED_IS_SAFE(skip);
   MACROBLOCKD *xd = &x->e_mbd;
 
+#if CONFIG_HYBRIDTRANSFORM16X16
+  int best_txtype = DCT_DCT, rd_txtype = DCT_DCT;  // default: plain DCT
+#endif
+
   // Y Search for 16x16 intra prediction mode
   for (mode = DC_PRED; mode <= TM_PRED; mode++) {
     mbmi->mode = mode;
+#if CONFIG_HYBRIDTRANSFORM16X16
+    mbmi->mode_rdopt = mode;
+#endif
+
 #if CONFIG_COMP_INTRA_PRED
     for (mode2 = DC_PRED - 1; mode2 != TM_PRED + 1; mode2++) {
       mbmi->second_mode = mode2;
@@ -1211,7 +1239,7 @@
       }
 #endif
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
       macro_block_yrd_16x16(x, &ratey, &distortion, IF_RTCD(&cpi->rtcd));
 #else
       macro_block_yrd_8x8(x, &ratey, &distortion, IF_RTCD(&cpi->rtcd));
@@ -1222,6 +1250,10 @@
 
       this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
 
+#if CONFIG_HYBRIDTRANSFORM16X16
+      rd_txtype = x->e_mbd.block[0].bmi.as_mode.tx_type;
+#endif
+
       if (this_rd < best_rd) {
 #if CONFIG_TX16X16
         skip = mby_is_skippable_16x16(xd);
@@ -1236,6 +1268,9 @@
         *Rate = rate;
         *rate_y = ratey;
         *Distortion = distortion;
+#if CONFIG_HYBRIDTRANSFORM16X16
+        best_txtype = rd_txtype;
+#endif
       }
 #if CONFIG_COMP_INTRA_PRED
     }
@@ -1244,6 +1279,10 @@
 
   *skippable = skip;
   mbmi->mode = mode_selected;
+#if CONFIG_HYBRIDTRANSFORM16X16
+  x->e_mbd.block[0].bmi.as_mode.tx_type = best_txtype;
+#endif
+
 #if CONFIG_COMP_INTRA_PRED
   mbmi->second_mode = mode2_selected;
 #endif
@@ -2871,7 +2910,7 @@
                             int *rate2, int *distortion2, int *rate_y,
                             int *distortion, int* rate_uv, int *distortion_uv) {
   // Y cost and distortion
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   if (this_mode == ZEROMV ||
       this_mode == NEARESTMV ||
       this_mode == NEARMV ||
@@ -2883,7 +2922,7 @@
       macro_block_yrd_8x8(x, rate_y, distortion, IF_RTCD(&cpi->rtcd));
     else
       macro_block_yrd(x, rate_y, distortion, IF_RTCD(&cpi->rtcd));
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   }
 #endif
 
@@ -2892,7 +2931,7 @@
 
   // UV cost and distortion
   if (cpi->common.txfm_mode == ALLOW_8X8
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
       || this_mode == ZEROMV ||
       this_mode == NEARESTMV ||
       this_mode == NEARMV ||
@@ -3020,6 +3059,10 @@
   unsigned int ref_costs[MAX_REF_FRAMES];
   int_mv seg_mvs[BLOCK_MAX_SEGMENTS - 1][16 /* n_blocks */][MAX_REF_FRAMES - 1];
 
+#if CONFIG_HYBRIDTRANSFORM16X16
+  int best_txtype = DCT_DCT, rd_txtype = DCT_DCT;  // default: plain DCT
+#endif
+
   vpx_memset(mode8x8, 0, sizeof(mode8x8));
   vpx_memset(&frame_mv, 0, sizeof(frame_mv));
   vpx_memset(&best_mbmode, 0, sizeof(best_mbmode));
@@ -3245,10 +3288,14 @@
           // FIXME compound intra prediction
           RECON_INVOKE(&cpi->common.rtcd.recon, build_intra_predictors_mby)
               (&x->e_mbd);
-#if CONFIG_TX16X16
+
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
           // FIXME: breaks lossless since 4x4 isn't allowed
           macro_block_yrd_16x16(x, &rate_y, &distortion,
                                 IF_RTCD(&cpi->rtcd));
+#if CONFIG_HYBRIDTRANSFORM16X16
+          rd_txtype = x->e_mbd.block[0].bmi.as_mode.tx_type;
+#endif
           rate2 += rate_y;
           distortion2 += distortion;
           rate2 += x->mbmode_cost[x->e_mbd.frame_type][mbmi->mode];
@@ -3509,6 +3556,7 @@
           if (flag)
             continue;
         case ZEROMV:
+
         default:
           break;
       }
@@ -3626,6 +3674,15 @@
                       && this_mode != B_PRED
                       && this_mode != I8X8_PRED);
 
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
+        if (this_mode <= TM_PRED ||
+            this_mode == NEWMV ||
+            this_mode == ZEROMV ||
+            this_mode == NEARESTMV ||
+            this_mode == NEARMV)
+          mb_skippable = mb_is_skippable_16x16(&x->e_mbd);
+        else
+#endif
         if ((cpi->common.txfm_mode == ALLOW_8X8) && has_y2) {
           if (mbmi->ref_frame != INTRA_FRAME) {
 #if CONFIG_TX16X16
@@ -3718,6 +3775,10 @@
           // Note index of best mode so far
           best_mode_index = mode_index;
 
+#if CONFIG_HYBRIDTRANSFORM16X16
+          best_txtype = rd_txtype;
+#endif
+
           if (this_mode <= B_PRED) {
             if (cpi->common.txfm_mode == ALLOW_8X8
                 && this_mode != B_PRED
@@ -3869,6 +3930,11 @@
     }
   }
 
+#if CONFIG_HYBRIDTRANSFORM16X16
+  if (best_mbmode.mode < I8X8_PRED)
+    xd->mode_info_context->bmi[0].as_mode.tx_type = best_txtype;
+#endif
+
   if (best_mbmode.mode == I8X8_PRED)
     set_i8x8_block_modes(x, mode8x8);
 
@@ -3956,6 +4022,10 @@
   int modeuv, modeuv8x8, uv_intra_skippable, uv_intra_skippable_8x8;
   int y_intra16x16_skippable;
 
+#if CONFIG_HYBRIDTRANSFORM16X16
+  int best_txtype;
+#endif
+
   mbmi->ref_frame = INTRA_FRAME;
   rd_pick_intra_mbuv_mode(cpi, x, &rateuv, &rateuv_tokenonly, &distuv,
                           &uv_intra_skippable);
@@ -3981,8 +4051,12 @@
                                           &rate16x16_tokenonly, &dist16x16,
                                           &y_intra16x16_skippable);
   mode16x16 = mbmi->mode;
+#if CONFIG_HYBRIDTRANSFORM16X16
+  best_txtype = xd->block[0].bmi.as_mode.tx_type;
+  xd->mode_info_context->bmi[0].as_mode.tx_type = best_txtype;
+#endif
 
-#if CONFIG_HYBRIDTRANSFORM
+#if CONFIG_HYBRIDTRANSFORM || CONFIG_HYBRIDTRANSFORM8X8
   mbmi->mode_rdopt = I8X8_PRED;
 #endif
 
@@ -4041,6 +4115,10 @@
       mbmi->mode = mode16x16;
       rate = rate16x16 + rateuv8x8;
       dist = dist16x16 + (distuv8x8 >> 2);
+#if CONFIG_HYBRIDTRANSFORM16X16
+      // save this into supermacroblock coding decision buffer
+      xd->mode_info_context->bmi[0].as_mode.tx_type = best_txtype;
+#endif
     }
     if (cpi->common.mb_no_coeff_skip)
       rate += vp8_cost_bit(get_pred_prob(cm, xd, PRED_MBSKIP), 0);
--- a/vp8/encoder/tokenize.c
+++ b/vp8/encoder/tokenize.c
@@ -26,7 +26,7 @@
 #ifdef ENTROPY_STATS
 INT64 context_counters[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];
 INT64 context_counters_8x8[BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 INT64 context_counters_16x16[BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];
 #endif
 extern unsigned int tree_update_hist[BLOCK_TYPES][COEF_BANDS]
@@ -33,7 +33,7 @@
                     [PREV_COEF_CONTEXTS][ENTROPY_NODES][2];
 extern unsigned int tree_update_hist_8x8[BLOCK_TYPES_8X8][COEF_BANDS]
                     [PREV_COEF_CONTEXTS][ENTROPY_NODES] [2];
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 extern unsigned int tree_update_hist_16x16[BLOCK_TYPES_16X16][COEF_BANDS]
                     [PREV_COEF_CONTEXTS][ENTROPY_NODES] [2];
 #endif
@@ -42,7 +42,7 @@
                   MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run);
 void vp8_stuff_mb_8x8(VP8_COMP *cpi,
                       MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run);
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 void vp8_stuff_mb_16x16(VP8_COMP *cpi, MACROBLOCKD *xd,
                         TOKENEXTRA **t, int dry_run);
 #endif
@@ -112,7 +112,7 @@
   vp8_dct_value_cost_ptr   = dct_value_cost + DCT_MAX_VALUE;
 }
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 static void tokenize1st_order_b_16x16(MACROBLOCKD *xd,
                                       const BLOCKD *const b,
                                       TOKENEXTRA **tp,
@@ -764,7 +764,7 @@
   return (mby_is_skippable_8x8(xd) & mbuv_is_skippable_8x8(xd));
 }
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 int mby_is_skippable_16x16(MACROBLOCKD *xd) {
   int skip = 1;
   //skip &= (xd->block[0].eob < 2); // I think this should be commented? No second order == DC must be coded
@@ -811,12 +811,12 @@
   has_y2_block = (xd->mode_info_context->mbmi.mode != B_PRED
                   && xd->mode_info_context->mbmi.mode != I8X8_PRED
                   && xd->mode_info_context->mbmi.mode != SPLITMV);
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   if (tx_type == TX_16X16) has_y2_block = 0; // Because of inter frames
 #endif
 
   switch (tx_type) {
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
     case TX_16X16:
       xd->mode_info_context->mbmi.mb_skip_coeff = mb_is_skippable_16x16(xd);
       break;
@@ -833,7 +833,7 @@
     if (!dry_run)
       cpi->skip_true_count[mb_skip_context] += skip_inc;
     if (!cpi->common.mb_no_coeff_skip) {
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
       if (tx_type == TX_16X16)
         vp8_stuff_mb_16x16(cpi, xd, t, dry_run);
       else
@@ -869,12 +869,14 @@
     plane_type = 0;
   }
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   if (tx_type == TX_16X16) {
     ENTROPY_CONTEXT * A = (ENTROPY_CONTEXT *)xd->above_context;
     ENTROPY_CONTEXT * L = (ENTROPY_CONTEXT *)xd->left_context;
+
     tokenize1st_order_b_16x16(xd, xd->block, t, 3,
                               xd->frame_type, A, L, cpi, dry_run);
+
     for (b = 1; b < 16; b++) {
       *(A + vp8_block2above[b]) = *(A);
       *(L + vp8_block2left[b] ) = *(L);
@@ -955,13 +957,13 @@
   if (!f) {
     vpx_memset(context_counters, 0, sizeof(context_counters));
     vpx_memset(context_counters_8x8, 0, sizeof(context_counters_8x8));
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
     vpx_memset(context_counters_16x16, 0, sizeof(context_counters_16x16));
 #endif
   } else {
     fread(context_counters, sizeof(context_counters), 1, f);
     fread(context_counters_8x8, sizeof(context_counters_8x8), 1, f);
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
     fread(context_counters_16x16, sizeof(context_counters_16x16), 1, f);
 #endif
     fclose(f);
@@ -971,13 +973,13 @@
   if (!f) {
     vpx_memset(tree_update_hist, 0, sizeof(tree_update_hist));
     vpx_memset(tree_update_hist_8x8, 0, sizeof(tree_update_hist_8x8));
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
     vpx_memset(tree_update_hist_16x16, 0, sizeof(tree_update_hist_16x16));
 #endif
   } else {
     fread(tree_update_hist, sizeof(tree_update_hist), 1, f);
     fread(tree_update_hist_8x8, sizeof(tree_update_hist_8x8), 1, f);
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
     fread(tree_update_hist_16x16, sizeof(tree_update_hist_16x16), 1, f);
 #endif
     fclose(f);
@@ -1055,7 +1057,7 @@
   } while (++type < BLOCK_TYPES_8X8);
   fprintf(f, "\n};\n");
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   fprintf(f, "static const unsigned int\nvp8_default_coef_counts_16x16"
           "[BLOCK_TYPES_16X16] [COEF_BANDS]"
           "[PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS] = {");
@@ -1158,7 +1160,7 @@
   } while (++type < BLOCK_TYPES_8X8);
   fprintf(f, "\n};\n");
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   fprintf(f, "static const vp8_prob\n"
           "vp8_default_coef_probs_16x16[BLOCK_TYPES_16X16] [COEF_BANDS]\n"
           "[PREV_COEF_CONTEXTS] [ENTROPY_NODES] = {");
@@ -1198,7 +1200,7 @@
   f = fopen("context.bin", "wb");
   fwrite(context_counters, sizeof(context_counters), 1, f);
   fwrite(context_counters_8x8, sizeof(context_counters_8x8), 1, f);
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
   fwrite(context_counters_16x16, sizeof(context_counters_16x16), 1, f);
 #endif
   fclose(f);
@@ -1340,7 +1342,7 @@
 }
 
 
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 static __inline
 void stuff1st_order_b_16x16(const BLOCKD *const b,
                             TOKENEXTRA **tp,
@@ -1496,7 +1498,7 @@
   if ((xd->mode_info_context->mbmi.mode != B_PRED
       && xd->mode_info_context->mbmi.mode != I8X8_PRED
       && xd->mode_info_context->mbmi.mode != SPLITMV)
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
       || xd->mode_info_context->mbmi.txfm_size == TX_16X16
 #endif
       ) {
--- a/vp8/encoder/tokenize.h
+++ b/vp8/encoder/tokenize.h
@@ -48,7 +48,7 @@
                              [PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS];
 extern INT64 context_counters_8x8[BLOCK_TYPES_8X8][COEF_BANDS]
                                  [PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS];
-#if CONFIG_TX16X16
+#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
 extern INT64 context_counters_16x16[BLOCK_TYPES_16X16][COEF_BANDS]
                                    [PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS];
 #endif