shithub: libvpx

Download patch

ref: 9824230fe3ee33fa2deac0745521e625b4c38be9
parent: 6fd0929fb53ca5dee83ff99f34d987a88eefa1ea
author: Jingning Han <jingning@google.com>
date: Mon Jun 25 08:26:09 EDT 2012

Adds hybrid transform

Adds ADST/DCT hybrid transform coding for Intra4x4 mode.
The ADST is applied in directions in which the boundary
pixels are used for prediction, while the DCT is applied in
directions without corresponding boundary prediction.

Adds enum TX_TYPE in b_mode_info to indicate the transform
type used.

Make coding style consistent with google style.
Fixed the commented issues.

Experimental results in terms of bit-rate reduction:
derf:   0.731%
yt:     0.982%
std-hd: 0.459%
hd:     0.725%

Will be looking at 8x8 transforms next.

Change-Id: I46dbd7b80dbb3e8856e9c34fbc58cb3764a12fcf

--- a/configure
+++ b/configure
@@ -228,6 +228,7 @@
     adaptive_entropy
     pred_filter
     lossless
+    hybridtransform
 "
 CONFIG_LIST="
     external_build
--- a/vp8/common/blockd.h
+++ b/vp8/common/blockd.h
@@ -124,6 +124,15 @@
 
 } TX_SIZE;
 
+#if CONFIG_HYBRIDTRANSFORM
+typedef enum {
+  DCT_DCT   = 0,                      // DCT  in both horizontal and vertical
+  ADST_DCT  = 1,                      // ADST in vertical, DCT in horizontal
+  DCT_ADST  = 2,                      // DCT  in vertical, ADST in horizontal
+  ADST_ADST = 3                       // ADST in both directions
+} TX_TYPE;
+#endif
+
 #define VP8_YMODES  (B_PRED + 1)
 #define VP8_UV_MODES (TM_PRED + 1)
 #define VP8_I8X8_MODES (TM_PRED + 1)
@@ -130,6 +139,10 @@
 
 #define VP8_MVREFS (1 + SPLITMV - NEARESTMV)
 
+#if CONFIG_HYBRIDTRANSFORM
+#define ACTIVE_HT 110                // hybrid transform is enabled only when q_index < ACTIVE_HT
+#endif
+
 typedef enum {
   B_DC_PRED,          /* average of above and left pixels */
   B_TM_PRED,
@@ -163,6 +176,11 @@
 union b_mode_info {
   struct {
     B_PREDICTION_MODE first;
+#if CONFIG_HYBRIDTRANSFORM
+    B_PREDICTION_MODE test;
+    TX_TYPE           tx_type;
+#endif
+
 #if CONFIG_COMP_INTRA_PRED
     B_PREDICTION_MODE second;
 #endif
@@ -183,6 +201,10 @@
 
 typedef struct {
   MB_PREDICTION_MODE mode, uv_mode;
+#if CONFIG_HYBRIDTRANSFORM
+  MB_PREDICTION_MODE mode_rdopt;
+#endif
+
 #if CONFIG_COMP_INTRA_PRED
   MB_PREDICTION_MODE second_mode, second_uv_mode;
 #endif
@@ -344,6 +366,10 @@
 #endif
 
   int mb_index;   // Index of the MB in the SB (0..3)
+
+#if CONFIG_HYBRIDTRANSFORM
+  int q_index;
+#endif
 
 } MACROBLOCKD;
 
--- a/vp8/common/entropy.c
+++ b/vp8/common/entropy.c
@@ -65,6 +65,24 @@
   9, 12, 13, 10,
   7, 11, 14, 15,
 };
+
+
+#if CONFIG_HYBRIDTRANSFORM
+DECLARE_ALIGNED(16, const int, vp8_col_scan[16]) = {  // 4x4 scan, column by column (index = 4 * row + col); used for DCT_ADST
+  0, 4,  8, 12,
+  1, 5,  9, 13,
+  2, 6, 10, 14,
+  3, 7, 11, 15
+};
+DECLARE_ALIGNED(16, const int, vp8_row_scan[16]) = {  // 4x4 scan, row by row (raster order); used for ADST_DCT
+  0,   1,  2,  3,
+  4,   5,  6,  7,
+  8,   9, 10, 11,
+  12, 13, 14, 15
+};
+#endif
+
+
 DECLARE_ALIGNED(64, cuchar, vp8_coef_bands_8x8[64]) = { 0, 1, 2, 3, 5, 4, 4, 5,
                                                         5, 3, 6, 3, 5, 4, 6, 6,
                                                         6, 5, 5, 6, 6, 6, 6, 6,
--- a/vp8/common/entropy.h
+++ b/vp8/common/entropy.h
@@ -107,6 +107,12 @@
 struct VP8Common;
 void vp8_default_coef_probs(struct VP8Common *);
 extern DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]);
+
+#if CONFIG_HYBRIDTRANSFORM
+extern DECLARE_ALIGNED(16, const int, vp8_col_scan[16]);
+extern DECLARE_ALIGNED(16, const int, vp8_row_scan[16]);
+#endif
+
 extern short vp8_default_zig_zag_mask[16];
 extern DECLARE_ALIGNED(64, const int, vp8_default_zig_zag1d_8x8[64]);
 extern short vp8_default_zig_zag_mask_8x8[64];// int64_t
--- a/vp8/common/idct.h
+++ b/vp8/common/idct.h
@@ -97,6 +97,12 @@
 extern prototype_second_order(vp8_short_inv_walsh4x4_1_lossless_c);
 #endif
 
+#if CONFIG_HYBRIDTRANSFORM
+#include "vp8/common/blockd.h"
+void vp8_iht4x4llm_c(short *input, short *output, int pitch, TX_TYPE tx_type);
+#endif
+
+
 typedef prototype_idct((*vp8_idct_fn_t));
 typedef prototype_idct_scalar_add((*vp8_idct_scalar_add_fn_t));
 typedef prototype_second_order((*vp8_second_order_fn_t));
--- a/vp8/common/idctllm.c
+++ b/vp8/common/idctllm.c
@@ -25,6 +25,9 @@
 #include "vpx_ports/config.h"
 #include "vp8/common/idct.h"
 
+#if CONFIG_HYBRIDTRANSFORM
+#include "vp8/common/blockd.h"
+#endif
 
 #include <math.h>
 
@@ -31,6 +34,130 @@
 static const int cospi8sqrt2minus1 = 20091;
 static const int sinpi8sqrt2      = 35468;
 static const int rounding = 0;
+
+#if CONFIG_HYBRIDTRANSFORM
+float idct_4[16] = {  // 4x4 inverse DCT matrix, row-major; equals the transpose of dct_4 in vp8/encoder/dct.c
+  0.500000000000000,   0.653281482438188,   0.500000000000000,   0.270598050073099,
+  0.500000000000000,   0.270598050073099,  -0.500000000000000,  -0.653281482438188,
+  0.500000000000000,  -0.270598050073099,  -0.500000000000000,   0.653281482438188,
+  0.500000000000000,  -0.653281482438188,   0.500000000000000,  -0.270598050073099
+};
+
+float iadst_4[16] = {  // 4x4 inverse ADST matrix, row-major; equals the transpose of adst_4 in vp8/encoder/dct.c
+  0.228013428883779,   0.577350269189626,   0.656538502008139,   0.428525073124360,
+  0.428525073124360,   0.577350269189626,  -0.228013428883779,  -0.656538502008139,
+  0.577350269189626,                   0,  -0.577350269189626,   0.577350269189626,
+  0.656538502008139,  -0.577350269189626,   0.428525073124359,  -0.228013428883779
+};
+#endif
+
+#if CONFIG_HYBRIDTRANSFORM
+void vp8_iht4x4llm_c(short *input, short *output, int pitch, TX_TYPE tx_type) {
+  int i, j, k;
+  float bufa[16], bufb[16]; // float scratch buffers: this is a floating-point test implementation;
+                            // it could be simplified in conjunction with an integer transform
+  short *ip = input;
+  short *op = output;
+  int shortpitch = pitch >> 1;  // pitch is halved to get the output row stride in shorts
+
+  float *pfa = &bufa[0];
+  float *pfb = &bufb[0];
+
+  // pointers to vertical and horizontal transforms
+  float *ptv, *pth;
+
+  // load the 4x4 input coefficient block (contiguous, 4 shorts per row) as floats
+  for(j = 0; j < 4; j++) {
+    for(i = 0; i < 4; i++) {
+      pfa[i] = (float)ip[i];
+    }
+    pfa += 4;
+    ip  += 4;
+  }
+
+  // vertical transformation: bufb = T_v * bufa (T_v applied down each column)
+  pfa = &bufa[0];
+  pfb = &bufb[0];
+
+  switch(tx_type) {
+    case ADST_ADST :
+    case ADST_DCT  :
+      ptv = &iadst_4[0];
+      break;
+
+    default :
+      ptv = &idct_4[0];
+      break;
+  }
+
+  for(j = 0; j < 4; j++) {
+    for(i = 0; i < 4; i++) {
+      pfb[i] = 0 ;
+      for(k = 0; k < 4; k++) {
+        pfb[i] += ptv[k] * pfa[(k<<2)];  // row j of T_v dotted with column i of bufa
+      }
+      pfa += 1;  // step to the next column of bufa
+    }
+
+    pfb += 4;
+    ptv += 4;  // next row of T_v
+    pfa = &bufa[0];  // rewind to column 0 for the next output row
+  }
+
+  // horizontal transformation: bufa = bufb * T_h^T (each row of bufb dotted with each row of T_h)
+  pfa = &bufa[0];
+  pfb = &bufb[0];
+
+  switch(tx_type) {
+    case ADST_ADST :
+    case  DCT_ADST :
+      pth = &iadst_4[0];
+      break;
+
+    default :
+      pth = &idct_4[0];
+      break;
+  }
+
+  for(j = 0; j < 4; j++) {
+    for(i = 0; i < 4; i++) {
+      pfa[i] = 0;
+      for(k = 0; k < 4; k++) {
+        pfa[i] += pfb[k] * pth[k];
+      }
+      pth += 4;  // next row of T_h
+     }
+
+    pfa += 4;
+    pfb += 4;
+
+    switch(tx_type) {  // rewind pth to the start of the same table for the next row
+      case ADST_ADST :
+      case  DCT_ADST :
+        pth = &iadst_4[0];
+        break;
+
+      default :
+        pth = &idct_4[0];
+        break;
+    }
+  }
+
+  // divide by 8 (vp8_fht4x4_c multiplies by 8), round with a 0.49 offset symmetric about zero, store as short
+  op  = output;
+  pfa = &bufa[0];
+
+  for(j = 0; j < 4; j++) {
+    for(i = 0; i < 4; i++) {
+      op[i] = (pfa[i] > 0 ) ? (short)( pfa[i] / 8 + 0.49) :
+                             -(short)( - pfa[i] / 8 + 0.49);
+    }
+    op  += shortpitch;  // output rows are shortpitch shorts apart
+    pfa += 4;
+  }
+}
+#endif
+
 
 void vp8_short_idct4x4llm_c(short *input, short *output, int pitch) {
   int i;
--- a/vp8/common/invtrans.c
+++ b/vp8/common/invtrans.c
@@ -31,6 +31,11 @@
 
 }
 
+#if CONFIG_HYBRIDTRANSFORM
+void vp8_inverse_htransform_b(const vp8_idct_rtcd_vtable_t *rtcd, BLOCKD *b, int pitch) {
+  vp8_iht4x4llm_c(b->dqcoeff, b->diff, pitch, b->bmi.as_mode.tx_type);  // dqcoeff -> diff; rtcd is unused, the C routine is called directly
+}
+#endif
 
 void vp8_inverse_transform_b(const vp8_idct_rtcd_vtable_t *rtcd, BLOCKD *b, int pitch) {
   if (b->eob <= 1)
--- a/vp8/common/invtrans.h
+++ b/vp8/common/invtrans.h
@@ -15,6 +15,11 @@
 #include "vpx_ports/config.h"
 #include "idct.h"
 #include "blockd.h"
+
+#if CONFIG_HYBRIDTRANSFORM
+extern void vp8_inverse_htransform_b(const vp8_idct_rtcd_vtable_t *rtcd, BLOCKD *b, int pitch);
+#endif
+
 extern void vp8_inverse_transform_b(const vp8_idct_rtcd_vtable_t *rtcd, BLOCKD *b, int pitch);
 extern void vp8_inverse_transform_mb(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x);
 extern void vp8_inverse_transform_mby(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x);
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -128,6 +128,11 @@
     xd->block[i].dequant = pc->Y1dequant[QIndex];
   }
 
+#if CONFIG_HYBRIDTRANSFORM
+  xd->q_index = QIndex;
+#endif
+
+
 #if CONFIG_LOSSLESS
   if (!QIndex) {
     pbi->common.rtcd.idct.idct1        = vp8_short_inv_walsh4x4_1_x8_c;
@@ -208,6 +213,11 @@
   int i;
   int tx_type;
 
+#if CONFIG_HYBRIDTRANSFORM
+  int QIndex = xd->q_index;
+  int active_ht = (QIndex < ACTIVE_HT);
+#endif
+
   if (pbi->common.frame_type == KEY_FRAME) {
     if (pbi->common.txfm_mode == ALLOW_8X8 &&
         xd->mode_info_context->mbmi.mode != I8X8_PRED &&
@@ -281,6 +291,39 @@
   if (xd->segmentation_enabled)
     mb_init_dequantizer(pbi, xd);
 
+#if CONFIG_HYBRIDTRANSFORM
+  // parse transform types for intra 4x4 mode
+  if (mode == B_PRED) {
+    for (i = 0; i < 16; i++) {
+      BLOCKD *b = &xd->block[i];
+      int b_mode = xd->mode_info_context->bmi[i].as_mode.first;
+      if(active_ht) {
+        switch(b_mode) {
+          case B_TM_PRED :
+          case B_RD_PRED :
+            b->bmi.as_mode.tx_type = ADST_ADST;
+            break;
+
+          case B_VE_PRED :
+          case B_VR_PRED :
+            b->bmi.as_mode.tx_type = ADST_DCT;
+            break ;
+
+          case B_HE_PRED :
+          case B_HD_PRED :
+          case B_HU_PRED :
+            b->bmi.as_mode.tx_type = DCT_ADST;
+            break;
+
+          default :
+            b->bmi.as_mode.tx_type = DCT_DCT;
+            break;
+        }
+      }
+    } // loop over 4x4 blocks
+  }
+#endif
+
   /* do prediction */
   if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) {
     if (mode != I8X8_PRED) {
@@ -360,16 +403,29 @@
       }
 #endif
 
-      if (xd->eobs[i] > 1) {
-        DEQUANT_INVOKE(&pbi->dequant, idct_add)
-        (b->qcoeff, b->dequant,  b->predictor,
-         *(b->base_dst) + b->dst, 16, b->dst_stride);
-      } else {
-        IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar_add)
-        (b->qcoeff[0] * b->dequant[0], b->predictor,
-         *(b->base_dst) + b->dst, 16, b->dst_stride);
-        ((int *)b->qcoeff)[0] = 0;
+#if CONFIG_HYBRIDTRANSFORM
+      if(active_ht)
+        vp8_ht_dequant_idct_add_c( (TX_TYPE)b->bmi.as_mode.tx_type, b->qcoeff,
+                                   b->dequant, b->predictor,
+                                   *(b->base_dst) + b->dst, 16, b->dst_stride);
+      else
+        vp8_dequant_idct_add_c(b->qcoeff, b->dequant, b->predictor,
+                               *(b->base_dst) + b->dst, 16, b->dst_stride);
+#else
+      if (xd->eobs[i] > 1)
+      {
+          DEQUANT_INVOKE(&pbi->dequant, idct_add)
+              (b->qcoeff, b->dequant,  b->predictor,
+              *(b->base_dst) + b->dst, 16, b->dst_stride);
       }
+      else
+      {
+          IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar_add)
+              (b->qcoeff[0] * b->dequant[0], b->predictor,
+              *(b->base_dst) + b->dst, 16, b->dst_stride);
+          ((int *)b->qcoeff)[0] = 0;
+      }
+#endif
     }
   } else if (mode == SPLITMV) {
     DEQUANT_INVOKE(&pbi->dequant, idct_add_y_block)
@@ -378,8 +434,6 @@
      xd->dst.y_stride, xd->eobs);
   } else {
     BLOCKD *b = &xd->block[24];
-
-
     if (tx_type == TX_8X8) {
       DEQUANT_INVOKE(&pbi->dequant, block_2x2)(b);
 #ifdef DEC_DEBUG
--- a/vp8/decoder/dequantize.c
+++ b/vp8/decoder/dequantize.c
@@ -41,6 +41,44 @@
   }
 }
 
+
+#if CONFIG_HYBRIDTRANSFORM
+void vp8_ht_dequant_idct_add_c(TX_TYPE tx_type, short *input, short *dq,
+                               unsigned char *pred, unsigned char *dest,
+                               int pitch, int stride) {
+  short output[16];  // inverse-transformed 4x4 block, 4 shorts per row
+  short *diff_ptr = output;
+  int r, c;
+  int i;
+
+  for (i = 0; i < 16; i++) {
+    input[i] = dq[i] * input[i];  // dequantize in place
+  }
+
+  vp8_iht4x4llm_c( input, output, 4 << 1, tx_type );  // pitch 8 is halved inside, giving an output row stride of 4 shorts
+
+  vpx_memset(input, 0, 32);  // clear 32 bytes of the coefficient buffer after use
+
+  for (r = 0; r < 4; r++) {
+      for (c = 0; c < 4; c++) {
+        int a = diff_ptr[c] + pred[c];  // prediction plus inverse-transform output
+
+        if (a < 0)
+            a = 0;
+
+        if (a > 255)
+            a = 255;  // clamp to the unsigned char range before storing
+
+        dest[c] = (unsigned char) a;
+    }
+
+      dest += stride;  // destination rows are stride bytes apart
+      diff_ptr += 4;
+      pred += pitch;  // predictor rows are pitch bytes apart
+  }
+}
+#endif
+
 void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *pred,
                             unsigned char *dest, int pitch, int stride) {
   short output[16];
--- a/vp8/decoder/dequantize.h
+++ b/vp8/decoder/dequantize.h
@@ -76,6 +76,17 @@
 #endif
 extern prototype_dequant_idct_add(vp8_dequant_idct_add);
 
+#if CONFIG_HYBRIDTRANSFORM
+// declare dequantization and inverse transform module of hybrid transform decoder
+#ifndef vp8_ht_dequant_idct_add
+#define vp8_ht_dequant_idct_add vp8_ht_dequant_idct_add_c
+#endif
+extern void vp8_ht_dequant_idct_add(TX_TYPE tx_type, short *input, short *dq,
+                                    unsigned char *pred, unsigned char *dest,
+                                    int pitch, int stride);
+
+#endif
+
 #ifndef vp8_dequant_dc_idct_add
 #define vp8_dequant_dc_idct_add vp8_dequant_dc_idct_add_c
 #endif
--- a/vp8/decoder/detokenize.c
+++ b/vp8/decoder/detokenize.c
@@ -119,6 +119,53 @@
   else return DCT_VAL_CATEGORY6;
 }
 
+#if CONFIG_HYBRIDTRANSFORM
+void static count_tokens_adaptive_scan(const MACROBLOCKD *xd, INT16 *qcoeff_ptr,
+                                       int block, int type, ENTROPY_CONTEXT *a,
+                                       ENTROPY_CONTEXT *l, int eob, int seg_eob,
+                                       FRAME_CONTEXT *fc) {
+  int c, pt, token, band;
+  const int *scan;  // coefficient scan order used to walk qcoeff_ptr
+
+  int QIndex = xd->q_index;
+  int active_ht = (QIndex < ACTIVE_HT) &&
+                  (xd->mode_info_context->mbmi.mode == B_PRED);
+  // NOTE(review): unlike vp8_decode_mb_tokens, 'type' is not checked here, so non-luma blocks also consult tx_type - confirm
+  if(active_ht) {
+    switch(xd->block[block].bmi.as_mode.tx_type) {
+      case ADST_DCT :
+        scan = vp8_row_scan;
+        break;
+
+      case DCT_ADST :
+        scan = vp8_col_scan;
+        break;
+
+      default :
+        scan = vp8_default_zig_zag1d;
+        break;
+    }
+  } else {
+    scan = vp8_default_zig_zag1d;
+  }
+
+  VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
+  for (c = !type; c < eob; ++c) {  // starts at position 1 when type == 0, otherwise at 0
+    int rc = scan[c];
+    int v = qcoeff_ptr[rc];
+    band = vp8_coef_bands[c];  // band is indexed by scan position, not by raster position
+    token = get_token(v);
+    fc->coef_counts[type][band][pt][token]++;
+    pt = vp8_prev_token_class[token];  // context for the next token
+  }
+
+  if (eob < seg_eob) {  // count an explicit end-of-block token only when the block ended before seg_eob
+    band = vp8_coef_bands[c];
+    fc->coef_counts[type][band][pt][DCT_EOB_TOKEN]++;
+  }
+}
+#endif
+
 void static count_tokens(INT16 *qcoeff_ptr, int block, int type,
                          ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                          int eob, int seg_eob, FRAME_CONTEXT *const fc) {
@@ -289,8 +336,14 @@
     WRITE_COEF_CONTINUE(val);
   }
 #if CONFIG_ADAPTIVE_ENTROPY
+
   if (block_type == TX_4X4)
+#if CONFIG_HYBRIDTRANSFORM
+    count_tokens_adaptive_scan(xd, qcoeff_ptr, i, type, a, l, c, seg_eob, fc);
+#else
     count_tokens(qcoeff_ptr, i, type, a, l, c, seg_eob, fc);
+#endif
+
   else
     count_tokens_8x8(qcoeff_ptr, i, type, a, l, c, seg_eob, fc);
 #endif
@@ -351,12 +404,21 @@
   return eobtotal;
 }
 
+
 int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *xd) {
   ENTROPY_CONTEXT *const A = (ENTROPY_CONTEXT *)xd->above_context;
   ENTROPY_CONTEXT *const L = (ENTROPY_CONTEXT *)xd->left_context;
 
   char *const eobs = xd->eobs;
+#if CONFIG_HYBRIDTRANSFORM
+  const int *scan = vp8_default_zig_zag1d;
+  int QIndex = xd->q_index;
+  int active_ht = (QIndex < ACTIVE_HT) &&
+                  (xd->mode_info_context->mbmi.mode == B_PRED);
+#else
   const int *const scan = vp8_default_zig_zag1d;
+#endif
+
   int c, i, type, eobtotal = 0, seg_eob = 16;
   INT16 *qcoeff_ptr = &xd->qcoeff[0];
 
@@ -388,6 +450,41 @@
     if (i == 16)
       type = PLANE_TYPE_UV;
 
+#if CONFIG_HYBRIDTRANSFORM
+    if (type == PLANE_TYPE_Y_WITH_DC &&
+        xd->mode_info_context->mbmi.mode == B_PRED &&
+        active_ht) {
+      BLOCKD *b = &xd->block[i];
+      switch(b->bmi.as_mode.first) {
+        case B_TM_PRED :
+        case B_RD_PRED :
+          b->bmi.as_mode.tx_type = ADST_ADST;
+          scan = vp8_default_zig_zag1d;
+          break;
+
+        case B_VE_PRED :
+        case B_VR_PRED :
+          b->bmi.as_mode.tx_type = ADST_DCT;
+          scan = vp8_row_scan;
+          break ;
+
+        case B_HE_PRED :
+        case B_HD_PRED :
+        case B_HU_PRED :
+          b->bmi.as_mode.tx_type = DCT_ADST;
+          scan = vp8_col_scan;
+          break;
+
+        default :
+          b->bmi.as_mode.tx_type = DCT_DCT;
+          scan = vp8_default_zig_zag1d;
+          break;
+      }
+    }
+    if (type == PLANE_TYPE_UV) {
+      scan = vp8_default_zig_zag1d;
+    }
+#endif
     c = vp8_decode_coefs(dx, xd, a, l, type, seg_eob, qcoeff_ptr,
                          i, scan, TX_4X4, coef_bands_x);
     a[0] = l[0] = ((eobs[i] = c) != !type);
--- a/vp8/encoder/dct.c
+++ b/vp8/encoder/dct.c
@@ -13,6 +13,28 @@
 #include "vpx_ports/config.h"
 #include "vp8/common/idct.h"
 
+#if CONFIG_HYBRIDTRANSFORM
+
+#include "vp8/common/blockd.h"
+
+float dct_4[16] = {  // 4x4 forward DCT matrix, row-major: row j holds the weights for output coefficient j
+  0.500000000000000,  0.500000000000000,  0.500000000000000,  0.500000000000000,
+  0.653281482438188,  0.270598050073099, -0.270598050073099, -0.653281482438188,
+  0.500000000000000, -0.500000000000000, -0.500000000000000,  0.500000000000000,
+  0.270598050073099, -0.653281482438188,  0.653281482438188, -0.270598050073099
+};
+
+float adst_4[16] = {  // 4x4 forward ADST matrix, row-major: row j holds the weights for output coefficient j
+  0.228013428883779,  0.428525073124360,  0.577350269189626,  0.656538502008139,
+  0.577350269189626,  0.577350269189626,  0.000000000000000, -0.577350269189626,
+  0.656538502008139, -0.228013428883779, -0.577350269189626,  0.428525073124359,
+  0.428525073124360, -0.656538502008139,  0.577350269189626, -0.228013428883779
+};
+#endif
+
+
+#if CONFIG_INT_8X8FDCT
+
 static const int xC1S7 = 16069;
 static const int xC2S6 = 15137;
 static const int xC3S5 = 13623;
@@ -268,6 +290,112 @@
 
 }
 
+#if CONFIG_HYBRIDTRANSFORM
+void vp8_fht4x4_c(short *input, short *output, int pitch, TX_TYPE tx_type) {
+  int i, j, k;
+  float bufa[16], bufb[16]; // float scratch buffers: this is a floating-point
+                             // test implementation; it could be simplified in
+                             // conjunction with an integer transform
+  short *ip = input;
+  short *op = output;
+
+  float *pfa = &bufa[0];
+  float *pfb = &bufb[0];
+
+  // pointers to vertical and horizontal transforms
+  float *ptv, *pth;
+
+  // load and convert residual array into floating-point
+  for(j = 0; j < 4; j++) {
+    for(i = 0; i < 4; i++) {
+      pfa[i] = (float)ip[i];
+    }
+    pfa += 4;
+    ip  += pitch / 2;  // pitch is halved to get the input row stride in shorts
+  }
+
+  // vertical transformation: bufb = T_v * bufa (T_v applied down each column)
+  pfa = &bufa[0];
+  pfb = &bufb[0];
+
+  switch(tx_type) {
+    case ADST_ADST :
+    case ADST_DCT  :
+      ptv = &adst_4[0];
+      break;
+
+    default :
+      ptv = &dct_4[0];
+      break;
+  }
+
+  for(j = 0; j < 4; j++) {
+    for(i = 0; i < 4; i++) {
+      pfb[i] = 0;
+      for(k = 0; k < 4; k++) {
+        pfb[i] += ptv[k] * pfa[(k<<2)];  // row j of T_v dotted with column i of bufa
+      }
+      pfa += 1;  // step to the next column of bufa
+    }
+    pfb += 4;
+    ptv += 4;  // next row of T_v
+    pfa = &bufa[0];  // rewind to column 0 for the next output row
+  }
+
+  // horizontal transformation: bufa = bufb * T_h^T (each row of bufb dotted with each row of T_h)
+  pfa = &bufa[0];
+  pfb = &bufb[0];
+
+  switch(tx_type) {
+    case ADST_ADST :
+    case  DCT_ADST :
+      pth = &adst_4[0];
+      break;
+
+    default :
+      pth = &dct_4[0];
+      break;
+  }
+
+  for(j = 0; j < 4; j++) {
+    for(i = 0; i < 4; i++) {
+      pfa[i] = 0;
+      for(k = 0; k < 4; k++) {
+        pfa[i] += pfb[k] * pth[k];
+      }
+      pth += 4;  // next row of T_h
+     }
+
+    pfa += 4;
+    pfb += 4;
+
+    switch(tx_type) {  // rewind pth to the start of the same table for the next row
+      case ADST_ADST :
+      case  DCT_ADST :
+        pth = &adst_4[0];
+        break;
+
+      default :
+        pth = &dct_4[0];
+        break;
+    }
+  }
+
+  // scale by 8, round with a 0.49 offset symmetric about zero, store as short (4 per output row)
+  op  = output ;
+  pfa = &bufa[0] ;
+
+  for(j = 0; j < 4; j++) {
+    for(i = 0; i < 4; i++) {
+      op[i] = (pfa[i] > 0 ) ? (short)( 8 * pfa[i] + 0.49) :
+                                   -(short)(- 8 * pfa[i] + 0.49);
+    }
+    op  += 4;
+    pfa += 4;
+  }
+}
+#endif
+
 void vp8_short_fdct4x4_c(short *input, short *output, int pitch) {
   int i;
   int a1, b1, c1, d1;
@@ -309,9 +437,18 @@
   }
 }
 
-void vp8_short_fdct8x4_c(short *input, short *output, int pitch) {
-  vp8_short_fdct4x4_c(input,   output,    pitch);
-  vp8_short_fdct4x4_c(input + 4, output + 16, pitch);
+#if CONFIG_HYBRIDTRANSFORM
+void vp8_fht8x4_c(short *input, short *output, int pitch,
+                  TX_TYPE tx_type) {
+  vp8_fht4x4_c(input,     output,      pitch, tx_type);  // left 4x4 block -> output[0..15]
+  vp8_fht4x4_c(input + 4, output + 16, pitch, tx_type);  // block starting 4 columns right -> output[16..31]
+}
+#endif
+
+void vp8_short_fdct8x4_c(short *input, short *output, int pitch)
+{
+    vp8_short_fdct4x4_c(input,   output,    pitch);
+    vp8_short_fdct4x4_c(input + 4, output + 16, pitch);
 }
 
 void vp8_short_walsh4x4_c(short *input, short *output, int pitch) {
--- a/vp8/encoder/dct.h
+++ b/vp8/encoder/dct.h
@@ -23,6 +23,10 @@
 #endif
 
 
+#if CONFIG_HYBRIDTRANSFORM
+void vp8_fht4x4_c(short *input, short *output, int pitch, TX_TYPE tx_type);
+void vp8_fht8x4_c(short *input, short *output, int pitch, TX_TYPE tx_type);
+#endif
 
 #ifndef vp8_fdct_short8x8
 #define vp8_fdct_short8x8  vp8_short_fdct8x8_c
--- a/vp8/encoder/encodeintra.c
+++ b/vp8/encoder/encodeintra.c
@@ -32,8 +32,11 @@
 #define IF_RTCD(x) NULL
 #endif
 
-int vp8_encode_intra(VP8_COMP *cpi, MACROBLOCK *x, int use_16x16_pred) {
+#if CONFIG_HYBRIDTRANSFORM
+extern void vp8_ht_quantize_b(BLOCK *b, BLOCKD *d);
+#endif
 
+int vp8_encode_intra(VP8_COMP *cpi, MACROBLOCK *x, int use_16x16_pred) {
   int i;
   int intra_pred_var = 0;
   (void) cpi;
@@ -64,6 +67,12 @@
   BLOCKD *b = &x->e_mbd.block[ib];
   BLOCK *be = &x->block[ib];
 
+#if CONFIG_HYBRIDTRANSFORM
+    int QIndex = x->q_index;
+    int active_ht = (QIndex < ACTIVE_HT);
+#endif
+
+
 #if CONFIG_COMP_INTRA_PRED
   if (b->bmi.as_mode.second == (B_PREDICTION_MODE)(B_DC_PRED - 1)) {
 #endif
@@ -78,12 +87,46 @@
 
   ENCODEMB_INVOKE(&rtcd->encodemb, subb)(be, b, 16);
 
-  x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32);
+#if CONFIG_HYBRIDTRANSFORM
+    if(active_ht) {
+      b->bmi.as_mode.test = b->bmi.as_mode.first;
+      switch(b->bmi.as_mode.first) {
+        // case B_DC_PRED :
+        case B_TM_PRED :
+        case B_RD_PRED :
+          b->bmi.as_mode.tx_type = ADST_ADST;
+          break;
 
-  x->quantize_b(be, b);
+        case B_VE_PRED :
+        case B_VR_PRED :
+          b->bmi.as_mode.tx_type = ADST_DCT;
+          break;
 
-  vp8_inverse_transform_b(IF_RTCD(&rtcd->common->idct), b, 32);
+        case B_HE_PRED :
+        case B_HD_PRED :
+        case B_HU_PRED :
+          b->bmi.as_mode.tx_type = DCT_ADST;
+          break;
 
+        default :
+          b->bmi.as_mode.tx_type = DCT_DCT;
+          break;
+      }
+
+      vp8_fht4x4_c(be->src_diff, be->coeff, 32, b->bmi.as_mode.tx_type);
+      vp8_ht_quantize_b(be, b);
+      vp8_inverse_htransform_b(IF_RTCD(&rtcd->common->idct), b, 32) ;
+    } else {
+      x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32) ;
+      x->quantize_b(be, b) ;
+      vp8_inverse_transform_b(IF_RTCD(&rtcd->common->idct), b, 32) ;
+    }
+#else
+    x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32);
+    x->quantize_b(be, b);
+    vp8_inverse_transform_b(IF_RTCD(&rtcd->common->idct), b, 32);
+#endif
+
   RECON_INVOKE(&rtcd->common->recon, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
 }
 
@@ -273,7 +316,6 @@
     RECON_INVOKE(&rtcd->common->recon, recon)(b->predictor,
                                               b->diff, *(b->base_dst) + b->dst, b->dst_stride);
   }
-
 }
 
 extern const int vp8_i8x8_block[4];
--- a/vp8/encoder/quantize.c
+++ b/vp8/encoder/quantize.c
@@ -22,6 +22,72 @@
 extern int enc_debug;
 #endif
 
+#if CONFIG_HYBRIDTRANSFORM
+void vp8_ht_quantize_b(BLOCK *b, BLOCKD *d) {
+  int i, rc, eob;
+  int zbin;
+  int x, y, z, sz;
+  short *zbin_boost_ptr  = b->zrun_zbin_boost;
+  short *coeff_ptr       = b->coeff;
+  short *zbin_ptr        = b->zbin;
+  short *round_ptr       = b->round;
+  short *quant_ptr       = b->quant;
+  unsigned char *quant_shift_ptr = b->quant_shift;
+  short *qcoeff_ptr      = d->qcoeff;
+  short *dqcoeff_ptr     = d->dqcoeff;
+  short *dequant_ptr     = d->dequant;
+  short zbin_oq_value    = b->zbin_extra;
+
+  int const *pt_scan ;  // scan order, chosen from the block's transform type
+
+  switch(d->bmi.as_mode.tx_type) {
+    case ADST_DCT :
+      pt_scan = vp8_row_scan;
+      break;
+
+    case DCT_ADST :
+      pt_scan = vp8_col_scan;
+      break;
+
+    default :
+      pt_scan = vp8_default_zig_zag1d;
+      break;
+  }
+
+  vpx_memset(qcoeff_ptr, 0, 32);  // clear 32 bytes of each output buffer before quantizing
+  vpx_memset(dqcoeff_ptr, 0, 32);
+
+  eob = -1;  // no nonzero coefficient seen yet
+
+  for (i = 0; i < b->eob_max_offset; i++) {
+    rc   = pt_scan[i];  // raster position of the i-th coefficient in scan order
+    z    = coeff_ptr[rc];
+
+    zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value;
+    zbin_boost_ptr ++;  // boost advances with each coefficient visited since the last nonzero one
+
+    sz = (z >> 31);                                 // sign mask of z (presumably 0 or -1; assumes 32-bit int and arithmetic shift)
+    x  = (z ^ sz) - sz;                             // x = abs(z)
+
+    if (x >= zbin) {
+      x += round_ptr[rc];
+      y  = (((x * quant_ptr[rc]) >> 16) + x)
+           >> quant_shift_ptr[rc];                // quantize (x)
+      x  = (y ^ sz) - sz;                         // get the sign back
+      qcoeff_ptr[rc]  = x;                        // write to destination
+      dqcoeff_ptr[rc] = x * dequant_ptr[rc];      // dequantized value
+
+      if (y) {
+        eob = i;                                // scan position of the last nonzero coefficient
+        zbin_boost_ptr = b->zrun_zbin_boost;    // reset zero runlength
+      }
+    }
+  }
+
+  d->eob = eob + 1;  // one past the scan position of the last nonzero coefficient (0 if none)
+}
+#endif
+
 void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d) {
   int i, rc, eob;
   int zbin;
@@ -47,7 +113,7 @@
     z    = coeff_ptr[rc];
 
     zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value;
-    zbin_boost_ptr++;
+    zbin_boost_ptr ++;
 
     sz = (z >> 31);                                 // sign of z
     x  = (z ^ sz) - sz;                             // x = abs(z)
@@ -54,6 +120,7 @@
 
     if (x >= zbin) {
       x += round_ptr[rc];
+
       y  = (((x * quant_ptr[rc]) >> 16) + x)
            >> quant_shift_ptr[rc];                // quantize (x)
       x  = (y ^ sz) - sz;                         // get the sign back
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -50,6 +50,10 @@
 extern void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x);
 extern void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x);
 
+#if CONFIG_HYBRIDTRANSFORM
+extern void vp8_ht_quantize_b(BLOCK *b, BLOCKD *d);
+#endif
+
 #if CONFIG_HIGH_PRECISION_MV
 #define XMVCOST (x->e_mbd.allow_high_precision_mv?x->mvcost_hp:x->mvcost)
 #else
@@ -545,10 +549,39 @@
   int cost = 0;
   short *qcoeff_ptr = b->qcoeff;
 
-  VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
+#if CONFIG_HYBRIDTRANSFORM
+  int QIndex = mb->q_index;
+  int active_ht = (QIndex < ACTIVE_HT) &&
+                (mb->e_mbd.mode_info_context->mbmi.mode_rdopt == B_PRED);
 
-# define QC( I)  ( qcoeff_ptr [vp8_default_zig_zag1d[I]] )
+  int const *pt_scan;
 
+  if((type == PLANE_TYPE_Y_WITH_DC) && active_ht) {
+    switch (b->bmi.as_mode.tx_type) {
+      case ADST_DCT :
+        pt_scan = vp8_row_scan;
+        break;
+
+      case DCT_ADST :
+        pt_scan = vp8_col_scan;
+        break;
+
+      default :
+        pt_scan = vp8_default_zig_zag1d;
+        break;
+    }
+
+  } else {
+    pt_scan = vp8_default_zig_zag1d;
+  }
+
+#define  QC(I)  ( qcoeff_ptr [pt_scan[I]] )
+#else
+#define QC(I)  ( qcoeff_ptr [vp8_default_zig_zag1d[I]] )
+#endif
+
+    VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
+
   for (; c < eob; c++) {
     int v = QC(c);
     int t = vp8_dct_value_tokens_ptr[v].Token;
@@ -804,11 +837,17 @@
   int *bmode_costs,
   ENTROPY_CONTEXT *a,
   ENTROPY_CONTEXT *l,
-
   int *bestrate,
   int *bestratey,
   int *bestdistortion) {
   B_PREDICTION_MODE mode;
+
+#if CONFIG_HYBRIDTRANSFORM
+  int QIndex = x->q_index;
+  int active_ht = (QIndex < ACTIVE_HT);
+  TX_TYPE best_tx_type;
+#endif
+
 #if CONFIG_COMP_INTRA_PRED
   B_PREDICTION_MODE mode2;
 #endif
@@ -828,7 +867,8 @@
 
   for (mode = B_DC_PRED; mode <= B_HU_PRED; mode++) {
 #if CONFIG_COMP_INTRA_PRED
-    for (mode2 = (allow_comp ? 0 : (B_DC_PRED - 1)); mode2 != (allow_comp ? (mode + 1) : 0); mode2++) {
+    for (mode2 = (allow_comp ? 0 : (B_DC_PRED - 1));
+                   mode2 != (allow_comp ? (mode + 1) : 0); mode2++) {
 #endif
       int this_rd;
       int ratey;
@@ -853,36 +893,75 @@
       }
 #endif
       ENCODEMB_INVOKE(IF_RTCD(&cpi->rtcd.encodemb), subb)(be, b, 16);
-      x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32);
-      x->quantize_b(be, b);
 
-      tempa = ta;
-      templ = tl;
+#if CONFIG_HYBRIDTRANSFORM
+      if(active_ht) {
+        b->bmi.as_mode.test = mode;
+        switch(mode) {
+          // case B_DC_PRED :
+          case B_TM_PRED :
+          case B_RD_PRED :
+            b->bmi.as_mode.tx_type = ADST_ADST;
+            break;
 
-      ratey = cost_coeffs(x, b, PLANE_TYPE_Y_WITH_DC, &tempa, &templ);
-      rate += ratey;
-      distortion = ENCODEMB_INVOKE(IF_RTCD(&cpi->rtcd.encodemb), berr)(
-                     be->coeff, b->dqcoeff) >> 2;
+          case B_VE_PRED :
+          case B_VR_PRED :
+            b->bmi.as_mode.tx_type = ADST_DCT;
+            break;
 
-      this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
+          case B_HE_PRED :
+          case B_HD_PRED :
+          case B_HU_PRED :
+            b->bmi.as_mode.tx_type = DCT_ADST;
+            break;
 
-      if (this_rd < best_rd) {
-        *bestrate = rate;
-        *bestratey = ratey;
-        *bestdistortion = distortion;
-        best_rd = this_rd;
-        *best_mode = mode;
-#if CONFIG_COMP_INTRA_PRED
-        *best_second_mode = mode2;
+          default :
+            b->bmi.as_mode.tx_type = DCT_DCT;
+            break;
+        }
+
+        vp8_fht4x4_c(be->src_diff, be->coeff, 32, b->bmi.as_mode.tx_type);
+        vp8_ht_quantize_b(be, b);
+      } else {
+        x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32);
+        x->quantize_b(be, b);
+      }
+#else
+        x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32);
+        x->quantize_b(be, b);
 #endif
-        *a = tempa;
-        *l = templ;
-        copy_predictor(best_predictor, b->predictor);
-        vpx_memcpy(best_dqcoeff, b->dqcoeff, 32);
+
+        tempa = ta;
+        templ = tl;
+
+        ratey = cost_coeffs(x, b, PLANE_TYPE_Y_WITH_DC, &tempa, &templ);
+        rate += ratey;
+        distortion = ENCODEMB_INVOKE(IF_RTCD(&cpi->rtcd.encodemb), berr)(
+            be->coeff, b->dqcoeff) >> 2;
+
+        this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
+
+        if (this_rd < best_rd) {
+          *bestrate = rate;
+          *bestratey = ratey;
+          *bestdistortion = distortion;
+          best_rd = this_rd;
+          *best_mode = mode;
+#if CONFIG_HYBRIDTRANSFORM
+          best_tx_type = b->bmi.as_mode.tx_type ;
+#endif
+
 #if CONFIG_COMP_INTRA_PRED
-      }
+          *best_second_mode = mode2;
 #endif
+          *a = tempa;
+          *l = templ;
+          copy_predictor(best_predictor, b->predictor);
+          vpx_memcpy(best_dqcoeff, b->dqcoeff, 32);
+        }
+#if CONFIG_COMP_INTRA_PRED
     }
+#endif
   }
   b->bmi.as_mode.first = (B_PREDICTION_MODE)(*best_mode);
 #if CONFIG_COMP_INTRA_PRED
@@ -889,7 +968,20 @@
   b->bmi.as_mode.second = (B_PREDICTION_MODE)(*best_second_mode);
 #endif
 
+#if CONFIG_HYBRIDTRANSFORM
+  b->bmi.as_mode.tx_type = best_tx_type;
+
+  // inverse transform
+  if(active_ht) {
+    vp8_iht4x4llm_c(best_dqcoeff, b->diff, 32, b->bmi.as_mode.tx_type );
+  } else {
+    IDCT_INVOKE(IF_RTCD(&cpi->rtcd.common->idct), idct16)(best_dqcoeff,
+                                                                b->diff, 32);
+  }
+#else
   IDCT_INVOKE(IF_RTCD(&cpi->rtcd.common->idct), idct16)(best_dqcoeff, b->diff, 32);
+#endif
+
   RECON_INVOKE(IF_RTCD(&cpi->rtcd.common->recon), recon)(best_predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
 
   return best_rd;
@@ -1043,6 +1135,8 @@
 #endif
   return best_rd;
 }
+
+
 static int rd_pick_intra8x8block(
   VP8_COMP *cpi,
   MACROBLOCK *x,
@@ -2739,6 +2833,12 @@
     xd->mode_info_context->mbmi.pred_filter_enabled = 0;
 #endif
 
+    // current coding mode under rate-distortion optimization test loop
+#if CONFIG_HYBRIDTRANSFORM
+    xd->mode_info_context->mbmi.mode_rdopt = this_mode;
+#endif
+
+
 #if CONFIG_COMP_INTRA_PRED
     xd->mode_info_context->mbmi.second_mode = (MB_PREDICTION_MODE)(DC_PRED - 1);
     xd->mode_info_context->mbmi.second_uv_mode = (MB_PREDICTION_MODE)(DC_PRED - 1);
@@ -3646,36 +3746,45 @@
   MACROBLOCKD *xd = &x->e_mbd;
   int error4x4, error16x16;
 #if CONFIG_COMP_INTRA_PRED
-  int error4x4d, rate4x4d, dist4x4d;
+    int error4x4d, rate4x4d, dist4x4d;
 #endif
-  int rate4x4, rate16x16 = 0, rateuv;
-  int dist4x4, dist16x16, distuv;
-  int rate;
-  int rate4x4_tokenonly = 0;
-  int rate16x16_tokenonly = 0;
-  int rateuv_tokenonly = 0;
-  int error8x8, rate8x8_tokenonly = 0;
-  int rate8x8, dist8x8;
-  int mode16x16;
-  int mode8x8[2][4];
+    int rate4x4, rate16x16 = 0, rateuv;
+    int dist4x4, dist16x16, distuv;
+    int rate;
+    int rate4x4_tokenonly = 0;
+    int rate16x16_tokenonly = 0;
+    int rateuv_tokenonly = 0;
+    int error8x8, rate8x8_tokenonly=0;
+    int rate8x8, dist8x8;
+    int mode16x16;
+    int mode8x8[2][4];
 
-  xd->mode_info_context->mbmi.ref_frame = INTRA_FRAME;
+    xd->mode_info_context->mbmi.ref_frame = INTRA_FRAME;
 
-  rd_pick_intra_mbuv_mode(cpi, x, &rateuv, &rateuv_tokenonly, &distuv);
-  rate = rateuv;
+    rd_pick_intra_mbuv_mode(cpi, x, &rateuv, &rateuv_tokenonly, &distuv);
+    rate = rateuv;
 
-  error16x16 = rd_pick_intra16x16mby_mode(cpi, x,
-                                          &rate16x16, &rate16x16_tokenonly,
-                                          &dist16x16);
-  mode16x16 = xd->mode_info_context->mbmi.mode;
+    // current macroblock under rate-distortion optimization test loop
+#if CONFIG_HYBRIDTRANSFORM
+    xd->mode_info_context->mbmi.mode_rdopt = DC_PRED;
+#endif
 
-  error8x8 = rd_pick_intra8x8mby_modes(cpi, x,
-                                       &rate8x8, &rate8x8_tokenonly,
-                                       &dist8x8, error16x16);
-  mode8x8[0][0] = xd->mode_info_context->bmi[0].as_mode.first;
-  mode8x8[0][1] = xd->mode_info_context->bmi[2].as_mode.first;
-  mode8x8[0][2] = xd->mode_info_context->bmi[8].as_mode.first;
-  mode8x8[0][3] = xd->mode_info_context->bmi[10].as_mode.first;
+    error16x16 = rd_pick_intra16x16mby_mode(cpi, x,
+                                            &rate16x16, &rate16x16_tokenonly,
+                                            &dist16x16);
+    mode16x16 = xd->mode_info_context->mbmi.mode;
+
+#if CONFIG_HYBRIDTRANSFORM
+    xd->mode_info_context->mbmi.mode_rdopt = I8X8_PRED;
+#endif
+
+    error8x8 = rd_pick_intra8x8mby_modes(cpi, x,
+                &rate8x8, &rate8x8_tokenonly,
+                &dist8x8, error16x16);
+    mode8x8[0][0]= xd->mode_info_context->bmi[0].as_mode.first;
+    mode8x8[0][1]= xd->mode_info_context->bmi[2].as_mode.first;
+    mode8x8[0][2]= xd->mode_info_context->bmi[8].as_mode.first;
+    mode8x8[0][3]= xd->mode_info_context->bmi[10].as_mode.first;
 #if CONFIG_COMP_INTRA_PRED
   mode8x8[1][0] = xd->mode_info_context->bmi[0].as_mode.second;
   mode8x8[1][1] = xd->mode_info_context->bmi[2].as_mode.second;
@@ -3683,9 +3792,13 @@
   mode8x8[1][3] = xd->mode_info_context->bmi[10].as_mode.second;
 #endif
 
-  error4x4 = rd_pick_intra4x4mby_modes(cpi, x,
-                                       &rate4x4, &rate4x4_tokenonly,
-                                       &dist4x4, error16x16,
+#if CONFIG_HYBRIDTRANSFORM
+    xd->mode_info_context->mbmi.mode_rdopt = B_PRED;
+#endif
+
+    error4x4 = rd_pick_intra4x4mby_modes(cpi, x,
+                                         &rate4x4, &rate4x4_tokenonly,
+                                         &dist4x4, error16x16,
 #if CONFIG_COMP_INTRA_PRED
                                        0,
 #endif
--- a/vp8/encoder/tokenize.c
+++ b/vp8/encoder/tokenize.c
@@ -298,6 +298,164 @@
   *a = *l = pt;
 }
 
+#if CONFIG_HYBRIDTRANSFORM
+/* Tokenizes the 16 luma and 8 chroma blocks of one macroblock. Luma blocks
+ * use a row, column or zig-zag scan chosen from their 4x4 prediction mode
+ * where one applies; chroma blocks always use the default zig-zag scan.
+ *
+ * xd:   macroblock descriptor; its blocks and mode info are read and its
+ *       above/left entropy contexts are updated per block.
+ * tp:   in/out token cursor, advanced past every token written.
+ * type: luma plane type; indexes the coefficient tables and, when zero,
+ *       makes luma tokenization start at coefficient 1 instead of 0.
+ * cpi:  encoder instance providing coef_probs and collecting coef_counts.
+ */
+static void tokenize1st_order_ht(MACROBLOCKD *xd, TOKENEXTRA **tp, int type,
+                                 VP8_COMP *cpi) {
+  unsigned int block;
+  const BLOCKD *b;
+  int pt;             /* near block/prev token context index */
+  int c;
+  int token;
+  TOKENEXTRA *t = *tp;/* store tokens starting here */
+  const short *qcoeff_ptr;
+  ENTROPY_CONTEXT * a;
+  ENTROPY_CONTEXT * l;
+  int band, rc, v;
+  int tmp1, tmp2;
+  int const *pt_scan ;
+  int seg_eob = 16;
+  int segment_id = xd->mode_info_context->mbmi.segment_id;
+
+  if ( segfeature_active( xd, segment_id, SEG_LVL_EOB ) ) {
+    seg_eob = get_segdata( xd, segment_id, SEG_LVL_EOB );
+  }
+
+  b = xd->block;
+
+  /* Luma */
+  for (block = 0; block < 16; block++, b++) {
+    // Start from a mode that selects the zig-zag scan: b_mode is only loaded
+    // for B_PRED, so the I8X8_PRED path must not read it uninitialized.
+    B_PREDICTION_MODE b_mode = B_DC_PRED;
+    if( xd->mode_info_context->mbmi.mode == B_PRED ) {
+      b_mode = b->bmi.as_mode.first;
+    }
+
+    // assign scanning order for luma components coded in intra4x4 mode
+    if( ( ( xd->mode_info_context->mbmi.mode == B_PRED ) ||
+          ( xd->mode_info_context->mbmi.mode == I8X8_PRED ) ) &&
+        ( type == PLANE_TYPE_Y_WITH_DC) ) {
+      switch(b_mode) {
+        case B_VE_PRED :
+        case B_VR_PRED :
+          pt_scan = vp8_row_scan;
+          break;
+
+        case B_HE_PRED :
+        case B_HD_PRED :
+        case B_HU_PRED :
+          pt_scan = vp8_col_scan;
+          break;
+
+        default :
+          pt_scan = vp8_default_zig_zag1d;
+          break;
+      }
+    } else {
+      pt_scan = vp8_default_zig_zag1d;
+    }
+
+    tmp1 = vp8_block2above[block];
+    tmp2 = vp8_block2left[block];
+    qcoeff_ptr = b->qcoeff;
+    a = (ENTROPY_CONTEXT *)xd->above_context + tmp1;
+    l = (ENTROPY_CONTEXT *)xd->left_context + tmp2;
+    VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
+
+    c = type ? 0 : 1;
+
+    for (; c < b->eob; c++) {
+      rc = pt_scan[c];
+      band = vp8_coef_bands[c];
+      v = qcoeff_ptr[rc];
+
+      t->Extra = vp8_dct_value_tokens_ptr[v].Extra;
+      token    = vp8_dct_value_tokens_ptr[v].Token;
+      t->Token = token;
+      t->context_tree = cpi->common.fc.coef_probs [type] [band] [pt];
+      t->skip_eob_node = pt == 0 &&
+          ((band > 0 && type > 0) || (band > 1 && type == 0));
+
+      ++cpi->coef_counts       [type] [band] [pt] [token];
+
+      pt = vp8_prev_token_class[token];
+      t++;
+    }
+
+    // terminate the block with an EOB token unless it ran to seg_eob
+    if (c < seg_eob) {
+      band = vp8_coef_bands[c];
+      t->Token = DCT_EOB_TOKEN;
+      t->context_tree = cpi->common.fc.coef_probs [type] [band] [pt];
+      t->skip_eob_node = pt == 0 &&
+          ((band > 0 && type > 0) || (band > 1 && type == 0));
+
+      ++cpi->coef_counts       [type] [band] [pt] [DCT_EOB_TOKEN];
+      t++;
+    }
+
+    *tp = t;
+    pt = (c != !type); /* 0 <-> all coeff data is zero */
+    *a = *l = pt;
+  }
+
+  // reset scanning order for chroma components
+  pt_scan = vp8_default_zig_zag1d ;
+
+  /* Chroma */
+  for (block = 16; block < 24; block++, b++) {
+    tmp1 = vp8_block2above[block];
+    tmp2 = vp8_block2left[block];
+    qcoeff_ptr = b->qcoeff;
+    a = (ENTROPY_CONTEXT *)xd->above_context + tmp1;
+    l = (ENTROPY_CONTEXT *)xd->left_context + tmp2;
+    VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
+
+    for (c = 0; c < b->eob; c++) {
+      rc = pt_scan[c];
+      band = vp8_coef_bands[c];
+      v = qcoeff_ptr[rc];
+
+      t->Extra = vp8_dct_value_tokens_ptr[v].Extra;
+      token    = vp8_dct_value_tokens_ptr[v].Token;
+      t->Token = token;
+      t->context_tree = cpi->common.fc.coef_probs [2] [band] [pt];
+      t->skip_eob_node = ((pt == 0) && (band > 0));
+
+      ++cpi->coef_counts       [2] [band] [pt] [token];
+
+      pt = vp8_prev_token_class[token];
+      t++;
+    }
+
+    if (c < seg_eob) {
+      band = vp8_coef_bands[c];
+      t->Token = DCT_EOB_TOKEN;
+      t->context_tree = cpi->common.fc.coef_probs [2] [band] [pt];
+      t->skip_eob_node = ((pt == 0) && (band > 0));
+
+      ++cpi->coef_counts       [2] [band] [pt] [DCT_EOB_TOKEN];
+      t++;
+    }
+
+    *tp = t;
+    pt = (c != 0); /* 0 <-> all coeff data is zero */
+    *a = *l = pt;
+  }
+}
+#endif
+
 static void tokenize1st_order_b
 (
   MACROBLOCKD *xd,
@@ -483,6 +641,11 @@
   int skip_inc;
   int segment_id = x->mode_info_context->mbmi.segment_id;
 
+#if CONFIG_HYBRIDTRANSFORM
+    int QIndex = cpi->mb.q_index;
+    int active_ht = (QIndex < ACTIVE_HT);
+#endif
+
   if (!segfeature_active(x, segment_id, SEG_LVL_EOB) ||
       (get_segdata(x, segment_id, SEG_LVL_EOB) != 0)) {
     skip_inc = 1;
@@ -560,9 +723,17 @@
       *(A + vp8_block2above_8x8[b] + 1) = *(A + vp8_block2above_8x8[b]);
       *(L + vp8_block2left_8x8[b] + 1) = *(L + vp8_block2left_8x8[b]);
     }
-  } else
-
+  } else {
+#if CONFIG_HYBRIDTRANSFORM
+    if(active_ht) {
+      tokenize1st_order_ht(x, t, plane_type, cpi);
+    } else {
+      tokenize1st_order_b(x, t, plane_type, cpi);
+    }
+#else
     tokenize1st_order_b(x, t, plane_type, cpi);
+#endif
+  }
 }