shithub: libvpx

--- a/vp9/common/vp9_blockd.h

+++ b/vp9/common/vp9_blockd.h

@@ -390,22 +390,22 @@

   int lossless;

 #endif

   /* Inverse transform function pointers. */

-  void (*inv_xform4x4_1_x8)(int16_t *input, int16_t *output, int pitch);

-  void (*inv_xform4x4_x8)(int16_t *input, int16_t *output, int pitch);

-  void (*inv_walsh4x4_1)(int16_t *in, int16_t *out);

-  void (*inv_walsh4x4_lossless)(int16_t *in, int16_t *out);

-  void (*idct_add)(int16_t *input, const int16_t *dq,

+  void (*inv_txm4x4_1)(int16_t *input, int16_t *output, int pitch);

+  void (*inv_txm4x4)(int16_t *input, int16_t *output, int pitch);

+  void (*inv_2ndtxm4x4_1)(int16_t *in, int16_t *out);

+  void (*inv_2ndtxm4x4)(int16_t *in, int16_t *out);

+  void (*itxm_add)(int16_t *input, const int16_t *dq,

     uint8_t *pred, uint8_t *output, int pitch, int stride);

-  void (*dc_idct_add)(int16_t *input, const int16_t *dq,

+  void (*dc_itxm_add)(int16_t *input, const int16_t *dq,

     uint8_t *pred, uint8_t *output, int pitch, int stride, int dc);

-  void (*dc_only_idct_add)(int input_dc, uint8_t *pred_ptr,

+  void (*dc_only_itxm_add)(int input_dc, uint8_t *pred_ptr,

     uint8_t *dst_ptr, int pitch, int stride);

-  void (*dc_idct_add_y_block)(int16_t *q, const int16_t *dq,

+  void (*dc_itxm_add_y_block)(int16_t *q, const int16_t *dq,

     uint8_t *pre, uint8_t *dst, int stride, uint16_t *eobs,

     const int16_t *dc);

-  void (*idct_add_y_block)(int16_t *q, const int16_t *dq,

+  void (*itxm_add_y_block)(int16_t *q, const int16_t *dq,

     uint8_t *pre, uint8_t *dst, int stride, uint16_t *eobs);

-  void (*idct_add_uv_block)(int16_t *q, const int16_t *dq,

+  void (*itxm_add_uv_block)(int16_t *q, const int16_t *dq,

     uint8_t *pre, uint8_t *dst_u, uint8_t *dst_v, int stride,

     uint16_t *eobs);

@@ -520,7 +520,7 @@

     return tx_type;

 #if CONFIG_LOSSLESS

   if (xd->lossless)

-    return tx_type;

+    return DCT_DCT;

 #endif

   // TODO(rbultje, debargha): Explore ADST usage for superblocks

   if (xd->mode_info_context->mbmi.sb_type)

--- a/vp9/common/vp9_idctllm.c

+++ b/vp9/common/vp9_idctllm.c

@@ -476,12 +476,13 @@

-void vp9_dc_only_inv_walsh_add_c(short input_dc, uint8_t *pred_ptr,

+void vp9_dc_only_inv_walsh_add_c(int input_dc, uint8_t *pred_ptr,

                                  uint8_t *dst_ptr,

                                  int pitch, int stride) {

   int r, c;

-  short tmp[16];

-  vp9_short_inv_walsh4x4_1_x8_c(&input_dc, tmp, 4 << 1);

+  int16_t dc = input_dc;

+  int16_t tmp[16];

+  vp9_short_inv_walsh4x4_1_x8_c(&dc, tmp, 4 << 1);

   for (r = 0; r < 4; r++) {

     for (c = 0; c < 4; c++) {

--- a/vp9/common/vp9_invtrans.c

+++ b/vp9/common/vp9_invtrans.c

@@ -32,9 +32,9 @@

 void vp9_inverse_transform_b_4x4(MACROBLOCKD *xd, int block, int pitch) {

   BLOCKD *b = &xd->block[block];

   if (b->eob <= 1)

-    xd->inv_xform4x4_1_x8(b->dqcoeff, b->diff, pitch);

+    xd->inv_txm4x4_1(b->dqcoeff, b->diff, pitch);

   else

-    xd->inv_xform4x4_x8(b->dqcoeff, b->diff, pitch);

+    xd->inv_txm4x4(b->dqcoeff, b->diff, pitch);

 void vp9_inverse_transform_mby_4x4(MACROBLOCKD *xd) {

@@ -44,7 +44,7 @@

   if (has_2nd_order) {

     /* do 2nd order transform on the dc block */

-    xd->inv_walsh4x4_lossless(blockd[24].dqcoeff, blockd[24].diff);

+    xd->inv_2ndtxm4x4(blockd[24].dqcoeff, blockd[24].diff);

     recon_dcblock(xd);

--- a/vp9/decoder/vp9_decodframe.c

+++ b/vp9/decoder/vp9_decodframe.c

@@ -123,43 +123,30 @@

     xd->block[i].dequant = pc->Y1dequant[QIndex];

+  xd->inv_txm4x4_1        = vp9_short_idct4x4llm_1;

+  xd->inv_txm4x4          = vp9_short_idct4x4llm;

+  xd->inv_2ndtxm4x4_1     = vp9_short_inv_walsh4x4_1;

+  xd->inv_2ndtxm4x4       = vp9_short_inv_walsh4x4;

+  xd->itxm_add            = vp9_dequant_idct_add;

+  xd->dc_only_itxm_add    = vp9_dc_only_idct_add_c;

+  xd->dc_itxm_add         = vp9_dequant_dc_idct_add;

+  xd->dc_itxm_add_y_block = vp9_dequant_dc_idct_add_y_block;

+  xd->itxm_add_y_block    = vp9_dequant_idct_add_y_block;

+  xd->itxm_add_uv_block   = vp9_dequant_idct_add_uv_block;

 #if CONFIG_LOSSLESS

-  pbi->mb.lossless = 0;

-  if (!QIndex) {

-    pbi->mb.inv_xform4x4_1_x8     = vp9_short_inv_walsh4x4_1_x8;

-    pbi->mb.inv_xform4x4_x8       = vp9_short_inv_walsh4x4_x8;

-    pbi->mb.inv_walsh4x4_1        = vp9_short_inv_walsh4x4_1_lossless;

-    pbi->mb.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4_lossless;

-    pbi->mb.idct_add            = vp9_dequant_idct_add_lossless_c;

-    pbi->mb.dc_only_idct_add    = vp9_dc_only_inv_walsh_add_c;

-    pbi->mb.dc_idct_add         = vp9_dequant_dc_idct_add_lossless_c;

-    pbi->mb.dc_idct_add_y_block = vp9_dequant_dc_idct_add_y_block_lossless_c;

-    pbi->mb.idct_add_y_block    = vp9_dequant_idct_add_y_block_lossless_c;

-    pbi->mb.idct_add_uv_block   = vp9_dequant_idct_add_uv_block_lossless_c;

-    pbi->mb.lossless = 1;

-  } else {

-    pbi->mb.inv_xform4x4_1_x8     = vp9_short_idct4x4llm_1;

-    pbi->mb.inv_xform4x4_x8       = vp9_short_idct4x4llm;

-    pbi->mb.inv_walsh4x4_1        = vp9_short_inv_walsh4x4_1;

-    pbi->mb.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4;

-    pbi->mb.idct_add            = vp9_dequant_idct_add;

-    pbi->mb.dc_only_idct_add    = vp9_dc_only_idct_add_c;

-    pbi->mb.dc_idct_add         = vp9_dequant_dc_idct_add;

-    pbi->mb.dc_idct_add_y_block = vp9_dequant_dc_idct_add_y_block;

-    pbi->mb.idct_add_y_block    = vp9_dequant_idct_add_y_block;

-    pbi->mb.idct_add_uv_block   = vp9_dequant_idct_add_uv_block;

+  if (xd->lossless) {

+    /* Lossless frames override every hook set above with its
+     * inv_walsh / *_lossless counterpart. */

+    assert(QIndex == 0);

+    xd->inv_txm4x4_1        = vp9_short_inv_walsh4x4_1_x8;

+    xd->inv_txm4x4          = vp9_short_inv_walsh4x4_x8;

+    xd->inv_2ndtxm4x4_1     = vp9_short_inv_walsh4x4_1_lossless;

+    xd->inv_2ndtxm4x4       = vp9_short_inv_walsh4x4_lossless;

+    xd->itxm_add            = vp9_dequant_idct_add_lossless_c;

+    xd->dc_only_itxm_add    = vp9_dc_only_inv_walsh_add_c;

+    xd->dc_itxm_add         = vp9_dequant_dc_idct_add_lossless_c;

+    xd->dc_itxm_add_y_block = vp9_dequant_dc_idct_add_y_block_lossless_c;

+    xd->itxm_add_y_block    = vp9_dequant_idct_add_y_block_lossless_c;

+    xd->itxm_add_uv_block   = vp9_dequant_idct_add_uv_block_lossless_c;

   }

-#else

-  pbi->mb.inv_xform4x4_1_x8     = vp9_short_idct4x4llm_1;

-  pbi->mb.inv_xform4x4_x8       = vp9_short_idct4x4llm;

-  pbi->mb.inv_walsh4x4_1        = vp9_short_inv_walsh4x4_1;

-  pbi->mb.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4;

-  pbi->mb.idct_add            = vp9_dequant_idct_add;

-  pbi->mb.dc_only_idct_add    = vp9_dc_only_idct_add_c;

-  pbi->mb.dc_idct_add         = vp9_dequant_dc_idct_add;

-  pbi->mb.dc_idct_add_y_block = vp9_dequant_dc_idct_add_y_block;

-  pbi->mb.idct_add_y_block    = vp9_dequant_idct_add_y_block;

-  pbi->mb.idct_add_uv_block   = vp9_dequant_idct_add_uv_block;

 #endif

   for (i = 16; i < 24; i++) {

@@ -349,15 +336,15 @@

       int i8x8mode = b->bmi.as_mode.first;

       b = &xd->block[16 + i];

       vp9_intra_uv4x4_predict(xd, &xd->block[16 + i], i8x8mode, b->predictor);

-      xd->idct_add(b->qcoeff, b->dequant, b->predictor,

+      xd->itxm_add(b->qcoeff, b->dequant, b->predictor,

                     *(b->base_dst) + b->dst, 8, b->dst_stride);

       b = &xd->block[20 + i];

       vp9_intra_uv4x4_predict(xd, &xd->block[20 + i], i8x8mode, b->predictor);

-      xd->idct_add(b->qcoeff, b->dequant, b->predictor,

+      xd->itxm_add(b->qcoeff, b->dequant, b->predictor,

                     *(b->base_dst) + b->dst, 8, b->dst_stride);

   } else if (xd->mode_info_context->mbmi.mode == SPLITMV) {

-    xd->idct_add_uv_block(xd->qcoeff + 16 * 16, xd->block[16].dequant,

+    xd->itxm_add_uv_block(xd->qcoeff + 16 * 16, xd->block[16].dequant,

          xd->predictor + 16 * 16, xd->dst.u_buffer, xd->dst.v_buffer,

          xd->dst.uv_stride, xd->eobs + 16);

   } else {

@@ -404,17 +391,17 @@

                                     *(b->base_dst) + b->dst, 16,

                                     b->dst_stride, b->eob);

         } else {

-          xd->idct_add(b->qcoeff, b->dequant, b->predictor,

+          xd->itxm_add(b->qcoeff, b->dequant, b->predictor,

                         *(b->base_dst) + b->dst, 16, b->dst_stride);

       b = &xd->block[16 + i];

       vp9_intra_uv4x4_predict(xd, b, i8x8mode, b->predictor);

-      xd->idct_add(b->qcoeff, b->dequant, b->predictor,

+      xd->itxm_add(b->qcoeff, b->dequant, b->predictor,

                     *(b->base_dst) + b->dst, 8, b->dst_stride);

       b = &xd->block[20 + i];

       vp9_intra_uv4x4_predict(xd, b, i8x8mode, b->predictor);

-      xd->idct_add(b->qcoeff, b->dequant, b->predictor,

+      xd->itxm_add(b->qcoeff, b->dequant, b->predictor,

                     *(b->base_dst) + b->dst, 8, b->dst_stride);

   } else if (mode == B_PRED) {

@@ -438,7 +425,7 @@

                                   *(b->base_dst) + b->dst, 16, b->dst_stride,

                                   b->eob);

       } else {

-        xd->idct_add(b->qcoeff, b->dequant, b->predictor,

+        xd->itxm_add(b->qcoeff, b->dequant, b->predictor,

                       *(b->base_dst) + b->dst, 16, b->dst_stride);

@@ -448,7 +435,7 @@

     xd->above_context->y2 = 0;

     xd->left_context->y2 = 0;

     vp9_build_intra_predictors_mbuv(xd);

-    xd->idct_add_uv_block(xd->qcoeff + 16 * 16,

+    xd->itxm_add_uv_block(xd->qcoeff + 16 * 16,

                            xd->block[16].dequant,

                            xd->predictor + 16 * 16,

                            xd->dst.u_buffer,

@@ -457,13 +444,13 @@

                            xd->eobs + 16);

   } else if (mode == SPLITMV) {

     assert(get_2nd_order_usage(xd) == 0);

-    xd->idct_add_y_block(xd->qcoeff,

+    xd->itxm_add_y_block(xd->qcoeff,

                           xd->block[0].dequant,

                           xd->predictor,

                           xd->dst.y_buffer,

                           xd->dst.y_stride,

                           xd->eobs);

-    xd->idct_add_uv_block(xd->qcoeff + 16 * 16,

+    xd->itxm_add_uv_block(xd->qcoeff + 16 * 16,

                            xd->block[16].dequant,

                            xd->predictor + 16 * 16,

                            xd->dst.u_buffer,

@@ -500,7 +487,7 @@

                                     *(b->base_dst) + b->dst, 16,

                                     b->dst_stride, b->eob);

         } else {

-          xd->idct_add(b->qcoeff, b->dequant, b->predictor,

+          xd->itxm_add(b->qcoeff, b->dequant, b->predictor,

                         *(b->base_dst) + b->dst, 16, b->dst_stride);

@@ -509,7 +496,7 @@

       assert(get_2nd_order_usage(xd) == 1);

       vp9_dequantize_b(b);

       if (xd->eobs[24] > 1) {

-        xd->inv_walsh4x4_lossless(&b->dqcoeff[0], b->diff);

+        xd->inv_2ndtxm4x4(&b->dqcoeff[0], b->diff);

         ((int *)b->qcoeff)[0] = 0;

         ((int *)b->qcoeff)[1] = 0;

         ((int *)b->qcoeff)[2] = 0;

@@ -519,11 +506,11 @@

         ((int *)b->qcoeff)[6] = 0;

         ((int *)b->qcoeff)[7] = 0;

       } else {

-        xd->inv_walsh4x4_1(&b->dqcoeff[0], b->diff);

+        xd->inv_2ndtxm4x4_1(&b->dqcoeff[0], b->diff);

         ((int *)b->qcoeff)[0] = 0;

       vp9_dequantize_b(b);

-      xd->dc_idct_add_y_block(xd->qcoeff,

+      xd->dc_itxm_add_y_block(xd->qcoeff,

                                xd->block[0].dequant,

                                xd->predictor,

                                xd->dst.y_buffer,

@@ -531,7 +518,7 @@

                                xd->eobs,

                                xd->block[24].diff);

-    xd->idct_add_uv_block(xd->qcoeff + 16 * 16,

+    xd->itxm_add_uv_block(xd->qcoeff + 16 * 16,

                            xd->block[16].dequant,

                            xd->predictor + 16 * 16,

                            xd->dst.u_buffer,

@@ -649,7 +636,7 @@

             + x_idx * 16 + (i & 3) * 4,

             xd->dst.y_stride, xd->dst.y_stride, b->eob);

       } else {

-        xd->idct_add(

+        xd->itxm_add(

             b->qcoeff, b->dequant,

             xd->dst.y_buffer + (y_idx * 16 + (i / 4) * 4) * xd->dst.y_stride

             + x_idx * 16 + (i & 3) * 4,

@@ -661,7 +648,7 @@

   } else {

     vp9_dequantize_b(b);

     if (xd->eobs[24] > 1) {

-      xd->inv_walsh4x4_lossless(&b->dqcoeff[0], b->diff);

+      xd->inv_2ndtxm4x4(&b->dqcoeff[0], b->diff);

       ((int *)b->qcoeff)[0] = 0;

       ((int *)b->qcoeff)[1] = 0;

       ((int *)b->qcoeff)[2] = 0;

@@ -671,7 +658,7 @@

       ((int *)b->qcoeff)[6] = 0;

       ((int *)b->qcoeff)[7] = 0;

     } else {

-      xd->inv_walsh4x4_1(&b->dqcoeff[0], b->diff);

+      xd->inv_2ndtxm4x4_1(&b->dqcoeff[0], b->diff);

       ((int *)b->qcoeff)[0] = 0;

     vp9_dequant_dc_idct_add_y_block_4x4_inplace_c(

@@ -1533,17 +1520,24 @@

   pc->sb64_coded = vp9_read_literal(&header_bc, 8);

   pc->sb32_coded = vp9_read_literal(&header_bc, 8);

-  /* Read the loop filter level and type */

-  pc->txfm_mode = vp9_read_literal(&header_bc, 2);

-  if (pc->txfm_mode == 3)

-    pc->txfm_mode += vp9_read_bit(&header_bc);

-  if (pc->txfm_mode == TX_MODE_SELECT) {

-    pc->prob_tx[0] = vp9_read_literal(&header_bc, 8);

-    pc->prob_tx[1] = vp9_read_literal(&header_bc, 8);

-    pc->prob_tx[2] = vp9_read_literal(&header_bc, 8);

+#if CONFIG_LOSSLESS

+  xd->lossless = vp9_read_bit(&header_bc);

+  if (xd->lossless) {

+    pc->txfm_mode = ONLY_4X4;

   }

+  else

+#endif

+  {

+    /* Read the transform mode and, for TX_MODE_SELECT, its probabilities */

+    pc->txfm_mode = vp9_read_literal(&header_bc, 2);

+    if (pc->txfm_mode == 3)

+      pc->txfm_mode += vp9_read_bit(&header_bc);

+    if (pc->txfm_mode == TX_MODE_SELECT) {

+      pc->prob_tx[0] = vp9_read_literal(&header_bc, 8);

+      pc->prob_tx[1] = vp9_read_literal(&header_bc, 8);

+      pc->prob_tx[2] = vp9_read_literal(&header_bc, 8);

+    }

+  }

   pc->filter_type = (LOOPFILTERTYPE) vp9_read_bit(&header_bc);

   pc->filter_level = vp9_read_literal(&header_bc, 6);

   pc->sharpness_level = vp9_read_literal(&header_bc, 3);

--- a/vp9/decoder/vp9_idct_blk.c

+++ b/vp9/decoder/vp9_idct_blk.c

@@ -51,9 +51,9 @@

   for (i = 0; i < 4; i++) {

     for (j = 0; j < 4; j++) {

       if (*eobs++ > 1)

-        xd->dc_idct_add(q, dq, dst, dst, stride, stride, dc[0]);

+        xd->dc_itxm_add(q, dq, dst, dst, stride, stride, dc[0]);

       else

-        xd->dc_only_idct_add(dc[0], dst, dst, stride, stride);

+        xd->dc_only_itxm_add(dc[0], dst, dst, stride, stride);

       q   += 16;

       dst += 4;

@@ -143,9 +143,9 @@

   for (i = 0; i < 2; i++) {

     for (j = 0; j < 2; j++) {

       if (*eobs++ > 1) {

-        xd->idct_add(q, dq, dstu, dstu, stride, stride);

+        xd->itxm_add(q, dq, dstu, dstu, stride, stride);

       } else {

-        xd->dc_only_idct_add(q[0]*dq[0], dstu, dstu, stride, stride);

+        xd->dc_only_itxm_add(q[0]*dq[0], dstu, dstu, stride, stride);

         ((int *)q)[0] = 0;

@@ -159,9 +159,9 @@

   for (i = 0; i < 2; i++) {

     for (j = 0; j < 2; j++) {

       if (*eobs++ > 1) {

-        xd->idct_add(q, dq, dstv, dstv, stride, stride);

+        xd->itxm_add(q, dq, dstv, dstv, stride, stride);

       } else {

-        xd->dc_only_idct_add(q[0]*dq[0], dstv, dstv, stride, stride);

+        xd->dc_only_itxm_add(q[0]*dq[0], dstv, dstv, stride, stride);

         ((int *)q)[0] = 0;

--- a/vp9/encoder/vp9_bitstream.c

+++ b/vp9/encoder/vp9_bitstream.c

@@ -1667,7 +1667,13 @@

   vp9_write_literal(&header_bc, pc->sb64_coded, 8);

   pc->sb32_coded = get_binary_prob(cpi->sb32_count[0], cpi->sb32_count[1]);

   vp9_write_literal(&header_bc, pc->sb32_coded, 8);

+#if CONFIG_LOSSLESS

+  vp9_write_bit(&header_bc, cpi->oxcf.lossless);

+  if (cpi->oxcf.lossless) {

+    pc->txfm_mode = ONLY_4X4;

+  }

+  else

+#endif

     if (pc->txfm_mode == TX_MODE_SELECT) {

       pc->prob_tx[0] = get_prob(cpi->txfm_count_32x32p[TX_4X4] +

--- a/vp9/encoder/vp9_block.h

+++ b/vp9/encoder/vp9_block.h

@@ -169,14 +169,14 @@

   PICK_MODE_CONTEXT sb32_context[4];

   PICK_MODE_CONTEXT sb64_context;

-  void (*vp9_short_fdct4x4)(int16_t *input, int16_t *output, int pitch);

-  void (*vp9_short_fdct8x4)(int16_t *input, int16_t *output, int pitch);

-  void (*short_walsh4x4)(int16_t *input, int16_t *output, int pitch);

+  void (*fwd_txm4x4)(int16_t *input, int16_t *output, int pitch);

+  void (*fwd_txm8x4)(int16_t *input, int16_t *output, int pitch);

+  void (*fwd_2ndtxm4x4)(int16_t *input, int16_t *output, int pitch);

+  void (*fwd_txm8x8)(int16_t *input, int16_t *output, int pitch);

+  void (*fwd_txm16x16)(int16_t *input, int16_t *output, int pitch);

+  void (*fwd_2ndtxm2x2)(int16_t *input, int16_t *output, int pitch);

   void (*quantize_b_4x4)(BLOCK *b, BLOCKD *d);

   void (*quantize_b_4x4_pair)(BLOCK *b1, BLOCK *b2, BLOCKD *d0, BLOCKD *d1);

-  void (*vp9_short_fdct8x8)(int16_t *input, int16_t *output, int pitch);

-  void (*vp9_short_fdct16x16)(int16_t *input, int16_t *output, int pitch);

-  void (*short_fhaar2x2)(int16_t *input, int16_t *output, int pitch);

   void (*quantize_b_16x16)(BLOCK *b, BLOCKD *d);

   void (*quantize_b_8x8)(BLOCK *b, BLOCKD *d);

   void (*quantize_b_2x2)(BLOCK *b, BLOCKD *d);

--- a/vp9/encoder/vp9_encodeintra.c

+++ b/vp9/encoder/vp9_encodeintra.c

@@ -58,7 +58,7 @@

     vp9_ht_quantize_b_4x4(be, b, tx_type);

     vp9_ihtllm(b->dqcoeff, b->diff, 32, tx_type, 4, b->eob);

   } else {

-    x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32);

+    x->fwd_txm4x4(be->src_diff, be->coeff, 32);

     x->quantize_b_4x4(be, b) ;

     vp9_inverse_transform_b_4x4(&x->e_mbd, ib, 32);

@@ -161,7 +161,7 @@

                    tx_type, 8, xd->block[idx].eob);

 #endif

     } else {

-      x->vp9_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32);

+      x->fwd_txm8x8(be->src_diff, (x->block + idx)->coeff, 32);

       x->quantize_b_8x8(x->block + idx, xd->block + idx);

       vp9_short_idct8x8(xd->block[idx].dqcoeff, xd->block[ib].diff, 32);

@@ -175,13 +175,13 @@

         vp9_ht_quantize_b_4x4(be, b, tx_type);

         vp9_ihtllm(b->dqcoeff, b->diff, 32, tx_type, 4, b->eob);

       } else if (!(i & 1) && get_tx_type_4x4(xd, b + 1) == DCT_DCT) {

-        x->vp9_short_fdct8x4(be->src_diff, be->coeff, 32);

+        x->fwd_txm8x4(be->src_diff, be->coeff, 32);

         x->quantize_b_4x4_pair(be, be + 1, b, b + 1);

         vp9_inverse_transform_b_4x4(xd, ib + iblock[i], 32);

         vp9_inverse_transform_b_4x4(xd, ib + iblock[i] + 1, 32);

         i++;

       } else {

-        x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32);

+        x->fwd_txm4x4(be->src_diff, be->coeff, 32);

         x->quantize_b_4x4(be, b);

         vp9_inverse_transform_b_4x4(xd, ib + iblock[i], 32);

@@ -214,7 +214,7 @@

   vp9_subtract_b(be, b, 8);

-  x->vp9_short_fdct4x4(be->src_diff, be->coeff, 16);

+  x->fwd_txm4x4(be->src_diff, be->coeff, 16);

   x->quantize_b_4x4(be, b);

   vp9_inverse_transform_b_4x4(&x->e_mbd, ib, 16);

--- a/vp9/encoder/vp9_encodemb.c

+++ b/vp9/encoder/vp9_encodemb.c

@@ -188,11 +188,11 @@

       assert(has_2nd_order == 0);

       vp9_fht_c(b->src_diff, 32, b->coeff, tx_type, 4);

     } else if (!(i & 1) && get_tx_type_4x4(xd, &xd->block[i + 1]) == DCT_DCT) {

-      x->vp9_short_fdct8x4(&x->block[i].src_diff[0],

+      x->fwd_txm8x4(&x->block[i].src_diff[0],

                            &x->block[i].coeff[0], 32);

       i++;

     } else {

-      x->vp9_short_fdct4x4(&x->block[i].src_diff[0],

+      x->fwd_txm4x4(&x->block[i].src_diff[0],

                            &x->block[i].coeff[0], 32);

@@ -202,7 +202,7 @@

     build_dcblock_4x4(x);

     // do 2nd order transform on the dc block

-    x->short_walsh4x4(&x->block[24].src_diff[0],

+    x->fwd_2ndtxm4x4(&x->block[24].src_diff[0],

                       &x->block[24].coeff[0], 8);

   } else {

     vpx_memset(x->block[24].coeff, 0, 16 * sizeof(x->block[24].coeff[0]));

@@ -213,7 +213,7 @@

   int i;

   for (i = 16; i < 24; i += 2) {

-    x->vp9_short_fdct8x4(&x->block[i].src_diff[0],

+    x->fwd_txm8x4(&x->block[i].src_diff[0],

                          &x->block[i].coeff[0], 16);

@@ -253,7 +253,7 @@

       assert(has_2nd_order == 0);

       vp9_fht_c(b->src_diff, 32, b->coeff, tx_type, 8);

     } else {

-      x->vp9_short_fdct8x8(&x->block[i].src_diff[0],

+      x->fwd_txm8x8(&x->block[i].src_diff[0],

                            &x->block[i].coeff[0], 32);

@@ -264,7 +264,7 @@

       assert(has_2nd_order == 0);

       vp9_fht_c(b->src_diff, 32, (b + 2)->coeff, tx_type, 8);

     } else {

-      x->vp9_short_fdct8x8(&x->block[i].src_diff[0],

+      x->fwd_txm8x8(&x->block[i].src_diff[0],

                            &x->block[i + 2].coeff[0], 32);

@@ -274,7 +274,7 @@

     build_dcblock_8x8(x);

     // do 2nd order transform on the dc block

-    x->short_fhaar2x2(&x->block[24].src_diff[0],

+    x->fwd_2ndtxm2x2(&x->block[24].src_diff[0],

                       &x->block[24].coeff[0], 8);

   } else {

     vpx_memset(x->block[24].coeff, 0, 16 * sizeof(x->block[24].coeff[0]));

@@ -285,7 +285,7 @@

   int i;

   for (i = 16; i < 24; i += 4) {

-    x->vp9_short_fdct8x8(&x->block[i].src_diff[0],

+    x->fwd_txm8x8(&x->block[i].src_diff[0],

                          &x->block[i].coeff[0], 16);

@@ -303,7 +303,7 @@

   if (tx_type != DCT_DCT) {

     vp9_fht_c(b->src_diff, 32, b->coeff, tx_type, 16);

   } else {

-    x->vp9_short_fdct16x16(&x->block[0].src_diff[0],

+    x->fwd_txm16x16(&x->block[0].src_diff[0],

                            &x->block[0].coeff[0], 32);

@@ -321,9 +321,9 @@

 void vp9_transform_sbuv_16x16(MACROBLOCK *x) {

   SUPERBLOCK * const x_sb = &x->sb_coeff_data;

   vp9_clear_system_state();

-  x->vp9_short_fdct16x16(x_sb->src_diff + 1024,

+  x->fwd_txm16x16(x_sb->src_diff + 1024,

                          x_sb->coeff + 1024, 32);

-  x->vp9_short_fdct16x16(x_sb->src_diff + 1280,

+  x->fwd_txm16x16(x_sb->src_diff + 1280,

                          x_sb->coeff + 1280, 32);

--- a/vp9/encoder/vp9_onyx_if.c

+++ b/vp9/encoder/vp9_onyx_if.c

@@ -752,10 +752,10 @@

   sf->quarter_pixel_search = 1;

   sf->half_pixel_search = 1;

   sf->iterative_sub_pixel = 1;

-#if CONFIG_LOSSLESS

-  sf->optimize_coefficients = 0;

-#else

   sf->optimize_coefficients = 1;

+#if CONFIG_LOSSLESS

+  if (cpi->oxcf.lossless)

+    sf->optimize_coefficients = 0;

 #endif

   sf->no_skip_block4x4_search = 1;

   sf->first_step = 0;

@@ -840,20 +840,18 @@

-  cpi->mb.vp9_short_fdct16x16 = vp9_short_fdct16x16;

-  cpi->mb.vp9_short_fdct8x8 = vp9_short_fdct8x8;

-  cpi->mb.vp9_short_fdct8x4 = vp9_short_fdct8x4;

-  cpi->mb.vp9_short_fdct4x4 = vp9_short_fdct4x4;

-  cpi->mb.short_walsh4x4 = vp9_short_walsh4x4;

-  cpi->mb.short_fhaar2x2 = vp9_short_fhaar2x2;

+  cpi->mb.fwd_txm16x16  = vp9_short_fdct16x16;

+  cpi->mb.fwd_txm8x8    = vp9_short_fdct8x8;

+  cpi->mb.fwd_txm8x4    = vp9_short_fdct8x4;

+  cpi->mb.fwd_txm4x4    = vp9_short_fdct4x4;

+  cpi->mb.fwd_2ndtxm4x4 = vp9_short_walsh4x4;

+  cpi->mb.fwd_2ndtxm2x2 = vp9_short_fhaar2x2;

 #if CONFIG_LOSSLESS

   if (cpi->oxcf.lossless) {

-    cpi->mb.vp9_short_fdct8x4 = vp9_short_walsh8x4_x8;

-    cpi->mb.vp9_short_fdct4x4 = vp9_short_walsh4x4_x8;

-    cpi->mb.short_walsh4x4 = vp9_short_walsh4x4;

-    cpi->mb.short_fhaar2x2 = vp9_short_fhaar2x2;

-    cpi->mb.short_walsh4x4 = vp9_short_walsh4x4_lossless;

+    cpi->mb.fwd_txm8x4    = vp9_short_walsh8x4_x8;

+    cpi->mb.fwd_txm4x4    = vp9_short_walsh4x4_x8;

+    cpi->mb.fwd_2ndtxm4x4 = vp9_short_walsh4x4_lossless;

 #endif

@@ -1206,18 +1204,18 @@

   cpi->oxcf.best_allowed_q = q_trans[oxcf->best_allowed_q];

   cpi->oxcf.cq_level = q_trans[cpi->oxcf.cq_level];

-  cpi->mb.e_mbd.inv_xform4x4_1_x8     = vp9_short_idct4x4llm_1;

-  cpi->mb.e_mbd.inv_xform4x4_x8       = vp9_short_idct4x4llm;

-  cpi->mb.e_mbd.inv_walsh4x4_1        = vp9_short_inv_walsh4x4_1;

-  cpi->mb.e_mbd.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4;

+  cpi->mb.e_mbd.inv_txm4x4_1    = vp9_short_idct4x4llm_1;

+  cpi->mb.e_mbd.inv_txm4x4      = vp9_short_idct4x4llm;

+  cpi->mb.e_mbd.inv_2ndtxm4x4_1 = vp9_short_inv_walsh4x4_1;

+  cpi->mb.e_mbd.inv_2ndtxm4x4   = vp9_short_inv_walsh4x4;

 #if CONFIG_LOSSLESS

   cpi->oxcf.lossless = oxcf->lossless;

   if (cpi->oxcf.lossless) {

-    cpi->mb.e_mbd.inv_xform4x4_1_x8     = vp9_short_inv_walsh4x4_1_x8;

-    cpi->mb.e_mbd.inv_xform4x4_x8       = vp9_short_inv_walsh4x4_x8;

-    cpi->mb.e_mbd.inv_walsh4x4_1        = vp9_short_inv_walsh4x4_1_lossless;

-    cpi->mb.e_mbd.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4_lossless;

+    cpi->mb.e_mbd.inv_txm4x4_1    = vp9_short_inv_walsh4x4_1_x8;

+    cpi->mb.e_mbd.inv_txm4x4      = vp9_short_inv_walsh4x4_x8;

+    cpi->mb.e_mbd.inv_2ndtxm4x4_1 = vp9_short_inv_walsh4x4_1_lossless;

+    cpi->mb.e_mbd.inv_2ndtxm4x4   = vp9_short_inv_walsh4x4_lossless;

 #endif

@@ -2619,10 +2617,10 @@

   // For 2 Pass Only used where GF/ARF prediction quality

   // is above a threshold

   cpi->zbin_mode_boost = 0;

-#if CONFIG_LOSSLESS

-  cpi->zbin_mode_boost_enabled = FALSE;

-#else

   cpi->zbin_mode_boost_enabled = TRUE;

+#if CONFIG_LOSSLESS

+  if (cpi->oxcf.lossless)

+    cpi->zbin_mode_boost_enabled = FALSE;

 #endif

   if (cpi->gfu_boost <= 400) {

     cpi->zbin_mode_boost_enabled = FALSE;

--- a/vp9/encoder/vp9_quantize.c

+++ b/vp9/encoder/vp9_quantize.c

@@ -460,18 +460,14 @@

   static const int zbin_boost[16] = { 0,  0,  0,  8,  8,  8, 10, 12,

                                      14, 16, 20, 24, 28, 32, 36, 40 };

-  int qrounding_factor = 48;

   for (Q = 0; Q < QINDEX_RANGE; Q++) {

     int qzbin_factor = (vp9_dc_quant(Q, 0) < 148) ? 84 : 80;

+    int qrounding_factor = 48;

 #if CONFIG_LOSSLESS

-    if (cpi->oxcf.lossless) {

-      if (Q == 0) {

-        qzbin_factor = 64;

-        qrounding_factor = 64;

-      }

+    /* Only index 0 of a lossless encode overrides both factors. */

+    if (cpi->oxcf.lossless && Q == 0) {

+      qzbin_factor = 64;

+      qrounding_factor = 64;

     }

 #endif

--- a/vp9/encoder/vp9_rdopt.c

+++ b/vp9/encoder/vp9_rdopt.c

@@ -1140,7 +1140,7 @@

       vp9_fht(be->src_diff, 32, be->coeff, tx_type, 4);

       vp9_ht_quantize_b_4x4(be, b, tx_type);

     } else {

-      x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32);

+      x->fwd_txm4x4(be->src_diff, be->coeff, 32);

       x->quantize_b_4x4(be, b);

@@ -1172,7 +1172,7 @@

   if (best_tx_type != DCT_DCT)

     vp9_ihtllm(best_dqcoeff, b->diff, 32, best_tx_type, 4, b->eob);

   else

-    xd->inv_xform4x4_x8(best_dqcoeff, b->diff, 32);

+    xd->inv_txm4x4(best_dqcoeff, b->diff, 32);

   vp9_recon_b(best_predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);

@@ -1436,7 +1436,7 @@

       if (tx_type != DCT_DCT)

         vp9_fht(be->src_diff, 32, (x->block + idx)->coeff, tx_type, 8);

       else

-        x->vp9_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32);

+        x->fwd_txm8x8(be->src_diff, (x->block + idx)->coeff, 32);

       x->quantize_b_8x8(x->block + idx, xd->block + idx);

       // compute quantization mse of 8x8 block

@@ -1470,11 +1470,11 @@

           vp9_fht_c(be->src_diff, 32, be->coeff, tx_type, 4);

           vp9_ht_quantize_b_4x4(be, b, tx_type);

         } else if (!(i & 1) && get_tx_type_4x4(xd, b + 1) == DCT_DCT) {

-          x->vp9_short_fdct8x4(be->src_diff, be->coeff, 32);

+          x->fwd_txm8x4(be->src_diff, be->coeff, 32);

           x->quantize_b_4x4_pair(be, be + 1, b, b + 1);

           do_two = 1;

         } else {

-          x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32);

+          x->fwd_txm4x4(be->src_diff, be->coeff, 32);

           x->quantize_b_4x4(be, b);

         distortion += vp9_block_error_c(be->coeff, b->dqcoeff, 16 << do_two);

@@ -2244,7 +2244,7 @@

       if (xd->mode_info_context->mbmi.second_ref_frame > 0)

         vp9_build_2nd_inter_predictors_b(bd, 16, &xd->subpix);

       vp9_subtract_b(be, bd, 16);

-      x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32);

+      x->fwd_txm4x4(be->src_diff, be->coeff, 32);

       x->quantize_b_4x4(be, bd);

       thisdistortion = vp9_block_error(be->coeff, bd->dqcoeff, 16);

       *distortion += thisdistortion;

@@ -2296,7 +2296,7 @@

       if (xd->mode_info_context->mbmi.txfm_size == TX_4X4) {

         if (otherrd) {

-          x->vp9_short_fdct8x8(be->src_diff, be2->coeff, 32);

+          x->fwd_txm8x8(be->src_diff, be2->coeff, 32);

           x->quantize_b_8x8(be2, bd2);

           thisdistortion = vp9_block_error_c(be2->coeff, bd2->dqcoeff, 64);

           otherdist += thisdistortion;

@@ -2308,7 +2308,7 @@

         for (j = 0; j < 4; j += 2) {

           bd = &xd->block[ib + iblock[j]];

           be = &x->block[ib + iblock[j]];

-          x->vp9_short_fdct8x4(be->src_diff, be->coeff, 32);

+          x->fwd_txm8x4(be->src_diff, be->coeff, 32);

           x->quantize_b_4x4_pair(be, be + 1, bd, bd + 1);

           thisdistortion = vp9_block_error_c(be->coeff, bd->dqcoeff, 32);

           *distortion += thisdistortion;

@@ -2326,7 +2326,7 @@

           for (j = 0; j < 4; j += 2) {

             BLOCKD *bd = &xd->block[ib + iblock[j]];

             BLOCK *be = &x->block[ib + iblock[j]];

-            x->vp9_short_fdct8x4(be->src_diff, be->coeff, 32);

+            x->fwd_txm8x4(be->src_diff, be->coeff, 32);

             x->quantize_b_4x4_pair(be, be + 1, bd, bd + 1);

             thisdistortion = vp9_block_error_c(be->coeff, bd->dqcoeff, 32);

             otherdist += thisdistortion;

@@ -2340,7 +2340,7 @@

                            TX_4X4);

-        x->vp9_short_fdct8x8(be->src_diff, be2->coeff, 32);

+        x->fwd_txm8x8(be->src_diff, be2->coeff, 32);

         x->quantize_b_8x8(be2, bd2);

         thisdistortion = vp9_block_error_c(be2->coeff, bd2->dqcoeff, 64);

         *distortion += thisdistortion;

--

⑨