shithub: libvpx

--- a/vp8/common/idct.h

+++ b/vp8/common/idct.h

@@ -111,9 +111,10 @@

 extern prototype_second_order(vp8_short_inv_walsh4x4_1_lossless_c);

 #endif

-#if CONFIG_HYBRIDTRANSFORM

+#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM

 #include "vp8/common/blockd.h"

-void vp8_iht4x4llm_c(short *input, short *output, int pitch, TX_TYPE tx_type);

+void vp8_ihtllm_c(short *input, short *output, int pitch,

+                  TX_TYPE tx_type, int tx_dim);

 #endif

--- a/vp8/common/idctllm.c

+++ b/vp8/common/idctllm.c

@@ -93,120 +93,17 @@

};

 #endif

-#if CONFIG_HYBRIDTRANSFORM

-void vp8_iht4x4llm_c(short *input, short *output, int pitch, TX_TYPE tx_type) {

+#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM

+void vp8_ihtllm_c(short *input, short *output, int pitch,

+                  TX_TYPE tx_type, int tx_dim) {

   int i, j, k;

-  float bufa[16], bufb[16]; // buffers are for floating-point test purpose

-                            // the implementation could be simplified in

-                            // conjunction with integer transform

-  short *ip = input;

-  short *op = output;

-  int shortpitch = pitch >> 1;

-  float *pfa = &bufa[0];

-  float *pfb = &bufb[0];

-  // pointers to vertical and horizontal transforms

-  float *ptv, *pth;

-  // load and convert residual array into floating-point

-  for(j = 0; j < 4; j++) {

-    for(i = 0; i < 4; i++) {

-      pfa[i] = (float)ip[i];

-    }

-    pfa += 4;

-    ip  += 4;

-  }

-  // vertical transformation

-  pfa = &bufa[0];

-  pfb = &bufb[0];

-  switch(tx_type) {

-    case ADST_ADST :

-    case ADST_DCT  :

-      ptv = &iadst_4[0];

-      break;

-    default :

-      ptv = &idct_4[0];

-      break;

-  }

-  for(j = 0; j < 4; j++) {

-    for(i = 0; i < 4; i++) {

-      pfb[i] = 0 ;

-      for(k = 0; k < 4; k++) {

-        pfb[i] += ptv[k] * pfa[(k<<2)];

-      }

-      pfa += 1;

-    }

-    pfb += 4;

-    ptv += 4;

-    pfa = &bufa[0];

-  }

-  // horizontal transformation

-  pfa = &bufa[0];

-  pfb = &bufb[0];

-  switch(tx_type) {

-    case ADST_ADST :

-    case  DCT_ADST :

-      pth = &iadst_4[0];

-      break;

-    default :

-      pth = &idct_4[0];

-      break;

-  }

-  for(j = 0; j < 4; j++) {

-    for(i = 0; i < 4; i++) {

-      pfa[i] = 0;

-      for(k = 0; k < 4; k++) {

-        pfa[i] += pfb[k] * pth[k];

-      }

-      pth += 4;

-     }

-    pfa += 4;

-    pfb += 4;

-    switch(tx_type) {

-      case ADST_ADST :

-      case  DCT_ADST :

-        pth = &iadst_4[0];

-        break;

-      default :

-        pth = &idct_4[0];

-        break;

-    }

-  }

-  // convert to short integer format and load BLOCKD buffer

-  op  = output;

-  pfa = &bufa[0];

-  for(j = 0; j < 4; j++) {

-    for(i = 0; i < 4; i++) {

-      op[i] = (pfa[i] > 0 ) ? (short)( pfa[i] / 8 + 0.49) :

-                             -(short)( - pfa[i] / 8 + 0.49);

-    }

-    op  += shortpitch;

-    pfa += 4;

-  }

-}

-#endif

-#if CONFIG_HYBRIDTRANSFORM8X8

-void vp8_iht8x8llm_c(short *input, short *output, int pitch, TX_TYPE tx_type) {

-  int i, j, k;

   float bufa[64], bufb[64]; // buffers are for floating-point test purpose

                             // the implementation could be simplified in

                             // conjunction with integer transform

+                            // Note: this one function serves both the 4x4 and

+                            // 8x8 transforms, so the temporary buffers are

+                            // always sized for the larger case (64 entries).

   short *ip = input;

   short *op = output;

   int shortpitch = pitch >> 1;

@@ -218,12 +115,12 @@

   float *ptv, *pth;

   // load and convert residual array into floating-point

-  for(j = 0; j < 8; j++) {

-    for(i = 0; i < 8; i++) {

+  for(j = 0; j < tx_dim; j++) {

+    for(i = 0; i < tx_dim; i++) {

       pfa[i] = (float)ip[i];

-    pfa += 8;

-    ip  += 8;

+    pfa += tx_dim;

+    ip  += tx_dim;

   // vertical transformation

@@ -233,25 +130,25 @@

   switch(tx_type) {

     case ADST_ADST :

     case ADST_DCT  :

-      ptv = &iadst_8[0];

+      ptv = (tx_dim == 4) ? &iadst_4[0] : &iadst_8[0];

       break;

     default :

-      ptv = &idct_8[0];

+      ptv = (tx_dim == 4) ? &idct_4[0] : &idct_8[0];

       break;

-  for(j = 0; j < 8; j++) {

-    for(i = 0; i < 8; i++) {

+  for(j = 0; j < tx_dim; j++) {

+    for(i = 0; i < tx_dim; i++) {

       pfb[i] = 0 ;

-      for(k = 0; k < 8; k++) {

-        pfb[i] += ptv[k] * pfa[(k<<3)];

+      for(k = 0; k < tx_dim; k++) {

+        pfb[i] += ptv[k] * pfa[(k * tx_dim)];

       pfa += 1;

-    pfb += 8;

-    ptv += 8;

+    pfb += tx_dim;

+    ptv += tx_dim;

     pfa = &bufa[0];

@@ -262,34 +159,34 @@

   switch(tx_type) {

     case ADST_ADST :

     case  DCT_ADST :

-      pth = &iadst_8[0];

+      pth = (tx_dim == 4) ? &iadst_4[0] : &iadst_8[0];

       break;

     default :

-      pth = &idct_8[0];

+      pth = (tx_dim == 4) ? &idct_4[0] : &idct_8[0];

       break;

-  for(j = 0; j < 8; j++) {

-    for(i = 0; i < 8; i++) {

+  for(j = 0; j < tx_dim; j++) {

+    for(i = 0; i < tx_dim; i++) {

       pfa[i] = 0;

-      for(k = 0; k < 8; k++) {

+      for(k = 0; k < tx_dim; k++) {

         pfa[i] += pfb[k] * pth[k];

-      pth += 8;

+      pth += tx_dim;

-    pfa += 8;

-    pfb += 8;

+    pfa += tx_dim;

+    pfb += tx_dim;

     switch(tx_type) {

       case ADST_ADST :

       case  DCT_ADST :

-        pth = &iadst_8[0];

+        pth = (tx_dim == 4) ? &iadst_4[0] : &iadst_8[0];

         break;

       default :

-        pth = &idct_8[0];

+        pth = (tx_dim == 4) ? &idct_4[0] : &idct_8[0];

         break;

@@ -298,13 +195,14 @@

   op  = output;

   pfa = &bufa[0];

-  for(j = 0; j < 8; j++) {

-    for(i = 0; i < 8; i++) {

+  for(j = 0; j < tx_dim; j++) {

+    for(i = 0; i < tx_dim; i++) {

       op[i] = (pfa[i] > 0 ) ? (short)( pfa[i] / 8 + 0.49) :

                              -(short)( - pfa[i] / 8 + 0.49);

     op  += shortpitch;

-    pfa += 8;

+    pfa += tx_dim;

 #endif

--- a/vp8/common/invtrans.c

+++ b/vp8/common/invtrans.c

@@ -33,7 +33,7 @@

 #if CONFIG_HYBRIDTRANSFORM

 void vp8_inverse_htransform_b(const vp8_idct_rtcd_vtable_t *rtcd, BLOCKD *b, int pitch) {

-  vp8_iht4x4llm_c(b->dqcoeff, b->diff, pitch, b->bmi.as_mode.tx_type);

+  vp8_ihtllm_c(b->dqcoeff, b->diff, pitch, b->bmi.as_mode.tx_type, 4);

 #endif

--- a/vp8/decoder/decodframe.c

+++ b/vp8/decoder/decodframe.c

@@ -392,7 +392,6 @@

       txfm_map(b, pred_mode_conv(i8x8mode));

       vp8_ht_dequant_idct_add_8x8_c(b->bmi.as_mode.tx_type,

                                     q, dq, pre, dst, 16, stride);

-      // vp8_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride);

       q += 64;

 #else

       for (j = 0; j < 4; j++) {

--- a/vp8/decoder/dequantize.c

+++ b/vp8/decoder/dequantize.c

@@ -55,7 +55,7 @@

     input[i] = dq[i] * input[i];

-  vp8_iht4x4llm_c( input, output, 4 << 1, tx_type );

+  vp8_ihtllm_c(input, output, 4 << 1, tx_type, 4);

   vpx_memset(input, 0, 32);

@@ -95,7 +95,7 @@

     input[i] = dq[1] * input[i];

-  vp8_iht8x8llm_c(input, output, 16, tx_type);

+  vp8_ihtllm_c(input, output, 16, tx_type, 8);

   vpx_memset(input, 0, 128);

@@ -117,9 +117,10 @@

       diff_ptr += 8;

       pred += pitch;

-    diff_ptr = output + (b + 1) / 2 * 4 * 8 + (b + 1) % 2 * 4;

-    dest = origdest + (b + 1) / 2 * 4 * stride + (b + 1) % 2 * 4;

-    pred = origpred + (b + 1) / 2 * 4 * pitch + (b + 1) % 2 * 4;

+    // shift buffer pointers to next 4x4 block in the submacroblock

+    diff_ptr = output + (b + 1) / 2 * 4 * 8 + ((b + 1) % 2) * 4;

+    dest = origdest + (b + 1) / 2 * 4 * stride + ((b + 1) % 2) * 4;

+    pred = origpred + (b + 1) / 2 * 4 * pitch + ((b + 1) % 2) * 4;

 #endif

--- a/vp8/encoder/dct.c

+++ b/vp8/encoder/dct.c

@@ -329,115 +329,10 @@

-#if CONFIG_HYBRIDTRANSFORM

-void vp8_fht4x4_c(short *input, short *output, int pitch, TX_TYPE tx_type) {

+#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM

+void vp8_fht_c(short *input, short *output, int pitch,

+               TX_TYPE tx_type, int tx_dim) {

   int i, j, k;

-  float bufa[16], bufb[16]; // buffers are for floating-point test purpose

-                             // the implementation could be simplified in

-                             // conjunction with integer transform

-  short *ip = input;

-  short *op = output;

-  float *pfa = &bufa[0];

-  float *pfb = &bufb[0];

-  // pointers to vertical and horizontal transforms

-  float *ptv, *pth;

-  // load and convert residual array into floating-point

-  for(j = 0; j < 4; j++) {

-    for(i = 0; i < 4; i++) {

-      pfa[i] = (float)ip[i];

-    }

-    pfa += 4;

-    ip  += pitch / 2;

-  }

-  // vertical transformation

-  pfa = &bufa[0];

-  pfb = &bufb[0];

-  switch(tx_type) {

-    case ADST_ADST :

-    case ADST_DCT  :

-      ptv = &adst_4[0];

-      break;

-    default :

-      ptv = &dct_4[0];

-      break;

-  }

-  for(j = 0; j < 4; j++) {

-    for(i = 0; i < 4; i++) {

-      pfb[i] = 0;

-      for(k = 0; k < 4; k++) {

-        pfb[i] += ptv[k] * pfa[(k<<2)];

-      }

-      pfa += 1;

-    }

-    pfb += 4;

-    ptv += 4;

-    pfa = &bufa[0];

-  }

-  // horizontal transformation

-  pfa = &bufa[0];

-  pfb = &bufb[0];

-  switch(tx_type) {

-    case ADST_ADST :

-    case  DCT_ADST :

-      pth = &adst_4[0];

-      break;

-    default :

-      pth = &dct_4[0];

-      break;

-  }

-  for(j = 0; j < 4; j++) {

-    for(i = 0; i < 4; i++) {

-      pfa[i] = 0;

-      for(k = 0; k < 4; k++) {

-        pfa[i] += pfb[k] * pth[k];

-      }

-      pth += 4;

-     }

-    pfa += 4;

-    pfb += 4;

-    switch(tx_type) {

-      case ADST_ADST :

-      case  DCT_ADST :

-        pth = &adst_4[0];

-        break;

-      default :

-        pth = &dct_4[0];

-        break;

-    }

-  }

-  // convert to short integer format and load BLOCKD buffer

-  op  = output ;

-  pfa = &bufa[0] ;

-  for(j = 0; j < 4; j++) {

-    for(i = 0; i < 4; i++) {

-      op[i] = (pfa[i] > 0 ) ? (short)( 8 * pfa[i] + 0.49) :

-                                   -(short)(- 8 * pfa[i] + 0.49);

-    }

-    op  += 4;

-    pfa += 4;

-  }

-}

-#endif

-#if CONFIG_HYBRIDTRANSFORM8X8

-void vp8_fht8x8_c(short *input, short *output, int pitch, TX_TYPE tx_type) {

-  int i, j, k;

   float bufa[64], bufb[64]; // buffers are for floating-point test purpose

                              // the implementation could be simplified in

                              // conjunction with integer transform

@@ -451,11 +346,11 @@

   float *ptv, *pth;

   // load and convert residual array into floating-point

-  for(j = 0; j < 8; j++) {

-    for(i = 0; i < 8; i++) {

+  for(j = 0; j < tx_dim; j++) {

+    for(i = 0; i < tx_dim; i++) {

       pfa[i] = (float)ip[i];

-    pfa += 8;

+    pfa += tx_dim;

     ip  += pitch / 2;

@@ -466,24 +361,24 @@

   switch(tx_type) {

     case ADST_ADST :

     case ADST_DCT  :

-      ptv = &adst_8[0];

+      ptv = (tx_dim == 4) ? &adst_4[0] : &adst_8[0];

       break;

     default :

-      ptv = &dct_8[0];

+      ptv = (tx_dim == 4) ? &dct_4[0] : &dct_8[0];

       break;

-  for(j = 0; j < 8; j++) {

-    for(i = 0; i < 8; i++) {

+  for(j = 0; j < tx_dim; j++) {

+    for(i = 0; i < tx_dim; i++) {

       pfb[i] = 0;

-      for(k = 0; k < 8; k++) {

-        pfb[i] += ptv[k] * pfa[(k<<3)];

+      for(k = 0; k < tx_dim; k++) {

+        pfb[i] += ptv[k] * pfa[(k * tx_dim)];

       pfa += 1;

-    pfb += 8;

-    ptv += 8;

+    pfb += tx_dim;

+    ptv += tx_dim;

     pfa = &bufa[0];

@@ -494,34 +389,34 @@

   switch(tx_type) {

     case ADST_ADST :

     case  DCT_ADST :

-      pth = &adst_8[0];

+      pth = (tx_dim == 4) ? &adst_4[0] : &adst_8[0];

       break;

     default :

-      pth = &dct_8[0];

+      pth = (tx_dim == 4) ? &dct_4[0] : &dct_8[0];

       break;

-  for(j = 0; j < 8; j++) {

-    for(i = 0; i < 8; i++) {

+  for(j = 0; j < tx_dim; j++) {

+    for(i = 0; i < tx_dim; i++) {

       pfa[i] = 0;

-      for(k = 0; k < 8; k++) {

+      for(k = 0; k < tx_dim; k++) {

         pfa[i] += pfb[k] * pth[k];

-      pth += 8;

+      pth += tx_dim;

-    pfa += 8;

-    pfb += 8;

+    pfa += tx_dim;

+    pfb += tx_dim;

     switch(tx_type) {

       case ADST_ADST :

       case  DCT_ADST :

-        pth = &adst_8[0];

+        pth = (tx_dim == 4) ? &adst_4[0] : &adst_8[0];

         break;

       default :

-        pth = &dct_8[0];

+        pth = (tx_dim == 4) ? &dct_4[0] : &dct_8[0];

         break;

@@ -530,13 +425,13 @@

   op  = output ;

   pfa = &bufa[0] ;

-  for(j = 0; j < 8; j++) {

-    for(i = 0; i < 8; i++) {

+  for(j = 0; j < tx_dim; j++) {

+    for(i = 0; i < tx_dim; i++) {

       op[i] = (pfa[i] > 0 ) ? (short)( 8 * pfa[i] + 0.49) :

                                    -(short)(- 8 * pfa[i] + 0.49);

-    op  += 8;

-    pfa += 8;

+    op  += tx_dim;

+    pfa += tx_dim;

 #endif

@@ -581,14 +476,6 @@

     op++;

-#if CONFIG_HYBRIDTRANSFORM

-void vp8_fht8x4_c(short *input, short *output, int pitch,

-                  TX_TYPE tx_type) {

-  vp8_fht4x4_c(input,     output,      pitch, tx_type);

-  vp8_fht4x4_c(input + 4, output + 16, pitch, tx_type);

-}

-#endif

 void vp8_short_fdct8x4_c(short *input, short *output, int pitch)

--- a/vp8/encoder/dct.h

+++ b/vp8/encoder/dct.h

@@ -23,9 +23,9 @@

 #endif

-#if CONFIG_HYBRIDTRANSFORM

-void vp8_fht4x4_c(short *input, short *output, int pitch, TX_TYPE tx_type);

-void vp8_fht8x4_c(short *input, short *output, int pitch, TX_TYPE tx_type);

+#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM

+void vp8_fht_c(short *input, short *output, int pitch,

+               TX_TYPE tx_type, int tx_dim);

 #endif

 #if CONFIG_TX16X16

--- a/vp8/encoder/encodeintra.c

+++ b/vp8/encoder/encodeintra.c

@@ -91,8 +91,7 @@

     if(active_ht) {

       b->bmi.as_mode.test = b->bmi.as_mode.first;

       txfm_map(b, b->bmi.as_mode.first);

-      vp8_fht4x4_c(be->src_diff, be->coeff, 32, b->bmi.as_mode.tx_type);

+      vp8_fht_c(be->src_diff, be->coeff, 32, b->bmi.as_mode.tx_type, 4);

       vp8_ht_quantize_b(be, b);

       vp8_inverse_htransform_b(IF_RTCD(&rtcd->common->idct), b, 32) ;

     } else {

@@ -317,16 +316,11 @@

     vp8_subtract_4b_c(be, b, 16);

     txfm_map(b, pred_mode_conv(b->bmi.as_mode.first));

-    vp8_fht8x8_c(be->src_diff, (x->block + idx)->coeff, 32,

-                 b->bmi.as_mode.tx_type);

+    vp8_fht_c(be->src_diff, (x->block + idx)->coeff, 32,

+              b->bmi.as_mode.tx_type, 8);

     x->quantize_b_8x8(x->block + idx, xd->block + idx);

-    vp8_iht8x8llm_c(xd->block[idx].dqcoeff, xd->block[ib].diff, 32,

-                    b->bmi.as_mode.tx_type);

-//    x->vp8_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32);

-//    x->quantize_b_8x8(x->block + idx, xd->block + idx);

-//    vp8_short_idct8x8_c(xd->block[idx].dqcoeff, xd->block[ib].diff, 32);

+    vp8_ihtllm_c(xd->block[idx].dqcoeff, xd->block[ib].diff, 32,

+                 b->bmi.as_mode.tx_type, 8);

     // reconstruct submacroblock

     for (i = 0; i < 4; i++) {

--- a/vp8/encoder/rdopt.c

+++ b/vp8/encoder/rdopt.c

@@ -612,20 +612,20 @@

         if((type == PLANE_TYPE_Y_WITH_DC) && active_ht) {

           switch (b->bmi.as_mode.tx_type) {

             case ADST_DCT:

-              pt_scan = vp8_row_scan;

+              scan = vp8_row_scan;

               break;

             case DCT_ADST:

-              pt_scan = vp8_col_scan;

+              scan = vp8_col_scan;

               break;

             default:

-              pt_scan = vp8_default_zig_zag1d;

+              scan = vp8_default_zig_zag1d;

               break;

         } else

-          pt_scan = vp8_default_zig_zag1d;

+          scan = vp8_default_zig_zag1d;

 #endif

       break;

@@ -937,8 +937,7 @@

       if(active_ht) {

         b->bmi.as_mode.test = mode;

         txfm_map(b, mode);

-        vp8_fht4x4_c(be->src_diff, be->coeff, 32, b->bmi.as_mode.tx_type);

+        vp8_fht_c(be->src_diff, be->coeff, 32, b->bmi.as_mode.tx_type, 4);

         vp8_ht_quantize_b(be, b);

       } else {

         x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32);

@@ -991,7 +990,7 @@

   // inverse transform

   if(active_ht) {

-    vp8_iht4x4llm_c(best_dqcoeff, b->diff, 32, b->bmi.as_mode.tx_type );

+    vp8_ihtllm_c(best_dqcoeff, b->diff, 32, b->bmi.as_mode.tx_type, 4);

   } else {

     IDCT_INVOKE(IF_RTCD(&cpi->rtcd.common->idct), idct16)(best_dqcoeff,

                                                                 b->diff, 32);

@@ -1230,8 +1229,8 @@

 #if CONFIG_HYBRIDTRANSFORM8X8

       txfm_map(b, pred_mode_conv(mode));

-      vp8_fht8x8_c(be->src_diff, (x->block + idx)->coeff, 32, b->bmi.as_mode.tx_type);

-//    x->vp8_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32);

+      vp8_fht_c(be->src_diff, (x->block + idx)->coeff, 32,

+                b->bmi.as_mode.tx_type, 8);

       x->quantize_b_8x8(x->block + idx, xd->block + idx);

       // compute quantization mse of 8x8 block

--

⑨