shithub: libvpx

Download patch

ref: d15e1da4940f813311035c3ed101a9c69f15b527
parent: 29731308c4667de4fe4f02f92f0c2b29af86bbc1
author: Jingning Han <jingning@google.com>
date: Tue Feb 5 07:37:13 EST 2013

Butterfly ADST based hybrid transform

Refactor the 8x8 inverse hybrid transform so that it is consistent
with the new inverse DCT. The overall performance change for std-hd
(due to the use of this ADST variant and the rounding errors in the
butterfly implementation) is -0.02.

Fixed BUILD warning.

Devise a variant of the original ADST that allows a butterfly
computation structure. This new transform has a kernel of the
form sin((2k+1)*(2n+1)*pi / (4N)). One of its butterfly structures,
using floating-point multiplications, was reported in Z. Wang,
"Fast algorithms for the discrete W transform and for the discrete
Fourier transform", IEEE Trans. on ASSP, 1984.

This patch includes the butterfly implementation of the inverse
ADST/DCT hybrid transform of dimension 8x8.

Change-Id: I3533cb715f749343a80b9087ce34b3e776d1581d

--- a/configure
+++ b/configure
@@ -249,6 +249,7 @@
     newcoefcontext
     enable_6tap
     abovesprefmv
+    intht
 "
 CONFIG_LIST="
     external_build
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -413,9 +413,9 @@
 
 } MACROBLOCKD;
 
-#define ACTIVE_HT 110                // quantization stepsize threshold
+#define ACTIVE_HT   110                // quantization stepsize threshold
 
-#define ACTIVE_HT8 300
+#define ACTIVE_HT8  300
 
 #define ACTIVE_HT16 300
 
--- a/vp9/common/vp9_idctllm.c
+++ b/vp9/common/vp9_idctllm.c
@@ -120,7 +120,43 @@
    4096, -3675,  3218, -2731,  2217, -1682,  1130,  -568
 };
 
+#if CONFIG_INTHT
+static const int16_t iadst_i16[256] = {  // table compiled in when CONFIG_INTHT is set
+   284,   850,  1407,  1951,  2476,  2977,  3450,  3889,  // NOTE(review): presumably a 16x16 kernel, entries ~ scaled sin((2r+1)(2c+1)*pi/64) - confirm
+  4291,  4652,  4967,  5235,  5453,  5618,  5729,  5784,
+   850,  2476,  3889,  4967,  5618,  5784,  5453,  4652,
+  3450,  1951,   284, -1407, -2977, -4291, -5235, -5729,
+  1407,  3889,  5453,  5729,  4652,  2476,  -284, -2977,
+ -4967, -5784, -5235, -3450,  -850,  1951,  4291,  5618,
+  1951,  4967,  5729,  3889,   284, -3450, -5618, -5235,
+ -2476,  1407,  4652,  5784,  4291,   850, -2977, -5453,
+  2476,  5618,  4652,   284, -4291, -5729, -2977,  1951,
+  5453,  4967,   850, -3889, -5784, -3450,  1407,  5235,
+  2977,  5784,  2476, -3450, -5729, -1951,  3889,  5618,
+  1407, -4291, -5453,  -850,  4652,  5235,   284, -4967,
+  3450,  5453,  -284, -5618, -2977,  3889,  5235,  -850,
+ -5729, -2476,  4291,  4967, -1407, -5784, -1951,  4652,
+  3889,  4652, -2977, -5235,  1951,  5618,  -850, -5784,
+  -284,  5729,  1407, -5453, -2476,  4967,  3450, -4291,
+  4291,  3450, -4967, -2476,  5453,  1407, -5729,  -284,
+  5784,  -850, -5618,  1951,  5235, -2977, -4652,  3889,
+  4652,  1951, -5784,  1407,  4967, -4291, -2476,  5729,
+  -850, -5235,  3889,  2977, -5618,   284,  5453, -3450,
+  4967,   284, -5235,  4652,   850, -5453,  4291,  1407,
+ -5618,  3889,  1951, -5729,  3450,  2476, -5784,  2977,
+  5235, -1407, -3450,  5784, -3889,  -850,  4967, -5453,
+  1951,  2977, -5729,  4291,   284, -4652,  5618, -2476,
+  5453, -2977,  -850,  4291, -5784,  4652, -1407, -2476,
+  5235, -5618,  3450,   284, -3889,  5729, -4967,  1951,
+  5618, -4291,  1951,   850, -3450,  5235, -5784,  4967,
+ -2977,   284,  2476, -4652,  5729, -5453,  3889, -1407,
+  5729, -5235,  4291, -2977,  1407,   284, -1951,  3450,
+ -4652,  5453, -5784,  5618, -4967,  3889, -2476,   850,
+  5784, -5729,  5618, -5453,  5235, -4967,  4652, -4291,
+  3889, -3450,  2977, -2476,  1951, -1407,   850,  -284
+};
+#else
+static const int16_t iadst_i16[256] = {
     542,  1607,  2614,  3526,  4311,  4940,  5390,  5646,
    5698,  5543,  5189,  4646,  3936,  3084,  2120,  1080,
    1080,  3084,  4646,  5543,  5646,  4940,  3526,  1607,
@@ -154,8 +190,8 @@
    5698, -5646,  5543, -5390,  5189, -4940,  4646, -4311,
    3936, -3526,  3084, -2614,  2120, -1607,  1080,  -542
 };
+#endif
 
-
 /* Converted the transforms to integer form. */
 #define HORIZONTAL_SHIFT 14  // 16
 #define HORIZONTAL_ROUNDING ((1 << (HORIZONTAL_SHIFT - 1)) - 1)
@@ -656,6 +692,138 @@
         output[j * short_pitch + i] = (temp_out[j] + 16) >> 5;
     }
 }
+
+#if CONFIG_INTHT
+static void iadst8_1d(int16_t *input, int16_t *output) {  // 1-D 8-point inverse ADST, butterfly form: reads input[0..7], writes output[0..7]
+  int x0, x1, x2, x3, x4, x5, x6, x7;  // working values, overwritten at every stage
+  int s0, s1, s2, s3, s4, s5, s6, s7;  // per-stage products/sums before rounding
+
+  x0 = input[7];  // inputs are loaded in permuted order: 7, 0, 5, 2, 3, 4, 1, 6
+  x1 = input[0];
+  x2 = input[5];
+  x3 = input[2];
+  x4 = input[3];
+  x5 = input[4];
+  x6 = input[1];
+  x7 = input[6];
+
+  if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7)) {  // all eight inputs are zero: emit zeros and skip the butterflies
+    output[0] = output[1] = output[2] = output[3] = output[4]
+                    = output[5] = output[6] = output[7] = 0;
+    return;
+  }
+
+  // stage 1: combine each pair (x0,x1), (x2,x3), (x4,x5), (x6,x7) with a cospi_* constant pair
+  s0 = cospi_2_64  * x0 + cospi_30_64 * x1;
+  s1 = cospi_30_64 * x0 - cospi_2_64  * x1;
+  s2 = cospi_10_64 * x2 + cospi_22_64 * x3;
+  s3 = cospi_22_64 * x2 - cospi_10_64 * x3;
+  s4 = cospi_18_64 * x4 + cospi_14_64 * x5;
+  s5 = cospi_14_64 * x4 - cospi_18_64 * x5;
+  s6 = cospi_26_64 * x6 + cospi_6_64  * x7;
+  s7 = cospi_6_64  * x6 - cospi_26_64 * x7;
+
+  x0 = dct_const_round_shift(s0 + s4);  // NOTE(review): helper is defined outside this view; presumably rounds and removes the cospi_* scale - confirm
+  x1 = dct_const_round_shift(s1 + s5);
+  x2 = dct_const_round_shift(s2 + s6);
+  x3 = dct_const_round_shift(s3 + s7);
+  x4 = dct_const_round_shift(s0 - s4);
+  x5 = dct_const_round_shift(s1 - s5);
+  x6 = dct_const_round_shift(s2 - s6);
+  x7 = dct_const_round_shift(s3 - s7);
+
+  // stage 2: x0..x3 pass through unscaled; (x4,x5) and (x6,x7) are combined with cospi_8_64 / cospi_24_64
+  s0 = x0;
+  s1 = x1;
+  s2 = x2;
+  s3 = x3;
+  s4 = cospi_8_64  * x4 + cospi_24_64 * x5;
+  s5 = cospi_24_64 * x4 - cospi_8_64  * x5;
+  s6 = - cospi_24_64 * x6 + cospi_8_64  * x7;
+  s7 =   cospi_8_64  * x6 + cospi_24_64 * x7;
+
+  x0 = s0 + s2;  // plain add/subtract: s0..s3 carry no constant factor in this stage
+  x1 = s1 + s3;
+  x2 = s0 - s2;
+  x3 = s1 - s3;
+  x4 = dct_const_round_shift(s4 + s6);
+  x5 = dct_const_round_shift(s5 + s7);
+  x6 = dct_const_round_shift(s4 - s6);
+  x7 = dct_const_round_shift(s5 - s7);
+
+  // stage 3: scale the sum and difference of (x2,x3) and of (x6,x7) by cospi_16_64; x0, x1, x4, x5 are left as they are
+  s2 = cospi_16_64 * (x2 + x3);
+  s3 = cospi_16_64 * (x2 - x3);
+  s6 = cospi_16_64 * (x6 + x7);
+  s7 = cospi_16_64 * (x6 - x7);
+
+  x2 = dct_const_round_shift(s2);
+  x3 = dct_const_round_shift(s3);
+  x6 = dct_const_round_shift(s6);
+  x7 = dct_const_round_shift(s7);
+
+  output[0] =   x0;  // results are stored permuted; odd-indexed outputs are negated
+  output[1] = - x4;
+  output[2] =   x6;
+  output[3] = - x2;
+  output[4] =   x3;
+  output[5] = - x7;
+  output[6] =   x5;
+  output[7] = - x1;
+
+  return;
+}
+
+// 2-D 8x8 inverse hybrid transform. Each of the 8 input rows goes through
+// invr, then each column of that result goes through invc; tx_type picks
+// iadst8_1d or idct8_1d for each. Output rows are short_pitch int16_t apart.
+void vp9_short_iht8x8_c(int16_t *input, int16_t *output,
+                        TX_TYPE tx_type, int pitch) {
+  int16_t out[8 * 8];  // row-pass results, 8 values per row
+  const int short_pitch = pitch >> 1;  // NOTE(review): pitch presumably in bytes
+  int i, j;
+  int16_t temp_in[8], temp_out[8];
+
+  void (*invr)(int16_t*, int16_t*);
+  void (*invc)(int16_t*, int16_t*);
+
+  switch (tx_type) {
+    case ADST_ADST:
+      invc = &iadst8_1d;
+      invr = &iadst8_1d;
+      break;
+    case ADST_DCT:
+      invc = &iadst8_1d;
+      invr = &idct8_1d;
+      break;
+    case DCT_ADST:
+      invc = &idct8_1d;
+      invr = &iadst8_1d;
+      break;
+    case DCT_DCT:
+      invc = &idct8_1d;
+      invr = &idct8_1d;
+      break;
+    default:
+      assert(0);
+      return;  // with NDEBUG the assert vanishes; never call unset pointers
+  }
+
+  // inverse transform row vectors
+  for (i = 0; i < 8; ++i)
+    invr(input + 8 * i, out + 8 * i);
+
+  // inverse transform column vectors
+  for (i = 0; i < 8; ++i) {
+    for (j = 0; j < 8; ++j)
+      temp_in[j] = out[j * 8 + i];
+    invc(temp_in, temp_out);
+    for (j = 0; j < 8; ++j)
+      output[j * short_pitch + i] = (temp_out[j] + 16) >> 5;
+  }
+}
+#endif
+
 
 void vp9_short_idct10_8x8_c(int16_t *input, int16_t *output, int pitch) {
   int16_t out[8 * 8];
--- a/vp9/common/vp9_invtrans.c
+++ b/vp9/common/vp9_invtrans.c
@@ -91,8 +91,13 @@
   for (i = 0; i < 9; i += 8) {
     TX_TYPE tx_type = get_tx_type_8x8(xd, &xd->block[i]);
     if (tx_type != DCT_DCT) {
+#if CONFIG_INTHT
+      vp9_short_iht8x8(xd->block[i].dqcoeff, xd->block[i].diff,
+                           tx_type, 32);
+#else
       vp9_ihtllm(xd->block[i].dqcoeff, xd->block[i].diff, 32, tx_type, 8,
                  xd->block[i].eob);
+#endif
     } else {
       vp9_inverse_transform_b_8x8(&blockd[i].dqcoeff[0],
                                   &blockd[i].diff[0], 32);
@@ -101,8 +106,13 @@
   for (i = 2; i < 11; i += 8) {
     TX_TYPE tx_type = get_tx_type_8x8(xd, &xd->block[i]);
     if (tx_type != DCT_DCT) {
+#if CONFIG_INTHT
+      vp9_short_iht8x8(xd->block[i + 2].dqcoeff, xd->block[i].diff,
+                           tx_type, 32);
+#else
       vp9_ihtllm(xd->block[i + 2].dqcoeff, xd->block[i].diff, 32, tx_type, 8,
                  xd->block[i + 2].eob);
+#endif
     } else {
       vp9_inverse_transform_b_8x8(&blockd[i + 2].dqcoeff[0],
                                   &blockd[i].diff[0], 32);
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -411,6 +411,11 @@
 prototype void vp9_short_idct1_32x32 "int16_t *input, int16_t *output"
 specialize vp9_short_idct1_32x32
 
+#if CONFIG_INTHT
+prototype void vp9_short_iht8x8 "int16_t *input, int16_t *output, int tx_type, int pitch"
+specialize vp9_short_iht8x8
+#endif
+
 prototype void vp9_ihtllm "const int16_t *input, int16_t *output, int pitch, int tx_type, int tx_dim, int16_t eobs"
 specialize vp9_ihtllm
 
--- a/vp9/decoder/vp9_dequantize.c
+++ b/vp9/decoder/vp9_dequantize.c
@@ -92,8 +92,11 @@
       input[i] = dq[1] * input[i];
     }
 
+#if CONFIG_INTHT
+    vp9_short_iht8x8(input, output, tx_type, 16);
+#else
     vp9_ihtllm(input, output, 16, tx_type, 8, eobs);
-
+#endif
     vpx_memset(input, 0, 128);
 
     add_residual(diff_ptr, pred, pitch, dest, stride, 8, 8);
--- a/vp9/encoder/vp9_dct.c
+++ b/vp9/encoder/vp9_dct.c
@@ -104,7 +104,27 @@
    16069, -13623,   9102,  -3196
 };
 
+#if CONFIG_INTHT
+static const int16_t adst_i8[64] = {  // table compiled in when CONFIG_INTHT is set
+   1606,    4756,     7723,    10394,  // NOTE(review): presumably an 8x8 kernel (two source lines per row), entries ~ scaled sin((2r+1)(2c+1)*pi/32) - confirm
+  12665,   14449,    15678,    16305,
+   4756,   12665,    16305,    14449,
+   7723,   -1606,   -10394,   -15678,
+   7723,   16305,    10394,    -4756,
+ -15678,  -12665,     1606,    14449,
+  10394,   14449,    -4756,   -16305,
+  -1606,   15678,     7723,   -12665,
+  12665,    7723,   -15678,    -1606,
+  16305,   -4756,   -14449,    10394,
+  14449,   -1606,   -12665,    15678,
+  -4756,  -10394,    16305,    -7723,
+  15678,  -10394,     1606,     7723,
+ -14449,   16305,   -12665,     4756,
+  16305,  -15678,    14449,   -12665,
+  10394,   -7723,     4756,    -1606
+};
+#else
+static const int16_t adst_i8[64] = {
     2921,   5742,   8368,  10708,
    12684,  14228,  15288,  15827,
     8368,  14228,  15827,  12684,
@@ -122,6 +142,7 @@
     5742, -10708,  14228, -15827,
    15288, -12684,   8368,  -2921
 };
+#endif
 
 static const float dct_16[256] = {
   0.250000,  0.250000,  0.250000,  0.250000,  0.250000,  0.250000,  0.250000,  0.250000,
@@ -229,7 +250,43 @@
    11529, -11086,  10217,  -8955,   7350,  -5461,   3363,  -1136
 };
 
+#if CONFIG_INTHT
+static const int16_t adst_i16[256] = {  // table compiled in when CONFIG_INTHT is set
+     568,    1700,    2815,    3903,    4953,    5956,    6901,    7780,  // NOTE(review): presumably a 16x16 kernel, entries ~ scaled sin((2r+1)(2c+1)*pi/64) - confirm
+    8584,    9305,    9937,   10473,   10908,   11238,   11459,   11571,
+    1700,    4953,    7780,    9937,   11238,   11571,   10908,    9305,
+    6901,    3903,     568,   -2815,   -5956,   -8584,  -10473,  -11459,
+    2815,    7780,   10908,   11459,    9305,    4953,    -568,   -5956,
+   -9937,  -11571,  -10473,   -6901,   -1700,    3903,    8584,   11238,
+    3903,    9937,   11459,    7780,     568,   -6901,  -11238,  -10473,
+   -4953,    2815,    9305,   11571,    8584,    1700,   -5956,  -10908,
+    4953,   11238,    9305,     568,   -8584,  -11459,   -5956,    3903,
+   10908,    9937,    1700,   -7780,  -11571,   -6901,    2815,   10473,
+    5956,   11571,    4953,   -6901,  -11459,   -3903,    7780,   11238,
+    2815,   -8584,  -10908,   -1700,    9305,   10473,     568,   -9937,
+    6901,   10908,    -568,  -11238,   -5956,    7780,   10473,   -1700,
+  -11459,   -4953,    8584,    9937,   -2815,  -11571,   -3903,    9305,
+    7780,    9305,   -5956,  -10473,    3903,   11238,   -1700,  -11571,
+    -568,   11459,    2815,  -10908,   -4953,    9937,    6901,   -8584,
+    8584,    6901,   -9937,   -4953,   10908,    2815,  -11459,    -568,
+   11571,   -1700,  -11238,    3903,   10473,   -5956,   -9305,    7780,
+    9305,    3903,  -11571,    2815,    9937,   -8584,   -4953,   11459,
+   -1700,  -10473,    7780,    5956,  -11238,     568,   10908,   -6901,
+    9937,     568,  -10473,    9305,    1700,  -10908,    8584,    2815,
+  -11238,    7780,    3903,  -11459,    6901,    4953,  -11571,    5956,
+   10473,   -2815,   -6901,   11571,   -7780,   -1700,    9937,  -10908,
+    3903,    5956,  -11459,    8584,     568,   -9305,   11238,   -4953,
+   10908,   -5956,   -1700,    8584,  -11571,    9305,   -2815,   -4953,
+   10473,  -11238,    6901,     568,   -7780,   11459,   -9937,    3903,
+   11238,   -8584,    3903,    1700,   -6901,   10473,  -11571,    9937,
+   -5956,     568,    4953,   -9305,   11459,  -10908,    7780,   -2815,
+   11459,  -10473,    8584,   -5956,    2815,     568,   -3903,    6901,
+   -9305,   10908,  -11571,   11238,   -9937,    7780,   -4953,    1700,
+   11571,  -11459,   11238,  -10908,   10473,   -9937,    9305,   -8584,
+    7780,   -6901,    5956,   -4953,    3903,   -2815,    1700,    -568
+};
+#else
+static const int16_t adst_i16[256] = {
     1084,   2159,   3214,   4240,   5228,   6168,   7052,   7873,
     8622,   9293,   9880,  10377,  10781,  11087,  11292,  11395,
     3214,   6168,   8622,  10377,  11292,  11292,  10377,   8622,
@@ -263,6 +320,7 @@
     2159,  -4240,   6168,  -7873,   9293, -10377,  11087, -11395,
    11292, -10781,   9880,  -8622,   7052,  -5228,   3214,  -1084
 };
+#endif
 
 static const int xC1S7 = 16069;
 static const int xC2S6 = 15137;
--- a/vp9/encoder/vp9_encodeintra.c
+++ b/vp9/encoder/vp9_encodeintra.c
@@ -152,8 +152,14 @@
       vp9_fht(be->src_diff, 32, (x->block + idx)->coeff,
                 tx_type, 8);
       x->quantize_b_8x8(x->block + idx, xd->block + idx);
+
+#if CONFIG_INTHT
+      vp9_short_iht8x8(xd->block[idx].dqcoeff, xd->block[ib].diff,
+                            tx_type, 32);
+#else
       vp9_ihtllm(xd->block[idx].dqcoeff, xd->block[ib].diff, 32,
                    tx_type, 8, xd->block[idx].eob);
+#endif
     } else {
       x->vp9_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32);
       x->quantize_b_8x8(x->block + idx, xd->block + idx);
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -2472,7 +2472,6 @@
         " and --passes=2\n", stream->index, global.pass);
     });
 
-
     /* Use the frame rate from the file only if none was specified
      * on the command-line.
      */