shithub: libvpx

--- a/configure

+++ b/configure

@@ -249,6 +249,7 @@

     newcoefcontext

     enable_6tap

     abovesprefmv

+    intht

 CONFIG_LIST="

     external_build

--- a/vp9/common/vp9_blockd.h

+++ b/vp9/common/vp9_blockd.h

@@ -413,9 +413,9 @@

 } MACROBLOCKD;

-#define ACTIVE_HT 110                // quantization stepsize threshold

+#define ACTIVE_HT   110                // quantization stepsize threshold

-#define ACTIVE_HT8 300

+#define ACTIVE_HT8  300

 #define ACTIVE_HT16 300

--- a/vp9/common/vp9_idctllm.c

+++ b/vp9/common/vp9_idctllm.c

@@ -120,7 +120,43 @@

    4096, -3675,  3218, -2731,  2217, -1682,  1130,  -568

};

+#if CONFIG_INTHT

 static const int16_t iadst_i16[256] = {

+   284,   850,  1407,  1951,  2476,  2977,  3450,  3889,  // CONFIG_INTHT table: 256 integer entries, 8 per line; values look like scaled sines (284 ~ 5792*sin(pi/64), 850 ~ 5792*sin(3*pi/64)) - generator not shown, TODO confirm scale

+  4291,  4652,  4967,  5235,  5453,  5618,  5729,  5784,

+   850,  2476,  3889,  4967,  5618,  5784,  5453,  4652,

+  3450,  1951,   284, -1407, -2977, -4291, -5235, -5729,

+  1407,  3889,  5453,  5729,  4652,  2476,  -284, -2977,

+ -4967, -5784, -5235, -3450,  -850,  1951,  4291,  5618,

+  1951,  4967,  5729,  3889,   284, -3450, -5618, -5235,

+ -2476,  1407,  4652,  5784,  4291,   850, -2977, -5453,

+  2476,  5618,  4652,   284, -4291, -5729, -2977,  1951,

+  5453,  4967,   850, -3889, -5784, -3450,  1407,  5235,

+  2977,  5784,  2476, -3450, -5729, -1951,  3889,  5618,

+  1407, -4291, -5453,  -850,  4652,  5235,   284, -4967,

+  3450,  5453,  -284, -5618, -2977,  3889,  5235,  -850,

+ -5729, -2476,  4291,  4967, -1407, -5784, -1951,  4652,

+  3889,  4652, -2977, -5235,  1951,  5618,  -850, -5784,

+  -284,  5729,  1407, -5453, -2476,  4967,  3450, -4291,

+  4291,  3450, -4967, -2476,  5453,  1407, -5729,  -284,

+  5784,  -850, -5618,  1951,  5235, -2977, -4652,  3889,

+  4652,  1951, -5784,  1407,  4967, -4291, -2476,  5729,

+  -850, -5235,  3889,  2977, -5618,   284,  5453, -3450,

+  4967,   284, -5235,  4652,   850, -5453,  4291,  1407,

+ -5618,  3889,  1951, -5729,  3450,  2476, -5784,  2977,

+  5235, -1407, -3450,  5784, -3889,  -850,  4967, -5453,

+  1951,  2977, -5729,  4291,   284, -4652,  5618, -2476,

+  5453, -2977,  -850,  4291, -5784,  4652, -1407, -2476,

+  5235, -5618,  3450,   284, -3889,  5729, -4967,  1951,

+  5618, -4291,  1951,   850, -3450,  5235, -5784,  4967,

+ -2977,   284,  2476, -4652,  5729, -5453,  3889, -1407,

+  5729, -5235,  4291, -2977,  1407,   284, -1951,  3450,

+ -4652,  5453, -5784,  5618, -4967,  3889, -2476,   850,

+  5784, -5729,  5618, -5453,  5235, -4967,  4652, -4291,

+  3889, -3450,  2977, -2476,  1951, -1407,   850,  -284

+};  // end of the CONFIG_INTHT variant of iadst_i16

+#else

+static const int16_t iadst_i16[256] = {

     542,  1607,  2614,  3526,  4311,  4940,  5390,  5646,

    5698,  5543,  5189,  4646,  3936,  3084,  2120,  1080,

    1080,  3084,  4646,  5543,  5646,  4940,  3526,  1607,

@@ -154,8 +190,8 @@

    5698, -5646,  5543, -5390,  5189, -4940,  4646, -4311,

    3936, -3526,  3084, -2614,  2120, -1607,  1080,  -542

};

+#endif

 /* Converted the transforms to integer form. */

 #define HORIZONTAL_SHIFT 14  // 16

 #define HORIZONTAL_ROUNDING ((1 << (HORIZONTAL_SHIFT - 1)) - 1)

@@ -656,6 +692,138 @@

         output[j * short_pitch + i] = (temp_out[j] + 16) >> 5;

+#if CONFIG_INTHT

+static void iadst8_1d(int16_t *input, int16_t *output) {  // 8-point 1-D transform (inverse ADST, going by the name - TODO confirm); reads input[0..7], writes output[0..7]

+  int x0, x1, x2, x3, x4, x5, x6, x7;  // working values, reused across stages

+  int s0, s1, s2, s3, s4, s5, s6, s7;  // per-stage intermediate products/sums

+  x0 = input[7];  // inputs are loaded in permuted order: 7, 0, 5, 2, 3, 4, 1, 6

+  x1 = input[0];

+  x2 = input[5];

+  x3 = input[2];

+  x4 = input[3];

+  x5 = input[4];

+  x6 = input[1];

+  x7 = input[6];

+  if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7)) {  // all eight inputs are zero: write zeros and skip the arithmetic

+    output[0] = output[1] = output[2] = output[3] = output[4]

+                    = output[5] = output[6] = output[7] = 0;

+    return;

+  }

+  // stage 1: combine each pair (x0,x1), (x2,x3), (x4,x5), (x6,x7) with two cospi_* constants, then add/subtract s[k] and s[k+4]

+  s0 = cospi_2_64  * x0 + cospi_30_64 * x1;

+  s1 = cospi_30_64 * x0 - cospi_2_64  * x1;

+  s2 = cospi_10_64 * x2 + cospi_22_64 * x3;

+  s3 = cospi_22_64 * x2 - cospi_10_64 * x3;

+  s4 = cospi_18_64 * x4 + cospi_14_64 * x5;

+  s5 = cospi_14_64 * x4 - cospi_18_64 * x5;

+  s6 = cospi_26_64 * x6 + cospi_6_64  * x7;

+  s7 = cospi_6_64  * x6 - cospi_26_64 * x7;

+  x0 = dct_const_round_shift(s0 + s4);  // dct_const_round_shift is defined elsewhere; presumably removes the fixed-point scale of the cospi_* constants - TODO confirm

+  x1 = dct_const_round_shift(s1 + s5);

+  x2 = dct_const_round_shift(s2 + s6);

+  x3 = dct_const_round_shift(s3 + s7);

+  x4 = dct_const_round_shift(s0 - s4);

+  x5 = dct_const_round_shift(s1 - s5);

+  x6 = dct_const_round_shift(s2 - s6);

+  x7 = dct_const_round_shift(s3 - s7);

+  // stage 2: x0..x3 pass through unmultiplied; x4..x7 are mixed using cospi_8_64 and cospi_24_64

+  s0 = x0;

+  s1 = x1;

+  s2 = x2;

+  s3 = x3;

+  s4 = cospi_8_64  * x4 + cospi_24_64 * x5;

+  s5 = cospi_24_64 * x4 - cospi_8_64  * x5;

+  s6 = - cospi_24_64 * x6 + cospi_8_64  * x7;

+  s7 =   cospi_8_64  * x6 + cospi_24_64 * x7;

+  x0 = s0 + s2;  // no round-shift on x0..x3: s0..s3 were not multiplied by a constant in this stage

+  x1 = s1 + s3;

+  x2 = s0 - s2;

+  x3 = s1 - s3;

+  x4 = dct_const_round_shift(s4 + s6);

+  x5 = dct_const_round_shift(s5 + s7);

+  x6 = dct_const_round_shift(s4 - s6);

+  x7 = dct_const_round_shift(s5 - s7);

+  // stage 3: sum and difference of (x2,x3) and of (x6,x7), each multiplied by cospi_16_64; x0, x1, x4, x5 are left as they are

+  s2 = cospi_16_64 * (x2 + x3);

+  s3 = cospi_16_64 * (x2 - x3);

+  s6 = cospi_16_64 * (x6 + x7);

+  s7 = cospi_16_64 * (x6 - x7);

+  x2 = dct_const_round_shift(s2);

+  x3 = dct_const_round_shift(s3);

+  x6 = dct_const_round_shift(s6);

+  x7 = dct_const_round_shift(s7);

+  output[0] =   x0;  // results are written in permuted order; every odd-indexed output is negated

+  output[1] = - x4;

+  output[2] =   x6;

+  output[3] = - x2;

+  output[4] =   x3;

+  output[5] = - x7;

+  output[6] =   x5;

+  output[7] = - x1;

+  return;

+}

+void vp9_short_iht8x8_c(int16_t *input, int16_t *output,  // 8x8 two-pass transform: a 1-D kernel on each row, then a 1-D kernel on each column; kernels are chosen by tx_type

+                        TX_TYPE tx_type, int pitch) {  // NOTE(review): the rtcd prototype added by this same patch declares tx_type as int, not TX_TYPE - confirm the two agree

+  int16_t out[8 * 8];  // row-pass results, 8 values per row

+  int16_t *outptr = &out[0];

+  const int short_pitch = pitch >> 1;  // output row stride in int16_t elements; presumably pitch is given in bytes - TODO confirm

+  int i, j;

+  int16_t temp_in[8], temp_out[8];  // one gathered column and its transformed result

+  void (*invr)(int16_t*, int16_t*);  // 1-D kernel applied to rows

+  void (*invc)(int16_t*, int16_t*);  // 1-D kernel applied to columns

+  switch (tx_type) {

+    case ADST_ADST:  // iadst8_1d on both columns and rows

+      invc = &iadst8_1d;

+      invr = &iadst8_1d;

+      break;

+    case ADST_DCT:  // iadst8_1d on columns, idct8_1d on rows

+      invc = &iadst8_1d;

+      invr = &idct8_1d;

+      break;

+    case DCT_ADST:  // idct8_1d on columns, iadst8_1d on rows

+      invc = &idct8_1d;

+      invr = &iadst8_1d;

+      break;

+    case DCT_DCT:  // idct8_1d on both columns and rows

+      invc = &idct8_1d;

+      invr = &idct8_1d;

+      break;

+    default:  // NOTE(review): if assert() is compiled out (NDEBUG), invr/invc remain uninitialized here and are still called below

+      assert(0);

+  }

+  // inverse transform row vectors: each group of 8 consecutive input values becomes one row of out

+  for (i = 0; i < 8; ++i) {

+    invr(input, outptr);

+    input += 8;

+    outptr += 8;

+  }

+  // inverse transform column vectors: gather column i of out, transform it, scatter it into column i of output

+  for (i = 0; i < 8; ++i) {

+    for (j = 0; j < 8; ++j)

+      temp_in[j] = out[j * 8 + i];

+    invc(temp_in, temp_out);

+    for (j = 0; j < 8; ++j)

+      output[j * short_pitch + i] = (temp_out[j] + 16) >> 5;  // add 16, then shift right by 5 (rounded scale-down by 32)

+  }

+}

+#endif

 void vp9_short_idct10_8x8_c(int16_t *input, int16_t *output, int pitch) {

   int16_t out[8 * 8];

--- a/vp9/common/vp9_invtrans.c

+++ b/vp9/common/vp9_invtrans.c

@@ -91,8 +91,13 @@

   for (i = 0; i < 9; i += 8) {

     TX_TYPE tx_type = get_tx_type_8x8(xd, &xd->block[i]);

     if (tx_type != DCT_DCT) {

+#if CONFIG_INTHT

+      vp9_short_iht8x8(xd->block[i].dqcoeff, xd->block[i].diff,

+                           tx_type, 32);

+#else

       vp9_ihtllm(xd->block[i].dqcoeff, xd->block[i].diff, 32, tx_type, 8,

                  xd->block[i].eob);

+#endif

     } else {

       vp9_inverse_transform_b_8x8(&blockd[i].dqcoeff[0],

                                   &blockd[i].diff[0], 32);

@@ -101,8 +106,13 @@

   for (i = 2; i < 11; i += 8) {

     TX_TYPE tx_type = get_tx_type_8x8(xd, &xd->block[i]);

     if (tx_type != DCT_DCT) {

+#if CONFIG_INTHT

+      vp9_short_iht8x8(xd->block[i + 2].dqcoeff, xd->block[i].diff,

+                           tx_type, 32);

+#else

       vp9_ihtllm(xd->block[i + 2].dqcoeff, xd->block[i].diff, 32, tx_type, 8,

                  xd->block[i + 2].eob);

+#endif

     } else {

       vp9_inverse_transform_b_8x8(&blockd[i + 2].dqcoeff[0],

                                   &blockd[i].diff[0], 32);

--- a/vp9/common/vp9_rtcd_defs.sh

+++ b/vp9/common/vp9_rtcd_defs.sh

@@ -411,6 +411,11 @@

 prototype void vp9_short_idct1_32x32 "int16_t *input, int16_t *output"

 specialize vp9_short_idct1_32x32

+#if CONFIG_INTHT

+prototype void vp9_short_iht8x8 "int16_t *input, int16_t *output, int tx_type, int pitch"

+specialize vp9_short_iht8x8

+#endif

 prototype void vp9_ihtllm "const int16_t *input, int16_t *output, int pitch, int tx_type, int tx_dim, int16_t eobs"

 specialize vp9_ihtllm

--- a/vp9/decoder/vp9_dequantize.c

+++ b/vp9/decoder/vp9_dequantize.c

@@ -92,8 +92,11 @@

       input[i] = dq[1] * input[i];

+#if CONFIG_INTHT

+    vp9_short_iht8x8(input, output, tx_type, 16);

+#else

     vp9_ihtllm(input, output, 16, tx_type, 8, eobs);

+#endif

     vpx_memset(input, 0, 128);

     add_residual(diff_ptr, pred, pitch, dest, stride, 8, 8);

--- a/vp9/encoder/vp9_dct.c

+++ b/vp9/encoder/vp9_dct.c

@@ -104,7 +104,27 @@

    16069, -13623,   9102,  -3196

};

+#if CONFIG_INTHT

 static const int16_t adst_i8[64] = {

+   1606,    4756,     7723,    10394,  // CONFIG_INTHT table: 64 integer entries, 4 per line; values look like scaled sines (1606 ~ 16384*sin(pi/32), 4756 ~ 16384*sin(3*pi/32)) - generator not shown, TODO confirm scale

+  12665,   14449,    15678,    16305,

+   4756,   12665,    16305,    14449,

+   7723,   -1606,   -10394,   -15678,

+   7723,   16305,    10394,    -4756,

+ -15678,  -12665,     1606,    14449,

+  10394,   14449,    -4756,   -16305,

+  -1606,   15678,     7723,   -12665,

+  12665,    7723,   -15678,    -1606,

+  16305,   -4756,   -14449,    10394,

+  14449,   -1606,   -12665,    15678,

+  -4756,  -10394,    16305,    -7723,

+  15678,  -10394,     1606,     7723,

+ -14449,   16305,   -12665,     4756,

+  16305,  -15678,    14449,   -12665,

+  10394,   -7723,     4756,    -1606

+};  // end of the CONFIG_INTHT variant of adst_i8

+#else

+static const int16_t adst_i8[64] = {

     2921,   5742,   8368,  10708,

    12684,  14228,  15288,  15827,

     8368,  14228,  15827,  12684,

@@ -122,6 +142,7 @@

     5742, -10708,  14228, -15827,

    15288, -12684,   8368,  -2921

};

+#endif

 static const float dct_16[256] = {

   0.250000,  0.250000,  0.250000,  0.250000,  0.250000,  0.250000,  0.250000,  0.250000,

@@ -229,7 +250,43 @@

    11529, -11086,  10217,  -8955,   7350,  -5461,   3363,  -1136

};

+#if CONFIG_INTHT

 static const int16_t adst_i16[256] = {

+     568,    1700,    2815,    3903,    4953,    5956,    6901,    7780,  // CONFIG_INTHT table: 256 integer entries, 8 per line; values look like scaled sines (568 ~ 11585*sin(pi/64), 1700 ~ 11585*sin(3*pi/64)) - generator not shown, TODO confirm scale

+    8584,    9305,    9937,   10473,   10908,   11238,   11459,   11571,

+    1700,    4953,    7780,    9937,   11238,   11571,   10908,    9305,

+    6901,    3903,     568,   -2815,   -5956,   -8584,  -10473,  -11459,

+    2815,    7780,   10908,   11459,    9305,    4953,    -568,   -5956,

+   -9937,  -11571,  -10473,   -6901,   -1700,    3903,    8584,   11238,

+    3903,    9937,   11459,    7780,     568,   -6901,  -11238,  -10473,

+   -4953,    2815,    9305,   11571,    8584,    1700,   -5956,  -10908,

+    4953,   11238,    9305,     568,   -8584,  -11459,   -5956,    3903,

+   10908,    9937,    1700,   -7780,  -11571,   -6901,    2815,   10473,

+    5956,   11571,    4953,   -6901,  -11459,   -3903,    7780,   11238,

+    2815,   -8584,  -10908,   -1700,    9305,   10473,     568,   -9937,

+    6901,   10908,    -568,  -11238,   -5956,    7780,   10473,   -1700,

+  -11459,   -4953,    8584,    9937,   -2815,  -11571,   -3903,    9305,

+    7780,    9305,   -5956,  -10473,    3903,   11238,   -1700,  -11571,

+    -568,   11459,    2815,  -10908,   -4953,    9937,    6901,   -8584,

+    8584,    6901,   -9937,   -4953,   10908,    2815,  -11459,    -568,

+   11571,   -1700,  -11238,    3903,   10473,   -5956,   -9305,    7780,

+    9305,    3903,  -11571,    2815,    9937,   -8584,   -4953,   11459,

+   -1700,  -10473,    7780,    5956,  -11238,     568,   10908,   -6901,

+    9937,     568,  -10473,    9305,    1700,  -10908,    8584,    2815,

+  -11238,    7780,    3903,  -11459,    6901,    4953,  -11571,    5956,

+   10473,   -2815,   -6901,   11571,   -7780,   -1700,    9937,  -10908,

+    3903,    5956,  -11459,    8584,     568,   -9305,   11238,   -4953,

+   10908,   -5956,   -1700,    8584,  -11571,    9305,   -2815,   -4953,

+   10473,  -11238,    6901,     568,   -7780,   11459,   -9937,    3903,

+   11238,   -8584,    3903,    1700,   -6901,   10473,  -11571,    9937,

+   -5956,     568,    4953,   -9305,   11459,  -10908,    7780,   -2815,

+   11459,  -10473,    8584,   -5956,    2815,     568,   -3903,    6901,

+   -9305,   10908,  -11571,   11238,   -9937,    7780,   -4953,    1700,

+   11571,  -11459,   11238,  -10908,   10473,   -9937,    9305,   -8584,

+    7780,   -6901,    5956,   -4953,    3903,   -2815,    1700,    -568

+};  // end of the CONFIG_INTHT variant of adst_i16

+#else

+static const int16_t adst_i16[256] = {

     1084,   2159,   3214,   4240,   5228,   6168,   7052,   7873,

     8622,   9293,   9880,  10377,  10781,  11087,  11292,  11395,

     3214,   6168,   8622,  10377,  11292,  11292,  10377,   8622,

@@ -263,6 +320,7 @@

     2159,  -4240,   6168,  -7873,   9293, -10377,  11087, -11395,

    11292, -10781,   9880,  -8622,   7052,  -5228,   3214,  -1084

};

+#endif

 static const int xC1S7 = 16069;

 static const int xC2S6 = 15137;

--- a/vp9/encoder/vp9_encodeintra.c

+++ b/vp9/encoder/vp9_encodeintra.c

@@ -152,8 +152,14 @@

       vp9_fht(be->src_diff, 32, (x->block + idx)->coeff,

                 tx_type, 8);

       x->quantize_b_8x8(x->block + idx, xd->block + idx);

+#if CONFIG_INTHT

+      vp9_short_iht8x8(xd->block[idx].dqcoeff, xd->block[ib].diff,

+                            tx_type, 32);

+#else

       vp9_ihtllm(xd->block[idx].dqcoeff, xd->block[ib].diff, 32,

                    tx_type, 8, xd->block[idx].eob);

+#endif

     } else {

       x->vp9_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32);

       x->quantize_b_8x8(x->block + idx, xd->block + idx);

--- a/vpxenc.c

+++ b/vpxenc.c

@@ -2472,7 +2472,6 @@

         " and --passes=2\n", stream->index, global.pass);

});

     /* Use the frame rate from the file only if none was specified

      * on the command-line.

*/

--

⑨