ref: 48f5886605e95aa863e236a68602a1c4d6ced7fc
parent: c4e5c54d69920c07f5d421ba805da1a4c9c3e82d
author: Linfeng Zhang <linfengz@google.com>
date: Wed Mar 8 05:46:33 EST 2017
Add vpx_highbd_idct32x32_135_add_c(). When eob is less than or equal to 135 for high-bitdepth 32x32 idct, call this function. BUG=webm:1301 Change-Id: I8a5864f5c076e449c984e602946547a7b09c9fe6
--- a/test/partial_idct_test.cc
+++ b/test/partial_idct_test.cc
@@ -298,6 +298,15 @@
&highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>, TX_32X32, 1024, 12, 2),
make_tuple(
&vpx_highbd_fdct32x32_c, &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>,
+ &highbd_wrapper<vpx_highbd_idct32x32_135_add_c>, TX_32X32, 135, 8, 2),
+ make_tuple(
+ &vpx_highbd_fdct32x32_c, &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>,
+ &highbd_wrapper<vpx_highbd_idct32x32_135_add_c>, TX_32X32, 135, 10, 2),
+ make_tuple(
+ &vpx_highbd_fdct32x32_c, &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>,
+ &highbd_wrapper<vpx_highbd_idct32x32_135_add_c>, TX_32X32, 135, 12, 2),
+ make_tuple(
+ &vpx_highbd_fdct32x32_c, &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>,
&highbd_wrapper<vpx_highbd_idct32x32_34_add_c>, TX_32X32, 34, 8, 2),
make_tuple(
&vpx_highbd_fdct32x32_c, &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>,
--- a/vp9/common/vp9_idct.c
+++ b/vp9/common/vp9_idct.c
@@ -363,6 +363,8 @@
vpx_highbd_idct32x32_1_add(input, dest, stride, bd);
} else if (eob <= 34) {
vpx_highbd_idct32x32_34_add(input, dest, stride, bd);
+ } else if (eob <= 135) {
+ vpx_highbd_idct32x32_135_add(input, dest, stride, bd);
} else {
vpx_highbd_idct32x32_1024_add(input, dest, stride, bd);
}
--- a/vpx_dsp/inv_txfm.c
+++ b/vpx_dsp/inv_txfm.c
@@ -2569,6 +2569,35 @@
}
}
+void vpx_highbd_idct32x32_135_add_c(const tran_low_t *input, uint8_t *dest8,
+ int stride, int bd) {  // 32x32 inverse transform that reads only the first 16 rows of input and merges the result into dest8
+ int i, j;
+ tran_low_t out[32 * 32] = { 0 };  // zero-filled, so rows 16..31 are still zero after the row pass
+ tran_low_t *outptr = out;
+ tran_low_t temp_in[32], temp_out[32];  // scratch: one column in, one column out
+ uint16_t *const dest = CONVERT_TO_SHORTPTR(dest8);  // destination is accessed as uint16_t elements; macro is defined elsewhere
+
+ // Rows: run the 1-D transform on the first 16 rows of input only.
+ // Assumes only the upper-left 16x16 has non-zero coeff (rows 16..31 are never read).
+ for (i = 0; i < 16; ++i) {
+ highbd_idct32_c(input, outptr, bd);
+ input += 32;
+ outptr += 32;
+ }
+
+ // Columns: transform every column of out and merge it into dest.
+ for (i = 0; i < 32; ++i) {
+ uint16_t *destT = dest;  // restart at the first row for each column
+ for (j = 0; j < 32; ++j) temp_in[j] = out[j * 32 + i];  // gather column i of out
+ highbd_idct32_c(temp_in, temp_out, bd);
+ for (j = 0; j < 32; ++j) {
+ destT[i] = highbd_clip_pixel_add(destT[i],  // updates dest[j * stride + i] in place
+ ROUND_POWER_OF_TWO(temp_out[j], 6), bd);
+ destT += stride;  // advance by stride elements for the next j
+ }
+ }
+}
+
void vpx_highbd_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest8,
int stride, int bd) {
int i, j;
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -628,6 +628,8 @@
add_proto qw/void vpx_highbd_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
+ add_proto qw/void vpx_highbd_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
+
add_proto qw/void vpx_highbd_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
add_proto qw/void vpx_highbd_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";