ref: c244a862341826713c4ad17b1f7c637e8395cdfc
parent: e0b88b5c00b8026876da72e2f6c6fadf479d394d
author: Linfeng Zhang <linfengz@google.com>
date: Mon Mar 5 10:16:49 EST 2018
Fix a bug in vp9_iht8x8_64_add_neon(). This bug was introduced in b14b616d. BUG=webm:1403 Change-Id: I84b2733734982e52b66548850d61758c772b5494
--- a/test/dct_test.cc
+++ b/test/dct_test.cc
@@ -639,10 +639,10 @@
#endif
#endif
{ &vp9_fht4x4_c, &iht_wrapper<vp9_iht4x4_16_add_neon>, 4, 1 },
-// TODO(linfengz): reenable these functions once test vector failures are
-// addressed.
-#if 0
+ // TODO(linfengz): reenable the 16x16 function below once test vector
+ // failures are addressed.
{ &vp9_fht8x8_c, &iht_wrapper<vp9_iht8x8_64_add_neon>, 8, 1 },
+#if 0
{ &vp9_fht16x16_c, &iht_wrapper<vp9_iht16x16_256_add_neon>, 16, 1 }
#endif
};
--- a/test/fdct8x8_test.cc
+++ b/test/fdct8x8_test.cc
@@ -675,9 +675,8 @@
::testing::Values(make_tuple(&vpx_fdct8x8_neon,
&vpx_idct8x8_64_add_neon,
0, VPX_BITS_8)));
-// TODO(linfengz): reenable these functions once test vector failures are
-// addressed.
-#if 0 // !CONFIG_VP9_HIGHBITDEPTH
+
+#if !CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
NEON, FwdTrans8x8HT,
::testing::Values(
--- a/vp9/common/arm/neon/vp9_iht_neon.h
+++ b/vp9/common/arm/neon/vp9_iht_neon.h
@@ -59,14 +59,17 @@
static INLINE void iadst_half_butterfly_neon(int16x8_t *const x,
const int16x4_t c) {
- const int16x8_t sum = vaddq_s16(x[0], x[1]);
- const int16x8_t sub = vsubq_s16(x[0], x[1]);
+ // Don't add/sub before the multiply: the 16-bit add/sub overflows in iadst8.
+ const int32x4_t x0_lo = vmull_lane_s16(vget_low_s16(x[0]), c, 0);
+ const int32x4_t x0_hi = vmull_lane_s16(vget_high_s16(x[0]), c, 0);
+ const int32x4_t x1_lo = vmull_lane_s16(vget_low_s16(x[1]), c, 0);
+ const int32x4_t x1_hi = vmull_lane_s16(vget_high_s16(x[1]), c, 0);
int32x4_t t0[2], t1[2];
- t0[0] = vmull_lane_s16(vget_low_s16(sum), c, 0);
- t0[1] = vmull_lane_s16(vget_high_s16(sum), c, 0);
- t1[0] = vmull_lane_s16(vget_low_s16(sub), c, 0);
- t1[1] = vmull_lane_s16(vget_high_s16(sub), c, 0);
+ t0[0] = vaddq_s32(x0_lo, x1_lo);
+ t0[1] = vaddq_s32(x0_hi, x1_hi);
+ t1[0] = vsubq_s32(x0_lo, x1_lo);
+ t1[1] = vsubq_s32(x0_hi, x1_hi);
x[0] = dct_const_round_shift_low_8(t0);
x[1] = dct_const_round_shift_low_8(t1);
}
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -68,7 +68,7 @@
# Note that there are more specializations appended when
# CONFIG_VP9_HIGHBITDEPTH is off.
specialize qw/vp9_iht4x4_16_add neon sse2/;
- specialize qw/vp9_iht8x8_64_add sse2/;
+ specialize qw/vp9_iht8x8_64_add neon sse2/;
specialize qw/vp9_iht16x16_256_add sse2/;
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") ne "yes") {
# Note that these specializations are appended to the above ones.