ref: a80bdfd0813b9a05252c72796ddbc55bc1e48588
parent: 9d278465b5a1107f5e91eb63161cbcc0e6c118d3
author: Linfeng Zhang <linfengz@google.com>
date: Tue Sep 12 11:24:54 EDT 2017
Change sinpi_{1,2,3,4}_9 from tran_high_t to int16_t. Add "typedef int16_t tran_coef_t;". BUG=webm:1450 Change-Id: I67866f104898d1dda8989e1abdaf6983fe324154
--- a/vp9/common/arm/neon/vp9_iht4x4_add_neon.c
+++ b/vp9/common/arm/neon/vp9_iht4x4_add_neon.c
@@ -14,14 +14,7 @@
#include "./vp9_rtcd.h"
#include "./vpx_config.h"
#include "vp9/common/vp9_common.h"
-
-static int16_t sinpi_1_9 = 0x14a3;
-static int16_t sinpi_2_9 = 0x26c9;
-static int16_t sinpi_3_9 = 0x3441;
-static int16_t sinpi_4_9 = 0x3b6c;
-static int16_t cospi_8_64 = 0x3b21;
-static int16_t cospi_16_64 = 0x2d41;
-static int16_t cospi_24_64 = 0x187e;
+#include "vpx_dsp/txfm_common.h"
static INLINE void TRANSPOSE4X4(int16x8_t *q8s16, int16x8_t *q9s16) {
int32x4_t q8s32, q9s32;
--- a/vp9/encoder/vp9_dct.c
+++ b/vp9/encoder/vp9_dct.c
@@ -246,13 +246,14 @@
return;
}
- s0 = sinpi_1_9 * x0;
- s1 = sinpi_4_9 * x0;
- s2 = sinpi_2_9 * x1;
- s3 = sinpi_1_9 * x1;
- s4 = sinpi_3_9 * x2;
- s5 = sinpi_4_9 * x3;
- s6 = sinpi_2_9 * x3;
+ // 32-bit result is enough for the following multiplications.
+ s0 = sinpi_1_9 * input[0];
+ s1 = sinpi_4_9 * input[0];
+ s2 = sinpi_2_9 * input[1];
+ s3 = sinpi_1_9 * input[1];
+ s4 = sinpi_3_9 * input[2];
+ s5 = sinpi_4_9 * input[3];
+ s6 = sinpi_2_9 * input[3];
s7 = x0 + x1 - x3;
x0 = s0 + s2 + s5;
--- a/vpx_dsp/inv_txfm.c
+++ b/vpx_dsp/inv_txfm.c
@@ -105,6 +105,7 @@
return;
}
+ // 32-bit result is enough for the following multiplications.
s0 = sinpi_1_9 * x0;
s1 = sinpi_2_9 * x0;
s2 = sinpi_3_9 * x1;
@@ -1390,13 +1391,13 @@
return;
}
- s0 = sinpi_1_9 * x0;
- s1 = sinpi_2_9 * x0;
- s2 = sinpi_3_9 * x1;
- s3 = sinpi_4_9 * x2;
- s4 = sinpi_1_9 * x2;
- s5 = sinpi_2_9 * x3;
- s6 = sinpi_4_9 * x3;
+ s0 = (tran_high_t)sinpi_1_9 * x0;
+ s1 = (tran_high_t)sinpi_2_9 * x0;
+ s2 = (tran_high_t)sinpi_3_9 * x1;
+ s3 = (tran_high_t)sinpi_4_9 * x2;
+ s4 = (tran_high_t)sinpi_1_9 * x2;
+ s5 = (tran_high_t)sinpi_2_9 * x3;
+ s6 = (tran_high_t)sinpi_4_9 * x3;
s7 = (tran_high_t)HIGHBD_WRAPLOW(x0 - x2 + x3, bd);
s0 = s0 + s3 + s5;
--- a/vpx_dsp/mips/itrans4_dspr2.c
+++ b/vpx_dsp/mips/itrans4_dspr2.c
@@ -343,6 +343,7 @@
return;
}
+ // 32-bit result is enough for the following multiplications.
s0 = sinpi_1_9 * x0;
s1 = sinpi_2_9 * x0;
s2 = sinpi_3_9 * x1;
--- a/vpx_dsp/txfm_common.h
+++ b/vpx_dsp/txfm_common.h
@@ -58,9 +58,9 @@
static const tran_high_t cospi_31_64 = 804;
// 16384 * sqrt(2) * sin(kPi/9) * 2 / 3
-static const tran_high_t sinpi_1_9 = 5283;
-static const tran_high_t sinpi_2_9 = 9929;
-static const tran_high_t sinpi_3_9 = 13377;
-static const tran_high_t sinpi_4_9 = 15212;
+static const tran_coef_t sinpi_1_9 = 5283;
+static const tran_coef_t sinpi_2_9 = 9929;
+static const tran_coef_t sinpi_3_9 = 13377;
+static const tran_coef_t sinpi_4_9 = 15212;
#endif // VPX_DSP_TXFM_COMMON_H_
--- a/vpx_dsp/vpx_dsp_common.h
+++ b/vpx_dsp/vpx_dsp_common.h
@@ -43,6 +43,8 @@
typedef int16_t tran_low_t;
#endif // CONFIG_VP9_HIGHBITDEPTH
+typedef int16_t tran_coef_t;
+
// Clamps val to the range [0, 255] and returns it as a uint8_t:
// values above 255 yield 255, values below 0 yield 0, and anything in
// between is returned unchanged.
static INLINE uint8_t clip_pixel(int val) {
return (val > 255) ? 255 : (val < 0) ? 0 : val;
}