ref: cc80247f16ce83271e5c2043307dc65c8bb4bbf7
parent: 89ac94f8fb7be1ce9baee198954a890941ecf936
author: Yunqing Wang <yunqingwang@google.com>
date: Thu Dec 27 08:48:17 EST 2012
Switch the order of calculating 2-D inverse transform. The 2-D inverse transform X = M1*Z*Transposed_M2 was calculated in 2 steps from left to right: 1. Vertical transform: Y = M1*Z; 2. Horizontal transform: X = Y*Transposed_M2. In SIMD, a transpose is needed in the vertical transform. This change switches the calculation order to go from right to left (horizontal transform first, then vertical), which eliminates that transpose by writing the intermediate results out to their transposed positions. Change-Id: I34dfe5eb01292f6e363712420d99475e2e81e12c
--- a/vp9/common/vp9_idctllm.c
+++ b/vp9/common/vp9_idctllm.c
@@ -399,10 +399,10 @@
}
/* Converted the transforms to integer form. */
-#define VERTICAL_SHIFT 14 // 16
-#define VERTICAL_ROUNDING ((1 << (VERTICAL_SHIFT - 1)) - 1)
-#define HORIZONTAL_SHIFT 17 // 15
+#define HORIZONTAL_SHIFT 14 // 16
#define HORIZONTAL_ROUNDING ((1 << (HORIZONTAL_SHIFT - 1)) - 1)
+#define VERTICAL_SHIFT 17 // 15
+#define VERTICAL_ROUNDING ((1 << (VERTICAL_SHIFT - 1)) - 1)
void vp9_ihtllm_c(const int16_t *input, int16_t *output, int pitch,
TX_TYPE tx_type, int tx_dim) {
int i, j, k;
@@ -444,41 +444,47 @@
break;
}
- /* vertical transformation */
+ /* 2-D inverse transform X = M1*Z*Transposed_M2 is calculated in 2 steps
+ * from right to left:
+ * 1. horizontal transform: Y = Z*Transposed_M2
+ * 2. vertical transform: X = M1*Y
+ * In SIMD, doing it this way eliminates the transpose that would be
+ * needed if the product were calculated from left to right.
+ */
+ /* Horizontal transformation */
for (j = 0; j < tx_dim; j++) {
for (i = 0; i < tx_dim; i++) {
int temp = 0;
for (k = 0; k < tx_dim; k++) {
- temp += ptv[k] * ip[(k * tx_dim)];
+ temp += ip[k] * pth[k];
}
- im[i] = (int16_t)((temp + VERTICAL_ROUNDING) >> VERTICAL_SHIFT);
- ip++;
+ /* Calculate im and store it in its transposed position. */
+ im[i] = (int16_t)((temp + HORIZONTAL_ROUNDING) >> HORIZONTAL_SHIFT);
+ ip += tx_dim;
}
- im += tx_dim; // 16
- ptv += tx_dim;
+ im += tx_dim;
+ pth += tx_dim;
ip = input;
}
- /* horizontal transformation */
+ /* Vertical transformation */
im = &imbuf[0];
- for (j = 0; j < tx_dim; j++) {
- const int16_t *pthc = pth;
-
- for (i = 0; i < tx_dim; i++) {
+ for (i = 0; i < tx_dim; i++) {
+ for (j = 0; j < tx_dim; j++) {
int temp = 0;
for (k = 0; k < tx_dim; k++) {
- temp += im[k] * pthc[k];
+ temp += ptv[k] * im[k];
}
- op[i] = (int16_t)((temp + HORIZONTAL_ROUNDING) >> HORIZONTAL_SHIFT);
- pthc += tx_dim;
+ op[j] = (int16_t)((temp + VERTICAL_ROUNDING) >> VERTICAL_SHIFT);
+ im += tx_dim;
}
-
- im += tx_dim; // 16
+ im = &imbuf[0];
+ ptv += tx_dim;
op += shortpitch;
}
}
--
⑨