ref: 56e6c66b498e4d71eae35af54b00bbf7d00ab4de
parent: 6b1b34177491351fb15613637bd1667ced34c5ac
author: Tero Rintaluoma <teror@google.com>
date: Wed Feb 13 10:18:41 EST 2013
Avoid division in intra prediction - Using multiplication and shifting instead of division in intra prediction. - Maximum absolute difference is 1 for division statements in d45, d27, d63 prediction modes. However, errors can accumulate for large block sizes when using already predicted values. - Maximum number of non-matching result values in loops using division are: 4x4: 0/16, 8x8: 0/64, 16x16: 10/256, 32x32: 13/1024, 64x64: 122/4096. Overall PSNR: derf: 0.005, yt: -0.022, std-hd: 0.021, hd: -0.006. Change-Id: I3979a02eb6351636442c1af1e23d6c4e6ec1d01d
--- a/vp9/common/vp9_reconintra.c
+++ b/vp9/common/vp9_reconintra.c
@@ -18,6 +18,23 @@
* and vp9_build_intra_predictors_mbuv_s(MACROBLOCKD *xd).
*/
+/* Use multiplication and shifting instead of division in diagonal prediction.
+ * iscale[i] is calculated as ((1<<16) + (i+2)/2) / (i+2), i.e. 65536 / (i+2)
+ * rounded to nearest, and is used as ((A + B) * iscale[i] + (1<<15)) >> 16
+ * to approximate (A + B) / (i+2), where A and B are weighted pixel values.
+ */
+static const unsigned int iscale[64] = {
+ 32768, 21845, 16384, 13107, 10923, 9362, 8192, 7282,
+ 6554, 5958, 5461, 5041, 4681, 4369, 4096, 3855,
+ 3641, 3449, 3277, 3121, 2979, 2849, 2731, 2621,
+ 2521, 2427, 2341, 2260, 2185, 2114, 2048, 1986,
+ 1928, 1872, 1820, 1771, 1725, 1680, 1638, 1598,
+ 1560, 1524, 1489, 1456, 1425, 1394, 1365, 1337,
+ 1311, 1285, 1260, 1237, 1214, 1192, 1170, 1150,
+ 1130, 1111, 1092, 1074, 1057, 1040, 1024, 1008,
+};
+
+
static void d27_predictor(uint8_t *ypred_ptr, int y_stride, int n,
uint8_t *yabove_row, uint8_t *yleft_col) {
int r, c, h, w, v;
@@ -29,7 +46,7 @@
else
a = (yleft_col[r] + yleft_col[r + 1] + 1) >> 1;
b = yabove_row[c + 2];
- ypred_ptr[c] = (2 * a + (c + 1) * b + (c + 3) / 2) / (c + 3);
+ ypred_ptr[c] = ((2 * a + (c + 1) * b) * iscale[1+c] + (1<<15)) >> 16;
}
for (r = 1; r < n / 2 - 1; r++) {
for (c = 0; c < n - 2 - 2 * r; c++) {
@@ -38,7 +55,8 @@
else
a = (yleft_col[r] + yleft_col[r + 1] + 1) >> 1;
b = ypred_ptr[(r - 1) * y_stride + c + 2];
- ypred_ptr[r * y_stride + c] = (2 * a + (c + 1) * b + (c + 3) / 2) / (c + 3);
+ ypred_ptr[r * y_stride + c] =
+ ((2 * a + (c + 1) * b) * iscale[1+c] + (1<<15)) >> 16;
}
}
for (; r < n - 1; ++r) {
@@ -77,7 +95,8 @@
else
a = (yabove_row[c] + yabove_row[c + 1] + 1) >> 1;
b = yleft_col[r + 2];
- ypred_ptr[r * y_stride] = (2 * a + (r + 1) * b + (r + 3) / 2) / (r + 3);
+ ypred_ptr[r * y_stride] = ((2 * a + (r + 1) * b) * iscale[1+r] +
+ (1<<15)) >> 16;
}
for (c = 1; c < n / 2 - 1; c++) {
for (r = 0; r < n - 2 - 2 * c; r++) {
@@ -86,7 +105,8 @@
else
a = (yabove_row[c] + yabove_row[c + 1] + 1) >> 1;
b = ypred_ptr[(r + 2) * y_stride + c - 1];
- ypred_ptr[r * y_stride + c] = (2 * a + (c + 1) * b + (c + 3) / 2) / (c + 3);
+ ypred_ptr[r * y_stride + c] = ((2 * a + (c + 1) * b) * iscale[1+c] +
+ (1<<15)) >> 16;
}
}
for (; c < n - 1; ++c) {
@@ -119,8 +139,8 @@
for (r = 0; r < n - 1; ++r) {
for (c = 0; c <= r; ++c) {
ypred_ptr[(r - c) * y_stride + c] =
- (yabove_row[r + 1] * (c + 1) +
- yleft_col[r + 1] * (r - c + 1) + r / 2 + 1) / (r + 2);
+ ((yabove_row[r + 1] * (c + 1) +
+ yleft_col[r + 1] * (r - c + 1)) * iscale[r] + (1<<15)) >> 16;
}
}
for (c = 0; c <= r; ++c) {
@@ -129,8 +149,8 @@
int yleft_ext = yleft_col[r]; // clip_pixel(2 * yleft_col[r] -
// yleft_col[r-1]);
ypred_ptr[(r - c) * y_stride + c] =
- (yabove_ext * (c + 1) +
- yleft_ext * (r - c + 1) + r / 2 + 1) / (r + 2);
+ ((yabove_ext * (c + 1) +
+ yleft_ext * (r - c + 1)) * iscale[r] + (1<<15)) >> 16;
}
for (r = 1; r < n; ++r) {
for (c = n - r; c < n; ++c) {
--
⑨