shithub: dav1d

Download patch

ref: 14072e733465b034644dd08cfaffb3bf7ac0a310
parent: 8bfd7f2f06dc413180f7c7c795ee3b801df68601
author: Henrik Gramner <gramner@twoorioles.com>
date: Sun Sep 23 13:20:46 EDT 2018

Downshift mc subpel multiplier constants

Downshift (right-shift) all the filter constants by one bit, i.e. halve them,
and reduce the corresponding rounding shift by one.
This is mathematically equivalent since all constants are multiples of two,
but allows for using 16-bit intermediates in the 1st pass of the 8-tap filter.

--- a/src/mc.c
+++ b/src/mc.c
@@ -102,7 +102,7 @@
             src -= src_stride * 3;
             do {
                 for (int x = 0; x < w; x++)
-                    mid_ptr[x] = FILTER_8TAP_RND(src, x, fh, 1, 3);
+                    mid_ptr[x] = FILTER_8TAP_RND(src, x, fh, 1, 2);
 
                 mid_ptr += 128;
                 src += src_stride;
@@ -111,7 +111,7 @@
             mid_ptr = mid + 128 * 3;
             do {
                 for (int x = 0; x < w; x++)
-                    dst[x] = FILTER_8TAP_CLIP(mid_ptr, x, fv, 128, 11);
+                    dst[x] = FILTER_8TAP_CLIP(mid_ptr, x, fv, 128, 10);
 
                 mid_ptr += 128;
                 dst += dst_stride;
@@ -119,7 +119,7 @@
         } else {
             do {
                 for (int x = 0; x < w; x++) {
-                    const int px = FILTER_8TAP_RND(src, x, fh, 1, 3);
+                    const int px = FILTER_8TAP_RND(src, x, fh, 1, 2);
                     dst[x] = iclip_pixel((px + 8) >> 4);
                 }
 
@@ -130,7 +130,7 @@
     } else if (fv) {
         do {
             for (int x = 0; x < w; x++)
-                dst[x] = FILTER_8TAP_CLIP(src, x, fv, src_stride, 7);
+                dst[x] = FILTER_8TAP_CLIP(src, x, fv, src_stride, 6);
 
             dst += dst_stride;
             src += src_stride;
@@ -155,7 +155,7 @@
             src -= src_stride * 3;
             do {
                 for (int x = 0; x < w; x++)
-                    mid_ptr[x] = FILTER_8TAP_RND(src, x, fh, 1, 3);
+                    mid_ptr[x] = FILTER_8TAP_RND(src, x, fh, 1, 2);
 
                 mid_ptr += 128;
                 src += src_stride;
@@ -164,7 +164,7 @@
             mid_ptr = mid + 128 * 3;
             do {
                 for (int x = 0; x < w; x++)
-                    tmp[x] = FILTER_8TAP_RND(mid_ptr, x, fv, 128, 7);
+                    tmp[x] = FILTER_8TAP_RND(mid_ptr, x, fv, 128, 6);
 
                 mid_ptr += 128;
                 tmp += w;
@@ -172,7 +172,7 @@
         } else {
             do {
                 for (int x = 0; x < w; x++)
-                    tmp[x] = FILTER_8TAP_RND(src, x, fh, 1, 3);
+                    tmp[x] = FILTER_8TAP_RND(src, x, fh, 1, 2);
 
                 tmp += w;
                 src += src_stride;
@@ -181,7 +181,7 @@
     } else if (fv) {
         do {
             for (int x = 0; x < w; x++)
-                tmp[x] = FILTER_8TAP_RND(src, x, fv, src_stride, 3);
+                tmp[x] = FILTER_8TAP_RND(src, x, fv, src_stride, 2);
 
             tmp += w;
             src += src_stride;
--- a/src/tables.c
+++ b/src/tables.c
@@ -29,6 +29,8 @@
 
 #include <stdint.h>
 
+#include "common/attributes.h"
+
 #include "src/levels.h"
 #include "src/tables.h"
 
@@ -562,89 +564,88 @@
   293,  273,  256,  241,  228, 216, 205, 195, 186, 178, 171, 164,
 };
 
-const int8_t dav1d_mc_subpel_filters[5][15][8] = {
+ALIGN(const int8_t dav1d_mc_subpel_filters[5][15][8], 8 ) = {
     [FILTER_8TAP_REGULAR] = {
-        { 0, 2,  -6, 126,   8,  -2, 0, 0 },
-        { 0, 2, -10, 122,  18,  -4, 0, 0 },
-        { 0, 2, -12, 116,  28,  -8, 2, 0 },
-        { 0, 2, -14, 110,  38, -10, 2, 0 },
-        { 0, 2, -14, 102,  48, -12, 2, 0 },
-        { 0, 2, -16,  94,  58, -12, 2, 0 },
-        { 0, 2, -14,  84,  66, -12, 2, 0 },
-        { 0, 2, -14,  76,  76, -14, 2, 0 },
-        { 0, 2, -12,  66,  84, -14, 2, 0 },
-        { 0, 2, -12,  58,  94, -16, 2, 0 },
-        { 0, 2, -12,  48, 102, -14, 2, 0 },
-        { 0, 2, -10,  38, 110, -14, 2, 0 },
-        { 0, 2,  -8,  28, 116, -12, 2, 0 },
-        { 0, 0,  -4,  18, 122, -10, 2, 0 },
-        { 0, 0,  -2,   8, 126,  -6, 2, 0 }
-    }, [FILTER_8TAP_SHARP] = {
-        { -2,  2,  -6, 126,   8,  -2,  2,  0 },
-        { -2,  6, -12, 124,  16,  -6,  4, -2 },
-        { -2,  8, -18, 120,  26, -10,  6, -2 },
-        { -4, 10, -22, 116,  38, -14,  6, -2 },
-        { -4, 10, -22, 108,  48, -18,  8, -2 },
-        { -4, 10, -24, 100,  60, -20,  8, -2 },
-        { -4, 10, -24,  90,  70, -22, 10, -2 },
-        { -4, 12, -24,  80,  80, -24, 12, -4 },
-        { -2, 10, -22,  70,  90, -24, 10, -4 },
-        { -2,  8, -20,  60, 100, -24, 10, -4 },
-        { -2,  8, -18,  48, 108, -22, 10, -4 },
-        { -2,  6, -14,  38, 116, -22, 10, -4 },
-        { -2,  6, -10,  26, 120, -18,  8, -2 },
-        { -2,  4,  -6,  16, 124, -12,  6, -2 },
-        {  0,  2,  -2,   8, 126,  -6,  2, -2 }
+        {   0,   1,  -3,  63,   4,  -1,   0,   0 },
+        {   0,   1,  -5,  61,   9,  -2,   0,   0 },
+        {   0,   1,  -6,  58,  14,  -4,   1,   0 },
+        {   0,   1,  -7,  55,  19,  -5,   1,   0 },
+        {   0,   1,  -7,  51,  24,  -6,   1,   0 },
+        {   0,   1,  -8,  47,  29,  -6,   1,   0 },
+        {   0,   1,  -7,  42,  33,  -6,   1,   0 },
+        {   0,   1,  -7,  38,  38,  -7,   1,   0 },
+        {   0,   1,  -6,  33,  42,  -7,   1,   0 },
+        {   0,   1,  -6,  29,  47,  -8,   1,   0 },
+        {   0,   1,  -6,  24,  51,  -7,   1,   0 },
+        {   0,   1,  -5,  19,  55,  -7,   1,   0 },
+        {   0,   1,  -4,  14,  58,  -6,   1,   0 },
+        {   0,   0,  -2,   9,  61,  -5,   1,   0 },
+        {   0,   0,  -1,   4,  63,  -3,   1,   0 }
     }, [FILTER_8TAP_SMOOTH] = {
-        { 0,  2, 28,  62, 34,  2,  0, 0 },
-        { 0,  0, 26,  62, 36,  4,  0, 0 },
-        { 0,  0, 22,  62, 40,  4,  0, 0 },
-        { 0,  0, 20,  60, 42,  6,  0, 0 },
-        { 0,  0, 18,  58, 44,  8,  0, 0 },
-        { 0,  0, 16,  56, 46, 10,  0, 0 },
-        { 0, -2, 16,  54, 48, 12,  0, 0 },
-        { 0, -2, 14,  52, 52, 14, -2, 0 },
-        { 0,  0, 12,  48, 54, 16, -2, 0 },
-        { 0,  0, 10,  46, 56, 16,  0, 0 },
-        { 0,  0,  8,  44, 58, 18,  0, 0 },
-        { 0,  0,  6,  42, 60, 20,  0, 0 },
-        { 0,  0,  4,  40, 62, 22,  0, 0 },
-        { 0,  0,  4,  36, 62, 26,  0, 0 },
-        { 0,  0,  2,  34, 62, 28,  2, 0 },
-    },
+        {   0,   1,  14,  31,  17,   1,   0,   0 },
+        {   0,   0,  13,  31,  18,   2,   0,   0 },
+        {   0,   0,  11,  31,  20,   2,   0,   0 },
+        {   0,   0,  10,  30,  21,   3,   0,   0 },
+        {   0,   0,   9,  29,  22,   4,   0,   0 },
+        {   0,   0,   8,  28,  23,   5,   0,   0 },
+        {   0,  -1,   8,  27,  24,   6,   0,   0 },
+        {   0,  -1,   7,  26,  26,   7,  -1,   0 },
+        {   0,   0,   6,  24,  27,   8,  -1,   0 },
+        {   0,   0,   5,  23,  28,   8,   0,   0 },
+        {   0,   0,   4,  22,  29,   9,   0,   0 },
+        {   0,   0,   3,  21,  30,  10,   0,   0 },
+        {   0,   0,   2,  20,  31,  11,   0,   0 },
+        {   0,   0,   2,  18,  31,  13,   0,   0 },
+        {   0,   0,   1,  17,  31,  14,   1,   0 }
+    }, [FILTER_8TAP_SHARP] = {
+        {  -1,   1,  -3,  63,   4,  -1,   1,   0 },
+        {  -1,   3,  -6,  62,   8,  -3,   2,  -1 },
+        {  -1,   4,  -9,  60,  13,  -5,   3,  -1 },
+        {  -2,   5, -11,  58,  19,  -7,   3,  -1 },
+        {  -2,   5, -11,  54,  24,  -9,   4,  -1 },
+        {  -2,   5, -12,  50,  30, -10,   4,  -1 },
+        {  -2,   5, -12,  45,  35, -11,   5,  -1 },
+        {  -2,   6, -12,  40,  40, -12,   6,  -2 },
+        {  -1,   5, -11,  35,  45, -12,   5,  -2 },
+        {  -1,   4, -10,  30,  50, -12,   5,  -2 },
+        {  -1,   4,  -9,  24,  54, -11,   5,  -2 },
+        {  -1,   3,  -7,  19,  58, -11,   5,  -2 },
+        {  -1,   3,  -5,  13,  60,  -9,   4,  -1 },
+        {  -1,   2,  -3,   8,  62,  -6,   3,  -1 },
+        {   0,   1,  -1,   4,  63,  -3,   1,  -1 }
     /* width <= 4 */
-    [3 + FILTER_8TAP_REGULAR] = {
-        { 0, 0,  -4, 126,   8,  -2, 0, 0 },
-        { 0, 0,  -8, 122,  18,  -4, 0, 0 },
-        { 0, 0, -10, 116,  28,  -6, 0, 0 },
-        { 0, 0, -12, 110,  38,  -8, 0, 0 },
-        { 0, 0, -12, 102,  48, -10, 0, 0 },
-        { 0, 0, -14,  94,  58, -10, 0, 0 },
-        { 0, 0, -12,  84,  66, -10, 0, 0 },
-        { 0, 0, -12,  76,  76, -12, 0, 0 },
-        { 0, 0, -10,  66,  84, -12, 0, 0 },
-        { 0, 0, -10,  58,  94, -14, 0, 0 },
-        { 0, 0, -10,  48, 102, -12, 0, 0 },
-        { 0, 0,  -8,  38, 110, -12, 0, 0 },
-        { 0, 0,  -6,  28, 116, -10, 0, 0 },
-        { 0, 0,  -4,  18, 122,  -8, 0, 0 },
-        { 0, 0,  -2,   8, 126,  -4, 0, 0 }
+    }, [3 + FILTER_8TAP_REGULAR] = {
+        {   0,   0,  -2,  63,   4,  -1,   0,   0 },
+        {   0,   0,  -4,  61,   9,  -2,   0,   0 },
+        {   0,   0,  -5,  58,  14,  -3,   0,   0 },
+        {   0,   0,  -6,  55,  19,  -4,   0,   0 },
+        {   0,   0,  -6,  51,  24,  -5,   0,   0 },
+        {   0,   0,  -7,  47,  29,  -5,   0,   0 },
+        {   0,   0,  -6,  42,  33,  -5,   0,   0 },
+        {   0,   0,  -6,  38,  38,  -6,   0,   0 },
+        {   0,   0,  -5,  33,  42,  -6,   0,   0 },
+        {   0,   0,  -5,  29,  47,  -7,   0,   0 },
+        {   0,   0,  -5,  24,  51,  -6,   0,   0 },
+        {   0,   0,  -4,  19,  55,  -6,   0,   0 },
+        {   0,   0,  -3,  14,  58,  -5,   0,   0 },
+        {   0,   0,  -2,   9,  61,  -4,   0,   0 },
+        {   0,   0,  -1,   4,  63,  -2,   0,   0 }
     }, [3 + FILTER_8TAP_SMOOTH] = {
-        { 0, 0, 30,  62, 34,  2, 0, 0 },
-        { 0, 0, 26,  62, 36,  4, 0, 0 },
-        { 0, 0, 22,  62, 40,  4, 0, 0 },
-        { 0, 0, 20,  60, 42,  6, 0, 0 },
-        { 0, 0, 18,  58, 44,  8, 0, 0 },
-        { 0, 0, 16,  56, 46, 10, 0, 0 },
-        { 0, 0, 14,  54, 48, 12, 0, 0 },
-        { 0, 0, 12,  52, 52, 12, 0, 0 },
-        { 0, 0, 12,  48, 54, 14, 0, 0 },
-        { 0, 0, 10,  46, 56, 16, 0, 0 },
-        { 0, 0,  8,  44, 58, 18, 0, 0 },
-        { 0, 0,  6,  42, 60, 20, 0, 0 },
-        { 0, 0,  4,  40, 62, 22, 0, 0 },
-        { 0, 0,  4,  36, 62, 26, 0, 0 },
-        { 0, 0,  2,  34, 62, 30, 0, 0 }
+        {   0,   0,  15,  31,  17,   1,   0,   0 },
+        {   0,   0,  13,  31,  18,   2,   0,   0 },
+        {   0,   0,  11,  31,  20,   2,   0,   0 },
+        {   0,   0,  10,  30,  21,   3,   0,   0 },
+        {   0,   0,   9,  29,  22,   4,   0,   0 },
+        {   0,   0,   8,  28,  23,   5,   0,   0 },
+        {   0,   0,   7,  27,  24,   6,   0,   0 },
+        {   0,   0,   6,  26,  26,   6,   0,   0 },
+        {   0,   0,   6,  24,  27,   7,   0,   0 },
+        {   0,   0,   5,  23,  28,   8,   0,   0 },
+        {   0,   0,   4,  22,  29,   9,   0,   0 },
+        {   0,   0,   3,  21,  30,  10,   0,   0 },
+        {   0,   0,   2,  20,  31,  11,   0,   0 },
+        {   0,   0,   2,  18,  31,  13,   0,   0 },
+        {   0,   0,   1,  17,  31,  15,   0,   0 }
     }
 };