shithub: dav1d

Download patch

ref: 77c52be0076549020e75c259a10c6fd817b0996c
parent: 93c4bea2d45d7caf5cc6ab712d938dc6f74b98a2
author: Henrik Gramner <gramner@twoorioles.com>
date: Fri Oct 19 20:11:11 EDT 2018

Reorder the dav1d_filter_intra_taps array

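Ordering the elements this way is more SIMD-friendly: each filter is now
a flat 64-byte table in which the taps for output pixel i (0..7) sit at
[2*i] and [2*i+1] (p0, p1), [16+2*i] and [17+2*i] (p2, p3), [32+2*i] and
[33+2*i] (p4, p5) and [48+2*i] (p6), with [49+2*i] as zero padding.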

--- a/src/ipred.c
+++ b/src/ipred.c
@@ -553,7 +553,7 @@
     filt_idx &= 511;
     assert(filt_idx < 5);
 
-    const int8_t (*const filter)[8] = dav1d_filter_intra_taps[filt_idx];
+    const int8_t *const filter = dav1d_filter_intra_taps[filt_idx];
     int x, y;
     ptrdiff_t left_stride;
     const pixel *left, *topleft, *top;
@@ -568,19 +568,18 @@
             const int p1 = top[0], p2 = top[1], p3 = top[2], p4 = top[3];
             const int p5 = left[0 * left_stride], p6 = left[1 * left_stride];
             pixel *ptr = &dst[x];
-            const int8_t (*flt_ptr)[8] = filter;
+            const int8_t *flt_ptr = filter;
 
             for (int yy = 0; yy < 2; yy++) {
-                for (int xx = 0; xx < 4; xx++, flt_ptr++) {
-                    int acc = flt_ptr[0][0] * p0 + flt_ptr[0][1] * p1 +
-                              flt_ptr[0][2] * p2 + flt_ptr[0][3] * p3 +
-                              flt_ptr[0][4] * p4 + flt_ptr[0][5] * p5 +
-                              flt_ptr[0][6] * p6;
+                for (int xx = 0; xx < 4; xx++, flt_ptr += 2) {
+                    int acc = flt_ptr[ 0] * p0 + flt_ptr[ 1] * p1 +
+                              flt_ptr[16] * p2 + flt_ptr[17] * p3 +
+                              flt_ptr[32] * p4 + flt_ptr[33] * p5 +
+                              flt_ptr[48] * p6;
                     ptr[xx] = iclip_pixel((acc + 8) >> 4);
                 }
                 ptr += PXSTRIDE(stride);
             }
-
             left = &dst[x + 4 - 1];
             left_stride = PXSTRIDE(stride);
             top += 4;
--- a/src/tables.c
+++ b/src/tables.c
@@ -781,51 +781,51 @@
        3, 0, 0,       // 87, ...
 };
 
-const int8_t dav1d_filter_intra_taps[5][8][8] = {
+const int8_t ALIGN(dav1d_filter_intra_taps[5][64], 16) = {
     {
-        {  -6, 10,  0,  0,  0, 12,  0,  0 },
-        {  -5,  2, 10,  0,  0,  9,  0,  0 },
-        {  -3,  1,  1, 10,  0,  7,  0,  0 },
-        {  -3,  1,  1,  2, 10,  5,  0,  0 },
-        {  -4,  6,  0,  0,  0,  2, 12,  0 },
-        {  -3,  2,  6,  0,  0,  2,  9,  0 },
-        {  -3,  2,  2,  6,  0,  2,  7,  0 },
-        {  -3,  1,  2,  2,  6,  3,  5,  0 },
+         -6,  10,  -5,   2,  -3,   1,  -3,   1,
+         -4,   6,  -3,   2,  -3,   2,  -3,   1,
+          0,   0,  10,   0,   1,  10,   1,   2,
+          0,   0,   6,   0,   2,   6,   2,   2,
+          0,  12,   0,   9,   0,   7,  10,   5,
+          0,   2,   0,   2,   0,   2,   6,   3,
+          0,   0,   0,   0,   0,   0,   0,   0,
+         12,   0,   9,   0,   7,   0,   5,   0
     }, {
-        { -10, 16,  0,  0,  0, 10,  0,  0 },
-        {  -6,  0, 16,  0,  0,  6,  0,  0 },
-        {  -4,  0,  0, 16,  0,  4,  0,  0 },
-        {  -2,  0,  0,  0, 16,  2,  0,  0 },
-        { -10, 16,  0,  0,  0,  0, 10,  0 },
-        {  -6,  0, 16,  0,  0,  0,  6,  0 },
-        {  -4,  0,  0, 16,  0,  0,  4,  0 },
-        {  -2,  0,  0,  0, 16,  0,  2,  0 },
+        -10,  16,  -6,   0,  -4,   0,  -2,   0,
+        -10,  16,  -6,   0,  -4,   0,  -2,   0,
+          0,   0,  16,   0,   0,  16,   0,   0,
+          0,   0,  16,   0,   0,  16,   0,   0,
+          0,  10,   0,   6,   0,   4,  16,   2,
+          0,   0,   0,   0,   0,   0,  16,   0,
+          0,   0,   0,   0,   0,   0,   0,   0,
+         10,   0,   6,   0,   4,   0,   2,   0
     }, {
-        {  -8,  8,  0,  0,  0, 16,  0,  0 },
-        {  -8,  0,  8,  0,  0, 16,  0,  0 },
-        {  -8,  0,  0,  8,  0, 16,  0,  0 },
-        {  -8,  0,  0,  0,  8, 16,  0,  0 },
-        {  -4,  4,  0,  0,  0,  0, 16,  0 },
-        {  -4,  0,  4,  0,  0,  0, 16,  0 },
-        {  -4,  0,  0,  4,  0,  0, 16,  0 },
-        {  -4,  0,  0,  0,  4,  0, 16,  0 },
+         -8,   8,  -8,   0,  -8,   0,  -8,   0,
+         -4,   4,  -4,   0,  -4,   0,  -4,   0,
+          0,   0,   8,   0,   0,   8,   0,   0,
+          0,   0,   4,   0,   0,   4,   0,   0,
+          0,  16,   0,  16,   0,  16,   8,  16,
+          0,   0,   0,   0,   0,   0,   4,   0,
+          0,   0,   0,   0,   0,   0,   0,   0,
+         16,   0,  16,   0,  16,   0,  16,   0
     }, {
-        {  -2,  8,  0,  0,  0, 10,  0,  0 },
-        {  -1,  3,  8,  0,  0,  6,  0,  0 },
-        {  -1,  2,  3,  8,  0,  4,  0,  0 },
-        {   0,  1,  2,  3,  8,  2,  0,  0 },
-        {  -1,  4,  0,  0,  0,  3, 10,  0 },
-        {  -1,  3,  4,  0,  0,  4,  6,  0 },
-        {  -1,  2,  3,  4,  0,  4,  4,  0 },
-        {  -1,  2,  2,  3,  4,  3,  3,  0 },
+         -2,   8,  -1,   3,  -1,   2,   0,   1,
+         -1,   4,  -1,   3,  -1,   2,  -1,   2,
+          0,   0,   8,   0,   3,   8,   2,   3,
+          0,   0,   4,   0,   3,   4,   2,   3,
+          0,  10,   0,   6,   0,   4,   8,   2,
+          0,   3,   0,   4,   0,   4,   4,   3,
+          0,   0,   0,   0,   0,   0,   0,   0,
+         10,   0,   6,   0,   4,   0,   3,   0
     }, {
-        { -12, 14,  0,  0,  0, 14,  0,  0 },
-        { -10,  0, 14,  0,  0, 12,  0,  0 },
-        {  -9,  0,  0, 14,  0, 11,  0,  0 },
-        {  -8,  0,  0,  0, 14, 10,  0,  0 },
-        { -10, 12,  0,  0,  0,  0, 14,  0 },
-        {  -9,  1, 12,  0,  0,  0, 12,  0 },
-        {  -8,  0,  0, 12,  0,  1, 11,  0 },
-        {  -7,  0,  0,  1, 12,  1,  9,  0 },
+        -12,  14, -10,   0,  -9,   0,  -8,   0,
+        -10,  12,  -9,   1,  -8,   0,  -7,   0,
+          0,   0,  14,   0,   0,  14,   0,   0,
+          0,   0,  12,   0,   0,  12,   0,   1,
+          0,  14,   0,  12,   0,  11,  14,  10,
+          0,   0,   0,   0,   0,   1,  12,   1,
+          0,   0,   0,   0,   0,   0,   0,   0,
+         14,   0,  12,   0,  11,   0,   9,   0
     }
 };
--- a/src/tables.h
+++ b/src/tables.h
@@ -115,6 +115,6 @@
 
 extern const uint8_t dav1d_sm_weights[128];
 extern const int16_t dav1d_dr_intra_derivative[90];
-extern const int8_t dav1d_filter_intra_taps[5][8][8];
+extern const int8_t dav1d_filter_intra_taps[5][64];
 
 #endif /* __DAV1D_SRC_TABLES_H__ */