shithub: dav1d

Download patch

ref: f813285c1d1a5421e0180efbb7cbdd377cd31c69
parent: a440af4a51abf484b637ef936872dd378f40d86a
author: Henrik Gramner <gramner@twoorioles.com>
date: Sun Jan 13 18:04:53 EST 2019

Shrink dav1d_dr_intra_derivative[]

--- a/src/ipred_tmpl.c
+++ b/src/ipred_tmpl.c
@@ -422,7 +422,7 @@
     const int enable_intra_edge_filter = angle >> 10;
     angle &= 511;
     assert(angle < 90);
-    int dx = dav1d_dr_intra_derivative[angle];
+    int dx = dav1d_dr_intra_derivative[angle >> 1];
     pixel top_out[(64 + 64) * 2];
     const pixel *top;
     int max_base_x;
@@ -476,8 +476,8 @@
     const int enable_intra_edge_filter = angle >> 10;
     angle &= 511;
     assert(angle > 90 && angle < 180);
-    int dy = dav1d_dr_intra_derivative[angle - 90];
-    int dx = dav1d_dr_intra_derivative[180 - angle];
+    int dy = dav1d_dr_intra_derivative[(angle - 90) >> 1];
+    int dx = dav1d_dr_intra_derivative[(180 - angle) >> 1];
     const int upsample_left = enable_intra_edge_filter ?
         get_upsample(width + height, 180 - angle, is_sm) : 0;
     const int upsample_above = enable_intra_edge_filter ?
@@ -557,7 +557,7 @@
     const int enable_intra_edge_filter = angle >> 10;
     angle &= 511;
     assert(angle > 180);
-    int dy = dav1d_dr_intra_derivative[270 - angle];
+    int dy = dav1d_dr_intra_derivative[(270 - angle) >> 1];
     pixel left_out[(64 + 64) * 2];
     const pixel *left;
     int max_base_y;
--- a/src/tables.c
+++ b/src/tables.c
@@ -775,37 +775,36 @@
       7,   6,   6,   5,   5,   4,   4,   4
 };
 
-const int16_t dav1d_dr_intra_derivative[90] = {
-    // More evenly spread out angles and limited to 10-bit
+const uint16_t dav1d_dr_intra_derivative[44] = {
     // Values that are 0 will never be used
-       0, 0, 0,       // Approx angle
-    1023, 0, 0,       // 3, ...
-     547, 0, 0,       // 6, ...
-     372, 0, 0, 0, 0, // 9, ...
-     273, 0, 0,       // 14, ...
-     215, 0, 0,       // 17, ...
-     178, 0, 0,       // 20, ...
-     151, 0, 0,       // 23, ... (113 & 203 are base angles)
-     132, 0, 0,       // 26, ...
-     116, 0, 0,       // 29, ...
-     102, 0, 0, 0,    // 32, ...
-      90, 0, 0,       // 36, ...
-      80, 0, 0,       // 39, ...
-      71, 0, 0,       // 42, ...
-      64, 0, 0,       // 45, ... (45 & 135 are base angles)
-      57, 0, 0,       // 48, ...
-      51, 0, 0,       // 51, ...
-      45, 0, 0, 0,    // 54, ...
-      40, 0, 0,       // 58, ...
-      35, 0, 0,       // 61, ...
-      31, 0, 0,       // 64, ...
-      27, 0, 0,       // 67, ... (67 & 157 are base angles)
-      23, 0, 0,       // 70, ...
-      19, 0, 0,       // 73, ...
-      15, 0, 0, 0, 0, // 76, ...
-      11, 0, 0,       // 81, ...
-       7, 0, 0,       // 84, ...
-       3, 0, 0,       // 87, ...
+          0,    // Entry [a >> 1] for acute angle a; columns list a, a + 90, a + 180:
+    1023, 0,    //  3,  93, 183
+     547,       //  6,  96, 186
+     372, 0, 0, //  9,  99, 189
+     273,       // 14, 104, 194
+     215, 0,    // 17, 107, 197
+     178,       // 20, 110, 200
+     151, 0,    // 23, 113, 203 (113 & 203 are base angles)
+     132,       // 26, 116, 206
+     116, 0,    // 29, 119, 209
+     102, 0,    // 32, 122, 212
+      90,       // 36, 126, 216
+      80, 0,    // 39, 129, 219
+      71,       // 42, 132, 222
+      64, 0,    // 45, 135, 225 (45 & 135 are base angles)
+      57,       // 48, 138, 228
+      51, 0,    // 51, 141, 231
+      45, 0,    // 54, 144, 234
+      40,       // 58, 148, 238
+      35, 0,    // 61, 151, 241
+      31,       // 64, 154, 244
+      27, 0,    // 67, 157, 247 (67 & 157 are base angles)
+      23,       // 70, 160, 250
+      19, 0,    // 73, 163, 253
+      15, 0,    // 76, 166, 256
+      11, 0,    // 81, 171, 261
+       7,       // 84, 174, 264
+       3        // 87, 177, 267
 };
 
 const int8_t ALIGN(dav1d_filter_intra_taps[5][64], 16) = {
--- a/src/tables.h
+++ b/src/tables.h
@@ -114,7 +114,7 @@
 extern const int16_t dav1d_resize_filter[64][8];
 
 extern const uint8_t dav1d_sm_weights[128];
-extern const int16_t dav1d_dr_intra_derivative[90];
+extern const uint16_t dav1d_dr_intra_derivative[44];
 extern const int8_t dav1d_filter_intra_taps[5][64];
 
 extern const uint8_t dav1d_obmc_masks[64];
--- a/src/x86/ipred.asm
+++ b/src/x86/ipred.asm
@@ -1308,9 +1308,10 @@
     inc                 tlq
     movsxd               wq, [r6+wq*4]
     add                  wq, r6
-    movzx               dxd, angleb
+    mov                 dxd, angled
+    and                 dxd, 0x7e
     add              angled, 165 ; ~90
-    movzx               dxd, word [r7+dxq*2]
+    movzx               dxd, word [r7+dxq]
     xor              angled, 0x4ff ; d = 90 - angle
     vpbroadcastd         m3, [pw_512]
     vpbroadcastd         m4, [pw_62]
@@ -2130,15 +2131,16 @@
     lea                  r6, [ipred_z3_avx2_table]
     tzcnt                hd, hm
     movifnidn        angled, anglem
-    lea                  r7, [dr_intra_derivative+90*2]
+    lea                  r7, [dr_intra_derivative+45*2-1]
     dec                 tlq
     movsxd               hq, [r6+hq*4]
     sub              angled, 180
     add                  hq, r6
-    movzx               dyd, angleb
+    mov                 dyd, angled
+    neg                 dyd
     xor              angled, 0x400
-    neg                 dyq
-    movzx               dyd, word [r7+dyq*2]
+    or                  dyq, ~0x7e
+    movzx               dyd, word [r7+dyq]
     vpbroadcastd         m3, [pw_512]
     vpbroadcastd         m4, [pw_62]
     vpbroadcastd         m5, [pw_64]