ref: f813285c1d1a5421e0180efbb7cbdd377cd31c69
parent: a440af4a51abf484b637ef936872dd378f40d86a
author: Henrik Gramner <gramner@twoorioles.com>
date: Sun Jan 13 18:04:53 EST 2019
Shrink dav1d_dr_intra_derivative[]
--- a/src/ipred_tmpl.c
+++ b/src/ipred_tmpl.c
@@ -422,7 +422,7 @@
const int enable_intra_edge_filter = angle >> 10;
angle &= 511;
assert(angle < 90);
- int dx = dav1d_dr_intra_derivative[angle];
+ int dx = dav1d_dr_intra_derivative[angle >> 1];
pixel top_out[(64 + 64) * 2];
const pixel *top;
int max_base_x;
@@ -476,8 +476,8 @@
const int enable_intra_edge_filter = angle >> 10;
angle &= 511;
assert(angle > 90 && angle < 180);
- int dy = dav1d_dr_intra_derivative[angle - 90];
- int dx = dav1d_dr_intra_derivative[180 - angle];
+ int dy = dav1d_dr_intra_derivative[(angle - 90) >> 1];
+ int dx = dav1d_dr_intra_derivative[(180 - angle) >> 1];
const int upsample_left = enable_intra_edge_filter ?
get_upsample(width + height, 180 - angle, is_sm) : 0;
const int upsample_above = enable_intra_edge_filter ?
@@ -557,7 +557,7 @@
const int enable_intra_edge_filter = angle >> 10;
angle &= 511;
assert(angle > 180);
- int dy = dav1d_dr_intra_derivative[270 - angle];
+ int dy = dav1d_dr_intra_derivative[(270 - angle) >> 1];
pixel left_out[(64 + 64) * 2];
const pixel *left;
int max_base_y;
--- a/src/tables.c
+++ b/src/tables.c
@@ -775,37 +775,36 @@
7, 6, 6, 5, 5, 4, 4, 4
};
-const int16_t dav1d_dr_intra_derivative[90] = {
- // More evenly spread out angles and limited to 10-bit
+const uint16_t dav1d_dr_intra_derivative[44] = { // Looked up as [a >> 1], where a is the prediction angle reduced below 90 by the caller; every value fits in 10 bits
// Values that are 0 will never be used
- 0, 0, 0, // Approx angle
- 1023, 0, 0, // 3, ...
- 547, 0, 0, // 6, ...
- 372, 0, 0, 0, 0, // 9, ...
- 273, 0, 0, // 14, ...
- 215, 0, 0, // 17, ...
- 178, 0, 0, // 20, ...
- 151, 0, 0, // 23, ... (113 & 203 are base angles)
- 132, 0, 0, // 26, ...
- 116, 0, 0, // 29, ...
- 102, 0, 0, 0, // 32, ...
- 90, 0, 0, // 36, ...
- 80, 0, 0, // 39, ...
- 71, 0, 0, // 42, ...
- 64, 0, 0, // 45, ... (45 & 135 are base angles)
- 57, 0, 0, // 48, ...
- 51, 0, 0, // 51, ...
- 45, 0, 0, 0, // 54, ...
- 40, 0, 0, // 58, ...
- 35, 0, 0, // 61, ...
- 31, 0, 0, // 64, ...
- 27, 0, 0, // 67, ... (67 & 157 are base angles)
- 23, 0, 0, // 70, ...
- 19, 0, 0, // 73, ...
- 15, 0, 0, 0, 0, // 76, ...
- 11, 0, 0, // 81, ...
- 7, 0, 0, // 84, ...
- 3, 0, 0, // 87, ...
+ 0, // Angles:
+ 1023, 0, // 3, 93, 183
+ 547, // 6, 96, 186
+ 372, 0, 0, // 9, 99, 189
+ 273, // 14, 104, 194
+ 215, 0, // 17, 107, 197
+ 178, // 20, 110, 200
+ 151, 0, // 23, 113, 203 (113 & 203 are base angles)
+ 132, // 26, 116, 206
+ 116, 0, // 29, 119, 209
+ 102, 0, // 32, 122, 212
+ 90, // 36, 126, 216
+ 80, 0, // 39, 129, 219
+ 71, // 42, 132, 222
+ 64, 0, // 45, 135, 225 (45 & 135 are base angles)
+ 57, // 48, 138, 228
+ 51, 0, // 51, 141, 231
+ 45, 0, // 54, 144, 234
+ 40, // 58, 148, 238
+ 35, 0, // 61, 151, 241
+ 31, // 64, 154, 244
+ 27, 0, // 67, 157, 247 (67 & 157 are base angles)
+ 23, // 70, 160, 250
+ 19, 0, // 73, 163, 253
+ 15, 0, // 76, 166, 256
+ 11, 0, // 81, 171, 261
+ 7, // 84, 174, 264
+ 3 // 87, 177, 267
};
const int8_t ALIGN(dav1d_filter_intra_taps[5][64], 16) = {
--- a/src/tables.h
+++ b/src/tables.h
@@ -114,7 +114,7 @@
extern const int16_t dav1d_resize_filter[64][8];
extern const uint8_t dav1d_sm_weights[128];
-extern const int16_t dav1d_dr_intra_derivative[90];
+extern const uint16_t dav1d_dr_intra_derivative[44];
extern const int8_t dav1d_filter_intra_taps[5][64];
extern const uint8_t dav1d_obmc_masks[64];
--- a/src/x86/ipred.asm
+++ b/src/x86/ipred.asm
@@ -1308,9 +1308,10 @@
inc tlq
movsxd wq, [r6+wq*4]
add wq, r6
- movzx dxd, angleb
+ mov dxd, angled
+ and dxd, 0x7e
add angled, 165 ; ~90
- movzx dxd, word [r7+dxq*2]
+ movzx dxd, word [r7+dxq]
xor angled, 0x4ff ; d = 90 - angle
vpbroadcastd m3, [pw_512]
vpbroadcastd m4, [pw_62]
@@ -2130,15 +2131,16 @@
lea r6, [ipred_z3_avx2_table]
tzcnt hd, hm
movifnidn angled, anglem
- lea r7, [dr_intra_derivative+90*2]
+ lea r7, [dr_intra_derivative+45*2-1]
dec tlq
movsxd hq, [r6+hq*4]
sub angled, 180
add hq, r6
- movzx dyd, angleb
+ mov dyd, angled
+ neg dyd
xor angled, 0x400
- neg dyq
- movzx dyd, word [r7+dyq*2]
+ or dyq, ~0x7e
+ movzx dyd, word [r7+dyq]
vpbroadcastd m3, [pw_512]
vpbroadcastd m4, [pw_62]
vpbroadcastd m5, [pw_64]