ref: 111ca4213355ac4edd10b3c14461096d56e3f3d0
parent: 2d3e879fccef330d89f5e5ae6c718cb37d888d2a
author: Ronald S. Bultje <rbultje@google.com>
date: Mon Mar 4 09:12:17 EST 2013
Make superblocks independent of macroblock code and data. Split macroblock and superblock tokenization and detokenization functions and coefficient-related data structs so that the bitstream layout and related code of superblock coefficients look less like a hack to fit macroblocks in superblocks. In addition, unify chroma transform size selection from luma transform size (i.e. always use the same size, as long as it fits the predictor); in practice, this means 32x32 and 64x64 superblocks using the 16x16 luma transform will now use the 16x16 (instead of the 8x8) chroma transform, and 64x64 superblocks using the 32x32 luma transform will now use the 32x32 (instead of the 16x16) chroma transform. Lastly, add a trellis optimize function for 32x32 transform blocks. HD gains about 0.3%, STDHD about 0.15% and derf about 0.1%. There are a few negative points here and there that I might want to analyze a little closer. Change-Id: Ibad7c3ddfe1acfc52771dfc27c03e9783e054430
--- a/vp9/common/vp9_blockd.c
+++ b/vp9/common/vp9_blockd.c
@@ -12,15 +12,431 @@
#include "vp9/common/vp9_blockd.h"
#include "vpx_mem/vpx_mem.h"
-const uint8_t vp9_block2left[TX_SIZE_MAX_SB][24] = {
- {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7},
- {0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 6, 6, 6, 6},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 6, 6, 6, 6},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 6, 6, 6, 6}
+const uint8_t vp9_block2left[TX_SIZE_MAX_MB][24] = {
+ { 0, 0, 0, 0,
+ 1, 1, 1, 1,
+ 2, 2, 2, 2,
+ 3, 3, 3, 3,
+ 4, 4,
+ 5, 5,
+ 6, 6,
+ 7, 7 },
+ { 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 2, 2, 2, 2,
+ 2, 2, 2, 2,
+ 4, 4,
+ 4, 4,
+ 6, 6,
+ 6, 6 },
+ { 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0 },
};
-const uint8_t vp9_block2above[TX_SIZE_MAX_SB][24] = {
- {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7},
- {0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 2, 2, 2, 2, 4, 4, 4, 4, 6, 6, 6, 6},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 6, 6, 6, 6},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 6, 6, 6, 6}
+const uint8_t vp9_block2above[TX_SIZE_MAX_MB][24] = {
+ { 0, 1, 2, 3,
+ 0, 1, 2, 3,
+ 0, 1, 2, 3,
+ 0, 1, 2, 3,
+ 4, 5,
+ 4, 5,
+ 6, 7,
+ 6, 7 },
+ { 0, 0, 0, 0,
+ 2, 2, 2, 2,
+ 0, 0, 0, 0,
+ 2, 2, 2, 2,
+ 4, 4,
+ 4, 4,
+ 6, 6,
+ 6, 6 },
+ { 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0 },
};
+
+#define S(x) x + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT)
+const uint8_t vp9_block2left_sb[TX_SIZE_MAX_SB][96] = {
+ { 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2, 2, 2,
+ 3, 3, 3, 3, 3, 3, 3, 3,
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1),
+ S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2),
+ S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3),
+ 4, 4, 4, 4,
+ 5, 5, 5, 5,
+ S(4), S(4), S(4), S(4),
+ S(5), S(5), S(5), S(5),
+ 6, 6, 6, 6,
+ 7, 7, 7, 7,
+ S(6), S(6), S(6), S(6),
+ S(7), S(7), S(7), S(7) },
+ { 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2,
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2),
+ S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2),
+ 4, 4, 4, 4,
+ 4, 4, 4, 4,
+ S(4), S(4), S(4), S(4),
+ S(4), S(4), S(4), S(4),
+ 6, 6, 6, 6,
+ 6, 6, 6, 6,
+ S(6), S(6), S(6), S(6),
+ S(6), S(6), S(6), S(6) },
+ { 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ 4, 4, 4, 4,
+ 4, 4, 4, 4,
+ 4, 4, 4, 4,
+ 4, 4, 4, 4,
+ 6, 6, 6, 6,
+ 6, 6, 6, 6,
+ 6, 6, 6, 6,
+ 6, 6, 6, 6 },
+ { 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0 },
+};
+const uint8_t vp9_block2above_sb[TX_SIZE_MAX_SB][96] = {
+ { 0, 1, 2, 3, S(0), S(1), S(2), S(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3),
+ 4, 5, S(4), S(5),
+ 4, 5, S(4), S(5),
+ 4, 5, S(4), S(5),
+ 4, 5, S(4), S(5),
+ 6, 7, S(6), S(7),
+ 6, 7, S(6), S(7),
+ 6, 7, S(6), S(7),
+ 6, 7, S(6), S(7) },
+ { 0, 0, 0, 0, 2, 2, 2, 2,
+ S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2),
+ 0, 0, 0, 0, 2, 2, 2, 2,
+ S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2),
+ 0, 0, 0, 0, 2, 2, 2, 2,
+ S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2),
+ 0, 0, 0, 0, 2, 2, 2, 2,
+ S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2),
+ 4, 4, 4, 4,
+ S(4), S(4), S(4), S(4),
+ 4, 4, 4, 4,
+ S(4), S(4), S(4), S(4),
+ 6, 6, 6, 6,
+ S(6), S(6), S(6), S(6),
+ 6, 6, 6, 6,
+ S(6), S(6), S(6), S(6) },
+ { 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ 4, 4, 4, 4,
+ 4, 4, 4, 4,
+ 4, 4, 4, 4,
+ 4, 4, 4, 4,
+ 6, 6, 6, 6,
+ 6, 6, 6, 6,
+ 6, 6, 6, 6,
+ 6, 6, 6, 6 },
+ { 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0 },
+};
+
+#define T(x) x + 2 * (sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT))
+#define U(x) x + 3 * (sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT))
+const uint8_t vp9_block2left_sb64[TX_SIZE_MAX_SB][384] = {
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1),
+ S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2),
+ S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3),
+ T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0),
+ T(1), T(1), T(1), T(1), T(1), T(1), T(1), T(1), T(1), T(1), T(1), T(1), T(1), T(1), T(1), T(1),
+ T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2),
+ T(3), T(3), T(3), T(3), T(3), T(3), T(3), T(3), T(3), T(3), T(3), T(3), T(3), T(3), T(3), T(3),
+ U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0),
+ U(1), U(1), U(1), U(1), U(1), U(1), U(1), U(1), U(1), U(1), U(1), U(1), U(1), U(1), U(1), U(1),
+ U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2),
+ U(3), U(3), U(3), U(3), U(3), U(3), U(3), U(3), U(3), U(3), U(3), U(3), U(3), U(3), U(3), U(3),
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 5, 5, 5, 5, 5, 5, 5, 5,
+ S(4), S(4), S(4), S(4), S(4), S(4), S(4), S(4),
+ S(5), S(5), S(5), S(5), S(5), S(5), S(5), S(5),
+ T(4), T(4), T(4), T(4), T(4), T(4), T(4), T(4),
+ T(5), T(5), T(5), T(5), T(5), T(5), T(5), T(5),
+ U(4), U(4), U(4), U(4), U(4), U(4), U(4), U(4),
+ U(5), U(5), U(5), U(5), U(5), U(5), U(5), U(5),
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 7, 7, 7, 7, 7, 7, 7, 7,
+ S(6), S(6), S(6), S(6), S(6), S(6), S(6), S(6),
+ S(7), S(7), S(7), S(7), S(7), S(7), S(7), S(7),
+ T(6), T(6), T(6), T(6), T(6), T(6), T(6), T(6),
+ T(7), T(7), T(7), T(7), T(7), T(7), T(7), T(7),
+ U(6), U(6), U(6), U(6), U(6), U(6), U(6), U(6),
+ U(7), U(7), U(7), U(7), U(7), U(7), U(7), U(7) },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2),
+ S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2),
+ T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0),
+ T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0),
+ T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2),
+ T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2),
+ U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0),
+ U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0),
+ U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2),
+ U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2),
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ S(4), S(4), S(4), S(4), S(4), S(4), S(4), S(4),
+ S(4), S(4), S(4), S(4), S(4), S(4), S(4), S(4),
+ T(4), T(4), T(4), T(4), T(4), T(4), T(4), T(4),
+ T(4), T(4), T(4), T(4), T(4), T(4), T(4), T(4),
+ U(4), U(4), U(4), U(4), U(4), U(4), U(4), U(4),
+ U(4), U(4), U(4), U(4), U(4), U(4), U(4), U(4),
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ S(6), S(6), S(6), S(6), S(6), S(6), S(6), S(6),
+ S(6), S(6), S(6), S(6), S(6), S(6), S(6), S(6),
+ T(6), T(6), T(6), T(6), T(6), T(6), T(6), T(6),
+ T(6), T(6), T(6), T(6), T(6), T(6), T(6), T(6),
+ U(6), U(6), U(6), U(6), U(6), U(6), U(6), U(6),
+ U(6), U(6), U(6), U(6), U(6), U(6), U(6), U(6) },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0),
+ T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0),
+ T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0),
+ T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0),
+ U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0),
+ U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0),
+ U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0),
+ U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0),
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ T(4), T(4), T(4), T(4), T(4), T(4), T(4), T(4),
+ T(4), T(4), T(4), T(4), T(4), T(4), T(4), T(4),
+ T(4), T(4), T(4), T(4), T(4), T(4), T(4), T(4),
+ T(4), T(4), T(4), T(4), T(4), T(4), T(4), T(4),
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ T(6), T(6), T(6), T(6), T(6), T(6), T(6), T(6),
+ T(6), T(6), T(6), T(6), T(6), T(6), T(6), T(6),
+ T(6), T(6), T(6), T(6), T(6), T(6), T(6), T(6),
+ T(6), T(6), T(6), T(6), T(6), T(6), T(6), T(6) },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6 },
+};
+const uint8_t vp9_block2above_sb64[TX_SIZE_MAX_SB][384] = {
+ { 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3),
+ 4, 5, S(4), S(5), T(4), T(5), U(4), U(5),
+ 4, 5, S(4), S(5), T(4), T(5), U(4), U(5),
+ 4, 5, S(4), S(5), T(4), T(5), U(4), U(5),
+ 4, 5, S(4), S(5), T(4), T(5), U(4), U(5),
+ 4, 5, S(4), S(5), T(4), T(5), U(4), U(5),
+ 4, 5, S(4), S(5), T(4), T(5), U(4), U(5),
+ 4, 5, S(4), S(5), T(4), T(5), U(4), U(5),
+ 4, 5, S(4), S(5), T(4), T(5), U(4), U(5),
+ 6, 7, S(6), S(7), T(6), T(7), U(6), U(7),
+ 6, 7, S(6), S(7), T(6), T(7), U(6), U(7),
+ 6, 7, S(6), S(7), T(6), T(7), U(6), U(7),
+ 6, 7, S(6), S(7), T(6), T(7), U(6), U(7),
+ 6, 7, S(6), S(7), T(6), T(7), U(6), U(7),
+ 6, 7, S(6), S(7), T(6), T(7), U(6), U(7),
+ 6, 7, S(6), S(7), T(6), T(7), U(6), U(7),
+ 6, 7, S(6), S(7), T(6), T(7), U(6), U(7) },
+ { 0, 0, 0, 0, 2, 2, 2, 2, S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2),
+ T(0), T(0), T(0), T(0), T(2), T(2), T(2), T(2), U(0), U(0), U(0), U(0), U(2), U(2), U(2), U(2),
+ 0, 0, 0, 0, 2, 2, 2, 2, S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2),
+ T(0), T(0), T(0), T(0), T(2), T(2), T(2), T(2), U(0), U(0), U(0), U(0), U(2), U(2), U(2), U(2),
+ 0, 0, 0, 0, 2, 2, 2, 2, S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2),
+ T(0), T(0), T(0), T(0), T(2), T(2), T(2), T(2), U(0), U(0), U(0), U(0), U(2), U(2), U(2), U(2),
+ 0, 0, 0, 0, 2, 2, 2, 2, S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2),
+ T(0), T(0), T(0), T(0), T(2), T(2), T(2), T(2), U(0), U(0), U(0), U(0), U(2), U(2), U(2), U(2),
+ 0, 0, 0, 0, 2, 2, 2, 2, S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2),
+ T(0), T(0), T(0), T(0), T(2), T(2), T(2), T(2), U(0), U(0), U(0), U(0), U(2), U(2), U(2), U(2),
+ 0, 0, 0, 0, 2, 2, 2, 2, S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2),
+ T(0), T(0), T(0), T(0), T(2), T(2), T(2), T(2), U(0), U(0), U(0), U(0), U(2), U(2), U(2), U(2),
+ 0, 0, 0, 0, 2, 2, 2, 2, S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2),
+ T(0), T(0), T(0), T(0), T(2), T(2), T(2), T(2), U(0), U(0), U(0), U(0), U(2), U(2), U(2), U(2),
+ 0, 0, 0, 0, 2, 2, 2, 2, S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2),
+ T(0), T(0), T(0), T(0), T(2), T(2), T(2), T(2), U(0), U(0), U(0), U(0), U(2), U(2), U(2), U(2),
+ 4, 4, 4, 4, S(4), S(4), S(4), S(4),
+ T(4), T(4), T(4), T(4), U(4), U(4), U(4), U(4),
+ 4, 4, 4, 4, S(4), S(4), S(4), S(4),
+ T(4), T(4), T(4), T(4), U(4), U(4), U(4), U(4),
+ 4, 4, 4, 4, S(4), S(4), S(4), S(4),
+ T(4), T(4), T(4), T(4), U(4), U(4), U(4), U(4),
+ 4, 4, 4, 4, S(4), S(4), S(4), S(4),
+ T(4), T(4), T(4), T(4), U(4), U(4), U(4), U(4),
+ 6, 6, 6, 6, S(6), S(6), S(6), S(6),
+ T(6), T(6), T(6), T(6), U(6), U(6), U(6), U(6),
+ 6, 6, 6, 6, S(6), S(6), S(6), S(6),
+ T(6), T(6), T(6), T(6), U(6), U(6), U(6), U(6),
+ 6, 6, 6, 6, S(6), S(6), S(6), S(6),
+ T(6), T(6), T(6), T(6), U(6), U(6), U(6), U(6),
+ 6, 6, 6, 6, S(6), S(6), S(6), S(6),
+ T(6), T(6), T(6), T(6), U(6), U(6), U(6), U(6) },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0),
+ U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0),
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0),
+ U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0),
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0),
+ U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0),
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0),
+ U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0),
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ T(4), T(4), T(4), T(4), T(4), T(4), T(4), T(4),
+ T(4), T(4), T(4), T(4), T(4), T(4), T(4), T(4),
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ T(4), T(4), T(4), T(4), T(4), T(4), T(4), T(4),
+ T(4), T(4), T(4), T(4), T(4), T(4), T(4), T(4),
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ T(6), T(6), T(6), T(6), T(6), T(6), T(6), T(6),
+ T(6), T(6), T(6), T(6), T(6), T(6), T(6), T(6),
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ T(6), T(6), T(6), T(6), T(6), T(6), T(6), T(6),
+ T(6), T(6), T(6), T(6), T(6), T(6), T(6), T(6) },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6 },
+};
+#undef U
+#undef T
+#undef S
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -277,13 +277,6 @@
union b_mode_info bmi;
} BLOCKD;
-typedef struct superblockd {
- /* 32x32 Y and 16x16 U/V */
- DECLARE_ALIGNED(16, int16_t, diff[32*32+16*16*2]);
- DECLARE_ALIGNED(16, int16_t, qcoeff[32*32+16*16*2]);
- DECLARE_ALIGNED(16, int16_t, dqcoeff[32*32+16*16*2]);
-} SUPERBLOCKD;
-
struct scale_factors {
int x_num;
int x_den;
@@ -297,14 +290,12 @@
};
typedef struct macroblockd {
- DECLARE_ALIGNED(16, int16_t, diff[384]); /* from idct diff */
- DECLARE_ALIGNED(16, uint8_t, predictor[384]);
- DECLARE_ALIGNED(16, int16_t, qcoeff[384]);
- DECLARE_ALIGNED(16, int16_t, dqcoeff[384]);
- DECLARE_ALIGNED(16, uint16_t, eobs[24]);
+ DECLARE_ALIGNED(16, int16_t, diff[64*64+32*32*2]); /* from idct diff */
+ DECLARE_ALIGNED(16, uint8_t, predictor[384]); // unused for superblocks
+ DECLARE_ALIGNED(16, int16_t, qcoeff[64*64+32*32*2]);
+ DECLARE_ALIGNED(16, int16_t, dqcoeff[64*64+32*32*2]);
+ DECLARE_ALIGNED(16, uint16_t, eobs[256+64*2]);
- SUPERBLOCKD sb_coeff_data;
-
/* 16 Y blocks, 4 U, 4 V, each with 16 entries. */
BLOCKD block[24];
int fullpixel_mask;
@@ -451,8 +442,12 @@
}
}
-extern const uint8_t vp9_block2left[TX_SIZE_MAX_SB][24];
-extern const uint8_t vp9_block2above[TX_SIZE_MAX_SB][24];
+extern const uint8_t vp9_block2left[TX_SIZE_MAX_MB][24];
+extern const uint8_t vp9_block2above[TX_SIZE_MAX_MB][24];
+extern const uint8_t vp9_block2left_sb[TX_SIZE_MAX_SB][96];
+extern const uint8_t vp9_block2above_sb[TX_SIZE_MAX_SB][96];
+extern const uint8_t vp9_block2left_sb64[TX_SIZE_MAX_SB][384];
+extern const uint8_t vp9_block2above_sb64[TX_SIZE_MAX_SB][384];
#define USE_ADST_FOR_I16X16_8X8 0
#define USE_ADST_FOR_I16X16_4X4 0
--- a/vp9/common/vp9_default_coef_probs.h
+++ b/vp9/common/vp9_default_coef_probs.h
@@ -270,85 +270,85 @@
}, { /* block Type 1 */
{ /* Intra */
{ /* Coeff Band 0 */
- { 202, 29, 181, 221, 168, 177, 217, 162, 235, 202, 157 },
- { 117, 39, 146, 207, 155, 172, 203, 155, 236, 192, 208 },
- { 46, 40, 99, 171, 136, 161, 176, 140, 229, 177, 208 }
+ { 210, 33, 210, 232, 185, 185, 210, 166, 207, 192, 146 },
+ { 118, 47, 169, 220, 170, 179, 201, 160, 231, 183, 211 },
+ { 40, 52, 119, 203, 146, 169, 207, 160, 242, 194, 222 }
}, { /* Coeff Band 1 */
- { 1, 138, 204, 227, 179, 181, 224, 161, 249, 203, 237 },
- { 116, 138, 209, 227, 179, 180, 222, 165, 248, 204, 241 },
- { 63, 112, 184, 227, 183, 178, 223, 167, 248, 206, 237 },
- { 47, 84, 140, 219, 163, 177, 223, 160, 249, 207, 241 },
- { 25, 53, 76, 179, 120, 156, 217, 152, 248, 205, 232 },
- { 10, 23, 29, 76, 91, 132, 145, 109, 228, 169, 214 }
+ { 1, 158, 215, 239, 192, 188, 234, 174, 253, 219, 230 },
+ { 130, 149, 210, 238, 191, 188, 233, 174, 253, 221, 240 },
+ { 59, 123, 193, 237, 188, 187, 232, 174, 252, 220, 246 },
+ { 22, 89, 154, 232, 172, 183, 233, 173, 253, 219, 237 },
+ { 4, 49, 83, 193, 128, 160, 227, 161, 253, 219, 233 },
+ { 1, 18, 27, 87, 90, 133, 160, 112, 242, 185, 231 }
}, { /* Coeff Band 2 */
- { 1, 69, 198, 223, 179, 177, 225, 154, 251, 208, 227 },
- { 78, 78, 170, 223, 170, 179, 218, 162, 248, 203, 245 },
- { 26, 69, 117, 209, 154, 170, 215, 160, 249, 205, 239 },
- { 16, 54, 79, 180, 119, 156, 208, 151, 248, 201, 238 },
- { 12, 43, 45, 119, 102, 142, 186, 126, 245, 193, 236 },
- { 1, 24, 22, 60, 92, 133, 114, 99, 221, 154, 210 }
+ { 1, 87, 205, 244, 192, 193, 239, 188, 252, 220, 217 },
+ { 64, 93, 169, 237, 175, 186, 237, 184, 253, 222, 235 },
+ { 19, 77, 130, 222, 154, 175, 231, 173, 253, 221, 223 },
+ { 6, 59, 95, 196, 132, 162, 223, 160, 251, 215, 240 },
+ { 1, 37, 57, 144, 109, 146, 201, 135, 250, 205, 238 },
+ { 1, 17, 26, 81, 94, 138, 135, 107, 232, 168, 223 }
}, { /* Coeff Band 3 */
- { 1, 135, 214, 222, 183, 178, 230, 144, 252, 208, 241 },
- { 107, 122, 201, 229, 181, 182, 221, 165, 250, 202, 243 },
- { 38, 100, 168, 221, 168, 176, 220, 166, 250, 208, 240 },
- { 21, 83, 125, 206, 149, 167, 217, 160, 250, 209, 238 },
- { 16, 65, 80, 164, 122, 156, 208, 139, 250, 206, 246 },
- { 3, 37, 43, 104, 103, 143, 156, 118, 237, 173, 227 }
+ { 1, 150, 219, 243, 198, 192, 237, 182, 253, 227, 245 },
+ { 88, 130, 202, 239, 190, 188, 236, 180, 253, 224, 255 },
+ { 25, 103, 172, 231, 175, 182, 234, 174, 253, 227, 248 },
+ { 7, 78, 128, 215, 156, 172, 228, 166, 252, 222, 248 },
+ { 1, 48, 76, 175, 121, 155, 212, 149, 251, 213, 237 },
+ { 1, 22, 35, 101, 97, 141, 161, 120, 236, 181, 213 }
}, { /* Coeff Band 4 */
- { 1, 169, 223, 233, 193, 184, 234, 150, 254, 206, 243 },
- { 83, 140, 201, 233, 184, 185, 228, 168, 252, 203, 223 },
- { 19, 104, 158, 225, 168, 179, 228, 169, 253, 207, 248 },
- { 10, 76, 117, 209, 145, 168, 223, 166, 252, 210, 243 },
- { 8, 59, 79, 163, 119, 153, 213, 142, 250, 205, 230 },
- { 1, 31, 43, 100, 103, 144, 149, 116, 240, 171, 221 }
+ { 1, 177, 228, 247, 206, 197, 243, 191, 255, 232, 255 },
+ { 76, 143, 205, 243, 192, 192, 241, 189, 253, 223, 255 },
+ { 17, 107, 163, 233, 170, 183, 239, 183, 253, 227, 218 },
+ { 3, 75, 118, 216, 147, 171, 234, 174, 253, 220, 249 },
+ { 1, 43, 71, 174, 118, 154, 217, 153, 250, 211, 240 },
+ { 1, 19, 31, 93, 93, 136, 154, 116, 235, 178, 228 }
}, { /* Coeff Band 5 */
- { 1, 190, 234, 247, 211, 197, 239, 172, 255, 208, 236 },
- { 65, 152, 218, 244, 199, 194, 236, 184, 252, 199, 249 },
- { 17, 109, 173, 237, 179, 186, 235, 183, 250, 205, 255 },
- { 6, 78, 127, 219, 153, 173, 231, 177, 251, 210, 249 },
- { 3, 56, 77, 172, 121, 157, 215, 152, 249, 209, 247 },
- { 1, 29, 38, 96, 97, 144, 152, 114, 239, 169, 243 }
+ { 1, 192, 230, 251, 215, 205, 245, 201, 254, 229, 255 },
+ { 66, 142, 206, 248, 200, 202, 244, 197, 255, 224, 255 },
+ { 21, 107, 166, 241, 176, 191, 241, 192, 253, 230, 255 },
+ { 5, 79, 129, 221, 150, 173, 237, 178, 254, 226, 255 },
+ { 1, 43, 72, 173, 117, 151, 217, 150, 253, 216, 245 },
+ { 1, 17, 28, 93, 95, 139, 162, 114, 245, 187, 235 }
}
}, { /* Inter */
{ /* Coeff Band 0 */
- { 223, 71, 225, 221, 176, 169, 242, 165, 248, 216, 201 },
- { 147, 79, 197, 215, 175, 172, 230, 154, 243, 203, 184 },
- { 69, 75, 152, 197, 158, 168, 203, 144, 231, 187, 177 }
+ { 235, 68, 250, 244, 206, 192, 229, 177, 248, 215, 237 },
+ { 169, 88, 225, 235, 191, 184, 222, 170, 246, 205, 237 },
+ { 65, 100, 171, 214, 166, 173, 216, 157, 249, 213, 215 }
}, { /* Coeff Band 1 */
- { 1, 168, 219, 195, 168, 151, 249, 131, 255, 221, 255 },
- { 152, 156, 226, 210, 189, 173, 240, 121, 255, 215, 238 },
- { 82, 128, 198, 239, 201, 194, 220, 151, 254, 202, 251 },
- { 74, 107, 150, 236, 163, 187, 222, 177, 255, 204, 255 },
- { 59, 103, 120, 181, 125, 148, 232, 157, 255, 219, 245 },
- { 21, 63, 84, 129, 122, 150, 171, 118, 246, 196, 226 }
+ { 1, 191, 246, 250, 217, 202, 244, 195, 255, 226, 128 },
+ { 177, 169, 236, 250, 216, 201, 244, 194, 251, 228, 255 },
+ { 70, 132, 205, 250, 209, 205, 246, 193, 254, 246, 255 },
+ { 41, 108, 165, 244, 172, 194, 246, 202, 255, 229, 255 },
+ { 23, 84, 126, 207, 140, 162, 244, 179, 254, 237, 255 },
+ { 11, 57, 83, 149, 127, 156, 180, 126, 247, 202, 220 }
}, { /* Coeff Band 2 */
- { 1, 133, 219, 202, 174, 158, 244, 133, 255, 214, 237 },
- { 101, 132, 204, 221, 187, 183, 225, 131, 253, 201, 247 },
- { 41, 107, 147, 228, 174, 187, 211, 162, 252, 201, 246 },
- { 40, 107, 107, 205, 129, 162, 213, 164, 252, 206, 232 },
- { 24, 140, 90, 122, 111, 141, 210, 127, 251, 208, 239 },
- { 1, 59, 55, 91, 111, 141, 144, 109, 241, 180, 226 }
+ { 1, 169, 240, 250, 212, 202, 242, 192, 252, 222, 255 },
+ { 105, 151, 215, 246, 200, 197, 240, 190, 253, 221, 255 },
+ { 24, 111, 166, 237, 177, 188, 236, 183, 252, 213, 255 },
+ { 9, 83, 122, 218, 148, 170, 233, 174, 250, 215, 242 },
+ { 1, 55, 77, 168, 118, 152, 215, 150, 248, 213, 226 },
+ { 1, 26, 36, 104, 98, 146, 149, 116, 235, 182, 225 }
}, { /* Coeff Band 3 */
- { 1, 170, 226, 200, 179, 153, 245, 138, 255, 214, 241 },
- { 111, 149, 217, 226, 194, 186, 223, 137, 255, 211, 253 },
- { 40, 113, 174, 228, 180, 183, 211, 165, 255, 212, 247 },
- { 44, 101, 126, 210, 151, 167, 212, 161, 255, 217, 241 },
- { 43, 131, 103, 146, 119, 148, 211, 136, 254, 216, 250 },
- { 1, 57, 63, 112, 116, 145, 158, 115, 249, 193, 236 }
+ { 1, 191, 243, 251, 219, 204, 246, 196, 255, 230, 128 },
+ { 97, 168, 225, 248, 207, 198, 244, 193, 254, 225, 192 },
+ { 15, 122, 182, 241, 187, 188, 241, 190, 251, 231, 228 },
+ { 3, 83, 131, 226, 160, 178, 237, 180, 251, 222, 205 },
+ { 1, 49, 77, 184, 121, 155, 222, 159, 249, 216, 249 },
+ { 1, 21, 32, 98, 98, 140, 152, 113, 233, 173, 243 }
}, { /* Coeff Band 4 */
- { 1, 186, 233, 216, 191, 163, 241, 143, 255, 210, 255 },
- { 91, 161, 214, 225, 190, 181, 224, 150, 255, 212, 253 },
- { 26, 117, 163, 220, 172, 180, 218, 148, 255, 215, 252 },
- { 27, 90, 122, 203, 143, 167, 212, 159, 255, 213, 255 },
- { 21, 98, 113, 163, 130, 153, 208, 141, 255, 215, 248 },
- { 1, 47, 66, 130, 118, 151, 167, 123, 252, 199, 235 }
+ { 1, 202, 242, 253, 226, 212, 245, 205, 254, 226, 255 },
+ { 83, 168, 219, 252, 212, 211, 244, 200, 250, 215, 255 },
+ { 9, 143, 174, 245, 183, 197, 241, 194, 254, 217, 255 },
+ { 1, 105, 129, 228, 154, 179, 233, 179, 253, 211, 255 },
+ { 1, 47, 72, 177, 116, 152, 214, 157, 251, 209, 255 },
+ { 1, 18, 26, 79, 94, 137, 150, 109, 246, 175, 248 }
}, { /* Coeff Band 5 */
- { 1, 195, 236, 245, 211, 195, 238, 171, 255, 209, 248 },
- { 65, 156, 218, 245, 200, 196, 230, 185, 255, 212, 248 },
- { 13, 112, 172, 238, 180, 189, 231, 185, 255, 213, 250 },
- { 6, 83, 130, 224, 155, 177, 227, 180, 255, 214, 244 },
- { 5, 71, 91, 185, 133, 160, 214, 154, 254, 212, 248 },
- { 1, 45, 63, 128, 112, 147, 169, 129, 248, 190, 236 }
+ { 1, 205, 236, 254, 233, 221, 247, 201, 255, 220, 128 },
+ { 87, 149, 205, 254, 211, 219, 245, 207, 255, 239, 128 },
+ { 56, 122, 162, 248, 164, 195, 246, 211, 255, 231, 128 },
+ { 26, 108, 163, 224, 149, 169, 240, 187, 255, 238, 255 },
+ { 1, 54, 89, 171, 123, 152, 219, 148, 254, 226, 255 },
+ { 1, 21, 34, 99, 90, 140, 174, 112, 252, 210, 255 }
}
}
}
@@ -441,90 +441,90 @@
}, { /* block Type 1 */
{ /* Intra */
{ /* Coeff Band 0 */
- { 198, 28, 192, 217, 170, 174, 201, 162, 219, 179, 159 },
- { 96, 36, 145, 198, 153, 167, 193, 153, 222, 180, 177 },
- { 31, 35, 89, 156, 131, 157, 166, 136, 214, 170, 178 }
+ { 203, 35, 218, 235, 189, 187, 194, 174, 175, 150, 127 },
+ { 95, 50, 155, 211, 161, 173, 190, 163, 198, 161, 187 },
+ { 21, 46, 93, 178, 130, 157, 200, 151, 224, 186, 191 }
}, { /* Coeff Band 1 */
- { 1, 138, 202, 225, 174, 178, 218, 164, 243, 200, 201 },
- { 147, 134, 202, 223, 174, 177, 215, 162, 243, 204, 220 },
- { 65, 115, 179, 224, 176, 177, 215, 162, 243, 202, 227 },
- { 25, 86, 141, 217, 163, 177, 216, 159, 243, 201, 225 },
- { 6, 48, 79, 181, 125, 157, 209, 151, 244, 201, 212 },
- { 1, 16, 25, 77, 91, 134, 132, 112, 210, 162, 180 }
+ { 1, 155, 198, 236, 183, 187, 223, 175, 250, 209, 255 },
+ { 115, 147, 192, 235, 182, 186, 222, 173, 244, 199, 222 },
+ { 43, 124, 174, 234, 178, 186, 222, 176, 249, 201, 255 },
+ { 13, 96, 143, 227, 164, 181, 223, 174, 248, 197, 237 },
+ { 2, 59, 91, 197, 131, 163, 213, 162, 246, 198, 241 },
+ { 1, 19, 29, 85, 96, 139, 128, 116, 215, 153, 204 }
}, { /* Coeff Band 2 */
- { 1, 78, 195, 222, 172, 177, 219, 162, 245, 205, 227 },
- { 67, 79, 154, 211, 158, 171, 212, 159, 243, 201, 222 },
- { 18, 63, 108, 192, 140, 163, 205, 152, 242, 197, 214 },
- { 6, 49, 77, 163, 121, 154, 192, 142, 239, 191, 216 },
- { 1, 34, 49, 112, 106, 143, 160, 122, 233, 178, 213 },
- { 1, 14, 20, 56, 93, 135, 94, 102, 189, 141, 170 }
+ { 1, 91, 180, 231, 170, 180, 237, 181, 248, 213, 230 },
+ { 39, 83, 139, 220, 153, 173, 233, 179, 243, 200, 228 },
+ { 12, 63, 106, 203, 136, 163, 227, 170, 244, 200, 234 },
+ { 5, 48, 79, 178, 123, 154, 215, 155, 244, 197, 232 },
+ { 1, 32, 50, 125, 104, 144, 171, 130, 238, 181, 229 },
+ { 1, 12, 18, 54, 88, 131, 92, 99, 201, 142, 193 }
}, { /* Coeff Band 3 */
- { 1, 137, 210, 229, 182, 181, 223, 164, 247, 214, 201 },
- { 89, 123, 189, 226, 176, 180, 217, 165, 245, 207, 216 },
- { 24, 100, 155, 217, 162, 176, 215, 163, 242, 198, 215 },
- { 8, 78, 121, 199, 147, 167, 206, 155, 241, 198, 212 },
- { 2, 52, 81, 161, 125, 156, 185, 139, 236, 186, 207 },
- { 1, 22, 35, 88, 102, 141, 121, 116, 199, 153, 179 }
+ { 1, 152, 202, 238, 186, 188, 227, 178, 248, 205, 229 },
+ { 63, 125, 183, 234, 178, 184, 225, 179, 248, 205, 228 },
+ { 15, 100, 153, 227, 166, 180, 223, 173, 244, 198, 229 },
+ { 4, 76, 119, 210, 149, 170, 215, 165, 245, 200, 221 },
+ { 1, 46, 73, 165, 120, 154, 192, 144, 241, 189, 225 },
+ { 1, 18, 27, 78, 95, 136, 124, 110, 219, 158, 207 }
}, { /* Coeff Band 4 */
- { 1, 169, 220, 239, 196, 191, 220, 173, 242, 201, 226 },
- { 64, 139, 195, 231, 183, 184, 215, 169, 240, 196, 211 },
- { 12, 103, 153, 217, 162, 174, 212, 163, 236, 195, 211 },
- { 3, 71, 109, 190, 141, 164, 202, 152, 240, 192, 220 },
- { 1, 38, 61, 139, 114, 149, 175, 133, 233, 183, 211 },
- { 1, 13, 22, 61, 93, 134, 101, 106, 194, 145, 185 }
+ { 1, 181, 211, 243, 197, 195, 228, 180, 249, 211, 252 },
+ { 40, 138, 189, 237, 184, 189, 226, 178, 249, 208, 247 },
+ { 7, 103, 153, 226, 166, 179, 223, 171, 249, 209, 224 },
+ { 1, 71, 110, 200, 143, 166, 213, 159, 249, 206, 241 },
+ { 1, 37, 60, 144, 111, 150, 189, 135, 245, 196, 232 },
+ { 1, 15, 25, 75, 91, 134, 128, 108, 224, 163, 213 }
}, { /* Coeff Band 5 */
- { 1, 204, 220, 234, 193, 185, 220, 166, 247, 207, 237 },
- { 42, 139, 187, 221, 174, 177, 215, 161, 246, 201, 242 },
- { 5, 83, 132, 204, 152, 168, 212, 158, 246, 203, 225 },
- { 1, 48, 84, 175, 126, 157, 203, 148, 245, 199, 233 },
- { 1, 24, 46, 123, 103, 142, 178, 128, 243, 189, 235 },
- { 1, 10, 19, 58, 88, 134, 109, 101, 216, 151, 216 }
+ { 1, 215, 219, 246, 205, 197, 236, 183, 252, 221, 235 },
+ { 32, 146, 197, 239, 187, 188, 234, 180, 252, 223, 247 },
+ { 6, 100, 150, 227, 167, 178, 233, 178, 252, 219, 233 },
+ { 1, 63, 102, 203, 138, 167, 225, 162, 252, 216, 240 },
+ { 1, 33, 56, 148, 109, 146, 202, 138, 250, 208, 237 },
+ { 1, 15, 25, 75, 90, 131, 138, 108, 236, 171, 235 }
}
}, { /* Inter */
{ /* Coeff Band 0 */
- { 227, 36, 243, 237, 206, 186, 210, 157, 245, 195, 200 },
- { 144, 41, 214, 226, 190, 182, 207, 155, 238, 193, 177 },
- { 63, 37, 153, 199, 162, 169, 193, 145, 227, 187, 152 }
+ { 228, 37, 245, 229, 199, 183, 200, 146, 240, 188, 223 },
+ { 138, 62, 209, 217, 184, 177, 195, 148, 246, 186, 236 },
+ { 42, 79, 146, 185, 156, 167, 183, 137, 247, 189, 251 }
}, { /* Coeff Band 1 */
- { 1, 170, 247, 248, 213, 201, 239, 188, 238, 203, 255 },
- { 214, 166, 242, 248, 212, 198, 236, 191, 221, 219, 199 },
- { 139, 148, 224, 247, 207, 197, 236, 189, 249, 241, 128 },
- { 102, 127, 195, 244, 190, 198, 235, 189, 239, 202, 228 },
- { 76, 106, 154, 227, 159, 176, 234, 182, 243, 216, 229 },
- { 52, 69, 93, 158, 125, 155, 173, 139, 225, 170, 209 }
+ { 1, 205, 242, 248, 210, 202, 245, 193, 233, 230, 255 },
+ { 191, 185, 234, 249, 210, 201, 245, 194, 255, 197, 128 },
+ { 112, 148, 214, 247, 208, 201, 246, 192, 255, 238, 128 },
+ { 76, 120, 182, 246, 190, 198, 246, 202, 255, 244, 128 },
+ { 51, 95, 145, 232, 156, 177, 246, 199, 255, 233, 128 },
+ { 47, 71, 104, 195, 129, 158, 230, 167, 253, 224, 255 }
}, { /* Coeff Band 2 */
- { 1, 139, 241, 245, 205, 193, 230, 177, 239, 198, 183 },
- { 131, 139, 214, 240, 191, 189, 224, 181, 236, 203, 194 },
- { 32, 102, 157, 228, 167, 177, 221, 174, 235, 191, 194 },
- { 12, 75, 112, 201, 142, 163, 208, 161, 227, 180, 200 },
- { 2, 45, 66, 142, 119, 154, 178, 141, 220, 171, 213 },
- { 1, 15, 20, 56, 102, 151, 87, 104, 182, 136, 175 }
+ { 1, 182, 235, 247, 204, 195, 246, 202, 255, 227, 128 },
+ { 104, 145, 204, 243, 189, 191, 242, 199, 255, 229, 128 },
+ { 35, 107, 159, 234, 167, 181, 244, 188, 255, 221, 128 },
+ { 17, 87, 126, 216, 151, 168, 242, 179, 255, 242, 128 },
+ { 4, 68, 91, 182, 131, 154, 222, 153, 255, 228, 128 },
+ { 1, 55, 64, 126, 105, 137, 193, 121, 247, 194, 255 }
}, { /* Coeff Band 3 */
- { 1, 174, 243, 248, 212, 201, 237, 194, 249, 207, 255 },
- { 134, 155, 223, 244, 200, 195, 230, 184, 248, 189, 233 },
- { 26, 115, 177, 235, 180, 185, 225, 176, 245, 198, 255 },
- { 8, 82, 129, 217, 156, 175, 220, 168, 243, 204, 228 },
- { 3, 48, 75, 165, 122, 155, 193, 145, 245, 189, 199 },
- { 1, 15, 27, 73, 101, 139, 117, 112, 212, 157, 209 }
+ { 1, 210, 239, 249, 209, 201, 249, 205, 255, 255, 128 },
+ { 91, 162, 218, 247, 200, 195, 250, 199, 255, 255, 128 },
+ { 16, 116, 173, 242, 184, 190, 251, 193, 255, 205, 128 },
+ { 5, 85, 133, 228, 156, 178, 244, 184, 255, 251, 128 },
+ { 1, 55, 83, 196, 125, 164, 236, 168, 249, 249, 255 },
+ { 1, 24, 39, 127, 92, 154, 183, 133, 255, 192, 128 }
}, { /* Coeff Band 4 */
- { 1, 191, 244, 248, 214, 200, 229, 185, 249, 207, 255 },
- { 106, 167, 221, 242, 198, 192, 223, 178, 245, 202, 246 },
- { 13, 117, 169, 229, 175, 182, 220, 170, 244, 202, 226 },
- { 2, 74, 114, 203, 143, 170, 211, 160, 248, 199, 232 },
- { 1, 35, 58, 141, 111, 144, 184, 132, 244, 196, 239 },
- { 1, 12, 22, 66, 91, 138, 114, 102, 225, 156, 214 }
+ { 1, 225, 242, 252, 218, 205, 251, 207, 255, 255, 128 },
+ { 67, 174, 223, 249, 205, 199, 250, 210, 255, 234, 128 },
+ { 10, 119, 177, 243, 186, 187, 253, 199, 255, 255, 128 },
+ { 2, 81, 129, 228, 154, 177, 244, 193, 255, 251, 128 },
+ { 1, 48, 78, 193, 122, 152, 240, 171, 255, 240, 128 },
+ { 1, 19, 43, 116, 96, 128, 195, 135, 255, 234, 128 }
}, { /* Coeff Band 5 */
- { 1, 220, 231, 246, 203, 196, 239, 188, 255, 212, 255 },
- { 42, 155, 203, 241, 189, 191, 235, 184, 253, 220, 255 },
- { 4, 95, 151, 230, 167, 182, 234, 178, 252, 217, 243 },
- { 1, 61, 105, 206, 140, 168, 226, 167, 250, 215, 242 },
- { 1, 31, 60, 151, 109, 148, 204, 142, 250, 208, 230 },
- { 1, 13, 26, 76, 93, 132, 139, 106, 236, 171, 237 }
+ { 1, 237, 210, 255, 213, 219, 255, 235, 255, 219, 128 },
+ { 49, 163, 203, 252, 182, 198, 255, 235, 255, 255, 128 },
+ { 23, 114, 156, 247, 196, 187, 255, 238, 255, 255, 128 },
+ { 6, 71, 124, 248, 163, 202, 253, 203, 255, 255, 128 },
+ { 1, 35, 74, 226, 160, 162, 246, 189, 255, 244, 128 },
+ { 1, 16, 19, 136, 92, 164, 237, 108, 255, 255, 128 }
}
}
}
};
-static const vp9_coeff_probs default_coef_probs_32x32[BLOCK_TYPES_32X32] = {
+static const vp9_coeff_probs default_coef_probs_32x32[BLOCK_TYPES] = {
{ /* block Type 0 */
{ /* Intra */
{ /* Coeff Band 0 */
@@ -607,6 +607,90 @@
{ 1, 61, 99, 193, 137, 164, 207, 155, 239, 197, 208 },
{ 1, 28, 49, 128, 105, 145, 177, 130, 234, 185, 206 },
{ 1, 9, 16, 48, 89, 134, 89, 99, 183, 140, 169 }
+ }
+ }
+ }, { /* block Type 1 */
+ { /* Intra */
+ { /* Coeff Band 0 */
+ { 176, 22, 201, 227, 185, 189, 160, 172, 115, 141, 105 },
+ { 64, 33, 120, 195, 149, 171, 170, 150, 182, 175, 139 },
+ { 12, 33, 68, 151, 118, 153, 172, 138, 202, 175, 153 }
+ }, { /* Coeff Band 1 */
+ { 1, 125, 175, 228, 163, 176, 215, 171, 226, 193, 165 },
+ { 127, 126, 174, 224, 163, 177, 212, 167, 225, 175, 235 },
+ { 57, 114, 159, 223, 166, 175, 216, 167, 234, 182, 211 },
+ { 23, 93, 133, 215, 150, 174, 216, 171, 233, 174, 176 },
+ { 4, 56, 84, 178, 127, 157, 209, 149, 233, 197, 194 },
+ { 1, 19, 26, 70, 93, 136, 114, 108, 193, 150, 167 }
+ }, { /* Coeff Band 2 */
+ { 1, 76, 172, 217, 161, 172, 216, 165, 240, 188, 226 },
+ { 41, 73, 136, 208, 152, 168, 214, 163, 233, 189, 248 },
+ { 14, 59, 102, 195, 137, 163, 209, 158, 227, 184, 204 },
+ { 4, 45, 75, 168, 122, 153, 197, 148, 231, 193, 178 },
+ { 1, 33, 48, 118, 106, 148, 154, 126, 221, 168, 211 },
+ { 1, 12, 16, 42, 90, 143, 61, 94, 159, 122, 167 }
+ }, { /* Coeff Band 3 */
+ { 1, 134, 186, 226, 173, 180, 208, 172, 220, 179, 205 },
+ { 60, 114, 164, 219, 166, 177, 207, 166, 231, 176, 208 },
+ { 18, 90, 134, 208, 152, 175, 200, 164, 225, 181, 199 },
+ { 7, 67, 102, 189, 139, 164, 192, 155, 225, 172, 209 },
+ { 1, 39, 59, 137, 116, 151, 160, 132, 222, 166, 212 },
+ { 1, 12, 17, 50, 93, 134, 82, 102, 181, 131, 190 }
+ }, { /* Coeff Band 4 */
+ { 1, 160, 195, 229, 180, 185, 204, 163, 243, 185, 223 },
+ { 31, 124, 170, 221, 170, 179, 201, 164, 240, 183, 223 },
+ { 5, 91, 134, 204, 154, 170, 191, 155, 236, 178, 232 },
+ { 1, 62, 95, 173, 135, 159, 180, 145, 234, 179, 225 },
+ { 1, 30, 48, 116, 109, 147, 152, 123, 231, 170, 224 },
+ { 1, 11, 17, 53, 90, 133, 93, 102, 201, 139, 202 }
+ }, { /* Coeff Band 5 */
+ { 1, 215, 203, 233, 186, 183, 226, 170, 249, 213, 225 },
+ { 13, 133, 175, 224, 170, 178, 224, 167, 250, 212, 235 },
+ { 1, 83, 127, 209, 151, 169, 221, 162, 251, 212, 243 },
+ { 1, 53, 85, 182, 127, 157, 213, 153, 250, 210, 234 },
+ { 1, 30, 47, 131, 103, 143, 190, 132, 248, 200, 240 },
+ { 1, 14, 21, 67, 89, 129, 126, 104, 232, 167, 223 }
+ }
+ }, { /* Inter */
+ { /* Coeff Band 0 */
+ { 193, 35, 239, 239, 206, 194, 195, 152, 244, 200, 255 },
+ { 77, 57, 198, 224, 192, 187, 181, 145, 242, 190, 248 },
+ { 21, 54, 149, 197, 172, 171, 169, 138, 238, 178, 241 }
+ }, { /* Coeff Band 1 */
+ { 1, 227, 241, 247, 195, 195, 245, 199, 255, 255, 128 },
+ { 189, 223, 237, 249, 199, 200, 238, 198, 255, 255, 128 },
+ { 125, 204, 226, 247, 198, 199, 251, 213, 255, 255, 128 },
+ { 101, 167, 207, 246, 193, 201, 245, 168, 255, 255, 128 },
+ { 89, 121, 174, 237, 169, 184, 246, 204, 255, 255, 128 },
+ { 71, 79, 135, 216, 149, 170, 234, 168, 255, 226, 128 }
+ }, { /* Coeff Band 2 */
+ { 1, 207, 235, 250, 220, 204, 250, 201, 255, 255, 128 },
+ { 103, 160, 210, 245, 195, 188, 249, 195, 255, 255, 128 },
+ { 33, 130, 165, 234, 168, 183, 253, 199, 255, 255, 128 },
+ { 10, 113, 138, 223, 146, 180, 248, 199, 255, 255, 128 },
+ { 1, 88, 104, 172, 112, 174, 221, 126, 255, 217, 128 },
+ { 1, 87, 70, 160, 68, 140, 171, 85, 255, 85, 128 }
+ }, { /* Coeff Band 3 */
+ { 1, 230, 240, 249, 209, 200, 243, 199, 255, 228, 128 },
+ { 60, 178, 218, 247, 203, 200, 247, 198, 255, 255, 128 },
+ { 8, 119, 162, 241, 188, 185, 252, 202, 255, 255, 128 },
+ { 2, 78, 119, 218, 149, 162, 247, 184, 255, 255, 128 },
+ { 1, 48, 81, 172, 142, 148, 239, 140, 255, 239, 128 },
+ { 1, 29, 23, 82, 96, 102, 181, 149, 255, 255, 128 }
+ }, { /* Coeff Band 4 */
+ { 1, 240, 241, 250, 216, 203, 248, 188, 255, 255, 128 },
+ { 60, 180, 222, 247, 202, 195, 247, 191, 255, 255, 128 },
+ { 9, 120, 169, 240, 190, 189, 249, 181, 255, 255, 128 },
+ { 2, 85, 126, 223, 154, 178, 240, 184, 255, 255, 128 },
+ { 1, 47, 90, 198, 132, 158, 233, 162, 255, 224, 128 },
+ { 1, 33, 34, 143, 116, 156, 217, 128, 255, 255, 128 }
+ }, { /* Coeff Band 5 */
+ { 1, 250, 193, 249, 188, 193, 255, 236, 255, 255, 128 },
+ { 35, 187, 185, 247, 154, 184, 255, 247, 255, 171, 128 },
+ { 20, 132, 114, 223, 172, 165, 255, 229, 255, 255, 128 },
+ { 4, 97, 96, 218, 96, 162, 255, 164, 255, 253, 128 },
+ { 1, 57, 35, 197, 154, 173, 254, 215, 255, 255, 128 },
+ { 1, 8, 2, 161, 10, 57, 230, 228, 255, 171, 128 }
}
}
}
--- a/vp9/common/vp9_entropy.c
+++ b/vp9/common/vp9_entropy.c
@@ -336,6 +336,6 @@
BLOCK_TYPES, cm->fc.coef_counts_16x16,
count_sat, update_factor);
update_coef_probs(cm->fc.coef_probs_32x32, cm->fc.pre_coef_probs_32x32,
- BLOCK_TYPES_32X32, cm->fc.coef_counts_32x32,
+ BLOCK_TYPES, cm->fc.coef_counts_32x32,
count_sat, update_factor);
}
--- a/vp9/common/vp9_entropy.h
+++ b/vp9/common/vp9_entropy.h
@@ -61,7 +61,6 @@
/* Outside dimension. 0 = Y with DC, 1 = UV */
#define BLOCK_TYPES 2
-#define BLOCK_TYPES_32X32 1
#define REF_TYPES 2 // intra=0, inter=1
/* Middle dimension reflects the coefficient position within the transform. */
@@ -110,10 +109,22 @@
void vp9_coef_tree_initialize(void);
void vp9_adapt_coef_probs(struct VP9Common *);
-static void vp9_reset_mb_tokens_context(MACROBLOCKD* const xd) {
+static INLINE void vp9_reset_mb_tokens_context(MACROBLOCKD* const xd) {
/* Clear entropy contexts */
vpx_memset(xd->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
+}
+
+static INLINE void vp9_reset_sb_tokens_context(MACROBLOCKD* const xd) {
+ /* Clear entropy contexts */
+ vpx_memset(xd->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * 2);
+ vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * 2);
+}
+
+static INLINE void vp9_reset_sb64_tokens_context(MACROBLOCKD* const xd) {
+ /* Clear entropy contexts */
+ vpx_memset(xd->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * 4);
+ vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * 4);
}
extern const int vp9_coef_bands[32];
--- a/vp9/common/vp9_idctllm.c
+++ b/vp9/common/vp9_idctllm.c
@@ -645,7 +645,7 @@
// First transform rows
for (i = 0; i < 16; ++i) {
idct16_1d(input, outptr);
- input += half_pitch;
+ input += 16;
outptr += 16;
}
@@ -655,7 +655,7 @@
temp_in[j] = out[j * 16 + i];
idct16_1d(temp_in, temp_out);
for (j = 0; j < 16; ++j)
- output[j * 16 + i] = ROUND_POWER_OF_TWO(temp_out[j], 6);
+ output[j * half_pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 6);
}
}
@@ -838,7 +838,7 @@
};
void vp9_short_iht16x16_c(int16_t *input, int16_t *output,
- int input_pitch, TX_TYPE tx_type) {
+ int pitch, TX_TYPE tx_type) {
int i, j;
int16_t out[16 * 16];
int16_t *outptr = out;
@@ -848,7 +848,7 @@
// Rows
for (i = 0; i < 16; ++i) {
ht.rows(input, outptr);
- input += input_pitch;
+ input += 16;
outptr += 16;
}
@@ -858,7 +858,7 @@
temp_in[j] = out[j * 16 + i];
ht.cols(temp_in, temp_out);
for (j = 0; j < 16; ++j)
- output[j * 16 + i] = ROUND_POWER_OF_TWO(temp_out[j], 6);
+ output[j * pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 6);
}
}
@@ -875,7 +875,7 @@
vpx_memset(out, 0, sizeof(out));
for (i = 0; i < 4; ++i) {
idct16_1d(input, outptr);
- input += half_pitch;
+ input += 16;
outptr += 16;
}
@@ -885,7 +885,7 @@
temp_in[j] = out[j*16 + i];
idct16_1d(temp_in, temp_out);
for (j = 0; j < 16; ++j)
- output[j*16 + i] = ROUND_POWER_OF_TWO(temp_out[j], 6);
+ output[j * half_pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 6);
}
}
@@ -1273,7 +1273,7 @@
// Rows
for (i = 0; i < 32; ++i) {
idct32_1d(input, outptr);
- input += half_pitch;
+ input += 32;
outptr += 32;
}
@@ -1283,7 +1283,7 @@
temp_in[j] = out[j * 32 + i];
idct32_1d(temp_in, temp_out);
for (j = 0; j < 32; ++j)
- output[j * 32 + i] = ROUND_POWER_OF_TWO(temp_out[j], 6);
+ output[j * half_pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 6);
}
}
@@ -1306,7 +1306,7 @@
vpx_memset(out, 0, sizeof(out));
for (i = 0; i < 4; ++i) {
idct32_1d(input, outptr);
- input += half_pitch;
+ input += 32;
outptr += 32;
}
@@ -1316,6 +1316,6 @@
temp_in[j] = out[j * 32 + i];
idct32_1d(temp_in, temp_out);
for (j = 0; j < 32; ++j)
- output[j * 32 + i] = ROUND_POWER_OF_TWO(temp_out[j], 6);
+ output[j * half_pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 6);
}
}
--- a/vp9/common/vp9_invtrans.c
+++ b/vp9/common/vp9_invtrans.c
@@ -11,12 +11,13 @@
#include "vp9/common/vp9_invtrans.h"
#include "./vp9_rtcd.h"
-void vp9_inverse_transform_b_4x4(MACROBLOCKD *xd, int block, int pitch) {
- BLOCKD *b = &xd->block[block];
- if (xd->eobs[block] <= 1)
- xd->inv_txm4x4_1(b->dqcoeff, b->diff, pitch);
+void vp9_inverse_transform_b_4x4(MACROBLOCKD *xd, int eob,
+ int16_t *dqcoeff, int16_t *diff,
+ int pitch) {
+ if (eob <= 1)
+ xd->inv_txm4x4_1(dqcoeff, diff, pitch);
else
- xd->inv_txm4x4(b->dqcoeff, b->diff, pitch);
+ xd->inv_txm4x4(dqcoeff, diff, pitch);
}
void vp9_inverse_transform_mby_4x4(MACROBLOCKD *xd) {
@@ -27,7 +28,8 @@
if (tx_type != DCT_DCT) {
vp9_short_iht4x4(xd->block[i].dqcoeff, xd->block[i].diff, 16, tx_type);
} else {
- vp9_inverse_transform_b_4x4(xd, i, 32);
+ vp9_inverse_transform_b_4x4(xd, xd->eobs[i], xd->block[i].dqcoeff,
+ xd->block[i].diff, 32);
}
}
}
@@ -36,7 +38,8 @@
int i;
for (i = 16; i < 24; i++) {
- vp9_inverse_transform_b_4x4(xd, i, 16);
+ vp9_inverse_transform_b_4x4(xd, xd->eobs[i], xd->block[i].dqcoeff,
+ xd->block[i].diff, 16);
}
}
@@ -111,13 +114,170 @@
vp9_inverse_transform_mbuv_8x8(xd);
}
-void vp9_inverse_transform_sby_32x32(SUPERBLOCKD *xd_sb) {
- vp9_short_idct32x32(xd_sb->dqcoeff, xd_sb->diff, 64);
+void vp9_inverse_transform_sby_32x32(MACROBLOCKD *xd) {
+ vp9_short_idct32x32(xd->dqcoeff, xd->diff, 64);
}
-void vp9_inverse_transform_sbuv_16x16(SUPERBLOCKD *xd_sb) {
- vp9_inverse_transform_b_16x16(xd_sb->dqcoeff + 1024,
- xd_sb->diff + 1024, 32);
- vp9_inverse_transform_b_16x16(xd_sb->dqcoeff + 1280,
- xd_sb->diff + 1280, 32);
+void vp9_inverse_transform_sby_16x16(MACROBLOCKD *xd) {
+ int n;
+
+ for (n = 0; n < 4; n++) {
+ const int x_idx = n & 1, y_idx = n >> 1;
+
+ vp9_inverse_transform_b_16x16(xd->dqcoeff + n * 256,
+ xd->diff + x_idx * 16 + y_idx * 32 * 16, 64);
+ }
+}
+
+void vp9_inverse_transform_sby_8x8(MACROBLOCKD *xd) {
+ int n;
+
+ for (n = 0; n < 16; n++) {
+ const int x_idx = n & 3, y_idx = n >> 2;
+
+ vp9_inverse_transform_b_8x8(xd->dqcoeff + n * 64,
+ xd->diff + x_idx * 8 + y_idx * 32 * 8, 64);
+ }
+}
+
+void vp9_inverse_transform_sby_4x4(MACROBLOCKD *xd) {
+ int n;
+
+ for (n = 0; n < 64; n++) {
+ const int x_idx = n & 7, y_idx = n >> 3;
+
+ vp9_inverse_transform_b_4x4(xd, xd->eobs[n], xd->dqcoeff + n * 16,
+ xd->diff + x_idx * 4 + y_idx * 4 * 32, 64);
+ }
+}
+
+void vp9_inverse_transform_sbuv_16x16(MACROBLOCKD *xd) {
+ vp9_inverse_transform_b_16x16(xd->dqcoeff + 1024,
+ xd->diff + 1024, 32);
+ vp9_inverse_transform_b_16x16(xd->dqcoeff + 1280,
+ xd->diff + 1280, 32);
+}
+
+void vp9_inverse_transform_sbuv_8x8(MACROBLOCKD *xd) {
+ int n;
+
+ for (n = 0; n < 4; n++) {
+ const int x_idx = n & 1, y_idx = n >> 1;
+
+ vp9_inverse_transform_b_8x8(xd->dqcoeff + 1024 + n * 64,
+ xd->diff + 1024 + x_idx * 8 + y_idx * 16 * 8,
+ 32);
+ vp9_inverse_transform_b_8x8(xd->dqcoeff + 1280 + n * 64,
+ xd->diff + 1280 + x_idx * 8 + y_idx * 16 * 8,
+ 32);
+ }
+}
+
+void vp9_inverse_transform_sbuv_4x4(MACROBLOCKD *xd) {
+ int n;
+
+ for (n = 0; n < 16; n++) {
+ const int x_idx = n & 3, y_idx = n >> 2;
+
+ vp9_inverse_transform_b_4x4(xd, xd->eobs[64 + n],
+ xd->dqcoeff + 1024 + n * 16,
+ xd->diff + 1024 + x_idx * 4 + y_idx * 16 * 4,
+ 32);
+ vp9_inverse_transform_b_4x4(xd, xd->eobs[64 + 16 + n],
+ xd->dqcoeff + 1280 + n * 16,
+ xd->diff + 1280 + x_idx * 4 + y_idx * 16 * 4,
+ 32);
+ }
+}
+
+void vp9_inverse_transform_sb64y_32x32(MACROBLOCKD *xd) {
+ int n;
+
+ for (n = 0; n < 4; n++) {
+ const int x_idx = n & 1, y_idx = n >> 1;
+
+ vp9_short_idct32x32(xd->dqcoeff + n * 1024,
+ xd->diff + x_idx * 32 + y_idx * 32 * 64, 128);
+ }
+}
+
+void vp9_inverse_transform_sb64y_16x16(MACROBLOCKD *xd) {
+ int n;
+
+ for (n = 0; n < 16; n++) {
+ const int x_idx = n & 3, y_idx = n >> 2;
+
+ vp9_inverse_transform_b_16x16(xd->dqcoeff + n * 256,
+ xd->diff + x_idx * 16 + y_idx * 64 * 16, 128);
+ }
+}
+
+void vp9_inverse_transform_sb64y_8x8(MACROBLOCKD *xd) {
+ int n;
+
+ for (n = 0; n < 64; n++) {
+ const int x_idx = n & 7, y_idx = n >> 3;
+
+ vp9_inverse_transform_b_8x8(xd->dqcoeff + n * 64,
+ xd->diff + x_idx * 8 + y_idx * 64 * 8, 128);
+ }
+}
+
+void vp9_inverse_transform_sb64y_4x4(MACROBLOCKD *xd) {
+ int n;
+
+ for (n = 0; n < 256; n++) {
+ const int x_idx = n & 15, y_idx = n >> 4;
+
+ vp9_inverse_transform_b_4x4(xd, xd->eobs[n], xd->dqcoeff + n * 16,
+ xd->diff + x_idx * 4 + y_idx * 4 * 64, 128);
+ }
+}
+
+void vp9_inverse_transform_sb64uv_32x32(MACROBLOCKD *xd) {
+ vp9_short_idct32x32(xd->dqcoeff + 4096,
+ xd->diff + 4096, 64);
+ vp9_short_idct32x32(xd->dqcoeff + 4096 + 1024,
+ xd->diff + 4096 + 1024, 64);
+}
+
+void vp9_inverse_transform_sb64uv_16x16(MACROBLOCKD *xd) {
+ int n;
+
+ for (n = 0; n < 4; n++) {
+ const int x_idx = n & 1, y_idx = n >> 1, off = x_idx * 16 + y_idx * 32 * 16;
+
+ vp9_inverse_transform_b_16x16(xd->dqcoeff + 4096 + n * 256,
+ xd->diff + 4096 + off, 64);
+ vp9_inverse_transform_b_16x16(xd->dqcoeff + 4096 + 1024 + n * 256,
+ xd->diff + 4096 + 1024 + off, 64);
+ }
+}
+
+void vp9_inverse_transform_sb64uv_8x8(MACROBLOCKD *xd) {
+ int n;
+
+ for (n = 0; n < 16; n++) {
+ const int x_idx = n & 3, y_idx = n >> 2, off = x_idx * 8 + y_idx * 32 * 8;
+
+ vp9_inverse_transform_b_8x8(xd->dqcoeff + 4096 + n * 64,
+ xd->diff + 4096 + off, 64);
+ vp9_inverse_transform_b_8x8(xd->dqcoeff + 4096 + 1024 + n * 64,
+ xd->diff + 4096 + 1024 + off, 64);
+ }
+}
+
+void vp9_inverse_transform_sb64uv_4x4(MACROBLOCKD *xd) {
+ int n;
+
+ for (n = 0; n < 64; n++) {
+ const int x_idx = n & 7, y_idx = n >> 3, off = x_idx * 4 + y_idx * 32 * 4;
+
+ vp9_inverse_transform_b_4x4(xd, xd->eobs[256 + n],
+ xd->dqcoeff + 4096 + n * 16,
+ xd->diff + 4096 + off, 64);
+ vp9_inverse_transform_b_4x4(xd, xd->eobs[256 + 64 + n],
+ xd->dqcoeff + 4096 + 1024 + n * 16,
+ xd->diff + 4096 + 1024 + off, 64);
+ }
}
--- a/vp9/common/vp9_invtrans.h
+++ b/vp9/common/vp9_invtrans.h
@@ -15,7 +15,9 @@
#include "vpx/vpx_integer.h"
#include "vp9/common/vp9_blockd.h"
-void vp9_inverse_transform_b_4x4(MACROBLOCKD *xd, int block, int pitch);
+void vp9_inverse_transform_b_4x4(MACROBLOCKD *xd, int eob,
+ int16_t *dqcoeff, int16_t *diff,
+ int pitch);
void vp9_inverse_transform_mb_4x4(MACROBLOCKD *xd);
@@ -39,7 +41,21 @@
void vp9_inverse_transform_mby_16x16(MACROBLOCKD *xd);
-void vp9_inverse_transform_sby_32x32(SUPERBLOCKD *xd_sb);
-void vp9_inverse_transform_sbuv_16x16(SUPERBLOCKD *xd_sb);
+void vp9_inverse_transform_sby_32x32(MACROBLOCKD *xd);
+void vp9_inverse_transform_sby_16x16(MACROBLOCKD *xd);
+void vp9_inverse_transform_sby_8x8(MACROBLOCKD *xd);
+void vp9_inverse_transform_sby_4x4(MACROBLOCKD *xd);
+void vp9_inverse_transform_sbuv_16x16(MACROBLOCKD *xd);
+void vp9_inverse_transform_sbuv_8x8(MACROBLOCKD *xd);
+void vp9_inverse_transform_sbuv_4x4(MACROBLOCKD *xd);
+
+void vp9_inverse_transform_sb64y_32x32(MACROBLOCKD *xd);
+void vp9_inverse_transform_sb64y_16x16(MACROBLOCKD *xd);
+void vp9_inverse_transform_sb64y_8x8(MACROBLOCKD *xd);
+void vp9_inverse_transform_sb64y_4x4(MACROBLOCKD *xd);
+void vp9_inverse_transform_sb64uv_32x32(MACROBLOCKD *xd);
+void vp9_inverse_transform_sb64uv_16x16(MACROBLOCKD *xd);
+void vp9_inverse_transform_sb64uv_8x8(MACROBLOCKD *xd);
+void vp9_inverse_transform_sb64uv_4x4(MACROBLOCKD *xd);
#endif // VP9_COMMON_VP9_INVTRANS_H_
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -61,7 +61,7 @@
vp9_coeff_probs coef_probs_4x4[BLOCK_TYPES];
vp9_coeff_probs coef_probs_8x8[BLOCK_TYPES];
vp9_coeff_probs coef_probs_16x16[BLOCK_TYPES];
- vp9_coeff_probs coef_probs_32x32[BLOCK_TYPES_32X32];
+ vp9_coeff_probs coef_probs_32x32[BLOCK_TYPES];
nmv_context nmvc;
nmv_context pre_nmvc;
@@ -83,12 +83,12 @@
vp9_coeff_probs pre_coef_probs_4x4[BLOCK_TYPES];
vp9_coeff_probs pre_coef_probs_8x8[BLOCK_TYPES];
vp9_coeff_probs pre_coef_probs_16x16[BLOCK_TYPES];
- vp9_coeff_probs pre_coef_probs_32x32[BLOCK_TYPES_32X32];
+ vp9_coeff_probs pre_coef_probs_32x32[BLOCK_TYPES];
vp9_coeff_count coef_counts_4x4[BLOCK_TYPES];
vp9_coeff_count coef_counts_8x8[BLOCK_TYPES];
vp9_coeff_count coef_counts_16x16[BLOCK_TYPES];
- vp9_coeff_count coef_counts_32x32[BLOCK_TYPES_32X32];
+ vp9_coeff_count coef_counts_32x32[BLOCK_TYPES];
nmv_context_counts NMVcount;
vp9_prob switchable_interp_prob[VP9_SWITCHABLE_FILTERS + 1]
--- a/vp9/common/vp9_recon.c
+++ b/vp9/common/vp9_recon.c
@@ -117,7 +117,7 @@
void vp9_recon_sby_s_c(MACROBLOCKD *xd, uint8_t *dst) {
int x, y, stride = xd->block[0].dst_stride;
- int16_t *diff = xd->sb_coeff_data.diff;
+ int16_t *diff = xd->diff;
for (y = 0; y < 32; y++) {
for (x = 0; x < 32; x++) {
@@ -130,8 +130,8 @@
void vp9_recon_sbuv_s_c(MACROBLOCKD *xd, uint8_t *udst, uint8_t *vdst) {
int x, y, stride = xd->block[16].dst_stride;
- int16_t *udiff = xd->sb_coeff_data.diff + 1024;
- int16_t *vdiff = xd->sb_coeff_data.diff + 1280;
+ int16_t *udiff = xd->diff + 1024;
+ int16_t *vdiff = xd->diff + 1280;
for (y = 0; y < 16; y++) {
for (x = 0; x < 16; x++) {
@@ -142,6 +142,36 @@
vdst += stride;
udiff += 16;
vdiff += 16;
+ }
+}
+
+void vp9_recon_sb64y_s_c(MACROBLOCKD *xd, uint8_t *dst) {
+ int x, y, stride = xd->block[0].dst_stride;
+ int16_t *diff = xd->diff;
+
+ for (y = 0; y < 64; y++) {
+ for (x = 0; x < 64; x++) {
+ dst[x] = clip_pixel(dst[x] + diff[x]);
+ }
+ dst += stride;
+ diff += 64;
+ }
+}
+
+void vp9_recon_sb64uv_s_c(MACROBLOCKD *xd, uint8_t *udst, uint8_t *vdst) {
+ int x, y, stride = xd->block[16].dst_stride;
+ int16_t *udiff = xd->diff + 4096;
+ int16_t *vdiff = xd->diff + 4096 + 1024;
+
+ for (y = 0; y < 32; y++) {
+ for (x = 0; x < 32; x++) {
+ udst[x] = clip_pixel(udst[x] + udiff[x]);
+ vdst[x] = clip_pixel(vdst[x] + vdiff[x]);
+ }
+ udst += stride;
+ vdst += stride;
+ udiff += 32;
+ vdiff += 32;
}
}
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -97,6 +97,12 @@
prototype void vp9_recon_sbuv_s "struct macroblockd *x, uint8_t *udst, uint8_t *vdst"
specialize void vp9_recon_sbuv_s
+prototype void vp9_recon_sb64y_s "struct macroblockd *x, uint8_t *dst"
+specialize vp9_recon_sb64y_s
+
+prototype void vp9_recon_sb64uv_s "struct macroblockd *x, uint8_t *udst, uint8_t *vdst"
+specialize void vp9_recon_sb64uv_s
+
prototype void vp9_build_intra_predictors_mby_s "struct macroblockd *x"
specialize vp9_build_intra_predictors_mby_s
--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@@ -452,125 +452,12 @@
}
}
-static void decode_16x16_sb(VP9D_COMP *pbi, MACROBLOCKD *xd,
- BOOL_DECODER* const bc, int n,
- int maska, int shiftb) {
- int x_idx = n & maska, y_idx = n >> shiftb;
- TX_TYPE tx_type = get_tx_type_16x16(xd, &xd->block[0]);
- if (tx_type != DCT_DCT) {
- vp9_ht_dequant_idct_add_16x16_c(
- tx_type, xd->qcoeff, xd->block[0].dequant,
- xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,
- xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,
- xd->dst.y_stride, xd->dst.y_stride, xd->eobs[0]);
- } else {
- vp9_dequant_idct_add_16x16(
- xd->qcoeff, xd->block[0].dequant,
- xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,
- xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,
- xd->dst.y_stride, xd->dst.y_stride, xd->eobs[0]);
- }
- vp9_dequant_idct_add_uv_block_8x8_inplace_c(
- xd->qcoeff + 16 * 16,
- xd->block[16].dequant,
- xd->dst.u_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8,
- xd->dst.v_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8,
- xd->dst.uv_stride, xd);
-};
-
-static void decode_8x8_sb(VP9D_COMP *pbi, MACROBLOCKD *xd,
- BOOL_DECODER* const bc, int n,
- int maska, int shiftb) {
- int x_idx = n & maska, y_idx = n >> shiftb;
- TX_TYPE tx_type = get_tx_type_8x8(xd, &xd->block[0]);
- if (tx_type != DCT_DCT) {
- int i;
- for (i = 0; i < 4; i++) {
- int ib = vp9_i8x8_block[i];
- int idx = (ib & 0x02) ? (ib + 2) : ib;
- int16_t *q = xd->block[idx].qcoeff;
- int16_t *dq = xd->block[0].dequant;
- int stride = xd->dst.y_stride;
- tx_type = get_tx_type_8x8(xd, &xd->block[ib]);
- if (tx_type != DCT_DCT) {
- vp9_ht_dequant_idct_add_8x8_c(
- tx_type, q, dq,
- xd->dst.y_buffer + (y_idx * 16 + (i / 2) * 8) * xd->dst.y_stride
- + x_idx * 16 + (i & 1) * 8,
- xd->dst.y_buffer + (y_idx * 16 + (i / 2) * 8) * xd->dst.y_stride
- + x_idx * 16 + (i & 1) * 8,
- stride, stride, xd->eobs[idx]);
- } else {
- vp9_dequant_idct_add_8x8_c(
- q, dq,
- xd->dst.y_buffer + (y_idx * 16 + (i / 2) * 8) * xd->dst.y_stride
- + x_idx * 16 + (i & 1) * 8,
- xd->dst.y_buffer + (y_idx * 16 + (i / 2) * 8) * xd->dst.y_stride
- + x_idx * 16 + (i & 1) * 8,
- stride, stride, xd->eobs[idx]);
- }
- }
- } else {
- vp9_dequant_idct_add_y_block_8x8_inplace_c(
- xd->qcoeff, xd->block[0].dequant,
- xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,
- xd->dst.y_stride, xd);
- }
- vp9_dequant_idct_add_uv_block_8x8_inplace_c(
- xd->qcoeff + 16 * 16, xd->block[16].dequant,
- xd->dst.u_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8,
- xd->dst.v_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8,
- xd->dst.uv_stride, xd);
-};
-
-static void decode_4x4_sb(VP9D_COMP *pbi, MACROBLOCKD *xd,
- BOOL_DECODER* const bc, int n,
- int maska, int shiftb) {
- int x_idx = n & maska, y_idx = n >> shiftb;
- TX_TYPE tx_type = get_tx_type_4x4(xd, &xd->block[0]);
- if (tx_type != DCT_DCT) {
- int i;
- for (i = 0; i < 16; i++) {
- BLOCKD *b = &xd->block[i];
- tx_type = get_tx_type_4x4(xd, b);
- if (tx_type != DCT_DCT) {
- vp9_ht_dequant_idct_add_c(
- tx_type, b->qcoeff, b->dequant,
- xd->dst.y_buffer + (y_idx * 16 + (i / 4) * 4) * xd->dst.y_stride
- + x_idx * 16 + (i & 3) * 4,
- xd->dst.y_buffer + (y_idx * 16 + (i / 4) * 4) * xd->dst.y_stride
- + x_idx * 16 + (i & 3) * 4,
- xd->dst.y_stride, xd->dst.y_stride, xd->eobs[i]);
- } else {
- xd->itxm_add(
- b->qcoeff, b->dequant,
- xd->dst.y_buffer + (y_idx * 16 + (i / 4) * 4) * xd->dst.y_stride
- + x_idx * 16 + (i & 3) * 4,
- xd->dst.y_buffer + (y_idx * 16 + (i / 4) * 4) * xd->dst.y_stride
- + x_idx * 16 + (i & 3) * 4,
- xd->dst.y_stride, xd->dst.y_stride, xd->eobs[i]);
- }
- }
- } else {
- vp9_dequant_idct_add_y_block_4x4_inplace_c(
- xd->qcoeff, xd->block[0].dequant,
- xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,
- xd->dst.y_stride, xd);
- }
- vp9_dequant_idct_add_uv_block_4x4_inplace_c(
- xd->qcoeff + 16 * 16, xd->block[16].dequant,
- xd->dst.u_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8,
- xd->dst.v_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8,
- xd->dst.uv_stride, xd);
-};
-
static void decode_superblock64(VP9D_COMP *pbi, MACROBLOCKD *xd,
int mb_row, int mb_col,
BOOL_DECODER* const bc) {
int n, eobtotal;
- TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size;
VP9_COMMON *const pc = &pbi->common;
- MODE_INFO *orig_mi = xd->mode_info_context;
+ MODE_INFO *mi = xd->mode_info_context;
const int mis = pc->mode_info_stride;
assert(xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB64X64);
@@ -583,21 +470,8 @@
mb_init_dequantizer(pbi, xd);
if (xd->mode_info_context->mbmi.mb_skip_coeff) {
- int n;
+ vp9_reset_sb64_tokens_context(xd);
- vp9_reset_mb_tokens_context(xd);
- for (n = 1; n <= 3; n++) {
- if (mb_col < pc->mb_cols - n)
- xd->above_context += n;
- if (mb_row < pc->mb_rows - n)
- xd->left_context += n;
- vp9_reset_mb_tokens_context(xd);
- if (mb_col < pc->mb_cols - n)
- xd->above_context -= n;
- if (mb_row < pc->mb_rows - n)
- xd->left_context -= n;
- }
-
/* Special case: Force the loopfilter to skip when eobtotal and
* mb_skip_coeff are zero.
*/
@@ -617,74 +491,101 @@
}
/* dequantization and idct */
- if (xd->mode_info_context->mbmi.txfm_size == TX_32X32) {
- for (n = 0; n < 4; n++) {
- const int x_idx = n & 1, y_idx = n >> 1;
+ eobtotal = vp9_decode_sb64_tokens(pbi, xd, bc);
+ if (eobtotal == 0) { // skip loopfilter
+ for (n = 0; n < 16; n++) {
+ const int x_idx = n & 3, y_idx = n >> 2;
- if (mb_col + x_idx * 2 >= pc->mb_cols ||
- mb_row + y_idx * 2 >= pc->mb_rows)
- continue;
-
- xd->left_context = pc->left_context + (y_idx << 1);
- xd->above_context = pc->above_context + mb_col + (x_idx << 1);
- xd->mode_info_context = orig_mi + x_idx * 2 + y_idx * 2 * mis;
- eobtotal = vp9_decode_sb_tokens(pbi, xd, bc);
- if (eobtotal == 0) { // skip loopfilter
- xd->mode_info_context->mbmi.mb_skip_coeff = 1;
- if (mb_col + 1 < pc->mb_cols)
- xd->mode_info_context[1].mbmi.mb_skip_coeff = 1;
- if (mb_row + 1 < pc->mb_rows) {
- xd->mode_info_context[mis].mbmi.mb_skip_coeff = 1;
- if (mb_col + 1 < pc->mb_cols)
- xd->mode_info_context[mis + 1].mbmi.mb_skip_coeff = 1;
+ if (mb_col + x_idx < pc->mb_cols && mb_row + y_idx < pc->mb_rows)
+ mi[y_idx * mis + x_idx].mbmi.mb_skip_coeff = mi->mbmi.mb_skip_coeff;
+ }
+ } else {
+ switch (xd->mode_info_context->mbmi.txfm_size) {
+ case TX_32X32:
+ for (n = 0; n < 4; n++) {
+ const int x_idx = n & 1, y_idx = n >> 1;
+ vp9_dequant_idct_add_32x32(xd->qcoeff + n * 1024,
+ xd->block[0].dequant,
+ xd->dst.y_buffer + x_idx * 32 + y_idx * xd->dst.y_stride * 32,
+ xd->dst.y_buffer + x_idx * 32 + y_idx * xd->dst.y_stride * 32,
+ xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 64]);
}
- } else {
- vp9_dequant_idct_add_32x32(xd->sb_coeff_data.qcoeff, xd->block[0].dequant,
- xd->dst.y_buffer + x_idx * 32 +
- xd->dst.y_stride * y_idx * 32,
- xd->dst.y_buffer + x_idx * 32 +
- xd->dst.y_stride * y_idx * 32,
- xd->dst.y_stride, xd->dst.y_stride,
- xd->eobs[0]);
- vp9_dequant_idct_add_uv_block_16x16_c(xd->sb_coeff_data.qcoeff + 1024,
- xd->block[16].dequant,
- xd->dst.u_buffer + x_idx * 16 +
- xd->dst.uv_stride * y_idx * 16,
- xd->dst.v_buffer + x_idx * 16 +
- xd->dst.uv_stride * y_idx * 16,
- xd->dst.uv_stride, xd);
- }
+ vp9_dequant_idct_add_32x32(xd->qcoeff + 4096,
+ xd->block[16].dequant, xd->dst.u_buffer, xd->dst.u_buffer,
+ xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[256]);
+ vp9_dequant_idct_add_32x32(xd->qcoeff + 4096 + 1024,
+ xd->block[20].dequant, xd->dst.v_buffer, xd->dst.v_buffer,
+ xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[320]);
+ break;
+ case TX_16X16: // FIXME(rbultje): adst
+ for (n = 0; n < 16; n++) {
+ const int x_idx = n & 3, y_idx = n >> 2;
+ vp9_dequant_idct_add_16x16(xd->qcoeff + n * 256, xd->block[0].dequant,
+ xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,
+ xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,
+ xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 16]);
+ }
+ for (n = 0; n < 4; n++) {
+ const int x_idx = n & 1, y_idx = n >> 1;
+ vp9_dequant_idct_add_16x16(xd->qcoeff + 4096 + n * 256,
+ xd->block[16].dequant,
+ xd->dst.u_buffer + y_idx * 16 * xd->dst.uv_stride + x_idx * 16,
+ xd->dst.u_buffer + y_idx * 16 * xd->dst.uv_stride + x_idx * 16,
+ xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[256 + n * 16]);
+ vp9_dequant_idct_add_16x16(xd->qcoeff + 4096 + 1024 + n * 256,
+ xd->block[20].dequant,
+ xd->dst.v_buffer + y_idx * 16 * xd->dst.uv_stride + x_idx * 16,
+ xd->dst.v_buffer + y_idx * 16 * xd->dst.uv_stride + x_idx * 16,
+ xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[320 + n * 16]);
+ }
+ break;
+ case TX_8X8: // FIXME(rbultje): adst
+ for (n = 0; n < 64; n++) {
+ const int x_idx = n & 7, y_idx = n >> 3;
+ vp9_dequant_idct_add_8x8_c(xd->qcoeff + n * 64, xd->block[0].dequant,
+ xd->dst.y_buffer + y_idx * 8 * xd->dst.y_stride + x_idx * 8,
+ xd->dst.y_buffer + y_idx * 8 * xd->dst.y_stride + x_idx * 8,
+ xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 4]);
+ }
+ for (n = 0; n < 16; n++) {
+ const int x_idx = n & 3, y_idx = n >> 2;
+ vp9_dequant_idct_add_8x8_c(xd->qcoeff + n * 64 + 4096,
+ xd->block[16].dequant,
+ xd->dst.u_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8,
+ xd->dst.u_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8,
+ xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[256 + n * 4]);
+ vp9_dequant_idct_add_8x8_c(xd->qcoeff + n * 64 + 4096 + 1024,
+ xd->block[20].dequant,
+ xd->dst.v_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8,
+ xd->dst.v_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8,
+ xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[320 + n * 4]);
+ }
+ break;
+ case TX_4X4: // FIXME(rbultje): adst
+ for (n = 0; n < 256; n++) {
+ const int x_idx = n & 15, y_idx = n >> 4;
+ xd->itxm_add(xd->qcoeff + n * 16, xd->block[0].dequant,
+ xd->dst.y_buffer + y_idx * 4 * xd->dst.y_stride + x_idx * 4,
+ xd->dst.y_buffer + y_idx * 4 * xd->dst.y_stride + x_idx * 4,
+ xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n]);
+ }
+ for (n = 0; n < 64; n++) {
+ const int x_idx = n & 7, y_idx = n >> 3;
+ xd->itxm_add(xd->qcoeff + 4096 + n * 16,
+ xd->block[16].dequant,
+ xd->dst.u_buffer + y_idx * 4 * xd->dst.uv_stride + x_idx * 4,
+ xd->dst.u_buffer + y_idx * 4 * xd->dst.uv_stride + x_idx * 4,
+ xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[256 + n]);
+ xd->itxm_add(xd->qcoeff + 4096 + 1024 + n * 16,
+ xd->block[20].dequant,
+ xd->dst.v_buffer + y_idx * 4 * xd->dst.uv_stride + x_idx * 4,
+ xd->dst.v_buffer + y_idx * 4 * xd->dst.uv_stride + x_idx * 4,
+ xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[320 + n]);
+ }
+ break;
+ default: assert(0);
}
- } else {
- for (n = 0; n < 16; n++) {
- int x_idx = n & 3, y_idx = n >> 2;
-
- if (mb_col + x_idx >= pc->mb_cols || mb_row + y_idx >= pc->mb_rows)
- continue;
-
- xd->above_context = pc->above_context + mb_col + x_idx;
- xd->left_context = pc->left_context + y_idx;
- xd->mode_info_context = orig_mi + x_idx + y_idx * mis;
-
- eobtotal = vp9_decode_mb_tokens(pbi, xd, bc);
- if (eobtotal == 0) { // skip loopfilter
- xd->mode_info_context->mbmi.mb_skip_coeff = 1;
- continue;
- }
-
- if (tx_size == TX_16X16) {
- decode_16x16_sb(pbi, xd, bc, n, 3, 2);
- } else if (tx_size == TX_8X8) {
- decode_8x8_sb(pbi, xd, bc, n, 3, 2);
- } else {
- decode_4x4_sb(pbi, xd, bc, n, 3, 2);
- }
- }
}
-
- xd->above_context = pc->above_context + mb_col;
- xd->left_context = pc->left_context;
- xd->mode_info_context = orig_mi;
}
static void decode_superblock32(VP9D_COMP *pbi, MACROBLOCKD *xd,
@@ -691,9 +592,7 @@
int mb_row, int mb_col,
BOOL_DECODER* const bc) {
int n, eobtotal;
- TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size;
VP9_COMMON *const pc = &pbi->common;
- MODE_INFO *orig_mi = xd->mode_info_context;
const int mis = pc->mode_info_stride;
assert(xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB32X32);
@@ -706,16 +605,7 @@
mb_init_dequantizer(pbi, xd);
if (xd->mode_info_context->mbmi.mb_skip_coeff) {
- vp9_reset_mb_tokens_context(xd);
- if (mb_col < pc->mb_cols - 1)
- xd->above_context++;
- if (mb_row < pc->mb_rows - 1)
- xd->left_context++;
- vp9_reset_mb_tokens_context(xd);
- if (mb_col < pc->mb_cols - 1)
- xd->above_context--;
- if (mb_row < pc->mb_rows - 1)
- xd->left_context--;
+ vp9_reset_sb_tokens_context(xd);
/* Special case: Force the loopfilter to skip when eobtotal and
* mb_skip_coeff are zero.
@@ -736,56 +626,90 @@
}
/* dequantization and idct */
- if (xd->mode_info_context->mbmi.txfm_size == TX_32X32) {
- eobtotal = vp9_decode_sb_tokens(pbi, xd, bc);
- if (eobtotal == 0) { // skip loopfilter
- xd->mode_info_context->mbmi.mb_skip_coeff = 1;
+ eobtotal = vp9_decode_sb_tokens(pbi, xd, bc);
+ if (eobtotal == 0) { // skip loopfilter
+ xd->mode_info_context->mbmi.mb_skip_coeff = 1;
+ if (mb_col + 1 < pc->mb_cols)
+ xd->mode_info_context[1].mbmi.mb_skip_coeff = 1;
+ if (mb_row + 1 < pc->mb_rows) {
+ xd->mode_info_context[mis].mbmi.mb_skip_coeff = 1;
if (mb_col + 1 < pc->mb_cols)
- xd->mode_info_context[1].mbmi.mb_skip_coeff = 1;
- if (mb_row + 1 < pc->mb_rows) {
- xd->mode_info_context[mis].mbmi.mb_skip_coeff = 1;
- if (mb_col + 1 < pc->mb_cols)
- xd->mode_info_context[mis + 1].mbmi.mb_skip_coeff = 1;
- }
- } else {
- vp9_dequant_idct_add_32x32(xd->sb_coeff_data.qcoeff, xd->block[0].dequant,
- xd->dst.y_buffer, xd->dst.y_buffer,
- xd->dst.y_stride, xd->dst.y_stride,
- xd->eobs[0]);
- vp9_dequant_idct_add_uv_block_16x16_c(xd->sb_coeff_data.qcoeff + 1024,
- xd->block[16].dequant,
- xd->dst.u_buffer, xd->dst.v_buffer,
- xd->dst.uv_stride, xd);
+ xd->mode_info_context[mis + 1].mbmi.mb_skip_coeff = 1;
}
} else {
- for (n = 0; n < 4; n++) {
- int x_idx = n & 1, y_idx = n >> 1;
-
- if (mb_col + x_idx >= pc->mb_cols || mb_row + y_idx >= pc->mb_rows)
- continue;
-
- xd->above_context = pc->above_context + mb_col + x_idx;
- xd->left_context = pc->left_context + y_idx + (mb_row & 2);
- xd->mode_info_context = orig_mi + x_idx + y_idx * mis;
-
- eobtotal = vp9_decode_mb_tokens(pbi, xd, bc);
- if (eobtotal == 0) { // skip loopfilter
- xd->mode_info_context->mbmi.mb_skip_coeff = 1;
- continue;
- }
-
- if (tx_size == TX_16X16) {
- decode_16x16_sb(pbi, xd, bc, n, 1, 1);
- } else if (tx_size == TX_8X8) {
- decode_8x8_sb(pbi, xd, bc, n, 1, 1);
- } else {
- decode_4x4_sb(pbi, xd, bc, n, 1, 1);
- }
+ switch (xd->mode_info_context->mbmi.txfm_size) {
+ case TX_32X32:
+ vp9_dequant_idct_add_32x32(xd->qcoeff, xd->block[0].dequant,
+ xd->dst.y_buffer, xd->dst.y_buffer,
+ xd->dst.y_stride, xd->dst.y_stride,
+ xd->eobs[0]);
+ vp9_dequant_idct_add_uv_block_16x16_c(xd->qcoeff + 1024,
+ xd->block[16].dequant,
+ xd->dst.u_buffer,
+ xd->dst.v_buffer,
+ xd->dst.uv_stride, xd);
+ break;
+ case TX_16X16: // FIXME(rbultje): adst
+ for (n = 0; n < 4; n++) {
+ const int x_idx = n & 1, y_idx = n >> 1;
+ vp9_dequant_idct_add_16x16(
+ xd->qcoeff + n * 256, xd->block[0].dequant,
+ xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,
+ xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,
+ xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 16]);
+ }
+ vp9_dequant_idct_add_uv_block_16x16_c(xd->qcoeff + 1024,
+ xd->block[16].dequant,
+ xd->dst.u_buffer,
+ xd->dst.v_buffer,
+ xd->dst.uv_stride, xd);
+ break;
+ case TX_8X8: // FIXME(rbultje): adst
+ for (n = 0; n < 16; n++) {
+ const int x_idx = n & 3, y_idx = n >> 2;
+ vp9_dequant_idct_add_8x8_c(xd->qcoeff + n * 64, xd->block[0].dequant,
+ xd->dst.y_buffer + y_idx * 8 * xd->dst.y_stride + x_idx * 8,
+ xd->dst.y_buffer + y_idx * 8 * xd->dst.y_stride + x_idx * 8,
+ xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 4]);
+ }
+ for (n = 0; n < 4; n++) {
+ const int x_idx = n & 1, y_idx = n >> 1;
+ vp9_dequant_idct_add_8x8_c(xd->qcoeff + n * 64 + 1024,
+ xd->block[16].dequant,
+ xd->dst.u_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8,
+ xd->dst.u_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8,
+ xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[64 + n * 4]);
+ vp9_dequant_idct_add_8x8_c(xd->qcoeff + n * 64 + 1280,
+ xd->block[20].dequant,
+ xd->dst.v_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8,
+ xd->dst.v_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8,
+ xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[80 + n * 4]);
+ }
+ break;
+ case TX_4X4: // FIXME(rbultje): adst
+ for (n = 0; n < 64; n++) {
+ const int x_idx = n & 7, y_idx = n >> 3;
+ xd->itxm_add(xd->qcoeff + n * 16, xd->block[0].dequant,
+ xd->dst.y_buffer + y_idx * 4 * xd->dst.y_stride + x_idx * 4,
+ xd->dst.y_buffer + y_idx * 4 * xd->dst.y_stride + x_idx * 4,
+ xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n]);
+ }
+ for (n = 0; n < 16; n++) {
+ const int x_idx = n & 3, y_idx = n >> 2;
+ xd->itxm_add(xd->qcoeff + 1024 + n * 16,
+ xd->block[16].dequant,
+ xd->dst.u_buffer + y_idx * 4 * xd->dst.uv_stride + x_idx * 4,
+ xd->dst.u_buffer + y_idx * 4 * xd->dst.uv_stride + x_idx * 4,
+ xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[64 + n]);
+ xd->itxm_add(xd->qcoeff + 1280 + n * 16,
+ xd->block[20].dequant,
+ xd->dst.v_buffer + y_idx * 4 * xd->dst.uv_stride + x_idx * 4,
+ xd->dst.v_buffer + y_idx * 4 * xd->dst.uv_stride + x_idx * 4,
+ xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[80 + n]);
+ }
+ break;
+ default: assert(0);
}
-
- xd->above_context = pc->above_context + mb_col;
- xd->left_context = pc->left_context + (mb_row & 2);
- xd->mode_info_context = orig_mi;
}
}
@@ -1187,7 +1111,7 @@
read_coef_probs_common(bc, pc->fc.coef_probs_16x16, BLOCK_TYPES);
}
if (pbi->common.txfm_mode > ALLOW_16X16) {
- read_coef_probs_common(bc, pc->fc.coef_probs_32x32, BLOCK_TYPES_32X32);
+ read_coef_probs_common(bc, pc->fc.coef_probs_32x32, BLOCK_TYPES);
}
}
--- a/vp9/decoder/vp9_dequantize.c
+++ b/vp9/decoder/vp9_dequantize.c
@@ -354,7 +354,7 @@
int stride,
MACROBLOCKD *xd) {
vp9_dequant_idct_add_16x16_c(q, dq, dstu, dstu, stride, stride,
- xd->eobs[16]);
+ xd->eobs[64]);
vp9_dequant_idct_add_16x16_c(q + 256, dq, dstv, dstv, stride, stride,
- xd->eobs[20]);
+ xd->eobs[80]);
}
--- a/vp9/decoder/vp9_detokenize.c
+++ b/vp9/decoder/vp9_detokenize.c
@@ -90,9 +90,8 @@
const int *const scan, TX_SIZE txfm_size) {
ENTROPY_CONTEXT* const A0 = (ENTROPY_CONTEXT *) xd->above_context;
ENTROPY_CONTEXT* const L0 = (ENTROPY_CONTEXT *) xd->left_context;
- const int aidx = vp9_block2above[txfm_size][block_idx];
- const int lidx = vp9_block2left[txfm_size][block_idx];
- ENTROPY_CONTEXT above_ec = A0[aidx] != 0, left_ec = L0[lidx] != 0;
+ int aidx, lidx;
+ ENTROPY_CONTEXT above_ec, left_ec;
FRAME_CONTEXT *const fc = &dx->common.fc;
int recent_energy = 0;
int pt, c = 0;
@@ -101,9 +100,22 @@
vp9_coeff_count *coef_counts;
const int ref = xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME;
+ if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB64X64) {
+ aidx = vp9_block2above_sb64[txfm_size][block_idx];
+ lidx = vp9_block2left_sb64[txfm_size][block_idx];
+ } else if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB32X32) {
+ aidx = vp9_block2above_sb[txfm_size][block_idx];
+ lidx = vp9_block2left_sb[txfm_size][block_idx];
+ } else {
+ aidx = vp9_block2above[txfm_size][block_idx];
+ lidx = vp9_block2left[txfm_size][block_idx];
+ }
+
switch (txfm_size) {
default:
case TX_4X4:
+ above_ec = A0[aidx] != 0;
+ left_ec = L0[lidx] != 0;
coef_probs = fc->coef_probs_4x4;
coef_counts = fc->coef_counts_4x4;
break;
@@ -240,7 +252,7 @@
if (type == PLANE_TYPE_UV) {
ENTROPY_CONTEXT *A1 = (ENTROPY_CONTEXT *) (xd->above_context + 1);
ENTROPY_CONTEXT *L1 = (ENTROPY_CONTEXT *) (xd->left_context + 1);
- A1[aidx] = A1[aidx + 1] = L1[aidx] = L1[lidx + 1] = A0[aidx];
+ A1[aidx] = A1[aidx + 1] = L1[lidx] = L1[lidx + 1] = A0[aidx];
if (txfm_size >= TX_32X32) {
ENTROPY_CONTEXT *A2 = (ENTROPY_CONTEXT *) (xd->above_context + 2);
ENTROPY_CONTEXT *L2 = (ENTROPY_CONTEXT *) (xd->left_context + 2);
@@ -272,24 +284,181 @@
MACROBLOCKD* const xd,
BOOL_DECODER* const bc) {
const int segment_id = xd->mode_info_context->mbmi.segment_id;
- int i, eobtotal = 0, seg_eob;
+ int i, eobtotal = 0, seg_eob, c;
- // Luma block
- int c = decode_coefs(pbi, xd, bc, 0, PLANE_TYPE_Y_WITH_DC,
+ switch (xd->mode_info_context->mbmi.txfm_size) {
+ case TX_32X32:
+ // Luma block
+ c = decode_coefs(pbi, xd, bc, 0, PLANE_TYPE_Y_WITH_DC,
DCT_DCT, get_eob(xd, segment_id, 1024),
- xd->sb_coeff_data.qcoeff,
- vp9_default_zig_zag1d_32x32, TX_32X32);
- xd->eobs[0] = c;
- eobtotal += c;
+ xd->qcoeff, vp9_default_zig_zag1d_32x32, TX_32X32);
+ xd->eobs[0] = c;
+ eobtotal += c;
- // 16x16 chroma blocks
- seg_eob = get_eob(xd, segment_id, 256);
- for (i = 16; i < 24; i += 4) {
- c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, DCT_DCT, seg_eob,
- xd->sb_coeff_data.qcoeff + 1024 + (i - 16) * 64,
- vp9_default_zig_zag1d_16x16, TX_16X16);
- xd->eobs[i] = c;
- eobtotal += c;
+ // 16x16 chroma blocks
+ seg_eob = get_eob(xd, segment_id, 256);
+ for (i = 64; i < 96; i += 16) {
+ c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, DCT_DCT, seg_eob,
+ xd->qcoeff + i * 16,
+ vp9_default_zig_zag1d_16x16, TX_16X16);
+ xd->eobs[i] = c;
+ eobtotal += c;
+ }
+ break;
+ case TX_16X16:
+ // 16x16 luma blocks
+ seg_eob = get_eob(xd, segment_id, 256);
+ for (i = 0; i < 64; i += 16) {
+ c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_Y_WITH_DC,
+ DCT_DCT, seg_eob, xd->qcoeff + i * 16,
+ vp9_default_zig_zag1d_16x16, TX_16X16);
+ xd->eobs[i] = c;
+ eobtotal += c;
+ }
+
+ // 16x16 chroma blocks
+ for (i = 64; i < 96; i += 16) {
+ c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, DCT_DCT, seg_eob,
+ xd->qcoeff + i * 16,
+ vp9_default_zig_zag1d_16x16, TX_16X16);
+ xd->eobs[i] = c;
+ eobtotal += c;
+ }
+ break;
+ case TX_8X8:
+ // 8x8 luma blocks
+ seg_eob = get_eob(xd, segment_id, 64);
+ for (i = 0; i < 64; i += 4) {
+ c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_Y_WITH_DC,
+ DCT_DCT, seg_eob, xd->qcoeff + i * 16,
+ vp9_default_zig_zag1d_8x8, TX_8X8);
+ xd->eobs[i] = c;
+ eobtotal += c;
+ }
+
+ // 8x8 chroma blocks
+ for (i = 64; i < 96; i += 4) {
+ c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, DCT_DCT, seg_eob,
+ xd->qcoeff + i * 16,
+ vp9_default_zig_zag1d_8x8, TX_8X8);
+ xd->eobs[i] = c;
+ eobtotal += c;
+ }
+ break;
+ case TX_4X4:
+ // 4x4 luma blocks
+ seg_eob = get_eob(xd, segment_id, 16);
+ for (i = 0; i < 64; i++) {
+ c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_Y_WITH_DC,
+ DCT_DCT, seg_eob, xd->qcoeff + i * 16,
+ vp9_default_zig_zag1d_4x4, TX_4X4);
+ xd->eobs[i] = c;
+ eobtotal += c;
+ }
+
+ // 4x4 chroma blocks
+ for (i = 64; i < 96; i++) {
+ c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, DCT_DCT, seg_eob,
+ xd->qcoeff + i * 16,
+ vp9_default_zig_zag1d_4x4, TX_4X4);
+ xd->eobs[i] = c;
+ eobtotal += c;
+ }
+ break;
+ default: assert(0);
+ }
+
+ return eobtotal;
+}
+
+int vp9_decode_sb64_tokens(VP9D_COMP* const pbi,
+ MACROBLOCKD* const xd,
+ BOOL_DECODER* const bc) {
+ const int segment_id = xd->mode_info_context->mbmi.segment_id;
+ int i, eobtotal = 0, seg_eob, c;
+
+ switch (xd->mode_info_context->mbmi.txfm_size) {
+ case TX_32X32:
+ // Luma block
+ seg_eob = get_eob(xd, segment_id, 1024);
+ for (i = 0; i < 256; i += 64) {
+ c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_Y_WITH_DC,
+ DCT_DCT, seg_eob, xd->qcoeff + i * 16,
+ vp9_default_zig_zag1d_32x32, TX_32X32);
+ xd->eobs[i] = c;
+ eobtotal += c;
+ }
+
+ // 32x32 chroma blocks
+ for (i = 256; i < 384; i += 64) {
+ c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, DCT_DCT, seg_eob,
+ xd->qcoeff + i * 16,
+ vp9_default_zig_zag1d_32x32, TX_32X32);
+ xd->eobs[i] = c;
+ eobtotal += c;
+ }
+ break;
+ case TX_16X16:
+ // 16x16 luma blocks
+ seg_eob = get_eob(xd, segment_id, 256);
+ for (i = 0; i < 256; i += 16) {
+ c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_Y_WITH_DC,
+ DCT_DCT, seg_eob, xd->qcoeff + i * 16,
+ vp9_default_zig_zag1d_16x16, TX_16X16);
+ xd->eobs[i] = c;
+ eobtotal += c;
+ }
+
+ // 16x16 chroma blocks
+ for (i = 256; i < 384; i += 16) {
+ c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, DCT_DCT, seg_eob,
+ xd->qcoeff + i * 16,
+ vp9_default_zig_zag1d_16x16, TX_16X16);
+ xd->eobs[i] = c;
+ eobtotal += c;
+ }
+ break;
+ case TX_8X8:
+ // 8x8 luma blocks
+ seg_eob = get_eob(xd, segment_id, 64);
+ for (i = 0; i < 256; i += 4) {
+ c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_Y_WITH_DC,
+ DCT_DCT, seg_eob, xd->qcoeff + i * 16,
+ vp9_default_zig_zag1d_8x8, TX_8X8);
+ xd->eobs[i] = c;
+ eobtotal += c;
+ }
+
+ // 8x8 chroma blocks
+ for (i = 256; i < 384; i += 4) {
+ c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, DCT_DCT, seg_eob,
+ xd->qcoeff + i * 16,
+ vp9_default_zig_zag1d_8x8, TX_8X8);
+ xd->eobs[i] = c;
+ eobtotal += c;
+ }
+ break;
+ case TX_4X4:
+ // 4x4 luma blocks
+ seg_eob = get_eob(xd, segment_id, 16);
+ for (i = 0; i < 256; i++) {
+ c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_Y_WITH_DC,
+ DCT_DCT, seg_eob, xd->qcoeff + i * 16,
+ vp9_default_zig_zag1d_4x4, TX_4X4);
+ xd->eobs[i] = c;
+ eobtotal += c;
+ }
+
+ // 4x4 chroma blocks
+ for (i = 256; i < 384; i++) {
+ c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, DCT_DCT, seg_eob,
+ xd->qcoeff + i * 16,
+ vp9_default_zig_zag1d_4x4, TX_4X4);
+ xd->eobs[i] = c;
+ eobtotal += c;
+ }
+ break;
+ default: assert(0);
}
return eobtotal;
--- a/vp9/decoder/vp9_detokenize.h
+++ b/vp9/decoder/vp9_detokenize.h
@@ -14,8 +14,6 @@
#include "vp9/decoder/vp9_onyxd_int.h"
-void vp9_reset_mb_tokens_context(MACROBLOCKD* const);
-
int vp9_decode_coefs_4x4(VP9D_COMP *dx, MACROBLOCKD *xd,
BOOL_DECODER* const bc,
PLANE_TYPE type, int i);
@@ -26,6 +24,10 @@
int vp9_decode_sb_tokens(VP9D_COMP* const pbi,
MACROBLOCKD* const xd,
BOOL_DECODER* const bc);
+
+int vp9_decode_sb64_tokens(VP9D_COMP* const pbi,
+ MACROBLOCKD* const xd,
+ BOOL_DECODER* const bc);
int vp9_decode_mb_tokens_4x4_uv(VP9D_COMP* const dx, MACROBLOCKD* const xd,
BOOL_DECODER* const bc);
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -45,7 +45,7 @@
vp9_coeff_stats tree_update_hist_4x4[BLOCK_TYPES];
vp9_coeff_stats tree_update_hist_8x8[BLOCK_TYPES];
vp9_coeff_stats tree_update_hist_16x16[BLOCK_TYPES];
-vp9_coeff_stats tree_update_hist_32x32[BLOCK_TYPES_32X32];
+vp9_coeff_stats tree_update_hist_32x32[BLOCK_TYPES];
extern unsigned int active_section;
#endif
@@ -1229,7 +1229,7 @@
#ifdef ENTROPY_STATS
cpi, context_counters_32x32,
#endif
- cpi->frame_branch_ct_32x32, BLOCK_TYPES_32X32);
+ cpi->frame_branch_ct_32x32, BLOCK_TYPES);
}
static void update_coef_probs_common(vp9_writer* const bc,
@@ -1388,7 +1388,7 @@
cpi->frame_coef_probs_32x32,
cpi->common.fc.coef_probs_32x32,
cpi->frame_branch_ct_32x32,
- BLOCK_TYPES_32X32);
+ BLOCK_TYPES);
}
}
@@ -2103,13 +2103,13 @@
fprintf(f, "\n/* Update probabilities for token entropy tree. */\n\n");
print_tree_update_for_type(f, tree_update_hist_4x4, BLOCK_TYPES,
- "vp9_coef_update_probs_4x4[BLOCK_TYPES_4X4]");
+ "vp9_coef_update_probs_4x4[BLOCK_TYPES]");
print_tree_update_for_type(f, tree_update_hist_8x8, BLOCK_TYPES,
- "vp9_coef_update_probs_8x8[BLOCK_TYPES_8X8]");
+ "vp9_coef_update_probs_8x8[BLOCK_TYPES]");
print_tree_update_for_type(f, tree_update_hist_16x16, BLOCK_TYPES,
- "vp9_coef_update_probs_16x16[BLOCK_TYPES_16X16]");
- print_tree_update_for_type(f, tree_update_hist_32x32, BLOCK_TYPES_32X32,
- "vp9_coef_update_probs_32x32[BLOCK_TYPES_32X32]");
+ "vp9_coef_update_probs_16x16[BLOCK_TYPES]");
+ print_tree_update_for_type(f, tree_update_hist_32x32, BLOCK_TYPES,
+ "vp9_coef_update_probs_32x32[BLOCK_TYPES]");
fclose(f);
f = fopen("treeupdate.bin", "wb");
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -83,19 +83,12 @@
int64_t txfm_rd_diff[NB_TXFM_MODES];
} PICK_MODE_CONTEXT;
-typedef struct superblock {
- DECLARE_ALIGNED(16, int16_t, src_diff[32*32+16*16*2]);
- DECLARE_ALIGNED(16, int16_t, coeff[32*32+16*16*2]);
-} SUPERBLOCK;
-
typedef struct macroblock MACROBLOCK;
struct macroblock {
- DECLARE_ALIGNED(16, int16_t, src_diff[384]); // 16x16 Y 8x8 U 8x8 V
- DECLARE_ALIGNED(16, int16_t, coeff[384]); // 16x16 Y 8x8 U 8x8 V
+ DECLARE_ALIGNED(16, int16_t, src_diff[64*64+32*32*2]);
+ DECLARE_ALIGNED(16, int16_t, coeff[64*64+32*32*2]);
// 16 Y blocks, 4 U blocks, 4 V blocks,
BLOCK block[24];
-
- SUPERBLOCK sb_coeff_data;
YV12_BUFFER_CONFIG src;
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -1820,63 +1820,6 @@
#endif
}
-static void update_sb_skip_coeff_state(VP9_COMP *cpi,
- ENTROPY_CONTEXT_PLANES ta[4],
- ENTROPY_CONTEXT_PLANES tl[4],
- TOKENEXTRA *t[4],
- TOKENEXTRA **tp,
- int skip[4], int output_enabled) {
- MACROBLOCK *const x = &cpi->mb;
- TOKENEXTRA tokens[4][16 * 25];
- int n_tokens[4], n;
-
- // if there were no skips, we don't need to do anything
- if (!skip[0] && !skip[1] && !skip[2] && !skip[3])
- return;
-
- // if we don't do coeff skipping for this frame, we don't
- // need to do anything here
- if (!cpi->common.mb_no_coeff_skip)
- return;
-
- // if all 4 MBs skipped coeff coding, nothing to be done
- if (skip[0] && skip[1] && skip[2] && skip[3])
- return;
-
- // so the situation now is that we want to skip coeffs
- // for some MBs, but not all, and we didn't code EOB
- // coefficients for them. However, the skip flag for this
- // SB will be 0 overall, so we need to insert EOBs in the
- // middle of the token tree. Do so here.
- n_tokens[0] = t[1] - t[0];
- n_tokens[1] = t[2] - t[1];
- n_tokens[2] = t[3] - t[2];
- n_tokens[3] = *tp - t[3];
- if (n_tokens[0])
- memcpy(tokens[0], t[0], n_tokens[0] * sizeof(*t[0]));
- if (n_tokens[1])
- memcpy(tokens[1], t[1], n_tokens[1] * sizeof(*t[0]));
- if (n_tokens[2])
- memcpy(tokens[2], t[2], n_tokens[2] * sizeof(*t[0]));
- if (n_tokens[3])
- memcpy(tokens[3], t[3], n_tokens[3] * sizeof(*t[0]));
-
- // reset pointer, stuff EOBs where necessary
- *tp = t[0];
- for (n = 0; n < 4; n++) {
- if (skip[n]) {
- x->e_mbd.above_context = &ta[n];
- x->e_mbd.left_context = &tl[n];
- vp9_stuff_mb(cpi, &x->e_mbd, tp, !output_enabled);
- } else {
- if (n_tokens[n]) {
- memcpy(*tp, tokens[n], sizeof(*t[0]) * n_tokens[n]);
- }
- (*tp) += n_tokens[n];
- }
- }
-}
-
static void update_sb64_skip_coeff_state(VP9_COMP *cpi,
ENTROPY_CONTEXT_PLANES ta[16],
ENTROPY_CONTEXT_PLANES tl[16],
@@ -1994,7 +1937,9 @@
VP9_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
+ MODE_INFO *mi = xd->mode_info_context;
+ MB_MODE_INFO *const mbmi = &mi->mbmi;
+ const int mis = cm->mode_info_stride;
unsigned char ref_pred_flag;
assert(!xd->mode_info_context->mbmi.sb_type);
@@ -2190,12 +2135,11 @@
vp9_tokenize_mb(cpi, xd, t, !output_enabled);
} else {
- int mb_skip_context =
- cpi->common.mb_no_coeff_skip ?
- (x->e_mbd.mode_info_context - 1)->mbmi.mb_skip_coeff +
- (x->e_mbd.mode_info_context - cpi->common.mode_info_stride)->mbmi.mb_skip_coeff :
- 0;
- if (cpi->common.mb_no_coeff_skip) {
+ // FIXME(rbultje): not tile-aware (mi - 1)
+ int mb_skip_context = cpi->common.mb_no_coeff_skip ?
+ (mi - 1)->mbmi.mb_skip_coeff + (mi - mis)->mbmi.mb_skip_coeff : 0;
+
+ if (cm->mb_no_coeff_skip) {
mbmi->mb_skip_coeff = 1;
if (output_enabled)
cpi->skip_true_count[mb_skip_context]++;
@@ -2250,12 +2194,8 @@
int src_y_stride = x->src.y_stride, dst_y_stride = xd->dst.y_stride;
int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride;
unsigned char ref_pred_flag;
- int n;
- TOKENEXTRA *tp[4];
- int skip[4];
MODE_INFO *mi = x->e_mbd.mode_info_context;
unsigned int segment_id = mi->mbmi.segment_id;
- ENTROPY_CONTEXT_PLANES ta[4], tl[4];
const int mis = cm->mode_info_stride;
if (cm->frame_type == KEY_FRAME) {
@@ -2342,118 +2282,101 @@
mb_row, mb_col);
}
- if (xd->mode_info_context->mbmi.txfm_size == TX_32X32) {
- if (!x->skip) {
- vp9_subtract_sby_s_c(x->sb_coeff_data.src_diff, src, src_y_stride,
- dst, dst_y_stride);
- vp9_subtract_sbuv_s_c(x->sb_coeff_data.src_diff,
- usrc, vsrc, src_uv_stride,
- udst, vdst, dst_uv_stride);
- vp9_transform_sby_32x32(x);
- vp9_transform_sbuv_16x16(x);
- vp9_quantize_sby_32x32(x);
- vp9_quantize_sbuv_16x16(x);
- // TODO(rbultje): trellis optimize
- vp9_inverse_transform_sbuv_16x16(&x->e_mbd.sb_coeff_data);
- vp9_inverse_transform_sby_32x32(&x->e_mbd.sb_coeff_data);
- vp9_recon_sby_s_c(&x->e_mbd, dst);
- vp9_recon_sbuv_s_c(&x->e_mbd, udst, vdst);
-
- vp9_tokenize_sb(cpi, &x->e_mbd, t, !output_enabled);
- } else {
- int mb_skip_context =
- cpi->common.mb_no_coeff_skip ?
- (mi - 1)->mbmi.mb_skip_coeff +
- (mi - mis)->mbmi.mb_skip_coeff :
- 0;
- mi->mbmi.mb_skip_coeff = 1;
- if (cm->mb_no_coeff_skip) {
- if (output_enabled)
- cpi->skip_true_count[mb_skip_context]++;
- vp9_fix_contexts_sb(xd);
- } else {
- vp9_stuff_sb(cpi, xd, t, !output_enabled);
- if (output_enabled)
- cpi->skip_false_count[mb_skip_context]++;
- }
+ if (!x->skip) {
+ vp9_subtract_sby_s_c(x->src_diff, src, src_y_stride,
+ dst, dst_y_stride);
+ vp9_subtract_sbuv_s_c(x->src_diff,
+ usrc, vsrc, src_uv_stride,
+ udst, vdst, dst_uv_stride);
+ switch (mi->mbmi.txfm_size) {
+ case TX_32X32:
+ vp9_transform_sby_32x32(x);
+ vp9_transform_sbuv_16x16(x);
+ vp9_quantize_sby_32x32(x);
+ vp9_quantize_sbuv_16x16(x);
+ if (x->optimize) {
+ vp9_optimize_sby_32x32(x);
+ vp9_optimize_sbuv_16x16(x);
+ }
+ vp9_inverse_transform_sby_32x32(xd);
+ vp9_inverse_transform_sbuv_16x16(xd);
+ break;
+ case TX_16X16:
+ vp9_transform_sby_16x16(x);
+ vp9_transform_sbuv_16x16(x);
+ vp9_quantize_sby_16x16(x);
+ vp9_quantize_sbuv_16x16(x);
+ if (x->optimize) {
+ vp9_optimize_sby_16x16(x);
+ vp9_optimize_sbuv_16x16(x);
+ }
+ vp9_inverse_transform_sby_16x16(xd);
+ vp9_inverse_transform_sbuv_16x16(xd);
+ break;
+ case TX_8X8:
+ vp9_transform_sby_8x8(x);
+ vp9_transform_sbuv_8x8(x);
+ vp9_quantize_sby_8x8(x);
+ vp9_quantize_sbuv_8x8(x);
+ if (x->optimize) {
+ vp9_optimize_sby_8x8(x);
+ vp9_optimize_sbuv_8x8(x);
+ }
+ vp9_inverse_transform_sby_8x8(xd);
+ vp9_inverse_transform_sbuv_8x8(xd);
+ break;
+ case TX_4X4:
+ vp9_transform_sby_4x4(x);
+ vp9_transform_sbuv_4x4(x);
+ vp9_quantize_sby_4x4(x);
+ vp9_quantize_sbuv_4x4(x);
+ if (x->optimize) {
+ vp9_optimize_sby_4x4(x);
+ vp9_optimize_sbuv_4x4(x);
+ }
+ vp9_inverse_transform_sby_4x4(xd);
+ vp9_inverse_transform_sbuv_4x4(xd);
+ break;
+ default: assert(0);
}
+ vp9_recon_sby_s_c(xd, dst);
+ vp9_recon_sbuv_s_c(xd, udst, vdst);
- // copy skip flag on all mb_mode_info contexts in this SB
- // if this was a skip at this txfm size
- if (mb_col < cm->mb_cols - 1)
- mi[1].mbmi.mb_skip_coeff = mi->mbmi.mb_skip_coeff;
- if (mb_row < cm->mb_rows - 1) {
- mi[mis].mbmi.mb_skip_coeff = mi->mbmi.mb_skip_coeff;
- if (mb_col < cm->mb_cols - 1)
- mi[mis + 1].mbmi.mb_skip_coeff = mi->mbmi.mb_skip_coeff;
- }
- skip[0] = skip[2] = skip[1] = skip[3] = mi->mbmi.mb_skip_coeff;
+ vp9_tokenize_sb(cpi, xd, t, !output_enabled);
} else {
- for (n = 0; n < 4; n++) {
- int x_idx = n & 1, y_idx = n >> 1;
+ // FIXME(rbultje): not tile-aware (mi - 1)
+ int mb_skip_context = cm->mb_no_coeff_skip ?
+ (mi - 1)->mbmi.mb_skip_coeff + (mi - mis)->mbmi.mb_skip_coeff : 0;
- xd->left_context = cm->left_context + y_idx + (mb_row & 2);
- xd->above_context = cm->above_context + mb_col + x_idx;
- memcpy(&ta[n], xd->above_context, sizeof(ta[n]));
- memcpy(&tl[n], xd->left_context, sizeof(tl[n]));
- tp[n] = *t;
- xd->mode_info_context = mi + x_idx + y_idx * mis;
-
- if (!x->skip) {
- vp9_subtract_mby_s_c(x->src_diff,
- src + x_idx * 16 + y_idx * 16 * src_y_stride,
- src_y_stride,
- dst + x_idx * 16 + y_idx * 16 * dst_y_stride,
- dst_y_stride);
- vp9_subtract_mbuv_s_c(x->src_diff,
- usrc + x_idx * 8 + y_idx * 8 * src_uv_stride,
- vsrc + x_idx * 8 + y_idx * 8 * src_uv_stride,
- src_uv_stride,
- udst + x_idx * 8 + y_idx * 8 * dst_uv_stride,
- vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride,
- dst_uv_stride);
- vp9_fidct_mb(x);
- vp9_recon_mby_s_c(&x->e_mbd,
- dst + x_idx * 16 + y_idx * 16 * dst_y_stride);
- vp9_recon_mbuv_s_c(&x->e_mbd,
- udst + x_idx * 8 + y_idx * 8 * dst_uv_stride,
- vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride);
-
- vp9_tokenize_mb(cpi, &x->e_mbd, t, !output_enabled);
- skip[n] = xd->mode_info_context->mbmi.mb_skip_coeff;
- } else {
- int mb_skip_context = cpi->common.mb_no_coeff_skip ?
- (x->e_mbd.mode_info_context - 1)->mbmi.mb_skip_coeff +
- (x->e_mbd.mode_info_context - mis)->mbmi.mb_skip_coeff :
- 0;
- xd->mode_info_context->mbmi.mb_skip_coeff = skip[n] = 1;
- if (cpi->common.mb_no_coeff_skip) {
- // TODO(rbultje) this should be done per-sb instead of per-mb?
- if (output_enabled)
- cpi->skip_true_count[mb_skip_context]++;
- vp9_reset_mb_tokens_context(xd);
- } else {
- vp9_stuff_mb(cpi, xd, t, !output_enabled);
- // TODO(rbultje) this should be done per-sb instead of per-mb?
- if (output_enabled)
- cpi->skip_false_count[mb_skip_context]++;
- }
- }
+ mi->mbmi.mb_skip_coeff = 1;
+ if (cm->mb_no_coeff_skip) {
+ if (output_enabled)
+ cpi->skip_true_count[mb_skip_context]++;
+ vp9_reset_sb_tokens_context(xd);
+ } else {
+ vp9_stuff_sb(cpi, xd, t, !output_enabled);
+ if (output_enabled)
+ cpi->skip_false_count[mb_skip_context]++;
}
+ }
- xd->mode_info_context = mi;
- update_sb_skip_coeff_state(cpi, ta, tl, tp, t, skip, output_enabled);
+ // copy skip flag on all mb_mode_info contexts in this SB
+ // if this was a skip at this txfm size
+ if (mb_col < cm->mb_cols - 1)
+ mi[1].mbmi.mb_skip_coeff = mi->mbmi.mb_skip_coeff;
+ if (mb_row < cm->mb_rows - 1) {
+ mi[mis].mbmi.mb_skip_coeff = mi->mbmi.mb_skip_coeff;
+ if (mb_col < cm->mb_cols - 1)
+ mi[mis + 1].mbmi.mb_skip_coeff = mi->mbmi.mb_skip_coeff;
}
if (output_enabled) {
if (cm->txfm_mode == TX_MODE_SELECT &&
- !((cm->mb_no_coeff_skip && skip[0] && skip[1] && skip[2] && skip[3]) ||
+ !((cm->mb_no_coeff_skip && mi->mbmi.mb_skip_coeff) ||
(vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)))) {
cpi->txfm_count_32x32p[mi->mbmi.txfm_size]++;
} else {
- TX_SIZE sz = (cm->txfm_mode == TX_MODE_SELECT) ?
- TX_32X32 :
- cm->txfm_mode;
+ TX_SIZE sz = (cm->txfm_mode == TX_MODE_SELECT) ? TX_32X32 : cm->txfm_mode;
mi->mbmi.txfm_size = sz;
if (mb_col < cm->mb_cols - 1)
mi[1].mbmi.txfm_size = sz;
@@ -2481,11 +2404,8 @@
int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride;
unsigned char ref_pred_flag;
int n;
- TOKENEXTRA *tp[16];
- int skip[16];
MODE_INFO *mi = x->e_mbd.mode_info_context;
unsigned int segment_id = mi->mbmi.segment_id;
- ENTROPY_CONTEXT_PLANES ta[16], tl[16];
const int mis = cm->mode_info_stride;
if (cm->frame_type == KEY_FRAME) {
@@ -2571,149 +2491,99 @@
mb_row, mb_col);
}
- if (xd->mode_info_context->mbmi.txfm_size == TX_32X32) {
- int n;
+ if (!x->skip) {
+ vp9_subtract_sb64y_s_c(x->src_diff, src, src_y_stride, dst, dst_y_stride);
+ vp9_subtract_sb64uv_s_c(x->src_diff, usrc, vsrc, src_uv_stride,
+ udst, vdst, dst_uv_stride);
- for (n = 0; n < 4; n++) {
- int x_idx = n & 1, y_idx = n >> 1;
-
- xd->mode_info_context = mi + x_idx * 2 + mis * y_idx * 2;
- xd->left_context = cm->left_context + (y_idx << 1);
- xd->above_context = cm->above_context + mb_col + (x_idx << 1);
- memcpy(&ta[n * 2], xd->above_context, sizeof(*ta) * 2);
- memcpy(&tl[n * 2], xd->left_context, sizeof(*tl) * 2);
- tp[n] = *t;
- xd->mode_info_context = mi + x_idx * 2 + y_idx * mis * 2;
- if (!x->skip) {
- vp9_subtract_sby_s_c(x->sb_coeff_data.src_diff,
- src + x_idx * 32 + y_idx * 32 * src_y_stride,
- src_y_stride,
- dst + x_idx * 32 + y_idx * 32 * dst_y_stride,
- dst_y_stride);
- vp9_subtract_sbuv_s_c(x->sb_coeff_data.src_diff,
- usrc + x_idx * 16 + y_idx * 16 * src_uv_stride,
- vsrc + x_idx * 16 + y_idx * 16 * src_uv_stride,
- src_uv_stride,
- udst + x_idx * 16 + y_idx * 16 * dst_uv_stride,
- vdst + x_idx * 16 + y_idx * 16 * dst_uv_stride,
- dst_uv_stride);
- vp9_transform_sby_32x32(x);
- vp9_transform_sbuv_16x16(x);
- vp9_quantize_sby_32x32(x);
- vp9_quantize_sbuv_16x16(x);
- // TODO(rbultje): trellis optimize
- vp9_inverse_transform_sbuv_16x16(&x->e_mbd.sb_coeff_data);
- vp9_inverse_transform_sby_32x32(&x->e_mbd.sb_coeff_data);
- vp9_recon_sby_s_c(&x->e_mbd,
- dst + 32 * x_idx + 32 * y_idx * dst_y_stride);
- vp9_recon_sbuv_s_c(&x->e_mbd,
- udst + x_idx * 16 + y_idx * 16 * dst_uv_stride,
- vdst + x_idx * 16 + y_idx * 16 * dst_uv_stride);
-
- vp9_tokenize_sb(cpi, &x->e_mbd, t, !output_enabled);
- } else {
- int mb_skip_context = cpi->common.mb_no_coeff_skip ?
- (mi - 1)->mbmi.mb_skip_coeff +
- (mi - mis)->mbmi.mb_skip_coeff : 0;
- xd->mode_info_context->mbmi.mb_skip_coeff = 1;
- if (cm->mb_no_coeff_skip) {
- if (output_enabled)
- cpi->skip_true_count[mb_skip_context]++;
- vp9_fix_contexts_sb(xd);
- } else {
- vp9_stuff_sb(cpi, xd, t, !output_enabled);
- if (output_enabled)
- cpi->skip_false_count[mb_skip_context]++;
+ switch (xd->mode_info_context->mbmi.txfm_size) {
+ case TX_32X32:
+ vp9_transform_sb64y_32x32(x);
+ vp9_transform_sb64uv_32x32(x);
+ vp9_quantize_sb64y_32x32(x);
+ vp9_quantize_sb64uv_32x32(x);
+ if (x->optimize) {
+ vp9_optimize_sb64y_32x32(x);
+ vp9_optimize_sb64uv_32x32(x);
}
- }
-
- // copy skip flag on all mb_mode_info contexts in this SB
- // if this was a skip at this txfm size
- if (mb_col + x_idx * 2 < cm->mb_cols - 1)
- mi[mis * y_idx * 2 + x_idx * 2 + 1].mbmi.mb_skip_coeff =
- mi[mis * y_idx * 2 + x_idx * 2].mbmi.mb_skip_coeff;
- if (mb_row + y_idx * 2 < cm->mb_rows - 1) {
- mi[mis * y_idx * 2 + x_idx * 2 + mis].mbmi.mb_skip_coeff =
- mi[mis * y_idx * 2 + x_idx * 2].mbmi.mb_skip_coeff;
- if (mb_col + x_idx * 2 < cm->mb_cols - 1)
- mi[mis * y_idx * 2 + x_idx * 2 + mis + 1].mbmi.mb_skip_coeff =
- mi[mis * y_idx * 2 + x_idx * 2].mbmi.mb_skip_coeff;
- }
- skip[n] = xd->mode_info_context->mbmi.mb_skip_coeff;
+ vp9_inverse_transform_sb64y_32x32(xd);
+ vp9_inverse_transform_sb64uv_32x32(xd);
+ break;
+ case TX_16X16:
+ vp9_transform_sb64y_16x16(x);
+ vp9_transform_sb64uv_16x16(x);
+ vp9_quantize_sb64y_16x16(x);
+ vp9_quantize_sb64uv_16x16(x);
+ if (x->optimize) {
+ vp9_optimize_sb64y_16x16(x);
+ vp9_optimize_sb64uv_16x16(x);
+ }
+ vp9_inverse_transform_sb64y_16x16(xd);
+ vp9_inverse_transform_sb64uv_16x16(xd);
+ break;
+ case TX_8X8:
+ vp9_transform_sb64y_8x8(x);
+ vp9_transform_sb64uv_8x8(x);
+ vp9_quantize_sb64y_8x8(x);
+ vp9_quantize_sb64uv_8x8(x);
+ if (x->optimize) {
+ vp9_optimize_sb64y_8x8(x);
+ vp9_optimize_sb64uv_8x8(x);
+ }
+ vp9_inverse_transform_sb64y_8x8(xd);
+ vp9_inverse_transform_sb64uv_8x8(xd);
+ break;
+ case TX_4X4:
+ vp9_transform_sb64y_4x4(x);
+ vp9_transform_sb64uv_4x4(x);
+ vp9_quantize_sb64y_4x4(x);
+ vp9_quantize_sb64uv_4x4(x);
+ if (x->optimize) {
+ vp9_optimize_sb64y_4x4(x);
+ vp9_optimize_sb64uv_4x4(x);
+ }
+ vp9_inverse_transform_sb64y_4x4(xd);
+ vp9_inverse_transform_sb64uv_4x4(xd);
+ break;
+ default: assert(0);
}
+ vp9_recon_sb64y_s_c(xd, dst);
+ vp9_recon_sb64uv_s_c(&x->e_mbd, udst, vdst);
+
+ vp9_tokenize_sb64(cpi, &x->e_mbd, t, !output_enabled);
} else {
- for (n = 0; n < 16; n++) {
- const int x_idx = n & 3, y_idx = n >> 2;
+ // FIXME(rbultje): not tile-aware (mi - 1)
+ int mb_skip_context = cpi->common.mb_no_coeff_skip ?
+ (mi - 1)->mbmi.mb_skip_coeff + (mi - mis)->mbmi.mb_skip_coeff : 0;
- xd->left_context = cm->left_context + y_idx;
- xd->above_context = cm->above_context + mb_col + x_idx;
- memcpy(&ta[n], xd->above_context, sizeof(ta[n]));
- memcpy(&tl[n], xd->left_context, sizeof(tl[n]));
- tp[n] = *t;
- xd->mode_info_context = mi + x_idx + y_idx * mis;
-
- if (!x->skip) {
- vp9_subtract_mby_s_c(x->src_diff,
- src + x_idx * 16 + y_idx * 16 * src_y_stride,
- src_y_stride,
- dst + x_idx * 16 + y_idx * 16 * dst_y_stride,
- dst_y_stride);
- vp9_subtract_mbuv_s_c(x->src_diff,
- usrc + x_idx * 8 + y_idx * 8 * src_uv_stride,
- vsrc + x_idx * 8 + y_idx * 8 * src_uv_stride,
- src_uv_stride,
- udst + x_idx * 8 + y_idx * 8 * dst_uv_stride,
- vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride,
- dst_uv_stride);
- vp9_fidct_mb(x);
- vp9_recon_mby_s_c(&x->e_mbd,
- dst + x_idx * 16 + y_idx * 16 * dst_y_stride);
- vp9_recon_mbuv_s_c(&x->e_mbd,
- udst + x_idx * 8 + y_idx * 8 * dst_uv_stride,
- vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride);
-
- vp9_tokenize_mb(cpi, &x->e_mbd, t, !output_enabled);
- skip[n] = xd->mode_info_context->mbmi.mb_skip_coeff;
- } else {
- int mb_skip_context = cpi->common.mb_no_coeff_skip ?
- (x->e_mbd.mode_info_context - 1)->mbmi.mb_skip_coeff +
- (x->e_mbd.mode_info_context - mis)->mbmi.mb_skip_coeff : 0;
- xd->mode_info_context->mbmi.mb_skip_coeff = skip[n] = 1;
- if (cpi->common.mb_no_coeff_skip) {
- // TODO(rbultje) this should be done per-sb instead of per-mb?
- if (output_enabled)
- cpi->skip_true_count[mb_skip_context]++;
- vp9_reset_mb_tokens_context(xd);
- } else {
- vp9_stuff_mb(cpi, xd, t, !output_enabled);
- // TODO(rbultje) this should be done per-sb instead of per-mb?
- if (output_enabled)
- cpi->skip_false_count[mb_skip_context]++;
- }
- }
+ xd->mode_info_context->mbmi.mb_skip_coeff = 1;
+ if (cm->mb_no_coeff_skip) {
+ if (output_enabled)
+ cpi->skip_true_count[mb_skip_context]++;
+ vp9_reset_sb64_tokens_context(xd);
+ } else {
+ vp9_stuff_sb64(cpi, xd, t, !output_enabled);
+ if (output_enabled)
+ cpi->skip_false_count[mb_skip_context]++;
}
}
- xd->mode_info_context = mi;
- update_sb64_skip_coeff_state(cpi, ta, tl, tp, t, skip, output_enabled);
+ // copy skip flag on all mb_mode_info contexts in this SB
+ // if this was a skip at this txfm size
+ for (n = 1; n < 16; n++) {
+ const int x_idx = n & 3, y_idx = n >> 2;
+ if (mb_col + x_idx < cm->mb_cols && mb_row + y_idx < cm->mb_rows)
+ mi[x_idx + y_idx * mis].mbmi.mb_skip_coeff = mi->mbmi.mb_skip_coeff;
+ }
if (output_enabled) {
if (cm->txfm_mode == TX_MODE_SELECT &&
- !((cm->mb_no_coeff_skip &&
- ((mi->mbmi.txfm_size == TX_32X32 &&
- skip[0] && skip[1] && skip[2] && skip[3]) ||
- (mi->mbmi.txfm_size != TX_32X32 &&
- skip[0] && skip[1] && skip[2] && skip[3] &&
- skip[4] && skip[5] && skip[6] && skip[7] &&
- skip[8] && skip[9] && skip[10] && skip[11] &&
- skip[12] && skip[13] && skip[14] && skip[15]))) ||
+ !((cm->mb_no_coeff_skip && mi->mbmi.mb_skip_coeff) ||
(vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)))) {
cpi->txfm_count_32x32p[mi->mbmi.txfm_size]++;
} else {
int x, y;
- TX_SIZE sz = (cm->txfm_mode == TX_MODE_SELECT) ?
- TX_32X32 :
- cm->txfm_mode;
+ TX_SIZE sz = (cm->txfm_mode == TX_MODE_SELECT) ? TX_32X32 : cm->txfm_mode;
for (y = 0; y < 4; y++) {
for (x = 0; x < 4; x++) {
if (mb_col + x < cm->mb_cols && mb_row + y < cm->mb_rows) {
--- a/vp9/encoder/vp9_encodeintra.c
+++ b/vp9/encoder/vp9_encodeintra.c
@@ -58,7 +58,8 @@
} else {
x->fwd_txm4x4(be->src_diff, be->coeff, 32);
x->quantize_b_4x4(x, ib);
- vp9_inverse_transform_b_4x4(&x->e_mbd, ib, 32);
+ vp9_inverse_transform_b_4x4(&x->e_mbd, x->e_mbd.eobs[ib],
+ b->dqcoeff, b->diff, 32);
}
vp9_recon_b(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
@@ -174,13 +175,16 @@
} else if (!(i & 1) && get_tx_type_4x4(xd, b + 1) == DCT_DCT) {
x->fwd_txm8x4(be->src_diff, be->coeff, 32);
x->quantize_b_4x4_pair(x, ib + iblock[i], ib + iblock[i] + 1);
- vp9_inverse_transform_b_4x4(xd, ib + iblock[i], 32);
- vp9_inverse_transform_b_4x4(xd, ib + iblock[i] + 1, 32);
+ vp9_inverse_transform_b_4x4(xd, xd->eobs[ib + iblock[i]],
+ b->dqcoeff, b->diff, 32);
+ vp9_inverse_transform_b_4x4(xd, xd->eobs[ib + iblock[i] + 1],
+ (b + 1)->dqcoeff, (b + 1)->diff, 32);
i++;
} else {
x->fwd_txm4x4(be->src_diff, be->coeff, 32);
x->quantize_b_4x4(x, ib + iblock[i]);
- vp9_inverse_transform_b_4x4(xd, ib + iblock[i], 32);
+ vp9_inverse_transform_b_4x4(xd, xd->eobs[ib + iblock[i]],
+ b->dqcoeff, b->diff, 32);
}
}
}
@@ -210,7 +214,8 @@
x->fwd_txm4x4(be->src_diff, be->coeff, 16);
x->quantize_b_4x4(x, ib);
- vp9_inverse_transform_b_4x4(&x->e_mbd, ib, 16);
+ vp9_inverse_transform_b_4x4(&x->e_mbd, x->e_mbd.eobs[ib],
+ b->dqcoeff, b->diff, 16);
vp9_recon_uv_b_c(b->predictor, b->diff, *(b->base_dst) + b->dst,
b->dst_stride);
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -146,6 +146,50 @@
}
}
+void vp9_subtract_sb64y_s_c(int16_t *diff, const uint8_t *src, int src_stride,
+ const uint8_t *pred, int dst_stride) {
+ int r, c;
+
+ for (r = 0; r < 64; r++) {
+ for (c = 0; c < 64; c++) {
+ diff[c] = src[c] - pred[c];
+ }
+
+ diff += 64;
+ pred += dst_stride;
+ src += src_stride;
+ }
+}
+
+void vp9_subtract_sb64uv_s_c(int16_t *diff, const uint8_t *usrc,
+ const uint8_t *vsrc, int src_stride,
+ const uint8_t *upred,
+ const uint8_t *vpred, int dst_stride) {
+ int16_t *udiff = diff + 4096;
+ int16_t *vdiff = diff + 4096 + 1024;
+ int r, c;
+
+ for (r = 0; r < 32; r++) {
+ for (c = 0; c < 32; c++) {
+ udiff[c] = usrc[c] - upred[c];
+ }
+
+ udiff += 32;
+ upred += dst_stride;
+ usrc += src_stride;
+ }
+
+ for (r = 0; r < 32; r++) {
+ for (c = 0; c < 32; c++) {
+ vdiff[c] = vsrc[c] - vpred[c];
+ }
+
+ vdiff += 32;
+ vpred += dst_stride;
+ vsrc += src_stride;
+ }
+}
+
void vp9_subtract_mby_c(int16_t *diff, uint8_t *src,
uint8_t *pred, int stride) {
vp9_subtract_mby_s_c(diff, src, stride, pred, 16);
@@ -245,17 +289,170 @@
}
void vp9_transform_sby_32x32(MACROBLOCK *x) {
- SUPERBLOCK * const x_sb = &x->sb_coeff_data;
- vp9_short_fdct32x32(x_sb->src_diff, x_sb->coeff, 64);
+ vp9_short_fdct32x32(x->src_diff, x->coeff, 64);
}
+void vp9_transform_sby_16x16(MACROBLOCK *x) {
+ int n;
+
+ for (n = 0; n < 4; n++) {
+ const int x_idx = n & 1, y_idx = n >> 1;
+
+ x->fwd_txm16x16(x->src_diff + y_idx * 32 * 16 + x_idx * 16,
+ x->coeff + n * 256, 64);
+ }
+}
+
+void vp9_transform_sby_8x8(MACROBLOCK *x) {
+ int n;
+
+ for (n = 0; n < 16; n++) {
+ const int x_idx = n & 3, y_idx = n >> 2;
+
+ x->fwd_txm8x8(x->src_diff + y_idx * 32 * 8 + x_idx * 8,
+ x->coeff + n * 64, 64);
+ }
+}
+
+void vp9_transform_sby_4x4(MACROBLOCK *x) {
+ int n;
+
+ for (n = 0; n < 64; n++) {
+ const int x_idx = n & 7, y_idx = n >> 3;
+
+ x->fwd_txm4x4(x->src_diff + y_idx * 32 * 4 + x_idx * 4,
+ x->coeff + n * 16, 64);
+ }
+}
+
void vp9_transform_sbuv_16x16(MACROBLOCK *x) {
- SUPERBLOCK * const x_sb = &x->sb_coeff_data;
vp9_clear_system_state();
- x->fwd_txm16x16(x_sb->src_diff + 1024, x_sb->coeff + 1024, 32);
- x->fwd_txm16x16(x_sb->src_diff + 1280, x_sb->coeff + 1280, 32);
+ x->fwd_txm16x16(x->src_diff + 1024, x->coeff + 1024, 32);
+ x->fwd_txm16x16(x->src_diff + 1280, x->coeff + 1280, 32);
}
+void vp9_transform_sbuv_8x8(MACROBLOCK *x) {
+ int n;
+
+ vp9_clear_system_state();
+ for (n = 0; n < 4; n++) {
+ const int x_idx = n & 1, y_idx = n >> 1;
+
+ x->fwd_txm8x8(x->src_diff + 1024 + y_idx * 16 * 8 + x_idx * 8,
+ x->coeff + 1024 + n * 64, 32);
+ x->fwd_txm8x8(x->src_diff + 1280 + y_idx * 16 * 8 + x_idx * 8,
+ x->coeff + 1280 + n * 64, 32);
+ }
+}
+
+void vp9_transform_sbuv_4x4(MACROBLOCK *x) {
+ int n;
+
+ vp9_clear_system_state();
+ for (n = 0; n < 16; n++) {
+ const int x_idx = n & 3, y_idx = n >> 2;
+
+ x->fwd_txm4x4(x->src_diff + 1024 + y_idx * 16 * 4 + x_idx * 4,
+ x->coeff + 1024 + n * 16, 32);
+ x->fwd_txm4x4(x->src_diff + 1280 + y_idx * 16 * 4 + x_idx * 4,
+ x->coeff + 1280 + n * 16, 32);
+ }
+}
+
+void vp9_transform_sb64y_32x32(MACROBLOCK *x) {
+ int n;
+
+ for (n = 0; n < 4; n++) {
+ const int x_idx = n & 1, y_idx = n >> 1;
+
+ vp9_short_fdct32x32(x->src_diff + y_idx * 64 * 32 + x_idx * 32,
+ x->coeff + n * 1024, 128);
+ }
+}
+
+void vp9_transform_sb64y_16x16(MACROBLOCK *x) {
+ int n;
+
+ for (n = 0; n < 16; n++) {
+ const int x_idx = n & 3, y_idx = n >> 2;
+
+ x->fwd_txm16x16(x->src_diff + y_idx * 64 * 16 + x_idx * 16,
+ x->coeff + n * 256, 128);
+ }
+}
+
+void vp9_transform_sb64y_8x8(MACROBLOCK *x) {
+ int n;
+
+ for (n = 0; n < 64; n++) {
+ const int x_idx = n & 7, y_idx = n >> 3;
+
+ x->fwd_txm8x8(x->src_diff + y_idx * 64 * 8 + x_idx * 8,
+ x->coeff + n * 64, 128);
+ }
+}
+
+void vp9_transform_sb64y_4x4(MACROBLOCK *x) {
+ int n;
+
+ for (n = 0; n < 256; n++) {
+ const int x_idx = n & 15, y_idx = n >> 4;
+
+ x->fwd_txm4x4(x->src_diff + y_idx * 64 * 4 + x_idx * 4,
+ x->coeff + n * 16, 128);
+ }
+}
+
+void vp9_transform_sb64uv_32x32(MACROBLOCK *x) {
+ vp9_clear_system_state();
+ vp9_short_fdct32x32(x->src_diff + 4096,
+ x->coeff + 4096, 64);
+ vp9_short_fdct32x32(x->src_diff + 4096 + 1024,
+ x->coeff + 4096 + 1024, 64);
+}
+
+void vp9_transform_sb64uv_16x16(MACROBLOCK *x) {
+ int n;
+
+ vp9_clear_system_state();
+ for (n = 0; n < 4; n++) {
+ const int x_idx = n & 1, y_idx = n >> 1;
+
+ x->fwd_txm16x16(x->src_diff + 4096 + y_idx * 32 * 16 + x_idx * 16,
+ x->coeff + 4096 + n * 256, 64);
+ x->fwd_txm16x16(x->src_diff + 4096 + 1024 + y_idx * 32 * 16 + x_idx * 16,
+ x->coeff + 4096 + 1024 + n * 256, 64);
+ }
+}
+
+void vp9_transform_sb64uv_8x8(MACROBLOCK *x) {
+ int n;
+
+ vp9_clear_system_state();
+ for (n = 0; n < 16; n++) {
+ const int x_idx = n & 3, y_idx = n >> 2;
+
+ x->fwd_txm8x8(x->src_diff + 4096 + y_idx * 32 * 8 + x_idx * 8,
+ x->coeff + 4096 + n * 64, 64);
+ x->fwd_txm8x8(x->src_diff + 4096 + 1024 + y_idx * 32 * 8 + x_idx * 8,
+ x->coeff + 4096 + 1024 + n * 64, 64);
+ }
+}
+
+void vp9_transform_sb64uv_4x4(MACROBLOCK *x) {
+ int n;
+
+ vp9_clear_system_state();
+ for (n = 0; n < 64; n++) {
+ const int x_idx = n & 7, y_idx = n >> 3;
+
+ x->fwd_txm4x4(x->src_diff + 4096 + y_idx * 32 * 4 + x_idx * 4,
+ x->coeff + 4096 + n * 16, 64);
+ x->fwd_txm4x4(x->src_diff + 4096 + 1024 + y_idx * 32 * 4 + x_idx * 4,
+ x->coeff + 4096 + 1024 + n * 16, 64);
+ }
+}
+
#define RDTRUNC(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF )
#define RDTRUNC_8x8(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF )
typedef struct vp9_token_state vp9_token_state;
@@ -294,21 +491,20 @@
return vp9_get_coef_context(&recent_energy, token);
}
-static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type,
+static void optimize_b(MACROBLOCK *mb, int ib, PLANE_TYPE type,
+ const int16_t *dequant_ptr,
ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
int tx_size) {
const int ref = mb->e_mbd.mode_info_context->mbmi.ref_frame != INTRA_FRAME;
MACROBLOCKD *const xd = &mb->e_mbd;
- BLOCK *b = &mb->block[i];
- BLOCKD *d = &xd->block[i];
- vp9_token_state tokens[257][2];
- unsigned best_index[257][2];
- const int16_t *dequant_ptr = d->dequant, *coeff_ptr = b->coeff;
- int16_t *qcoeff_ptr = d->qcoeff;
- int16_t *dqcoeff_ptr = d->dqcoeff;
- int eob = xd->eobs[i], final_eob, sz = 0;
+ vp9_token_state tokens[1025][2];
+ unsigned best_index[1025][2];
+ const int16_t *coeff_ptr = mb->coeff + ib * 16;
+ int16_t *qcoeff_ptr = xd->qcoeff + ib * 16;
+ int16_t *dqcoeff_ptr = xd->dqcoeff + ib * 16;
+ int eob = xd->eobs[ib], final_eob, sz = 0;
const int i0 = 0;
- int rc, x, next;
+ int rc, x, next, i;
int64_t rdmult, rddiv, rd_cost0, rd_cost1;
int rate0, rate1, error0, error1, t0, t1;
int best, band, pt;
@@ -315,34 +511,15 @@
int err_mult = plane_rd_mult[type];
int default_eob;
int const *scan;
+ const int mul = 1 + (tx_size == TX_32X32);
switch (tx_size) {
default:
case TX_4X4:
- scan = vp9_default_zig_zag1d_4x4;
default_eob = 16;
- // TODO: this isn't called (for intra4x4 modes), but will be left in
- // since it could be used later
- {
- TX_TYPE tx_type = get_tx_type_4x4(&mb->e_mbd, d);
- if (tx_type != DCT_DCT) {
- switch (tx_type) {
- case ADST_DCT:
- scan = vp9_row_scan_4x4;
- break;
-
- case DCT_ADST:
- scan = vp9_col_scan_4x4;
- break;
-
- default:
- scan = vp9_default_zig_zag1d_4x4;
- break;
- }
- } else {
- scan = vp9_default_zig_zag1d_4x4;
- }
- }
+ // FIXME(rbultje): although optimize_b currently isn't called for
+ // intra4x4, this should be changed to be adst-compatible
+ scan = vp9_default_zig_zag1d_4x4;
break;
case TX_8X8:
scan = vp9_default_zig_zag1d_8x8;
@@ -352,6 +529,10 @@
scan = vp9_default_zig_zag1d_16x16;
default_eob = 256;
break;
+ case TX_32X32:
+ scan = vp9_default_zig_zag1d_32x32;
+ default_eob = 1024;
+ break;
}
/* Now set up a Viterbi trellis to evaluate alternative roundings. */
@@ -395,7 +576,7 @@
/* And pick the best. */
best = rd_cost1 < rd_cost0;
base_bits = *(vp9_dct_value_cost_ptr + x);
- dx = dqcoeff_ptr[rc] - coeff_ptr[rc];
+ dx = mul * (dqcoeff_ptr[rc] - coeff_ptr[rc]);
d2 = dx * dx;
tokens[i][0].rate = base_bits + (best ? rate1 : rate0);
tokens[i][0].error = d2 + (best ? error1 : error0);
@@ -407,8 +588,9 @@
rate0 = tokens[next][0].rate;
rate1 = tokens[next][1].rate;
- if ((abs(x)*dequant_ptr[rc != 0] > abs(coeff_ptr[rc])) &&
- (abs(x)*dequant_ptr[rc != 0] < abs(coeff_ptr[rc]) + dequant_ptr[rc != 0]))
+ if ((abs(x)*dequant_ptr[rc != 0] > abs(coeff_ptr[rc]) * mul) &&
+ (abs(x)*dequant_ptr[rc != 0] < abs(coeff_ptr[rc]) * mul +
+ dequant_ptr[rc != 0]))
shortcut = 1;
else
shortcut = 0;
@@ -504,7 +686,7 @@
final_eob = i;
rc = scan[i];
qcoeff_ptr[rc] = x;
- dqcoeff_ptr[rc] = (x * dequant_ptr[rc != 0]);
+ dqcoeff_ptr[rc] = (x * dequant_ptr[rc != 0]) / mul;
next = tokens[i][best].next;
best = best_index[i][best];
@@ -511,7 +693,7 @@
}
final_eob++;
- xd->eobs[d - xd->block] = final_eob;
+ xd->eobs[ib] = final_eob;
*a = *l = (final_eob > 0);
}
@@ -531,7 +713,7 @@
tl = (ENTROPY_CONTEXT *)&t_left;
for (b = 0; b < 16; b++) {
- optimize_b(x, b, PLANE_TYPE_Y_WITH_DC,
+ optimize_b(x, b, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[b].dequant,
ta + vp9_block2above[TX_4X4][b],
tl + vp9_block2left[TX_4X4][b], TX_4X4);
}
@@ -553,7 +735,7 @@
tl = (ENTROPY_CONTEXT *)&t_left;
for (b = 16; b < 24; b++) {
- optimize_b(x, b, PLANE_TYPE_UV,
+ optimize_b(x, b, PLANE_TYPE_UV, x->e_mbd.block[b].dequant,
ta + vp9_block2above[TX_4X4][b],
tl + vp9_block2left[TX_4X4][b], TX_4X4);
}
@@ -583,7 +765,8 @@
ENTROPY_CONTEXT *const l = tl + vp9_block2left[TX_8X8][b];
ENTROPY_CONTEXT above_ec = (a[0] + a[1]) != 0;
ENTROPY_CONTEXT left_ec = (l[0] + l[1]) != 0;
- optimize_b(x, b, PLANE_TYPE_Y_WITH_DC, &above_ec, &left_ec, TX_8X8);
+ optimize_b(x, b, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[b].dequant,
+ &above_ec, &left_ec, TX_8X8);
a[1] = a[0] = above_ec;
l[1] = l[0] = left_ec;
}
@@ -602,7 +785,8 @@
ENTROPY_CONTEXT *const l = tl + vp9_block2left[TX_8X8][b];
ENTROPY_CONTEXT above_ec = (a[0] + a[1]) != 0;
ENTROPY_CONTEXT left_ec = (l[0] + l[1]) != 0;
- optimize_b(x, b, PLANE_TYPE_UV, &above_ec, &left_ec, TX_8X8);
+ optimize_b(x, b, PLANE_TYPE_UV, x->e_mbd.block[b].dequant,
+ &above_ec, &left_ec, TX_8X8);
}
}
@@ -621,12 +805,340 @@
ta = (t_above->y1[0] + t_above->y1[1] + t_above->y1[2] + t_above->y1[3]) != 0;
tl = (t_left->y1[0] + t_left->y1[1] + t_left->y1[2] + t_left->y1[3]) != 0;
- optimize_b(x, 0, PLANE_TYPE_Y_WITH_DC, &ta, &tl, TX_16X16);
+ optimize_b(x, 0, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
+ &ta, &tl, TX_16X16);
}
static void optimize_mb_16x16(MACROBLOCK *x) {
vp9_optimize_mby_16x16(x);
vp9_optimize_mbuv_8x8(x);
+}
+
+void vp9_optimize_sby_32x32(MACROBLOCK *x) {
+ ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context;
+ ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1);
+ ENTROPY_CONTEXT *l = (ENTROPY_CONTEXT *) x->e_mbd.left_context;
+ ENTROPY_CONTEXT *l1 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 1);
+ ENTROPY_CONTEXT ta, tl;
+
+ ta = (a[0] + a[1] + a[2] + a[3] + a1[0] + a1[1] + a1[2] + a1[3]) != 0;
+ tl = (l[0] + l[1] + l[2] + l[3] + l1[0] + l1[1] + l1[2] + l1[3]) != 0;
+ optimize_b(x, 0, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
+ &ta, &tl, TX_32X32);
+}
+
+void vp9_optimize_sby_16x16(MACROBLOCK *x) {
+ ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context;
+ ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1);
+ ENTROPY_CONTEXT *l = (ENTROPY_CONTEXT *) x->e_mbd.left_context;
+ ENTROPY_CONTEXT *l1 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 1);
+ ENTROPY_CONTEXT ta[2], tl[2];
+ int n;
+
+ ta[0] = (a[0] + a[1] + a[2] + a[3]) != 0;
+ ta[1] = (a1[0] + a1[1] + a1[2] + a1[3]) != 0;
+ tl[0] = (l[0] + l[1] + l[2] + l[3]) != 0;
+ tl[1] = (l1[0] + l1[1] + l1[2] + l1[3]) != 0;
+ for (n = 0; n < 4; n++) {
+ const int x_idx = n & 1, y_idx = n >> 1;
+
+ optimize_b(x, n * 16, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
+ ta + x_idx, tl + y_idx, TX_16X16);
+ }
+}
+
+void vp9_optimize_sby_8x8(MACROBLOCK *x) {
+ ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context;
+ ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1);
+ ENTROPY_CONTEXT *l = (ENTROPY_CONTEXT *) x->e_mbd.left_context;
+ ENTROPY_CONTEXT *l1 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 1);
+ ENTROPY_CONTEXT ta[4], tl[4];
+ int n;
+
+ ta[0] = (a[0] + a[1]) != 0;
+ ta[1] = (a[2] + a[3]) != 0;
+ ta[2] = (a1[0] + a1[1]) != 0;
+ ta[3] = (a1[2] + a1[3]) != 0;
+ tl[0] = (l[0] + l[1]) != 0;
+ tl[1] = (l[2] + l[3]) != 0;
+ tl[2] = (l1[0] + l1[1]) != 0;
+ tl[3] = (l1[2] + l1[3]) != 0;
+ for (n = 0; n < 16; n++) {
+ const int x_idx = n & 3, y_idx = n >> 2;
+
+ optimize_b(x, n * 4, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
+ ta + x_idx, tl + y_idx, TX_8X8);
+ }
+}
+
+void vp9_optimize_sby_4x4(MACROBLOCK *x) {
+ ENTROPY_CONTEXT ta[8], tl[8];
+ int n;
+
+ vpx_memcpy(ta, x->e_mbd.above_context, 4 * sizeof(ENTROPY_CONTEXT));
+ vpx_memcpy(ta + 4, x->e_mbd.above_context + 1, 4 * sizeof(ENTROPY_CONTEXT));
+ vpx_memcpy(tl, x->e_mbd.left_context, 4 * sizeof(ENTROPY_CONTEXT));
+ vpx_memcpy(tl + 4, x->e_mbd.left_context + 1, 4 * sizeof(ENTROPY_CONTEXT));
+ for (n = 0; n < 64; n++) {
+ const int x_idx = n & 7, y_idx = n >> 3;
+
+ optimize_b(x, n, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
+ ta + x_idx, tl + y_idx, TX_4X4);
+ }
+}
+
+void vp9_optimize_sbuv_16x16(MACROBLOCK *x) {
+ ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) x->e_mbd.above_context;
+ ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) x->e_mbd.left_context;
+ ENTROPY_CONTEXT *a, *l, *a1, *l1, above_ec, left_ec;
+ int b;
+
+ for (b = 64; b < 96; b += 16) {
+ const int cidx = b >= 80 ? 20 : 16;
+ a = ta + vp9_block2above_sb[TX_16X16][b];
+ l = tl + vp9_block2left_sb[TX_16X16][b];
+ a1 = a + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ l1 = l + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ above_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0;
+ left_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0;
+ optimize_b(x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant,
+ &above_ec, &left_ec, TX_16X16);
+ }
+}
+
+void vp9_optimize_sbuv_8x8(MACROBLOCK *x) {
+ ENTROPY_CONTEXT_PLANES t_above[2], t_left[2];
+ ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) t_above;
+ ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) t_left;
+ ENTROPY_CONTEXT *a, *l, above_ec, left_ec;
+ int b;
+
+ vpx_memcpy(t_above, x->e_mbd.above_context, sizeof(t_above));
+ vpx_memcpy(t_left, x->e_mbd.left_context, sizeof(t_left));
+ for (b = 64; b < 96; b += 4) {
+ const int cidx = b >= 80 ? 20 : 16;
+ a = ta + vp9_block2above_sb[TX_8X8][b];
+ l = tl + vp9_block2left_sb[TX_8X8][b];
+ above_ec = (a[0] + a[1]) != 0;
+ left_ec = (l[0] + l[1]) != 0;
+ optimize_b(x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant,
+ &above_ec, &left_ec, TX_8X8);
+ a[0] = a[1] = above_ec;
+ l[0] = l[1] = left_ec;
+ }
+}
+
+void vp9_optimize_sbuv_4x4(MACROBLOCK *x) {
+ ENTROPY_CONTEXT_PLANES t_above[2], t_left[2];
+ ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) t_above;
+ ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) t_left;
+ ENTROPY_CONTEXT *a, *l;
+ int b;
+
+ vpx_memcpy(t_above, x->e_mbd.above_context, sizeof(t_above));
+ vpx_memcpy(t_left, x->e_mbd.left_context, sizeof(t_left));
+ for (b = 64; b < 96; b++) {
+ const int cidx = b >= 80 ? 20 : 16;
+ a = ta + vp9_block2above_sb[TX_4X4][b];
+ l = tl + vp9_block2left_sb[TX_4X4][b];
+ optimize_b(x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant,
+ a, l, TX_4X4);
+ }
+}
+
+void vp9_optimize_sb64y_32x32(MACROBLOCK *x) {
+ ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context;
+ ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1);
+ ENTROPY_CONTEXT *a2 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 2);
+ ENTROPY_CONTEXT *a3 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 3);
+ ENTROPY_CONTEXT *l = (ENTROPY_CONTEXT *) x->e_mbd.left_context;
+ ENTROPY_CONTEXT *l1 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 1);
+ ENTROPY_CONTEXT *l2 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 2);
+ ENTROPY_CONTEXT *l3 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 3);
+ ENTROPY_CONTEXT ta[2], tl[2];
+ int n;
+
+ ta[0] = (a[0] + a[1] + a[2] + a[3] + a1[0] + a1[1] + a1[2] + a1[3]) != 0;
+ ta[1] = (a2[0] + a2[1] + a2[2] + a2[3] + a3[0] + a3[1] + a3[2] + a3[3]) != 0;
+ tl[0] = (l[0] + l[1] + l[2] + l[3] + l1[0] + l1[1] + l1[2] + l1[3]) != 0;
+ tl[1] = (l2[0] + l2[1] + l2[2] + l2[3] + l3[0] + l3[1] + l3[2] + l3[3]) != 0;
+ for (n = 0; n < 4; n++) {
+ const int x_idx = n & 1, y_idx = n >> 1;
+
+ optimize_b(x, n * 64, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
+ ta + x_idx, tl + y_idx, TX_32X32);
+ }
+}
+
+void vp9_optimize_sb64y_16x16(MACROBLOCK *x) {
+ ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context;
+ ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1);
+ ENTROPY_CONTEXT *a2 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 2);
+ ENTROPY_CONTEXT *a3 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 3);
+ ENTROPY_CONTEXT *l = (ENTROPY_CONTEXT *) x->e_mbd.left_context;
+ ENTROPY_CONTEXT *l1 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 1);
+ ENTROPY_CONTEXT *l2 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 2);
+ ENTROPY_CONTEXT *l3 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 3);
+ ENTROPY_CONTEXT ta[4], tl[4];
+ int n;
+
+ ta[0] = (a[0] + a[1] + a[2] + a[3]) != 0;
+ ta[1] = (a1[0] + a1[1] + a1[2] + a1[3]) != 0;
+ ta[2] = (a2[0] + a2[1] + a2[2] + a2[3]) != 0;
+ ta[3] = (a3[0] + a3[1] + a3[2] + a3[3]) != 0;
+ tl[0] = (l[0] + l[1] + l[2] + l[3]) != 0;
+ tl[1] = (l1[0] + l1[1] + l1[2] + l1[3]) != 0;
+ tl[2] = (l2[0] + l2[1] + l2[2] + l2[3]) != 0;
+ tl[3] = (l3[0] + l3[1] + l3[2] + l3[3]) != 0;
+ for (n = 0; n < 16; n++) {
+ const int x_idx = n & 3, y_idx = n >> 2;
+
+ optimize_b(x, n * 16, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
+ ta + x_idx, tl + y_idx, TX_16X16);
+ }
+}
+
+void vp9_optimize_sb64y_8x8(MACROBLOCK *x) {
+ ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context;
+ ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1);
+ ENTROPY_CONTEXT *a2 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 2);
+ ENTROPY_CONTEXT *a3 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 3);
+ ENTROPY_CONTEXT *l = (ENTROPY_CONTEXT *) x->e_mbd.left_context;
+ ENTROPY_CONTEXT *l1 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 1);
+ ENTROPY_CONTEXT *l2 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 2);
+ ENTROPY_CONTEXT *l3 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 3);
+ ENTROPY_CONTEXT ta[8], tl[8];
+ int n;
+
+ ta[0] = (a[0] + a[1]) != 0;
+ ta[1] = (a[2] + a[3]) != 0;
+ ta[2] = (a1[0] + a1[1]) != 0;
+ ta[3] = (a1[2] + a1[3]) != 0;
+ ta[4] = (a2[0] + a2[1]) != 0;
+ ta[5] = (a2[2] + a2[3]) != 0;
+ ta[6] = (a3[0] + a3[1]) != 0;
+ ta[7] = (a3[2] + a3[3]) != 0;
+ tl[0] = (l[0] + l[1]) != 0;
+ tl[1] = (l[2] + l[3]) != 0;
+ tl[2] = (l1[0] + l1[1]) != 0;
+ tl[3] = (l1[2] + l1[3]) != 0;
+ tl[4] = (l2[0] + l2[1]) != 0;
+ tl[5] = (l2[2] + l2[3]) != 0;
+ tl[6] = (l3[0] + l3[1]) != 0;
+ tl[7] = (l3[2] + l3[3]) != 0;
+ for (n = 0; n < 64; n++) {
+ const int x_idx = n & 7, y_idx = n >> 3;
+
+ optimize_b(x, n * 4, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
+ ta + x_idx, tl + y_idx, TX_8X8);
+ }
+}
+
+void vp9_optimize_sb64y_4x4(MACROBLOCK *x) {
+ ENTROPY_CONTEXT ta[16], tl[16];
+ int n;
+
+ vpx_memcpy(ta, x->e_mbd.above_context, 4 * sizeof(ENTROPY_CONTEXT));
+ vpx_memcpy(ta + 4, x->e_mbd.above_context + 1, 4 * sizeof(ENTROPY_CONTEXT));
+ vpx_memcpy(ta + 8, x->e_mbd.above_context + 2, 4 * sizeof(ENTROPY_CONTEXT));
+ vpx_memcpy(ta + 12, x->e_mbd.above_context + 3, 4 * sizeof(ENTROPY_CONTEXT));
+ vpx_memcpy(tl, x->e_mbd.left_context, 4 * sizeof(ENTROPY_CONTEXT));
+ vpx_memcpy(tl + 4, x->e_mbd.left_context + 1, 4 * sizeof(ENTROPY_CONTEXT));
+ vpx_memcpy(tl + 8, x->e_mbd.left_context + 2, 4 * sizeof(ENTROPY_CONTEXT));
+ vpx_memcpy(tl + 12, x->e_mbd.left_context + 3, 4 * sizeof(ENTROPY_CONTEXT));
+ for (n = 0; n < 256; n++) {
+ const int x_idx = n & 15, y_idx = n >> 4;
+
+ optimize_b(x, n, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
+ ta + x_idx, tl + y_idx, TX_4X4);
+ }
+}
+
+void vp9_optimize_sb64uv_32x32(MACROBLOCK *x) {
+ ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) x->e_mbd.above_context;
+ ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) x->e_mbd.left_context;
+ ENTROPY_CONTEXT *a, *l, *a1, *l1, *a2, *l2, *a3, *l3, a_ec, l_ec;
+ int b;
+
+ for (b = 256; b < 384; b += 64) {
+ const int cidx = b >= 320 ? 20 : 16;
+ a = ta + vp9_block2above_sb64[TX_32X32][b];
+ l = tl + vp9_block2left_sb64[TX_32X32][b];
+ a1 = a + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ l1 = l + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ a2 = a + 2 * sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ l2 = l + 2 * sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ a3 = a + 3 * sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ l3 = l + 3 * sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ a_ec = (a[0] + a[1] + a1[0] + a1[1] + a2[0] + a2[1] + a3[0] + a3[1]) != 0;
+ l_ec = (l[0] + l[1] + l1[0] + l1[1] + l2[0] + l2[1] + l3[0] + l3[1]) != 0;
+ optimize_b(x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant,
+ &a_ec, &l_ec, TX_32X32);
+ }
+}
+
+void vp9_optimize_sb64uv_16x16(MACROBLOCK *x) {
+ ENTROPY_CONTEXT_PLANES t_above[4], t_left[4];
+ ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) t_above;
+ ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) t_left;
+ ENTROPY_CONTEXT *a, *l, *a1, *l1, above_ec, left_ec;
+ int b;
+
+ vpx_memcpy(t_above, x->e_mbd.above_context, sizeof(t_above));
+ vpx_memcpy(t_left, x->e_mbd.left_context, sizeof(t_left));
+ for (b = 256; b < 384; b += 16) {
+ const int cidx = b >= 320 ? 20 : 16;
+ a = ta + vp9_block2above_sb64[TX_16X16][b];
+ l = tl + vp9_block2left_sb64[TX_16X16][b];
+ a1 = a + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ l1 = l + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ above_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0;
+ left_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0;
+ optimize_b(x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant,
+ &above_ec, &left_ec, TX_16X16);
+ a[0] = a[1] = a1[0] = a1[1] = above_ec;
+ l[0] = l[1] = l1[0] = l1[1] = left_ec;
+ }
+}
+
+void vp9_optimize_sb64uv_8x8(MACROBLOCK *x) {
+ ENTROPY_CONTEXT_PLANES t_above[4], t_left[4];
+ ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) t_above;
+ ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) t_left;
+ ENTROPY_CONTEXT *a, *l, above_ec, left_ec;
+ int b;
+
+ vpx_memcpy(t_above, x->e_mbd.above_context, sizeof(t_above));
+ vpx_memcpy(t_left, x->e_mbd.left_context, sizeof(t_left));
+ for (b = 256; b < 384; b += 4) {
+ const int cidx = b >= 320 ? 20 : 16;
+ a = ta + vp9_block2above_sb64[TX_8X8][b];
+ l = tl + vp9_block2left_sb64[TX_8X8][b];
+ above_ec = (a[0] + a[1]) != 0;
+ left_ec = (l[0] + l[1]) != 0;
+ optimize_b(x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant,
+ &above_ec, &left_ec, TX_8X8);
+ a[0] = a[1] = above_ec;
+ l[0] = l[1] = left_ec;
+ }
+}
+
+void vp9_optimize_sb64uv_4x4(MACROBLOCK *x) {
+ ENTROPY_CONTEXT_PLANES t_above[4], t_left[4];
+ ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) t_above;
+ ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) t_left;
+ ENTROPY_CONTEXT *a, *l;
+ int b;
+
+ vpx_memcpy(t_above, x->e_mbd.above_context, sizeof(t_above));
+ vpx_memcpy(t_left, x->e_mbd.left_context, sizeof(t_left));
+ for (b = 256; b < 384; b++) {
+ const int cidx = b >= 320 ? 20 : 16;
+ a = ta + vp9_block2above_sb64[TX_4X4][b];
+ l = tl + vp9_block2left_sb64[TX_4X4][b];
+ optimize_b(x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant,
+ a, l, TX_4X4);
+ }
}
void vp9_fidct_mb(MACROBLOCK *x) {
--- a/vp9/encoder/vp9_encodemb.h
+++ b/vp9/encoder/vp9_encodemb.h
@@ -35,7 +35,6 @@
void vp9_transform_mb_8x8(MACROBLOCK *mb);
void vp9_transform_mby_8x8(MACROBLOCK *x);
void vp9_transform_mbuv_8x8(MACROBLOCK *x);
-void vp9_build_dcblock_8x8(MACROBLOCK *b);
void vp9_optimize_mby_8x8(MACROBLOCK *x);
void vp9_optimize_mbuv_8x8(MACROBLOCK *x);
@@ -44,8 +43,37 @@
void vp9_optimize_mby_16x16(MACROBLOCK *x);
void vp9_transform_sby_32x32(MACROBLOCK *x);
+void vp9_optimize_sby_32x32(MACROBLOCK *x);
+void vp9_transform_sby_16x16(MACROBLOCK *x);
+void vp9_optimize_sby_16x16(MACROBLOCK *x);
+void vp9_transform_sby_8x8(MACROBLOCK *x);
+void vp9_optimize_sby_8x8(MACROBLOCK *x);
+void vp9_transform_sby_4x4(MACROBLOCK *x);
+void vp9_optimize_sby_4x4(MACROBLOCK *x);
void vp9_transform_sbuv_16x16(MACROBLOCK *x);
+void vp9_optimize_sbuv_16x16(MACROBLOCK *x);
+void vp9_transform_sbuv_8x8(MACROBLOCK *x);
+void vp9_optimize_sbuv_8x8(MACROBLOCK *x);
+void vp9_transform_sbuv_4x4(MACROBLOCK *x);
+void vp9_optimize_sbuv_4x4(MACROBLOCK *x);
+void vp9_transform_sb64y_32x32(MACROBLOCK *x);
+void vp9_optimize_sb64y_32x32(MACROBLOCK *x);
+void vp9_transform_sb64y_16x16(MACROBLOCK *x);
+void vp9_optimize_sb64y_16x16(MACROBLOCK *x);
+void vp9_transform_sb64y_8x8(MACROBLOCK *x);
+void vp9_optimize_sb64y_8x8(MACROBLOCK *x);
+void vp9_transform_sb64y_4x4(MACROBLOCK *x);
+void vp9_optimize_sb64y_4x4(MACROBLOCK *x);
+void vp9_transform_sb64uv_32x32(MACROBLOCK *x);
+void vp9_optimize_sb64uv_32x32(MACROBLOCK *x);
+void vp9_transform_sb64uv_16x16(MACROBLOCK *x);
+void vp9_optimize_sb64uv_16x16(MACROBLOCK *x);
+void vp9_transform_sb64uv_8x8(MACROBLOCK *x);
+void vp9_optimize_sb64uv_8x8(MACROBLOCK *x);
+void vp9_transform_sb64uv_4x4(MACROBLOCK *x);
+void vp9_optimize_sb64uv_4x4(MACROBLOCK *x);
+
void vp9_fidct_mb(MACROBLOCK *x);
void vp9_subtract_4b_c(BLOCK *be, BLOCKD *bd, int pitch);
@@ -63,5 +91,11 @@
const uint8_t *vsrc, int src_stride,
const uint8_t *upred,
const uint8_t *vpred, int dst_stride);
+void vp9_subtract_sb64y_s_c(int16_t *diff, const uint8_t *src, int src_stride,
+ const uint8_t *pred, int dst_stride);
+void vp9_subtract_sb64uv_s_c(int16_t *diff, const uint8_t *usrc,
+ const uint8_t *vsrc, int src_stride,
+ const uint8_t *upred,
+ const uint8_t *vpred, int dst_stride);
#endif // VP9_ENCODER_VP9_ENCODEMB_H_
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -92,7 +92,7 @@
vp9_coeff_probs coef_probs_4x4[BLOCK_TYPES];
vp9_coeff_probs coef_probs_8x8[BLOCK_TYPES];
vp9_coeff_probs coef_probs_16x16[BLOCK_TYPES];
- vp9_coeff_probs coef_probs_32x32[BLOCK_TYPES_32X32];
+ vp9_coeff_probs coef_probs_32x32[BLOCK_TYPES];
vp9_prob sb_ymode_prob[VP9_I32X32_MODES - 1];
vp9_prob ymode_prob[VP9_YMODES - 1]; /* interframe intra mode probs */
@@ -476,9 +476,9 @@
vp9_coeff_probs frame_coef_probs_16x16[BLOCK_TYPES];
vp9_coeff_stats frame_branch_ct_16x16[BLOCK_TYPES];
- vp9_coeff_count coef_counts_32x32[BLOCK_TYPES_32X32];
- vp9_coeff_probs frame_coef_probs_32x32[BLOCK_TYPES_32X32];
- vp9_coeff_stats frame_branch_ct_32x32[BLOCK_TYPES_32X32];
+ vp9_coeff_count coef_counts_32x32[BLOCK_TYPES];
+ vp9_coeff_probs frame_coef_probs_32x32[BLOCK_TYPES];
+ vp9_coeff_stats frame_branch_ct_32x32[BLOCK_TYPES];
int gfu_boost;
int last_boost;
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@@ -344,17 +344,17 @@
}
void vp9_quantize_sby_32x32(MACROBLOCK *x) {
- MACROBLOCKD *xd = &x->e_mbd;
- BLOCK *b = &x->block[0];
- BLOCKD *d = &xd->block[0];
+ MACROBLOCKD *const xd = &x->e_mbd;
+ BLOCK *const b = &x->block[0];
+ BLOCKD *const d = &xd->block[0];
quantize(b->zrun_zbin_boost,
- x->sb_coeff_data.coeff,
+ x->coeff,
1024, b->skip_block,
b->zbin,
b->round, b->quant, b->quant_shift,
- xd->sb_coeff_data.qcoeff,
- xd->sb_coeff_data.dqcoeff,
+ xd->qcoeff,
+ xd->dqcoeff,
d->dequant,
b->zbin_extra,
&xd->eobs[0],
@@ -361,22 +361,284 @@
vp9_default_zig_zag1d_32x32, 2);
}
+void vp9_quantize_sby_16x16(MACROBLOCK *x) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ BLOCK *const b = &x->block[0];
+ BLOCKD *const d = &xd->block[0];
+ int n;
+
+ for (n = 0; n < 4; n++)
+ quantize(b->zrun_zbin_boost,
+ x->coeff + n * 256,
+ 256, b->skip_block,
+ b->zbin,
+ b->round, b->quant, b->quant_shift,
+ xd->qcoeff + n * 256,
+ xd->dqcoeff + n * 256,
+ d->dequant,
+ b->zbin_extra,
+ &xd->eobs[n * 16],
+ vp9_default_zig_zag1d_16x16, 1);
+}
+
+void vp9_quantize_sby_8x8(MACROBLOCK *x) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ BLOCK *const b = &x->block[0];
+ BLOCKD *const d = &xd->block[0];
+ int n;
+
+ for (n = 0; n < 16; n++)
+ quantize(b->zrun_zbin_boost,
+ x->coeff + n * 64,
+ 64, b->skip_block,
+ b->zbin,
+ b->round, b->quant, b->quant_shift,
+ xd->qcoeff + n * 64,
+ xd->dqcoeff + n * 64,
+ d->dequant,
+ b->zbin_extra,
+ &xd->eobs[n * 4],
+ vp9_default_zig_zag1d_8x8, 1);
+}
+
+void vp9_quantize_sby_4x4(MACROBLOCK *x) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ BLOCK *const b = &x->block[0];
+ BLOCKD *const d = &xd->block[0];
+ int n;
+
+ for (n = 0; n < 64; n++)
+ quantize(b->zrun_zbin_boost,
+ x->coeff + n * 16,
+ 16, b->skip_block,
+ b->zbin,
+ b->round, b->quant, b->quant_shift,
+ xd->qcoeff + n * 16,
+ xd->dqcoeff + n * 16,
+ d->dequant,
+ b->zbin_extra,
+ &xd->eobs[n],
+ vp9_default_zig_zag1d_4x4, 1);
+}
+
void vp9_quantize_sbuv_16x16(MACROBLOCK *x) {
int i;
- MACROBLOCKD *xd = &x->e_mbd;
+ MACROBLOCKD *const xd = &x->e_mbd;
- for (i = 16; i < 24; i += 4)
- quantize(x->block[i].zrun_zbin_boost,
- x->sb_coeff_data.coeff + 1024 + (i - 16) * 64,
- 256, x->block[i].skip_block,
- x->block[i].zbin,
- x->block[i].round, x->block[0].quant, x->block[i].quant_shift,
- xd->sb_coeff_data.qcoeff + 1024 + (i - 16) * 64,
- xd->sb_coeff_data.dqcoeff + 1024 + (i - 16) * 64,
- xd->block[i].dequant,
- x->block[i].zbin_extra,
+ for (i = 64; i < 96; i += 16) {
+ int cidx = i < 80 ? 16 : 20;
+ quantize(x->block[cidx].zrun_zbin_boost,
+ x->coeff + i * 16,
+ 256, x->block[cidx].skip_block,
+ x->block[cidx].zbin, x->block[cidx].round,
+ x->block[cidx].quant, x->block[cidx].quant_shift,
+ xd->qcoeff + i * 16,
+ xd->dqcoeff + i * 16,
+ xd->block[cidx].dequant,
+ x->block[cidx].zbin_extra,
&xd->eobs[i],
vp9_default_zig_zag1d_16x16, 1);
+ }
+}
+
+void vp9_quantize_sbuv_8x8(MACROBLOCK *x) {
+ int i;
+ MACROBLOCKD *const xd = &x->e_mbd;
+
+ for (i = 64; i < 96; i += 4) {
+ int cidx = i < 80 ? 16 : 20;
+ quantize(x->block[cidx].zrun_zbin_boost,
+ x->coeff + i * 16,
+ 64, x->block[cidx].skip_block,
+ x->block[cidx].zbin, x->block[cidx].round,
+ x->block[cidx].quant, x->block[cidx].quant_shift,
+ xd->qcoeff + i * 16,
+ xd->dqcoeff + i * 16,
+ xd->block[cidx].dequant,
+ x->block[cidx].zbin_extra,
+ &xd->eobs[i],
+ vp9_default_zig_zag1d_8x8, 1);
+ }
+}
+
+void vp9_quantize_sbuv_4x4(MACROBLOCK *x) {
+ int i;
+ MACROBLOCKD *const xd = &x->e_mbd;
+
+ for (i = 64; i < 96; i++) {
+ int cidx = i < 80 ? 16 : 20;
+ quantize(x->block[cidx].zrun_zbin_boost,
+ x->coeff + i * 16,
+ 16, x->block[cidx].skip_block,
+ x->block[cidx].zbin, x->block[cidx].round,
+ x->block[cidx].quant, x->block[cidx].quant_shift,
+ xd->qcoeff + i * 16,
+ xd->dqcoeff + i * 16,
+ xd->block[cidx].dequant,
+ x->block[cidx].zbin_extra,
+ &xd->eobs[i],
+ vp9_default_zig_zag1d_4x4, 1);
+ }
+}
+
+void vp9_quantize_sb64y_32x32(MACROBLOCK *x) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ BLOCK *const b = &x->block[0];
+ BLOCKD *const d = &xd->block[0];
+ int n;
+
+ for (n = 0; n < 4; n++)
+ quantize(b->zrun_zbin_boost,
+ x->coeff + n * 1024,
+ 1024, b->skip_block,
+ b->zbin,
+ b->round, b->quant, b->quant_shift,
+ xd->qcoeff + n * 1024,
+ xd->dqcoeff + n * 1024,
+ d->dequant,
+ b->zbin_extra,
+ &xd->eobs[n * 64],
+ vp9_default_zig_zag1d_32x32, 2);
+}
+
+void vp9_quantize_sb64y_16x16(MACROBLOCK *x) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ BLOCK *const b = &x->block[0];
+ BLOCKD *const d = &xd->block[0];
+ int n;
+
+ for (n = 0; n < 16; n++)
+ quantize(b->zrun_zbin_boost,
+ x->coeff + n * 256,
+ 256, b->skip_block,
+ b->zbin,
+ b->round, b->quant, b->quant_shift,
+ xd->qcoeff + n * 256,
+ xd->dqcoeff + n * 256,
+ d->dequant,
+ b->zbin_extra,
+ &xd->eobs[n * 16],
+ vp9_default_zig_zag1d_16x16, 1);
+}
+
+void vp9_quantize_sb64y_8x8(MACROBLOCK *x) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ BLOCK *const b = &x->block[0];
+ BLOCKD *const d = &xd->block[0];
+ int n;
+
+ for (n = 0; n < 64; n++)
+ quantize(b->zrun_zbin_boost,
+ x->coeff + n * 64,
+ 64, b->skip_block,
+ b->zbin,
+ b->round, b->quant, b->quant_shift,
+ xd->qcoeff + n * 64,
+ xd->dqcoeff + n * 64,
+ d->dequant,
+ b->zbin_extra,
+ &xd->eobs[n * 4],
+ vp9_default_zig_zag1d_8x8, 1);
+}
+
+void vp9_quantize_sb64y_4x4(MACROBLOCK *x) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ BLOCK *const b = &x->block[0];
+ BLOCKD *const d = &xd->block[0];
+ int n;
+
+ for (n = 0; n < 256; n++)
+ quantize(b->zrun_zbin_boost,
+ x->coeff + n * 16,
+ 16, b->skip_block,
+ b->zbin,
+ b->round, b->quant, b->quant_shift,
+ xd->qcoeff + n * 16,
+ xd->dqcoeff + n * 16,
+ d->dequant,
+ b->zbin_extra,
+ &xd->eobs[n],
+ vp9_default_zig_zag1d_4x4, 1);
+}
+
+void vp9_quantize_sb64uv_32x32(MACROBLOCK *x) {
+ int i;
+ MACROBLOCKD *const xd = &x->e_mbd;
+
+ for (i = 256; i < 384; i += 64) {
+ int cidx = i < 320 ? 16 : 20;
+ quantize(x->block[cidx].zrun_zbin_boost,
+ x->coeff + i * 16,
+ 1024, x->block[cidx].skip_block,
+ x->block[cidx].zbin, x->block[cidx].round,
+ x->block[cidx].quant, x->block[cidx].quant_shift,
+ xd->qcoeff + i * 16,
+ xd->dqcoeff + i * 16,
+ xd->block[cidx].dequant,
+ x->block[cidx].zbin_extra,
+ &xd->eobs[i],
+ vp9_default_zig_zag1d_32x32, 2);
+ }
+}
+
+void vp9_quantize_sb64uv_16x16(MACROBLOCK *x) {
+ int i;
+ MACROBLOCKD *const xd = &x->e_mbd;
+
+ for (i = 256; i < 384; i += 16) {
+ int cidx = i < 320 ? 16 : 20;
+ quantize(x->block[cidx].zrun_zbin_boost,
+ x->coeff + i * 16,
+ 256, x->block[cidx].skip_block,
+ x->block[cidx].zbin, x->block[cidx].round,
+ x->block[cidx].quant, x->block[cidx].quant_shift,
+ xd->qcoeff + i * 16,
+ xd->dqcoeff + i * 16,
+ xd->block[cidx].dequant,
+ x->block[cidx].zbin_extra,
+ &xd->eobs[i],
+ vp9_default_zig_zag1d_16x16, 1);
+ }
+}
+
+void vp9_quantize_sb64uv_8x8(MACROBLOCK *x) {
+ int i;
+ MACROBLOCKD *const xd = &x->e_mbd;
+
+ for (i = 256; i < 384; i += 4) {
+ int cidx = i < 320 ? 16 : 20;
+ quantize(x->block[cidx].zrun_zbin_boost,
+ x->coeff + i * 16,
+ 64, x->block[cidx].skip_block,
+ x->block[cidx].zbin, x->block[cidx].round,
+ x->block[cidx].quant, x->block[cidx].quant_shift,
+ xd->qcoeff + i * 16,
+ xd->dqcoeff + i * 16,
+ xd->block[cidx].dequant,
+ x->block[cidx].zbin_extra,
+ &xd->eobs[i],
+ vp9_default_zig_zag1d_8x8, 1);
+ }
+}
+
+void vp9_quantize_sb64uv_4x4(MACROBLOCK *x) {
+ int i;
+ MACROBLOCKD *const xd = &x->e_mbd;
+
+ for (i = 256; i < 384; i++) {
+ int cidx = i < 320 ? 16 : 20;
+ quantize(x->block[cidx].zrun_zbin_boost,
+ x->coeff + i * 16,
+ 16, x->block[cidx].skip_block,
+ x->block[cidx].zbin, x->block[cidx].round,
+ x->block[cidx].quant, x->block[cidx].quant_shift,
+ xd->qcoeff + i * 16,
+ xd->dqcoeff + i * 16,
+ xd->block[cidx].dequant,
+ x->block[cidx].zbin_extra,
+ &xd->eobs[i],
+ vp9_default_zig_zag1d_4x4, 1);
+ }
}
/* quantize_b_pair function pointer in MACROBLOCK structure is set to one of
--- a/vp9/encoder/vp9_quantize.h
+++ b/vp9/encoder/vp9_quantize.h
@@ -74,7 +74,21 @@
extern prototype_quantize_mb(vp9_quantize_mby_16x16);
void vp9_quantize_sby_32x32(MACROBLOCK *x);
+void vp9_quantize_sby_16x16(MACROBLOCK *x);
+void vp9_quantize_sby_8x8(MACROBLOCK *x);
+void vp9_quantize_sby_4x4(MACROBLOCK *x);
void vp9_quantize_sbuv_16x16(MACROBLOCK *x);
+void vp9_quantize_sbuv_8x8(MACROBLOCK *x);
+void vp9_quantize_sbuv_4x4(MACROBLOCK *x);
+
+void vp9_quantize_sb64y_32x32(MACROBLOCK *x);
+void vp9_quantize_sb64y_16x16(MACROBLOCK *x);
+void vp9_quantize_sb64y_8x8(MACROBLOCK *x);
+void vp9_quantize_sb64y_4x4(MACROBLOCK *x);
+void vp9_quantize_sb64uv_32x32(MACROBLOCK *x);
+void vp9_quantize_sb64uv_16x16(MACROBLOCK *x);
+void vp9_quantize_sb64uv_8x8(MACROBLOCK *x);
+void vp9_quantize_sb64uv_4x4(MACROBLOCK *x);
struct VP9_COMP;
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -275,7 +275,7 @@
fill_token_costs(cpi->mb.token_costs[TX_16X16],
cpi->common.fc.coef_probs_16x16, BLOCK_TYPES);
fill_token_costs(cpi->mb.token_costs[TX_32X32],
- cpi->common.fc.coef_probs_32x32, BLOCK_TYPES_32X32);
+ cpi->common.fc.coef_probs_32x32, BLOCK_TYPES);
/*rough estimate for costing*/
cpi->common.kf_ymode_probs_index = cpi->common.base_qindex >> 4;
@@ -382,25 +382,27 @@
}
static INLINE int cost_coeffs(MACROBLOCK *mb,
- BLOCKD *b, PLANE_TYPE type,
+ int ib, PLANE_TYPE type,
ENTROPY_CONTEXT *a,
ENTROPY_CONTEXT *l,
TX_SIZE tx_size) {
- int pt;
MACROBLOCKD *const xd = &mb->e_mbd;
- const int ib = (int)(b - xd->block);
+ MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
+ const BLOCK_SIZE_TYPE sb_type = mbmi->sb_type;
+ int pt;
const int eob = xd->eobs[ib];
int c = 0;
int cost = 0, seg_eob;
- const int segment_id = xd->mode_info_context->mbmi.segment_id;
+ const int segment_id = mbmi->segment_id;
const int *scan;
- int16_t *qcoeff_ptr = b->qcoeff;
- const int ref = xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME;
- const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
- get_tx_type(xd, b) : DCT_DCT;
+ const int16_t *qcoeff_ptr = xd->qcoeff + ib * 16;
+ const int ref = mbmi->ref_frame != INTRA_FRAME;
+ const TX_TYPE tx_type = (sb_type == BLOCK_SIZE_MB16X16 &&
+ type == PLANE_TYPE_Y_WITH_DC) ?
+ get_tx_type(xd, &xd->block[ib]) : DCT_DCT;
unsigned int (*token_costs)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
mb->token_costs[tx_size][type][ref];
- ENTROPY_CONTEXT a_ec = *a, l_ec = *l;
+ ENTROPY_CONTEXT a_ec, l_ec;
ENTROPY_CONTEXT *const a1 = a +
sizeof(ENTROPY_CONTEXT_PLANES)/sizeof(ENTROPY_CONTEXT);
ENTROPY_CONTEXT *const l1 = l +
@@ -408,6 +410,8 @@
switch (tx_size) {
case TX_4X4:
+ a_ec = *a;
+ l_ec = *l;
scan = vp9_default_zig_zag1d_4x4;
seg_eob = 16;
if (type == PLANE_TYPE_Y_WITH_DC) {
@@ -428,8 +432,6 @@
scan = vp9_default_zig_zag1d_16x16;
seg_eob = 256;
if (type == PLANE_TYPE_UV) {
- const int uv_idx = ib - 16;
- qcoeff_ptr = xd->sb_coeff_data.qcoeff + 1024 + 64 * uv_idx;
a_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0;
l_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0;
} else {
@@ -440,11 +442,22 @@
case TX_32X32:
scan = vp9_default_zig_zag1d_32x32;
seg_eob = 1024;
- qcoeff_ptr = xd->sb_coeff_data.qcoeff;
- a_ec = (a[0] + a[1] + a[2] + a[3] +
- a1[0] + a1[1] + a1[2] + a1[3]) != 0;
- l_ec = (l[0] + l[1] + l[2] + l[3] +
- l1[0] + l1[1] + l1[2] + l1[3]) != 0;
+ if (type == PLANE_TYPE_UV) {
+ ENTROPY_CONTEXT *a2, *a3, *l2, *l3;
+ a2 = a1 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ a3 = a2 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ l2 = l1 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ l3 = l2 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ a_ec = (a[0] + a[1] + a1[0] + a1[1] +
+ a2[0] + a2[1] + a3[0] + a3[1]) != 0;
+ l_ec = (l[0] + l[1] + l1[0] + l1[1] +
+ l2[0] + l2[1] + l3[0] + l3[1]) != 0;
+ } else {
+ a_ec = (a[0] + a[1] + a[2] + a[3] +
+ a1[0] + a1[1] + a1[2] + a1[3]) != 0;
+ l_ec = (l[0] + l[1] + l[2] + l[3] +
+ l1[0] + l1[1] + l1[2] + l1[3]) != 0;
+ }
break;
default:
abort();
@@ -510,7 +523,7 @@
}
for (b = 0; b < 16; b++)
- cost += cost_coeffs(mb, xd->block + b, PLANE_TYPE_Y_WITH_DC,
+ cost += cost_coeffs(mb, b, PLANE_TYPE_Y_WITH_DC,
ta + vp9_block2above[TX_4X4][b],
tl + vp9_block2left[TX_4X4][b],
TX_4X4);
@@ -553,7 +566,7 @@
}
for (b = 0; b < 16; b += 4)
- cost += cost_coeffs(mb, xd->block + b, PLANE_TYPE_Y_WITH_DC,
+ cost += cost_coeffs(mb, b, PLANE_TYPE_Y_WITH_DC,
ta + vp9_block2above[TX_8X8][b],
tl + vp9_block2left[TX_8X8][b],
TX_8X8);
@@ -593,7 +606,7 @@
tl = (ENTROPY_CONTEXT *)xd->left_context;
}
- cost = cost_coeffs(mb, xd->block, PLANE_TYPE_Y_WITH_DC, ta, tl, TX_16X16);
+ cost = cost_coeffs(mb, 0, PLANE_TYPE_Y_WITH_DC, ta, tl, TX_16X16);
return cost;
}
@@ -743,7 +756,7 @@
tl = (ENTROPY_CONTEXT *) xd->left_context;
}
- return cost_coeffs(x, xd->block, PLANE_TYPE_Y_WITH_DC, ta, tl, TX_32X32);
+ return cost_coeffs(x, 0, PLANE_TYPE_Y_WITH_DC, ta, tl, TX_32X32);
}
static int vp9_sb_block_error_c(int16_t *coeff, int16_t *dqcoeff,
@@ -763,9 +776,7 @@
static void super_block_yrd_32x32(MACROBLOCK *x,
int *rate, int *distortion, int *skippable,
int backup) {
- SUPERBLOCK * const x_sb = &x->sb_coeff_data;
- MACROBLOCKD * const xd = &x->e_mbd;
- SUPERBLOCKD * const xd_sb = &xd->sb_coeff_data;
+ MACROBLOCKD *const xd = &x->e_mbd;
#if DEBUG_ERROR
int16_t out[1024];
#endif
@@ -773,17 +784,17 @@
vp9_transform_sby_32x32(x);
vp9_quantize_sby_32x32(x);
#if DEBUG_ERROR
- vp9_short_idct32x32(xd_sb->dqcoeff, out, 64);
+ vp9_short_idct32x32(xd->dqcoeff, out, 64);
#endif
- *distortion = vp9_sb_block_error_c(x_sb->coeff, xd_sb->dqcoeff, 1024);
+ *distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 1024);
#if DEBUG_ERROR
printf("IDCT/FDCT error 32x32: %d (d: %d)\n",
- vp9_block_error_c(x_sb->src_diff, out, 1024), *distortion);
+ vp9_block_error_c(x->src_diff, out, 1024), *distortion);
#endif
*rate = rdcost_sby_32x32(x, backup);
- *skippable = vp9_sby_is_skippable_32x32(&x->e_mbd);
+ *skippable = vp9_sby_is_skippable_32x32(xd);
}
static void super_block_yrd(VP9_COMP *cpi,
@@ -807,7 +818,7 @@
s[n] = 1;
}
- vp9_subtract_sby_s_c(x->sb_coeff_data.src_diff, src, src_y_stride,
+ vp9_subtract_sby_s_c(x->src_diff, src, src_y_stride,
dst, dst_y_stride);
super_block_yrd_32x32(x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32], 1);
@@ -896,7 +907,7 @@
xd->above_context = &t_above[TX_32X32][x_idx << 1];
xd->left_context = &t_left[TX_32X32][y_idx << 1];
- vp9_subtract_sby_s_c(x->sb_coeff_data.src_diff,
+ vp9_subtract_sby_s_c(x->src_diff,
src + 32 * x_idx + 32 * y_idx * src_y_stride,
src_y_stride,
dst + 32 * x_idx + 32 * y_idx * dst_y_stride,
@@ -1051,7 +1062,8 @@
tempa = ta;
templ = tl;
- ratey = cost_coeffs(x, b, PLANE_TYPE_Y_WITH_DC, &tempa, &templ, TX_4X4);
+ ratey = cost_coeffs(x, b - xd->block,
+ PLANE_TYPE_Y_WITH_DC, &tempa, &templ, TX_4X4);
rate += ratey;
distortion = vp9_block_error(be->coeff, b->dqcoeff, 16) >> 2;
@@ -1355,7 +1367,7 @@
ta1 = ta0 + 1;
tl1 = tl0 + 1;
- rate_t = cost_coeffs(x, xd->block + idx, PLANE_TYPE_Y_WITH_DC,
+ rate_t = cost_coeffs(x, idx, PLANE_TYPE_Y_WITH_DC,
ta0, tl0, TX_8X8);
rate += rate_t;
@@ -1388,12 +1400,12 @@
x->quantize_b_4x4(x, ib + iblock[i]);
}
distortion += vp9_block_error_c(be->coeff, b->dqcoeff, 16 << do_two);
- rate_t += cost_coeffs(x, b, PLANE_TYPE_Y_WITH_DC,
+ rate_t += cost_coeffs(x, ib + iblock[i], PLANE_TYPE_Y_WITH_DC,
i&1 ? ta1 : ta0, i&2 ? tl1 : tl0,
TX_4X4);
if (do_two) {
i++;
- rate_t += cost_coeffs(x, b + 1, PLANE_TYPE_Y_WITH_DC,
+ rate_t += cost_coeffs(x, ib + iblock[i], PLANE_TYPE_Y_WITH_DC,
i&1 ? ta1 : ta0, i&2 ? tl1 : tl0,
TX_4X4);
}
@@ -1500,7 +1512,7 @@
}
for (b = 16; b < 24; b++)
- cost += cost_coeffs(mb, xd->block + b, PLANE_TYPE_UV,
+ cost += cost_coeffs(mb, b, PLANE_TYPE_UV,
ta + vp9_block2above[TX_4X4][b],
tl + vp9_block2left[TX_4X4][b],
TX_4X4);
@@ -1541,7 +1553,7 @@
}
for (b = 16; b < 24; b += 4)
- cost += cost_coeffs(mb, xd->block + b, PLANE_TYPE_UV,
+ cost += cost_coeffs(mb, b, PLANE_TYPE_UV,
ta + vp9_block2above[TX_8X8][b],
tl + vp9_block2left[TX_8X8][b], TX_8X8);
@@ -1580,7 +1592,7 @@
}
for (b = 16; b < 24; b += 4)
- cost += cost_coeffs(x, xd->block + b, PLANE_TYPE_UV,
+ cost += cost_coeffs(x, b * 4, PLANE_TYPE_UV,
ta + vp9_block2above[TX_8X8][b],
tl + vp9_block2left[TX_8X8][b], TX_16X16);
@@ -1596,8 +1608,8 @@
vp9_quantize_sbuv_16x16(x);
*rate = rd_cost_sbuv_16x16(x, backup);
- *distortion = vp9_block_error_c(x->sb_coeff_data.coeff + 1024,
- xd->sb_coeff_data.dqcoeff + 1024, 512) >> 2;
+ *distortion = vp9_block_error_c(x->coeff + 1024,
+ xd->dqcoeff + 1024, 512) >> 2;
*skip = vp9_sbuv_is_skippable_16x16(xd);
}
@@ -1609,8 +1621,8 @@
const uint8_t *vsrc = x->src.v_buffer, *vdst = xd->dst.v_buffer;
int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride;
- if (mbmi->txfm_size == TX_32X32) {
- vp9_subtract_sbuv_s_c(x->sb_coeff_data.src_diff,
+ if (mbmi->txfm_size >= TX_16X16) {
+ vp9_subtract_sbuv_s_c(x->src_diff,
usrc, vsrc, src_uv_stride,
udst, vdst, dst_uv_stride);
rd_inter32x32_uv_16x16(x, rate, distortion, skip, 1);
@@ -1789,8 +1801,8 @@
const uint8_t *vsrc = x->src.v_buffer, *vdst = xd->dst.v_buffer;
int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride;
- if (mbmi->txfm_size == TX_32X32) {
- vp9_subtract_sbuv_s_c(x->sb_coeff_data.src_diff,
+ if (mbmi->txfm_size >= TX_16X16) {
+ vp9_subtract_sbuv_s_c(x->src_diff,
usrc, vsrc, src_uv_stride,
udst, vdst, dst_uv_stride);
rd_inter32x32_uv_16x16(x, rate, distortion, skippable, 1);
@@ -1842,6 +1854,46 @@
}
}
+static int rd_cost_sb64uv_32x32(MACROBLOCK *x, int backup) {
+ int b;
+ int cost = 0;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ ENTROPY_CONTEXT_PLANES t_above[4], t_left[4];
+ ENTROPY_CONTEXT *ta, *tl;
+
+ if (backup) {
+ vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES) * 4);
+ vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES) * 4);
+
+ ta = (ENTROPY_CONTEXT *) &t_above;
+ tl = (ENTROPY_CONTEXT *) &t_left;
+ } else {
+ ta = (ENTROPY_CONTEXT *)xd->above_context;
+ tl = (ENTROPY_CONTEXT *)xd->left_context;
+ }
+
+ for (b = 16; b < 24; b += 4)
+ cost += cost_coeffs(x, b * 16, PLANE_TYPE_UV,
+ ta + vp9_block2above[TX_8X8][b],
+ tl + vp9_block2left[TX_8X8][b], TX_32X32);
+
+ return cost;
+}
+
+static void rd_inter64x64_uv_32x32(MACROBLOCK *x, int *rate,
+ int *distortion, int *skip,
+ int backup) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+
+ vp9_transform_sb64uv_32x32(x);
+ vp9_quantize_sb64uv_32x32(x);
+
+ *rate = rd_cost_sb64uv_32x32(x, backup);
+ *distortion = vp9_block_error_c(x->coeff + 4096,
+ xd->dqcoeff + 4096, 2048);
+ *skip = vp9_sb64uv_is_skippable_32x32(xd);
+}
+
static void super_block_64_uvrd(MACROBLOCK *x,
int *rate,
int *distortion,
@@ -1856,10 +1908,15 @@
ENTROPY_CONTEXT_PLANES *tl_orig = xd->left_context;
int d = 0, r = 0, n, s = 1;
+ // FIXME not needed if tx=32x32
memcpy(t_above, xd->above_context, sizeof(t_above));
memcpy(t_left, xd->left_context, sizeof(t_left));
if (mbmi->txfm_size == TX_32X32) {
+ vp9_subtract_sb64uv_s_c(x->src_diff, usrc, vsrc, src_uv_stride,
+ udst, vdst, dst_uv_stride);
+ rd_inter64x64_uv_32x32(x, &r, &d, &s, 1);
+ } else if (mbmi->txfm_size == TX_16X16) {
int n;
*rate = 0;
@@ -1867,7 +1924,7 @@
int x_idx = n & 1, y_idx = n >> 1;
int r_tmp, d_tmp, s_tmp;
- vp9_subtract_sbuv_s_c(x->sb_coeff_data.src_diff,
+ vp9_subtract_sbuv_s_c(x->src_diff,
usrc + x_idx * 16 + y_idx * 16 * src_uv_stride,
vsrc + x_idx * 16 + y_idx * 16 * src_uv_stride,
src_uv_stride,
@@ -2170,7 +2227,7 @@
x->quantize_b_4x4(x, i);
thisdistortion = vp9_block_error(be->coeff, bd->dqcoeff, 16);
*distortion += thisdistortion;
- *labelyrate += cost_coeffs(x, bd, PLANE_TYPE_Y_WITH_DC,
+ *labelyrate += cost_coeffs(x, i, PLANE_TYPE_Y_WITH_DC,
ta + vp9_block2above[TX_4X4][i],
tl + vp9_block2left[TX_4X4][i], TX_4X4);
}
@@ -2233,10 +2290,10 @@
x->quantize_b_8x8(x, idx);
thisdistortion = vp9_block_error_c(be2->coeff, bd2->dqcoeff, 64);
otherdist += thisdistortion;
- othercost += cost_coeffs(x, bd2, PLANE_TYPE_Y_WITH_DC,
- tacp + vp9_block2above[TX_8X8][idx],
- tlcp + vp9_block2left[TX_8X8][idx],
- TX_8X8);
+ othercost += cost_coeffs(x, idx, PLANE_TYPE_Y_WITH_DC,
+ tacp + vp9_block2above[TX_8X8][idx],
+ tlcp + vp9_block2left[TX_8X8][idx],
+ TX_8X8);
}
for (j = 0; j < 4; j += 2) {
bd = &xd->block[ib + iblock[j]];
@@ -2245,11 +2302,12 @@
x->quantize_b_4x4_pair(x, ib + iblock[j], ib + iblock[j] + 1);
thisdistortion = vp9_block_error_c(be->coeff, bd->dqcoeff, 32);
*distortion += thisdistortion;
- *labelyrate += cost_coeffs(x, bd, PLANE_TYPE_Y_WITH_DC,
+ *labelyrate += cost_coeffs(x, ib + iblock[j], PLANE_TYPE_Y_WITH_DC,
ta + vp9_block2above[TX_4X4][ib + iblock[j]],
tl + vp9_block2left[TX_4X4][ib + iblock[j]],
TX_4X4);
- *labelyrate += cost_coeffs(x, bd + 1, PLANE_TYPE_Y_WITH_DC,
+ *labelyrate += cost_coeffs(x, ib + iblock[j] + 1,
+ PLANE_TYPE_Y_WITH_DC,
ta + vp9_block2above[TX_4X4][ib + iblock[j] + 1],
tl + vp9_block2left[TX_4X4][ib + iblock[j]],
TX_4X4);
@@ -2263,11 +2321,12 @@
x->quantize_b_4x4_pair(x, ib + iblock[j], ib + iblock[j]);
thisdistortion = vp9_block_error_c(be->coeff, bd->dqcoeff, 32);
otherdist += thisdistortion;
- othercost += cost_coeffs(x, bd, PLANE_TYPE_Y_WITH_DC,
+ othercost += cost_coeffs(x, ib + iblock[j], PLANE_TYPE_Y_WITH_DC,
tacp + vp9_block2above[TX_4X4][ib + iblock[j]],
tlcp + vp9_block2left[TX_4X4][ib + iblock[j]],
TX_4X4);
- othercost += cost_coeffs(x, bd + 1, PLANE_TYPE_Y_WITH_DC,
+ othercost += cost_coeffs(x, ib + iblock[j] + 1,
+ PLANE_TYPE_Y_WITH_DC,
tacp + vp9_block2above[TX_4X4][ib + iblock[j] + 1],
tlcp + vp9_block2left[TX_4X4][ib + iblock[j]],
TX_4X4);
@@ -2277,7 +2336,7 @@
x->quantize_b_8x8(x, idx);
thisdistortion = vp9_block_error_c(be2->coeff, bd2->dqcoeff, 64);
*distortion += thisdistortion;
- *labelyrate += cost_coeffs(x, bd2, PLANE_TYPE_Y_WITH_DC,
+ *labelyrate += cost_coeffs(x, idx, PLANE_TYPE_Y_WITH_DC,
ta + vp9_block2above[TX_8X8][idx],
tl + vp9_block2left[TX_8X8][idx], TX_8X8);
}
--- a/vp9/encoder/vp9_tokenize.c
+++ b/vp9/encoder/vp9_tokenize.c
@@ -28,12 +28,12 @@
vp9_coeff_accum context_counters_4x4[BLOCK_TYPES];
vp9_coeff_accum context_counters_8x8[BLOCK_TYPES];
vp9_coeff_accum context_counters_16x16[BLOCK_TYPES];
-vp9_coeff_accum context_counters_32x32[BLOCK_TYPES_32X32];
+vp9_coeff_accum context_counters_32x32[BLOCK_TYPES];
extern vp9_coeff_stats tree_update_hist_4x4[BLOCK_TYPES];
extern vp9_coeff_stats tree_update_hist_8x8[BLOCK_TYPES];
extern vp9_coeff_stats tree_update_hist_16x16[BLOCK_TYPES];
-extern vp9_coeff_stats tree_update_hist_32x32[BLOCK_TYPES_32X32];
+extern vp9_coeff_stats tree_update_hist_32x32[BLOCK_TYPES];
#endif /* ENTROPY_STATS */
static TOKENVALUE dct_value_tokens[DCT_MAX_VALUE * 2];
@@ -101,37 +101,52 @@
PLANE_TYPE type,
TX_SIZE tx_size,
int dry_run) {
+ MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
int pt; /* near block/prev token context index */
int c = 0;
int recent_energy = 0;
- const BLOCKD * const b = xd->block + ib;
const int eob = xd->eobs[ib]; /* one beyond last nonzero coeff */
TOKENEXTRA *t = *tp; /* store tokens starting here */
- int16_t *qcoeff_ptr = b->qcoeff;
+ int16_t *qcoeff_ptr = xd->qcoeff + 16 * ib;
int seg_eob;
- const int segment_id = xd->mode_info_context->mbmi.segment_id;
+ const int segment_id = mbmi->segment_id;
+ const BLOCK_SIZE_TYPE sb_type = mbmi->sb_type;
const int *scan;
vp9_coeff_count *counts;
vp9_coeff_probs *probs;
- const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
- get_tx_type(xd, b) : DCT_DCT;
- const int ref = xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME;
+ const TX_TYPE tx_type = (sb_type == BLOCK_SIZE_MB16X16 &&
+ type == PLANE_TYPE_Y_WITH_DC) ?
+ get_tx_type(xd, &xd->block[ib]) : DCT_DCT;
+ const int ref = mbmi->ref_frame != INTRA_FRAME;
+ ENTROPY_CONTEXT *a, *l, *a1, *l1, *a2, *l2, *a3, *l3, a_ec, l_ec;
- ENTROPY_CONTEXT *const a = (ENTROPY_CONTEXT *)xd->above_context +
- vp9_block2above[tx_size][ib];
- ENTROPY_CONTEXT *const l = (ENTROPY_CONTEXT *)xd->left_context +
- vp9_block2left[tx_size][ib];
- ENTROPY_CONTEXT a_ec = *a, l_ec = *l;
+ if (sb_type == BLOCK_SIZE_SB64X64) {
+ a = (ENTROPY_CONTEXT *)xd->above_context +
+ vp9_block2above_sb64[tx_size][ib];
+ l = (ENTROPY_CONTEXT *)xd->left_context + vp9_block2left_sb64[tx_size][ib];
+ a1 = a + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ l1 = l + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ a2 = a1 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ l2 = l1 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ a3 = a2 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ l3 = l2 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ } else if (sb_type == BLOCK_SIZE_SB32X32) {
+ a = (ENTROPY_CONTEXT *)xd->above_context + vp9_block2above_sb[tx_size][ib];
+ l = (ENTROPY_CONTEXT *)xd->left_context + vp9_block2left_sb[tx_size][ib];
+ a1 = a + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ l1 = l + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ a2 = a3 = l2 = l3 = NULL;
+ } else {
+ a = (ENTROPY_CONTEXT *)xd->above_context + vp9_block2above[tx_size][ib];
+ l = (ENTROPY_CONTEXT *)xd->left_context + vp9_block2left[tx_size][ib];
+ a1 = l1 = a2 = l2 = a3 = l3 = NULL;
+ }
- ENTROPY_CONTEXT *const a1 = (ENTROPY_CONTEXT *)(&xd->above_context[1]) +
- vp9_block2above[tx_size][ib];
- ENTROPY_CONTEXT *const l1 = (ENTROPY_CONTEXT *)(&xd->left_context[1]) +
- vp9_block2left[tx_size][ib];
-
-
switch (tx_size) {
default:
case TX_4X4:
+ a_ec = *a;
+ l_ec = *l;
seg_eob = 16;
scan = vp9_default_zig_zag1d_4x4;
if (tx_type != DCT_DCT) {
@@ -164,23 +179,23 @@
scan = vp9_default_zig_zag1d_16x16;
counts = cpi->coef_counts_16x16;
probs = cpi->common.fc.coef_probs_16x16;
- if (type == PLANE_TYPE_UV) {
- int uv_idx = (ib - 16) >> 2;
- qcoeff_ptr = xd->sb_coeff_data.qcoeff + 1024 + 256 * uv_idx;
- }
break;
case TX_32X32:
- a_ec = a[0] + a[1] + a[2] + a[3] +
- a1[0] + a1[1] + a1[2] + a1[3];
- l_ec = l[0] + l[1] + l[2] + l[3] +
- l1[0] + l1[1] + l1[2] + l1[3];
- a_ec = a_ec != 0;
- l_ec = l_ec != 0;
+ if (type != PLANE_TYPE_UV) {
+ a_ec = (a[0] + a[1] + a[2] + a[3] +
+ a1[0] + a1[1] + a1[2] + a1[3]) != 0;
+ l_ec = (l[0] + l[1] + l[2] + l[3] +
+ l1[0] + l1[1] + l1[2] + l1[3]) != 0;
+ } else {
+ a_ec = (a[0] + a[1] + a1[0] + a1[1] +
+ a2[0] + a2[1] + a3[0] + a3[1]) != 0;
+ l_ec = (l[0] + l[1] + l1[0] + l1[1] +
+ l2[0] + l2[1] + l3[0] + l3[1]) != 0;
+ }
seg_eob = 1024;
scan = vp9_default_zig_zag1d_32x32;
counts = cpi->coef_counts_32x32;
probs = cpi->common.fc.coef_probs_32x32;
- qcoeff_ptr = xd->sb_coeff_data.qcoeff;
break;
}
@@ -233,10 +248,17 @@
l1[0] = l1[1] = l[1] = l_ec;
}
} else if (tx_size == TX_32X32) {
- a[1] = a[2] = a[3] = a_ec;
- l[1] = l[2] = l[3] = l_ec;
- a1[0] = a1[1] = a1[2] = a1[3] = a_ec;
- l1[0] = l1[1] = l1[2] = l1[3] = l_ec;
+ if (type != PLANE_TYPE_UV) {
+ a[1] = a[2] = a[3] = a_ec;
+ l[1] = l[2] = l[3] = l_ec;
+ a1[0] = a1[1] = a1[2] = a1[3] = a_ec;
+ l1[0] = l1[1] = l1[2] = l1[3] = l_ec;
+ } else {
+ a[1] = a1[0] = a1[1] = a_ec;
+ l[1] = l1[0] = l1[1] = l_ec;
+ a2[0] = a2[1] = a3[0] = a3[1] = a_ec;
+ l2[0] = l2[1] = l3[0] = l3[1] = l_ec;
+ }
}
}
@@ -289,9 +311,7 @@
}
int vp9_mby_is_skippable_16x16(MACROBLOCKD *xd) {
- int skip = 1;
- skip &= !xd->eobs[0];
- return skip;
+ return (!xd->eobs[0]);
}
static int mb_is_skippable_16x16(MACROBLOCKD *xd) {
@@ -299,13 +319,11 @@
}
int vp9_sby_is_skippable_32x32(MACROBLOCKD *xd) {
- int skip = 1;
- skip &= !xd->eobs[0];
- return skip;
+ return (!xd->eobs[0]);
}
int vp9_sbuv_is_skippable_16x16(MACROBLOCKD *xd) {
- return (!xd->eobs[16]) & (!xd->eobs[20]);
+ return (!xd->eobs[64]) & (!xd->eobs[80]);
}
static int sb_is_skippable_32x32(MACROBLOCKD *xd) {
@@ -313,6 +331,68 @@
vp9_sbuv_is_skippable_16x16(xd);
}
+static int sby_is_skippable_16x16(MACROBLOCKD *xd) {
+ int skip = 1;
+ int i = 0;
+
+ for (i = 0; i < 64; i += 16)
+ skip &= (!xd->eobs[i]);
+
+ return skip;
+}
+
+static int sb_is_skippable_16x16(MACROBLOCKD *xd) {
+ return sby_is_skippable_16x16(xd) & vp9_sbuv_is_skippable_16x16(xd);
+}
+
+static int sby_is_skippable_8x8(MACROBLOCKD *xd) {
+ int skip = 1;
+ int i = 0;
+
+ for (i = 0; i < 64; i += 4)
+ skip &= (!xd->eobs[i]);
+
+ return skip;
+}
+
+static int sbuv_is_skippable_8x8(MACROBLOCKD *xd) {
+ int skip = 1;
+ int i = 0;
+
+ for (i = 64; i < 96; i += 4)
+ skip &= (!xd->eobs[i]);
+
+ return skip;
+}
+
+static int sb_is_skippable_8x8(MACROBLOCKD *xd) {
+ return sby_is_skippable_8x8(xd) & sbuv_is_skippable_8x8(xd);
+}
+
+static int sby_is_skippable_4x4(MACROBLOCKD *xd) {
+ int skip = 1;
+ int i = 0;
+
+ for (i = 0; i < 64; i++)
+ skip &= (!xd->eobs[i]);
+
+ return skip;
+}
+
+static int sbuv_is_skippable_4x4(MACROBLOCKD *xd) {
+ int skip = 1;
+ int i = 0;
+
+ for (i = 64; i < 96; i++)
+ skip &= (!xd->eobs[i]);
+
+ return skip;
+}
+
+static int sb_is_skippable_4x4(MACROBLOCKD *xd) {
+ return sby_is_skippable_4x4(xd) & sbuv_is_skippable_4x4(xd);
+}
+
void vp9_tokenize_sb(VP9_COMP *cpi,
MACROBLOCKD *xd,
TOKENEXTRA **t,
@@ -325,7 +405,21 @@
const int skip_inc = !vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP);
int b;
- mbmi->mb_skip_coeff = sb_is_skippable_32x32(xd);
+ switch (mbmi->txfm_size) {
+ case TX_32X32:
+ mbmi->mb_skip_coeff = sb_is_skippable_32x32(xd);
+ break;
+ case TX_16X16:
+ mbmi->mb_skip_coeff = sb_is_skippable_16x16(xd);
+ break;
+ case TX_8X8:
+ mbmi->mb_skip_coeff = sb_is_skippable_8x8(xd);
+ break;
+ case TX_4X4:
+ mbmi->mb_skip_coeff = sb_is_skippable_4x4(xd);
+ break;
+ default: assert(0);
+ }
if (mbmi->mb_skip_coeff) {
if (!dry_run)
@@ -333,7 +427,7 @@
if (!cm->mb_no_coeff_skip) {
vp9_stuff_sb(cpi, xd, t, dry_run);
} else {
- vp9_fix_contexts_sb(xd);
+ vp9_reset_sb_tokens_context(xd);
}
if (dry_run)
*t = t_backup;
@@ -343,13 +437,215 @@
if (!dry_run)
cpi->skip_false_count[mb_skip_context] += skip_inc;
- tokenize_b(cpi, xd, 0, t, PLANE_TYPE_Y_WITH_DC,
- TX_32X32, dry_run);
+ switch (mbmi->txfm_size) {
+ case TX_32X32:
+ tokenize_b(cpi, xd, 0, t, PLANE_TYPE_Y_WITH_DC,
+ TX_32X32, dry_run);
+ for (b = 64; b < 96; b += 16)
+ tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV,
+ TX_16X16, dry_run);
+ break;
+ case TX_16X16:
+ for (b = 0; b < 64; b += 16)
+ tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC,
+ TX_16X16, dry_run);
+ for (b = 64; b < 96; b += 16)
+ tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV,
+ TX_16X16, dry_run);
+ break;
+ case TX_8X8:
+ for (b = 0; b < 64; b += 4)
+ tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC,
+ TX_8X8, dry_run);
+ for (b = 64; b < 96; b += 4)
+ tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV,
+ TX_8X8, dry_run);
+ break;
+ case TX_4X4:
+ for (b = 0; b < 64; b++)
+ tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC,
+ TX_4X4, dry_run);
+ for (b = 64; b < 96; b++)
+ tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV,
+ TX_4X4, dry_run);
+ break;
+ default: assert(0);
+ }
- for (b = 16; b < 24; b += 4) {
- tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV,
- TX_16X16, dry_run);
+ if (dry_run)
+ *t = t_backup;
+}
+
+static int sb64y_is_skippable_32x32(MACROBLOCKD *xd) {
+ int skip = 1;
+ int i = 0;
+
+ for (i = 0; i < 256; i += 64)
+ skip &= (!xd->eobs[i]);
+
+ return skip;
+}
+
+int vp9_sb64uv_is_skippable_32x32(MACROBLOCKD *xd) {
+ return (!xd->eobs[256]) & (!xd->eobs[320]);
+}
+
+static int sb64_is_skippable_32x32(MACROBLOCKD *xd) {
+ return sb64y_is_skippable_32x32(xd) & vp9_sb64uv_is_skippable_32x32(xd);
+}
+
+static int sb64y_is_skippable_16x16(MACROBLOCKD *xd) {
+ int skip = 1;
+ int i = 0;
+
+ for (i = 0; i < 256; i += 16)
+ skip &= (!xd->eobs[i]);
+
+ return skip;
+}
+
+static int sb64uv_is_skippable_16x16(MACROBLOCKD *xd) {
+ int skip = 1;
+ int i = 0;
+
+ for (i = 256; i < 384; i += 16)
+ skip &= (!xd->eobs[i]);
+
+ return skip;
+}
+
+static int sb64_is_skippable_16x16(MACROBLOCKD *xd) {
+ return sb64y_is_skippable_16x16(xd) & sb64uv_is_skippable_16x16(xd);
+}
+
+static int sb64y_is_skippable_8x8(MACROBLOCKD *xd) {
+ int skip = 1;
+ int i = 0;
+
+ for (i = 0; i < 256; i += 4)
+ skip &= (!xd->eobs[i]);
+
+ return skip;
+}
+
+static int sb64uv_is_skippable_8x8(MACROBLOCKD *xd) {
+ int skip = 1;
+ int i = 0;
+
+ for (i = 256; i < 384; i += 4)
+ skip &= (!xd->eobs[i]);
+
+ return skip;
+}
+
+static int sb64_is_skippable_8x8(MACROBLOCKD *xd) {
+ return sb64y_is_skippable_8x8(xd) & sb64uv_is_skippable_8x8(xd);
+}
+
+static int sb64y_is_skippable_4x4(MACROBLOCKD *xd) {
+ int skip = 1;
+ int i = 0;
+
+ for (i = 0; i < 256; i++)
+ skip &= (!xd->eobs[i]);
+
+ return skip;
+}
+
+static int sb64uv_is_skippable_4x4(MACROBLOCKD *xd) {
+ int skip = 1;
+ int i = 0;
+
+ for (i = 256; i < 384; i++)
+ skip &= (!xd->eobs[i]);
+
+ return skip;
+}
+
+static int sb64_is_skippable_4x4(MACROBLOCKD *xd) {
+ return sb64y_is_skippable_4x4(xd) & sb64uv_is_skippable_4x4(xd);
+}
+
+void vp9_tokenize_sb64(VP9_COMP *cpi,
+ MACROBLOCKD *xd,
+ TOKENEXTRA **t,
+ int dry_run) {
+ VP9_COMMON * const cm = &cpi->common;
+ MB_MODE_INFO * const mbmi = &xd->mode_info_context->mbmi;
+ TOKENEXTRA *t_backup = *t;
+ const int mb_skip_context = vp9_get_pred_context(cm, xd, PRED_MBSKIP);
+ const int segment_id = mbmi->segment_id;
+ const int skip_inc = !vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP);
+ int b;
+
+ switch (mbmi->txfm_size) {
+ case TX_32X32:
+ mbmi->mb_skip_coeff = sb64_is_skippable_32x32(xd);
+ break;
+ case TX_16X16:
+ mbmi->mb_skip_coeff = sb64_is_skippable_16x16(xd);
+ break;
+ case TX_8X8:
+ mbmi->mb_skip_coeff = sb64_is_skippable_8x8(xd);
+ break;
+ case TX_4X4:
+ mbmi->mb_skip_coeff = sb64_is_skippable_4x4(xd);
+ break;
+ default: assert(0);
}
+
+ if (mbmi->mb_skip_coeff) {
+ if (!dry_run)
+ cpi->skip_true_count[mb_skip_context] += skip_inc;
+ if (!cm->mb_no_coeff_skip) {
+ vp9_stuff_sb64(cpi, xd, t, dry_run);
+ } else {
+ vp9_reset_sb64_tokens_context(xd);
+ }
+ if (dry_run)
+ *t = t_backup;
+ return;
+ }
+
+ if (!dry_run)
+ cpi->skip_false_count[mb_skip_context] += skip_inc;
+
+ switch (mbmi->txfm_size) {
+ case TX_32X32:
+ for (b = 0; b < 256; b += 64)
+ tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC,
+ TX_32X32, dry_run);
+ for (b = 256; b < 384; b += 64)
+ tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV,
+ TX_32X32, dry_run);
+ break;
+ case TX_16X16:
+ for (b = 0; b < 256; b += 16)
+ tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC,
+ TX_16X16, dry_run);
+ for (b = 256; b < 384; b += 16)
+ tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV,
+ TX_16X16, dry_run);
+ break;
+ case TX_8X8:
+ for (b = 0; b < 256; b += 4)
+ tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC,
+ TX_8X8, dry_run);
+ for (b = 256; b < 384; b += 4)
+ tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV,
+ TX_8X8, dry_run);
+ break;
+ case TX_4X4:
+ for (b = 0; b < 256; b++)
+ tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC,
+ TX_4X4, dry_run);
+ for (b = 256; b < 384; b++)
+ tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV,
+ TX_4X4, dry_run);
+ break;
+ default: assert(0);
+ }
+
if (dry_run)
*t = t_backup;
}
@@ -567,23 +863,23 @@
/* print counts */
print_counter(f, context_counters_4x4, BLOCK_TYPES,
- "vp9_default_coef_counts_4x4[BLOCK_TYPES_4X4]");
+ "vp9_default_coef_counts_4x4[BLOCK_TYPES]");
print_counter(f, context_counters_8x8, BLOCK_TYPES,
- "vp9_default_coef_counts_8x8[BLOCK_TYPES_8X8]");
+ "vp9_default_coef_counts_8x8[BLOCK_TYPES]");
print_counter(f, context_counters_16x16, BLOCK_TYPES,
- "vp9_default_coef_counts_16x16[BLOCK_TYPES_16X16]");
- print_counter(f, context_counters_32x32, BLOCK_TYPES_32X32,
- "vp9_default_coef_counts_32x32[BLOCK_TYPES_32X32]");
+ "vp9_default_coef_counts_16x16[BLOCK_TYPES]");
+ print_counter(f, context_counters_32x32, BLOCK_TYPES,
+ "vp9_default_coef_counts_32x32[BLOCK_TYPES]");
/* print coefficient probabilities */
print_probs(f, context_counters_4x4, BLOCK_TYPES,
- "default_coef_probs_4x4[BLOCK_TYPES_4X4]");
+ "default_coef_probs_4x4[BLOCK_TYPES]");
print_probs(f, context_counters_8x8, BLOCK_TYPES,
- "default_coef_probs_8x8[BLOCK_TYPES_8X8]");
+ "default_coef_probs_8x8[BLOCK_TYPES]");
print_probs(f, context_counters_16x16, BLOCK_TYPES,
- "default_coef_probs_16x16[BLOCK_TYPES_16X16]");
- print_probs(f, context_counters_32x32, BLOCK_TYPES_32X32,
- "default_coef_probs_32x32[BLOCK_TYPES_32X32]");
+ "default_coef_probs_16x16[BLOCK_TYPES]");
+ print_probs(f, context_counters_32x32, BLOCK_TYPES,
+ "default_coef_probs_32x32[BLOCK_TYPES]");
fclose(f);
@@ -600,31 +896,49 @@
fill_value_tokens();
}
-static INLINE void stuff_b(VP9_COMP *cpi,
- MACROBLOCKD *xd,
- const int ib,
- TOKENEXTRA **tp,
- PLANE_TYPE type,
- TX_SIZE tx_size,
- int dry_run) {
+static void stuff_b(VP9_COMP *cpi,
+ MACROBLOCKD *xd,
+ const int ib,
+ TOKENEXTRA **tp,
+ PLANE_TYPE type,
+ TX_SIZE tx_size,
+ int dry_run) {
vp9_coeff_count *counts;
vp9_coeff_probs *probs;
int pt, band;
TOKENEXTRA *t = *tp;
- const int ref = xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME;
- ENTROPY_CONTEXT *const a = (ENTROPY_CONTEXT *)xd->above_context +
- vp9_block2above[tx_size][ib];
- ENTROPY_CONTEXT *const l = (ENTROPY_CONTEXT *)xd->left_context +
- vp9_block2left[tx_size][ib];
- ENTROPY_CONTEXT a_ec = *a, l_ec = *l;
- ENTROPY_CONTEXT *const a1 = (ENTROPY_CONTEXT *)(&xd->above_context[1]) +
- vp9_block2above[tx_size][ib];
- ENTROPY_CONTEXT *const l1 = (ENTROPY_CONTEXT *)(&xd->left_context[1]) +
- vp9_block2left[tx_size][ib];
+ MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
+ const int ref = mbmi->ref_frame != INTRA_FRAME;
+ const BLOCK_SIZE_TYPE sb_type = mbmi->sb_type;
+ ENTROPY_CONTEXT *a, *l, *a1, *l1, *a2, *l2, *a3, *l3, a_ec, l_ec;
+ if (sb_type == BLOCK_SIZE_SB64X64) {
+ a = (ENTROPY_CONTEXT *)xd->above_context +
+ vp9_block2above_sb64[tx_size][ib];
+ l = (ENTROPY_CONTEXT *)xd->left_context + vp9_block2left_sb64[tx_size][ib];
+ a1 = a + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ l1 = l + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ a2 = a1 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ l2 = l1 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ a3 = a2 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ l3 = l2 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ } else if (sb_type == BLOCK_SIZE_SB32X32) {
+ a = (ENTROPY_CONTEXT *)xd->above_context + vp9_block2above_sb[tx_size][ib];
+ l = (ENTROPY_CONTEXT *)xd->left_context + vp9_block2left_sb[tx_size][ib];
+ a1 = a + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ l1 = l + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ a2 = l2 = a3 = l3 = NULL;
+ } else {
+ a = (ENTROPY_CONTEXT *)xd->above_context + vp9_block2above[tx_size][ib];
+ l = (ENTROPY_CONTEXT *)xd->left_context + vp9_block2left[tx_size][ib];
+ a1 = l1 = a2 = l2 = a3 = l3 = NULL;
+ }
+
switch (tx_size) {
default:
case TX_4X4:
+ a_ec = a[0];
+ l_ec = l[0];
counts = cpi->coef_counts_4x4;
probs = cpi->common.fc.coef_probs_4x4;
break;
@@ -646,12 +960,17 @@
probs = cpi->common.fc.coef_probs_16x16;
break;
case TX_32X32:
- a_ec = a[0] + a[1] + a[2] + a[3] +
- a1[0] + a1[1] + a1[2] + a1[3];
- l_ec = l[0] + l[1] + l[2] + l[3] +
- l1[0] + l1[1] + l1[2] + l1[3];
- a_ec = a_ec != 0;
- l_ec = l_ec != 0;
+ if (type != PLANE_TYPE_UV) {
+ a_ec = (a[0] + a[1] + a[2] + a[3] +
+ a1[0] + a1[1] + a1[2] + a1[3]) != 0;
+ l_ec = (l[0] + l[1] + l[2] + l[3] +
+ l1[0] + l1[1] + l1[2] + l1[3]) != 0;
+ } else {
+ a_ec = (a[0] + a[1] + a1[0] + a1[1] +
+ a2[0] + a2[1] + a3[0] + a3[1]) != 0;
+ l_ec = (l[0] + l[1] + l1[0] + l1[1] +
+ l2[0] + l2[1] + l3[0] + l3[1]) != 0;
+ }
counts = cpi->coef_counts_32x32;
probs = cpi->common.fc.coef_probs_32x32;
break;
@@ -678,10 +997,17 @@
l1[0] = l1[1] = l[1] = l_ec;
}
} else if (tx_size == TX_32X32) {
- a[1] = a[2] = a[3] = a_ec;
- l[1] = l[2] = l[3] = l_ec;
- a1[0] = a1[1] = a1[2] = a1[3] = a_ec;
- l1[0] = l1[1] = l1[2] = l1[3] = l_ec;
+ if (type != PLANE_TYPE_UV) {
+ a[1] = a[2] = a[3] = a_ec;
+ l[1] = l[2] = l[3] = l_ec;
+ a1[0] = a1[1] = a1[2] = a1[3] = a_ec;
+ l1[0] = l1[1] = l1[2] = l1[3] = l_ec;
+ } else {
+ a[1] = a1[0] = a1[1] = a_ec;
+ l[1] = l1[0] = l1[1] = l_ec;
+ a2[0] = a2[1] = a3[0] = a3[1] = a_ec;
+ l2[0] = l2[1] = l3[0] = l3[1] = l_ec;
+ }
}
if (!dry_run) {
@@ -751,27 +1077,76 @@
}
}
-static void stuff_sb_32x32(VP9_COMP *cpi, MACROBLOCKD *xd,
- TOKENEXTRA **t, int dry_run) {
+void vp9_stuff_sb(VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run) {
+ TOKENEXTRA * const t_backup = *t;
int b;
- stuff_b(cpi, xd, 0, t, PLANE_TYPE_Y_WITH_DC, TX_32X32, dry_run);
- for (b = 16; b < 24; b += 4) {
- stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_16X16, dry_run);
+ switch (xd->mode_info_context->mbmi.txfm_size) {
+ case TX_32X32:
+ stuff_b(cpi, xd, 0, t, PLANE_TYPE_Y_WITH_DC, TX_32X32, dry_run);
+ for (b = 64; b < 96; b += 16)
+ stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_16X16, dry_run);
+ break;
+ case TX_16X16:
+ for (b = 0; b < 64; b += 16)
+ stuff_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_16X16, dry_run);
+ for (b = 64; b < 96; b += 16)
+ stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_16X16, dry_run);
+ break;
+ case TX_8X8:
+ for (b = 0; b < 64; b += 4)
+ stuff_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_8X8, dry_run);
+ for (b = 64; b < 96; b += 4)
+ stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_8X8, dry_run);
+ break;
+ case TX_4X4:
+ for (b = 0; b < 64; b++)
+ stuff_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_4X4, dry_run);
+ for (b = 64; b < 96; b++)
+ stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_4X4, dry_run);
+ break;
+ default: assert(0);
}
+
+ if (dry_run) {
+ *t = t_backup;
+ }
}
-void vp9_stuff_sb(VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run) {
+void vp9_stuff_sb64(VP9_COMP *cpi, MACROBLOCKD *xd,
+ TOKENEXTRA **t, int dry_run) {
TOKENEXTRA * const t_backup = *t;
+ int b;
- stuff_sb_32x32(cpi, xd, t, dry_run);
+ switch (xd->mode_info_context->mbmi.txfm_size) {
+ case TX_32X32:
+ for (b = 0; b < 256; b += 64)
+ stuff_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_32X32, dry_run);
+ for (b = 256; b < 384; b += 64)
+ stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_32X32, dry_run);
+ break;
+ case TX_16X16:
+ for (b = 0; b < 256; b += 16)
+ stuff_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_16X16, dry_run);
+ for (b = 256; b < 384; b += 16)
+ stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_16X16, dry_run);
+ break;
+ case TX_8X8:
+ for (b = 0; b < 256; b += 4)
+ stuff_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_8X8, dry_run);
+ for (b = 256; b < 384; b += 4)
+ stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_8X8, dry_run);
+ break;
+ case TX_4X4:
+ for (b = 0; b < 256; b++)
+ stuff_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_4X4, dry_run);
+ for (b = 256; b < 384; b++)
+ stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_4X4, dry_run);
+ break;
+ default: assert(0);
+ }
if (dry_run) {
*t = t_backup;
}
-}
-
-void vp9_fix_contexts_sb(MACROBLOCKD *xd) {
- vpx_memset(xd->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * 2);
- vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * 2);
}
--- a/vp9/encoder/vp9_tokenize.h
+++ b/vp9/encoder/vp9_tokenize.h
@@ -38,6 +38,7 @@
int vp9_mby_is_skippable_16x16(MACROBLOCKD *xd);
int vp9_sby_is_skippable_32x32(MACROBLOCKD *xd);
int vp9_sbuv_is_skippable_16x16(MACROBLOCKD *xd);
+int vp9_sb64uv_is_skippable_32x32(MACROBLOCKD *xd);
struct VP9_COMP;
@@ -45,14 +46,16 @@
TOKENEXTRA **t, int dry_run);
void vp9_tokenize_sb(struct VP9_COMP *cpi, MACROBLOCKD *xd,
TOKENEXTRA **t, int dry_run);
+void vp9_tokenize_sb64(struct VP9_COMP *cpi, MACROBLOCKD *xd,
+ TOKENEXTRA **t, int dry_run);
void vp9_stuff_mb(struct VP9_COMP *cpi, MACROBLOCKD *xd,
TOKENEXTRA **t, int dry_run);
void vp9_stuff_sb(struct VP9_COMP *cpi, MACROBLOCKD *xd,
TOKENEXTRA **t, int dry_run);
+void vp9_stuff_sb64(struct VP9_COMP *cpi, MACROBLOCKD *xd,
+ TOKENEXTRA **t, int dry_run);
-void vp9_fix_contexts_sb(MACROBLOCKD *xd);
-
#ifdef ENTROPY_STATS
void init_context_counters();
void print_context_counters();
@@ -60,7 +63,7 @@
extern vp9_coeff_accum context_counters_4x4[BLOCK_TYPES];
extern vp9_coeff_accum context_counters_8x8[BLOCK_TYPES];
extern vp9_coeff_accum context_counters_16x16[BLOCK_TYPES];
-extern vp9_coeff_accum context_counters_32x32[BLOCK_TYPES_32X32];
+extern vp9_coeff_accum context_counters_32x32[BLOCK_TYPES];
#endif
extern const int *vp9_dct_value_cost_ptr;
--
⑨