shithub: libvpx

Download patch

ref: ca6b85aa4eae6047315ac01eef44b0ebaef58da3
parent: 62371d382a4fb2570c60e0a0948bd32e91790f2a
author: Yaowu Xu <yaowu@google.com>
date: Thu Aug 4 12:30:27 EDT 2011

add 8x8 intra prediction modes

Patches 1 to 3 are an initial implementation of 8x8 intra prediction
modes, with the following assumptions:
a. 8x8 has 4 prediction modes DC, H, V and TM
b. each UV 4x4 block uses the same mode as the corresponding 8x8 area
c. i8x8 modes are enabled for key frame only for now
Patch 4:
d. removed debug code from previous patches
Patch 5:
e. added stats code to collect entropy stats and further cleaned up
Patch 6:
f. changed mode stats code to collect finer stats of modes
Patch 7:
g. normalized i8x8 modes distribution to total at 256 (8bits).
Patch 8:
h. fixed a bug in decoder and removed debug printf output.
Patch 9:
i. more cleanups to address paul's comment
Patch 10:
j. messy rebase/merges to bring the commit up to date.

Tests on HD clips encoded as all key frames show a consistent gain
on all clips and all metrics: ~0.5% (PSNR) and 0.6% (SSIM):
http://www.corp.google.com/~yaowu/no_crawl/i8x8hd_allkey_fixedq.html

To build and test, configure with:
--enable-experimental --enable-i8x8

Change-Id: I9813fe07ae48cab5fdb5d904bca022514ad01e7f

--- a/configure
+++ b/configure
@@ -221,6 +221,7 @@
     segmentation
     t8x8
     csm
+    i8x8
 "
 CONFIG_LIST="
     external_build
--- a/vp8/common/blockd.h
+++ b/vp8/common/blockd.h
@@ -83,6 +83,9 @@
     V_PRED,             /* vertical prediction */
     H_PRED,             /* horizontal prediction */
     TM_PRED,            /* Truemotion prediction */
+#if CONFIG_I8X8
+    I8X8_PRED,           /* 8x8 based prediction, each 8x8 has its own prediction mode */
+#endif
     B_PRED,             /* block based prediction, each block has its own prediction mode */
 
     NEARESTMV,
@@ -114,6 +117,7 @@
 
 #define VP8_YMODES  (B_PRED + 1)
 #define VP8_UV_MODES (TM_PRED + 1)
+#define VP8_I8X8_MODES (TM_PRED + 1)
 
 #define VP8_MVREFS (1 + SPLITMV - NEARESTMV)
 
@@ -306,4 +310,22 @@
 extern void vp8_build_block_doffsets(MACROBLOCKD *x);
 extern void vp8_setup_block_dptrs(MACROBLOCKD *x);
 
+static void update_blockd_bmi(MACROBLOCKD *xd)  /* refresh xd->block[].bmi from the current MB's mode info */
+{
+    int i;
+    int is_4x4;  /* nonzero for SPLITMV, B_PRED and (with CONFIG_I8X8) I8X8_PRED */
+    is_4x4 = (xd->mode_info_context->mbmi.mode == SPLITMV) ||
+#if CONFIG_I8X8
+        (xd->mode_info_context->mbmi.mode==I8X8_PRED)||
+#endif
+        (xd->mode_info_context->mbmi.mode == B_PRED);
+    /* for any other mode, xd->block[].bmi is left untouched */
+    if (is_4x4)
+    {
+        for (i = 0; i < 16; i++)
+        {
+            xd->block[i].bmi = xd->mode_info_context->bmi[i];  /* copy all 16 per-block entries */
+        }
+    }
+}
 #endif  /* __INC_BLOCKD_H */
--- /dev/null
+++ b/vp8/common/defaultcoefcounts.h
@@ -1,0 +1,189 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/* Generated file, included by entropy.c */
+
+#if CONFIG_T8X8
+static const unsigned int vp8_default_coef_counts_8x8[BLOCK_TYPES]
+                                              [COEF_BANDS]
+                                              [PREV_COEF_CONTEXTS]
+                                              [MAX_ENTROPY_TOKENS] =
+{
+
+    { /* block Type 0 */
+      { /* Coeff Band 0 */
+        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
+      },
+      { /* Coeff Band 1 */
+        { 21041, 13314, 3420, 592, 117, 0, 0, 0, 0, 0, 0, 11783},
+        { 48236, 6918, 586, 153, 0, 0, 0, 0, 0, 0, 0, 23137},
+        { 676112, 106685, 24701, 6003, 1426, 429, 165, 0, 0, 0, 0, 28910}
+      },
+      { /* Coeff Band 2 */
+        { 660107, 75227, 8451, 1345, 259, 0, 0, 0, 0, 0, 0, 0},
+        { 79164, 36835, 6865, 1185, 246, 47, 0, 0, 0, 0, 0, 2575},
+        { 19469, 14330, 3070, 579, 94, 6, 0, 0, 0, 0, 0, 44}
+      },
+      { /* Coeff Band 3 */
+        { 1978004, 235343, 28485, 3242, 271, 0, 0, 0, 0, 0, 0, 0},
+        { 228684, 106736, 21431, 2842, 272, 46, 0, 0, 0, 0, 0, 9266},
+        { 32470, 27496, 6852, 1386, 45, 93, 0, 0, 0, 0, 0, 0}
+      },
+      { /* Coeff Band 4 */
+        { 1911212, 224613, 49653, 13748, 2541, 568, 48, 0, 0, 0, 0, 0},
+        { 196670, 103472, 44473, 11490, 2432, 977, 72, 0, 0, 0, 0, 9447},
+        { 37876, 40417, 19142, 6069, 1799, 727, 51, 0, 0, 0, 0, 0}
+      },
+      { /* Coeff Band 5 */
+        { 3813399, 437714, 64387, 11312, 695, 219, 0, 0, 0, 0, 0, 0},
+        { 438288, 215917, 61905, 10194, 674, 107, 0, 0, 0, 0, 0, 17808},
+        { 99139, 93643, 30054, 5758, 802, 171, 0, 0, 0, 0, 0, 0}
+      },
+      { /* Coeff Band 6 */
+        { 12259383, 1625505, 234927, 46306, 8417, 1456, 151, 0, 0, 0, 0, 0},
+        { 1518161, 734287, 204240, 44228, 9462, 2240, 65, 0, 0, 0, 0, 107630},
+        { 292470, 258894, 94925, 25864, 6662, 2055, 170, 0, 0, 0, 0, 0}
+      },
+      { /* Coeff Band 7 */
+        { 9791308, 2118949, 169439, 16735, 1122, 0, 0, 0, 0, 0, 0, 0},
+        { 1500281, 752410, 123259, 13065, 1168, 47, 0, 0, 0, 0, 0, 707182},
+        { 193067, 142638, 31018, 4719, 516, 138, 0, 0, 0, 0, 0, 12439}
+      }
+    },
+    { /* block Type 1 */
+      { /* Coeff Band 0 */
+        { 16925, 10553, 852, 16, 63, 87, 47, 0, 0, 0, 0, 31232},
+        { 39777, 26839, 6822, 1908, 678, 456, 227, 168, 35, 0, 0, 46825},
+        { 17300, 16666, 4168, 1209, 492, 154, 118, 207, 0, 0, 0, 19608}
+      },
+      { /* Coeff Band 1 */
+        { 35882, 31722, 4625, 1270, 266, 237, 0, 0, 0, 0, 0, 0},
+        { 15426, 13894, 4482, 1305, 281, 43, 0, 0, 0, 0, 0, 18627},
+        { 3900, 6552, 3472, 1723, 746, 366, 115, 35, 0, 0, 0, 798}
+      },
+      { /* Coeff Band 2 */
+        { 21998, 29132, 3353, 679, 46, 0, 0, 0, 0, 0, 0, 0},
+        { 9098, 15767, 3794, 792, 268, 47, 0, 0, 0, 0, 0, 22402},
+        { 4007, 8472, 2844, 687, 217, 0, 0, 0, 0, 0, 0, 2739}
+      },
+      { /* Coeff Band 3 */
+        { 0, 31414, 2911, 682, 96, 0, 0, 0, 0, 0, 0, 0},
+        { 0, 16515, 4425, 938, 124, 0, 0, 0, 0, 0, 0, 31369},
+        { 0, 4833, 2787, 1213, 150, 0, 0, 0, 0, 0, 0, 3744}
+      },
+      { /* Coeff Band 4 */
+        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
+      },
+      { /* Coeff Band 5 */
+        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
+      },
+      { /* Coeff Band 6 */
+        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 52762},
+        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13326}
+      },
+      { /* Coeff Band 7 */
+        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
+      }
+    },
+    { /* block Type 2 */
+      { /* Coeff Band 0 */
+        { 4444, 1614, 120, 48, 0, 48, 0, 0, 0, 0, 0, 278},
+        { 192436, 103730, 24494, 9845, 4122, 1193, 102, 0, 0, 0, 0, 2577},
+        { 3473446, 2308716, 815510, 370374, 167797, 92152, 12073, 86, 0, 0, 0, 6801}
+      },
+      { /* Coeff Band 1 */
+        { 2150616, 1136388, 250011, 86888, 31434, 13746, 1243, 0, 0, 0, 0, 0},
+        { 1179945, 799802, 266012, 106787, 40809, 16486, 1546, 0, 0, 0, 0, 2673},
+        { 465128, 504130, 286989, 146259, 62380, 30192, 2866, 20, 0, 0, 0, 0}
+      },
+      { /* Coeff Band 2 */
+        { 2157762, 1177519, 282665, 108499, 43389, 23224, 2597, 34, 0, 0, 0, 0},
+        { 1135685, 813705, 278079, 123255, 53935, 29492, 3152, 39, 0, 0, 0, 2978},
+        { 391894, 428037, 264216, 144306, 69326, 40281, 5541, 29, 0, 0, 0, 38}
+      },
+      { /* Coeff Band 3 */
+        { 6669109, 3468471, 782161, 288484, 115500, 51083, 4943, 41, 0, 0, 0, 0},
+        { 3454493, 2361636, 809524, 337663, 141343, 65036, 6361, 0, 0, 0, 0, 8730},
+        { 1231825, 1359522, 824686, 420784, 185517, 98731, 10973, 72, 0, 0, 0, 20}
+      },
+      { /* Coeff Band 4 */
+        { 7606203, 3452846, 659856, 191703, 49335, 14336, 450, 0, 0, 0, 0, 0},
+        { 3806506, 2379332, 691697, 224938, 61966, 18324, 766, 0, 0, 0, 0, 8193},
+        { 1270110, 1283728, 628775, 243378, 72617, 24897, 1087, 0, 0, 0, 0, 0}
+      },
+      { /* Coeff Band 5 */
+        { 15314169, 7436809, 1579928, 515790, 167453, 58305, 3502, 19, 0, 0, 0, 0},
+        { 7021286, 4667922, 1545706, 574463, 191793, 68748, 4048, 1, 0, 0, 0, 17222},
+        { 2011989, 2145878, 1185336, 534879, 195719, 79103, 5343, 4, 0, 0, 0, 37}
+      },
+      { /* Coeff Band 6 */
+        { 63458382, 25384462, 4208045, 1091050, 299011, 95242, 5238, 33, 0, 0, 0, 0},
+        { 25638401, 14694085, 3945978, 1195420, 344813, 117355, 6703, 0, 0, 0, 0, 216811},
+        { 5988177, 5824044, 2754413, 1077350, 370739, 139710, 9693, 38, 0, 0, 0, 1835}
+      },
+      { /* Coeff Band 7 */
+        { 74998348, 29342158, 2955001, 452912, 69631, 9516, 37, 0, 0, 0, 0, 0},
+        { 24762356, 13281085, 2409883, 436787, 68948, 10658, 36, 0, 0, 0, 0, 6614989},
+        { 3882867, 3224489, 1052289, 252890, 46967, 8548, 154, 0, 0, 0, 0, 194354}
+      }
+    },
+    { /* block Type 3 */
+      { /* Coeff Band 0 */
+        { 10583, 12059, 3155, 1041, 248, 175, 24, 2, 0, 0, 0, 5717},
+        { 42461, 41782, 13553, 4966, 1352, 855, 89, 0, 0, 0, 0, 15000},
+        { 4691125, 5045589, 2673566, 1089317, 378161, 160268, 18252, 813, 69, 13, 0, 49}
+      },
+      { /* Coeff Band 1 */
+        { 1535203, 1685686, 924565, 390329, 141709, 60523, 5983, 171, 0, 0, 0, 0},
+        { 1594021, 1793276, 1016078, 441332, 164159, 70843, 8098, 311, 0, 0, 0, 11312},
+        { 1225223, 1430184, 888492, 460713, 203286, 115149, 22061, 804, 7, 0, 0, 0}
+      },
+      { /* Coeff Band 2 */
+        { 1522386, 1590366, 799910, 303691, 96625, 37608, 3637, 180, 33, 11, 0, 0},
+        { 1682184, 1793869, 913649, 353520, 113674, 46309, 4736, 221, 18, 3, 0, 963},
+        { 1574580, 1740474, 954392, 417994, 151400, 67091, 8000, 536, 73, 10, 0, 63}
+      },
+      { /* Coeff Band 3 */
+        { 4963672, 5197790, 2585383, 982161, 313333, 118498, 16014, 536, 62, 0, 0, 0},
+        { 5223913, 5569803, 2845858, 1107384, 364949, 147841, 18296, 658, 11, 11, 0, 1866},
+        { 4042207, 4548894, 2608767, 1154993, 446290, 221295, 41054, 2438, 124, 20, 0, 0}
+      },
+      { /* Coeff Band 4 */
+        { 3857216, 4431325, 2670447, 1330169, 553301, 286825, 46763, 1917, 0, 0, 0, 0},
+        { 4226215, 4963701, 3046198, 1523923, 644670, 355519, 58792, 2525, 0, 0, 0, 1298},
+        { 3831873, 4580350, 3018580, 1660048, 797298, 502983, 123906, 7172, 16, 0, 0, 0}
+      },
+      { /* Coeff Band 5 */
+        { 8524543, 9285149, 4979435, 2039330, 683458, 266032, 22628, 270, 0, 0, 0, 0},
+        { 9432163, 10428088, 5715661, 2385738, 838389, 326264, 29981, 361, 0, 0, 0, 884},
+        { 9039066, 10368964, 6136765, 2862030, 1098269, 511668, 63105, 945, 14, 0, 0, 0}
+      },
+      { /* Coeff Band 6 */
+        { 33222872, 34748297, 17701695, 7214933, 2602336, 1191859, 187873, 12667, 390, 3, 0, 0},
+        { 34765051, 37140719, 19525578, 8268934, 3085012, 1473864, 246743, 15258, 736, 3, 0, 8403},
+        { 28591289, 32252393, 19037068, 9213729, 4020653, 2372354, 586420, 67428, 3920, 92, 7, 3}
+      },
+      { /* Coeff Band 7 */
+        { 68604786, 60777665, 19712887, 5656955, 1520443, 507166, 51829, 2466, 10, 0, 0, 0},
+        { 55447403, 51682540, 19008774, 5928582, 1706884, 595531, 65998, 3661, 101, 0, 0, 8468343},
+        { 28321970, 29149398, 13565882, 5258675, 1868588, 898041, 192023, 21497, 672, 17, 0, 1884921}
+      }
+    }
+  };
+#endif
--- a/vp8/common/entropy.c
+++ b/vp8/common/entropy.c
@@ -200,9 +200,13 @@
 };
 
 #include "default_coef_probs.h"
+#include "defaultcoefcounts.h"
 
 void vp8_default_coef_probs(VP8_COMMON *pc)
 {
+#if CONFIG_T8X8
+    int h;
+#endif
     vpx_memcpy(pc->fc.coef_probs, default_coef_probs,
                    sizeof(default_coef_probs));
 #if CONFIG_T8X8
--- a/vp8/common/entropymode.c
+++ b/vp8/common/entropymode.c
@@ -12,11 +12,17 @@
 #include "entropymode.h"
 #include "entropy.h"
 #include "vpx_mem/vpx_mem.h"
-
+#if CONFIG_I8X8
+static const unsigned int kf_y_mode_cts[VP8_YMODES] = { 49, 22, 23, 11, 23, 128};
+static const unsigned int y_mode_cts  [VP8_YMODES] = { 8080, 1908, 1582, 1007, 0, 5874};
+#else
 static const unsigned int kf_y_mode_cts[VP8_YMODES] = { 1607, 915, 812, 811, 5455};
 static const unsigned int y_mode_cts  [VP8_YMODES] = { 8080, 1908, 1582, 1007, 5874};
-
+#endif
 static const unsigned int uv_mode_cts  [VP8_UV_MODES] = { 59483, 13605, 16492, 4230};
+#if CONFIG_I8X8
+static const unsigned int i8x8_mode_cts  [VP8_I8X8_MODES] = {93, 69, 81, 13}; /* counts sum to 256 */
+#endif
 static const unsigned int kf_uv_mode_cts[VP8_UV_MODES] = { 5319, 1904, 1703, 674};
 
 static const unsigned int bmode_cts[VP8_BINTRAMODES] =
@@ -117,7 +123,6 @@
 
 /* Again, these trees use the same probability indices as their
    explicitly-programmed predecessors. */
-
 const vp8_tree_index vp8_ymode_tree[8] =
 {
     -DC_PRED, 2,
@@ -126,14 +131,32 @@
     -TM_PRED, -B_PRED
 };
 
-const vp8_tree_index vp8_kf_ymode_tree[8] =
+#if CONFIG_I8X8
+const vp8_tree_index vp8_kf_ymode_tree[10] =
 {
     -B_PRED, 2,
     4, 6,
     -DC_PRED, -V_PRED,
+    -H_PRED, 8,
+    -TM_PRED, -I8X8_PRED
+};
+
+const vp8_tree_index vp8_i8x8_mode_tree[6] =
+{
+    -DC_PRED, 2,
+    -V_PRED, 4,
     -H_PRED, -TM_PRED
 };
+#else
 
+const vp8_tree_index vp8_kf_ymode_tree[8] =
+{
+    -B_PRED, 2,
+    4, 6,
+    -DC_PRED, -V_PRED,
+    -H_PRED, -TM_PRED
+};
+#endif
 const vp8_tree_index vp8_uv_mode_tree[6] =
 {
     -DC_PRED, 2,
@@ -168,6 +191,9 @@
 struct vp8_token_struct vp8_ymode_encodings   [VP8_YMODES];
 struct vp8_token_struct vp8_kf_ymode_encodings [VP8_YMODES];
 struct vp8_token_struct vp8_uv_mode_encodings  [VP8_UV_MODES];
+#if CONFIG_I8X8
+struct vp8_token_struct vp8_i8x8_mode_encodings  [VP8_I8X8_MODES]; /* filled from vp8_i8x8_mode_tree */
+#endif
 struct vp8_token_struct vp8_mbsplit_encodings [VP8_NUMMBSPLITS];
 
 struct vp8_token_struct vp8_mv_ref_encoding_array    [VP8_MVREFS];
@@ -211,7 +237,15 @@
         x->kf_uv_mode_prob, bct, kf_uv_mode_cts,
         256, 1
     );
+#if CONFIG_I8X8
+    vp8_tree_probs_from_distribution(
+        VP8_UV_MODES, vp8_i8x8_mode_encodings, vp8_i8x8_mode_tree,
+        x->i8x8_mode_prob, bct, i8x8_mode_cts,
+        256, 1
+        );
+#endif
     vpx_memcpy(x->fc.sub_mv_ref_prob, sub_mv_ref_prob, sizeof(sub_mv_ref_prob));
+
 }
 
 
@@ -262,6 +296,9 @@
     vp8_tokens_from_tree(vp8_ymode_encodings,   vp8_ymode_tree);
     vp8_tokens_from_tree(vp8_kf_ymode_encodings, vp8_kf_ymode_tree);
     vp8_tokens_from_tree(vp8_uv_mode_encodings,  vp8_uv_mode_tree);
+#if CONFIG_I8X8
+    vp8_tokens_from_tree(vp8_i8x8_mode_encodings,  vp8_i8x8_mode_tree);
+#endif
     vp8_tokens_from_tree(vp8_mbsplit_encodings, vp8_mbsplit_tree);
 
     vp8_tokens_from_tree_offset(vp8_mv_ref_encoding_array,
--- a/vp8/common/entropymode.h
+++ b/vp8/common/entropymode.h
@@ -38,7 +38,9 @@
 extern const vp8_tree_index  vp8_ymode_tree[];
 extern const vp8_tree_index  vp8_kf_ymode_tree[];
 extern const vp8_tree_index  vp8_uv_mode_tree[];
-
+#if CONFIG_I8X8
+extern const vp8_tree_index  vp8_i8x8_mode_tree[];
+#endif
 extern const vp8_tree_index  vp8_mbsplit_tree[];
 extern const vp8_tree_index  vp8_mv_ref_tree[];
 extern const vp8_tree_index  vp8_sub_mv_ref_tree[];
@@ -46,6 +48,9 @@
 extern struct vp8_token_struct vp8_bmode_encodings   [VP8_BINTRAMODES];
 extern struct vp8_token_struct vp8_ymode_encodings   [VP8_YMODES];
 extern struct vp8_token_struct vp8_kf_ymode_encodings [VP8_YMODES];
+#if CONFIG_I8X8
+extern struct vp8_token_struct vp8_i8x8_mode_encodings  [VP8_I8X8_MODES];
+#endif
 extern struct vp8_token_struct vp8_uv_mode_encodings  [VP8_UV_MODES];
 extern struct vp8_token_struct vp8_mbsplit_encodings  [VP8_NUMMBSPLITS];
 
--- a/vp8/common/findnearmv.c
+++ b/vp8/common/findnearmv.c
@@ -51,7 +51,8 @@
         if (above->mbmi.mv.as_int)
         {
             (++mv)->as_int = above->mbmi.mv.as_int;
-            mv_bias(ref_frame_sign_bias[above->mbmi.ref_frame], refframe, mv, ref_frame_sign_bias);
+            mv_bias(ref_frame_sign_bias[above->mbmi.ref_frame],
+                refframe, mv, ref_frame_sign_bias);
             ++cntx;
         }
 
@@ -66,7 +67,8 @@
             int_mv this_mv;
 
             this_mv.as_int = left->mbmi.mv.as_int;
-            mv_bias(ref_frame_sign_bias[left->mbmi.ref_frame], refframe, &this_mv, ref_frame_sign_bias);
+            mv_bias(ref_frame_sign_bias[left->mbmi.ref_frame],
+                refframe, &this_mv, ref_frame_sign_bias);
 
             if (this_mv.as_int != mv->as_int)
             {
@@ -88,7 +90,8 @@
             int_mv this_mv;
 
             this_mv.as_int = aboveleft->mbmi.mv.as_int;
-            mv_bias(ref_frame_sign_bias[aboveleft->mbmi.ref_frame], refframe, &this_mv, ref_frame_sign_bias);
+            mv_bias(ref_frame_sign_bias[aboveleft->mbmi.ref_frame],
+                refframe, &this_mv, ref_frame_sign_bias);
 
             if (this_mv.as_int != mv->as_int)
             {
@@ -149,7 +152,8 @@
     p[1] = vp8_mode_contexts [near_mv_ref_ct[1]] [1];
     p[2] = vp8_mode_contexts [near_mv_ref_ct[2]] [2];
     p[3] = vp8_mode_contexts [near_mv_ref_ct[3]] [3];
-    /*p[3] = vp8_mode_contexts [near_mv_ref_ct[1] + near_mv_ref_ct[2] + near_mv_ref_ct[3]] [3];*/
+    /*p[3] = vp8_mode_contexts [near_mv_ref_ct[1]
+    + near_mv_ref_ct[2] + near_mv_ref_ct[3]] [3];*/
     return p;
 }
 
--- a/vp8/common/findnearmv.h
+++ b/vp8/common/findnearmv.h
@@ -125,8 +125,6 @@
         --cur_mb;
         switch (cur_mb->mbmi.mode)
         {
-            case B_PRED:
-              return (cur_mb->bmi + b + 3)->as_mode;
             case DC_PRED:
                 return B_DC_PRED;
             case V_PRED:
@@ -135,6 +133,11 @@
                 return B_HE_PRED;
             case TM_PRED:
                 return B_TM_PRED;
+#if CONFIG_I8X8
+            case I8X8_PRED:
+#endif
+            case B_PRED:
+              return (cur_mb->bmi + b + 3)->as_mode;
             default:
                 return B_DC_PRED;
         }
@@ -143,7 +146,8 @@
     return (cur_mb->bmi + b - 1)->as_mode;
 }
 
-static B_PREDICTION_MODE above_block_mode(const MODE_INFO *cur_mb, int b, int mi_stride)
+static B_PREDICTION_MODE above_block_mode(const MODE_INFO
+                                          *cur_mb, int b, int mi_stride)
 {
     if (!(b >> 2))
     {
@@ -152,8 +156,6 @@
 
         switch (cur_mb->mbmi.mode)
         {
-            case B_PRED:
-              return (cur_mb->bmi + b + 12)->as_mode;
             case DC_PRED:
                 return B_DC_PRED;
             case V_PRED:
@@ -162,6 +164,11 @@
                 return B_HE_PRED;
             case TM_PRED:
                 return B_TM_PRED;
+#if CONFIG_I8X8
+            case I8X8_PRED:
+#endif
+            case B_PRED:
+              return (cur_mb->bmi + b + 12)->as_mode;
             default:
                 return B_DC_PRED;
         }
--- a/vp8/common/generic/systemdependent.c
+++ b/vp8/common/generic/systemdependent.c
@@ -86,6 +86,9 @@
     rtcd->recon.copy8x8     = vp8_copy_mem8x8_c;
     rtcd->recon.copy8x4     = vp8_copy_mem8x4_c;
     rtcd->recon.recon       = vp8_recon_b_c;
+#if CONFIG_I8X8
+    rtcd->recon.recon_uv    = vp8_recon_uv_b_c;
+#endif
     rtcd->recon.recon2      = vp8_recon2b_c;
     rtcd->recon.recon4      = vp8_recon4b_c;
     rtcd->recon.recon_mb    = vp8_recon_mb_c;
@@ -100,6 +103,14 @@
         vp8_build_intra_predictors_mbuv_s;
     rtcd->recon.intra4x4_predict =
         vp8_intra4x4_predict;
+
+#if CONFIG_I8X8
+    rtcd->recon.intra8x8_predict =
+        vp8_intra8x8_predict;
+    rtcd->recon.intra_uv4x4_predict =
+        vp8_intra_uv4x4_predict;
+#endif
+
 
     rtcd->subpix.sixtap16x16   = vp8_sixtap_predict16x16_c;
     rtcd->subpix.sixtap8x8     = vp8_sixtap_predict8x8_c;
--- a/vp8/common/onyxc_int.h
+++ b/vp8/common/onyxc_int.h
@@ -185,6 +185,9 @@
     vp8_prob kf_bmode_prob [VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES-1];
     vp8_prob kf_ymode_prob [VP8_YMODES-1];  /* keyframe "" */
     vp8_prob kf_uv_mode_prob [VP8_UV_MODES-1];
+#if CONFIG_I8X8
+    vp8_prob i8x8_mode_prob [VP8_UV_MODES-1];
+#endif
 
 
     FRAME_CONTEXT lfc; /* last frame entropy */
--- a/vp8/common/recon.c
+++ b/vp8/common/recon.c
@@ -44,6 +44,38 @@
     }
 }
 
+#if CONFIG_I8X8
+void vp8_recon_uv_b_c
+(
+    unsigned char *pred_ptr,
+    short *diff_ptr,
+    unsigned char *dst_ptr,
+    int stride
+)
+{
+    int r, c;
+    /* 4x4 reconstruction: dst = pred + diff, clamped to [0, 255], one row per pass */
+    for (r = 0; r < 4; r++)
+    {
+        for (c = 0; c < 4; c++)
+        {
+            int a = diff_ptr[c] + pred_ptr[c] ;
+
+            if (a < 0)
+                a = 0;
+
+            if (a > 255)
+                a = 255;
+
+            dst_ptr[c] = (unsigned char) a ;
+        }
+
+        dst_ptr += stride;   /* destination rows are `stride` bytes apart */
+        diff_ptr += 8;       /* residual rows are 8 entries apart */
+        pred_ptr += 8;       /* predictor rows are 8 bytes apart */
+    }
+}
+#endif
 void vp8_recon4b_c
 (
     unsigned char *pred_ptr,
--- a/vp8/common/recon.h
+++ b/vp8/common/recon.h
@@ -59,6 +59,14 @@
 #endif
 extern prototype_recon_block(vp8_recon_recon);
 
+#if CONFIG_I8X8
+#ifndef vp8_recon_recon_uv
+#define vp8_recon_recon_uv vp8_recon_uv_b_c
+#endif
+extern prototype_recon_block(vp8_recon_recon_uv);
+#endif
+
+extern prototype_recon_block(vp8_recon_recon);
 #ifndef vp8_recon_recon2
 #define vp8_recon_recon2 vp8_recon2b_c
 #endif
@@ -85,6 +93,14 @@
 extern prototype_build_intra_predictors\
     (vp8_recon_build_intra_predictors_mby);
 
+#if CONFIG_I8X8
+#ifndef vp8_recon_build_intra8x8_predictors_mby
+#define vp8_recon_build_intra8x8_predictors_mby vp8_build_intra8x8_predictors_mby
+#endif
+extern prototype_build_intra_predictors\
+    (vp8_recon_build_intra8x8_predictors_mby);
+#endif
+
 #ifndef vp8_recon_build_intra_predictors_mby_s
 #define vp8_recon_build_intra_predictors_mby_s vp8_build_intra_predictors_mby_s
 #endif
@@ -97,6 +113,14 @@
 extern prototype_build_intra_predictors\
     (vp8_recon_build_intra_predictors_mbuv);
 
+#if CONFIG_I8X8
+#ifndef vp8_recon_build_intra8x8_predictors_mbuv
+#define vp8_recon_build_intra8x8_predictors_mbuv vp8_build_intra8x8_predictors_mbuv
+#endif
+extern prototype_build_intra_predictors\
+    (vp8_recon_build_intra8x8_predictors_mbuv);
+#endif
+
 #ifndef vp8_recon_build_intra_predictors_mbuv_s
 #define vp8_recon_build_intra_predictors_mbuv_s vp8_build_intra_predictors_mbuv_s
 #endif
@@ -109,7 +133,21 @@
 extern prototype_intra4x4_predict\
     (vp8_recon_intra4x4_predict);
 
+#if CONFIG_I8X8
+#ifndef vp8_recon_intra8x8_predict
+#define vp8_recon_intra8x8_predict vp8_intra8x8_predict
+#endif
+extern prototype_intra4x4_predict\
+    (vp8_recon_intra8x8_predict);
 
+#ifndef vp8_recon_intra_uv4x4_predict
+#define vp8_recon_intra_uv4x4_predict vp8_intra_uv4x4_predict
+#endif
+extern prototype_intra4x4_predict\
+    (vp8_recon_intra_uv4x4_predict);
+
+#endif
+
 typedef prototype_copy_block((*vp8_copy_block_fn_t));
 typedef prototype_recon_block((*vp8_recon_fn_t));
 typedef prototype_recon_macroblock((*vp8_recon_mb_fn_t));
@@ -121,6 +159,9 @@
     vp8_copy_block_fn_t  copy8x8;
     vp8_copy_block_fn_t  copy8x4;
     vp8_recon_fn_t       recon;
+#if CONFIG_I8X8
+    vp8_recon_fn_t       recon_uv;
+#endif
     vp8_recon_fn_t       recon2;
     vp8_recon_fn_t       recon4;
     vp8_recon_mb_fn_t    recon_mb;
@@ -130,6 +171,10 @@
     vp8_build_intra_pred_fn_t  build_intra_predictors_mbuv_s;
     vp8_build_intra_pred_fn_t  build_intra_predictors_mbuv;
     vp8_intra4x4_pred_fn_t intra4x4_predict;
+#if CONFIG_I8X8
+    vp8_intra4x4_pred_fn_t intra8x8_predict;
+    vp8_intra4x4_pred_fn_t intra_uv4x4_predict;
+#endif
 } vp8_recon_rtcd_vtable_t;
 
 #if CONFIG_RUNTIME_CPU_DETECT
--- a/vp8/common/reconintra.c
+++ b/vp8/common/reconintra.c
@@ -70,11 +70,7 @@
                 {
                     average += yleft_col[i];
                 }
-
             }
-
-
-
             shift = 3 + x->up_available + x->left_available;
             expected_dc = (average + (1 << (shift - 1))) >> shift;
         }
@@ -135,6 +131,9 @@
 
     }
     break;
+#if CONFIG_I8X8
+    case I8X8_PRED:
+#endif  /* CONFIG_I8X8 */
     case B_PRED:
     case NEARESTMV:
     case NEARMV:
@@ -194,8 +193,6 @@
 
             }
 
-
-
             shift = 3 + x->up_available + x->left_available;
             expected_dc = (average + (1 << (shift - 1))) >> shift;
         }
@@ -554,3 +551,184 @@
         break;
     }
 }
+#if CONFIG_I8X8
+void vp8_intra8x8_predict(BLOCKD *x,
+                          int mode,
+                          unsigned char *predictor)
+{
+    /* Fill an 8x8 area of `predictor` per `mode`; any mode other than DC/V/H/TM writes nothing. */
+    unsigned char *yabove_row = *(x->base_dst) + x->dst - x->dst_stride;
+    unsigned char yleft_col[8];
+    unsigned char ytop_left = yabove_row[-1];
+    int r, c, i;
+    /* NOTE(review): above row / left column are read with no availability check - confirm callers guarantee them */
+    for (i = 0; i < 8; i++)
+    {
+        yleft_col[i] = (*(x->base_dst))[x->dst - 1 + i * x->dst_stride];
+    }
+    switch (mode)
+    {
+    case DC_PRED:
+        {
+            int expected_dc = 0;
+
+            for (i = 0; i < 8; i++)
+            {
+                expected_dc += yabove_row[i];
+                expected_dc += yleft_col[i];
+            }
+            expected_dc = (expected_dc + 8) >> 4;   /* rounded mean of the 16 neighbours */
+
+            for (r = 0; r < 8; r++)
+            {
+                for (c = 0; c < 8; c++)
+                {
+                    predictor[c] = expected_dc;
+                }
+                predictor += 16;   /* predictor rows are 16 bytes apart */
+            }
+        }
+        break;
+    case V_PRED:
+        {
+            for (r = 0; r < 8; r++)
+            {
+                for (c = 0; c < 8; c++)
+                {
+                    /* every row repeats the row above the block */
+                    predictor[c] = yabove_row[c];
+                }
+                predictor += 16;
+            }
+
+        }
+        break;
+    case H_PRED:
+        {
+            /* every row is filled with its own left-neighbour pixel */
+            for (r = 0; r < 8; r++)
+            {
+                for (c = 0; c < 8; c++)
+                {
+                    predictor[c] = yleft_col[r];
+                }
+                predictor += 16;
+            }
+        }
+        break;
+    case TM_PRED:
+        {
+            /* prediction similar to true_motion prediction: above + left - top_left, clamped to [0, 255] */
+            for (r = 0; r < 8; r++)
+            {
+                for (c = 0; c < 8; c++)
+                {
+                    int pred = yabove_row[c] - ytop_left + yleft_col[r];
+                    if (pred < 0)
+                        pred = 0;
+
+                    if (pred > 255)
+                        pred = 255;
+                    predictor[c] = pred;
+                }
+
+                predictor += 16;
+            }
+        }
+        break;
+    }
+}
+
+void vp8_intra_uv4x4_predict(BLOCKD *x,
+                             int mode,
+                             unsigned char *predictor)
+{
+    /* Fill a 4x4 area of `predictor` per `mode`; any mode other than DC/V/H/TM writes nothing. */
+    unsigned char *above_row = *(x->base_dst) + x->dst - x->dst_stride;
+    unsigned char left_col[4];
+    unsigned char top_left = above_row[-1];
+    int r, c, i;
+    /* NOTE(review): above row / left column are read with no availability check - confirm callers guarantee them */
+    for (i = 0; i < 4; i++)
+    {
+        left_col[i] = (*(x->base_dst))[x->dst - 1 + i * x->dst_stride];
+    }
+    switch (mode)
+    {
+    case DC_PRED:
+        {
+            int expected_dc = 0;
+
+            for (i = 0; i < 4; i++)
+            {
+                expected_dc += above_row[i];
+                expected_dc += left_col[i];
+            }
+            expected_dc = (expected_dc + 4) >> 3;   /* rounded mean of the 8 neighbours */
+
+            for (r = 0; r < 4; r++)
+            {
+                for (c = 0; c < 4; c++)
+                {
+                    predictor[c] = expected_dc;
+                }
+                predictor += 8;   /* predictor rows are 8 bytes apart */
+            }
+        }
+        break;
+    case V_PRED:
+        {
+            for (r = 0; r < 4; r++)
+            {
+                for (c = 0; c < 4; c++)
+                {
+                    /* every row repeats the row above the block */
+                    predictor[c] = above_row[c];
+                }
+                predictor += 8;
+            }
+
+        }
+        break;
+    case H_PRED:
+        {
+            /* every row is filled with its own left-neighbour pixel */
+            for (r = 0; r < 4; r++)
+            {
+                for (c = 0; c < 4; c++)
+                {
+                    predictor[c] = left_col[r];
+                }
+                predictor += 8;
+            }
+        }
+        break;
+    case TM_PRED:
+        {
+            /* prediction similar to true_motion prediction: above + left - top_left, clamped to [0, 255] */
+            for (r = 0; r < 4; r++)
+            {
+                for (c = 0; c < 4; c++)
+                {
+                    int pred = above_row[c] - top_left + left_col[r];
+                    if (pred < 0)
+                        pred = 0;
+
+                    if (pred > 255)
+                        pred = 255;
+                    predictor[c] = pred;
+                }
+
+                predictor += 8;
+            }
+        }
+        break;
+    }
+}
+
+
+/* TODO: try different ways of use Y-UV mode correlation
+ Current code assumes that a uv 4x4 block use same mode
+ as corresponding Y 8x8 area
+ */
+#endif
--- a/vp8/decoder/decodemv.c
+++ b/vp8/decoder/decodemv.c
@@ -39,7 +39,14 @@
 
     return i;
 }
+#if CONFIG_I8X8
+static int vp8_read_i8x8_mode(vp8_reader *bc, const vp8_prob *p)  /* read one 8x8 intra mode from bc */
+{
+    const int i = vp8_treed_read(bc, vp8_i8x8_mode_tree, p);  /* tree-coded with probabilities p */
 
+    return i;
+}
+#endif
 
 
 static int vp8_read_uv_mode(vp8_reader *bc, const vp8_prob *p)
@@ -61,7 +68,7 @@
             mi->segment_id = (unsigned char)(vp8_read(r, x->mb_segment_tree_probs[1]));
     }
 }
-
+extern const int vp8_i8x8_block[4];
 static void vp8_kfread_modes(VP8D_COMP *pbi, MODE_INFO *m, int mb_row, int mb_col)
 {
     vp8_reader *const bc = & pbi->bc;
@@ -91,7 +98,6 @@
             if ((m->mbmi.mode = y_mode) == B_PRED)
             {
                 int i = 0;
-
                 do
                 {
                     const B_PREDICTION_MODE A = above_block_mode(m, i, mis);
@@ -101,7 +107,26 @@
                 }
                 while (++i < 16);
             }
-
+#if CONFIG_I8X8
+            if((m->mbmi.mode = y_mode) == I8X8_PRED)
+            {
+                int i;
+                int mode8x8;
+                //printf("F%3d:%d:%d:", pbi->common.current_video_frame, mb_row, mb_col);
+                for(i=0;i<4;i++)
+                 {
+                     int ib = vp8_i8x8_block[i];
+                     mode8x8 = vp8_read_i8x8_mode(bc, pbi->common.i8x8_mode_prob);
+                     m->bmi[ib+0].as_mode= mode8x8;
+                     m->bmi[ib+1].as_mode= mode8x8;
+                     m->bmi[ib+4].as_mode= mode8x8;
+                     m->bmi[ib+5].as_mode= mode8x8;
+                 }
+                //printf("%2d%2d%2d%2d\n", m->bmi[0].as_mode,m->bmi[2].as_mode,
+                //                       m->bmi[8].as_mode,m->bmi[10].as_mode);
+           }
+            else
+#endif
             m->mbmi.uv_mode = (MB_PREDICTION_MODE)vp8_read_uv_mode(bc, pbi->common.kf_uv_mode_prob);
         }
 }
@@ -553,6 +578,22 @@
             else
                 read_mb_modes_mv(pbi, mi, &mi->mbmi, mb_row, mb_col);
 
+            //printf("%3d", mi->mbmi.mode);
+
+            /*
+            if(pbi->common.current_video_frame==7)
+            {
+                FILE *fmode=fopen("kfmode.txt", "a");
+                fprintf(fmode, "%3d:%3d:%d\n",mb_row, mb_col, mi->mbmi.mode);
+                fclose(fmode);
+
+            }*/
+            /*
+            if(mi->mbmi.mode==I8X8_PRED)
+            {
+                printf("F%3d:%d:%d\n", pbi->common.current_video_frame, mb_row, mb_col);
+            }
+            */
 #if CONFIG_ERROR_CONCEALMENT
             /* look for corruption. set mvs_corrupt_from_mb to the current
              * mb_num if the frame is corrupt from this macroblock. */
@@ -568,7 +609,7 @@
 
             mi++;       /* next macroblock */
         }
-
+       // printf("\n");
         mi++;           /* skip left predictor each row */
     }
 }
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -199,7 +199,10 @@
     }
 
 }
+#if CONFIG_I8X8
+extern const int vp8_i8x8_block[4];
 
+#endif
 static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
                               unsigned int mb_idx)
 {
@@ -246,8 +249,12 @@
 
     mode = xd->mode_info_context->mbmi.mode;
 
-    if (eobtotal == 0 && mode != B_PRED && mode != SPLITMV &&
-            !vp8dx_bool_error(xd->current_bc))
+    if (eobtotal == 0 && mode != B_PRED && mode != SPLITMV
+#if CONFIG_I8X8
+        && mode != I8X8_PRED
+#endif
+        &&!vp8dx_bool_error(xd->current_bc)
+        )
     {
         /* Special case:  Force the loopfilter to skip when eobtotal and
          * mb_skip_coeff are zero.
@@ -264,6 +271,10 @@
     /* do prediction */
     if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
     {
+#if CONFIG_I8X8
+        if(mode != I8X8_PRED)
+        {
+#endif
         RECON_INVOKE(&pbi->common.rtcd.recon, build_intra_predictors_mbuv)(xd);
 
         if (mode != B_PRED)
@@ -273,6 +284,9 @@
         } else {
             vp8_intra_prediction_down_copy(xd);
         }
+#if CONFIG_I8X8
+        }
+#endif
     }
     else
     {
@@ -301,6 +315,56 @@
 #endif
 
     /* dequantization and idct */
+#if CONFIG_I8X8
+    if (mode == I8X8_PRED)
+    {
+        for (i = 0; i < 4; i++)
+        {
+            int ib = vp8_i8x8_block[i];
+            const int iblock[4]={0,1,4,5};
+            int j;
+            int i8x8mode;
+            BLOCKD *b;
+
+            b = &xd->block[ib];
+            i8x8mode= b->bmi.as_mode;
+            RECON_INVOKE(RTCD_VTABLE(recon), intra8x8_predict)
+                          (b, i8x8mode, b->predictor);
+
+            for(j = 0; j < 4; j++)
+            {
+                b = &xd->block[ib+iblock[j]];
+                if (xd->eobs[ib+iblock[j]] > 1)
+                {
+                    DEQUANT_INVOKE(&pbi->dequant, idct_add)
+                        (b->qcoeff, b->dequant,  b->predictor,
+                        *(b->base_dst) + b->dst, 16, b->dst_stride);
+                }
+                else
+                {
+                    IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar_add)
+                        (b->qcoeff[0] * b->dequant[0], b->predictor,
+                        *(b->base_dst) + b->dst, 16, b->dst_stride);
+                    ((int *)b->qcoeff)[0] = 0;
+                }
+            }
+
+            b = &xd->block[16+i];
+            RECON_INVOKE(RTCD_VTABLE(recon), intra_uv4x4_predict)
+                          (b, i8x8mode, b->predictor);
+            DEQUANT_INVOKE(&pbi->dequant, idct_add)
+                (b->qcoeff, b->dequant,  b->predictor,
+                *(b->base_dst) + b->dst, 8, b->dst_stride);
+            b = &xd->block[20+i];
+            RECON_INVOKE(RTCD_VTABLE(recon), intra_uv4x4_predict)
+                          (b, i8x8mode, b->predictor);
+            DEQUANT_INVOKE(&pbi->dequant, idct_add)
+                (b->qcoeff, b->dequant,  b->predictor,
+                *(b->base_dst) + b->dst, 8, b->dst_stride);
+        }
+    }
+    else
+#endif
     if (mode == B_PRED)
     {
         for (i = 0; i < 16; i++)
@@ -420,7 +484,9 @@
     }
     else
 #endif
-
+#if CONFIG_I8X8
+    if(xd->mode_info_context->mbmi.mode!=I8X8_PRED)
+#endif
     DEQUANT_INVOKE (&pbi->dequant, idct_add_uv_block)
                     (xd->qcoeff+16*16, xd->block[16].dequant,
                      xd->predictor+16*16, xd->dst.u_buffer, xd->dst.v_buffer,
@@ -511,6 +577,9 @@
         }
 #endif
 
+#if CONFIG_I8X8
+        update_blockd_bmi(xd);
+#endif
         xd->dst.y_buffer = pc->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
         xd->dst.u_buffer = pc->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
         xd->dst.v_buffer = pc->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
--- a/vp8/decoder/detokenize.c
+++ b/vp8/decoder/detokenize.c
@@ -366,16 +366,19 @@
       DECODE_AND_LOOP_IF_ZERO_8X8(Prob[ZERO_CONTEXT_NODE], CHECK_0_8x8_);
     }
     DECODE_AND_BRANCH_IF_ZERO(Prob[ONE_CONTEXT_NODE], ONE_CONTEXT_NODE_0_8x8_);
-    DECODE_AND_BRANCH_IF_ZERO(Prob[LOW_VAL_CONTEXT_NODE], LOW_VAL_CONTEXT_NODE_0_8x8_);
-    DECODE_AND_BRANCH_IF_ZERO(Prob[HIGH_LOW_CONTEXT_NODE], HIGH_LOW_CONTEXT_NODE_0_8x8_);
-    DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_THREEFOUR_CONTEXT_NODE], CAT_THREEFOUR_CONTEXT_NODE_0_8x8_);
-    DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_FIVE_CONTEXT_NODE], CAT_FIVE_CONTEXT_NODE_0_8x8_);
-    val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY6].min_val;
-    bits_count = vp8d_token_extra_bits2[DCT_VAL_CATEGORY6].Length;
-
+    DECODE_AND_BRANCH_IF_ZERO(Prob[LOW_VAL_CONTEXT_NODE],
+                                LOW_VAL_CONTEXT_NODE_0_8x8_);
+    DECODE_AND_BRANCH_IF_ZERO(Prob[HIGH_LOW_CONTEXT_NODE],
+                                HIGH_LOW_CONTEXT_NODE_0_8x8_);
+    DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_THREEFOUR_CONTEXT_NODE],
+                                CAT_THREEFOUR_CONTEXT_NODE_0_8x8_);
+    DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_FIVE_CONTEXT_NODE],
+                                CAT_FIVE_CONTEXT_NODE_0_8x8_);
+    val = CAT6_MIN_VAL;
+    bits_count = CONFIG_EXTEND_QRANGE?12:10;
     do
     {
-        DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY6, bits_count);
+        DECODE_EXTRABIT_AND_ADJUST_VAL(cat6_prob[bits_count], bits_count);
         bits_count -- ;
     }
     while (bits_count >= 0);
@@ -389,12 +392,12 @@
     }
 
 CAT_FIVE_CONTEXT_NODE_0_8x8_:
-    val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY5].min_val;
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 4);
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 3);
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 2);
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 1);
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 0);
+    val = CAT5_MIN_VAL;
+    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT5_PROB4, 4);
+    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT5_PROB3, 3);
+    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT5_PROB2, 2);
+    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT5_PROB1, 1);
+    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT5_PROB0, 0);
     if(i==24)
     {
         DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8_2(val);
@@ -405,12 +408,13 @@
     }
 
 CAT_THREEFOUR_CONTEXT_NODE_0_8x8_:
-    DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_THREE_CONTEXT_NODE], CAT_THREE_CONTEXT_NODE_0_8x8_);
-    val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY4].min_val;
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY4, 3);
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY4, 2);
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY4, 1);
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY4, 0);
+    DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_THREE_CONTEXT_NODE],
+                            CAT_THREE_CONTEXT_NODE_0_8x8_);
+    val = CAT4_MIN_VAL;
+    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT4_PROB3, 3);
+    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT4_PROB2, 2);
+    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT4_PROB1, 1);
+    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT4_PROB0, 0);
     if(i==24)
     {
         DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8_2(val);
@@ -421,10 +425,10 @@
     }
 
 CAT_THREE_CONTEXT_NODE_0_8x8_:
-    val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY3].min_val;
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY3, 2);
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY3, 1);
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY3, 0);
+    val = CAT3_MIN_VAL;
+    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT3_PROB2, 2);
+    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT3_PROB1, 1);
+    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT3_PROB0, 0);
     if(i==24)
     {
         DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8_2(val);
@@ -435,11 +439,11 @@
     }
 
 HIGH_LOW_CONTEXT_NODE_0_8x8_:
-    DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_ONE_CONTEXT_NODE], CAT_ONE_CONTEXT_NODE_0_8x8_);
-
-    val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY2].min_val;
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY2, 1);
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY2, 0);
+    DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_ONE_CONTEXT_NODE],
+                            CAT_ONE_CONTEXT_NODE_0_8x8_);
+    val = CAT2_MIN_VAL;
+    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT2_PROB1, 1);
+    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT2_PROB0, 0);
     if(i==24)
     {
         DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8_2(val);
@@ -450,8 +454,8 @@
     }
 
 CAT_ONE_CONTEXT_NODE_0_8x8_:
-    val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY1].min_val;
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY1, 0);
+    val = CAT1_MIN_VAL;
+    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT1_PROB0, 0);
     if(i==24)
     {
         DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8_2(val);
@@ -462,8 +466,10 @@
     }
 
 LOW_VAL_CONTEXT_NODE_0_8x8_:
-    DECODE_AND_BRANCH_IF_ZERO(Prob[TWO_CONTEXT_NODE], TWO_CONTEXT_NODE_0_8x8_);
-    DECODE_AND_BRANCH_IF_ZERO(Prob[THREE_CONTEXT_NODE], THREE_CONTEXT_NODE_0_8x8_);
+    DECODE_AND_BRANCH_IF_ZERO(Prob[TWO_CONTEXT_NODE],
+                                TWO_CONTEXT_NODE_0_8x8_);
+    DECODE_AND_BRANCH_IF_ZERO(Prob[THREE_CONTEXT_NODE],
+                                THREE_CONTEXT_NODE_0_8x8_);
     if(i==24)
     {
         DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8_2(4);
@@ -647,8 +653,10 @@
 
     scan = vp8_default_zig_zag1d;
     qcoeff_ptr = &x->qcoeff[0];
-
     if (x->mode_info_context->mbmi.mode != B_PRED &&
+#if CONFIG_I8X8
+        x->mode_info_context->mbmi.mode != I8X8_PRED &&
+#endif
         x->mode_info_context->mbmi.mode != SPLITMV)
     {
         i = 24;
--- a/vp8/decoder/onyxd_int.h
+++ b/vp8/decoder/onyxd_int.h
@@ -103,7 +103,6 @@
     int mt_baseline_filter_level[MAX_MB_SEGMENTS];
     int sync_range;
     int *mt_current_mb_col;                  /* Each row remembers its already decoded column. */
-
     unsigned char **mt_yabove_row;           /* mb_rows x width */
     unsigned char **mt_uabove_row;
     unsigned char **mt_vabove_row;
--- a/vp8/encoder/bitstream.c
+++ b/vp8/encoder/bitstream.c
@@ -151,6 +151,12 @@
     vp8_write_token(bc, vp8_kf_ymode_tree, p, vp8_kf_ymode_encodings + m);
 }
 
+#if CONFIG_I8X8
+/* Write 8x8 intra mode m to bc as a vp8_i8x8_mode_tree token using probs p. */
+static void write_i8x8_mode(vp8_writer *bc, int m, const vp8_prob *p) {
+    vp8_write_token(bc, vp8_i8x8_mode_tree, p, &vp8_i8x8_mode_encodings[m]);
+}
+#endif
 static void write_uv_mode(vp8_writer *bc, int m, const vp8_prob *p)
 {
     vp8_write_token(bc, vp8_uv_mode_tree, p, vp8_uv_mode_encodings + m);
@@ -1188,7 +1194,6 @@
                 vp8_encode_bool(bc, m->mbmi.mb_skip_coeff, prob_skip_false);
 
             kfwrite_ymode(bc, ym, c->kf_ymode_prob);
-
             if (ym == B_PRED)
             {
                 const int mis = c->mode_info_stride;
@@ -1208,10 +1213,20 @@
                 }
                 while (++i < 16);
             }
-
+#if CONFIG_I8X8
+            if(ym == I8X8_PRED)
+            {
+                write_i8x8_mode(bc, m->bmi[0].as_mode, c->i8x8_mode_prob);
+                write_i8x8_mode(bc, m->bmi[2].as_mode, c->i8x8_mode_prob);
+                write_i8x8_mode(bc, m->bmi[8].as_mode, c->i8x8_mode_prob);
+                write_i8x8_mode(bc, m->bmi[10].as_mode, c->i8x8_mode_prob);
+                m++;
+            }
+            else
+#endif
             write_uv_mode(bc, (m++)->mbmi.uv_mode, c->kf_uv_mode_prob);
         }
-
+        //printf("\n");
         m++;    // skip L prediction border
     }
 }
--- a/vp8/encoder/block.h
+++ b/vp8/encoder/block.h
@@ -98,6 +98,9 @@
     int mbmode_cost[2][MB_MODE_COUNT];
     int intra_uv_mode_cost[2][MB_MODE_COUNT];
     unsigned int bmode_costs[10][10][10];
+#if CONFIG_I8X8
+    unsigned int i8x8_mode_costs[MB_MODE_COUNT];
+#endif
     unsigned int inter_bmode_costs[B_MODE_COUNT];
 
     // These define limits to motion vector components to prevent them from extending outside the UMV borders
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -70,11 +70,12 @@
 
 #ifdef MODE_STATS
 unsigned int inter_y_modes[10] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
-unsigned int inter_uv_modes[4] = {0, 0, 0, 0};
-unsigned int inter_b_modes[15]  = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
-unsigned int y_modes[5]   = {0, 0, 0, 0, 0};
-unsigned int uv_modes[4]  = {0, 0, 0, 0};
-unsigned int b_modes[14]  = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+unsigned int inter_uv_modes[VP8_UV_MODES] = {0, 0, 0, 0};
+unsigned int inter_b_modes[15] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+unsigned int y_modes[VP8_YMODES] = {0, 0, 0, 0, 0};
+unsigned int i8x8_modes[VP8_I8X8_MODES]={0};
+unsigned int uv_modes[VP8_UV_MODES] = {0, 0, 0, 0};
+unsigned int b_modes[14] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
 #endif
 
 
@@ -1427,8 +1428,16 @@
         }
         while (++b < 16);
     }
-
+#if CONFIG_I8X8
+    if(m==I8X8_PRED)
+    {
+        i8x8_modes[xd->block[0].bmi.as_mode]++;
+        i8x8_modes[xd->block[2].bmi.as_mode]++;
+        i8x8_modes[xd->block[8].bmi.as_mode]++;
+        i8x8_modes[xd->block[10].bmi.as_mode]++;
+    }
 #endif
+#endif
 
     ++cpi->ymode_count[m];
     ++cpi->uv_mode_count[uvm];
@@ -1476,6 +1485,14 @@
         vp8_update_zbin_extra(cpi, x);
     }
 
+#if CONFIG_I8X8
+    if(x->e_mbd.mode_info_context->mbmi.mode == I8X8_PRED)
+    {
+        vp8_encode_intra8x8mby(IF_RTCD(&cpi->rtcd), x);
+        vp8_encode_intra8x8mbuv(IF_RTCD(&cpi->rtcd), x);
+    }
+    else
+#endif
     if (x->e_mbd.mode_info_context->mbmi.mode == B_PRED)
         vp8_encode_intra4x4mby(IF_RTCD(&cpi->rtcd), x);
     else
@@ -1486,6 +1503,9 @@
 #endif
         vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x);
     }
+#if CONFIG_I8X8
+        if(x->e_mbd.mode_info_context->mbmi.mode != I8X8_PRED)
+#endif
     vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x);
     sum_intra_stats(cpi, x);
     vp8_tokenize_mb(cpi, &x->e_mbd, t);
--- a/vp8/encoder/encodeintra.c
+++ b/vp8/encoder/encodeintra.c
@@ -236,3 +236,83 @@
 
     vp8_recon_intra_mbuv(IF_RTCD(&rtcd->common->recon), &x->e_mbd);
 }
+
+#if CONFIG_I8X8
+/* Predict the 8x8 area starting at 4x4 block ib with that block's bmi.as_mode,
+ * then encode and reconstruct the 4x4 blocks at ib + {0, 1, 4, 5}. */
+void vp8_encode_intra8x8(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x, int ib)
+{
+    static const int sub_offset[4] = {0, 1, 4, 5};
+    BLOCKD *const first = &x->e_mbd.block[ib];
+    int n;
+
+    RECON_INVOKE(&rtcd->common->recon, intra8x8_predict)
+                (first, first->bmi.as_mode, first->predictor);
+    for (n = 0; n < 4; n++)
+    {
+        const int idx = ib + sub_offset[n];
+        BLOCKD *const d = &x->e_mbd.block[idx];
+        BLOCK *const s = &x->block[idx];
+
+        ENCODEMB_INVOKE(&rtcd->encodemb, subb)(s, d, 16);
+        x->vp8_short_fdct4x4(s->src_diff, s->coeff, 32);
+        x->quantize_b(s, d);
+        vp8_inverse_transform_b(IF_RTCD(&rtcd->common->idct), d, 32);
+        RECON_INVOKE(&rtcd->common->recon, recon)
+            (d->predictor, d->diff, *(d->base_dst) + d->dst, d->dst_stride);
+    }
+}
+
+extern const int vp8_i8x8_block[4];
+/* Run vp8_encode_intra8x8() on each of the four 8x8 areas of the macroblock;
+ * vp8_i8x8_block[] gives the index of the first 4x4 block of each area. */
+void vp8_encode_intra8x8mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
+{
+    int n;
+
+    for (n = 0; n < 4; ++n)
+    {
+        vp8_encode_intra8x8(rtcd, x, vp8_i8x8_block[n]);
+    }
+}
+
+/* Encode the single 4x4 block ib with intra prediction `mode`: predict,
+ * subtract, forward transform, quantize, inverse transform and reconstruct. */
+void vp8_encode_intra_uv4x4(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x,
+                            int ib, int mode)
+{
+    BLOCK *const s = x->block + ib;
+    BLOCKD *const d = x->e_mbd.block + ib;
+
+    RECON_INVOKE(&rtcd->common->recon, intra_uv4x4_predict)
+                (d, mode, d->predictor);
+    ENCODEMB_INVOKE(&rtcd->encodemb, subb)(s, d, 8);
+
+    /* transform and quantization of the residual */
+    x->vp8_short_fdct4x4(s->src_diff, s->coeff, 16);
+    x->quantize_b(s, d);
+
+    /* reconstruction, written at *(base_dst) + dst */
+    vp8_inverse_transform_b(IF_RTCD(&rtcd->common->idct), d, 16);
+    RECON_INVOKE(&rtcd->common->recon, recon_uv)
+        (d->predictor, d->diff, *(d->base_dst) + d->dst, d->dst_stride);
+}
+
+
+
+/* Encode blocks 16..19 (u) and 20..23 (v): blocks 16+n and 20+n both use the
+ * bmi.as_mode stored in the first 4x4 block of the n-th 8x8 area. */
+void vp8_encode_intra8x8mbuv(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
+{
+    int n;
+
+    for (n = 0; n < 4; ++n)
+    {
+        const BLOCKD *const y_blk = x->e_mbd.block + vp8_i8x8_block[n];
+        const int uv_mode = y_blk->bmi.as_mode;
+
+        vp8_encode_intra_uv4x4(rtcd, x, 16 + n, uv_mode);   /* u */
+        vp8_encode_intra_uv4x4(rtcd, x, 20 + n, uv_mode);   /* v */
+    }
+}
+#endif
--- a/vp8/encoder/encodemb.c
+++ b/vp8/encoder/encodemb.c
@@ -53,6 +53,25 @@
     }
 }
 
+/* Store the 8x8 difference between the source pixels of be and the predictor
+ * of bd into be->src_diff; diff and predictor rows are `pitch` entries apart. */
+void vp8_subtract_4b_c(BLOCK *be, BLOCKD *bd, int pitch)
+{
+    const unsigned char *src = *(be->base_src) + be->src;
+    const unsigned char *pred = bd->predictor;
+    short *diff = be->src_diff;
+    const int stride = be->src_stride;
+    int row, col;
+    for (row = 0; row < 8; row++)
+    {
+        for (col = 0; col < 8; col++)
+            diff[col] = src[col] - pred[col];
+        src  += stride;
+        pred += pitch;
+        diff += pitch;
+    }
+}
+
 void vp8_subtract_mbuv_c(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride)
 {
     short *udiff = diff + 256;
--- a/vp8/encoder/modecosts.c
+++ b/vp8/encoder/modecosts.c
@@ -44,4 +44,10 @@
 
     vp8_cost_tokens(c->mb.intra_uv_mode_cost[1], x->fc.uv_mode_prob, vp8_uv_mode_tree);
     vp8_cost_tokens(c->mb.intra_uv_mode_cost[0], x->kf_uv_mode_prob, vp8_uv_mode_tree);
+#if CONFIG_I8X8
+    vp8_cost_tokens(c->mb.i8x8_mode_costs,
+                    x->i8x8_mode_prob,vp8_i8x8_mode_tree);
+#endif
+
+
 }
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -101,6 +101,7 @@
 
 #endif
 
+//#define OUTPUT_YUV_REC
 
 #ifdef OUTPUT_YUV_SRC
 FILE *yuv_file;
@@ -137,9 +138,10 @@
 extern unsigned __int64 Sectionbits[500];
 #endif
 #ifdef MODE_STATS
-extern unsigned __int64 Sectionbits[50];
-extern int y_modes[5]  ;
-extern int uv_modes[4] ;
+extern INT64 Sectionbits[500];
+extern int y_modes[VP8_YMODES]  ;
+extern int i8x8_modes[VP8_I8X8_MODES];
+extern int uv_modes[VP8_UV_MODES] ;
 extern int b_modes[10]  ;
 extern int inter_y_modes[10] ;
 extern int inter_uv_modes[4] ;
@@ -2305,10 +2307,18 @@
 #ifdef MODE_STATS
         {
             extern int count_mb_seg[4];
-            FILE *f = fopen("modes.stt", "w");
+            char modes_stats_file[250];
+            FILE *f; 
             double dr = (double)cpi->oxcf.frame_rate * (double)cpi->bytes * (double)8 / (double)cpi->count / (double)1000 ;
+            sprintf(modes_stats_file, "modes_q%03d.stt",cpi->common.base_qindex);
+            f = fopen(modes_stats_file, "w");
             fprintf(f, "intra_mode in Intra Frames:\n");
+#if CONFIG_I8X8
+            fprintf(f, "Y: %8d, %8d, %8d, %8d, %8d, %8d\n", y_modes[0], y_modes[1], y_modes[2], y_modes[3], y_modes[4], y_modes[5]);
+            fprintf(f, "I8:%8d, %8d, %8d, %8d\n", i8x8_modes[0], i8x8_modes[1], i8x8_modes[2], i8x8_modes[3]);
+#else
             fprintf(f, "Y: %8d, %8d, %8d, %8d, %8d\n", y_modes[0], y_modes[1], y_modes[2], y_modes[3], y_modes[4]);
+#endif
             fprintf(f, "UV:%8d, %8d, %8d, %8d\n", uv_modes[0], uv_modes[1], uv_modes[2], uv_modes[3]);
             fprintf(f, "B: ");
             {
@@ -4622,7 +4632,7 @@
         fclose(recon_file);
     }
 #endif
-#ifdef OUTPUT_YUV_REC
+#if OUTPUT_YUV_REC
     vp8_write_yuv_rec_frame(cm);
 #endif
 
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -70,18 +70,20 @@
 
 #ifdef MODE_STATS
     // Stats
-    int y_modes[5];
-    int uv_modes[4];
+    int y_modes[VP8_YMODES];
+    int uv_modes[VP8_UV_MODES];
+    int i8x8_modes[VP8_I8X8_MODES];
     int b_modes[10];
     int inter_y_modes[10];
-    int inter_uv_modes[4];
+    int inter_uv_modes[VP8_UV_MODES];
     int inter_b_modes[10];
 #endif
-
-    vp8_prob ymode_prob[4], uv_mode_prob[3];   /* interframe intra mode probs */
-    vp8_prob kf_ymode_prob[4], kf_uv_mode_prob[3];   /* keyframe "" */
-
-    int ymode_count[5], uv_mode_count[4];  /* intra MB type cts this frame */
+    /* interframe intra mode probs */
+    vp8_prob ymode_prob[VP8_YMODES-1], uv_mode_prob[VP8_UV_MODES-1];
+    /* keyframe intra mode probs */
+    vp8_prob kf_ymode_prob[VP8_YMODES-1], kf_uv_mode_prob[VP8_UV_MODES-1];
+    /* intra MB type cts this frame */
+    int ymode_count[VP8_YMODES], uv_mode_count[VP8_UV_MODES];
 
     int count_mb_ref_frame_usage[MAX_REF_FRAMES];
 
--- a/vp8/encoder/ratectrl.c
+++ b/vp8/encoder/ratectrl.c
@@ -33,12 +33,12 @@
 
 
 #ifdef MODE_STATS
-extern int y_modes[5];
-extern int uv_modes[4];
+extern int y_modes[VP8_YMODES];
+extern int uv_modes[VP8_UV_MODES];
 extern int b_modes[10];
 
 extern int inter_y_modes[10];
-extern int inter_uv_modes[4];
+extern int inter_uv_modes[VP8_UV_MODES];
 extern int inter_b_modes[10];
 #endif
 
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -657,6 +657,29 @@
     d[8] = p[8];
     d[12] = p[12];
 }
+
+/* Copy the left part of 8 rows from predictor to dst.  Both buffers are
+ * walked as arrays of unsigned int with 4 words per row; words 0 and 1 of
+ * each row are copied, so words 0..29 of both buffers are touched.
+ * NOTE(review): an 8-byte-wide copy and a 16-byte row stride assume 4-byte
+ * unsigned int, and the casts assume suitably aligned pointers -- confirm
+ * before adding new callers. */
+static void copy_predictor_8x8(unsigned char *dst, const unsigned char *predictor)
+{
+    const unsigned int *p = (const unsigned int *)predictor;
+    unsigned int *d = (unsigned int *)dst;
+    int row;
+
+    for (row = 0; row < 8; row++)
+    {
+        /* index of the first word of this row */
+        const int w = row * 4;
+
+        d[w] = p[w];
+        d[w + 1] = p[w + 1];
+    }
+}
+
 static int rd_pick_intra4x4block(
     VP8_COMP *cpi,
     MACROBLOCK *x,
@@ -834,7 +857,157 @@
     x->e_mbd.mode_info_context->mbmi.mode = mode_selected;
     return best_rd;
 }
+#if CONFIG_I8X8
+/* Try DC_PRED..TM_PRED on the 8x8 area whose first 4x4 block is ib and keep
+ * the mode with the lowest RDCOST.  The winning mode goes to *best_mode; its
+ * total rate, coefficient rate and distortion go to *bestrate, *bestratey
+ * and *bestdistortion.  mode_costs[] is indexed by mode.  On return the area
+ * has been re-encoded with the winning mode and this area's entries of a / l
+ * hold the context values saved for it.  Returns the winning rd cost. */
+static int rd_pick_intra8x8block(VP8_COMP *cpi, MACROBLOCK *x, int ib,
+                                 B_PREDICTION_MODE *best_mode,
+                                 unsigned int *mode_costs,
+                                 ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
+                                 int *bestrate, int *bestratey,
+                                 int *bestdistortion)
+{
+    MB_PREDICTION_MODE mode;
+    MACROBLOCKD *xd = &x->e_mbd;
+    int best_rd = INT_MAX;
+    int rate = 0;
+    int distortion;
+    BLOCK  *be=x->block + ib;
+    BLOCKD *b=x->e_mbd.block + ib;
+    ENTROPY_CONTEXT ta0, ta1, besta0, besta1;
+    ENTROPY_CONTEXT tl0, tl1, bestl0, bestl1;
 
+    /*
+     * The predictor buffer is a 2d buffer with a stride of 16.  Create
+     * a temp buffer that meets the stride requirements, but we are only
+     * interested in the left 8x8 block
+     * */
+
+    DECLARE_ALIGNED_ARRAY(16, unsigned char,  best_predictor, 16*8);
+    DECLARE_ALIGNED_ARRAY(16, short, best_dqcoeff, 16*4);
+
+    for (mode = DC_PRED; mode <= TM_PRED; mode++)
+    {
+        int this_rd;
+        int rate_t;
+
+        rate = mode_costs[mode];
+
+        RECON_INVOKE(&cpi->rtcd.common->recon, intra8x8_predict)
+                     (b, mode, b->predictor);
+
+        vp8_subtract_4b_c(be, b, 16);
+
+        x->vp8_short_fdct8x4(be->src_diff, be->coeff, 32);
+        x->vp8_short_fdct8x4(be->src_diff + 64, be->coeff + 64, 32);
+
+        x->quantize_b_pair(x->block+ib, x->block+ib+1,
+                            xd->block+ib, xd->block+ib+1);
+        x->quantize_b_pair(x->block+ib+4, x->block+ib+5,
+                            xd->block+ib+4, xd->block+ib+5);
+
+        distortion = ENCODEMB_INVOKE(IF_RTCD(&cpi->rtcd.encodemb), berr)
+            ((x->block+ib)->coeff,(xd->block+ib)->dqcoeff)>>2;
+        distortion += ENCODEMB_INVOKE(IF_RTCD(&cpi->rtcd.encodemb), berr)
+            ((x->block+ib+1)->coeff,(xd->block+ib+1)->dqcoeff)>>2;
+        distortion += ENCODEMB_INVOKE(IF_RTCD(&cpi->rtcd.encodemb), berr)
+            ((x->block+ib+4)->coeff,(xd->block+ib+4)->dqcoeff)>>2;
+        distortion += ENCODEMB_INVOKE(IF_RTCD(&cpi->rtcd.encodemb), berr)
+            ((x->block+ib+5)->coeff,(xd->block+ib+5)->dqcoeff)>>2;
+        /* a is addressed through vp8_block2above, l through vp8_block2left */
+        ta0 = *(a + vp8_block2above[ib]);
+        ta1 = *(a + vp8_block2above[ib+1]);
+        tl0 = *(l + vp8_block2left[ib]);
+        tl1 = *(l + vp8_block2left[ib+4]);
+        rate_t = cost_coeffs(x, xd->block+ib, PLANE_TYPE_Y_WITH_DC,
+            &ta0, &tl0);
+        rate_t += cost_coeffs(x, xd->block+ib+1, PLANE_TYPE_Y_WITH_DC,
+            &ta1, &tl0);
+        rate_t += cost_coeffs(x, xd->block+ib+4, PLANE_TYPE_Y_WITH_DC,
+            &ta0, &tl1);
+        rate_t += cost_coeffs(x, xd->block+ib+5, PLANE_TYPE_Y_WITH_DC,
+            &ta1, &tl1);
+        rate += rate_t;
+        this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
+        if (this_rd < best_rd)
+        {
+            *bestrate = rate;
+            *bestratey = rate_t;
+            *bestdistortion = distortion;
+            besta0 = ta0;
+            besta1 = ta1;
+            bestl0 = tl0;
+            bestl1 = tl1;
+            best_rd = this_rd;
+            *best_mode = mode;
+            copy_predictor_8x8(best_predictor, b->predictor);
+            vpx_memcpy(best_dqcoeff, b->dqcoeff, 64);
+            vpx_memcpy(best_dqcoeff+32, b->dqcoeff+64, 64);
+        }
+    }
+    b->bmi.as_mode = (*best_mode);
+    vp8_encode_intra8x8 (IF_RTCD(&cpi->rtcd), x, ib);
+    *(a + vp8_block2above[ib])   = besta0;
+    *(a + vp8_block2above[ib+1]) = besta1;
+    *(l + vp8_block2left[ib])   = bestl0;
+    *(l + vp8_block2left[ib+4]) = bestl1;
+    return best_rd;
+}
+
+const int vp8_i8x8_block[4]={0, 2, 8, 10};
+/* Pick a prediction mode for each of the four 8x8 areas of the macroblock via
+ * rd_pick_intra8x8block(), working on local copies of the above/left entropy
+ * contexts, and record each choice in mode_info_context->bmi[ib].as_mode.
+ *
+ * Rate        set to the I8X8_PRED macroblock mode cost plus the block rates
+ * rate_y      incremented by the summed bestratey of the four blocks
+ * Distortion  set to the summed block distortions
+ * best_rd     not used by this function
+ *
+ * Returns RDCOST() of the totals stored in *Rate and *Distortion. */
+int rd_pick_intra8x8mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate,
+                              int *rate_y, int *Distortion, int best_rd)
+{
+    MACROBLOCKD *const xd = &mb->e_mbd;
+    ENTROPY_CONTEXT_PLANES above_copy, left_copy;
+    ENTROPY_CONTEXT *const ta = (ENTROPY_CONTEXT *)&above_copy;
+    ENTROPY_CONTEXT *const tl = (ENTROPY_CONTEXT *)&left_copy;
+    int total_rate = mb->mbmode_cost[xd->frame_type][I8X8_PRED];
+    int total_dist = 0;
+    int total_rate_y = 0;
+    int n;
+
+    vpx_memcpy(&above_copy, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
+    vpx_memcpy(&left_copy, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES));
+
+    for (n = 0; n < 4; n++)
+    {
+        const int ib = vp8_i8x8_block[n];
+        B_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode);
+        int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry);
+        int UNINITIALIZED_IS_SAFE(d);
+
+        /* the per-block rd cost returned here is not needed */
+        rd_pick_intra8x8block(cpi, mb, ib, &best_mode, mb->i8x8_mode_costs,
+                              ta, tl, &r, &ry, &d);
+
+        total_rate += r;
+        total_dist += d;
+        total_rate_y += ry;
+        xd->mode_info_context->bmi[ib].as_mode = best_mode;
+    }
+
+    *Rate = total_rate;
+    *rate_y += total_rate_y;
+    *Distortion = total_dist;
+    return RDCOST(mb->rdmult, mb->rddiv, total_rate, total_dist);
+}
+#endif
+
 static int rd_cost_mbuv(MACROBLOCK *mb)
 {
     int b;
@@ -2502,6 +2675,27 @@
 
 }
 
+#if CONFIG_I8X8
+/* Store modes[n] in the four bmi entries (offsets 0, 1, 4, 5) of the n-th 8x8
+ * area of the current mode info, then copy bmi[0..15] into xd->block[]. */
+static void set_i8x8_block_modes(MACROBLOCK *x, int *modes)
+{
+    static const int sub_offset[4] = {0, 1, 4, 5};
+    MACROBLOCKD *const xd = &x->e_mbd;
+    int n, k;
+
+    for (n = 0; n < 4; n++)
+    {
+        for (k = 0; k < 4; k++)
+            xd->mode_info_context->bmi[vp8_i8x8_block[n] + sub_offset[k]].as_mode
+                = modes[n];
+    }
+
+    for (k = 0; k < 16; k++)
+        xd->block[k].bmi = xd->mode_info_context->bmi[k];
+}
+#endif
+
 void vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate_)
 {
     int error4x4, error16x16;
@@ -2511,6 +2705,12 @@
     int rate4x4_tokenonly = 0;
     int rate16x16_tokenonly = 0;
     int rateuv_tokenonly = 0;
+#if CONFIG_I8X8
+    int error8x8, rate8x8_tokenonly=0;
+    int rate8x8, dist8x8;
+    int mode16x16;
+    int mode8x8[4];
+#endif
 
     x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
 
@@ -2520,11 +2720,24 @@
     error16x16 = rd_pick_intra16x16mby_mode(cpi, x,
                                             &rate16x16, &rate16x16_tokenonly,
                                             &dist16x16);
-
+#if CONFIG_I8X8
+    mode16x16 = x->e_mbd.mode_info_context->mbmi.mode;
+    error8x8 = rd_pick_intra8x8mby_modes(cpi, x,
+                                         &rate8x8, &rate8x8_tokenonly,
+                                         &dist8x8, error16x16);
+    mode8x8[0]= x->e_mbd.mode_info_context->bmi[0].as_mode;
+    mode8x8[1]= x->e_mbd.mode_info_context->bmi[2].as_mode;
+    mode8x8[2]= x->e_mbd.mode_info_context->bmi[8].as_mode;
+    mode8x8[3]= x->e_mbd.mode_info_context->bmi[10].as_mode;
+#endif
     error4x4 = rd_pick_intra4x4mby_modes(cpi, x,
                                          &rate4x4, &rate4x4_tokenonly,
                                          &dist4x4, error16x16);
 
+#if CONFIG_I8X8
+    if(error8x8> error16x16)
+    {
+#endif
     if (error4x4 < error16x16)
     {
         x->e_mbd.mode_info_context->mbmi.mode = B_PRED;
@@ -2532,8 +2745,29 @@
     }
     else
     {
+#if CONFIG_I8X8
+        x->e_mbd.mode_info_context->mbmi.mode = mode16x16;
+#endif
         rate += rate16x16;
+
     }
+#if CONFIG_I8X8
+    }
+    else
+    {
+        if (error4x4 < error8x8)
+        {
+            x->e_mbd.mode_info_context->mbmi.mode = B_PRED;
+            rate += rate4x4;
+        }
+        else
+        {
 
+            x->e_mbd.mode_info_context->mbmi.mode = I8X8_PRED;
+            set_i8x8_block_modes(x, mode8x8);
+            rate += rate8x8;
+        }
+    }
+#endif
     *rate_ = rate;
 }
--- a/vp8/encoder/tokenize.c
+++ b/vp8/encoder/tokenize.c
@@ -454,6 +454,9 @@
     int b;
 
     has_y2_block = (x->mode_info_context->mbmi.mode != B_PRED
+#if CONFIG_I8X8
+                    && x->mode_info_context->mbmi.mode != I8X8_PRED
+#endif
                     && x->mode_info_context->mbmi.mode != SPLITMV);
 
     x->mode_info_context->mbmi.mb_skip_coeff =