shithub: libvpx

Download patch

ref: c6f1bf43210f93dafbae4a2744cadd6f18924255
parent: 2210767c3f7ce179ea9b352c8de2790b23487bca
author: Deb Mukherjee <debargha@google.com>
date: Thu Apr 12 05:24:03 EDT 2012

Differential encoding of probability updates

Adds differential encoding of prob updates using a subexponential
code centered around the previous probability value.
Also searches for the most cost-effective update, and breaks
up the coefficient updates into smaller groups.

Small gain on Derf: 0.2%

Change-Id: Ie0071e3dc113e3d0d7ab95b6442bb07a89970030

--- a/configure
+++ b/configure
@@ -224,6 +224,7 @@
     sixteenth_subpel_uv
     comp_intra_pred
     newentropy
+    newupdate
     superblocks
 "
 CONFIG_LIST="
--- a/vp8/common/entropy.c
+++ b/vp8/common/entropy.c
@@ -173,6 +173,10 @@
     { 0, 0, 0, 0}
 };
 
+#if CONFIG_NEWUPDATE
+const vp8_prob updprobs[4] = {128, 136, 120, 112};
+#endif
+
 #include "default_coef_probs.h"
 #include "defaultcoefcounts.h"
 
--- a/vp8/common/entropy.h
+++ b/vp8/common/entropy.h
@@ -82,7 +82,11 @@
    distinct bands). */
 
 /*# define DC_TOKEN_CONTEXTS        3*/ /* 00, 0!0, !0!0 */
-#   define PREV_COEF_CONTEXTS       3
+#define PREV_COEF_CONTEXTS          3
+#if CONFIG_NEWUPDATE
+#define SUBEXP_PARAM                1  /* Subexponential code parameter */
+#define COEFUPDATETYPE              2  /* coef update type to use (1/2/3) */
+#endif
 
 extern DECLARE_ALIGNED(16, const unsigned char, vp8_prev_token_class[MAX_ENTROPY_TOKENS]);
 
--- a/vp8/decoder/dboolhuff.c
+++ b/vp8/decoder/dboolhuff.c
@@ -50,3 +50,67 @@
     br->value = value;
     br->count = count;
 }
+
+
+#if CONFIG_NEWUPDATE
+static int get_unsigned_bits(unsigned num_values)
+{
+    int cat=0;
+    if ((num_values--)<=1) return 0;
+    while (num_values>0)
+    {
+        cat++;
+        num_values>>=1;
+    }
+    return cat;
+}
+
+int inv_recenter_nonneg(int v, int m)
+{
+    if (v>(m<<1)) return v;
+    else if ((v&1)==0) return (v>>1)+m;
+    else return m-((v+1)>>1);
+}
+
+int vp8_decode_uniform(BOOL_DECODER *br, int n)
+{
+    int v;
+    int l=get_unsigned_bits(n);
+    int m=(1<<l)-n;
+    if (!l) return 0;
+    v = vp8_decode_value(br, l-1);
+    if (v < m)
+        return v;
+    else
+        return (v<<1)-m+vp8_decode_value(br, 1);
+}
+
+int vp8_decode_term_subexp(BOOL_DECODER *br, int k, int num_syms)
+{
+    int i=0, mk=0, word;
+    while (1)
+    {
+        int b = (i?k+i-1:k);
+        int a = (1<<b);
+        if (num_syms<=mk+3*a)
+        {
+            word = vp8_decode_uniform(br, num_syms-mk) + mk;
+            break;
+        }
+        else
+        {
+            if (vp8_decode_value(br, 1))
+            {
+                i++;
+                mk += a;
+            }
+            else
+            {
+                word = vp8_decode_value(br, b) + mk;
+                break;
+            }
+        }
+    }
+    return word;
+}
+#endif
--- a/vp8/decoder/dboolhuff.h
+++ b/vp8/decoder/dboolhuff.h
@@ -42,6 +42,12 @@
 
 void vp8dx_bool_decoder_fill(BOOL_DECODER *br);
 
+#if CONFIG_NEWUPDATE
+int vp8_decode_uniform(BOOL_DECODER *br, int n);
+int vp8_decode_term_subexp(BOOL_DECODER *br, int k, int num_syms);
+int inv_recenter_nonneg(int v, int m);
+#endif
+
 /*The refill loop is used in several places, so define it in a macro to make
    sure they're all consistent.
   An inline function would be cleaner, but has a significant penalty, because
@@ -151,4 +157,5 @@
     /* No error. */
     return 0;
 }
+
 #endif
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -34,6 +34,7 @@
 #include "dboolhuff.h"
 
 #include "vp8/common/seg_common.h"
+#include "vp8/common/entropy.h"
 
 #include <assert.h>
 #include <stdio.h>
@@ -43,6 +44,23 @@
 int dec_debug = 0;
 #endif
 
+#if CONFIG_NEWUPDATE
+static int inv_remap_prob(int v, int m)
+{
+    const int n = 256;
+    int i;
+    //if (v <= n - 2 - s) v += s; else v =  n - 2 - v;
+    //v = ((v&240)>>4) | ((v&15)<<4);
+    v = (v%15)*17 + (v/15);
+    if ((m<<1)<=n) {
+        i = inv_recenter_nonneg(v+1, m);
+    } else {
+        i = n-1-inv_recenter_nonneg(v+1, n-1-m);
+    }
+    return i;
+}
+#endif
+
 void vp8cx_init_de_quantizer(VP8D_COMP *pbi)
 {
     int i;
@@ -769,6 +787,170 @@
 
 }
 
+#if CONFIG_NEWUPDATE
+static void read_coef_probs3(VP8D_COMP *pbi)
+{
+    const vp8_prob grpupd = 216;
+    int i, j, k, l;
+    vp8_reader *const bc = & pbi->bc;
+    VP8_COMMON *const pc = & pbi->common;
+    for (i = 0; i < BLOCK_TYPES; i++)
+        for (l = 0; l < ENTROPY_NODES; l++)
+        {
+            if(vp8_read(bc, grpupd))
+            {
+                //printf("Decoding %d\n", l);
+                for (j = !i; j < COEF_BANDS; j++)
+                    for (k = 0; k < PREV_COEF_CONTEXTS; k++)
+                    {
+                        vp8_prob *const p = pc->fc.coef_probs [i][j][k] + l;
+                        int u = vp8_read(bc, vp8_coef_update_probs [i][j][k][l]);
+                        if (u)
+                        {
+                            int delp = vp8_decode_term_subexp(bc, SUBEXP_PARAM, 255);
+                            *p = (vp8_prob)inv_remap_prob(delp, *p);
+                        }
+                    }
+            }
+        }
+
+    if(pbi->common.txfm_mode == ALLOW_8X8)
+    {
+        for (i = 0; i < BLOCK_TYPES; i++)
+            for (l = 0; l < ENTROPY_NODES; l++)
+            {
+                if(vp8_read(bc, grpupd))
+                {
+                    for (j = !i; j < COEF_BANDS; j++)
+                        for (k = 0; k < PREV_COEF_CONTEXTS; k++)
+                        {
+                            vp8_prob *const p = pc->fc.coef_probs_8x8 [i][j][k] + l;
+                            int u = vp8_read(bc, vp8_coef_update_probs_8x8 [i][j][k][l]);
+                            if (u)
+                            {
+                                int delp = vp8_decode_term_subexp(bc, SUBEXP_PARAM, 255);
+                                *p = (vp8_prob)inv_remap_prob(delp, *p);
+                            }
+                        }
+                }
+            }
+    }
+}
+
+static void read_coef_probs2(VP8D_COMP *pbi)
+{
+    const vp8_prob grpupd = 192;
+    int i, j, k, l;
+    vp8_reader *const bc = & pbi->bc;
+    VP8_COMMON *const pc = & pbi->common;
+    for (l = 0; l < ENTROPY_NODES; l++)
+    {
+        if(vp8_read(bc, grpupd))
+        {
+            //printf("Decoding %d\n", l);
+            for (i = 0; i < BLOCK_TYPES; i++)
+                for (j = !i; j < COEF_BANDS; j++)
+                    for (k = 0; k < PREV_COEF_CONTEXTS; k++)
+                    {
+                        vp8_prob *const p = pc->fc.coef_probs [i][j][k] + l;
+                        int u = vp8_read(bc, vp8_coef_update_probs [i][j][k][l]);
+                        if (u)
+                        {
+                            int delp = vp8_decode_term_subexp(bc, SUBEXP_PARAM, 255);
+                            *p = (vp8_prob)inv_remap_prob(delp, *p);
+                        }
+                    }
+        }
+    }
+    if(pbi->common.txfm_mode == ALLOW_8X8)
+    {
+        for (l = 0; l < ENTROPY_NODES; l++)
+        {
+            if(vp8_read(bc, grpupd))
+            {
+                for (i = 0; i < BLOCK_TYPES; i++)
+                    for (j = !i; j < COEF_BANDS; j++)
+                        for (k = 0; k < PREV_COEF_CONTEXTS; k++)
+                        {
+                            vp8_prob *const p = pc->fc.coef_probs_8x8 [i][j][k] + l;
+
+                            int u = vp8_read(bc, vp8_coef_update_probs_8x8 [i][j][k][l]);
+                            if (u)
+                            {
+                                int delp = vp8_decode_term_subexp(bc, SUBEXP_PARAM, 255);
+                                *p = (vp8_prob)inv_remap_prob(delp, *p);
+                            }
+                        }
+            }
+        }
+    }
+}
+#endif
+
+static void read_coef_probs(VP8D_COMP *pbi)
+{
+    int i, j, k, l;
+    vp8_reader *const bc = & pbi->bc;
+    VP8_COMMON *const pc = & pbi->common;
+
+    {
+        if(vp8_read_bit(bc))
+        {
+        /* read coef probability tree */
+        for (i = 0; i < BLOCK_TYPES; i++)
+#if CONFIG_NEWUPDATE
+            for (j = !i; j < COEF_BANDS; j++)
+#else
+            for (j = 0; j < COEF_BANDS; j++)
+#endif
+                for (k = 0; k < PREV_COEF_CONTEXTS; k++)
+                    for (l = 0; l < ENTROPY_NODES; l++)
+                    {
+                        vp8_prob *const p = pc->fc.coef_probs [i][j][k] + l;
+
+                        if (vp8_read(bc, vp8_coef_update_probs [i][j][k][l]))
+                        {
+#if CONFIG_NEWUPDATE
+                            int delp = vp8_decode_term_subexp(bc, SUBEXP_PARAM, 255);
+                            //printf("delp = %d/%d", *p, delp);
+                            *p = (vp8_prob)inv_remap_prob(delp, *p);
+                            //printf("/%d\n", *p);
+#else
+                            *p = (vp8_prob)vp8_read_literal(bc, 8);
+#endif
+                        }
+                    }
+        }
+    }
+
+    if(pbi->common.txfm_mode == ALLOW_8X8 && vp8_read_bit(bc))
+    {
+        // read coef probability tree
+        for (i = 0; i < BLOCK_TYPES; i++)
+#if CONFIG_NEWUPDATE
+            for (j = !i; j < COEF_BANDS; j++)
+#else
+            for (j = 0; j < COEF_BANDS; j++)
+#endif
+                for (k = 0; k < PREV_COEF_CONTEXTS; k++)
+                    for (l = 0; l < ENTROPY_NODES; l++)
+                    {
+
+                        vp8_prob *const p = pc->fc.coef_probs_8x8 [i][j][k] + l;
+
+                        if (vp8_read(bc, vp8_coef_update_probs_8x8 [i][j][k][l]))
+                        {
+#if CONFIG_NEWUPDATE
+                            int delp = vp8_decode_term_subexp(bc, SUBEXP_PARAM, 255);
+                            *p = (vp8_prob)inv_remap_prob(delp, *p);
+#else
+                            *p = (vp8_prob)vp8_read_literal(bc, 8);
+#endif
+                        }
+                    }
+    }
+}
+
 int vp8_decode_frame(VP8D_COMP *pbi)
 {
     vp8_reader *const bc = & pbi->bc;
@@ -1198,44 +1380,13 @@
         fclose(z);
     }
 
-    {
-        if(vp8_read_bit(bc))
-        {
-        /* read coef probability tree */
-        for (i = 0; i < BLOCK_TYPES; i++)
-            for (j = 0; j < COEF_BANDS; j++)
-                for (k = 0; k < PREV_COEF_CONTEXTS; k++)
-                    for (l = 0; l < ENTROPY_NODES; l++)
-                    {
-                        vp8_prob *const p = pc->fc.coef_probs [i][j][k] + l;
-
-                        if (vp8_read(bc, vp8_coef_update_probs [i][j][k][l]))
-                        {
-                            *p = (vp8_prob)vp8_read_literal(bc, 8);
-
-                        }
-                    }
-        }
-    }
-
-    if(pbi->common.txfm_mode == ALLOW_8X8 && vp8_read_bit(bc))
-    {
-        // read coef probability tree
-        for (i = 0; i < BLOCK_TYPES; i++)
-            for (j = 0; j < COEF_BANDS; j++)
-                for (k = 0; k < PREV_COEF_CONTEXTS; k++)
-                    for (l = 0; l < MAX_ENTROPY_TOKENS - 1; l++)
-                    {
-
-                        vp8_prob *const p = pc->fc.coef_probs_8x8 [i][j][k] + l;
-
-                        if (vp8_read(bc, vp8_coef_update_probs_8x8 [i][j][k][l]))
-                        {
-                            *p = (vp8_prob)vp8_read_literal(bc, 8);
-
-                        }
-                    }
-    }
+#if COEFUPDATETYPE == 2
+    read_coef_probs2(pbi);
+#elif COEFUPDATETYPE == 3
+    read_coef_probs3(pbi);
+#else
+    read_coef_probs(pbi);
+#endif
 
 
     vpx_memcpy(&xd->pre, &pc->yv12_fb[pc->lst_fb_idx], sizeof(YV12_BUFFER_CONFIG));
--- a/vp8/encoder/bitstream.c
+++ b/vp8/encoder/bitstream.c
@@ -27,6 +27,7 @@
 
 #include "vp8/common/seg_common.h"
 #include "vp8/common/pred_common.h"
+#include "vp8/common/entropy.h"
 
 #if defined(SECTIONBITS_OUTPUT)
 unsigned __int64 Sectionbits[500];
@@ -45,7 +46,34 @@
 #endif
 
 #define vp8_cost_upd  ((int)(vp8_cost_one(upd) - vp8_cost_zero(upd)) >> 8)
+#define vp8_cost_upd256  ((int)(vp8_cost_one(upd) - vp8_cost_zero(upd)))
 
+#if CONFIG_NEWUPDATE
+#define SEARCH_NEWP
+static int update_bits[255];
+
+static void compute_update_table()
+{
+    int i;
+    for (i=0; i<255; i++)
+        update_bits[i] = vp8_count_term_subexp(i, SUBEXP_PARAM, 255);
+}
+
+static int remap_prob(int v, int m)
+{
+    const int n = 256;
+    int i;
+    if ((m<<1)<=n)
+        i = recenter_nonneg(v, m) - 1;
+    else
+        i = recenter_nonneg(n-1-v, n-1-m) - 1;
+    //if (i >= s) i -= s; else i = n - 2 - i;
+    //i = ((i>>4)&15) | ((i((i>>4)&15) | ((i&15)<<4);&15)<<4);
+    i = (i%17)*15 + (i/17);
+    return i;
+}
+#endif
+
 static void update_mode(
     vp8_writer *const w,
     int n,
@@ -306,12 +334,46 @@
                                const vp8_prob oldp, const vp8_prob newp,
                                const vp8_prob upd)
 {
-    const int old_b = vp8_cost_branch(ct, oldp);
-    const int new_b = vp8_cost_branch(ct, newp);
-    const int update_b = 8 + vp8_cost_upd;
+    const int old_b = vp8_cost_branch256(ct, oldp);
+    const int new_b = vp8_cost_branch256(ct, newp);
+#if CONFIG_NEWUPDATE
+    const int delp = remap_prob(newp, oldp);
+    const int update_b = update_bits[delp]*256 + vp8_cost_upd256;
+#else
+    const int update_b = 2048 + vp8_cost_upd256;
+#endif
+    return (old_b - new_b - update_b);
+}
 
-    return old_b - new_b - update_b;
+#if CONFIG_NEWUPDATE
+static int prob_update_savings_search(const unsigned int *ct,
+                                      const vp8_prob oldp, vp8_prob *bestp,
+                                      const vp8_prob upd)
+{
+    const int old_b = vp8_cost_branch256(ct, oldp);
+    int new_b, delp, update_b, savings, bestsavings, step;
+    vp8_prob newp, bestnewp;
+
+    bestsavings = 0;
+    bestnewp = oldp;
+
+    step = (*bestp > oldp ? -1 : 1);
+    for (newp = *bestp; newp != oldp; newp+=step)
+    {
+        new_b = vp8_cost_branch256(ct, newp);
+        delp = remap_prob(newp, oldp);
+        update_b = update_bits[delp]*256 + vp8_cost_upd256;
+        savings = old_b - new_b - update_b;
+        if (savings > bestsavings)
+        {
+            bestsavings = savings;
+            bestnewp = newp;
+        }
+    }
+    *bestp = bestnewp;
+    return bestsavings;
 }
+#endif
 
 static void pack_tokens_c(vp8_writer *w, const TOKENEXTRA *p, int xcount)
 {
@@ -1373,7 +1435,11 @@
     int i = 0;
     do
     {
-        int j = 0;
+#if CONFIG_NEWUPDATE
+        int j = !i;
+#else
+        int j = 0;      /* token/prob index */
+#endif
         do
         {
             int k = 0;
@@ -1387,7 +1453,6 @@
 
                 int t = 0;      /* token/prob index */
 
-
                 vp8_tree_probs_from_distribution(
                     MAX_ENTROPY_TOKENS, vp8_coef_encodings, vp8_coef_tree,
                     cpi->frame_coef_probs [i][j][k],
@@ -1399,10 +1464,15 @@
                 do
                 {
                     const unsigned int *ct  = cpi->frame_branch_ct [i][j][k][t];
-                    const vp8_prob newp = cpi->frame_coef_probs [i][j][k][t];
+                    vp8_prob newp = cpi->frame_coef_probs [i][j][k][t];
                     const vp8_prob oldp = cpi->common.fc.coef_probs [i][j][k][t];
                     const vp8_prob upd = vp8_coef_update_probs [i][j][k][t];
+
+#if CONFIG_NEWUPDATE && defined(SEARCH_NEWP)
+                    const int s = prob_update_savings_search(ct, oldp, &newp, upd);
+#else
                     const int s = prob_update_savings(ct, oldp, newp, upd);
+#endif
 
                     if (s > 0)
                     {
@@ -1430,6 +1500,7 @@
             int k = 0;
             do
             {
+                int t;
                 vp8_tree_probs_from_distribution(
                     MAX_ENTROPY_TOKENS, vp8_coef_encodings, vp8_coef_tree,
                     cpi->frame_coef_probs [i][j][k],
@@ -1437,6 +1508,14 @@
                     cpi->coef_counts [i][j][k],
                     256, 1
                 );
+#ifdef ENTROPY_STATS
+                t = 0;
+                do
+                {
+                    context_counters [i][j][k][t] += cpi->coef_counts [i][j][k][t];
+                }
+                while (++t < MAX_ENTROPY_TOKENS);
+#endif
             }
             while (++k < PREV_COEF_CONTEXTS);
         }
@@ -1450,7 +1529,7 @@
     {
         do
         {
-            int j = 0;
+            int j = 0;      /* token/prob index */
             do
             {
                 int k = 0;
@@ -1468,6 +1547,14 @@
                         cpi->coef_counts_8x8 [i][j][k],
                         256, 1
                         );
+#ifdef ENTROPY_STATS
+                    t = 0;
+                    do
+                    {
+                        context_counters [i][j][k][t] += cpi->coef_counts [i][j][k][t];
+                    }
+                    while (++t < MAX_ENTROPY_TOKENS);
+#endif
 
                 }
                 while (++k < PREV_COEF_CONTEXTS);
@@ -1479,11 +1566,376 @@
 
 }
 
+#if CONFIG_NEWUPDATE
+static void update_coef_probs3(VP8_COMP *cpi)
+{
+    const vp8_prob grpupd = 216;
+    int i, j, k, t;
+    vp8_writer *const w = & cpi->bc;
+    int update[2];
+    int savings;
+    int bestupdndx[2*ENTROPY_NODES];
+
+    vp8_clear_system_state(); //__asm emms;
+    // Build the cofficient contexts based on counts collected in encode loop
+    build_coeff_contexts(cpi);
+
+    i = 0;
+    for (i = 0; i < BLOCK_TYPES; ++i)
+    {
+        for (t = 0; t < ENTROPY_NODES; ++t)
+        {
+            /* dry run to see if there is any udpate at all needed */
+            savings = 0;
+            update[0] = update[1] = 0;
+            for (j = !i; j < COEF_BANDS; ++j)
+            {
+                for (k = 0; k < PREV_COEF_CONTEXTS; ++k)
+                {
+                    vp8_prob newp = cpi->frame_coef_probs [i][j][k][t];
+                    vp8_prob *Pold = cpi->common.fc.coef_probs [i][j][k] + t;
+                    const vp8_prob upd = vp8_coef_update_probs [i][j][k][t];
+                    int s;
+                    int u = 0;
+
+#if defined(SEARCH_NEWP)
+                    s = prob_update_savings_search(
+                            cpi->frame_branch_ct [i][j][k][t], *Pold, &newp, upd);
+                    if (s > 0 && newp != *Pold) u = 1;
+                    if (u)
+                        savings += s - (int)(vp8_cost_zero(upd));
+                    else
+                        savings -= (int)(vp8_cost_zero(upd));
+#else
+                    s = prob_update_savings(
+                        cpi->frame_branch_ct [i][j][k][t], *Pold, newp, upd);
+                    if (s > 0) u = 1;
+                    if (u)
+                        savings += s;
+#endif
+                    //printf("    %d %d %d: %d\n", i, j, k, u);
+                    update[u]++;
+                }
+            }
+            if (update[1] == 0 || savings < 0)
+            {
+                vp8_write(w, 0, grpupd);
+                continue;
+            }
+            vp8_write(w, 1, grpupd);
+            for (j = !i; j < COEF_BANDS; ++j)
+            {
+                for (k = 0; k < PREV_COEF_CONTEXTS; ++k)
+                {
+                    vp8_prob newp = cpi->frame_coef_probs [i][j][k][t];
+                    vp8_prob *Pold = cpi->common.fc.coef_probs [i][j][k] + t;
+                    const vp8_prob upd = vp8_coef_update_probs [i][j][k][t];
+                    int s;
+                    int u = 0;
+
+#if defined(SEARCH_NEWP)
+                    s = prob_update_savings_search(
+                        cpi->frame_branch_ct [i][j][k][t], *Pold, &newp, upd);
+                    if (s > 0 && newp != *Pold) u = 1;
+#else
+                    s = prob_update_savings(
+                        cpi->frame_branch_ct [i][j][k][t], *Pold, newp, upd);
+                    if (s > 0) u = 1;
+#endif
+                    //printf("  %d %d %d: %d (%d)\n", i, j, k, u, upd);
+                    vp8_write(w, u, upd);
+#ifdef ENTROPY_STATS
+                    ++ tree_update_hist [i][j][k][t] [u];
+#endif
+                    if (u)
+                    { /* send/use new probability */
+                        int delp = remap_prob(newp, *Pold);
+                        vp8_encode_term_subexp(w, delp, SUBEXP_PARAM, 255);
+                        *Pold = newp;
+                    }
+
+                }
+            }
+        }
+    }
+
+    if(cpi->common.txfm_mode != ALLOW_8X8) return;
+
+    for (i = 0; i < BLOCK_TYPES; ++i)
+    {
+        for (t = 0; t < ENTROPY_NODES; ++t)
+        {
+            /* dry run to see if there is any udpate at all needed */
+            savings = 0;
+            update[0] = update[1] = 0;
+            for (j = !i; j < COEF_BANDS; ++j)
+            {
+                for (k = 0; k < PREV_COEF_CONTEXTS; ++k)
+                {
+                    vp8_prob newp = cpi->frame_coef_probs_8x8 [i][j][k][t];
+                    vp8_prob *Pold = cpi->common.fc.coef_probs_8x8 [i][j][k] + t;
+                    const vp8_prob upd = vp8_coef_update_probs_8x8 [i][j][k][t];
+                    int s;
+                    int u = 0;
+
+#if defined(SEARCH_NEWP)
+                    s = prob_update_savings_search(
+                            cpi->frame_branch_ct_8x8 [i][j][k][t],
+                            *Pold, &newp, upd);
+                    if (s > 0 && newp != *Pold)
+                        u = 1;
+                    if (u)
+                        savings += s - (int)(vp8_cost_zero(upd));
+                    else
+                        savings -= (int)(vp8_cost_zero(upd));
+#else
+                    s = prob_update_savings(
+                            cpi->frame_branch_ct_8x8 [i][j][k][t],
+                            *Pold, newp, upd);
+                    if (s > 0)
+                        u = 1;
+                    if (u)
+                        savings += s;
+#endif
+                    update[u]++;
+                }
+            }
+            if (update[1] == 0 || savings < 0)
+            {
+                vp8_write(w, 0, grpupd);
+                continue;
+            }
+            vp8_write(w, 1, grpupd);
+            for (j = !i; j < COEF_BANDS; ++j)
+            {
+                for (k = 0; k < PREV_COEF_CONTEXTS; ++k)
+                {
+                    vp8_prob newp = cpi->frame_coef_probs_8x8 [i][j][k][t];
+                    vp8_prob *Pold = cpi->common.fc.coef_probs_8x8 [i][j][k] + t;
+                    const vp8_prob upd = vp8_coef_update_probs_8x8 [i][j][k][t];
+                    int s;
+                    int u = 0;
+
+#if defined(SEARCH_NEWP)
+                    s = prob_update_savings_search(
+                        cpi->frame_branch_ct_8x8 [i][j][k][t],
+                        *Pold, &newp, upd);
+                    if (s > 0 && newp != *Pold)
+                        u = 1;
+#else
+                    s = prob_update_savings(
+                        cpi->frame_branch_ct_8x8 [i][j][k][t],
+                        *Pold, newp, upd);
+                    if (s > 0)
+                        u = 1;
+#endif
+                    vp8_write(w, u, upd);
+#ifdef ENTROPY_STATS
+                    ++ tree_update_hist_8x8 [i][j][k][t] [u];
+#endif
+                    if (u)
+                    {
+                        /* send/use new probability */
+                        int delp = remap_prob(newp, *Pold);
+                        vp8_encode_term_subexp( w, delp, SUBEXP_PARAM, 255);
+                        *Pold = newp;
+                    }
+                }
+            }
+        }
+    }
+}
+
+static void update_coef_probs2(VP8_COMP *cpi)
+{
+    const vp8_prob grpupd = 192;
+    int i, j, k, t;
+    vp8_writer *const w = & cpi->bc;
+    int update[2];
+    int savings;
+    int bestupdndx[2*ENTROPY_NODES];
+
+    vp8_clear_system_state(); //__asm emms;
+    // Build the cofficient contexts based on counts collected in encode loop
+    build_coeff_contexts(cpi);
+
+    for (t = 0; t < ENTROPY_NODES; ++t)
+    {
+        /* dry run to see if there is any udpate at all needed */
+        savings = 0;
+        update[0] = update[1] = 0;
+        for (i = 0; i < BLOCK_TYPES; ++i)
+        {
+            for (j = !i; j < COEF_BANDS; ++j)
+            {
+                for (k = 0; k < PREV_COEF_CONTEXTS; ++k)
+                {
+                    vp8_prob newp = cpi->frame_coef_probs [i][j][k][t];
+                    vp8_prob *Pold = cpi->common.fc.coef_probs [i][j][k] + t;
+                    const vp8_prob upd = vp8_coef_update_probs [i][j][k][t];
+                    int s;
+                    int u = 0;
+
+#if defined(SEARCH_NEWP)
+                    s = prob_update_savings_search(
+                            cpi->frame_branch_ct [i][j][k][t], *Pold, &newp, upd);
+                    if (s > 0 && newp != *Pold) u = 1;
+                    if (u)
+                        savings += s - (int)(vp8_cost_zero(upd));
+                    else
+                        savings -= (int)(vp8_cost_zero(upd));
+#else
+                    s = prob_update_savings(
+                        cpi->frame_branch_ct [i][j][k][t], *Pold, newp, upd);
+                    if (s > 0) u = 1;
+                    if (u)
+                        savings += s;
+#endif
+                    //printf("    %d %d %d: %d\n", i, j, k, u);
+                    update[u]++;
+                }
+            }
+        }
+        if (update[1] == 0 || savings < 0)
+        {
+            vp8_write(w, 0, grpupd);
+            continue;
+        }
+        vp8_write(w, 1, grpupd);
+        for (i = 0; i < BLOCK_TYPES; ++i)
+        {
+            for (j = !i; j < COEF_BANDS; ++j)
+            {
+                for (k = 0; k < PREV_COEF_CONTEXTS; ++k)
+                {
+                    vp8_prob newp = cpi->frame_coef_probs [i][j][k][t];
+                    vp8_prob *Pold = cpi->common.fc.coef_probs [i][j][k] + t;
+                    const vp8_prob upd = vp8_coef_update_probs [i][j][k][t];
+                    int s;
+                    int u = 0;
+
+#if defined(SEARCH_NEWP)
+                    s = prob_update_savings_search(
+                        cpi->frame_branch_ct [i][j][k][t], *Pold, &newp, upd);
+                    if (s > 0 && newp != *Pold) u = 1;
+#else
+                    s = prob_update_savings(
+                        cpi->frame_branch_ct [i][j][k][t], *Pold, newp, upd);
+                    if (s > 0) u = 1;
+#endif
+                    //printf("  %d %d %d: %d (%d)\n", i, j, k, u, upd);
+                    vp8_write(w, u, upd);
+#ifdef ENTROPY_STATS
+                    ++ tree_update_hist [i][j][k][t] [u];
+#endif
+                    if (u)
+                    { /* send/use new probability */
+                        int delp = remap_prob(newp, *Pold);
+                        vp8_encode_term_subexp(w, delp, SUBEXP_PARAM, 255);
+                        *Pold = newp;
+                    }
+                }
+            }
+        }
+    }
+
+    if(cpi->common.txfm_mode != ALLOW_8X8) return;
+
+    for (t = 0; t < ENTROPY_NODES; ++t)
+    {
+        /* dry run to see if there is any udpate at all needed */
+        savings = 0;
+        update[0] = update[1] = 0;
+        for (i = 0; i < BLOCK_TYPES; ++i)
+        {
+            for (j = !i; j < COEF_BANDS; ++j)
+            {
+                for (k = 0; k < PREV_COEF_CONTEXTS; ++k)
+                {
+                    vp8_prob newp = cpi->frame_coef_probs_8x8 [i][j][k][t];
+                    vp8_prob *Pold = cpi->common.fc.coef_probs_8x8 [i][j][k] + t;
+                    const vp8_prob upd = vp8_coef_update_probs_8x8 [i][j][k][t];
+                    int s;
+                    int u = 0;
+
+#if defined(SEARCH_NEWP)
+                    s = prob_update_savings_search(
+                            cpi->frame_branch_ct_8x8 [i][j][k][t],
+                            *Pold, &newp, upd);
+                    if (s > 0 && newp != *Pold)
+                        u = 1;
+                    if (u)
+                        savings += s - (int)(vp8_cost_zero(upd));
+                    else
+                        savings -= (int)(vp8_cost_zero(upd));
+#else
+                    s = prob_update_savings(
+                            cpi->frame_branch_ct_8x8 [i][j][k][t],
+                            *Pold, newp, upd);
+                    if (s > 0)
+                        u = 1;
+                    if (u)
+                        savings += s;
+#endif
+                    update[u]++;
+                }
+            }
+        }
+        if (update[1] == 0 || savings < 0)
+        {
+            vp8_write(w, 0, grpupd);
+            continue;
+        }
+        vp8_write(w, 1, grpupd);
+        for (i = 0; i < BLOCK_TYPES; ++i)
+        {
+            for (j = !i; j < COEF_BANDS; ++j)
+            {
+                for (k = 0; k < PREV_COEF_CONTEXTS; ++k)
+                {
+                    vp8_prob newp = cpi->frame_coef_probs_8x8 [i][j][k][t];
+                    vp8_prob *Pold = cpi->common.fc.coef_probs_8x8 [i][j][k] + t;
+                    const vp8_prob upd = vp8_coef_update_probs_8x8 [i][j][k][t];
+                    int s;
+                    int u = 0;
+
+#if defined(SEARCH_NEWP)
+                        s = prob_update_savings_search(
+                            cpi->frame_branch_ct_8x8 [i][j][k][t],
+                            *Pold, &newp, upd);
+                        if (s > 0 && newp != *Pold)
+                            u = 1;
+#else
+                        s = prob_update_savings(
+                            cpi->frame_branch_ct_8x8 [i][j][k][t],
+                            *Pold, newp, upd);
+                        if (s > 0)
+                            u = 1;
+#endif
+                        vp8_write(w, u, upd);
+#ifdef ENTROPY_STATS
+                        ++ tree_update_hist_8x8 [i][j][k][t] [u];
+#endif
+                        if (u)
+                        {
+                            /* send/use new probability */
+                            int delp = remap_prob(newp, *Pold);
+                            vp8_encode_term_subexp( w, delp, SUBEXP_PARAM, 255);
+                            *Pold = newp;
+                        }
+                }
+            }
+        }
+    }
+}
+#endif
+
 static void update_coef_probs(VP8_COMP *cpi)
 {
     int i = 0;
     vp8_writer *const w = & cpi->bc;
-    int update = 0;
+    int update[2] = {0, 0};
+    int savings;
 
     vp8_clear_system_state(); //__asm emms;
 
@@ -1490,10 +1942,17 @@
     // Build the cofficient contexts based on counts collected in encode loop
     build_coeff_contexts(cpi);
 
+    //vp8_prob bestupd = find_coef_update_prob(cpi);
+
     /* dry run to see if there is any udpate at all needed */
+    savings = 0;
     do
     {
-        int j = 0;
+#if CONFIG_NEWUPDATE
+        int j = !i;
+#else
+        int j = 0;      /* token/prob index */
+#endif
         do
         {
             int k = 0;
@@ -1503,20 +1962,33 @@
                 int t = 0;      /* token/prob index */
                 do
                 {
-                    const vp8_prob newp = cpi->frame_coef_probs [i][j][k][t];
+                    vp8_prob newp = cpi->frame_coef_probs [i][j][k][t];
                     vp8_prob *Pold = cpi->common.fc.coef_probs [i][j][k] + t;
                     const vp8_prob upd = vp8_coef_update_probs [i][j][k][t];
                     int s = prev_coef_savings[t];
                     int u = 0;
 
+#if CONFIG_NEWUPDATE && defined(SEARCH_NEWP)
+                    s = prob_update_savings_search(
+                            cpi->frame_branch_ct [i][j][k][t],
+                            *Pold, &newp, upd);
+                    if (s > 0 && newp != *Pold)
+                        u = 1;
+                    if (u)
+                        savings += s - (int)(vp8_cost_zero(upd));
+                    else
+                        savings -= (int)(vp8_cost_zero(upd));
+#else
                     s = prob_update_savings(
                             cpi->frame_branch_ct [i][j][k][t],
                             *Pold, newp, upd);
-
                     if (s > 0)
                         u = 1;
+                    if (u)
+                        savings += s;
+#endif
 
-                    update += u;
+                    update[u]++;
                 }
                 while (++t < ENTROPY_NODES);
             }
@@ -1525,8 +1997,14 @@
         while (++j < COEF_BANDS);
     }
     while (++i < BLOCK_TYPES);
+
+    //printf("Update %d %d, savings %d\n", update[0], update[1], savings);
     /* Is coef updated at all */
-    if(update==0)
+#if CONFIG_NEWUPDATE
+    if(update[1] == 0 || savings < 0)
+#else
+    if(update[1] == 0)
+#endif
     {
         vp8_write_bit(w, 0);
     }
@@ -1536,7 +2014,11 @@
         i=0;
         do
         {
-            int j = 0;
+#if CONFIG_NEWUPDATE
+            int j = !i;
+#else
+            int j = 0;      /* token/prob index */
+#endif
             do
             {
                 int k = 0;
@@ -1548,19 +2030,27 @@
                     int t = 0;      /* token/prob index */
                     do
                     {
-                        const vp8_prob newp = cpi->frame_coef_probs [i][j][k][t];
+                        vp8_prob newp = cpi->frame_coef_probs [i][j][k][t];
                         vp8_prob *Pold = cpi->common.fc.coef_probs [i][j][k] + t;
                         const vp8_prob upd = vp8_coef_update_probs [i][j][k][t];
                         int s = prev_coef_savings[t];
                         int u = 0;
 
+#if CONFIG_NEWUPDATE && defined(SEARCH_NEWP)
+                        s = prob_update_savings_search(
+                            cpi->frame_branch_ct [i][j][k][t],
+                            *Pold, &newp, upd);
+                        if (s > 0 && newp != *Pold)
+                            u = 1;
+#else
                         s = prob_update_savings(
                             cpi->frame_branch_ct [i][j][k][t],
                             *Pold, newp, upd);
-
                         if (s > 0)
                             u = 1;
+#endif
 
+
                         vp8_write(w, u, upd);
 #ifdef ENTROPY_STATS
                         ++ tree_update_hist [i][j][k][t] [u];
@@ -1568,14 +2058,20 @@
                         if (u)
                         {
                             /* send/use new probability */
-                            *Pold = newp;
+#if CONFIG_NEWUPDATE
+                            vp8_encode_term_subexp(
+                                w, remap_prob(newp, *Pold), SUBEXP_PARAM, 255);
+                            //printf("delp = %d/%d/%d\n", *Pold, remap_prob(newp, *Pold, 256), newp);
+#else
                             vp8_write_literal(w, newp, 8);
+#endif
+                            *Pold = newp;
                         }
                     }
                     while (++t < ENTROPY_NODES);
 
                     // Accum token counts for generation of default statistics
-#ifdef ENTROPY_STATS
+#if 0//def ENTROPY_STATS
                     t = 0;
                     do
                     {
@@ -1596,11 +2092,16 @@
     if(cpi->common.txfm_mode == ALLOW_8X8)
     {
         /* dry run to see if update is necessary */
-        update = 0;
+        update[0] = update[1] = 0;
+        savings = 0;
         i = 0;
         do
         {
-            int j = 0;
+#if CONFIG_NEWUPDATE
+            int j = !i;
+#else
+            int j = 0;      /* token/prob index */
+#endif
             do
             {
                 int k = 0;
@@ -1611,25 +2112,33 @@
                     do
                     {
                         const unsigned int *ct  = cpi->frame_branch_ct_8x8 [i][j][k][t];
-                        const vp8_prob newp = cpi->frame_coef_probs_8x8 [i][j][k][t];
+                        vp8_prob newp = cpi->frame_coef_probs_8x8 [i][j][k][t];
                         vp8_prob *Pold = cpi->common.fc.coef_probs_8x8 [i][j][k] + t;
-                        const vp8_prob old = *Pold;
+                        const vp8_prob oldp = *Pold;
                         const vp8_prob upd = vp8_coef_update_probs_8x8 [i][j][k][t];
-                        const int old_b = vp8_cost_branch(ct, old);
-                        const int new_b = vp8_cost_branch(ct, newp);
-                        const int update_b = 8 + vp8_cost_upd;
-                        const int s = old_b - new_b - update_b;
+#if CONFIG_NEWUPDATE && defined(SEARCH_NEWP)
+                        const int s = prob_update_savings_search(ct, oldp, &newp, upd);
+                        const int u = s > 0 && newp != oldp ? 1 : 0;
+                        if (u)
+                            savings += s - (int)(vp8_cost_zero(upd));
+                        else
+                            savings -= (int)(vp8_cost_zero(upd));
+#else
+                        const int s = prob_update_savings(ct, oldp, newp, upd);
                         const int u = s > 0 ? 1 : 0;
+                        if (u)
+                            savings += s;
+#endif
 
 #ifdef ENTROPY_STATS
                         ++ tree_update_hist_8x8 [i][j][k][t] [u];
 #endif
-                        update += u;
+                        update[u]++;
                     }
                     while (++t < MAX_ENTROPY_TOKENS - 1);
 
                     // Accum token counts for generation of default statistics
-#ifdef ENTROPY_STATS
+#if 0//def ENTROPY_STATS
                     t = 0;
 
                     do
@@ -1636,7 +2145,7 @@
                     {
                         context_counters_8x8 [i][j][k][t] += cpi->coef_counts_8x8 [i][j][k][t];
                     }
-                    while (++t < MAX_ENTROPY_TOKENS);
+                    while (++t < MAX_ENTROPY_TOKENS - 1);
 
 #endif
                 }
@@ -1646,10 +2155,13 @@
         }
         while (++i < BLOCK_TYPES);
 
-        if(update == 0)
+#if CONFIG_NEWUPDATE
+        if (update[1] == 0 || savings < 0)
+#else
+        if (update[1] == 0)
+#endif
         {
             vp8_write_bit(w, 0);
-
         }
         else
         {
@@ -1657,7 +2169,11 @@
             i = 0;
             do
             {
-                int j = 0;
+#if CONFIG_NEWUPDATE
+                int j = !i;
+#else
+                int j = 0;      /* token/prob index */
+#endif
                 do
                 {
                     int k = 0;
@@ -1667,15 +2183,17 @@
                         do
                         {
                             const unsigned int *ct  = cpi->frame_branch_ct_8x8 [i][j][k][t];
-                            const vp8_prob newp = cpi->frame_coef_probs_8x8 [i][j][k][t];
+                            vp8_prob newp = cpi->frame_coef_probs_8x8 [i][j][k][t];
                             vp8_prob *Pold = cpi->common.fc.coef_probs_8x8 [i][j][k] + t;
-                            const vp8_prob old = *Pold;
+                            const vp8_prob oldp = *Pold;
                             const vp8_prob upd = vp8_coef_update_probs_8x8 [i][j][k][t];
-                            const int old_b = vp8_cost_branch(ct, old);
-                            const int new_b = vp8_cost_branch(ct, newp);
-                            const int update_b = 8 + vp8_cost_upd;
-                            const int s = old_b - new_b - update_b;
+#if CONFIG_NEWUPDATE && defined(SEARCH_NEWP)
+                            const int s = prob_update_savings_search(ct, oldp, &newp, upd);
+                            const int u = s > 0 && newp != oldp ? 1 : 0;
+#else
+                            const int s = prob_update_savings(ct, oldp, newp, upd);
                             const int u = s > 0 ? 1 : 0;
+#endif
                             vp8_write(w, u, upd);
 #ifdef ENTROPY_STATS
                             ++ tree_update_hist_8x8 [i][j][k][t] [u];
@@ -1683,13 +2201,18 @@
                             if (u)
                             {
                                 /* send/use new probability */
-                                *Pold = newp;
+#if CONFIG_NEWUPDATE
+                                vp8_encode_term_subexp(
+                                    w, remap_prob(newp, oldp), SUBEXP_PARAM, 255);
+#else
                                 vp8_write_literal(w, newp, 8);
+#endif
+                                *Pold = newp;
                             }
                         }
                         while (++t < MAX_ENTROPY_TOKENS - 1);
                         // Accum token counts for generation of default statistics
-#ifdef ENTROPY_STATS
+#if 0//def ENTROPY_STATS
                         t = 0;
                         do
                         {
@@ -1706,6 +2229,7 @@
         }
     }
 }
+
 #ifdef PACKET_TESTING
 FILE *vpxlogc = 0;
 #endif
@@ -1802,6 +2326,10 @@
     Sectionbits[active_section = 1] += sizeof(VP8_HEADER) * 8 * 256;
 #endif
 
+#if CONFIG_NEWUPDATE
+    compute_update_table();
+#endif
+
     //vp8_kf_default_bmode_probs() is called in vp8_setup_key_frame() once for each
     //K frame before encode frame. pc->kf_bmode_prob doesn't get changed anywhere
     //else. No need to call it again here. --yw
@@ -2165,7 +2693,13 @@
 
     vp8_clear_system_state();  //__asm emms;
 
+#if COEFUPDATETYPE == 2
+    update_coef_probs2(cpi);
+#elif COEFUPDATETYPE == 3
+    update_coef_probs3(cpi);
+#else
     update_coef_probs(cpi);
+#endif
 
 #ifdef ENTROPY_STATS
     active_section = 2;
--- a/vp8/encoder/boolhuff.c
+++ b/vp8/encoder/boolhuff.c
@@ -68,3 +68,99 @@
         vp8_encode_bool(br, (1 & (data >> bit)), 0x80);
 
 }
+
+#if CONFIG_NEWUPDATE
+int recenter_nonneg(int v, int m)
+{
+    if (v > (m<<1)) return v;
+    else if (v >= m) return ((v-m)<<1);
+    else return ((m-v)<<1)-1;
+}
+
+static int get_unsigned_bits(unsigned num_values)
+{
+    int cat=0;
+    if ((num_values--)<=1) return 0;
+    while (num_values>0)
+    {
+        cat++;
+        num_values>>=1;
+    }
+    return cat;
+}
+
+void vp8_encode_uniform(BOOL_CODER *br, int v, int n)
+{
+    int l = get_unsigned_bits(n);
+    if (l == 0) return;
+    int m = (1<<l)-n;
+    if (v<m)
+        vp8_encode_value(br, v, l-1);
+    else
+    {
+        vp8_encode_value(br, m+((v-m)>>1), l-1);
+        vp8_encode_value(br, (v-m)&1, 1);
+    }
+}
+
+int vp8_count_uniform(int v, int n)
+{
+    int l = get_unsigned_bits(n);
+    if (l == 0) return 0;
+    int m = (1<<l)-n;
+    if (v<m)
+        return l-1;
+    else
+        return l;
+}
+
+void vp8_encode_term_subexp(BOOL_CODER *br, int word, int k, int num_syms)
+{
+    int i = 0;
+    int mk = 0;
+    while (1) {
+        int b = (i?k+i-1:k);
+        int a = (1<<b);
+        if (num_syms<=mk+3*a) {
+            vp8_encode_uniform(br, word-mk, num_syms-mk);
+            break;
+        } else {
+            int t = (word>=mk+a);
+            vp8_encode_value(br, t, 1);
+            if (t) {
+                i=i+1;
+                mk+=a;
+            } else {
+                vp8_encode_value(br, word-mk, b);
+                break;
+            }
+        }
+    }
+}
+
+int vp8_count_term_subexp(int word, int k, int num_syms)
+{
+    int count = 0;
+    int i = 0;
+    int mk = 0;
+    while (1) {
+        int b = (i?k+i-1:k);
+        int a = (1<<b);
+        if (num_syms<=mk+3*a) {
+            count += vp8_count_uniform(num_syms-mk, word-mk);
+            break;
+        } else {
+            int t = (word>=mk+a);
+            count++;
+            if (t) {
+                i=i+1;
+                mk+=a;
+            } else {
+                count += b;
+                break;
+            }
+        }
+    }
+    return count;
+}
+#endif
--- a/vp8/encoder/boolhuff.h
+++ b/vp8/encoder/boolhuff.h
@@ -41,6 +41,13 @@
 extern void vp8_stop_encode(BOOL_CODER *bc);
 extern const unsigned int vp8_prob_cost[256];
 
+#if CONFIG_NEWENTROPY
+extern void vp8_encode_uniform(BOOL_CODER *bc, int v, int n);
+extern void vp8_encode_term_subexp(BOOL_CODER *bc, int v, int k, int n);
+extern int vp8_count_uniform(int v, int n);
+extern int vp8_count_term_subexp(int v, int k, int n);
+extern int recenter_nonneg(int v, int m);
+#endif
 
 DECLARE_ALIGNED(16, extern const unsigned char, vp8_norm[256]);
 
--- a/vp8/encoder/treewriter.h
+++ b/vp8/encoder/treewriter.h
@@ -50,6 +50,14 @@
             + (ct[1] * vp8_cost_one(p))) >> 8;
 }
 
+static __inline unsigned int vp8_cost_branch256(const unsigned int ct[2], vp8_prob p)
+{
+    /* Imitate existing calculation */
+
+    return ((ct[0] * vp8_cost_zero(p))
+            + (ct[1] * vp8_cost_one(p)));
+}
+
 /* Small functions to write explicit values and tokens, as well as
    estimate their lengths. */