shithub: libvpx

Download patch

ref: 09f9c5d7f90bd19dfc1994926a6e35680ba9545c
parent: fdc977afc6b431c7577e70d151f89ea726bcaf8f
author: Alex Converse <aconverse@google.com>
date: Wed Feb 17 08:39:44 EST 2016

Better workaround for Bug 1089.

Don't initialize first pass costs for a number of symbols where first
pass probabilities aren't initialized.

This brings a 1.22x first pass speedup.

https://bugs.chromium.org/p/webm/issues/detail?id=1089

Change-Id: I97438c357bd88f52f5a15c697031cf0c3cc8f510

--- a/vp9/common/vp9_entropy.c
+++ b/vp9/common/vp9_entropy.c
@@ -728,10 +728,8 @@
 };
 
 static void extend_to_full_distribution(vpx_prob *probs, vpx_prob p) {
-  // TODO(aconverse): model[PIVOT_NODE] should never be zero.
-  // https://code.google.com/p/webm/issues/detail?id=1089
-  memcpy(probs, vp9_pareto8_full[p == 0 ? 254 : p - 1],
-         MODEL_NODES * sizeof(vpx_prob));
+  assert(p != 0);
+  memcpy(probs, vp9_pareto8_full[p - 1], MODEL_NODES * sizeof(vpx_prob));
 }
 
 void vp9_model_to_full_probs(const vpx_prob *model, vpx_prob *full) {
--- a/vp9/encoder/vp9_cost.c
+++ b/vp9/encoder/vp9_cost.c
@@ -12,9 +12,8 @@
 #include "vp9/encoder/vp9_cost.h"
 
 /* round(-log2(i/256.) * (1 << VP9_PROB_COST_SHIFT))
-   Begins and ends with a bogus entry to satisfy use of prob=0 in the firstpass.
-   https://code.google.com/p/webm/issues/detail?id=1089 */
-const uint16_t vp9_prob_cost[257] = {
+   Begins with a bogus entry for simpler addressing. */
+const uint16_t vp9_prob_cost[256] = {
     4096, 4096, 3584, 3284, 3072, 2907, 2772, 2659, 2560, 2473, 2395, 2325,
     2260, 2201, 2147, 2096, 2048, 2003, 1961, 1921, 1883, 1847, 1813, 1780,
     1748, 1718, 1689, 1661, 1635, 1609, 1584, 1559, 1536, 1513, 1491, 1470,
@@ -36,7 +35,7 @@
     125,  122,  119,  115,  112,  109,  105,  102,  99,   95,   92,   89,
     86,   82,   79,   76,   73,   70,   66,   63,   60,   57,   54,   51,
     48,   45,   42,   38,   35,   32,   29,   26,   23,   20,   18,   15,
-    12,   9,    6,    3,     3};
+    12,   9,    6,    3};
 
 static void cost(int *costs, vpx_tree tree, const vpx_prob *probs,
                  int i, int c) {
@@ -43,6 +42,7 @@
   const vpx_prob prob = probs[i / 2];
   int b;
 
+  assert(prob != 0);
   for (b = 0; b <= 1; ++b) {
     const int cc = c + vp9_cost_bit(prob, b);
     const vpx_tree_index ii = tree[i + b];
--- a/vp9/encoder/vp9_cost.h
+++ b/vp9/encoder/vp9_cost.h
@@ -18,7 +18,7 @@
 extern "C" {
 #endif
 
-extern const uint16_t vp9_prob_cost[257];
+extern const uint16_t vp9_prob_cost[256];
 
 // The factor to scale from cost in bits to cost in vp9_prob_cost units.
 #define VP9_PROB_COST_SHIFT 9
--- a/vp9/encoder/vp9_rd.c
+++ b/vp9/encoder/vp9_rd.c
@@ -286,29 +286,37 @@
   set_block_thresholds(cm, rd);
   set_partition_probs(cm, xd);
 
-  if (!cpi->sf.use_nonrd_pick_mode || cm->frame_type == KEY_FRAME)
-    fill_token_costs(x->token_costs, cm->fc->coef_probs);
+  if (cpi->oxcf.pass == 1) {
+    if (!frame_is_intra_only(cm))
+      vp9_build_nmv_cost_table(
+          x->nmvjointcost,
+          cm->allow_high_precision_mv ? x->nmvcost_hp : x->nmvcost,
+          &cm->fc->nmvc, cm->allow_high_precision_mv);
+  } else {
+    if (!cpi->sf.use_nonrd_pick_mode || cm->frame_type == KEY_FRAME)
+      fill_token_costs(x->token_costs, cm->fc->coef_probs);
 
-  if (cpi->sf.partition_search_type != VAR_BASED_PARTITION ||
-      cm->frame_type == KEY_FRAME) {
-    for (i = 0; i < PARTITION_CONTEXTS; ++i)
-      vp9_cost_tokens(cpi->partition_cost[i], get_partition_probs(xd, i),
-                      vp9_partition_tree);
-  }
+    if (cpi->sf.partition_search_type != VAR_BASED_PARTITION ||
+        cm->frame_type == KEY_FRAME) {
+      for (i = 0; i < PARTITION_CONTEXTS; ++i)
+        vp9_cost_tokens(cpi->partition_cost[i], get_partition_probs(xd, i),
+                        vp9_partition_tree);
+    }
 
-  if (!cpi->sf.use_nonrd_pick_mode || (cm->current_video_frame & 0x07) == 1 ||
-      cm->frame_type == KEY_FRAME) {
-    fill_mode_costs(cpi);
+    if (!cpi->sf.use_nonrd_pick_mode || (cm->current_video_frame & 0x07) == 1 ||
+        cm->frame_type == KEY_FRAME) {
+      fill_mode_costs(cpi);
 
-    if (!frame_is_intra_only(cm)) {
-      vp9_build_nmv_cost_table(x->nmvjointcost,
-                               cm->allow_high_precision_mv ? x->nmvcost_hp
-                                                           : x->nmvcost,
-                               &cm->fc->nmvc, cm->allow_high_precision_mv);
+      if (!frame_is_intra_only(cm)) {
+        vp9_build_nmv_cost_table(
+            x->nmvjointcost,
+            cm->allow_high_precision_mv ? x->nmvcost_hp : x->nmvcost,
+            &cm->fc->nmvc, cm->allow_high_precision_mv);
 
-      for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
-        vp9_cost_tokens((int *)cpi->inter_mode_cost[i],
-                        cm->fc->inter_mode_probs[i], vp9_inter_mode_tree);
+        for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
+          vp9_cost_tokens((int *)cpi->inter_mode_cost[i],
+                          cm->fc->inter_mode_probs[i], vp9_inter_mode_tree);
+      }
     }
   }
 }