shithub: libvpx

--- a/vp9/encoder/vp9_block.h

+++ b/vp9/encoder/vp9_block.h

@@ -65,8 +65,14 @@

   int skip_optimize;

   int q_index;

+  // The equivalent error at the current rdmult of one whole bit (not one

+  // bitcost unit).

   int errorperbit;

+  // The equivalent SAD error of one (whole) bit at the current quantizer

+  // for large blocks.

   int sadperbit16;

+  // The equivalent SAD error of one (whole) bit at the current quantizer

+  // for sub-8x8 blocks.

   int sadperbit4;

   int rddiv;

   int rdmult;

--- a/vp9/encoder/vp9_mcomp.c

+++ b/vp9/encoder/vp9_mcomp.c

@@ -80,27 +80,29 @@

   return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * weight, 7);

-static int mv_err_cost(const MV *mv, const MV *ref,

-                       const int *mvjcost, int *mvcost[2],

-                       int error_per_bit) {

+#define PIXEL_TRANSFORM_ERROR_SCALE 4

+static int mv_err_cost(const MV *mv, const MV *ref, const int *mvjcost,

+                       int *mvcost[2], int error_per_bit) {

   if (mvcost) {

-    const MV diff = { mv->row - ref->row,

-                      mv->col - ref->col };

-    // TODO(aconverse): See if this shift needs to be tied to

-    // VP9_PROB_COST_SHIFT.

-    return ROUND_POWER_OF_TWO((unsigned)mv_cost(&diff, mvjcost, mvcost) *

-                                  error_per_bit, 13);

+    const MV diff = {mv->row - ref->row, mv->col - ref->col};

+    // This product sits at a 32-bit ceiling right now and any additional

+    // accuracy in either bit cost or error cost will cause it to overflow.

+    return ROUND_POWER_OF_TWO(

+        (unsigned)mv_cost(&diff, mvjcost, mvcost) * error_per_bit,

+        RDDIV_BITS + VP9_PROB_COST_SHIFT - RD_EPB_SHIFT +

+            PIXEL_TRANSFORM_ERROR_SCALE);

   return 0;

 static int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, const MV *ref,

-                          int error_per_bit) {

+                          int sad_per_bit) {

   const MV diff = { mv->row - ref->row,

                     mv->col - ref->col };

-  // TODO(aconverse): See if this shift needs to be tied to VP9_PROB_COST_SHIFT.

-  return ROUND_POWER_OF_TWO((unsigned)mv_cost(&diff, x->nmvjointsadcost,

-                                      x->nmvsadcost) * error_per_bit, 8);

+  return ROUND_POWER_OF_TWO(

+      (unsigned)mv_cost(&diff, x->nmvjointsadcost, x->nmvsadcost) *

+          sad_per_bit,

+      VP9_PROB_COST_SHIFT);

 void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride) {

@@ -152,12 +154,13 @@

  * could reduce the area.

*/

-/* estimated cost of a motion vector (r,c) */

+/* Estimated (square) error cost of a motion vector (r,c). The 14 scale comes

+ * from the same math as in mv_err_cost(). */

 #define MVC(r, c)                                              \

     (mvcost ?                                                  \

      ((unsigned)(mvjcost[((r) != rr) * 2 + ((c) != rc)] +      \

        mvcost[0][((r) - rr)] + mvcost[1][((c) - rc)]) *        \

-      error_per_bit + 4096) >> 13 : 0)

+      error_per_bit + 8192) >> 14 : 0)

 // convert motion vector component to offset for sv[a]f calc

--- a/vp9/encoder/vp9_quantize.c

+++ b/vp9/encoder/vp9_quantize.c

@@ -342,8 +342,7 @@

   x->skip_block = segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP);

   x->q_index = qindex;

-  x->errorperbit = rdmult >> 6;

-  x->errorperbit += (x->errorperbit == 0);

+  set_error_per_bit(x, rdmult);

   vp9_initialize_me_consts(cpi, x, x->q_index);

--- a/vp9/encoder/vp9_rd.c

+++ b/vp9/encoder/vp9_rd.c

@@ -41,7 +41,6 @@

 #include "vp9/encoder/vp9_tokenize.h"

 #define RD_THRESH_POW      1.25

-#define RD_MULT_EPB_RATIO  64

 // Factor to weigh the rate for switchable interp filters.

 #define SWITCHABLE_INTERP_RATE_FACTOR 1

@@ -279,8 +278,7 @@

   rd->RDDIV = RDDIV_BITS;  // In bits (to multiply D by 128).

   rd->RDMULT = vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q);

-  x->errorperbit = rd->RDMULT / RD_MULT_EPB_RATIO;

-  x->errorperbit += (x->errorperbit == 0);

+  set_error_per_bit(x, rd->RDMULT);

   x->select_tx_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL &&

                        cm->frame_type != KEY_FRAME) ? 0 : 1;

--- a/vp9/encoder/vp9_rd.h

+++ b/vp9/encoder/vp9_rd.h

@@ -24,6 +24,7 @@

 #endif

 #define RDDIV_BITS          7

+#define RD_EPB_SHIFT        6

 #define RDCOST(RM, DM, R, D) \

   (ROUND_POWER_OF_TWO(((int64_t)R) * (RM), VP9_PROB_COST_SHIFT) + (D << DM))

@@ -166,6 +167,11 @@

 static INLINE int rd_less_than_thresh(int64_t best_rd, int thresh,

                                       int thresh_fact) {

     return best_rd < ((int64_t)thresh * thresh_fact >> 5) || thresh == INT_MAX;

+}

+static INLINE void set_error_per_bit(MACROBLOCK *x, int rdmult) {

+  x->errorperbit = rdmult >> RD_EPB_SHIFT;

+  x->errorperbit += (x->errorperbit == 0);

 void vp9_mv_pred(struct VP9_COMP *cpi, MACROBLOCK *x,

--- a/vp9/encoder/x86/vp9_diamond_search_sad_avx.c

+++ b/vp9/encoder/x86/vp9_diamond_search_sad_avx.c

@@ -47,12 +47,12 @@

 static int mvsad_err_cost(const MACROBLOCK *x, const int_mv mv, const MV *ref,

-                          int error_per_bit) {

+                          int sad_per_bit) {

   const int_mv diff = pack_int_mv(mv.as_mv.row - ref->row,

                                   mv.as_mv.col - ref->col);

   return ROUND_POWER_OF_TWO((unsigned)mv_cost(diff, x->nmvjointsadcost,

                                               x->nmvsadcost) *

-                                              error_per_bit, 8);

+                                              sad_per_bit, VP9_PROB_COST_SHIFT);

 /*****************************************************************************

--

⑨