ref: 16724b7c9387e227e1c5020d2bcf833d303806f0
parent: ea2a5754b4bc25dbb4ce5eb03bd632dfd756540c
parent: 5e76dfcc70527b05c5106aef9b8aeffd64d21a17
author: John Koleszar <jkoleszar@google.com>
date: Fri Dec 3 19:05:07 EST 2010
Merge remote branch 'origin/master' into experimental Change-Id: I11cd10dba54d0f3f96640dadc97199e5733f1888
--- a/vp8/encoder/block.h
+++ b/vp8/encoder/block.h
@@ -81,6 +81,7 @@
int errthresh;
int rddiv;
int rdmult;
+ INT64 activity_sum;
int mvcosts[2][MVvals+1];
int *mvcost[2];
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -375,7 +375,63 @@
}
+/* activity_avg must be positive, or flat regions could get a zero weight
+ * (infinite lambda), which confounds analysis.
+ * This also avoids the need for divide by zero checks in
+ * vp8_activity_masking().
+ */
+#define VP8_ACTIVITY_AVG_MIN (64)
+/* This is used as a reference when computing the source variance for the
+ * purposes of activity masking.
+ * Eventually this should be replaced by custom no-reference routines,
+ * which will be faster.
+ */
+static const unsigned char VP8_VAR_OFFS[16]=
+{
+ 128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128
+};
+
+unsigned int vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x)
+{
+ unsigned int act;
+ unsigned int sse;
+ int sum;
+ unsigned int a;
+ unsigned int b;
+ unsigned int d;
+ /* TODO: This could also be done over smaller areas (8x8), but that would
+ * require extensive changes elsewhere, as lambda is assumed to be fixed
+ * over an entire MB in most of the code.
+ * Another option is to compute four 8x8 variances, and pick a single
+ * lambda using a non-linear combination (e.g., the smallest, or second
+ * smallest, etc.).
+ */
+ VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16var)(x->src.y_buffer,
+ x->src.y_stride, VP8_VAR_OFFS, 0, &sse, &sum);
+ /* This requires a full 32 bits of precision. */
+ act = (sse<<8) - sum*sum;
+ /* Drop 4 to give us some headroom to work with. */
+ act = (act + 8) >> 4;
+ /* If the region is flat, lower the activity some more. */
+ if (act < 8<<12)
+ act = act < 5<<12 ? act : 5<<12;
+ /* TODO: For non-flat regions, edge regions should receive less masking
+ * than textured regions, but identifying edge regions quickly and
+ * reliably enough is still a subject of experimentation.
+ * This will be most noticeable near edges with a complex shape (e.g.,
+ * text), but the 4x4 transform size should make this less of a problem
+ * than it would be for an 8x8 transform.
+ */
+ /* Apply the masking to the RD multiplier. */
+ a = act + 4*cpi->activity_avg;
+ b = 4*act + cpi->activity_avg;
+ x->rdmult = (unsigned int)(((INT64)x->rdmult*b + (a>>1))/a);
+ return act;
+}
+
+
+
static
void encode_mb_row(VP8_COMP *cpi,
VP8_COMMON *cm,
@@ -386,6 +442,7 @@
int *segment_counts,
int *totalrate)
{
+ INT64 activity_sum = 0;
int i;
int recon_yoffset, recon_uvoffset;
int mb_col;
@@ -437,6 +494,11 @@
xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
xd->left_available = (mb_col != 0);
+ x->rddiv = cpi->RDDIV;
+ x->rdmult = cpi->RDMULT;
+
+ activity_sum += vp8_activity_masking(cpi, x);
+
// Is segmentation enabled
// MB level adjutment to quantizer
if (xd->segmentation_enabled)
@@ -543,6 +605,7 @@
// this is to account for the border
xd->mode_info_context++;
x->partition_info++;
+ x->activity_sum += activity_sum;
}
@@ -659,8 +722,7 @@
vp8_setup_block_ptrs(x);
- x->rddiv = cpi->RDDIV;
- x->rdmult = cpi->RDMULT;
+ x->activity_sum = 0;
#if 0
// Experimental rd code
@@ -715,11 +777,12 @@
else
{
#if CONFIG_MULTITHREAD
+ int i;
+
vp8cx_init_mbrthread_data(cpi, x, cpi->mb_row_ei, 1, cpi->encoding_thread_count);
for (mb_row = 0; mb_row < cm->mb_rows; mb_row += (cpi->encoding_thread_count + 1))
{
- int i;
cpi->current_mb_col_main = -1;
for (i = 0; i < cpi->encoding_thread_count; i++)
@@ -797,6 +860,11 @@
totalrate += cpi->mb_row_ei[i].totalrate;
}
+ for (i = 0; i < cpi->encoding_thread_count; i++)
+ {
+ x->activity_sum += cpi->mb_row_ei[i].mb.activity_sum;
+ }
+
#endif
}
@@ -931,6 +999,14 @@
// Keep record of the total distortion this time around for future use
cpi->last_frame_distortion = cpi->frame_distortion;
#endif
+
+ /* Update the average activity for the next frame.
+ * This is feed-forward for now; it could also be saved in two-pass, or
+ * done during lookahead when that is eventually added.
+ */
+ cpi->activity_avg = (unsigned int )(x->activity_sum/cpi->common.MBs);
+ if (cpi->activity_avg < VP8_ACTIVITY_AVG_MIN)
+ cpi->activity_avg = VP8_ACTIVITY_AVG_MIN;
}
void vp8_setup_block_ptrs(MACROBLOCK *x)
--- a/vp8/encoder/encodeintra.c
+++ b/vp8/encoder/encodeintra.c
@@ -105,7 +105,7 @@
#if !(CONFIG_REALTIME_ONLY)
#if 1
- if (x->optimize==2 ||(x->optimize && x->rddiv > 1))
+ if (x->optimize)
vp8_optimize_mby(x, rtcd);
#endif
--- a/vp8/encoder/encodemb.c
+++ b/vp8/encoder/encodemb.c
@@ -635,7 +635,7 @@
vp8_quantize_mb(x);
#if !(CONFIG_REALTIME_ONLY)
- if (x->optimize==2 ||(x->optimize && x->rddiv > 1))
+ if (x->optimize)
vp8_optimize_mb(x, rtcd);
#endif
--- a/vp8/encoder/ethreading.c
+++ b/vp8/encoder/ethreading.c
@@ -61,6 +61,7 @@
int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;
int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
volatile int *last_row_current_mb_col;
+ INT64 activity_sum = 0;
if (ithread > 0)
last_row_current_mb_col = &cpi->mb_row_ei[ithread-1].current_mb_col;
@@ -111,6 +112,11 @@
xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
xd->left_available = (mb_col != 0);
+ x->rddiv = cpi->RDDIV;
+ x->rdmult = cpi->RDMULT;
+
+ activity_sum += vp8_activity_masking(cpi, x);
+
// Is segmentation enabled
// MB level adjutment to quantizer
if (xd->segmentation_enabled)
@@ -197,6 +203,7 @@
// this is to account for the border
xd->mode_info_context++;
x->partition_info++;
+ x->activity_sum += activity_sum;
x->src.y_buffer += 16 * x->src.y_stride * (cpi->encoding_thread_count + 1) - 16 * cm->mb_cols;
x->src.u_buffer += 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols;
@@ -240,8 +247,6 @@
z->sadperbit16 = x->sadperbit16;
z->sadperbit4 = x->sadperbit4;
z->errthresh = x->errthresh;
- z->rddiv = x->rddiv;
- z->rdmult = x->rdmult;
/*
z->mv_col_min = x->mv_col_min;
@@ -392,8 +397,7 @@
vp8_setup_block_ptrs(mb);
- mb->rddiv = cpi->RDDIV;
- mb->rdmult = cpi->RDMULT;
+ mb->activity_sum = 0;
mbd->left_context = &cm->left_context;
mb->mvc = cm->fc.mvc;
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -2205,6 +2205,8 @@
init_context_counters();
#endif
+ /* Initialize the feed-forward activity masking. */
+ cpi->activity_avg = 90<<12;
cpi->frames_since_key = 8; // Give a sensible default for the first frame.
cpi->key_frame_frequency = cpi->oxcf.key_freq;
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -321,6 +321,7 @@
int mvcostmultiplier;
int subseqblockweight;
int errthresh;
+ unsigned int activity_avg;
int RDMULT;
int RDDIV ;
@@ -675,6 +676,8 @@
void vp8_encode_frame(VP8_COMP *cpi);
void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size);
+
+unsigned int vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x);
int rd_cost_intra_mb(MACROBLOCKD *x);