shithub: libvpx

--- a/vp8/encoder/arm/arm_csystemdependent.c

+++ b/vp8/encoder/arm/arm_csystemdependent.c

@@ -54,8 +54,6 @@

         /*cpi->rtcd.variance.getmbss               = vp8_get_mb_ss_c;*/

         /*cpi->rtcd.variance.get16x16prederror     = vp8_get16x16pred_error_c;

-        cpi->rtcd.variance.get8x8var             = vp8_get8x8var_c;

-        cpi->rtcd.variance.get16x16var           = vp8_get16x16var_c;;

         cpi->rtcd.variance.get4x4sse_cs          = vp8_get4x4sse_cs_c;*/

         /*cpi->rtcd.fdct.short4x4                  = vp8_short_fdct4x4_c;

@@ -104,8 +102,6 @@

         /*cpi->rtcd.variance.getmbss               = vp8_get_mb_ss_c;*/

         cpi->rtcd.variance.get16x16prederror     = vp8_get16x16pred_error_neon;

-        /*cpi->rtcd.variance.get8x8var             = vp8_get8x8var_c;

-        cpi->rtcd.variance.get16x16var           = vp8_get16x16var_c;*/

         cpi->rtcd.variance.get4x4sse_cs          = vp8_get4x4sse_cs_neon;

         cpi->rtcd.fdct.short4x4                  = vp8_short_fdct4x4_neon;

--- a/vp8/encoder/arm/variance_arm.h

+++ b/vp8/encoder/arm/variance_arm.h

@@ -84,8 +84,6 @@

 //extern prototype_getmbss(vp8_get_mb_ss_c);

 extern prototype_variance(vp8_mse16x16_neon);

 extern prototype_get16x16prederror(vp8_get16x16pred_error_neon);

-//extern prototype_variance2(vp8_get8x8var_c);

-//extern prototype_variance2(vp8_get16x16var_c);

 extern prototype_get16x16prederror(vp8_get4x4sse_cs_neon);

 #if !CONFIG_RUNTIME_CPU_DETECT

@@ -151,12 +149,6 @@

 #undef  vp8_variance_get16x16prederror

 #define vp8_variance_get16x16prederror vp8_get16x16pred_error_neon

-//#undef  vp8_variance_get8x8var

-//#define vp8_variance_get8x8var vp8_get8x8var_c

-//#undef  vp8_variance_get16x16var

-//#define vp8_variance_get16x16var vp8_get16x16var_c

 #undef  vp8_variance_get4x4sse_cs

 #define vp8_variance_get4x4sse_cs vp8_get4x4sse_cs_neon

--- a/vp8/encoder/encodeframe.c

+++ b/vp8/encoder/encodeframe.c

@@ -84,8 +84,6 @@

     unsigned int act;

     unsigned int sse;

-    int sum;

     /* TODO: This could also be done over smaller areas (8x8), but that would

      *  require extensive changes elsewhere, as lambda is assumed to be fixed

      *  over an entire MB in most of the code.

@@ -93,14 +91,9 @@

      *  lambda using a non-linear combination (e.g., the smallest, or second

      *  smallest, etc.).

*/

-    VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16var)(x->src.y_buffer,

-                    x->src.y_stride, VP8_VAR_OFFS, 0, &sse, &sum);

-    /* This requires a full 32 bits of precision. */

-    act = (sse<<8) - sum*sum;

-    /* Drop 4 to give us some headroom to work with. */

-    act = (act + 8) >> 4;

+    act =     VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16)(x->src.y_buffer,

+                    x->src.y_stride, VP8_VAR_OFFS, 0, &sse);

+    act = act<<4;

     /* If the region is flat, lower the activity some more. */

     if (act < 8<<12)

--- a/vp8/encoder/generic/csystemdependent.c

+++ b/vp8/encoder/generic/csystemdependent.c

@@ -68,8 +68,6 @@

     cpi->rtcd.variance.getmbss               = vp8_get_mb_ss_c;

     cpi->rtcd.variance.get16x16prederror     = vp8_get16x16pred_error_c;

-    cpi->rtcd.variance.get8x8var             = vp8_get8x8var_c;

-    cpi->rtcd.variance.get16x16var           = vp8_get16x16var_c;;

     cpi->rtcd.variance.get4x4sse_cs          = vp8_get4x4sse_cs_c;

     cpi->rtcd.fdct.short4x4                  = vp8_short_fdct4x4_c;

--- a/vp8/encoder/ppc/csystemdependent.c

+++ b/vp8/encoder/ppc/csystemdependent.c

@@ -49,8 +49,6 @@

 void (*vp8_fast_quantize_b)(BLOCK *b, BLOCKD *d);

 unsigned int (*vp8_get16x16pred_error)(unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr, int ref_stride);

-unsigned int (*vp8_get8x8var)(unsigned char *src_ptr, int  source_stride, unsigned char *ref_ptr, int  recon_stride, unsigned int *SSE, int *Sum);

-unsigned int (*vp8_get16x16var)(unsigned char *src_ptr, int  source_stride, unsigned char *ref_ptr, int  recon_stride, unsigned int *SSE, int *Sum);

 unsigned int (*vp8_get4x4sse_cs)(unsigned char *src_ptr, int  source_stride, unsigned char *ref_ptr, int  recon_stride);

 // c imports

@@ -89,8 +87,6 @@

 extern unsigned int vp8_get_mb_ss_c(short *);

 extern unsigned int vp8_get16x16pred_error_c(unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr, int ref_stride);

-extern unsigned int vp8_get8x8var_c(unsigned char *src_ptr, int  source_stride, unsigned char *ref_ptr, int  recon_stride, unsigned int *SSE, int *Sum);

-extern unsigned int vp8_get16x16var_c(unsigned char *src_ptr, int  source_stride, unsigned char *ref_ptr, int  recon_stride, unsigned int *SSE, int *Sum);

 extern unsigned int vp8_get4x4sse_cs_c(unsigned char *src_ptr, int  source_stride, unsigned char *ref_ptr, int  recon_stride);

 // ppc

@@ -150,8 +146,6 @@

     vp8_get_mb_ss                 = vp8_get_mb_ss_c;

     vp8_get16x16pred_error       = vp8_get16x16pred_error_c;

-    vp8_get8x8var               = vp8_get8x8var_ppc;

-    vp8_get16x16var             = vp8_get16x16var_ppc;

     vp8_get4x4sse_cs            = vp8_get4x4sse_cs_c;

     vp8_sad16x16                = vp8_sad16x16_ppc;

--- a/vp8/encoder/rdopt.c

+++ b/vp8/encoder/rdopt.c

@@ -2182,8 +2182,8 @@

             else if (x->encode_breakout)

-                int sum;

                 unsigned int sse;

+                unsigned int var;

                 int threshold = (xd->block[0].dequant[1]

                             * xd->block[0].dequant[1] >>4);

@@ -2190,21 +2190,20 @@

                 if(threshold < x->encode_breakout)

                     threshold = x->encode_breakout;

-                VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16var)

-                    (x->src.y_buffer, x->src.y_stride,

-                     x->e_mbd.predictor, 16, &sse, &sum);

+                var = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16)

+                        (x->src.y_buffer, x->src.y_stride,

+                        x->e_mbd.predictor, 16, &sse);

                 if (sse < threshold)

-                    // Check u and v to make sure skip is ok

-                    int sse2 = 0;

+                     unsigned int q2dc = xd->block[24].dequant[0];

                     /* If theres is no codeable 2nd order dc

                        or a very small uniform pixel change change */

-                    if (abs(sum) < (xd->block[24].dequant[0]<<2)||

-                        ((sum * sum>>8) > sse && abs(sum) <128))

+                    if ((sse - var < q2dc * q2dc >>4) ||

+                        (sse /2 > var && sse-var < 64))

-                        sse2 = VP8_UVSSE(x, IF_RTCD(&cpi->rtcd.variance));

+                        // Check u and v to make sure skip is ok

+                        int sse2=  VP8_UVSSE(x, IF_RTCD(&cpi->rtcd.variance));

                         if (sse2 * 2 < threshold)

                             x->skip = 1;

--- a/vp8/encoder/variance.h

+++ b/vp8/encoder/variance.h

@@ -313,16 +313,6 @@

 #endif

 extern prototype_get16x16prederror(vp8_variance_get16x16prederror);

-#ifndef vp8_variance_get8x8var

-#define vp8_variance_get8x8var vp8_get8x8var_c

-#endif

-extern prototype_variance2(vp8_variance_get8x8var);

-#ifndef vp8_variance_get16x16var

-#define vp8_variance_get16x16var vp8_get16x16var_c

-#endif

-extern prototype_variance2(vp8_variance_get16x16var);

 #ifndef vp8_variance_get4x4sse_cs

 #define vp8_variance_get4x4sse_cs vp8_get4x4sse_cs_c

 #endif

@@ -377,8 +367,6 @@

     vp8_variance_fn_t        mse16x16;

     vp8_get16x16prederror_fn_t get16x16prederror;

-    vp8_variance2_fn_t       get8x8var;

-    vp8_variance2_fn_t       get16x16var;

     vp8_get16x16prederror_fn_t get4x4sse_cs;

     vp8_sad_multi_fn_t       sad16x16x3;

--- a/vp8/encoder/variance_c.c

+++ b/vp8/encoder/variance_c.c

@@ -61,40 +61,6 @@

-unsigned int

-vp8_get8x8var_c

-(

-    const unsigned char *src_ptr,

-    int  source_stride,

-    const unsigned char *ref_ptr,

-    int  recon_stride,

-    unsigned int *SSE,

-    int *Sum

-)

-{

-    variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, SSE, Sum);

-    return (*SSE - (((*Sum) * (*Sum)) >> 6));

-}

-unsigned int

-vp8_get16x16var_c

-(

-    const unsigned char *src_ptr,

-    int  source_stride,

-    const unsigned char *ref_ptr,

-    int  recon_stride,

-    unsigned int *SSE,

-    int *Sum

-)

-{

-    variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, SSE, Sum);

-    return (*SSE - (((*Sum) * (*Sum)) >> 8));

-}

 unsigned int vp8_variance16x16_c(

     const unsigned char *src_ptr,

--- a/vp8/encoder/x86/variance_mmx.c

+++ b/vp8/encoder/x86/variance_mmx.c

@@ -84,36 +84,6 @@

     int ref_stride

);

-unsigned int vp8_get16x16var_mmx(

-    const unsigned char *src_ptr,

-    int  source_stride,

-    const unsigned char *ref_ptr,

-    int  recon_stride,

-    unsigned int *SSE,

-    int *SUM

-)

-{

-    unsigned int sse0, sse1, sse2, sse3, var;

-    int sum0, sum1, sum2, sum3, avg;

-    vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;

-    vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);

-    vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2) ;

-    vp8_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3);

-    var = sse0 + sse1 + sse2 + sse3;

-    avg = sum0 + sum1 + sum2 + sum3;

-    *SSE = var;

-    *SUM = avg;

-    return (var - ((avg * avg) >> 8));

-}

 unsigned int vp8_variance4x4_mmx(

     const unsigned char *src_ptr,

--- a/vp8/encoder/x86/variance_x86.h

+++ b/vp8/encoder/x86/variance_x86.h

@@ -43,7 +43,6 @@

 extern prototype_variance(vp8_mse16x16_mmx);

 extern prototype_get16x16prederror(vp8_get16x16pred_error_mmx);

 extern prototype_variance2(vp8_get8x8var_mmx);

-extern prototype_variance2(vp8_get16x16var_mmx);

 extern prototype_get16x16prederror(vp8_get4x4sse_cs_mmx);

 #if !CONFIG_RUNTIME_CPU_DETECT

@@ -113,12 +112,6 @@

 #undef  vp8_variance_get16x16prederror

 #define vp8_variance_get16x16prederror vp8_get16x16pred_error_mmx

-#undef  vp8_variance_get8x8var

-#define vp8_variance_get8x8var vp8_get8x8var_mmx

-#undef  vp8_variance_get16x16var

-#define vp8_variance_get16x16var vp8_get16x16var_mmx

 #undef  vp8_variance_get4x4sse_cs

 #define vp8_variance_get4x4sse_cs vp8_get4x4sse_cs_mmx

@@ -218,12 +211,6 @@

 #undef  vp8_variance_get16x16prederror

 #define vp8_variance_get16x16prederror vp8_get16x16pred_error_sse2

-#undef  vp8_variance_get8x8var

-#define vp8_variance_get8x8var vp8_get8x8var_sse2

-#undef  vp8_variance_get16x16var

-#define vp8_variance_get16x16var vp8_get16x16var_sse2

 #endif

 #endif

--- a/vp8/encoder/x86/x86_csystemdependent.c

+++ b/vp8/encoder/x86/x86_csystemdependent.c

@@ -176,8 +176,6 @@

         cpi->rtcd.variance.getmbss               = vp8_get_mb_ss_mmx;

         cpi->rtcd.variance.get16x16prederror     = vp8_get16x16pred_error_mmx;

-        cpi->rtcd.variance.get8x8var             = vp8_get8x8var_mmx;

-        cpi->rtcd.variance.get16x16var           = vp8_get16x16var_mmx;

         cpi->rtcd.variance.get4x4sse_cs          = vp8_get4x4sse_cs_mmx;

         cpi->rtcd.fdct.short4x4                  = vp8_short_fdct4x4_mmx;

@@ -227,9 +225,6 @@

         cpi->rtcd.variance.getmbss               = vp8_get_mb_ss_sse2;

         cpi->rtcd.variance.get16x16prederror     = vp8_get16x16pred_error_sse2;

-        cpi->rtcd.variance.get8x8var             = vp8_get8x8var_sse2;

-        cpi->rtcd.variance.get16x16var           = vp8_get16x16var_sse2;

         /* cpi->rtcd.variance.get4x4sse_cs  not implemented for wmt */;

--

⑨