shithub: libvpx

--- a/vp8/encoder/arm/mcomp_arm.c

+++ /dev/null

@@ -1,615 +1,0 @@

-/*

- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.

- *

- *  Use of this source code is governed by a BSD-style license

- *  that can be found in the LICENSE file in the root of the source

- *  tree. An additional intellectual property rights grant can be found

- *  in the file PATENTS.  All contributing project authors may

- *  be found in the AUTHORS file in the root of the source tree.

- */

-#include "mcomp.h"

-#include "vpx_mem/vpx_mem.h"

-#include <stdio.h>

-#include <limits.h>

-#include <math.h>

-#ifdef ENTROPY_STATS

-static int mv_ref_ct [31] [4] [2];

-static int mv_mode_cts [4] [2];

-#endif

-extern unsigned int vp8_sub_pixel_variance16x16s_neon

-(

-    unsigned char  *src_ptr,

-    int  src_pixels_per_line,

-    int  xoffset,

-    int  yoffset,

-    unsigned char *dst_ptr,

-    int dst_pixels_per_line,

-    unsigned int *sse

-);

-extern unsigned int vp8_sub_pixel_variance16x16s_4_0_neon

-(

-    unsigned char  *src_ptr,

-    int  src_pixels_per_line,

-    unsigned char *dst_ptr,

-    int dst_pixels_per_line,

-    unsigned int *sse

-);

-extern unsigned int vp8_sub_pixel_variance16x16s_0_4_neon

-(

-    unsigned char  *src_ptr,

-    int  src_pixels_per_line,

-    unsigned char *dst_ptr,

-    int dst_pixels_per_line,

-    unsigned int *sse

-);

-extern unsigned int vp8_sub_pixel_variance16x16s_4_4_neon

-(

-    unsigned char  *src_ptr,

-    int  src_pixels_per_line,

-    unsigned char *dst_ptr,

-    int dst_pixels_per_line,

-    unsigned int *sse

-);

-int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, int *mvcost[2])

-{

-    int bestmse = INT_MAX;

-    MV startmv;

-    //MV this_mv;

-    MV this_mv;

-    unsigned char *y = *(d->base_pre) + d->pre + (bestmv->row) * d->pre_stride + bestmv->col;

-    unsigned char *z = (*(b->base_src) + b->src);

-    int left, right, up, down, diag;

-    unsigned int sse;

-    int whichdir ;

-    // Trap uncodable vectors

-    if ((abs((bestmv->col << 3) - ref_mv->col) > MAX_FULL_PEL_VAL) || (abs((bestmv->row << 3) - ref_mv->row) > MAX_FULL_PEL_VAL))

-    {

-        bestmv->row <<= 3;

-        bestmv->col <<= 3;

-        return INT_MAX;

-    }

-    // central mv

-    bestmv->row <<= 3;

-    bestmv->col <<= 3;

-    startmv = *bestmv;

-    // calculate central point error

-    bestmse = vf(y, d->pre_stride, z, b->src_stride, &sse);

-    bestmse += vp8_mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

-    // go left then right and check error

-    this_mv.row = startmv.row;

-    this_mv.col = ((startmv.col - 8) | 4);

-    left = vp8_sub_pixel_variance16x16s_4_0_neon(y - 1, d->pre_stride, z, b->src_stride, &sse);

-    left += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

-    if (left < bestmse)

-    {

-        *bestmv = this_mv;

-        bestmse = left;

-    }

-    this_mv.col += 8;

-    right = vp8_sub_pixel_variance16x16s_4_0_neon(y, d->pre_stride, z, b->src_stride, &sse);

-    right += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

-    if (right < bestmse)

-    {

-        *bestmv = this_mv;

-        bestmse = right;

-    }

-    // go up then down and check error

-    this_mv.col = startmv.col;

-    this_mv.row = ((startmv.row - 8) | 4);

-    up = vp8_sub_pixel_variance16x16s_0_4_neon(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);

-    up += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

-    if (up < bestmse)

-    {

-        *bestmv = this_mv;

-        bestmse = up;

-    }

-    this_mv.row += 8;

-    down = vp8_sub_pixel_variance16x16s_0_4_neon(y, d->pre_stride, z, b->src_stride, &sse);

-    down += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

-    if (down < bestmse)

-    {

-        *bestmv = this_mv;

-        bestmse = down;

-    }

-    // now check 1 more diagonal

-    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);

-    //for(whichdir =0;whichdir<4;whichdir++)

-    //{

-    this_mv = startmv;

-    switch (whichdir)

-    {

-    case 0:

-        this_mv.col = (this_mv.col - 8) | 4;

-        this_mv.row = (this_mv.row - 8) | 4;

-        diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - 1 - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);

-        break;

-    case 1:

-        this_mv.col += 4;

-        this_mv.row = (this_mv.row - 8) | 4;

-        diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);

-        break;

-    case 2:

-        this_mv.col = (this_mv.col - 8) | 4;

-        this_mv.row += 4;

-        diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - 1, d->pre_stride, z, b->src_stride, &sse);

-        break;

-    case 3:

-        this_mv.col += 4;

-        this_mv.row += 4;

-        diag = vp8_sub_pixel_variance16x16s_4_4_neon(y, d->pre_stride, z, b->src_stride, &sse);

-        break;

-    }

-    diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

-    if (diag < bestmse)

-    {

-        *bestmv = this_mv;

-        bestmse = diag;

-    }

-//  }

-    // time to check quarter pels.

-    if (bestmv->row < startmv.row)

-        y -= d->pre_stride;

-    if (bestmv->col < startmv.col)

-        y--;

-    startmv = *bestmv;

-    // go left then right and check error

-    this_mv.row = startmv.row;

-    if (startmv.col & 7)

-    {

-        this_mv.col = startmv.col - 2;

-        left = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);

-    }

-    else

-    {

-        this_mv.col = (startmv.col - 8) | 6;

-        left = svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);

-    }

-    left += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

-    if (left < bestmse)

-    {

-        *bestmv = this_mv;

-        bestmse = left;

-    }

-    this_mv.col += 4;

-    right = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);

-    right += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

-    if (right < bestmse)

-    {

-        *bestmv = this_mv;

-        bestmse = right;

-    }

-    // go up then down and check error

-    this_mv.col = startmv.col;

-    if (startmv.row & 7)

-    {

-        this_mv.row = startmv.row - 2;

-        up = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);

-    }

-    else

-    {

-        this_mv.row = (startmv.row - 8) | 6;

-        up = svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);

-    }

-    up += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

-    if (up < bestmse)

-    {

-        *bestmv = this_mv;

-        bestmse = up;

-    }

-    this_mv.row += 4;

-    down = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);

-    down += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

-    if (down < bestmse)

-    {

-        *bestmv = this_mv;

-        bestmse = down;

-    }

-    // now check 1 more diagonal

-    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);

-//  for(whichdir=0;whichdir<4;whichdir++)

-//  {

-    this_mv = startmv;

-    switch (whichdir)

-    {

-    case 0:

-        if (startmv.row & 7)

-        {

-            this_mv.row -= 2;

-            if (startmv.col & 7)

-            {

-                this_mv.col -= 2;

-                diag = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);

-            }

-            else

-            {

-                this_mv.col = (startmv.col - 8) | 6;

-                diag = svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);;

-            }

-        }

-        else

-        {

-            this_mv.row = (startmv.row - 8) | 6;

-            if (startmv.col & 7)

-            {

-                this_mv.col -= 2;

-                diag = svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);

-            }

-            else

-            {

-                this_mv.col = (startmv.col - 8) | 6;

-                diag = svf(y - d->pre_stride - 1, d->pre_stride, 6, 6, z, b->src_stride, &sse);

-            }

-        }

-        break;

-    case 1:

-        this_mv.col += 2;

-        if (startmv.row & 7)

-        {

-            this_mv.row -= 2;

-            diag = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);

-        }

-        else

-        {

-            this_mv.row = (startmv.row - 8) | 6;

-            diag = svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);

-        }

-        break;

-    case 2:

-        this_mv.row += 2;

-        if (startmv.col & 7)

-        {

-            this_mv.col -= 2;

-            diag = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);

-        }

-        else

-        {

-            this_mv.col = (startmv.col - 8) | 6;

-            diag = svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);;

-        }

-        break;

-    case 3:

-        this_mv.col += 2;

-        this_mv.row += 2;

-        diag = svf(y, d->pre_stride,  this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);

-        break;

-    }

-    diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

-    if (diag < bestmse)

-    {

-        *bestmv = this_mv;

-        bestmse = diag;

-    }

-//  }

-    return bestmse;

-}

-int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, int *mvcost[2])

-{

-    int bestmse = INT_MAX;

-    MV startmv;

-    //MV this_mv;

-    MV this_mv;

-    unsigned char *y = *(d->base_pre) + d->pre + (bestmv->row) * d->pre_stride + bestmv->col;

-    unsigned char *z = (*(b->base_src) + b->src);

-    int left, right, up, down, diag;

-    unsigned int sse;

-    // Trap uncodable vectors

-    if ((abs((bestmv->col << 3) - ref_mv->col) > MAX_FULL_PEL_VAL) || (abs((bestmv->row << 3) - ref_mv->row) > MAX_FULL_PEL_VAL))

-    {

-        bestmv->row <<= 3;

-        bestmv->col <<= 3;

-        return INT_MAX;

-    }

-    // central mv

-    bestmv->row <<= 3;

-    bestmv->col <<= 3;

-    startmv = *bestmv;

-    // calculate central point error

-    bestmse = vf(y, d->pre_stride, z, b->src_stride, &sse);

-    bestmse += vp8_mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

-    // go left then right and check error

-    this_mv.row = startmv.row;

-    this_mv.col = ((startmv.col - 8) | 4);

-    left = vp8_sub_pixel_variance16x16s_4_0_neon(y - 1, d->pre_stride, z, b->src_stride, &sse);

-    left += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

-    if (left < bestmse)

-    {

-        *bestmv = this_mv;

-        bestmse = left;

-    }

-    this_mv.col += 8;

-    right = vp8_sub_pixel_variance16x16s_4_0_neon(y, d->pre_stride, z, b->src_stride, &sse);

-    right += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

-    if (right < bestmse)

-    {

-        *bestmv = this_mv;

-        bestmse = right;

-    }

-    // go up then down and check error

-    this_mv.col = startmv.col;

-    this_mv.row = ((startmv.row - 8) | 4);

-    up = vp8_sub_pixel_variance16x16s_0_4_neon(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);

-    up += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

-    if (up < bestmse)

-    {

-        *bestmv = this_mv;

-        bestmse = up;

-    }

-    this_mv.row += 8;

-    down = vp8_sub_pixel_variance16x16s_0_4_neon(y, d->pre_stride, z, b->src_stride, &sse);

-    down += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

-    if (down < bestmse)

-    {

-        *bestmv = this_mv;

-        bestmse = down;

-    }

-    // somewhat strangely not doing all the diagonals for half pel is slower than doing them.

-#if 0

-    // now check 1 more diagonal -

-    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);

-    this_mv = startmv;

-    switch (whichdir)

-    {

-    case 0:

-        this_mv.col = (this_mv.col - 8) | 4;

-        this_mv.row = (this_mv.row - 8) | 4;

-        diag = svf(y - 1 - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);

-        break;

-    case 1:

-        this_mv.col += 4;

-        this_mv.row = (this_mv.row - 8) | 4;

-        diag = svf(y - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);

-        break;

-    case 2:

-        this_mv.col = (this_mv.col - 8) | 4;

-        this_mv.row += 4;

-        diag = svf(y - 1, d->pre_stride, 4, 4, z, b->src_stride, &sse);

-        break;

-    case 3:

-        this_mv.col += 4;

-        this_mv.row += 4;

-        diag = svf(y, d->pre_stride, 4, 4, z, b->src_stride, &sse);

-        break;

-    }

-    diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

-    if (diag < bestmse)

-    {

-        *bestmv = this_mv;

-        bestmse = diag;

-    }

-#else

-    this_mv.col = (this_mv.col - 8) | 4;

-    this_mv.row = (this_mv.row - 8) | 4;

-    diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - 1 - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);

-    diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

-    if (diag < bestmse)

-    {

-        *bestmv = this_mv;

-        bestmse = diag;

-    }

-    this_mv.col += 8;

-    diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);

-    diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

-    if (diag < bestmse)

-    {

-        *bestmv = this_mv;

-        bestmse = diag;

-    }

-    this_mv.col = (this_mv.col - 8) | 4;

-    this_mv.row = startmv.row + 4;

-    diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - 1, d->pre_stride, z, b->src_stride, &sse);

-    diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

-    if (diag < bestmse)

-    {

-        *bestmv = this_mv;

-        bestmse = diag;

-    }

-    this_mv.col += 8;

-    diag = vp8_sub_pixel_variance16x16s_4_4_neon(y, d->pre_stride, z, b->src_stride, &sse);

-    diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

-    if (diag < bestmse)

-    {

-        *bestmv = this_mv;

-        bestmse = diag;

-    }

-#endif

-    return bestmse;

-}

-#ifdef ENTROPY_STATS

-void print_mode_context(void)

-{

-    FILE *f = fopen("modecont.c", "w");

-    int i, j;

-    fprintf(f, "#include \"entropy.h\"\n");

-    fprintf(f, "const int vp8_mode_contexts[6][4] =\n");

-    fprintf(f, "{\n");

-    for (j = 0; j < 6; j++)

-    {

-        fprintf(f, "  { // %d \n", j);

-        fprintf(f, "    ");

-        for (i = 0; i < 4; i++)

-        {

-            int overal_prob;

-            int this_prob;

-            int count; // = mv_ref_ct[j][i][0]+mv_ref_ct[j][i][1];

-            // Overall probs

-            count = mv_mode_cts[i][0] + mv_mode_cts[i][1];

-            if (count)

-                overal_prob = 256 * mv_mode_cts[i][0] / count;

-            else

-                overal_prob = 128;

-            if (overal_prob == 0)

-                overal_prob = 1;

-            // context probs

-            count = mv_ref_ct[j][i][0] + mv_ref_ct[j][i][1];

-            if (count)

-                this_prob = 256 * mv_ref_ct[j][i][0] / count;

-            else

-                this_prob = 128;

-            if (this_prob == 0)

-                this_prob = 1;

-            fprintf(f, "%5d, ", this_prob);

-            //fprintf(f,"%5d, %5d, %8d,", this_prob, overal_prob, (this_prob << 10)/overal_prob);

-            //fprintf(f,"%8d, ", (this_prob << 10)/overal_prob);

-        }

-        fprintf(f, "  },\n");

-    }

-    fprintf(f, "};\n");

-    fclose(f);

-}

-/* MV ref count ENTROPY_STATS stats code */

-#ifdef ENTROPY_STATS

-void init_mv_ref_counts()

-{

-    vpx_memset(mv_ref_ct, 0, sizeof(mv_ref_ct));

-    vpx_memset(mv_mode_cts, 0, sizeof(mv_mode_cts));

-}

-void accum_mv_refs(MB_PREDICTION_MODE m, const int ct[4])

-{

-    if (m == ZEROMV)

-    {

-        ++mv_ref_ct [ct[0]] [0] [0];

-        ++mv_mode_cts[0][0];

-    }

-    else

-    {

-        ++mv_ref_ct [ct[0]] [0] [1];

-        ++mv_mode_cts[0][1];

-        if (m == NEARESTMV)

-        {

-            ++mv_ref_ct [ct[1]] [1] [0];

-            ++mv_mode_cts[1][0];

-        }

-        else

-        {

-            ++mv_ref_ct [ct[1]] [1] [1];

-            ++mv_mode_cts[1][1];

-            if (m == NEARMV)

-            {

-                ++mv_ref_ct [ct[2]] [2] [0];

-                ++mv_mode_cts[2][0];

-            }

-            else

-            {

-                ++mv_ref_ct [ct[2]] [2] [1];

-                ++mv_mode_cts[2][1];

-                if (m == NEWMV)

-                {

-                    ++mv_ref_ct [ct[3]] [3] [0];

-                    ++mv_mode_cts[3][0];

-                }

-                else

-                {

-                    ++mv_ref_ct [ct[3]] [3] [1];

-                    ++mv_mode_cts[3][1];

-                }

-            }

-        }

-    }

-}

-#endif/* END MV ref count ENTROPY_STATS stats code */

-#endif

--- a/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm

+++ b/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm

@@ -9,9 +9,9 @@

-    EXPORT  |vp8_sub_pixel_variance16x16s_4_0_neon|

-    EXPORT  |vp8_sub_pixel_variance16x16s_0_4_neon|

-    EXPORT  |vp8_sub_pixel_variance16x16s_4_4_neon|

+    EXPORT  |vp8_variance_halfpixvar16x16_h_neon|

+    EXPORT  |vp8_variance_halfpixvar16x16_v_neon|

+    EXPORT  |vp8_variance_halfpixvar16x16_hv_neon|

     EXPORT  |vp8_sub_pixel_variance16x16s_neon|

ARM

     REQUIRE8

@@ -20,7 +20,7 @@

     AREA ||.text||, CODE, READONLY, ALIGN=2

 ;================================================

-;unsigned int vp8_sub_pixel_variance16x16s_4_0_neon

+;unsigned int vp8_variance_halfpixvar16x16_h_neon

;(

 ;    unsigned char  *src_ptr, r0

 ;    int  src_pixels_per_line,  r1

@@ -29,7 +29,7 @@

 ;    unsigned int *sse

;);

 ;================================================

-|vp8_sub_pixel_variance16x16s_4_0_neon| PROC

+|vp8_variance_halfpixvar16x16_h_neon| PROC

     push            {lr}

     mov             r12, #4                  ;loop counter

@@ -120,7 +120,7 @@

     ENDP

 ;================================================

-;unsigned int vp8_sub_pixel_variance16x16s_0_4_neon

+;unsigned int vp8_variance_halfpixvar16x16_v_neon

;(

 ;    unsigned char  *src_ptr, r0

 ;    int  src_pixels_per_line,  r1

@@ -129,7 +129,7 @@

 ;    unsigned int *sse

;);

 ;================================================

-|vp8_sub_pixel_variance16x16s_0_4_neon| PROC

+|vp8_variance_halfpixvar16x16_v_neon| PROC

     push            {lr}

     mov             r12, #4                     ;loop counter

@@ -216,7 +216,7 @@

     ENDP

 ;================================================

-;unsigned int vp8_sub_pixel_variance16x16s_4_4_neon

+;unsigned int vp8_variance_halfpixvar16x16_hv_neon

;(

 ;    unsigned char  *src_ptr, r0

 ;    int  src_pixels_per_line,  r1

@@ -225,7 +225,7 @@

 ;    unsigned int *sse

;);

 ;================================================

-|vp8_sub_pixel_variance16x16s_4_4_neon| PROC

+|vp8_variance_halfpixvar16x16_hv_neon| PROC

     push            {lr}

     vld1.u8         {d0, d1, d2, d3}, [r0], r1      ;load src data

--- a/vp8/encoder/arm/variance_arm.h

+++ b/vp8/encoder/arm/variance_arm.h

@@ -30,6 +30,9 @@

 //extern prototype_subpixvariance(vp8_sub_pixel_variance8x16_c);

 //extern prototype_subpixvariance(vp8_sub_pixel_variance16x8_c);

 extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_neon);

+extern prototype_variance(vp8_variance_halfpixvar16x16_h_neon);

+extern prototype_variance(vp8_variance_halfpixvar16x16_v_neon);

+extern prototype_variance(vp8_variance_halfpixvar16x16_hv_neon);

 //extern prototype_getmbss(vp8_get_mb_ss_c);

 extern prototype_variance(vp8_mse16x16_neon);

@@ -83,6 +86,15 @@

 #undef  vp8_variance_subpixvar16x16

 #define vp8_variance_subpixvar16x16 vp8_sub_pixel_variance16x16_neon

+#undef  vp8_variance_halfpixvar16x16_h

+#define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_neon

+#undef  vp8_variance_halfpixvar16x16_v

+#define vp8_variance_halfpixvar16x16_v vp8_variance_halfpixvar16x16_v_neon

+#undef  vp8_variance_halfpixvar16x16_hv

+#define vp8_variance_halfpixvar16x16_hv vp8_variance_halfpixvar16x16_hv_neon

 //#undef  vp8_variance_getmbss

 //#define vp8_variance_getmbss vp8_get_mb_ss_c

--- a/vp8/encoder/firstpass.c

+++ b/vp8/encoder/firstpass.c

@@ -462,12 +462,11 @@

     int step_param = 3;                                       //3;          // Dont search over full range for first pass

     int further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param; //3;

     int n;

-    vp8_variance_fn_ptr_t v_fn_ptr;

+    vp8_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[BLOCK_16X16];

     int new_mv_mode_penalty = 256;

+    // override the default variance function to use MSE

     v_fn_ptr.vf    = VARIANCE_INVOKE(IF_RTCD(&cpi->rtcd.variance), mse16x16);

-    v_fn_ptr.sdf   = cpi->fn_ptr.sdf;

-    v_fn_ptr.sdx4df = cpi->fn_ptr.sdx4df;

     // Set up pointers for this macro block recon buffer

     xd->pre.y_buffer = recon_buffer->y_buffer + recon_yoffset;

--- a/vp8/encoder/mcomp.c

+++ b/vp8/encoder/mcomp.c

@@ -186,7 +186,7 @@

 #define MVC(r,c) (((mvcost[0][(r)-rr] + mvcost[1][(c) - rc]) * error_per_bit + 128 )>>8 ) // estimated cost of a motion vector (r,c)

 #define PRE(r,c) (*(d->base_pre) + d->pre + ((r)>>2) * d->pre_stride + ((c)>>2)) // pointer to predictor base of a motionvector

 #define SP(x) (((x)&3)<<1) // convert motion vector component to offset for svf calc

-#define DIST(r,c) svf( PRE(r,c), d->pre_stride, SP(c),SP(r), z,b->src_stride,&sse) // returns subpixel variance error function.

+#define DIST(r,c) vfp->svf( PRE(r,c), d->pre_stride, SP(c),SP(r), z,b->src_stride,&sse) // returns subpixel variance error function.

 #define IFMVCV(r,c,s,e) if ( c >= minc && c <= maxc && r >= minr && r <= maxr) s else e;

 #define ERR(r,c) (MVC(r,c)+DIST(r,c)) // returns distortion + motion vector cost

 #define CHECK_BETTER(v,r,c) IFMVCV(r,c,{if((v = ERR(r,c)) < besterr) { besterr = v; br=r; bc=c; }}, v=INT_MAX;)// checks if (r,c) has better score than previous best

@@ -195,7 +195,7 @@

 //#define CHECK_BETTER(v,r,c) if((v = ERR(r,c)) < besterr) { besterr = v; br=r; bc=c; }

-int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, int *mvcost[2])

+int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2])

     unsigned char *y = *(d->base_pre) + d->pre + (bestmv->row) * d->pre_stride + bestmv->col;

     unsigned char *z = (*(b->base_src) + b->src);

@@ -220,7 +220,7 @@

     bestmv->col <<= 3;

     // calculate central point error

-    besterr = vf(y, d->pre_stride, z, b->src_stride, &sse);

+    besterr = vfp->vf(y, d->pre_stride, z, b->src_stride, &sse);

     besterr += vp8_mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

     // TODO: Each subsequent iteration checks at least one point in common with the last iteration could be 2 ( if diag selected)

@@ -309,7 +309,7 @@

 #undef CHECK_BETTER

 #undef MIN

 #undef MAX

-int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, int *mvcost[2])

+int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2])

     int bestmse = INT_MAX;

     MV startmv;

@@ -336,13 +336,13 @@

     startmv = *bestmv;

     // calculate central point error

-    bestmse = vf(y, d->pre_stride, z, b->src_stride, &sse);

+    bestmse = vfp->vf(y, d->pre_stride, z, b->src_stride, &sse);

     bestmse += vp8_mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

     // go left then right and check error

     this_mv.row = startmv.row;

     this_mv.col = ((startmv.col - 8) | 4);

-    left = svf(y - 1, d->pre_stride, 4, 0, z, b->src_stride, &sse);

+    left = vfp->svf_halfpix_h(y - 1, d->pre_stride, z, b->src_stride, &sse);

     left += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

     if (left < bestmse)

@@ -352,7 +352,7 @@

     this_mv.col += 8;

-    right = svf(y, d->pre_stride, 4, 0, z, b->src_stride, &sse);

+    right = vfp->svf_halfpix_h(y, d->pre_stride, z, b->src_stride, &sse);

     right += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

     if (right < bestmse)

@@ -364,7 +364,7 @@

     // go up then down and check error

     this_mv.col = startmv.col;

     this_mv.row = ((startmv.row - 8) | 4);

-    up = svf(y - d->pre_stride, d->pre_stride, 0, 4, z, b->src_stride, &sse);

+    up = vfp->svf_halfpix_v(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);

     up += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

     if (up < bestmse)

@@ -374,7 +374,7 @@

     this_mv.row += 8;

-    down = svf(y, d->pre_stride, 0, 4, z, b->src_stride, &sse);

+    down = vfp->svf_halfpix_v(y, d->pre_stride, z, b->src_stride, &sse);

     down += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

     if (down < bestmse)

@@ -386,10 +386,6 @@

     // now check 1 more diagonal

     whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);

-    // whichdir must be 0-4. Therefore, one of the cases below

-    // must run through. However, because there is no default

-    // and diag is not set elsewhere, we get a compile warning

-    diag = 0;

     //for(whichdir =0;whichdir<4;whichdir++)

//{

     this_mv = startmv;

@@ -399,22 +395,22 @@

     case 0:

         this_mv.col = (this_mv.col - 8) | 4;

         this_mv.row = (this_mv.row - 8) | 4;

-        diag = svf(y - 1 - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);

+        diag = vfp->svf_halfpix_hv(y - 1 - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);

         break;

     case 1:

         this_mv.col += 4;

         this_mv.row = (this_mv.row - 8) | 4;

-        diag = svf(y - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);

+        diag = vfp->svf_halfpix_hv(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);

         break;

     case 2:

         this_mv.col = (this_mv.col - 8) | 4;

         this_mv.row += 4;

-        diag = svf(y - 1, d->pre_stride, 4, 4, z, b->src_stride, &sse);

+        diag = vfp->svf_halfpix_hv(y - 1, d->pre_stride, z, b->src_stride, &sse);

         break;

     case 3:

         this_mv.col += 4;

         this_mv.row += 4;

-        diag = svf(y, d->pre_stride, 4, 4, z, b->src_stride, &sse);

+        diag = vfp->svf_halfpix_hv(y, d->pre_stride, z, b->src_stride, &sse);

         break;

@@ -446,12 +442,12 @@

     if (startmv.col & 7)

         this_mv.col = startmv.col - 2;

-        left = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);

+        left = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);

     else

         this_mv.col = (startmv.col - 8) | 6;

-        left = svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);

+        left = vfp->svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);

     left += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

@@ -463,7 +459,7 @@

     this_mv.col += 4;

-    right = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);

+    right = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);

     right += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

     if (right < bestmse)

@@ -478,12 +474,12 @@

     if (startmv.row & 7)

         this_mv.row = startmv.row - 2;

-        up = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);

+        up = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);

     else

         this_mv.row = (startmv.row - 8) | 6;

-        up = svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);

+        up = vfp->svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);

     up += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

@@ -495,7 +491,7 @@

     this_mv.row += 4;

-    down = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);

+    down = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);

     down += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

     if (down < bestmse)

@@ -523,12 +519,12 @@

             if (startmv.col & 7)

                 this_mv.col -= 2;

-                diag = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);

+                diag = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);

             else

                 this_mv.col = (startmv.col - 8) | 6;

-                diag = svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);;

+                diag = vfp->svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);;

         else

@@ -538,12 +534,12 @@

             if (startmv.col & 7)

                 this_mv.col -= 2;

-                diag = svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);

+                diag = vfp->svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);

             else

                 this_mv.col = (startmv.col - 8) | 6;

-                diag = svf(y - d->pre_stride - 1, d->pre_stride, 6, 6, z, b->src_stride, &sse);

+                diag = vfp->svf(y - d->pre_stride - 1, d->pre_stride, 6, 6, z, b->src_stride, &sse);

@@ -554,12 +550,12 @@

         if (startmv.row & 7)

             this_mv.row -= 2;

-            diag = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);

+            diag = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);

         else

             this_mv.row = (startmv.row - 8) | 6;

-            diag = svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);

+            diag = vfp->svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);

         break;

@@ -569,12 +565,12 @@

         if (startmv.col & 7)

             this_mv.col -= 2;

-            diag = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);

+            diag = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);

         else

             this_mv.col = (startmv.col - 8) | 6;

-            diag = svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);;

+            diag = vfp->svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);;

         break;

@@ -581,7 +577,7 @@

     case 3:

         this_mv.col += 2;

         this_mv.row += 2;

-        diag = svf(y, d->pre_stride,  this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);

+        diag = vfp->svf(y, d->pre_stride,  this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);

         break;

@@ -598,7 +594,7 @@

     return bestmse;

-int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, int *mvcost[2])

+int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2])

     int bestmse = INT_MAX;

     MV startmv;

@@ -623,13 +619,13 @@

     startmv = *bestmv;

     // calculate central point error

-    bestmse = vf(y, d->pre_stride, z, b->src_stride, &sse);

+    bestmse = vfp->vf(y, d->pre_stride, z, b->src_stride, &sse);

     bestmse += vp8_mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

     // go left then right and check error

     this_mv.row = startmv.row;

     this_mv.col = ((startmv.col - 8) | 4);

-    left = svf(y - 1, d->pre_stride, 4, 0, z, b->src_stride, &sse);

+    left = vfp->svf_halfpix_h(y - 1, d->pre_stride, z, b->src_stride, &sse);

     left += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

     if (left < bestmse)

@@ -639,7 +635,7 @@

     this_mv.col += 8;

-    right = svf(y, d->pre_stride, 4, 0, z, b->src_stride, &sse);

+    right = vfp->svf_halfpix_h(y, d->pre_stride, z, b->src_stride, &sse);

     right += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

     if (right < bestmse)

@@ -651,7 +647,7 @@

     // go up then down and check error

     this_mv.col = startmv.col;

     this_mv.row = ((startmv.row - 8) | 4);

-    up = svf(y - d->pre_stride, d->pre_stride, 0, 4, z, b->src_stride, &sse);

+    up = vfp->svf_halfpix_v(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);

     up += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

     if (up < bestmse)

@@ -661,7 +657,7 @@

     this_mv.row += 8;

-    down = svf(y, d->pre_stride, 0, 4, z, b->src_stride, &sse);

+    down = vfp->svf_halfpix_v(y, d->pre_stride, z, b->src_stride, &sse);

     down += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

     if (down < bestmse)

@@ -681,22 +677,22 @@

     case 0:

         this_mv.col = (this_mv.col - 8) | 4;

         this_mv.row = (this_mv.row - 8) | 4;

-        diag = svf(y - 1 - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);

+        diag = vfp->svf(y - 1 - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);

         break;

     case 1:

         this_mv.col += 4;

         this_mv.row = (this_mv.row - 8) | 4;

-        diag = svf(y - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);

+        diag = vfp->svf(y - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);

         break;

     case 2:

         this_mv.col = (this_mv.col - 8) | 4;

         this_mv.row += 4;

-        diag = svf(y - 1, d->pre_stride, 4, 4, z, b->src_stride, &sse);

+        diag = vfp->svf(y - 1, d->pre_stride, 4, 4, z, b->src_stride, &sse);

         break;

     case 3:

         this_mv.col += 4;

         this_mv.row += 4;

-        diag = svf(y, d->pre_stride, 4, 4, z, b->src_stride, &sse);

+        diag = vfp->svf(y, d->pre_stride, 4, 4, z, b->src_stride, &sse);

         break;

@@ -711,7 +707,7 @@

 #else

     this_mv.col = (this_mv.col - 8) | 4;

     this_mv.row = (this_mv.row - 8) | 4;

-    diag = svf(y - 1 - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);

+    diag = vfp->svf_halfpix_hv(y - 1 - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);

     diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

     if (diag < bestmse)

@@ -721,7 +717,7 @@

     this_mv.col += 8;

-    diag = svf(y - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);

+    diag = vfp->svf_halfpix_hv(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);

     diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

     if (diag < bestmse)

@@ -732,7 +728,7 @@

     this_mv.col = (this_mv.col - 8) | 4;

     this_mv.row = startmv.row + 4;

-    diag = svf(y - 1, d->pre_stride, 4, 4, z, b->src_stride, &sse);

+    diag = vfp->svf_halfpix_hv(y - 1, d->pre_stride, z, b->src_stride, &sse);

     diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

     if (diag < bestmse)

@@ -742,7 +738,7 @@

     this_mv.col += 8;

-    diag = svf(y, d->pre_stride, 4, 4, z, b->src_stride, &sse);

+    diag = vfp->svf_halfpix_hv(y, d->pre_stride, z, b->src_stride, &sse);

     diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

     if (diag < bestmse)

@@ -758,7 +754,7 @@

 #define MVC(r,c) (((mvsadcost[0][((r)<<2)-rr] + mvsadcost[1][((c)<<2) - rc]) * error_per_bit + 128 )>>8 ) // estimated cost of a motion vector (r,c)

 #define PRE(r,c) (*(d->base_pre) + d->pre + (r) * d->pre_stride + (c)) // pointer to predictor base of a motionvector

-#define DIST(r,c,v) sf( src,src_stride,PRE(r,c),d->pre_stride, v) // returns sad error score.

+#define DIST(r,c,v) vfp->sdf( src,src_stride,PRE(r,c),d->pre_stride, v) // returns sad error score.

 #define ERR(r,c,v) (MVC(r,c)+DIST(r,c,v)) // returns distortion + motion vector cost

 #define CHECK_BETTER(v,r,c) if ((v = ERR(r,c,besterr)) < besterr) { besterr = v; br=r; bc=c; } // checks if (r,c) has better score than previous best

 static const MV next_chkpts[6][3] =

@@ -780,8 +776,7 @@

     int search_param,

     int error_per_bit,

     int *num00,

-    vp8_variance_fn_t vf,

-    vp8_sad_fn_t      sf,

+    const vp8_variance_fn_ptr_t *vfp,

     int *mvsadcost[2],

     int *mvcost[2]

@@ -896,7 +891,7 @@

     best_mv->row = br;

     best_mv->col = bc;

-    return vf(src, src_stride, PRE(br, bc), d->pre_stride, &thiserr) + MVC(br, bc) ;

+    return vfp->vf(src, src_stride, PRE(br, bc), d->pre_stride, &thiserr) + MVC(br, bc) ;

 #undef MVC

 #undef PRE

--- a/vp8/encoder/mcomp.h

+++ b/vp8/encoder/mcomp.h

@@ -42,14 +42,15 @@

     int search_param,

     int error_per_bit,

     int *num00,

-    vp8_variance_fn_t vf,

-    vp8_sad_fn_t sf,

+    const vp8_variance_fn_ptr_t *vf,

     int *mvsadcost[2],

     int *mvcost[2]

);

-typedef int (fractional_mv_step_fp)(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, int *mvcost[2]);

+typedef int (fractional_mv_step_fp)

+    (MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv,

+     int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2]);

 extern fractional_mv_step_fp vp8_find_best_sub_pixel_step_iteratively;

 extern fractional_mv_step_fp vp8_find_best_sub_pixel_step;

 extern fractional_mv_step_fp vp8_find_best_half_pixel_step;

--- a/vp8/encoder/onyx_if.c

+++ b/vp8/encoder/onyx_if.c

@@ -2334,11 +2334,50 @@

     vp8cx_create_encoder_threads(cpi);

-    cpi->fn_ptr.sdf   = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16);

-    cpi->fn_ptr.vf    = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16);

-    cpi->fn_ptr.svf   = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar16x16);

-    cpi->fn_ptr.sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16x3);

-    cpi->fn_ptr.sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16x4d);

+    cpi->fn_ptr[BLOCK_16X16].sdf            = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16);

+    cpi->fn_ptr[BLOCK_16X16].vf             = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16);

+    cpi->fn_ptr[BLOCK_16X16].svf            = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar16x16);

+    cpi->fn_ptr[BLOCK_16X16].svf_halfpix_h  = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar16x16_h);

+    cpi->fn_ptr[BLOCK_16X16].svf_halfpix_v  = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar16x16_v);

+    cpi->fn_ptr[BLOCK_16X16].svf_halfpix_hv = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar16x16_hv);

+    cpi->fn_ptr[BLOCK_16X16].sdx3f          = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16x3);

+    cpi->fn_ptr[BLOCK_16X16].sdx4df         = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16x4d);

+    cpi->fn_ptr[BLOCK_16X8].sdf            = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8);

+    cpi->fn_ptr[BLOCK_16X8].vf             = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x8);

+    cpi->fn_ptr[BLOCK_16X8].svf            = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar16x8);

+    cpi->fn_ptr[BLOCK_16X8].svf_halfpix_h  = NULL;

+    cpi->fn_ptr[BLOCK_16X8].svf_halfpix_v  = NULL;

+    cpi->fn_ptr[BLOCK_16X8].svf_halfpix_hv = NULL;

+    cpi->fn_ptr[BLOCK_16X8].sdx3f          = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8x3);

+    cpi->fn_ptr[BLOCK_16X8].sdx4df         = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8x4d);

+    cpi->fn_ptr[BLOCK_8X16].sdf            = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16);

+    cpi->fn_ptr[BLOCK_8X16].vf             = VARIANCE_INVOKE(&cpi->rtcd.variance, var8x16);

+    cpi->fn_ptr[BLOCK_8X16].svf            = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar8x16);

+    cpi->fn_ptr[BLOCK_8X16].svf_halfpix_h  = NULL;

+    cpi->fn_ptr[BLOCK_8X16].svf_halfpix_v  = NULL;

+    cpi->fn_ptr[BLOCK_8X16].svf_halfpix_hv = NULL;

+    cpi->fn_ptr[BLOCK_8X16].sdx3f          = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16x3);

+    cpi->fn_ptr[BLOCK_8X16].sdx4df         = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16x4d);

+    cpi->fn_ptr[BLOCK_8X8].sdf            = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8);

+    cpi->fn_ptr[BLOCK_8X8].vf             = VARIANCE_INVOKE(&cpi->rtcd.variance, var8x8);

+    cpi->fn_ptr[BLOCK_8X8].svf            = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar8x8);

+    cpi->fn_ptr[BLOCK_8X8].svf_halfpix_h  = NULL;

+    cpi->fn_ptr[BLOCK_8X8].svf_halfpix_v  = NULL;

+    cpi->fn_ptr[BLOCK_8X8].svf_halfpix_hv = NULL;

+    cpi->fn_ptr[BLOCK_8X8].sdx3f          = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8x3);

+    cpi->fn_ptr[BLOCK_8X8].sdx4df         = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8x4d);

+    cpi->fn_ptr[BLOCK_4X4].sdf            = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4);

+    cpi->fn_ptr[BLOCK_4X4].vf             = VARIANCE_INVOKE(&cpi->rtcd.variance, var4x4);

+    cpi->fn_ptr[BLOCK_4X4].svf            = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar4x4);

+    cpi->fn_ptr[BLOCK_4X4].svf_halfpix_h  = NULL;

+    cpi->fn_ptr[BLOCK_4X4].svf_halfpix_v  = NULL;

+    cpi->fn_ptr[BLOCK_4X4].svf_halfpix_hv = NULL;

+    cpi->fn_ptr[BLOCK_4X4].sdx3f          = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x3);

+    cpi->fn_ptr[BLOCK_4X4].sdx4df         = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x4d);

 #if !(CONFIG_REALTIME_ONLY)

     cpi->full_search_sad = SEARCH_INVOKE(&cpi->rtcd.search, full_search);

--- a/vp8/encoder/onyx_int.h

+++ b/vp8/encoder/onyx_int.h

@@ -229,6 +229,16 @@

     vp8_search_rtcd_vtable_t    search;

 } VP8_ENCODER_RTCD;

+enum

+{

+    BLOCK_16X8,

+    BLOCK_8X16,

+    BLOCK_8X8,

+    BLOCK_4X4,

+    BLOCK_16X16,

+    BLOCK_MAX_SEGMENTS

+};

 typedef struct

@@ -591,7 +601,7 @@

     fractional_mv_step_fp *find_fractional_mv_step;

     vp8_full_search_fn_t full_search_sad;

     vp8_diamond_search_fn_t diamond_search_sad;

-    vp8_variance_fn_ptr_t fn_ptr;

+    vp8_variance_fn_ptr_t fn_ptr[BLOCK_MAX_SEGMENTS];

     unsigned int time_receive_data;

     unsigned int time_compress_data;

     unsigned int time_pick_lpf;

--- a/vp8/encoder/pickinter.c

+++ b/vp8/encoder/pickinter.c

@@ -50,14 +50,13 @@

 extern void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, MV *mv);

-int vp8_skip_fractional_mv_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, int *mvcost[2])

+int vp8_skip_fractional_mv_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2])

     (void) b;

     (void) d;

     (void) ref_mv;

     (void) error_per_bit;

-    (void) svf;

-    (void) vf;

+    (void) vfp;

     (void) mvcost;

     bestmv->row <<= 3;

     bestmv->col <<= 3;

@@ -65,7 +64,7 @@

-static int get_inter_mbpred_error(MACROBLOCK *mb, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, unsigned int *sse)

+static int get_inter_mbpred_error(MACROBLOCK *mb, const vp8_variance_fn_ptr_t *vfp, unsigned int *sse)

     BLOCK *b = &mb->block[0];

@@ -81,11 +80,11 @@

     if (xoffset | yoffset)

-        return svf(in_what, in_what_stride, xoffset, yoffset, what, what_stride, sse);

+        return vfp->svf(in_what, in_what_stride, xoffset, yoffset, what, what_stride, sse);

     else

-        return vf(what, what_stride, in_what, in_what_stride, sse);

+        return vfp->vf(what, what_stride, in_what, in_what_stride, sse);

@@ -719,13 +718,13 @@

             if (cpi->sf.search_method == HEX)

-                bestsme = vp8_hex_search(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param, sadpb/*x->errorperbit*/, &num00, cpi->fn_ptr.vf, cpi->fn_ptr.sdf, x->mvsadcost, x->mvcost);

+                bestsme = vp8_hex_search(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param, sadpb/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost);

                 mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;

                 mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;

             else

-                bestsme = cpi->diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param, sadpb / 2/*x->errorperbit*/, &num00, &cpi->fn_ptr, x->mvsadcost, x->mvcost); //sadpb < 9

+                bestsme = cpi->diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param, sadpb / 2/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost); //sadpb < 9

                 mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;

                 mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;

@@ -744,7 +743,7 @@

                         num00--;

                     else

-                        thissme = cpi->diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param + n, sadpb / 4/*x->errorperbit*/, &num00, &cpi->fn_ptr, x->mvsadcost, x->mvcost); //sadpb = 9

+                        thissme = cpi->diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param + n, sadpb / 4/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost); //sadpb = 9

                         if (thissme < bestsme)

@@ -765,7 +764,7 @@

         if (bestsme < INT_MAX)

-            cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv1, x->errorperbit, cpi->fn_ptr.svf, cpi->fn_ptr.vf, cpi->mb.mvcost);

+            cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv1, x->errorperbit, &cpi->fn_ptr[BLOCK_16X16], cpi->mb.mvcost);

         mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;

         mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;

@@ -795,7 +794,7 @@

             x->e_mbd.block[0].bmi.mode = this_mode;

             x->e_mbd.block[0].bmi.mv.as_int = x->e_mbd.mode_info_context->mbmi.mv.as_int;

-            distortion2 = get_inter_mbpred_error(x, cpi->fn_ptr.svf, cpi->fn_ptr.vf, (unsigned int *)(&sse));

+            distortion2 = get_inter_mbpred_error(x, &cpi->fn_ptr[BLOCK_16X16], (unsigned int *)(&sse));

             this_rd = RD_ESTIMATE(x->rdmult, x->rddiv, rate2, distortion2);

--- a/vp8/encoder/rdopt.c

+++ b/vp8/encoder/rdopt.c

@@ -1130,6 +1130,8 @@

     int bsd = 0;

     int bestsegmentyrate = 0;

+    static const int segmentation_to_sseshift[4] = {3, 3, 2, 0};

     // FIX TO Rd error outrange bug PGW 9 june 2004

     B_PREDICTION_MODE bmodes[16] = {ZERO4X4, ZERO4X4, ZERO4X4, ZERO4X4,

                                     ZERO4X4, ZERO4X4, ZERO4X4, ZERO4X4,

@@ -1151,10 +1153,10 @@

         int rate = 0;

         int sbr = 0;

         int sbd = 0;

-        int UNINITIALIZED_IS_SAFE(sseshift);

+        int sseshift;

         int segmentyrate = 0;

-        vp8_variance_fn_ptr_t v_fn_ptr;

+        vp8_variance_fn_ptr_t *v_fn_ptr;

         ENTROPY_CONTEXT_PLANES t_above, t_left;

         ENTROPY_CONTEXT *ta;

@@ -1174,42 +1176,8 @@

         br = 0;

         bd = 0;

-        switch (segmentation)

-        {

-        case 0:

-            v_fn_ptr.vf    = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x8);

-            v_fn_ptr.svf   = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar16x8);

-            v_fn_ptr.sdf   = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8);

-            v_fn_ptr.sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8x3);

-            v_fn_ptr.sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8x4d);

-            sseshift = 3;

-            break;

-        case 1:

-            v_fn_ptr.vf    = VARIANCE_INVOKE(&cpi->rtcd.variance, var8x16);

-            v_fn_ptr.svf   = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar8x16);

-            v_fn_ptr.sdf   = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16);

-            v_fn_ptr.sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16x3);

-            v_fn_ptr.sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16x4d);

-            sseshift = 3;

-            break;

-        case 2:

-            v_fn_ptr.vf    = VARIANCE_INVOKE(&cpi->rtcd.variance, var8x8);

-            v_fn_ptr.svf   = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar8x8);

-            v_fn_ptr.sdf   = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8);

-            v_fn_ptr.sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8x3);

-            v_fn_ptr.sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8x4d);

-            sseshift = 2;

-            break;

-        case 3:

-            v_fn_ptr.vf    = VARIANCE_INVOKE(&cpi->rtcd.variance, var4x4);

-            v_fn_ptr.svf   = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar4x4);

-            v_fn_ptr.sdf   = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4);

-            v_fn_ptr.sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x3);

-            v_fn_ptr.sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x4d);

-            sseshift = 0;

-            break;

-        }

+        v_fn_ptr = &cpi->fn_ptr[segmentation];

+        sseshift = segmentation_to_sseshift[segmentation];

         labels = vp8_mbsplits[segmentation];

         label_count = vp8_count_labels(labels);

@@ -1281,10 +1249,10 @@

                         int sadpb = x->sadperbit4;

                         if (cpi->sf.search_method == HEX)

-                            bestsme = vp8_hex_search(x, c, e, best_ref_mv, &mode_mv[NEW4X4], step_param, sadpb/*x->errorperbit*/, &num00, v_fn_ptr.vf, v_fn_ptr.sdf, x->mvsadcost, mvcost);

+                            bestsme = vp8_hex_search(x, c, e, best_ref_mv, &mode_mv[NEW4X4], step_param, sadpb/*x->errorperbit*/, &num00, v_fn_ptr, x->mvsadcost, mvcost);

                         else

-                            bestsme = cpi->diamond_search_sad(x, c, e, best_ref_mv, &mode_mv[NEW4X4], step_param, sadpb / 2/*x->errorperbit*/, &num00, &v_fn_ptr, x->mvsadcost, mvcost);

+                            bestsme = cpi->diamond_search_sad(x, c, e, best_ref_mv, &mode_mv[NEW4X4], step_param, sadpb / 2/*x->errorperbit*/, &num00, v_fn_ptr, x->mvsadcost, mvcost);

                             n = num00;

                             num00 = 0;

@@ -1297,7 +1265,7 @@

                                     num00--;

                                 else

-                                    thissme = cpi->diamond_search_sad(x, c, e, best_ref_mv, &temp_mv, step_param + n, sadpb / 2/*x->errorperbit*/, &num00, &v_fn_ptr, x->mvsadcost, mvcost);

+                                    thissme = cpi->diamond_search_sad(x, c, e, best_ref_mv, &temp_mv, step_param + n, sadpb / 2/*x->errorperbit*/, &num00, v_fn_ptr, x->mvsadcost, mvcost);

                                     if (thissme < bestsme)

@@ -1312,7 +1280,7 @@

                         // Should we do a full search (best quality only)

                         if ((compressor_speed == 0) && (bestsme >> sseshift) > 4000)

-                            thissme = cpi->full_search_sad(x, c, e, best_ref_mv, sadpb / 4, 16, &v_fn_ptr, x->mvcost, x->mvsadcost);

+                            thissme = cpi->full_search_sad(x, c, e, best_ref_mv, sadpb / 4, 16, v_fn_ptr, x->mvcost, x->mvsadcost);

                             if (thissme < bestsme)

@@ -1330,9 +1298,9 @@

                     if (bestsme < INT_MAX)

                         if (!fullpixel)

-                            cpi->find_fractional_mv_step(x, c, e, &mode_mv[NEW4X4], best_ref_mv, x->errorperbit / 2, v_fn_ptr.svf, v_fn_ptr.vf, mvcost);

+                            cpi->find_fractional_mv_step(x, c, e, &mode_mv[NEW4X4], best_ref_mv, x->errorperbit / 2, v_fn_ptr, mvcost);

                         else

-                            vp8_skip_fractional_mv_step(x, c, e, &mode_mv[NEW4X4], best_ref_mv, x->errorperbit, v_fn_ptr.svf, v_fn_ptr.vf, mvcost);

+                            vp8_skip_fractional_mv_step(x, c, e, &mode_mv[NEW4X4], best_ref_mv, x->errorperbit, v_fn_ptr, mvcost);

@@ -1852,13 +1820,13 @@

                     if (cpi->sf.search_method == HEX)

-                        bestsme = vp8_hex_search(x, b, d, &best_ref_mv, &d->bmi.mv.as_mv, step_param, sadpb/*x->errorperbit*/, &num00, cpi->fn_ptr.vf, cpi->fn_ptr.sdf, x->mvsadcost, x->mvcost);

+                        bestsme = vp8_hex_search(x, b, d, &best_ref_mv, &d->bmi.mv.as_mv, step_param, sadpb/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost);

                         mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;

                         mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;

                     else

-                        bestsme = cpi->diamond_search_sad(x, b, d, &best_ref_mv, &d->bmi.mv.as_mv, step_param, sadpb / 2/*x->errorperbit*/, &num00, &cpi->fn_ptr, x->mvsadcost, x->mvcost); //sadpb < 9

+                        bestsme = cpi->diamond_search_sad(x, b, d, &best_ref_mv, &d->bmi.mv.as_mv, step_param, sadpb / 2/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost); //sadpb < 9

                         mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;

                         mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;

@@ -1877,7 +1845,7 @@

                                 num00--;

                             else

-                                thissme = cpi->diamond_search_sad(x, b, d, &best_ref_mv, &d->bmi.mv.as_mv, step_param + n, sadpb / 4/*x->errorperbit*/, &num00, &cpi->fn_ptr, x->mvsadcost, x->mvcost); //sadpb = 9

+                                thissme = cpi->diamond_search_sad(x, b, d, &best_ref_mv, &d->bmi.mv.as_mv, step_param + n, sadpb / 4/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost); //sadpb = 9

                                 if (thissme < bestsme)

@@ -1914,7 +1882,7 @@

                     search_range = (search_range > cpi->sf.max_fs_radius) ? cpi->sf.max_fs_radius : search_range;

                         int sadpb = x->sadperbit16 >> 2;

-                        thissme = cpi->full_search_sad(x, b, d, &best_ref_mv, sadpb, search_range, &cpi->fn_ptr, x->mvcost, x->mvsadcost);

+                        thissme = cpi->full_search_sad(x, b, d, &best_ref_mv, sadpb, search_range, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, x->mvsadcost);

                     // Barrier threshold to initiating full search

@@ -1939,7 +1907,7 @@

                 if (bestsme < INT_MAX)

                     // cpi->find_fractional_mv_step(x,b,d,&d->bmi.mv.as_mv,&best_ref_mv,x->errorperbit/2,cpi->fn_ptr.svf,cpi->fn_ptr.vf,x->mvcost);  // normal mvc=11

-                    cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv, x->errorperbit / 4, cpi->fn_ptr.svf, cpi->fn_ptr.vf, x->mvcost);

+                    cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv, x->errorperbit / 4, &cpi->fn_ptr[BLOCK_16X16], x->mvcost);

                 mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;

                 mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;

--- a/vp8/encoder/temporal_filter.c

+++ b/vp8/encoder/temporal_filter.c

@@ -234,7 +234,7 @@

             &best_ref_mv1, &d->bmi.mv.as_mv,

             step_param,

             sadpb/*x->errorperbit*/,

-            &num00, cpi->fn_ptr.vf, cpi->fn_ptr.sdf,

+            &num00, &cpi->fn_ptr[BLOCK_16X16],

             mvsadcost, mvcost);

     else

@@ -245,7 +245,7 @@

             &best_ref_mv1, &d->bmi.mv.as_mv,

             step_param,

             sadpb / 2/*x->errorperbit*/,

-            &num00, &cpi->fn_ptr,

+            &num00, &cpi->fn_ptr[BLOCK_16X16],

             mvsadcost, mvcost); //sadpb < 9

         // Further step/diamond searches as necessary

@@ -267,7 +267,7 @@

                     &best_ref_mv1, &d->bmi.mv.as_mv,

                     step_param + n,

                     sadpb / 4/*x->errorperbit*/,

-                    &num00, &cpi->fn_ptr,

+                    &num00, &cpi->fn_ptr[BLOCK_16X16],

                     mvsadcost, mvcost); //sadpb = 9

                 if (thissme < bestsme)

@@ -291,8 +291,8 @@

         bestsme = cpi->find_fractional_mv_step(x, b, d,

                     &d->bmi.mv.as_mv, &best_ref_mv1,

-                    x->errorperbit, cpi->fn_ptr.svf,

-                    cpi->fn_ptr.vf, cpi->mb.mvcost);

+                    x->errorperbit, &cpi->fn_ptr[BLOCK_16X16],

+                    cpi->mb.mvcost);

 #endif

--- a/vp8/encoder/variance.h

+++ b/vp8/encoder/variance.h

@@ -219,6 +219,21 @@

 #endif

 extern prototype_subpixvariance(vp8_variance_subpixvar16x16);

+#ifndef vp8_variance_halfpixvar16x16_h

+#define vp8_variance_halfpixvar16x16_h vp8_half_pixel_variance16x16_c

+#endif

+extern prototype_variance(vp8_variance_halfpixvar16x16_h);

+#ifndef vp8_variance_halfpixvar16x16_v

+#define vp8_variance_halfpixvar16x16_v vp8_half_pixel_variance16x16_c

+#endif

+extern prototype_variance(vp8_variance_halfpixvar16x16_v);

+#ifndef vp8_variance_halfpixvar16x16_hv

+#define vp8_variance_halfpixvar16x16_hv vp8_half_pixel_variance16x16_c

+#endif

+extern prototype_variance(vp8_variance_halfpixvar16x16_hv);

 #ifndef vp8_variance_subpixmse16x16

 #define vp8_variance_subpixmse16x16 vp8_sub_pixel_mse16x16_c

 #endif

@@ -283,6 +298,9 @@

     vp8_subpixvariance_fn_t  subpixvar8x16;

     vp8_subpixvariance_fn_t  subpixvar16x8;

     vp8_subpixvariance_fn_t  subpixvar16x16;

+    vp8_variance_fn_t        halfpixvar16x16_h;

+    vp8_variance_fn_t        halfpixvar16x16_v;

+    vp8_variance_fn_t        halfpixvar16x16_hv;

     vp8_subpixvariance_fn_t  subpixmse16x16;

     vp8_getmbss_fn_t         getmbss;

@@ -309,11 +327,14 @@

 typedef struct

-    vp8_sad_fn_t  sdf;

-    vp8_sad_multi_fn_t sdx3f;

-    vp8_sad_multi_d_fn_t sdx4df;

-    vp8_variance_fn_t vf;

+    vp8_sad_fn_t            sdf;

+    vp8_variance_fn_t       vf;

     vp8_subpixvariance_fn_t svf;

+    vp8_variance_fn_t       svf_halfpix_h;

+    vp8_variance_fn_t       svf_halfpix_v;

+    vp8_variance_fn_t       svf_halfpix_hv;

+    vp8_sad_multi_fn_t      sdx3f;

+    vp8_sad_multi_d_fn_t    sdx4df;

 } vp8_variance_fn_ptr_t;

 #if CONFIG_RUNTIME_CPU_DETECT

@@ -321,8 +342,5 @@

 #else

 #define VARIANCE_INVOKE(ctx,fn) vp8_variance_##fn

 #endif

-/* TODO: Determine if this USEBILINEAR flag is necessary. */

-#define USEBILINEAR

 #endif

--- a/vp8/encoder/variance_c.c

+++ b/vp8/encoder/variance_c.c

@@ -24,7 +24,6 @@

};

-#ifdef USEBILINEAR

 const int VP8_FILTER_WEIGHT = 128;

 const int VP8_FILTER_SHIFT  =   7;

 const int vp8_bilinear_taps[8][2] =

@@ -461,6 +460,19 @@

     return vp8_variance16x16_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse);

+unsigned int vp8_half_pixel_variance16x16_c(

+    const unsigned char *src_ptr,

+    int  source_stride,

+    const unsigned char *ref_ptr,

+    int  recon_stride,

+    unsigned int *sse)

+{

+    return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, 4, 4,

+                                         ref_ptr, recon_stride, sse);

+}

 unsigned int vp8_sub_pixel_mse16x16_c

     const unsigned char  *src_ptr,

@@ -525,4 +537,3 @@

     return vp8_variance8x16_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse);

-#endif

--- a/vp8/vp8cx_arm.mk

+++ b/vp8/vp8cx_arm.mk

@@ -19,7 +19,6 @@

 VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/quantize_arm.c

 VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/picklpf_arm.c

 VP8_CX_SRCS-$(HAVE_ARMV5TE) += encoder/arm/boolhuff_arm.c

-VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/mcomp_arm.c

 VP8_CX_SRCS_REMOVE-$(HAVE_ARMV5TE)  += encoder/boolhuff.c

--

⑨