shithub: libvpx

--- a/CHANGELOG

+++ b/CHANGELOG

@@ -1,3 +1,32 @@

+2012-12-21 v1.2.0

+  This release acts as a checkpoint for a large amount of internal refactoring

+  and testing. It also contains a number of small bugfixes, so all users are

+  encouraged to upgrade.

+  - Upgrading:

+    This release is ABI and API compatible with Duclair (v1.0.0). Users

+    of older releases should refer to the Upgrading notes in this

+    document for that release.

+  - Enhancements:

+      VP8 optimizations for MIPS dspr2

+      vpxenc: add -quiet option

+  - Speed:

+      Encoder and decoder speed is consistent with the Eider release.

+  - Quality:

+      In general, quality is consistent with the Eider release.

+      Minor tweaks to ARNR filtering

+      Minor improvements to real time encoding with multiple temporal layers

+  - Bug Fixes:

+      Fixes multithreaded encoder race condition in loopfilter

+      Fixes multi-resolution threaded encoding

+      Fix potential encoder dead-lock after picture resize

 2012-05-09 v1.1.0 "Eider"

   This introduces a number of enhancements, mostly focused on real-time

   encoding. In addition, it fixes a decoder bug (first introduced in

--- a/build/make/Android.mk

+++ b/build/make/Android.mk

@@ -27,7 +27,7 @@

 # Android.mk file in the libvpx directory:

 # LOCAL_PATH := $(call my-dir)

 # include $(CLEAR_VARS)

-# include libvpx/build/make/Android.mk

+# include jni/libvpx/build/make/Android.mk

 # There are currently two TARGET_ARCH_ABI targets for ARM.

 # armeabi and armeabi-v7a.  armeabi-v7a is selected by creating an

--- a/build/make/ads2gas.pl

+++ b/build/make/ads2gas.pl

@@ -61,26 +61,26 @@

     s/:SHR:/ >> /g;

     # Convert ELSE to .else

-    s/ELSE/.else/g;

+    s/\bELSE\b/.else/g;

     # Convert ENDIF to .endif

-    s/ENDIF/.endif/g;

+    s/\bENDIF\b/.endif/g;

     # Convert ELSEIF to .elseif

-    s/ELSEIF/.elseif/g;

+    s/\bELSEIF\b/.elseif/g;

     # Convert LTORG to .ltorg

-    s/LTORG/.ltorg/g;

+    s/\bLTORG\b/.ltorg/g;

     # Convert endfunc to nothing.

-    s/endfunc//ig;

+    s/\bendfunc\b//ig;

     # Convert FUNCTION to nothing.

-    s/FUNCTION//g;

-    s/function//g;

+    s/\bFUNCTION\b//g;

+    s/\bfunction\b//g;

-    s/ENTRY//g;

-    s/MSARMASM/0/g;

+    s/\bENTRY\b//g;

+    s/\bMSARMASM\b/0/g;

     s/^\s+end\s+$//g;

     # Convert IF :DEF:to .if

@@ -149,11 +149,15 @@

     s/^([a-zA-Z_0-9\$]+)/$1:/ if !/EQU/;

     # ALIGN directive

-    s/ALIGN/.balign/g;

+    s/\bALIGN\b/.balign/g;

     # ARM code

     s/\sARM/.arm/g;

+    # push/pop

+    s/(push\s+)(r\d+)/stmdb sp\!, \{$2\}/g;

+    s/(pop\s+)(r\d+)/ldmia sp\!, \{$2\}/g;

     # NEON code

     s/(vld1.\d+\s+)(q\d+)/$1\{$2\}/g;

     s/(vtbl.\d+\s+[^,]+),([^,]+)/$1,\{$2\}/g;

@@ -189,7 +193,7 @@

     s/(\S+\s+)EQU(\s+\S+)/.equ $1, $2/;

     # Begin macro definition

-    if (/MACRO/) {

+    if (/\bMACRO\b/) {

         $_ = <STDIN>;

         s/^/.macro/;

         s/\$//g;                # remove formal param reference

@@ -198,7 +202,7 @@

     # For macros, use \ to reference formal params

     s/\$/\\/g;                  # End macro definition

-    s/MEND/.endm/;              # No need to tell it where to stop assembling

+    s/\bMEND\b/.endm/;              # No need to tell it where to stop assembling

     next if /^\s*END\s*$/;

     print;

     print "$comment_sub$comment\n" if defined $comment;

--- a/build/make/configure.sh

+++ b/build/make/configure.sh

@@ -277,6 +277,7 @@

 # Toolchain Check Functions

 check_cmd() {

+    enabled external_build && return

     log "$@"

     "$@" >>${logfile} 2>&1

@@ -767,6 +768,7 @@

;;

         armv5te)

             soft_enable edsp

+            disable fast_unaligned

;;

         esac

@@ -1000,7 +1002,11 @@

         soft_enable sse2

         soft_enable sse3

         soft_enable ssse3

-        soft_enable sse4_1

+        if enabled gcc && ! disabled sse4_1 && ! check_cflags -msse4; then

+            RTCD_OPTIONS="${RTCD_OPTIONS}--disable-sse4_1 "

+        else

+            soft_enable sse4_1

+        fi

         case  ${tgt_os} in

             win*)

@@ -1174,9 +1180,6 @@

fi

;;

     esac

-    # for sysconf(3) and friends.

-    check_header unistd.h

     # glibc needs these

     if enabled linux; then

--- a/configure

+++ b/configure

@@ -303,6 +303,7 @@

     ${EXPERIMENT_LIST}

 CMDLINE_SELECT="

+    external_build

     extra_warnings

     werror

     install_docs

@@ -502,7 +503,7 @@

fi

fi

fi

-    if [ -z "$CC" ]; then

+    if [ -z "$CC" ] || enabled external_build; then

         echo "Bypassing toolchain for environment detection."

         enable external_build

         check_header() {

@@ -511,6 +512,7 @@

             shift

             var=`echo $header | sed 's/[^A-Za-z0-9_]/_/g'`

             disable $var

+            # Headers common to all environments

             case $header in

                 stdio.h)

                     true;

@@ -522,6 +524,25 @@

                     done

                     ${result:-true}

             esac && enable $var

+            # Specialize windows and POSIX environments.

+            case $toolchain in

+                *-win*-*)

+                    case $header-$toolchain in

+                        stdint*-gcc) true;;

+                        *) false;;

+                    esac && enable $var

+                    ;;

+                *)

+                    case $header in

+                        stdint.h) true;;

+                        pthread.h) true;;

+                        sys/mman.h) true;;

+                        unistd.h) true;;

+                        *) false;;

+                    esac && enable $var

+            esac

+            enabled $var

         check_ld() {

             true

@@ -535,6 +556,7 @@

     check_header stdint.h

     check_header pthread.h

     check_header sys/mman.h

+    check_header unistd.h # for sysconf(3) and friends.

     check_header vpx/vpx_integer.h -I${source_path} && enable vpx_ports

@@ -642,6 +664,10 @@

;;

         *-android-*)

             # GTestLog must be modified to use Android logging utilities.

+        ;;

+        *-darwin-*)

+            # iOS/ARM builds do not work with gtest. This does not match

+            # x86 targets.

;;

*)

             check_cxx "$@" <<EOF && soft_enable unit_tests

--- /dev/null

+++ b/examples/decode_with_partial_drops.txt

@@ -1,0 +1,238 @@

+@TEMPLATE decoder_tmpl.c

+Decode With Partial Drops Example

+=========================

+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INTRODUCTION

+This is an example utility which drops a series of frames (or parts of frames),

+as specified on the command line. This is useful for observing the error

+recovery features of the codec.

+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INTRODUCTION

+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRA_INCLUDES

+#include <time.h>

+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRA_INCLUDES

+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ HELPERS

+struct parsed_header

+{

+    char key_frame;

+    int version;

+    char show_frame;

+    int first_part_size;

+};

+int next_packet(struct parsed_header* hdr, int pos, int length, int mtu)

+{

+    int size = 0;

+    int remaining = length - pos;

+    /* Uncompressed part is 3 bytes for P frames and 10 bytes for I frames */

+    int uncomp_part_size = (hdr->key_frame ? 10 : 3);

+    /* number of bytes yet to send from header and the first partition */

+    int remainFirst = uncomp_part_size + hdr->first_part_size - pos;

+    if (remainFirst > 0)

+    {

+        if (remainFirst <= mtu)

+        {

+            size = remainFirst;

+        }

+        else

+        {

+            size = mtu;

+        }

+        return size;

+    }

+    /* second partition; just slot it up according to MTU */

+    if (remaining <= mtu)

+    {

+        size = remaining;

+        return size;

+    }

+    return mtu;

+}

+void throw_packets(unsigned char* frame, int* size, int loss_rate,

+                   int* thrown, int* kept)

+{

+    unsigned char loss_frame[256*1024];

+    int pkg_size = 1;

+    int pos = 0;

+    int loss_pos = 0;

+    struct parsed_header hdr;

+    unsigned int tmp;

+    int mtu = 1500;

+    if (*size < 3)

+    {

+        return;

+    }

+    putc('|', stdout);

+    /* parse uncompressed 3 bytes */

+    tmp = (frame[2] << 16) | (frame[1] << 8) | frame[0];

+    hdr.key_frame = !(tmp & 0x1); /* inverse logic */

+    hdr.version = (tmp >> 1) & 0x7;

+    hdr.show_frame = (tmp >> 4) & 0x1;

+    hdr.first_part_size = (tmp >> 5) & 0x7FFFF;

+    /* don't drop key frames */

+    if (hdr.key_frame)

+    {

+        int i;

+        *kept = *size/mtu + ((*size % mtu > 0) ? 1 : 0); /* approximate */

+        for (i=0; i < *kept; i++)

+            putc('.', stdout);

+        return;

+    }

+    while ((pkg_size = next_packet(&hdr, pos, *size, mtu)) > 0)

+    {

+        int loss_event = ((rand() + 1.0)/(RAND_MAX + 1.0) < loss_rate/100.0);

+        if (*thrown == 0 && !loss_event)

+        {

+            memcpy(loss_frame + loss_pos, frame + pos, pkg_size);

+            loss_pos += pkg_size;

+            (*kept)++;

+            putc('.', stdout);

+        }

+        else

+        {

+            (*thrown)++;

+            putc('X', stdout);

+        }

+        pos += pkg_size;

+    }

+    memcpy(frame, loss_frame, loss_pos);

+    memset(frame + loss_pos, 0, *size - loss_pos);

+    *size = loss_pos;

+}

+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ HELPERS

+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_INIT

+/* Initialize codec */

+flags = VPX_CODEC_USE_ERROR_CONCEALMENT;

+res = vpx_codec_dec_init(&codec, interface, &dec_cfg, flags);

+if(res)

+    die_codec(&codec, "Failed to initialize decoder");

+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_INIT

+Usage

+-----

+This example adds a single argument to the `simple_decoder` example,

+which specifies the range or pattern of frames to drop. The parameter is

+parsed as follows:

+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ USAGE

+if(argc < 4 || argc > 6)

+    die("Usage: %s <infile> <outfile> [-t <num threads>] <N-M|N/M|L,S>\n",

+        argv[0]);

+{

+    char *nptr;

+    int arg_num = 3;

+    if (argc == 6 && strncmp(argv[arg_num++], "-t", 2) == 0)

+        dec_cfg.threads = strtol(argv[arg_num++], NULL, 0);

+    n = strtol(argv[arg_num], &nptr, 0);

+    mode = (*nptr == '\0' || *nptr == ',') ? 2 : (*nptr == '-') ? 1 : 0;

+    m = strtol(nptr+1, NULL, 0);

+    if((!n && !m) || (*nptr != '-' && *nptr != '/' &&

+        *nptr != '\0' && *nptr != ','))

+        die("Couldn't parse pattern %s\n", argv[3]);

+}

+seed = (m > 0) ? m : (unsigned int)time(NULL);

+srand(seed);thrown_frame = 0;

+printf("Seed: %u\n", seed);

+printf("Threads: %d\n", dec_cfg.threads);

+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ USAGE

+Dropping A Range Of Frames

+--------------------------

+To drop a range of frames, specify the starting frame and the ending

+frame to drop, separated by a dash. The following command will drop

+frames 5 through 10 (base 1).

+  $ ./decode_with_partial_drops in.ivf out.i420 5-10

+Dropping A Pattern Of Frames

+----------------------------

+To drop a pattern of frames, specify the number of frames to drop and

+the number of frames after which to repeat the pattern, separated by

+a forward-slash. The following command will drop 3 of 7 frames.

+Specifically, it will decode 4 frames, then drop 3 frames, and then

+repeat.

+  $ ./decode_with_partial_drops in.ivf out.i420 3/7

+Dropping Random Parts Of Frames

+-------------------------------

+A third argument tuple is available to split the frame into 1500 bytes pieces

+and randomly drop pieces rather than frames. The frame will be split at

+partition boundaries where possible. The following example will seed the RNG

+with the seed 123 and drop approximately 5% of the pieces. Pieces which

+are depending on an already dropped piece will also be dropped.

+  $ ./decode_with_partial_drops in.ivf out.i420 5,123

+Extra Variables

+---------------

+This example maintains the pattern passed on the command line in the

+`n`, `m`, and `is_range` variables:

+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRA_VARS

+int              n, m, mode;

+unsigned int     seed;

+int              thrown=0, kept=0;

+int              thrown_frame=0, kept_frame=0;

+vpx_codec_dec_cfg_t  dec_cfg = {0};

+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRA_VARS

+Making The Drop Decision

+------------------------

+The example decides whether to drop the frame based on the current

+frame number, immediately before decoding the frame.

+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PRE_DECODE

+/* Decide whether to throw parts of the frame or the whole frame

+   depending on the drop mode */

+thrown_frame = 0;

+kept_frame = 0;

+switch (mode)

+{

+case 0:

+    if (m - (frame_cnt-1)%m <= n)

+    {

+        frame_sz = 0;

+    }

+    break;

+case 1:

+    if (frame_cnt >= n && frame_cnt <= m)

+    {

+        frame_sz = 0;

+    }

+    break;

+case 2:

+    throw_packets(frame, &frame_sz, n, &thrown_frame, &kept_frame);

+    break;

+default: break;

+}

+if (mode < 2)

+{

+    if (frame_sz == 0)

+    {

+        putc('X', stdout);

+        thrown_frame++;

+    }

+    else

+    {

+        putc('.', stdout);

+        kept_frame++;

+    }

+}

+thrown += thrown_frame;

+kept += kept_frame;

+fflush(stdout);

+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PRE_DECODE

--- a/libs.mk

+++ b/libs.mk

@@ -61,8 +61,16 @@

 CODEC_SRCS-yes += CHANGELOG

 CODEC_SRCS-yes += libs.mk

+# If this is a universal (fat) binary, then all the subarchitectures have

+# already been built and our job is to stitch them together. The

+# BUILD_LIBVPX variable indicates whether we should be building

+# (compiling, linking) the library. The LIPO_LIBVPX variable indicates

+# that we're stitching.

+$(eval $(if $(filter universal%,$(TOOLCHAIN)),LIPO_LIBVPX,BUILD_LIBVPX):=yes)

 include $(SRC_PATH_BARE)/vpx/vpx_codec.mk

 CODEC_SRCS-yes += $(addprefix vpx/,$(call enabled,API_SRCS))

+CODEC_DOC_SRCS += $(addprefix vpx/,$(call enabled,API_DOC_SRCS))

 include $(SRC_PATH_BARE)/vpx_mem/vpx_mem.mk

 CODEC_SRCS-yes += $(addprefix vpx_mem/,$(call enabled,MEM_SRCS))

@@ -70,6 +78,9 @@

 include $(SRC_PATH_BARE)/vpx_scale/vpx_scale.mk

 CODEC_SRCS-yes += $(addprefix vpx_scale/,$(call enabled,SCALE_SRCS))

+include $(SRC_PATH_BARE)/vpx_ports/vpx_ports.mk

+CODEC_SRCS-yes += $(addprefix vpx_ports/,$(call enabled,PORTS_SRCS))

 ifneq ($(CONFIG_VP8_ENCODER)$(CONFIG_VP8_DECODER),)

   VP8_PREFIX=vp8/

   include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8_common.mk

@@ -79,11 +90,8 @@

   include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8cx.mk

   CODEC_SRCS-yes += $(addprefix $(VP8_PREFIX),$(call enabled,VP8_CX_SRCS))

   CODEC_EXPORTS-yes += $(addprefix $(VP8_PREFIX),$(VP8_CX_EXPORTS))

-  CODEC_SRCS-yes += $(VP8_PREFIX)vp8cx.mk vpx/vp8.h vpx/vp8cx.h

-  CODEC_SRCS-$(ARCH_ARM) += $(VP8_PREFIX)vp8cx_arm.mk

   INSTALL-LIBS-yes += include/vpx/vp8.h include/vpx/vp8cx.h

   INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/$(VP8_PREFIX)/%

-  CODEC_DOC_SRCS += vpx/vp8.h vpx/vp8cx.h

   CODEC_DOC_SECTIONS += vp8 vp8_encoder

 endif

@@ -91,10 +99,8 @@

   include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8dx.mk

   CODEC_SRCS-yes += $(addprefix $(VP8_PREFIX),$(call enabled,VP8_DX_SRCS))

   CODEC_EXPORTS-yes += $(addprefix $(VP8_PREFIX),$(VP8_DX_EXPORTS))

-  CODEC_SRCS-yes += $(VP8_PREFIX)vp8dx.mk vpx/vp8.h vpx/vp8dx.h

   INSTALL-LIBS-yes += include/vpx/vp8.h include/vpx/vp8dx.h

   INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/$(VP8_PREFIX)/%

-  CODEC_DOC_SRCS += vpx/vp8.h vpx/vp8dx.h

   CODEC_DOC_SECTIONS += vp8 vp8_decoder

 endif

@@ -155,30 +161,13 @@

 INSTALL_MAPS += $(foreach p,$(VS_PLATFORMS),$(LIBSUBDIR)/$(p)/%  $(p)/Debug/%)

 endif

-# If this is a universal (fat) binary, then all the subarchitectures have

-# already been built and our job is to stitch them together. The

-# BUILD_LIBVPX variable indicates whether we should be building

-# (compiling, linking) the library. The LIPO_LIBVPX variable indicates

-# that we're stitching.

-$(eval $(if $(filter universal%,$(TOOLCHAIN)),LIPO_LIBVPX,BUILD_LIBVPX):=yes)

 CODEC_SRCS-$(BUILD_LIBVPX) += build/make/version.sh

 CODEC_SRCS-$(BUILD_LIBVPX) += build/make/rtcd.sh

-CODEC_SRCS-$(BUILD_LIBVPX) += vpx/vpx_integer.h

-CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/asm_offsets.h

-CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/vpx_timer.h

-CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/mem.h

+CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/emmintrin_compat.h

+CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/vpx_once.h

 CODEC_SRCS-$(BUILD_LIBVPX) += $(BUILD_PFX)vpx_config.c

 INSTALL-SRCS-no += $(BUILD_PFX)vpx_config.c

-ifeq ($(ARCH_X86)$(ARCH_X86_64),yes)

-CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/emms.asm

-CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/x86.h

-CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/x86_abi_support.asm

 CODEC_SRCS-$(BUILD_LIBVPX) += third_party/x86inc/x86inc.asm

-CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/x86_cpuid.c

-endif

-CODEC_SRCS-$(ARCH_ARM) += vpx_ports/arm_cpudetect.c

-CODEC_SRCS-$(ARCH_ARM) += vpx_ports/arm.h

 CODEC_EXPORTS-$(BUILD_LIBVPX) += vpx/exports_com

 CODEC_EXPORTS-$(CONFIG_ENCODERS) += vpx/exports_enc

 CODEC_EXPORTS-$(CONFIG_DECODERS) += vpx/exports_dec

@@ -202,8 +191,7 @@

 INSTALL-LIBS-$(CONFIG_DEBUG_LIBS) += $(LIBSUBDIR)/libvpx_g.a

 endif

-CODEC_SRCS=$(filter-out %_offsets.c,\

-           $(filter-out %_test.cc,$(call enabled,CODEC_SRCS)))

+CODEC_SRCS=$(call enabled,CODEC_SRCS)

 INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += $(CODEC_SRCS)

 INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += $(call enabled,CODEC_EXPORTS)

@@ -306,6 +294,7 @@

 define libvpx_symlink_template

 $(1): $(2)

 	@echo "    [LN]     $(2) $$@"

+	$(qexec)mkdir -p $$(dir $$@)

 	$(qexec)ln -sf $(2) $$@

 endef

@@ -314,7 +303,7 @@

     $(BUILD_PFX)$(LIBVPX_SO)))

 $(eval $(call libvpx_symlink_template,\

     $(addprefix $(DIST_DIR)/,$(LIBVPX_SO_SYMLINKS)),\

-    $(DIST_DIR)/$(LIBSUBDIR)/$(LIBVPX_SO)))

+    $(LIBVPX_SO)))

 INSTALL-LIBS-$(BUILD_LIBVPX_SO) += $(LIBVPX_SO_SYMLINKS)

@@ -375,10 +364,6 @@

 $(shell $(SRC_PATH_BARE)/build/make/version.sh "$(SRC_PATH_BARE)" $(BUILD_PFX)vpx_version.h)

 CLEAN-OBJS += $(BUILD_PFX)vpx_version.h

-CODEC_DOC_SRCS += vpx/vpx_codec.h \

-                  vpx/vpx_decoder.h \

-                  vpx/vpx_encoder.h \

-                  vpx/vpx_image.h

##

 ## libvpx test directives

--- a/test/datarate_test.cc

+++ b/test/datarate_test.cc

@@ -59,9 +59,13 @@

     /* Test the buffer model here before subtracting the frame. Do so because

      * the way the leaky bucket model works in libvpx is to allow the buffer to

      * empty - and then stop showing frames until we've got enough bits to

-     * show one. */

-    ASSERT_GE(bits_in_buffer_model_, 0) << "Buffer Underrun at frame "

-        << pkt->data.frame.pts;

+     * show one. As noted in comment below (issue 495), this does not currently

+     * apply to key frames. For now exclude key frames in condition below. */

+    bool key_frame = (pkt->data.frame.flags & VPX_FRAME_IS_KEY) ? true: false;

+    if (!key_frame) {

+      ASSERT_GE(bits_in_buffer_model_, 0) << "Buffer Underrun at frame "

+          << pkt->data.frame.pts;

+    }

     const int frame_size_in_bits = pkt->data.frame.sz * 8;

@@ -125,7 +129,12 @@

   ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,

                                        30, 1, 0, 140);

-  for (int i = 70; i < 700; i += 200) {

+  // There is an issue for low bitrates in real-time mode, where the

+  // effective_datarate slightly overshoots the target bitrate.

+  // This is same the issue as noted about (#495).

+  // TODO(jimbankoski/marpan): Update test to run for lower bitrates (< 100),

+  // when the issue is resolved.

+  for (int i = 100; i < 800; i += 200) {

     cfg_.rc_target_bitrate = i;

     ResetModel();

     ASSERT_NO_FATAL_FAILURE(RunLoop(&video));

--- a/test/decode_test_driver.cc

+++ b/test/decode_test_driver.cc

@@ -9,6 +9,7 @@

*/

 #include "test/decode_test_driver.h"

 #include "third_party/googletest/src/include/gtest/gtest.h"

+#include "test/register_state_check.h"

 #include "test/video_source.h"

 namespace libvpx_test {

@@ -21,8 +22,9 @@

     ASSERT_EQ(VPX_CODEC_OK, res_init) << DecodeError();

-  const vpx_codec_err_t res_dec = vpx_codec_decode(&decoder_,

-                                                   cxdata, size, NULL, 0);

+  vpx_codec_err_t res_dec;

+  REGISTER_STATE_CHECK(res_dec = vpx_codec_decode(&decoder_,

+                                                  cxdata, size, NULL, 0));

   ASSERT_EQ(VPX_CODEC_OK, res_dec) << DecodeError();

--- a/test/encode_test_driver.cc

+++ b/test/encode_test_driver.cc

@@ -12,6 +12,7 @@

 #if CONFIG_VP8_DECODER

 #include "test/decode_test_driver.h"

 #endif

+#include "test/register_state_check.h"

 #include "test/video_source.h"

 #include "third_party/googletest/src/include/gtest/gtest.h"

@@ -58,9 +59,10 @@

   // Encode the frame

-  res = vpx_codec_encode(&encoder_,

-                         video.img(), video.pts(), video.duration(),

-                         frame_flags, deadline_);

+  REGISTER_STATE_CHECK(

+      res = vpx_codec_encode(&encoder_,

+                             video.img(), video.pts(), video.duration(),

+                             frame_flags, deadline_));

   ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();

--- a/test/idctllm_test.cc

+++ b/test/idctllm_test.cc

@@ -13,6 +13,7 @@

 #include "vpx_config.h"

 #include "vp8_rtcd.h"

+#include "test/register_state_check.h"

 #include "third_party/googletest/src/include/gtest/gtest.h"

 typedef void (*idct_fn_t)(short *input, unsigned char *pred_ptr,

@@ -54,7 +55,7 @@

     int i;

-    UUT(input, output, 16, output, 16);

+    REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));

     for(i=0; i<256; i++)

         if((i&0xF) < 4 && i<64)

@@ -68,7 +69,7 @@

     int i;

     input[0] = 4;

-    UUT(input, output, 16, output, 16);

+    REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));

     for(i=0; i<256; i++)

         if((i&0xF) < 4 && i<64)

@@ -85,7 +86,7 @@

         predict[i] = i;

     input[0] = 4;

-    UUT(input, predict, 16, output, 16);

+    REGISTER_STATE_CHECK(UUT(input, predict, 16, output, 16));

     for(i=0; i<256; i++)

         if((i&0xF) < 4 && i<64)

@@ -101,7 +102,7 @@

     for(i=0; i<16; i++)

         input[i] = i;

-    UUT(input, output, 16, output, 16);

+    REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));

     for(i=0; i<256; i++)

         if((i&0xF) > 3 || i>63)

--- a/test/intrapred_test.cc

+++ b/test/intrapred_test.cc

@@ -11,6 +11,7 @@

 #include <string.h>

 #include "test/acm_random.h"

+#include "test/register_state_check.h"

 #include "third_party/googletest/src/include/gtest/gtest.h"

 extern "C" {

 #include "vpx_config.h"

@@ -246,8 +247,10 @@

   virtual void Predict(MB_PREDICTION_MODE mode) {

     mb_.mode_info_context->mbmi.mode = mode;

-    pred_fn_(&mb_, data_ptr_[0] - kStride, data_ptr_[0] - 1, kStride,

-             data_ptr_[0], kStride);

+    REGISTER_STATE_CHECK(pred_fn_(&mb_,

+                                  data_ptr_[0] - kStride,

+                                  data_ptr_[0] - 1, kStride,

+                                  data_ptr_[0], kStride));

   intra_pred_y_fn_t pred_fn_;

--- a/test/pp_filter_test.cc

+++ b/test/pp_filter_test.cc

@@ -7,6 +7,7 @@

  *  in the file PATENTS.  All contributing project authors may

  *  be found in the AUTHORS file in the root of the source tree.

*/

+#include "test/register_state_check.h"

 #include "third_party/googletest/src/include/gtest/gtest.h"

 extern "C" {

 #include "vpx_config.h"

@@ -74,8 +75,8 @@

   // Initialize pixels in the output to 99.

   (void)vpx_memset(dst_image, 99, output_size);

-  GetParam()(src_image_ptr, dst_image_ptr, input_stride,

-             output_stride, block_width, flimits, 16);

+  REGISTER_STATE_CHECK(GetParam()(src_image_ptr, dst_image_ptr, input_stride,

+                                  output_stride, block_width, flimits, 16));

   static const uint8_t expected_data[block_height] = {

     4, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 4

--- /dev/null

+++ b/test/register_state_check.h

@@ -1,0 +1,95 @@

+/*

+ *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.

+ *

+ *  Use of this source code is governed by a BSD-style license

+ *  that can be found in the LICENSE file in the root of the source

+ *  tree. An additional intellectual property rights grant can be found

+ *  in the file PATENTS.  All contributing project authors may

+ *  be found in the AUTHORS file in the root of the source tree.

+ */

+#ifndef LIBVPX_TEST_REGISTER_STATE_CHECK_H_

+#define LIBVPX_TEST_REGISTER_STATE_CHECK_H_

+#ifdef _WIN64

+#define _WIN32_LEAN_AND_MEAN

+#include <windows.h>

+#include <winnt.h>

+#include "third_party/googletest/src/include/gtest/gtest.h"

+namespace testing {

+namespace internal {

+inline bool operator==(const M128A& lhs, const M128A& rhs) {

+  return (lhs.Low == rhs.Low && lhs.High == rhs.High);

+}

+}  // namespace internal

+}  // namespace testing

+namespace libvpx_test {

+// Compares the state of xmm[6-15] at construction with their state at

+// destruction. These registers should be preserved by the callee on

+// Windows x64.

+// Usage:

+// {

+//   RegisterStateCheck reg_check;

+//   FunctionToVerify();

+// }

+class RegisterStateCheck {

+ public:

+  RegisterStateCheck() { initialized_ = StoreRegisters(&pre_context_); }

+  ~RegisterStateCheck() { EXPECT_TRUE(Check()); }

+ private:

+  static bool StoreRegisters(CONTEXT* const context) {

+    const HANDLE this_thread = GetCurrentThread();

+    EXPECT_TRUE(this_thread != NULL);

+    context->ContextFlags = CONTEXT_FLOATING_POINT;

+    const bool context_saved = GetThreadContext(this_thread, context) == TRUE;

+    EXPECT_TRUE(context_saved) << "GetLastError: " << GetLastError();

+    return context_saved;

+  }

+  // Compares the register state. Returns true if the states match.

+  bool Check() const {

+    if (!initialized_) return false;

+    CONTEXT post_context;

+    if (!StoreRegisters(&post_context)) return false;

+    const M128A* xmm_pre = &pre_context_.Xmm6;

+    const M128A* xmm_post = &post_context.Xmm6;

+    for (int i = 6; i <= 15; ++i) {

+      EXPECT_EQ(*xmm_pre, *xmm_post) << "xmm" << i << " has been modified!";

+      ++xmm_pre;

+      ++xmm_post;

+    }

+    return !testing::Test::HasNonfatalFailure();

+  }

+  bool initialized_;

+  CONTEXT pre_context_;

+};

+#define REGISTER_STATE_CHECK(statement) do { \

+  libvpx_test::RegisterStateCheck reg_check; \

+  statement;                               \

+} while (false)

+}  // namespace libvpx_test

+#else  // !_WIN64

+namespace libvpx_test {

+class RegisterStateCheck {};

+#define REGISTER_STATE_CHECK(statement) statement

+}  // namespace libvpx_test

+#endif  // _WIN64

+#endif  // LIBVPX_TEST_REGISTER_STATE_CHECK_H_

--- a/test/sad_test.cc

+++ b/test/sad_test.cc

@@ -21,6 +21,7 @@

 #include "test/acm_random.h"

+#include "test/register_state_check.h"

 #include "test/util.h"

 #include "third_party/googletest/src/include/gtest/gtest.h"

@@ -65,9 +66,11 @@

   sad_m_by_n_fn_t sad_fn_;

   virtual unsigned int SAD(unsigned int max_sad) {

-    return sad_fn_(source_data_, source_stride_,

-                   reference_data_, reference_stride_,

-                   max_sad);

+    unsigned int ret;

+    REGISTER_STATE_CHECK(ret = sad_fn_(source_data_, source_stride_,

+                                       reference_data_, reference_stride_,

+                                       max_sad));

+    return ret;

   // Sum of Absolute Differences. Given two blocks, calculate the absolute

--- a/test/sixtap_predict_test.cc

+++ b/test/sixtap_predict_test.cc

@@ -12,6 +12,7 @@

 #include <stdlib.h>

 #include <string.h>

 #include "test/acm_random.h"

+#include "test/register_state_check.h"

 #include "test/util.h"

 #include "third_party/googletest/src/include/gtest/gtest.h"

 extern "C" {

@@ -136,8 +137,8 @@

   uint8_t *src = const_cast<uint8_t*>(test_data);

-  sixtap_predict_(&src[kSrcStride * 2 + 2 + 1], kSrcStride,

-                  2, 2, dst_, kDstStride);

+  REGISTER_STATE_CHECK(sixtap_predict_(&src[kSrcStride * 2 + 2 + 1], kSrcStride,

+                                       2, 2, dst_, kDstStride));

   for (int i = 0; i < height_; ++i)

     for (int j = 0; j < width_; ++j)

@@ -162,8 +163,9 @@

                                 xoffset, yoffset, dst_c_, kDstStride);

       // Run test.

-      sixtap_predict_(&src_[kSrcStride * 2 + 2 + 1], kSrcStride,

-                      xoffset, yoffset, dst_, kDstStride);

+      REGISTER_STATE_CHECK(

+          sixtap_predict_(&src_[kSrcStride * 2 + 2 + 1], kSrcStride,

+                          xoffset, yoffset, dst_, kDstStride));

       for (int i = 0; i < height_; ++i)

         for (int j = 0; j < width_; ++j)

--- a/test/subtract_test.cc

+++ b/test/subtract_test.cc

@@ -10,6 +10,7 @@

 #include "third_party/googletest/src/include/gtest/gtest.h"

 #include "test/acm_random.h"

+#include "test/register_state_check.h"

 extern "C" {

 #include "vpx_config.h"

 #include "vp8_rtcd.h"

@@ -77,7 +78,7 @@

       predictor += kDiffPredStride;

-    GetParam()(&be, &bd, kDiffPredStride);

+    REGISTER_STATE_CHECK(GetParam()(&be, &bd, kDiffPredStride));

     base_src = *be.base_src;

     src_diff = be.src_diff;

--- a/test/test.mk

+++ b/test/test.mk

@@ -1,3 +1,4 @@

+LIBVPX_TEST_SRCS-yes += register_state_check.h

 LIBVPX_TEST_SRCS-yes += test.mk

 LIBVPX_TEST_SRCS-yes += acm_random.h

@@ -59,16 +60,18 @@

 # These tests require both the encoder and decoder to be built.

 ifeq ($(CONFIG_VP9_ENCODER)$(CONFIG_VP9_DECODER),yesyes)

 LIBVPX_TEST_SRCS-yes                   += vp9_boolcoder_test.cc

+# IDCT test currently depends on FDCT function

+LIBVPX_TEST_SRCS-yes                   += idct8x8_test.cc

 endif

 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct4x4_test.cc

 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc

 #LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct16x16_test.cc

+LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += variance_test.cc

 ifeq ($(CONFIG_VP9_ENCODER)$(CONFIG_TX32X32),yesyes)

 LIBVPX_TEST_SRCS-yes += dct32x32_test.cc

 endif

-LIBVPX_TEST_SRCS-yes += idct8x8_test.cc

-LIBVPX_TEST_SRCS-yes += variance_test.cc

 endif # VP9

--- a/test/test_libvpx.cc

+++ b/test/test_libvpx.cc

@@ -9,9 +9,10 @@

*/

 #include <string>

 #include "vpx_config.h"

-#if ARCH_X86 || ARCH_X86_64

 extern "C" {

+#if ARCH_X86 || ARCH_X86_64

 #include "vpx_ports/x86.h"

+#endif

 #if CONFIG_VP8

 extern void vp8_rtcd();

 #endif

@@ -19,7 +20,6 @@

 extern void vp9_rtcd();

 #endif

-#endif

 #include "third_party/googletest/src/include/gtest/gtest.h"

 static void append_gtest_filter(const char *str) {

@@ -47,11 +47,14 @@

     append_gtest_filter(":-SSE4_1/*");

 #endif

+#if !CONFIG_SHARED

+  /* Shared library builds don't support whitebox tests that exercise internal symbols. */

 #if CONFIG_VP8

   vp8_rtcd();

 #endif

 #if CONFIG_VP9

   vp9_rtcd();

+#endif

 #endif

   return RUN_ALL_TESTS();

--- a/vp8/common/loopfilter.c

+++ b/vp8/common/loopfilter.c

@@ -567,46 +567,28 @@

     int mb_cols = post->y_width >> 4;

     int mb_rows = post->y_height >> 4;

-    int linestocopy, i;

+    int linestocopy;

     loop_filter_info_n *lfi_n = &cm->lf_info;

     loop_filter_info lfi;

     int filter_level;

-    int alt_flt_enabled = mbd->segmentation_enabled;

     FRAME_TYPE frame_type = cm->frame_type;

     const MODE_INFO *mode_info_context;

-    int lvl_seg[MAX_MB_SEGMENTS];

+#if 0

+    if(default_filt_lvl == 0) /* no filter applied */

+        return;

+#endif

+    /* Initialize the loop filter for this frame. */

+    vp8_loop_filter_frame_init( cm, mbd, default_filt_lvl);

     /* number of MB rows to use in partial filtering */

     linestocopy = mb_rows / PARTIAL_FRAME_FRACTION;

     linestocopy = linestocopy ? linestocopy << 4 : 16;     /* 16 lines per MB */

-    /* Note the baseline filter values for each segment */

-    /* See vp8_loop_filter_frame_init. Rather than call that for each change

-     * to default_filt_lvl, copy the relevant calculation here.

-     */

-    if (alt_flt_enabled)

-    {

-        for (i = 0; i < MAX_MB_SEGMENTS; i++)

-        {    /* Abs value */

-            if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)

-            {

-                lvl_seg[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];

-            }

-            /* Delta Value */

-            else

-            {

-                lvl_seg[i] = default_filt_lvl

-                        + mbd->segment_feature_data[MB_LVL_ALT_LF][i];

-                lvl_seg[i] = (lvl_seg[i] > 0) ?

-                        ((lvl_seg[i] > 63) ? 63: lvl_seg[i]) : 0;

-            }

-        }

-    }

     /* Set up the buffer pointers; partial image starts at ~middle of frame */

     y_ptr = post->y_buffer + ((post->y_height >> 5) * 16) * post->y_stride;

     mode_info_context = cm->mi + (post->y_height >> 5) * (mb_cols + 1);

@@ -620,10 +602,12 @@

                            mode_info_context->mbmi.mode != SPLITMV &&

                            mode_info_context->mbmi.mb_skip_coeff);

-            if (alt_flt_enabled)

-                filter_level = lvl_seg[mode_info_context->mbmi.segment_id];

-            else

-                filter_level = default_filt_lvl;

+            const int mode_index =

+                lfi_n->mode_lf_lut[mode_info_context->mbmi.mode];

+            const int seg = mode_info_context->mbmi.segment_id;

+            const int ref_frame = mode_info_context->mbmi.ref_frame;

+            filter_level = lfi_n->lvl[seg][ref_frame][mode_index];

             if (filter_level)

--- a/vp8/common/x86/loopfilter_block_sse2.asm

+++ b/vp8/common/x86/loopfilter_block_sse2.asm

@@ -150,6 +150,7 @@

     push    rbp

     mov     rbp, rsp

+    SAVE_XMM 11

     push    r12

     push    r13

     mov     thresh, arg(4)

@@ -258,6 +259,7 @@

 %ifidn __OUTPUT_FORMAT__,x64

     pop    r13

     pop    r12

+    RESTORE_XMM

     pop    rbp

 %endif

--- a/vp8/common/x86/recon_sse2.asm

+++ b/vp8/common/x86/recon_sse2.asm

@@ -890,6 +890,7 @@

     push        rbp

     mov         rbp, rsp

     SHADOW_ARGS_TO_STACK 5

+    SAVE_XMM 7

     push        rsi

     push        rdi

     GET_GOT     rbx

@@ -957,6 +958,7 @@

     RESTORE_GOT

     pop         rdi

     pop         rsi

+    RESTORE_XMM

     UNSHADOW_ARGS

     pop         rbp

ret

--- a/vp8/common/x86/subpixel_ssse3.asm

+++ b/vp8/common/x86/subpixel_ssse3.asm

@@ -352,6 +352,7 @@

     pop rdi

     pop rsi

     RESTORE_GOT

+    RESTORE_XMM

     UNSHADOW_ARGS

     pop         rbp

ret

--- a/vp8/decoder/threading.c

+++ b/vp8/decoder/threading.c

@@ -29,6 +29,13 @@

 #include "error_concealment.h"

 #endif

+#define CALLOC_ARRAY(p, n) CHECK_MEM_ERROR((p), vpx_calloc(sizeof(*(p)), (n)))

+#define CALLOC_ARRAY_ALIGNED(p, n, algn) do {                      \

+  CHECK_MEM_ERROR((p), vpx_memalign((algn), sizeof(*(p)) * (n)));  \

+  memset((p), 0, (n) * sizeof(*(p)));                              \

+} while (0)

 extern void vp8_mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd);

 static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC *mbrd, int count)

@@ -668,11 +675,10 @@

         pbi->b_multithreaded_rd = 1;

         pbi->decoding_thread_count = core_count - 1;

-        CHECK_MEM_ERROR(pbi->h_decoding_thread, vpx_malloc(sizeof(pthread_t) * pbi->decoding_thread_count));

-        CHECK_MEM_ERROR(pbi->h_event_start_decoding, vpx_malloc(sizeof(sem_t) * pbi->decoding_thread_count));

-        CHECK_MEM_ERROR(pbi->mb_row_di, vpx_memalign(32, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count));

-        vpx_memset(pbi->mb_row_di, 0, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count);

-        CHECK_MEM_ERROR(pbi->de_thread_data, vpx_malloc(sizeof(DECODETHREAD_DATA) * pbi->decoding_thread_count));

+        CALLOC_ARRAY(pbi->h_decoding_thread, pbi->decoding_thread_count);

+        CALLOC_ARRAY(pbi->h_event_start_decoding, pbi->decoding_thread_count);

+        CALLOC_ARRAY_ALIGNED(pbi->mb_row_di, pbi->decoding_thread_count, 32);

+        CALLOC_ARRAY(pbi->de_thread_data, pbi->decoding_thread_count);

         for (ithread = 0; ithread < pbi->decoding_thread_count; ithread++)

@@ -796,32 +802,32 @@

         uv_width = width >>1;

         /* Allocate an int for each mb row. */

-        CHECK_MEM_ERROR(pbi->mt_current_mb_col, vpx_malloc(sizeof(int) * pc->mb_rows));

+        CALLOC_ARRAY(pbi->mt_current_mb_col, pc->mb_rows);

         /* Allocate memory for above_row buffers. */

-        CHECK_MEM_ERROR(pbi->mt_yabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));

-        for (i=0; i< pc->mb_rows; i++)

+        CALLOC_ARRAY(pbi->mt_yabove_row, pc->mb_rows);

+        for (i = 0; i < pc->mb_rows; i++)

             CHECK_MEM_ERROR(pbi->mt_yabove_row[i], vpx_memalign(16,sizeof(unsigned char) * (width + (VP8BORDERINPIXELS<<1))));

-        CHECK_MEM_ERROR(pbi->mt_uabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));

-        for (i=0; i< pc->mb_rows; i++)

+        CALLOC_ARRAY(pbi->mt_uabove_row, pc->mb_rows);

+        for (i = 0; i < pc->mb_rows; i++)

             CHECK_MEM_ERROR(pbi->mt_uabove_row[i], vpx_memalign(16,sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS)));

-        CHECK_MEM_ERROR(pbi->mt_vabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));

-        for (i=0; i< pc->mb_rows; i++)

+        CALLOC_ARRAY(pbi->mt_vabove_row, pc->mb_rows);

+        for (i = 0; i < pc->mb_rows; i++)

             CHECK_MEM_ERROR(pbi->mt_vabove_row[i], vpx_memalign(16,sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS)));

         /* Allocate memory for left_col buffers. */

-        CHECK_MEM_ERROR(pbi->mt_yleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));

-        for (i=0; i< pc->mb_rows; i++)

+        CALLOC_ARRAY(pbi->mt_yleft_col, pc->mb_rows);

+        for (i = 0; i < pc->mb_rows; i++)

             CHECK_MEM_ERROR(pbi->mt_yleft_col[i], vpx_calloc(sizeof(unsigned char) * 16, 1));

-        CHECK_MEM_ERROR(pbi->mt_uleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));

-        for (i=0; i< pc->mb_rows; i++)

+        CALLOC_ARRAY(pbi->mt_uleft_col, pc->mb_rows);

+        for (i = 0; i < pc->mb_rows; i++)

             CHECK_MEM_ERROR(pbi->mt_uleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1));

-        CHECK_MEM_ERROR(pbi->mt_vleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));

-        for (i=0; i< pc->mb_rows; i++)

+        CALLOC_ARRAY(pbi->mt_vleft_col, pc->mb_rows);

+        for (i = 0; i < pc->mb_rows; i++)

             CHECK_MEM_ERROR(pbi->mt_vleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1));

--- a/vp8/encoder/bitstream.c

+++ b/vp8/encoder/bitstream.c

@@ -118,7 +118,7 @@

         update_mode(

             w, VP8_YMODES, vp8_ymode_encodings, vp8_ymode_tree,

-            Pnew, x->fc.ymode_prob, bct, (unsigned int *)cpi->ymode_count

+            Pnew, x->fc.ymode_prob, bct, (unsigned int *)cpi->mb.ymode_count

);

@@ -127,7 +127,7 @@

         update_mode(

             w, VP8_UV_MODES, vp8_uv_mode_encodings, vp8_uv_mode_tree,

-            Pnew, x->fc.uv_mode_prob, bct, (unsigned int *)cpi->uv_mode_count

+            Pnew, x->fc.uv_mode_prob, bct, (unsigned int *)cpi->mb.uv_mode_count

);

@@ -493,7 +493,7 @@

 void vp8_convert_rfct_to_prob(VP8_COMP *const cpi)

-    const int *const rfct = cpi->count_mb_ref_frame_usage;

+    const int *const rfct = cpi->mb.count_mb_ref_frame_usage;

     const int rf_intra = rfct[INTRA_FRAME];

     const int rf_inter = rfct[LAST_FRAME] + rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME];

@@ -539,7 +539,7 @@

         int total_mbs = pc->mb_rows * pc->mb_cols;

-        prob_skip_false = (total_mbs - cpi->skip_true_count ) * 256 / total_mbs;

+        prob_skip_false = (total_mbs - cpi->mb.skip_true_count ) * 256 / total_mbs;

         if (prob_skip_false <= 1)

             prob_skip_false = 1;

@@ -730,7 +730,7 @@

         int total_mbs = c->mb_rows * c->mb_cols;

-        prob_skip_false = (total_mbs - cpi->skip_true_count ) * 256 / total_mbs;

+        prob_skip_false = (total_mbs - cpi->mb.skip_true_count ) * 256 / total_mbs;

         if (prob_skip_false <= 1)

             prob_skip_false = 1;

@@ -851,6 +851,7 @@

 static int independent_coef_context_savings(VP8_COMP *cpi)

+    MACROBLOCK *const x = & cpi->mb;

     int savings = 0;

     int i = 0;

do

@@ -867,7 +868,7 @@

*/

             probs = (const unsigned int (*)[MAX_ENTROPY_TOKENS])

-                                                    cpi->coef_counts[i][j];

+                x->coef_counts[i][j];

             /* Reset to default probabilities at key frames */

             if (cpi->common.frame_type == KEY_FRAME)

@@ -926,6 +927,7 @@

 static int default_coef_context_savings(VP8_COMP *cpi)

+    MACROBLOCK *const x = & cpi->mb;

     int savings = 0;

     int i = 0;

do

@@ -945,7 +947,7 @@

                     MAX_ENTROPY_TOKENS, vp8_coef_encodings, vp8_coef_tree,

                     cpi->frame_coef_probs [i][j][k],

                     cpi->frame_branch_ct [i][j][k],

-                    cpi->coef_counts [i][j][k],

+                    x->coef_counts [i][j][k],

                     256, 1

);

@@ -994,7 +996,7 @@

     int savings = 0;

-    const int *const rfct = cpi->count_mb_ref_frame_usage;

+    const int *const rfct = cpi->mb.count_mb_ref_frame_usage;

     const int rf_intra = rfct[INTRA_FRAME];

     const int rf_inter = rfct[LAST_FRAME] + rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME];

     int new_intra, new_last, new_garf, oldtotal, newtotal;

--- a/vp8/encoder/block.h

+++ b/vp8/encoder/block.h

@@ -18,6 +18,9 @@

 #include "vp8/common/entropy.h"

 #include "vpx_ports/mem.h"

+#define MAX_MODES 20

+#define MAX_ERROR_BINS 1024

 /* motion search site */

 typedef struct

@@ -127,7 +130,26 @@

     unsigned char need_to_clamp_best_mvs;

 #endif

+    int skip_true_count;

+    unsigned int coef_counts [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];

+    unsigned int MVcount [2] [MVvals];  /* (row,col) MV cts this frame */

+    int ymode_count [VP8_YMODES];        /* intra MB type cts this frame */

+    int uv_mode_count[VP8_UV_MODES];     /* intra MB type cts this frame */

+    int64_t prediction_error;

+    int64_t intra_error;

+    int count_mb_ref_frame_usage[MAX_REF_FRAMES];

+    int rd_thresh_mult[MAX_MODES];

+    int rd_threshes[MAX_MODES];

+    unsigned int mbs_tested_so_far;

+    unsigned int mode_test_hit_counts[MAX_MODES];

+    int zbin_mode_boost_enabled;

+    int zbin_mode_boost;

+    int last_zbin_mode_boost;

+    int last_zbin_over_quant;

+    int zbin_over_quant;

+    int error_bins[MAX_ERROR_BINS];

     void (*short_fdct4x4)(short *input, short *output, int pitch);

     void (*short_fdct8x4)(short *input, short *output, int pitch);

--- a/vp8/encoder/denoising.c

+++ b/vp8/encoder/denoising.c

@@ -140,8 +140,7 @@

     int i;

     assert(denoiser);

-    /* don't need one for intra start at 1 */

-    for (i = 1; i < MAX_REF_FRAMES; i++)

+    for (i = 0; i < MAX_REF_FRAMES; i++)

         denoiser->yv12_running_avg[i].flags = 0;

@@ -175,8 +174,7 @@

     int i;

     assert(denoiser);

-    /* we don't have one for intra ref frame */

-    for (i = 1; i < MAX_REF_FRAMES ; i++)

+    for (i = 0; i < MAX_REF_FRAMES ; i++)

         vp8_yv12_de_alloc_frame_buffer(&denoiser->yv12_running_avg[i]);

@@ -291,7 +289,7 @@

         /* Filter. */

         decision = vp8_denoiser_filter(&denoiser->yv12_mc_running_avg,

-                                       &denoiser->yv12_running_avg[LAST_FRAME],

+                                       &denoiser->yv12_running_avg[INTRA_FRAME],

x,

                                        motion_magnitude2,

                                        recon_yoffset, recon_uvoffset);

@@ -303,7 +301,7 @@

*/

         vp8_copy_mem16x16(

                 x->thismb, 16,

-                denoiser->yv12_running_avg[LAST_FRAME].y_buffer + recon_yoffset,

-                denoiser->yv12_running_avg[LAST_FRAME].y_stride);

+                denoiser->yv12_running_avg[INTRA_FRAME].y_buffer + recon_yoffset,

+                denoiser->yv12_running_avg[INTRA_FRAME].y_stride);

--- a/vp8/encoder/encodeframe.c

+++ b/vp8/encoder/encodeframe.c

@@ -33,7 +33,7 @@

 #endif

 #include "encodeframe.h"

-extern void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) ;

+extern void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t) ;

 extern void vp8_calc_ref_frame_costs(int *ref_frame_cost,

                                      int prob_intra,

                                      int prob_last,

@@ -45,7 +45,6 @@

 extern void vp8cx_init_mbrthread_data(VP8_COMP *cpi,

                                       MACROBLOCK *x,

                                       MB_ROW_COMP *mbr_ei,

-                                      int mb_row,

                                       int count);

 static void adjust_act_zbin( VP8_COMP *cpi, MACROBLOCK *x );

@@ -530,7 +529,8 @@

              * segmentation map

*/

             if ((cpi->current_layer == 0) &&

-                (cpi->cyclic_refresh_mode_enabled && xd->segmentation_enabled))

+                (cpi->cyclic_refresh_mode_enabled &&

+                 xd->segmentation_enabled))

                 cpi->segmentation_map[map_index+mb_col] = xd->mode_info_context->mbmi.segment_id;

@@ -642,10 +642,6 @@

     xd->left_context = &cm->left_context;

-    vp8_zero(cpi->count_mb_ref_frame_usage)

-    vp8_zero(cpi->ymode_count)

-    vp8_zero(cpi->uv_mode_count)

     x->mvc = cm->fc.mvc;

     vpx_memset(cm->above_context, 0,

@@ -674,8 +670,45 @@

     xd->fullpixel_mask = 0xffffffff;

     if(cm->full_pixel)

         xd->fullpixel_mask = 0xfffffff8;

+    vp8_zero(x->coef_counts);

+    vp8_zero(x->ymode_count);

+    vp8_zero(x->uv_mode_count)

+    x->prediction_error = 0;

+    x->intra_error = 0;

+    vp8_zero(x->count_mb_ref_frame_usage);

+static void sum_coef_counts(MACROBLOCK *x, MACROBLOCK *x_thread)

+{

+    int i = 0;

+    do

+    {

+        int j = 0;

+        do

+        {

+            int k = 0;

+            do

+            {

+                /* at every context */

+                /* calc probs and branch cts for this frame only */

+                int t = 0;      /* token/prob index */

+                do

+                {

+                    x->coef_counts [i][j][k][t] +=

+                        x_thread->coef_counts [i][j][k][t];

+                }

+                while (++t < ENTROPY_NODES);

+            }

+            while (++k < PREV_COEF_CONTEXTS);

+        }

+        while (++j < COEF_BANDS);

+    }

+    while (++i < BLOCK_TYPES);

+}

 void vp8_encode_frame(VP8_COMP *cpi)

     int mb_row;

@@ -717,9 +750,7 @@

         xd->subpixel_predict16x16   = vp8_bilinear_predict16x16;

-    cpi->prediction_error = 0;

-    cpi->intra_error = 0;

-    cpi->skip_true_count = 0;

+    cpi->mb.skip_true_count = 0;

     cpi->tok_count = 0;

 #if 0

@@ -730,13 +761,11 @@

     xd->mode_info_context = cm->mi;

-    vp8_zero(cpi->MVcount);

+    vp8_zero(cpi->mb.MVcount);

-    vp8_zero(cpi->coef_counts);

     vp8cx_frame_init_quantizer(cpi);

-    vp8_initialize_rd_consts(cpi,

+    vp8_initialize_rd_consts(cpi, x,

                              vp8_dc_quant(cm->base_qindex, cm->y1dc_delta_q));

     vp8cx_initialize_me_consts(cpi, cm->base_qindex);

@@ -775,7 +804,8 @@

             int i;

-            vp8cx_init_mbrthread_data(cpi, x, cpi->mb_row_ei, 1,  cpi->encoding_thread_count);

+            vp8cx_init_mbrthread_data(cpi, x, cpi->mb_row_ei,

+                                      cpi->encoding_thread_count);

             for (i = 0; i < cm->mb_rows; i++)

                 cpi->mt_current_mb_col[i] = -1;

@@ -837,7 +867,42 @@

             for (i = 0; i < cpi->encoding_thread_count; i++)

+                int mode_count;

+                int c_idx;

                 totalrate += cpi->mb_row_ei[i].totalrate;

+                cpi->mb.skip_true_count += cpi->mb_row_ei[i].mb.skip_true_count;

+                for(mode_count = 0; mode_count < VP8_YMODES; mode_count++)

+                    cpi->mb.ymode_count[mode_count] +=

+                        cpi->mb_row_ei[i].mb.ymode_count[mode_count];

+                for(mode_count = 0; mode_count < VP8_UV_MODES; mode_count++)

+                    cpi->mb.uv_mode_count[mode_count] +=

+                        cpi->mb_row_ei[i].mb.uv_mode_count[mode_count];

+                for(c_idx = 0; c_idx < MVvals; c_idx++)

+                {

+                    cpi->mb.MVcount[0][c_idx] +=

+                        cpi->mb_row_ei[i].mb.MVcount[0][c_idx];

+                    cpi->mb.MVcount[1][c_idx] +=

+                        cpi->mb_row_ei[i].mb.MVcount[1][c_idx];

+                }

+                cpi->mb.prediction_error +=

+                    cpi->mb_row_ei[i].mb.prediction_error;

+                cpi->mb.intra_error += cpi->mb_row_ei[i].mb.intra_error;

+                for(c_idx = 0; c_idx < MAX_REF_FRAMES; c_idx++)

+                    cpi->mb.count_mb_ref_frame_usage[c_idx] +=

+                        cpi->mb_row_ei[i].mb.count_mb_ref_frame_usage[c_idx];

+                for(c_idx = 0; c_idx < MAX_ERROR_BINS; c_idx++)

+                    cpi->mb.error_bins[c_idx] +=

+                        cpi->mb_row_ei[i].mb.error_bins[c_idx];

+                /* add up counts for each thread */

+                sum_coef_counts(x, &cpi->mb_row_ei[i].mb);

@@ -844,6 +909,7 @@

         else

 #endif

             /* for each macroblock row in image */

             for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)

@@ -929,13 +995,14 @@

         int tot_modes;

-        tot_modes = cpi->count_mb_ref_frame_usage[INTRA_FRAME]

-                    + cpi->count_mb_ref_frame_usage[LAST_FRAME]

-                    + cpi->count_mb_ref_frame_usage[GOLDEN_FRAME]

-                    + cpi->count_mb_ref_frame_usage[ALTREF_FRAME];

+        tot_modes = cpi->mb.count_mb_ref_frame_usage[INTRA_FRAME]

+                    + cpi->mb.count_mb_ref_frame_usage[LAST_FRAME]

+                    + cpi->mb.count_mb_ref_frame_usage[GOLDEN_FRAME]

+                    + cpi->mb.count_mb_ref_frame_usage[ALTREF_FRAME];

         if (tot_modes)

-            cpi->this_frame_percent_intra = cpi->count_mb_ref_frame_usage[INTRA_FRAME] * 100 / tot_modes;

+            cpi->this_frame_percent_intra =

+                cpi->mb.count_mb_ref_frame_usage[INTRA_FRAME] * 100 / tot_modes;

@@ -1065,8 +1132,8 @@

 #endif

-    ++cpi->ymode_count[m];

-    ++cpi->uv_mode_count[uvm];

+    ++x->ymode_count[m];

+    ++x->uv_mode_count[uvm];

@@ -1093,15 +1160,16 @@

 #endif

-int vp8cx_encode_intra_macroblock(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t)

+int vp8cx_encode_intra_macroblock(VP8_COMP *cpi, MACROBLOCK *x,

+                                  TOKENEXTRA **t)

     MACROBLOCKD *xd = &x->e_mbd;

     int rate;

     if (cpi->sf.RD && cpi->compressor_speed != 2)

-        vp8_rd_pick_intra_mode(cpi, x, &rate);

+        vp8_rd_pick_intra_mode(x, &rate);

     else

-        vp8_pick_intra_mode(cpi, x, &rate);

+        vp8_pick_intra_mode(x, &rate);

     if(cpi->oxcf.tuning == VP8_TUNE_SSIM)

@@ -1118,7 +1186,7 @@

     sum_intra_stats(cpi, x);

-    vp8_tokenize_mb(cpi, &x->e_mbd, t);

+    vp8_tokenize_mb(cpi, x, t);

     if (xd->mode_info_context->mbmi.mode != B_PRED)

         vp8_inverse_transform_mby(xd);

@@ -1165,17 +1233,17 @@

     if (cpi->sf.RD)

-        int zbin_mode_boost_enabled = cpi->zbin_mode_boost_enabled;

+        int zbin_mode_boost_enabled = x->zbin_mode_boost_enabled;

         /* Are we using the fast quantizer for the mode selection? */

         if(cpi->sf.use_fastquant_for_pick)

-            cpi->mb.quantize_b      = vp8_fast_quantize_b;

-            cpi->mb.quantize_b_pair = vp8_fast_quantize_b_pair;

+            x->quantize_b      = vp8_fast_quantize_b;

+            x->quantize_b_pair = vp8_fast_quantize_b_pair;

             /* the fast quantizer does not use zbin_extra, so

              * do not recalculate */

-            cpi->zbin_mode_boost_enabled = 0;

+            x->zbin_mode_boost_enabled = 0;

         vp8_rd_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate,

                                &distortion, &intra_error);

@@ -1183,12 +1251,12 @@

         /* switch back to the regular quantizer for the encode */

         if (cpi->sf.improved_quant)

-            cpi->mb.quantize_b      = vp8_regular_quantize_b;

-            cpi->mb.quantize_b_pair = vp8_regular_quantize_b_pair;

+            x->quantize_b      = vp8_regular_quantize_b;

+            x->quantize_b_pair = vp8_regular_quantize_b_pair;

         /* restore cpi->zbin_mode_boost_enabled */

-        cpi->zbin_mode_boost_enabled = zbin_mode_boost_enabled;

+        x->zbin_mode_boost_enabled = zbin_mode_boost_enabled;

     else

@@ -1197,8 +1265,8 @@

                             &distortion, &intra_error, mb_row, mb_col);

-    cpi->prediction_error += distortion;

-    cpi->intra_error += intra_error;

+    x->prediction_error += distortion;

+    x->intra_error += intra_error;

     if(cpi->oxcf.tuning == VP8_TUNE_SSIM)

@@ -1234,8 +1302,8 @@

         /* Experimental code. Special case for gf and arf zeromv modes.

          * Increase zbin size to supress noise

*/

-        cpi->zbin_mode_boost = 0;

-        if (cpi->zbin_mode_boost_enabled)

+        x->zbin_mode_boost = 0;

+        if (x->zbin_mode_boost_enabled)

             if ( xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME )

@@ -1242,14 +1310,14 @@

                 if (xd->mode_info_context->mbmi.mode == ZEROMV)

                     if (xd->mode_info_context->mbmi.ref_frame != LAST_FRAME)

-                        cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;

+                        x->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;

                     else

-                        cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;

+                        x->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;

                 else if (xd->mode_info_context->mbmi.mode == SPLITMV)

-                    cpi->zbin_mode_boost = 0;

+                    x->zbin_mode_boost = 0;

                 else

-                    cpi->zbin_mode_boost = MV_ZBIN_BOOST;

+                    x->zbin_mode_boost = MV_ZBIN_BOOST;

@@ -1259,7 +1327,7 @@

             vp8_update_zbin_extra(cpi, x);

-    cpi->count_mb_ref_frame_usage[xd->mode_info_context->mbmi.ref_frame] ++;

+    x->count_mb_ref_frame_usage[xd->mode_info_context->mbmi.ref_frame] ++;

     if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)

@@ -1304,7 +1372,7 @@

     if (!x->skip)

-        vp8_tokenize_mb(cpi, xd, t);

+        vp8_tokenize_mb(cpi, x, t);

         if (xd->mode_info_context->mbmi.mode != B_PRED)

             vp8_inverse_transform_mby(xd);

@@ -1321,12 +1389,12 @@

         if (cpi->common.mb_no_coeff_skip)

-            cpi->skip_true_count ++;

+            x->skip_true_count ++;

             vp8_fix_contexts(xd);

         else

-            vp8_stuff_mb(cpi, xd, t);

+            vp8_stuff_mb(cpi, x, t);

--- a/vp8/encoder/encodemv.c

+++ b/vp8/encoder/encodemv.c

@@ -363,10 +363,12 @@

     active_section = 4;

 #endif

     write_component_probs(

-        w, &mvc[0], &vp8_default_mv_context[0], &vp8_mv_update_probs[0], cpi->MVcount[0], 0, &flags[0]

+        w, &mvc[0], &vp8_default_mv_context[0], &vp8_mv_update_probs[0],

+        cpi->mb.MVcount[0], 0, &flags[0]

);

     write_component_probs(

-        w, &mvc[1], &vp8_default_mv_context[1], &vp8_mv_update_probs[1], cpi->MVcount[1], 1, &flags[1]

+        w, &mvc[1], &vp8_default_mv_context[1], &vp8_mv_update_probs[1],

+        cpi->mb.MVcount[1], 1, &flags[1]

);

     if (flags[0] || flags[1])

--- a/vp8/encoder/ethreading.c

+++ b/vp8/encoder/ethreading.c

@@ -17,12 +17,6 @@

 #if CONFIG_MULTITHREAD

-extern int vp8cx_encode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x,

-                                         TOKENEXTRA **t,

-                                         int recon_yoffset, int recon_uvoffset,

-                                         int mb_row, int mb_col);

-extern int vp8cx_encode_intra_macroblock(VP8_COMP *cpi, MACROBLOCK *x,

-                                         TOKENEXTRA **t);

 extern void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip);

 extern void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm);

@@ -220,7 +214,9 @@

                          * vp8cx_encode_inter_macroblock()) back into the

                          * global segmentation map

*/

-                        if (cpi->cyclic_refresh_mode_enabled && xd->segmentation_enabled)

+                        if ((cpi->current_layer == 0) &&

+                            (cpi->cyclic_refresh_mode_enabled &&

+                             xd->segmentation_enabled))

                             const MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi;

                             cpi->segmentation_map[map_index + mb_col] = mbmi->segment_id;

@@ -422,6 +418,17 @@

             zd->block[i].dequant = zd->dequant_uv;

         zd->block[24].dequant = zd->dequant_y2;

 #endif

+        vpx_memcpy(z->rd_threshes, x->rd_threshes, sizeof(x->rd_threshes));

+        vpx_memcpy(z->rd_thresh_mult, x->rd_thresh_mult,

+                   sizeof(x->rd_thresh_mult));

+        z->zbin_over_quant = x->zbin_over_quant;

+        z->zbin_mode_boost_enabled = x->zbin_mode_boost_enabled;

+        z->zbin_mode_boost = x->zbin_mode_boost;

+        vpx_memset(z->error_bins, 0, sizeof(z->error_bins));

@@ -428,7 +435,6 @@

 void vp8cx_init_mbrthread_data(VP8_COMP *cpi,

                                MACROBLOCK *x,

                                MB_ROW_COMP *mbr_ei,

-                               int mb_row,

                                int count

@@ -436,7 +442,6 @@

     VP8_COMMON *const cm = & cpi->common;

     MACROBLOCKD *const xd = & x->e_mbd;

     int i;

-    (void) mb_row;

     for (i = 0; i < count; i++)

@@ -477,6 +482,15 @@

         mbd->fullpixel_mask = 0xffffffff;

         if(cm->full_pixel)

             mbd->fullpixel_mask = 0xfffffff8;

+        vp8_zero(mb->coef_counts);

+        vp8_zero(x->ymode_count);

+        mb->skip_true_count = 0;

+        vp8_zero(mb->MVcount);

+        mb->prediction_error = 0;

+        mb->intra_error = 0;

+        vp8_zero(mb->count_mb_ref_frame_usage);

+        mb->mbs_tested_so_far = 0;

--- a/vp8/encoder/firstpass.c

+++ b/vp8/encoder/firstpass.c

@@ -570,7 +570,7 @@

     /* Initialise the MV cost table to the defaults */

         int flag[2] = {1, 1};

-        vp8_initialize_rd_consts(cpi, vp8_dc_quant(cm->base_qindex, cm->y1dc_delta_q));

+        vp8_initialize_rd_consts(cpi, x, vp8_dc_quant(cm->base_qindex, cm->y1dc_delta_q));

         vpx_memcpy(cm->fc.mvc, vp8_default_mv_context, sizeof(vp8_default_mv_context));

         vp8_build_component_cost_table(cpi->mb.mvcost, (const MV_CONTEXT *) cm->fc.mvc, flag);

--- a/vp8/encoder/onyx_if.c

+++ b/vp8/encoder/onyx_if.c

@@ -239,7 +239,7 @@

     lc->rate_correction_factor           = cpi->rate_correction_factor;

     lc->key_frame_rate_correction_factor = cpi->key_frame_rate_correction_factor;

     lc->gf_rate_correction_factor        = cpi->gf_rate_correction_factor;

-    lc->zbin_over_quant                  = cpi->zbin_over_quant;

+    lc->zbin_over_quant                  = cpi->mb.zbin_over_quant;

     lc->inter_frame_target               = cpi->inter_frame_target;

     lc->total_byte_count                 = cpi->total_byte_count;

     lc->filter_level                     = cpi->common.filter_level;

@@ -247,8 +247,8 @@

     lc->last_frame_percent_intra         = cpi->last_frame_percent_intra;

     memcpy (lc->count_mb_ref_frame_usage,

-            cpi->count_mb_ref_frame_usage,

-            sizeof(cpi->count_mb_ref_frame_usage));

+            cpi->mb.count_mb_ref_frame_usage,

+            sizeof(cpi->mb.count_mb_ref_frame_usage));

 static void restore_layer_context(VP8_COMP *cpi, const int layer)

@@ -277,7 +277,7 @@

     cpi->rate_correction_factor           = lc->rate_correction_factor;

     cpi->key_frame_rate_correction_factor = lc->key_frame_rate_correction_factor;

     cpi->gf_rate_correction_factor        = lc->gf_rate_correction_factor;

-    cpi->zbin_over_quant                  = lc->zbin_over_quant;

+    cpi->mb.zbin_over_quant                  = lc->zbin_over_quant;

     cpi->inter_frame_target               = lc->inter_frame_target;

     cpi->total_byte_count                 = lc->total_byte_count;

     cpi->common.filter_level              = lc->filter_level;

@@ -284,9 +284,9 @@

     cpi->last_frame_percent_intra         = lc->last_frame_percent_intra;

-    memcpy (cpi->count_mb_ref_frame_usage,

+    memcpy (cpi->mb.count_mb_ref_frame_usage,

             lc->count_mb_ref_frame_usage,

-            sizeof(cpi->count_mb_ref_frame_usage));

+            sizeof(cpi->mb.count_mb_ref_frame_usage));

 static void setup_features(VP8_COMP *cpi)

@@ -356,8 +356,6 @@

     /* Activity mask based per mb zbin adjustments */

     vpx_free(cpi->mb_activity_map);

     cpi->mb_activity_map = 0;

-    vpx_free(cpi->mb_norm_activity_map);

-    cpi->mb_norm_activity_map = 0;

     vpx_free(cpi->mb.pip);

     cpi->mb.pip = 0;

@@ -643,11 +641,10 @@

     for (i = 0; i < MAX_MODES; i ++)

         cpi->mode_check_freq[i] = 0;

-        cpi->mode_test_hit_counts[i] = 0;

         cpi->mode_chosen_counts[i] = 0;

-    cpi->mbs_tested_so_far = 0;

+    cpi->mb.mbs_tested_so_far = 0;

     /* best quality defaults */

     sf->RD = 1;

@@ -841,7 +838,7 @@

             for (i = 0; i < min; i++)

-                sum += cpi->error_bins[i];

+                sum += cpi->mb.error_bins[i];

             total_skip = sum;

@@ -850,7 +847,7 @@

             /* i starts from 2 to make sure thresh started from 2048 */

             for (; i < 1024; i++)

-                sum += cpi->error_bins[i];

+                sum += cpi->mb.error_bins[i];

                 if (10 * sum >= (unsigned int)(cpi->Speed - 6)*(total_mbs - total_skip))

                     break;

@@ -905,7 +902,7 @@

         if (Speed >= 15)

             sf->half_pixel_search = 0;

-        vpx_memset(cpi->error_bins, 0, sizeof(cpi->error_bins));

+        vpx_memset(cpi->mb.error_bins, 0, sizeof(cpi->mb.error_bins));

     }; /* switch */

@@ -1080,10 +1077,7 @@

     /* Data used for real time vc mode to see if gf needs refreshing */

-    cpi->inter_zz_count = 0;

     cpi->zeromv_count = 0;

-    cpi->gf_bad_count = 0;

-    cpi->gf_update_recommended = 0;

     /* Structures used to monitor GF usage */

@@ -1098,11 +1092,6 @@

                     vpx_calloc(sizeof(*cpi->mb_activity_map),

                     cm->mb_rows * cm->mb_cols));

-    vpx_free(cpi->mb_norm_activity_map);

-    CHECK_MEM_ERROR(cpi->mb_norm_activity_map,

-                    vpx_calloc(sizeof(*cpi->mb_norm_activity_map),

-                    cm->mb_rows * cm->mb_cols));

     /* allocate memory for storing last frame's MVs for MV prediction. */

     vpx_free(cpi->lfmv);

     CHECK_MEM_ERROR(cpi->lfmv, vpx_calloc((cm->mb_rows+2) * (cm->mb_cols+2),

@@ -1932,7 +1921,7 @@

     /* Set starting values of RD threshold multipliers (128 = *1) */

     for (i = 0; i < MAX_MODES; i++)

-        cpi->rd_thresh_mult[i] = 128;

+        cpi->mb.rd_thresh_mult[i] = 128;

 #ifdef ENTROPY_STATS

@@ -2010,7 +1999,7 @@

     cpi->refining_search_sad = vp8_refining_search_sad;

     /* make sure frame 1 is okay */

-    cpi->error_bins[0] = cpi->common.MBs;

+    cpi->mb.error_bins[0] = cpi->common.MBs;

     /* vp8cx_init_quantizer() is first called here. Add check in

      * vp8cx_frame_init_quantizer() so that vp8cx_init_quantizer is only

@@ -2783,10 +2772,14 @@

         if (cpi->common.frames_since_golden > 1)

-            cpi->recent_ref_frame_usage[INTRA_FRAME] += cpi->count_mb_ref_frame_usage[INTRA_FRAME];

-            cpi->recent_ref_frame_usage[LAST_FRAME] += cpi->count_mb_ref_frame_usage[LAST_FRAME];

-            cpi->recent_ref_frame_usage[GOLDEN_FRAME] += cpi->count_mb_ref_frame_usage[GOLDEN_FRAME];

-            cpi->recent_ref_frame_usage[ALTREF_FRAME] += cpi->count_mb_ref_frame_usage[ALTREF_FRAME];

+            cpi->recent_ref_frame_usage[INTRA_FRAME] +=

+                cpi->mb.count_mb_ref_frame_usage[INTRA_FRAME];

+            cpi->recent_ref_frame_usage[LAST_FRAME] +=

+                cpi->mb.count_mb_ref_frame_usage[LAST_FRAME];

+            cpi->recent_ref_frame_usage[GOLDEN_FRAME] +=

+                cpi->mb.count_mb_ref_frame_usage[GOLDEN_FRAME];

+            cpi->recent_ref_frame_usage[ALTREF_FRAME] +=

+                cpi->mb.count_mb_ref_frame_usage[ALTREF_FRAME];

@@ -2798,7 +2791,7 @@

     VP8_COMMON *cm = &cpi->common;

-    const int *const rfct = cpi->count_mb_ref_frame_usage;

+    const int *const rfct = cpi->mb.count_mb_ref_frame_usage;

     const int rf_intra = rfct[INTRA_FRAME];

     const int rf_inter = rfct[LAST_FRAME] + rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME];

@@ -2865,40 +2858,19 @@

     if ((cpi->compressor_speed == 2) && (cpi->Speed >= 5) && (cpi->sf.RD == 0))

-        double change = 1.0 * abs((int)(cpi->intra_error - cpi->last_intra_error)) / (1 + cpi->last_intra_error);

-        double change2 = 1.0 * abs((int)(cpi->prediction_error - cpi->last_prediction_error)) / (1 + cpi->last_prediction_error);

+        double change = 1.0 * abs((int)(cpi->mb.intra_error -

+            cpi->last_intra_error)) / (1 + cpi->last_intra_error);

+        double change2 = 1.0 * abs((int)(cpi->mb.prediction_error -

+            cpi->last_prediction_error)) / (1 + cpi->last_prediction_error);

         double minerror = cm->MBs * 256;

-#if 0

+        cpi->last_intra_error = cpi->mb.intra_error;

+        cpi->last_prediction_error = cpi->mb.prediction_error;

-        if (10 * cpi->intra_error / (1 + cpi->prediction_error) < 15

-            && cpi->prediction_error > minerror

+        if (10 * cpi->mb.intra_error / (1 + cpi->mb.prediction_error) < 15

+            && cpi->mb.prediction_error > minerror

             && (change > .25 || change2 > .25))

-            FILE *f = fopen("intra_inter.stt", "a");

-            if (cpi->prediction_error <= 0)

-                cpi->prediction_error = 1;

-            fprintf(f, "%d %d %d %d %14.4f\n",

-                    cm->current_video_frame,

-                    (int) cpi->prediction_error,

-                    (int) cpi->intra_error,

-                    (int)((10 * cpi->intra_error) / cpi->prediction_error),

-                    change);

-            fclose(f);

-        }

-#endif

-        cpi->last_intra_error = cpi->intra_error;

-        cpi->last_prediction_error = cpi->prediction_error;

-        if (10 * cpi->intra_error / (1 + cpi->prediction_error) < 15

-            && cpi->prediction_error > minerror

-            && (change > .25 || change2 > .25))

-        {

             /*(change > 1.4 || change < .75)&& cpi->this_frame_percent_intra > cpi->last_frame_percent_intra + 3*/

             return 1;

@@ -3160,6 +3132,57 @@

         cpi->current_ref_frames[LAST_FRAME] = cm->current_video_frame;

 #endif

+#if CONFIG_TEMPORAL_DENOISING

+    if (cpi->oxcf.noise_sensitivity)

+    {

+        /* we shouldn't have to keep multiple copies as we know in advance which

+         * buffer we should start - for now to get something up and running

+         * I've chosen to copy the buffers

+         */

+        if (cm->frame_type == KEY_FRAME)

+        {

+            int i;

+            vp8_yv12_copy_frame(

+                    cpi->Source,

+                    &cpi->denoiser.yv12_running_avg[LAST_FRAME]);

+            vp8_yv12_extend_frame_borders(

+                    &cpi->denoiser.yv12_running_avg[LAST_FRAME]);

+            for (i = 2; i < MAX_REF_FRAMES - 1; i++)

+                vp8_yv12_copy_frame(

+                        &cpi->denoiser.yv12_running_avg[LAST_FRAME],

+                        &cpi->denoiser.yv12_running_avg[i]);

+        }

+        else /* For non key frames */

+        {

+            vp8_yv12_extend_frame_borders(

+                    &cpi->denoiser.yv12_running_avg[INTRA_FRAME]);

+            if (cm->refresh_alt_ref_frame || cm->copy_buffer_to_arf)

+            {

+                vp8_yv12_copy_frame(

+                        &cpi->denoiser.yv12_running_avg[INTRA_FRAME],

+                        &cpi->denoiser.yv12_running_avg[ALTREF_FRAME]);

+            }

+            if (cm->refresh_golden_frame || cm->copy_buffer_to_gf)

+            {

+                vp8_yv12_copy_frame(

+                        &cpi->denoiser.yv12_running_avg[INTRA_FRAME],

+                        &cpi->denoiser.yv12_running_avg[GOLDEN_FRAME]);

+            }

+            if(cm->refresh_last_frame)

+            {

+                vp8_yv12_copy_frame(

+                        &cpi->denoiser.yv12_running_avg[INTRA_FRAME],

+                        &cpi->denoiser.yv12_running_avg[LAST_FRAME]);

+            }

+        }

+    }

+#endif

 void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm)

@@ -3203,52 +3226,7 @@

     vp8_yv12_extend_frame_borders(cm->frame_to_show);

-#if CONFIG_TEMPORAL_DENOISING

-    if (cpi->oxcf.noise_sensitivity)

-    {

-        /* we shouldn't have to keep multiple copies as we know in advance which

-         * buffer we should start - for now to get something up and running

-         * I've chosen to copy the buffers

-         */

-        if (cm->frame_type == KEY_FRAME)

-        {

-            int i;

-            vp8_yv12_copy_frame(

-                    cpi->Source,

-                    &cpi->denoiser.yv12_running_avg[LAST_FRAME]);

-            vp8_yv12_extend_frame_borders(

-                    &cpi->denoiser.yv12_running_avg[LAST_FRAME]);

-            for (i = 2; i < MAX_REF_FRAMES - 1; i++)

-                vp8_yv12_copy_frame(

-                        cpi->Source,

-                        &cpi->denoiser.yv12_running_avg[i]);

-        }

-        else /* For non key frames */

-        {

-            vp8_yv12_extend_frame_borders(

-                    &cpi->denoiser.yv12_running_avg[LAST_FRAME]);

-            if (cm->refresh_alt_ref_frame || cm->copy_buffer_to_arf)

-            {

-                vp8_yv12_copy_frame(

-                        &cpi->denoiser.yv12_running_avg[LAST_FRAME],

-                        &cpi->denoiser.yv12_running_avg[ALTREF_FRAME]);

-            }

-            if (cm->refresh_golden_frame || cm->copy_buffer_to_gf)

-            {

-                vp8_yv12_copy_frame(

-                        &cpi->denoiser.yv12_running_avg[LAST_FRAME],

-                        &cpi->denoiser.yv12_running_avg[GOLDEN_FRAME]);

-            }

-        }

-    }

-#endif

 static void encode_frame_to_data_rate

@@ -3331,19 +3309,19 @@

     cm->copy_buffer_to_arf = 0;

     /* Clear zbin over-quant value and mode boost values. */

-    cpi->zbin_over_quant = 0;

-    cpi->zbin_mode_boost = 0;

+    cpi->mb.zbin_over_quant = 0;

+    cpi->mb.zbin_mode_boost = 0;

     /* Enable or disable mode based tweaking of the zbin

      * For 2 Pass Only used where GF/ARF prediction quality

      * is above a threshold

*/

-    cpi->zbin_mode_boost_enabled = 1;

+    cpi->mb.zbin_mode_boost_enabled = 1;

     if (cpi->pass == 2)

         if ( cpi->gfu_boost <= 400 )

-            cpi->zbin_mode_boost_enabled = 0;

+            cpi->mb.zbin_mode_boost_enabled = 0;

@@ -3410,7 +3388,7 @@

         /* Reset the RD threshold multipliers to default of * 1 (128) */

         for (i = 0; i < MAX_MODES; i++)

-            cpi->rd_thresh_mult[i] = 128;

+            cpi->mb.rd_thresh_mult[i] = 128;

@@ -4099,8 +4077,9 @@

                 q_low = (Q < q_high) ? (Q + 1) : q_high;

                 /* If we are using over quant do the same for zbin_oq_low */

-                if (cpi->zbin_over_quant > 0)

-                    zbin_oq_low = (cpi->zbin_over_quant < zbin_oq_high) ? (cpi->zbin_over_quant + 1) : zbin_oq_high;

+                if (cpi->mb.zbin_over_quant > 0)

+                    zbin_oq_low = (cpi->mb.zbin_over_quant < zbin_oq_high) ?

+                        (cpi->mb.zbin_over_quant + 1) : zbin_oq_high;

                 if (undershoot_seen)

@@ -4116,11 +4095,13 @@

                      * is max)

*/

                     if (Q < MAXQ)

-                        cpi->zbin_over_quant = 0;

+                        cpi->mb.zbin_over_quant = 0;

                     else

-                        zbin_oq_low = (cpi->zbin_over_quant < zbin_oq_high) ? (cpi->zbin_over_quant + 1) : zbin_oq_high;

-                        cpi->zbin_over_quant = (zbin_oq_high + zbin_oq_low) / 2;

+                        zbin_oq_low = (cpi->mb.zbin_over_quant < zbin_oq_high) ?

+                            (cpi->mb.zbin_over_quant + 1) : zbin_oq_high;

+                        cpi->mb.zbin_over_quant =

+                            (zbin_oq_high + zbin_oq_low) / 2;

                 else

@@ -4133,7 +4114,9 @@

                     Q = vp8_regulate_q(cpi, cpi->this_frame_target);

-                    while (((Q < q_low) || (cpi->zbin_over_quant < zbin_oq_low)) && (Retries < 10))

+                    while (((Q < q_low) ||

+                        (cpi->mb.zbin_over_quant < zbin_oq_low)) &&

+                        (Retries < 10))

                         vp8_update_rate_correction_factors(cpi, 0);

                         Q = vp8_regulate_q(cpi, cpi->this_frame_target);

@@ -4146,12 +4129,13 @@

             /* Frame is too small */

             else

-                if (cpi->zbin_over_quant == 0)

+                if (cpi->mb.zbin_over_quant == 0)

                     /* Lower q_high if not using over quant */

                     q_high = (Q > q_low) ? (Q - 1) : q_low;

                 else

                     /* else lower zbin_oq_high */

-                    zbin_oq_high = (cpi->zbin_over_quant > zbin_oq_low) ? (cpi->zbin_over_quant - 1) : zbin_oq_low;

+                    zbin_oq_high = (cpi->mb.zbin_over_quant > zbin_oq_low) ?

+                        (cpi->mb.zbin_over_quant - 1) : zbin_oq_low;

                 if (overshoot_seen)

@@ -4167,9 +4151,10 @@

                      * is max)

*/

                     if (Q < MAXQ)

-                        cpi->zbin_over_quant = 0;

+                        cpi->mb.zbin_over_quant = 0;

                     else

-                        cpi->zbin_over_quant = (zbin_oq_high + zbin_oq_low) / 2;

+                        cpi->mb.zbin_over_quant =

+                            (zbin_oq_high + zbin_oq_low) / 2;

                 else

@@ -4192,7 +4177,9 @@

                         q_low = Q;

-                    while (((Q > q_high) || (cpi->zbin_over_quant > zbin_oq_high)) && (Retries < 10))

+                    while (((Q > q_high) ||

+                        (cpi->mb.zbin_over_quant > zbin_oq_high)) &&

+                        (Retries < 10))

                         vp8_update_rate_correction_factors(cpi, 0);

                         Q = vp8_regulate_q(cpi, cpi->this_frame_target);

@@ -4210,7 +4197,9 @@

                 Q = q_low;

             /* Clamp cpi->zbin_over_quant */

-            cpi->zbin_over_quant = (cpi->zbin_over_quant < zbin_oq_low) ? zbin_oq_low : (cpi->zbin_over_quant > zbin_oq_high) ? zbin_oq_high : cpi->zbin_over_quant;

+            cpi->mb.zbin_over_quant = (cpi->mb.zbin_over_quant < zbin_oq_low) ?

+                zbin_oq_low : (cpi->mb.zbin_over_quant > zbin_oq_high) ?

+                    zbin_oq_high : cpi->mb.zbin_over_quant;

             Loop = Q != last_q;

@@ -4292,7 +4281,6 @@

         /* Point to beginning of MODE_INFO arrays. */

         MODE_INFO *tmp = cm->mi;

-        cpi->inter_zz_count = 0;

         cpi->zeromv_count = 0;

         if(cm->frame_type != KEY_FRAME)

@@ -4301,8 +4289,6 @@

                 for (mb_col = 0; mb_col < cm->mb_cols; mb_col ++)

-                    if(tmp->mbmi.mode == ZEROMV && tmp->mbmi.ref_frame == LAST_FRAME)

-                        cpi->inter_zz_count++;

                     if(tmp->mbmi.mode == ZEROMV)

                         cpi->zeromv_count++;

                     tmp++;

@@ -4732,67 +4718,6 @@

-static void check_gf_quality(VP8_COMP *cpi)

-{

-    VP8_COMMON *cm = &cpi->common;

-    int gf_active_pct = (100 * cpi->gf_active_count) / (cm->mb_rows * cm->mb_cols);

-    int gf_ref_usage_pct = (cpi->count_mb_ref_frame_usage[GOLDEN_FRAME] * 100) / (cm->mb_rows * cm->mb_cols);

-    int last_ref_zz_useage = (cpi->inter_zz_count * 100) / (cm->mb_rows * cm->mb_cols);

-    /* Gf refresh is not currently being signalled */

-    if (cpi->gf_update_recommended == 0)

-    {

-        if (cpi->common.frames_since_golden > 7)

-        {

-            /* Low use of gf */

-            if ((gf_active_pct < 10) || ((gf_active_pct + gf_ref_usage_pct) < 15))

-            {

-                /* ...but last frame zero zero usage is reasonbable so a

-                 * new gf might be appropriate

-                 */

-                if (last_ref_zz_useage >= 25)

-                {

-                    cpi->gf_bad_count ++;

-                    /* Check that the condition is stable */

-                    if (cpi->gf_bad_count >= 8)

-                    {

-                        cpi->gf_update_recommended = 1;

-                        cpi->gf_bad_count = 0;

-                    }

-                }

-                else

-                    /* Restart count as the background is not stable enough */

-                    cpi->gf_bad_count = 0;

-            }

-            else

-                /* Gf useage has picked up so reset count */

-                cpi->gf_bad_count = 0;

-        }

-    }

-    /* If the signal is set but has not been read should we cancel it. */

-    else if (last_ref_zz_useage < 15)

-    {

-        cpi->gf_update_recommended = 0;

-        cpi->gf_bad_count = 0;

-    }

-#if 0

-    {

-        FILE *f = fopen("gfneeded.stt", "a");

-        fprintf(f, "%10d %10d %10d %10d %10ld \n",

-                cm->current_video_frame,

-                cpi->common.frames_since_golden,

-                gf_active_pct, gf_ref_usage_pct,

-                cpi->gf_update_recommended);

-        fclose(f);

-    }

-#endif

-}

 #if !(CONFIG_REALTIME_ONLY)

 static void Pass2Encode(VP8_COMP *cpi, unsigned long *size, unsigned char *dest, unsigned char * dest_end, unsigned int *frame_flags)

@@ -5096,8 +5021,6 @@

     if (cpi->compressor_speed == 2)

-        if (cpi->oxcf.number_of_layers == 1)

-            check_gf_quality(cpi);

         vpx_usec_timer_start(&tsctimer);

         vpx_usec_timer_start(&ticktimer);

--- a/vp8/encoder/onyx_int.h

+++ b/vp8/encoder/onyx_int.h

@@ -43,8 +43,8 @@

 #define AF_THRESH   25

 #define AF_THRESH2  100

 #define ARF_DECAY_THRESH 12

-#define MAX_MODES 20

 #define MIN_THRESHMULT  32

 #define MAX_THRESHMULT  512

@@ -349,13 +349,9 @@

     int ambient_err;

     unsigned int mode_check_freq[MAX_MODES];

-    unsigned int mode_test_hit_counts[MAX_MODES];

     unsigned int mode_chosen_counts[MAX_MODES];

-    unsigned int mbs_tested_so_far;

-    int rd_thresh_mult[MAX_MODES];

     int rd_baseline_thresh[MAX_MODES];

-    int rd_threshes[MAX_MODES];

     int RDMULT;

     int RDDIV ;

@@ -363,9 +359,7 @@

     CODING_CONTEXT coding_context;

     /* Rate targetting variables */

-    int64_t prediction_error;

     int64_t last_prediction_error;

-    int64_t intra_error;

     int64_t last_intra_error;

     int this_frame_target;

@@ -418,12 +412,6 @@

     int ni_frames;

     int avg_frame_qindex;

-    int zbin_over_quant;

-    int zbin_mode_boost;

-    int zbin_mode_boost_enabled;

-    int last_zbin_over_quant;

-    int last_zbin_mode_boost;

     int64_t total_byte_count;

     int buffered_mode;

@@ -452,13 +440,6 @@

     int drop_frames_allowed; /* Are we permitted to drop frames? */

     int drop_frame;          /* Drop this frame? */

-    int ymode_count [VP8_YMODES];        /* intra MB type cts this frame */

-    int uv_mode_count[VP8_UV_MODES];     /* intra MB type cts this frame */

-    unsigned int MVcount [2] [MVvals];  /* (row,col) MV cts this frame */

-    unsigned int coef_counts [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];  /* for this frame */

     vp8_prob frame_coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];

     char update_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];

@@ -486,7 +467,6 @@

     int Speed;

     int compressor_speed;

-    int interquantizer;

     int auto_gold;

     int auto_adjust_gold_quantizer;

     int auto_worst_q;

@@ -502,7 +482,6 @@

     int last_skip_probs_q[3];

     int recent_ref_frame_usage[MAX_REF_FRAMES];

-    int count_mb_ref_frame_usage[MAX_REF_FRAMES];

     int this_frame_percent_intra;

     int last_frame_percent_intra;

@@ -509,18 +488,10 @@

     int ref_frame_flags;

     SPEED_FEATURES sf;

-    int error_bins[1024];

-    /* Data used for real time conferencing mode to help determine if it

-     * would be good to update the gf

-     */

-    int inter_zz_count;

     /* Count ZEROMV on all reference frames. */

     int zeromv_count;

     int lf_zeromv_pct;

-    int gf_bad_count;

-    int gf_update_recommended;

-    int skip_true_count;

     unsigned char *segmentation_map;

     signed char segment_feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS];

@@ -659,7 +630,6 @@

     /* Per MB activity measurement */

     unsigned int activity_avg;

     unsigned int * mb_activity_map;

-    int * mb_norm_activity_map;

     /* Record of which MBs still refer to last golden frame either

      * directly or through 0,0

@@ -723,13 +693,10 @@

     } rd_costs;

 } VP8_COMP;

-void control_data_rate(VP8_COMP *cpi);

+void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest,

+                        unsigned char *dest_end, unsigned long *size);

-void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char *dest_end, unsigned long *size);

-int rd_cost_intra_mb(MACROBLOCKD *x);

-void vp8_tokenize_mb(VP8_COMP *, MACROBLOCKD *, TOKENEXTRA **);

+void vp8_tokenize_mb(VP8_COMP *, MACROBLOCK *, TOKENEXTRA **);

 void vp8_set_speed_features(VP8_COMP *cpi);

--- a/vp8/encoder/pickinter.c

+++ b/vp8/encoder/pickinter.c

@@ -389,15 +389,16 @@

-static void update_mvcount(VP8_COMP *cpi, MACROBLOCKD *xd, int_mv *best_ref_mv)

+static void update_mvcount(VP8_COMP *cpi, MACROBLOCK *x, int_mv *best_ref_mv)

+    MACROBLOCKD *xd = &x->e_mbd;

     /* Split MV modes currently not supported when RD is nopt enabled,

      * therefore, only need to modify MVcount in NEWMV mode. */

     if (xd->mode_info_context->mbmi.mode == NEWMV)

-        cpi->MVcount[0][mv_max+((xd->mode_info_context->mbmi.mv.as_mv.row -

+        x->MVcount[0][mv_max+((xd->mode_info_context->mbmi.mv.as_mv.row -

                                       best_ref_mv->as_mv.row) >> 1)]++;

-        cpi->MVcount[1][mv_max+((xd->mode_info_context->mbmi.mv.as_mv.col -

+        x->MVcount[1][mv_max+((xd->mode_info_context->mbmi.mv.as_mv.col -

                                       best_ref_mv->as_mv.col) >> 1)]++;

@@ -679,7 +680,7 @@

     get_predictor_pointers(cpi, plane, recon_yoffset, recon_uvoffset);

     /* Count of the number of MBs tested so far this frame */

-    cpi->mbs_tested_so_far++;

+    x->mbs_tested_so_far++;

     *returnintra = INT_MAX;

     x->skip = 0;

@@ -700,7 +701,7 @@

         int this_rd = INT_MAX;

         int this_ref_frame = ref_frame_map[vp8_ref_frame_order[mode_index]];

-        if (best_rd <= cpi->rd_threshes[mode_index])

+        if (best_rd <= x->rd_threshes[mode_index])

             continue;

         if (this_ref_frame < 0)

@@ -745,22 +746,22 @@

         /* Check to see if the testing frequency for this mode is at its max

          * If so then prevent it from being tested and increase the threshold

          * for its testing */

-        if (cpi->mode_test_hit_counts[mode_index] &&

+        if (x->mode_test_hit_counts[mode_index] &&

                                          (cpi->mode_check_freq[mode_index] > 1))

-            if (cpi->mbs_tested_so_far <= (cpi->mode_check_freq[mode_index] *

-                                         cpi->mode_test_hit_counts[mode_index]))

+            if (x->mbs_tested_so_far <= (cpi->mode_check_freq[mode_index] *

+                                         x->mode_test_hit_counts[mode_index]))

                 /* Increase the threshold for coding this mode to make it less

                  * likely to be chosen */

-                cpi->rd_thresh_mult[mode_index] += 4;

+                x->rd_thresh_mult[mode_index] += 4;

-                if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT)

-                    cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT;

+                if (x->rd_thresh_mult[mode_index] > MAX_THRESHMULT)

+                    x->rd_thresh_mult[mode_index] = MAX_THRESHMULT;

-                cpi->rd_threshes[mode_index] =

+                x->rd_threshes[mode_index] =

                                  (cpi->rd_baseline_thresh[mode_index] >> 7) *

-                                 cpi->rd_thresh_mult[mode_index];

+                                 x->rd_thresh_mult[mode_index];

                 continue;

@@ -768,7 +769,7 @@

         /* We have now reached the point where we are going to test the current

          * mode so increment the counter for the number of times it has been

          * tested */

-        cpi->mode_test_hit_counts[mode_index] ++;

+        x->mode_test_hit_counts[mode_index] ++;

         rate2 = 0;

         distortion2 = 0;

@@ -1108,12 +1109,12 @@

             /* Testing this mode gave rise to an improvement in best error

              * score. Lower threshold a bit for next time

*/

-            cpi->rd_thresh_mult[mode_index] =

-                     (cpi->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ?

-                     cpi->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT;

-            cpi->rd_threshes[mode_index] =

+            x->rd_thresh_mult[mode_index] =

+                     (x->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ?

+                     x->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT;

+            x->rd_threshes[mode_index] =

                                    (cpi->rd_baseline_thresh[mode_index] >> 7) *

-                                   cpi->rd_thresh_mult[mode_index];

+                                   x->rd_thresh_mult[mode_index];

         /* If the mode did not help improve the best error case then raise the

@@ -1121,14 +1122,14 @@

*/

         else

-            cpi->rd_thresh_mult[mode_index] += 4;

+            x->rd_thresh_mult[mode_index] += 4;

-            if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT)

-                cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT;

+            if (x->rd_thresh_mult[mode_index] > MAX_THRESHMULT)

+                x->rd_thresh_mult[mode_index] = MAX_THRESHMULT;

-            cpi->rd_threshes[mode_index] =

+            x->rd_threshes[mode_index] =

                          (cpi->rd_baseline_thresh[mode_index] >> 7) *

-                         cpi->rd_thresh_mult[mode_index];

+                         x->rd_thresh_mult[mode_index];

         if (x->skip)

@@ -1138,16 +1139,16 @@

     /* Reduce the activation RD thresholds for the best choice mode */

     if ((cpi->rd_baseline_thresh[best_mode_index] > 0) && (cpi->rd_baseline_thresh[best_mode_index] < (INT_MAX >> 2)))

-        int best_adjustment = (cpi->rd_thresh_mult[best_mode_index] >> 3);

+        int best_adjustment = (x->rd_thresh_mult[best_mode_index] >> 3);

-        cpi->rd_thresh_mult[best_mode_index] =

-                        (cpi->rd_thresh_mult[best_mode_index]

+        x->rd_thresh_mult[best_mode_index] =

+                        (x->rd_thresh_mult[best_mode_index]

                         >= (MIN_THRESHMULT + best_adjustment)) ?

-                        cpi->rd_thresh_mult[best_mode_index] - best_adjustment :

+                        x->rd_thresh_mult[best_mode_index] - best_adjustment :

                         MIN_THRESHMULT;

-        cpi->rd_threshes[best_mode_index] =

+        x->rd_threshes[best_mode_index] =

                         (cpi->rd_baseline_thresh[best_mode_index] >> 7) *

-                        cpi->rd_thresh_mult[best_mode_index];

+                        x->rd_thresh_mult[best_mode_index];

@@ -1159,7 +1160,7 @@

             this_rdbin = 1023;

-        cpi->error_bins[this_rdbin] ++;

+        x->error_bins[this_rdbin] ++;

 #if CONFIG_TEMPORAL_DENOISING

@@ -1240,11 +1241,11 @@

       != cpi->common.ref_frame_sign_bias[xd->mode_info_context->mbmi.ref_frame])

         best_ref_mv.as_int = best_ref_mv_sb[!sign_bias].as_int;

-    update_mvcount(cpi, &x->e_mbd, &best_ref_mv);

+    update_mvcount(cpi, x, &best_ref_mv);

-void vp8_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate_)

+void vp8_pick_intra_mode(MACROBLOCK *x, int *rate_)

     int error4x4, error16x16 = INT_MAX;

     int rate, best_rate = 0, distortion, best_sse;

--- a/vp8/encoder/pickinter.h

+++ b/vp8/encoder/pickinter.h

@@ -18,7 +18,7 @@

                                 int recon_uvoffset, int *returnrate,

                                 int *returndistortion, int *returnintra,

                                 int mb_row, int mb_col);

-extern void vp8_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate);

+extern void vp8_pick_intra_mode(MACROBLOCK *x, int *rate);

 extern int vp8_get_inter_mbpred_error(MACROBLOCK *mb,

                                       const vp8_variance_fn_ptr_t *vfp,

--- a/vp8/encoder/quantize.c

+++ b/vp8/encoder/quantize.c

@@ -587,20 +587,20 @@

 #define ZBIN_EXTRA_Y \

     (( cpi->common.Y1dequant[QIndex][1] *  \

-    ( cpi->zbin_over_quant +  \

-      cpi->zbin_mode_boost +  \

+    ( x->zbin_over_quant +  \

+      x->zbin_mode_boost +  \

       x->act_zbin_adj ) ) >> 7)

 #define ZBIN_EXTRA_UV \

     (( cpi->common.UVdequant[QIndex][1] *  \

-    ( cpi->zbin_over_quant +  \

-      cpi->zbin_mode_boost +  \

+    ( x->zbin_over_quant +  \

+      x->zbin_mode_boost +  \

       x->act_zbin_adj ) ) >> 7)

 #define ZBIN_EXTRA_Y2 \

     (( cpi->common.Y2dequant[QIndex][1] *  \

-    ( (cpi->zbin_over_quant / 2) +  \

-       cpi->zbin_mode_boost +  \

+    ( (x->zbin_over_quant / 2) +  \

+       x->zbin_mode_boost +  \

        x->act_zbin_adj ) ) >> 7)

 void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip)

@@ -702,15 +702,15 @@

         /* save this macroblock QIndex for vp8_update_zbin_extra() */

         x->q_index = QIndex;

-        cpi->last_zbin_over_quant = cpi->zbin_over_quant;

-        cpi->last_zbin_mode_boost = cpi->zbin_mode_boost;

+        x->last_zbin_over_quant = x->zbin_over_quant;

+        x->last_zbin_mode_boost = x->zbin_mode_boost;

         x->last_act_zbin_adj = x->act_zbin_adj;

-    else if(cpi->last_zbin_over_quant != cpi->zbin_over_quant

-            || cpi->last_zbin_mode_boost != cpi->zbin_mode_boost

+    else if(x->last_zbin_over_quant != x->zbin_over_quant

+            || x->last_zbin_mode_boost != x->zbin_mode_boost

             || x->last_act_zbin_adj != x->act_zbin_adj)

         /* Y */

@@ -729,8 +729,8 @@

         zbin_extra = ZBIN_EXTRA_Y2;

         x->block[24].zbin_extra = (short)zbin_extra;

-        cpi->last_zbin_over_quant = cpi->zbin_over_quant;

-        cpi->last_zbin_mode_boost = cpi->zbin_mode_boost;

+        x->last_zbin_over_quant = x->zbin_over_quant;

+        x->last_zbin_mode_boost = x->zbin_mode_boost;

         x->last_act_zbin_adj = x->act_zbin_adj;

@@ -764,7 +764,7 @@

 void vp8cx_frame_init_quantizer(VP8_COMP *cpi)

     /* Clear Zbin mode boost for default case */

-    cpi->zbin_mode_boost = 0;

+    cpi->mb.zbin_mode_boost = 0;

     /* MB level quantizer setup */

     vp8cx_mb_init_quantizer(cpi, &cpi->mb, 0);

--- a/vp8/encoder/ratectrl.c

+++ b/vp8/encoder/ratectrl.c

@@ -242,8 +242,8 @@

     vp8_copy(cc->ymode_prob,   cpi->common.fc.ymode_prob);

     vp8_copy(cc->uv_mode_prob,  cpi->common.fc.uv_mode_prob);

-    vp8_copy(cc->ymode_count, cpi->ymode_count);

-    vp8_copy(cc->uv_mode_count, cpi->uv_mode_count);

+    vp8_copy(cc->ymode_count, cpi->mb.ymode_count);

+    vp8_copy(cc->uv_mode_count, cpi->mb.uv_mode_count);

     /* Stats */

@@ -280,8 +280,8 @@

     vp8_copy(cpi->common.fc.ymode_prob,   cc->ymode_prob);

     vp8_copy(cpi->common.fc.uv_mode_prob,  cc->uv_mode_prob);

-    vp8_copy(cpi->ymode_count, cc->ymode_count);

-    vp8_copy(cpi->uv_mode_count, cc->uv_mode_count);

+    vp8_copy(cpi->mb.ymode_count, cc->ymode_count);

+    vp8_copy(cpi->mb.uv_mode_count, cc->uv_mode_count);

     /* Stats */

 #ifdef MODE_STATS

@@ -1109,7 +1109,9 @@

     else

-        if (cpi->common.refresh_alt_ref_frame || cpi->common.refresh_golden_frame)

+        if (cpi->oxcf.number_of_layers == 1 &&

+           (cpi->common.refresh_alt_ref_frame ||

+            cpi->common.refresh_golden_frame))

             rate_correction_factor = cpi->gf_rate_correction_factor;

         else

             rate_correction_factor = cpi->rate_correction_factor;

@@ -1122,9 +1124,9 @@

     projected_size_based_on_q = (int)(((.5 + rate_correction_factor * vp8_bits_per_mb[cpi->common.frame_type][Q]) * cpi->common.MBs) / (1 << BPER_MB_NORMBITS));

     /* Make some allowance for cpi->zbin_over_quant */

-    if (cpi->zbin_over_quant > 0)

+    if (cpi->mb.zbin_over_quant > 0)

-        int Z = cpi->zbin_over_quant;

+        int Z = cpi->mb.zbin_over_quant;

         double Factor = 0.99;

         double factor_adjustment = 0.01 / 256.0;

@@ -1186,7 +1188,9 @@

         cpi->key_frame_rate_correction_factor = rate_correction_factor;

     else

-        if (cpi->common.refresh_alt_ref_frame || cpi->common.refresh_golden_frame)

+        if (cpi->oxcf.number_of_layers == 1 &&

+           (cpi->common.refresh_alt_ref_frame ||

+            cpi->common.refresh_golden_frame))

             cpi->gf_rate_correction_factor = rate_correction_factor;

         else

             cpi->rate_correction_factor = rate_correction_factor;

@@ -1199,7 +1203,7 @@

     int Q = cpi->active_worst_quality;

     /* Reset Zbin OQ value */

-    cpi->zbin_over_quant = 0;

+    cpi->mb.zbin_over_quant = 0;

     if (cpi->oxcf.fixed_q >= 0)

@@ -1209,11 +1213,13 @@

             Q = cpi->oxcf.key_q;

-        else if (cpi->common.refresh_alt_ref_frame)

+        else if (cpi->oxcf.number_of_layers == 1 &&

+            cpi->common.refresh_alt_ref_frame)

             Q = cpi->oxcf.alt_q;

-        else if (cpi->common.refresh_golden_frame)

+        else if (cpi->oxcf.number_of_layers == 1  &&

+            cpi->common.refresh_golden_frame)

             Q = cpi->oxcf.gold_q;

@@ -1232,7 +1238,9 @@

             correction_factor = cpi->key_frame_rate_correction_factor;

         else

-            if (cpi->common.refresh_alt_ref_frame || cpi->common.refresh_golden_frame)

+            if (cpi->oxcf.number_of_layers == 1 &&

+               (cpi->common.refresh_alt_ref_frame ||

+                cpi->common.refresh_golden_frame))

                 correction_factor = cpi->gf_rate_correction_factor;

             else

                 correction_factor = cpi->rate_correction_factor;

@@ -1281,7 +1289,10 @@

             if (cpi->common.frame_type == KEY_FRAME)

                 zbin_oqmax = 0;

-            else if (cpi->common.refresh_alt_ref_frame || (cpi->common.refresh_golden_frame && !cpi->source_alt_ref_active))

+            else if (cpi->oxcf.number_of_layers == 1 &&

+                (cpi->common.refresh_alt_ref_frame ||

+                (cpi->common.refresh_golden_frame &&

+                 !cpi->source_alt_ref_active)))

                 zbin_oqmax = 16;

             else

                 zbin_oqmax = ZBIN_OQ_MAX;

@@ -1307,12 +1318,12 @@

              * normal maximum by expanding the zero bin and hence

              * decreasing the number of low magnitude non zero coefficients.

*/

-            while (cpi->zbin_over_quant < zbin_oqmax)

+            while (cpi->mb.zbin_over_quant < zbin_oqmax)

-                cpi->zbin_over_quant ++;

+                cpi->mb.zbin_over_quant ++;

-                if (cpi->zbin_over_quant > zbin_oqmax)

-                    cpi->zbin_over_quant = zbin_oqmax;

+                if (cpi->mb.zbin_over_quant > zbin_oqmax)

+                    cpi->mb.zbin_over_quant = zbin_oqmax;

                 /* Adjust bits_per_mb_at_this_q estimate */

                 bits_per_mb_at_this_q = (int)(Factor * bits_per_mb_at_this_q);

--- a/vp8/encoder/rdopt.c

+++ b/vp8/encoder/rdopt.c

@@ -223,7 +223,7 @@

     cpi->mb.sadperbit4  =  sad_per_bit4lut[QIndex];

-void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue)

+void vp8_initialize_rd_consts(VP8_COMP *cpi, MACROBLOCK *x, int Qvalue)

     int q;

     int i;

@@ -238,15 +238,15 @@

     cpi->RDMULT = (int)(rdconst * (capped_q * capped_q));

     /* Extend rate multiplier along side quantizer zbin increases */

-    if (cpi->zbin_over_quant  > 0)

+    if (cpi->mb.zbin_over_quant  > 0)

         double oq_factor;

         double modq;

         /* Experimental code using the same basic equation as used for Q above

-         * The units of cpi->zbin_over_quant are 1/128 of Q bin size

+         * The units of cpi->mb.zbin_over_quant are 1/128 of Q bin size

*/

-        oq_factor = 1.0 + ((double)0.0015625 * cpi->zbin_over_quant);

+        oq_factor = 1.0 + ((double)0.0015625 * cpi->mb.zbin_over_quant);

         modq = (int)((double)capped_q * oq_factor);

         cpi->RDMULT = (int)(rdconst * (modq * modq));

@@ -265,6 +265,11 @@

     vp8_set_speed_features(cpi);

+    for (i = 0; i < MAX_MODES; i++)

+    {

+        x->mode_test_hit_counts[i] = 0;

+    }

     q = (int)pow(Qvalue, 1.25);

     if (q < 8)

@@ -279,14 +284,14 @@

             if (cpi->sf.thresh_mult[i] < INT_MAX)

-                cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q / 100;

+                x->rd_threshes[i] = cpi->sf.thresh_mult[i] * q / 100;

             else

-                cpi->rd_threshes[i] = INT_MAX;

+                x->rd_threshes[i] = INT_MAX;

-            cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];

+            cpi->rd_baseline_thresh[i] = x->rd_threshes[i];

     else

@@ -297,14 +302,14 @@

             if (cpi->sf.thresh_mult[i] < (INT_MAX / q))

-                cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q;

+                x->rd_threshes[i] = cpi->sf.thresh_mult[i] * q;

             else

-                cpi->rd_threshes[i] = INT_MAX;

+                x->rd_threshes[i] = INT_MAX;

-            cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];

+            cpi->rd_baseline_thresh[i] = x->rd_threshes[i];

@@ -625,7 +630,6 @@

     d[12] = p[12];

 static int rd_pick_intra4x4block(

-    VP8_COMP *cpi,

     MACROBLOCK *x,

     BLOCK *be,

     BLOCKD *b,

@@ -701,7 +705,7 @@

     return best_rd;

-static int rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate,

+static int rd_pick_intra4x4mby_modes(MACROBLOCK *mb, int *Rate,

                                      int *rate_y, int *Distortion, int best_rd)

     MACROBLOCKD *const xd = &mb->e_mbd;

@@ -741,7 +745,7 @@

         total_rd += rd_pick_intra4x4block(

-            cpi, mb, mb->block + i, xd->block + i, &best_mode, bmode_costs,

+            mb, mb->block + i, xd->block + i, &best_mode, bmode_costs,

             ta + vp8_block2above[i],

             tl + vp8_block2left[i], &r, &ry, &d);

@@ -766,8 +770,7 @@

-static int rd_pick_intra16x16mby_mode(VP8_COMP *cpi,

-                                      MACROBLOCK *x,

+static int rd_pick_intra16x16mby_mode(MACROBLOCK *x,

                                       int *Rate,

                                       int *rate_y,

                                       int *Distortion)

@@ -869,7 +872,8 @@

     return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);

-static void rd_pick_intra_mbuv_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate, int *rate_tokenonly, int *distortion)

+static void rd_pick_intra_mbuv_mode(MACROBLOCK *x, int *rate,

+                                    int *rate_tokenonly, int *distortion)

     MB_PREDICTION_MODE mode;

     MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);

@@ -1739,9 +1743,9 @@

             if (x->partition_info->bmi[i].mode == NEW4X4)

-                cpi->MVcount[0][mv_max+((x->partition_info->bmi[i].mv.as_mv.row

+                x->MVcount[0][mv_max+((x->partition_info->bmi[i].mv.as_mv.row

                                           - best_ref_mv->as_mv.row) >> 1)]++;

-                cpi->MVcount[1][mv_max+((x->partition_info->bmi[i].mv.as_mv.col

+                x->MVcount[1][mv_max+((x->partition_info->bmi[i].mv.as_mv.col

                                           - best_ref_mv->as_mv.col) >> 1)]++;

@@ -1748,9 +1752,9 @@

     else if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV)

-        cpi->MVcount[0][mv_max+((x->e_mbd.mode_info_context->mbmi.mv.as_mv.row

+        x->MVcount[0][mv_max+((x->e_mbd.mode_info_context->mbmi.mv.as_mv.row

                                           - best_ref_mv->as_mv.row) >> 1)]++;

-        cpi->MVcount[1][mv_max+((x->e_mbd.mode_info_context->mbmi.mv.as_mv.col

+        x->MVcount[1][mv_max+((x->e_mbd.mode_info_context->mbmi.mv.as_mv.col

                                           - best_ref_mv->as_mv.col) >> 1)]++;

@@ -2011,7 +2015,7 @@

     *returnintra = INT_MAX;

     /* Count of the number of MBs tested so far this frame */

-    cpi->mbs_tested_so_far++;

+    x->mbs_tested_so_far++;

     x->skip = 0;

@@ -2023,7 +2027,7 @@

         int this_ref_frame = ref_frame_map[vp8_ref_frame_order[mode_index]];

         /* Test best rd so far against threshold for trying this mode. */

-        if (best_mode.rd <= cpi->rd_threshes[mode_index])

+        if (best_mode.rd <= x->rd_threshes[mode_index])

             continue;

         if (this_ref_frame < 0)

@@ -2069,19 +2073,21 @@

          * max If so then prevent it from being tested and increase the

          * threshold for its testing

*/

-        if (cpi->mode_test_hit_counts[mode_index] && (cpi->mode_check_freq[mode_index] > 1))

+        if (x->mode_test_hit_counts[mode_index] && (cpi->mode_check_freq[mode_index] > 1))

-            if (cpi->mbs_tested_so_far  <= cpi->mode_check_freq[mode_index] * cpi->mode_test_hit_counts[mode_index])

+            if (x->mbs_tested_so_far  <= cpi->mode_check_freq[mode_index] * x->mode_test_hit_counts[mode_index])

                 /* Increase the threshold for coding this mode to make it

                  * less likely to be chosen

*/

-                cpi->rd_thresh_mult[mode_index] += 4;

+                x->rd_thresh_mult[mode_index] += 4;

-                if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT)

-                    cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT;

+                if (x->rd_thresh_mult[mode_index] > MAX_THRESHMULT)

+                    x->rd_thresh_mult[mode_index] = MAX_THRESHMULT;

-                cpi->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * cpi->rd_thresh_mult[mode_index];

+                x->rd_threshes[mode_index] =

+                    (cpi->rd_baseline_thresh[mode_index] >> 7) *

+                    x->rd_thresh_mult[mode_index];

                 continue;

@@ -2091,28 +2097,28 @@

          * current mode so increment the counter for the number of times

          * it has been tested

*/

-        cpi->mode_test_hit_counts[mode_index] ++;

+        x->mode_test_hit_counts[mode_index] ++;

         /* Experimental code. Special case for gf and arf zeromv modes.

          * Increase zbin size to supress noise

*/

-        if (cpi->zbin_mode_boost_enabled)

+        if (x->zbin_mode_boost_enabled)

             if ( this_ref_frame == INTRA_FRAME )

-                cpi->zbin_mode_boost = 0;

+                x->zbin_mode_boost = 0;

             else

                 if (vp8_mode_order[mode_index] == ZEROMV)

                     if (this_ref_frame != LAST_FRAME)

-                        cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;

+                        x->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;

                     else

-                        cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;

+                        x->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;

                 else if (vp8_mode_order[mode_index] == SPLITMV)

-                    cpi->zbin_mode_boost = 0;

+                    x->zbin_mode_boost = 0;

                 else

-                    cpi->zbin_mode_boost = MV_ZBIN_BOOST;

+                    x->zbin_mode_boost = MV_ZBIN_BOOST;

             vp8_update_zbin_extra(cpi, x);

@@ -2120,7 +2126,7 @@

         if(!uv_intra_done && this_ref_frame == INTRA_FRAME)

-            rd_pick_intra_mbuv_mode(cpi, x, &uv_intra_rate,

+            rd_pick_intra_mbuv_mode(x, &uv_intra_rate,

                                     &uv_intra_rate_tokenonly,

                                     &uv_intra_distortion);

             uv_intra_mode = x->e_mbd.mode_info_context->mbmi.uv_mode;

@@ -2146,7 +2152,7 @@

              * coding the BPRED mode: x->mbmode_cost[x->e_mbd.frame_type][BPRED]

*/

             int distortion;

-            tmp_rd = rd_pick_intra4x4mby_modes(cpi, x, &rate, &rd.rate_y, &distortion, best_mode.yrd);

+            tmp_rd = rd_pick_intra4x4mby_modes(x, &rate, &rd.rate_y, &distortion, best_mode.yrd);

             rd.rate2 += rate;

             rd.distortion2 += distortion;

@@ -2171,8 +2177,10 @@

             int this_rd_thresh;

             int distortion;

-            this_rd_thresh = (vp8_ref_frame_order[mode_index] == 1) ? cpi->rd_threshes[THR_NEW1] : cpi->rd_threshes[THR_NEW3];

-            this_rd_thresh = (vp8_ref_frame_order[mode_index] == 2) ? cpi->rd_threshes[THR_NEW2] : this_rd_thresh;

+            this_rd_thresh = (vp8_ref_frame_order[mode_index] == 1) ?

+                x->rd_threshes[THR_NEW1] : x->rd_threshes[THR_NEW3];

+            this_rd_thresh = (vp8_ref_frame_order[mode_index] == 2) ?

+                x->rd_threshes[THR_NEW2] : this_rd_thresh;

             tmp_rd = vp8_rd_pick_best_mbsegmentation(cpi, x, &best_ref_mv,

                                                      best_mode.yrd, mdcounts,

@@ -2465,8 +2473,9 @@

             /* Testing this mode gave rise to an improvement in best error

              * score. Lower threshold a bit for next time

*/

-            cpi->rd_thresh_mult[mode_index] = (cpi->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ? cpi->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT;

-            cpi->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * cpi->rd_thresh_mult[mode_index];

+            x->rd_thresh_mult[mode_index] =

+                (x->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ?

+                    x->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT;

         /* If the mode did not help improve the best error case then raise

@@ -2474,13 +2483,14 @@

*/

         else

-            cpi->rd_thresh_mult[mode_index] += 4;

+            x->rd_thresh_mult[mode_index] += 4;

-            if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT)

-                cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT;

-            cpi->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * cpi->rd_thresh_mult[mode_index];

+            if (x->rd_thresh_mult[mode_index] > MAX_THRESHMULT)

+                x->rd_thresh_mult[mode_index] = MAX_THRESHMULT;

+        x->rd_threshes[mode_index] =

+            (cpi->rd_baseline_thresh[mode_index] >> 7) *

+                x->rd_thresh_mult[mode_index];

         if (x->skip)

             break;

@@ -2490,10 +2500,16 @@

     /* Reduce the activation RD thresholds for the best choice mode */

     if ((cpi->rd_baseline_thresh[best_mode_index] > 0) && (cpi->rd_baseline_thresh[best_mode_index] < (INT_MAX >> 2)))

-        int best_adjustment = (cpi->rd_thresh_mult[best_mode_index] >> 2);

+        int best_adjustment = (x->rd_thresh_mult[best_mode_index] >> 2);

-        cpi->rd_thresh_mult[best_mode_index] = (cpi->rd_thresh_mult[best_mode_index] >= (MIN_THRESHMULT + best_adjustment)) ? cpi->rd_thresh_mult[best_mode_index] - best_adjustment : MIN_THRESHMULT;

-        cpi->rd_threshes[best_mode_index] = (cpi->rd_baseline_thresh[best_mode_index] >> 7) * cpi->rd_thresh_mult[best_mode_index];

+        x->rd_thresh_mult[best_mode_index] =

+            (x->rd_thresh_mult[best_mode_index] >=

+                (MIN_THRESHMULT + best_adjustment)) ?

+                    x->rd_thresh_mult[best_mode_index] - best_adjustment :

+                    MIN_THRESHMULT;

+        x->rd_threshes[best_mode_index] =

+            (cpi->rd_baseline_thresh[best_mode_index] >> 7) *

+                x->rd_thresh_mult[best_mode_index];

     /* Note how often each mode chosen as best */

@@ -2595,7 +2611,7 @@

     rd_update_mvcount(cpi, x, &best_ref_mv);

-void vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate_)

+void vp8_rd_pick_intra_mode(MACROBLOCK *x, int *rate_)

     int error4x4, error16x16;

     int rate4x4, rate16x16 = 0, rateuv;

@@ -2607,15 +2623,13 @@

     x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;

-    rd_pick_intra_mbuv_mode(cpi, x, &rateuv, &rateuv_tokenonly, &distuv);

+    rd_pick_intra_mbuv_mode(x, &rateuv, &rateuv_tokenonly, &distuv);

     rate = rateuv;

-    error16x16 = rd_pick_intra16x16mby_mode(cpi, x,

-                                            &rate16x16, &rate16x16_tokenonly,

+    error16x16 = rd_pick_intra16x16mby_mode(x, &rate16x16, &rate16x16_tokenonly,

                                             &dist16x16);

-    error4x4 = rd_pick_intra4x4mby_modes(cpi, x,

-                                         &rate4x4, &rate4x4_tokenonly,

+    error4x4 = rd_pick_intra4x4mby_modes(x, &rate4x4, &rate4x4_tokenonly,

                                          &dist4x4, error16x16);

     if (error4x4 < error16x16)

--- a/vp8/encoder/rdopt.h

+++ b/vp8/encoder/rdopt.h

@@ -65,9 +65,9 @@

-extern void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue);

+extern void vp8_initialize_rd_consts(VP8_COMP *cpi, MACROBLOCK *x, int Qvalue);

 extern void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int recon_uvoffset, int *returnrate, int *returndistortion, int *returnintra);

-extern void vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate);

+extern void vp8_rd_pick_intra_mode(MACROBLOCK *x, int *rate);

 static void get_plane_pointers(const YV12_BUFFER_CONFIG *fb,

--- a/vp8/encoder/tokenize.c

+++ b/vp8/encoder/tokenize.c

@@ -23,7 +23,7 @@

 #ifdef ENTROPY_STATS

 _int64 context_counters[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];

 #endif

-void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) ;

+void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t) ;

 void vp8_fix_contexts(MACROBLOCKD *x);

 #include "dct_value_tokens.h"

@@ -102,11 +102,12 @@

 static void tokenize2nd_order_b

-    MACROBLOCKD *x,

+    MACROBLOCK *x,

     TOKENEXTRA **tp,

     VP8_COMP *cpi

+    MACROBLOCKD *xd = &x->e_mbd;

     int pt;             /* near block/prev token context index */

     int c;              /* start at DC */

     TOKENEXTRA *t = *tp;/* store tokens starting here */

@@ -117,11 +118,11 @@

     int band, rc, v, token;

     int eob;

-    b = x->block + 24;

+    b = xd->block + 24;

     qcoeff_ptr = b->qcoeff;

-    a = (ENTROPY_CONTEXT *)x->above_context + 8;

-    l = (ENTROPY_CONTEXT *)x->left_context + 8;

-    eob = x->eobs[24];

+    a = (ENTROPY_CONTEXT *)xd->above_context + 8;

+    l = (ENTROPY_CONTEXT *)xd->left_context + 8;

+    eob = xd->eobs[24];

     VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);

     if(!eob)

@@ -131,7 +132,7 @@

         t->context_tree = cpi->common.fc.coef_probs [1] [0] [pt];

         t->skip_eob_node = 0;

-        ++cpi->coef_counts       [1] [0] [pt] [DCT_EOB_TOKEN];

+        ++x->coef_counts       [1] [0] [pt] [DCT_EOB_TOKEN];

         t++;

         *tp = t;

         *a = *l = 0;

@@ -145,7 +146,7 @@

     t->context_tree = cpi->common.fc.coef_probs [1] [0] [pt];

     t->skip_eob_node = 0;

-    ++cpi->coef_counts       [1] [0] [pt] [token];

+    ++x->coef_counts       [1] [0] [pt] [token];

     pt = vp8_prev_token_class[token];

     t++;

     c = 1;

@@ -164,7 +165,7 @@

         t->skip_eob_node = ((pt == 0));

-        ++cpi->coef_counts       [1] [band] [pt] [token];

+        ++x->coef_counts       [1] [band] [pt] [token];

         pt = vp8_prev_token_class[token];

         t++;

@@ -177,7 +178,7 @@

         t->skip_eob_node = 0;

-        ++cpi->coef_counts       [1] [band] [pt] [DCT_EOB_TOKEN];

+        ++x->coef_counts       [1] [band] [pt] [DCT_EOB_TOKEN];

         t++;

@@ -189,12 +190,13 @@

 static void tokenize1st_order_b

-    MACROBLOCKD *x,

+    MACROBLOCK *x,

     TOKENEXTRA **tp,

     int type,           /* which plane: 0=Y no DC, 1=Y2, 2=UV, 3=Y with DC */

     VP8_COMP *cpi

+    MACROBLOCKD *xd = &x->e_mbd;

     unsigned int block;

     const BLOCKD *b;

     int pt;             /* near block/prev token context index */

@@ -207,7 +209,7 @@

     int band, rc, v;

     int tmp1, tmp2;

-    b = x->block;

+    b = xd->block;

     /* Luma */

     for (block = 0; block < 16; block++, b++)

@@ -214,8 +216,8 @@

         tmp1 = vp8_block2above[block];

         tmp2 = vp8_block2left[block];

         qcoeff_ptr = b->qcoeff;

-        a = (ENTROPY_CONTEXT *)x->above_context + tmp1;

-        l = (ENTROPY_CONTEXT *)x->left_context + tmp2;

+        a = (ENTROPY_CONTEXT *)xd->above_context + tmp1;

+        l = (ENTROPY_CONTEXT *)xd->left_context + tmp2;

         VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);

@@ -228,7 +230,7 @@

             t->context_tree = cpi->common.fc.coef_probs [type] [c] [pt];

             t->skip_eob_node = 0;

-            ++cpi->coef_counts       [type] [c] [pt] [DCT_EOB_TOKEN];

+            ++x->coef_counts       [type] [c] [pt] [DCT_EOB_TOKEN];

             t++;

             *tp = t;

             *a = *l = 0;

@@ -243,7 +245,7 @@

         t->context_tree = cpi->common.fc.coef_probs [type] [c] [pt];

         t->skip_eob_node = 0;

-        ++cpi->coef_counts       [type] [c] [pt] [token];

+        ++x->coef_counts       [type] [c] [pt] [token];

         pt = vp8_prev_token_class[token];

         t++;

         c++;

@@ -261,7 +263,7 @@

             t->context_tree = cpi->common.fc.coef_probs [type] [band] [pt];

             t->skip_eob_node = (pt == 0);

-            ++cpi->coef_counts       [type] [band] [pt] [token];

+            ++x->coef_counts       [type] [band] [pt] [token];

             pt = vp8_prev_token_class[token];

             t++;

@@ -273,7 +275,7 @@

             t->context_tree = cpi->common.fc.coef_probs [type] [band] [pt];

             t->skip_eob_node = 0;

-            ++cpi->coef_counts       [type] [band] [pt] [DCT_EOB_TOKEN];

+            ++x->coef_counts       [type] [band] [pt] [DCT_EOB_TOKEN];

             t++;

@@ -287,8 +289,8 @@

         tmp1 = vp8_block2above[block];

         tmp2 = vp8_block2left[block];

         qcoeff_ptr = b->qcoeff;

-        a = (ENTROPY_CONTEXT *)x->above_context + tmp1;

-        l = (ENTROPY_CONTEXT *)x->left_context + tmp2;

+        a = (ENTROPY_CONTEXT *)xd->above_context + tmp1;

+        l = (ENTROPY_CONTEXT *)xd->left_context + tmp2;

         VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);

@@ -299,7 +301,7 @@

             t->context_tree = cpi->common.fc.coef_probs [2] [0] [pt];

             t->skip_eob_node = 0;

-            ++cpi->coef_counts       [2] [0] [pt] [DCT_EOB_TOKEN];

+            ++x->coef_counts       [2] [0] [pt] [DCT_EOB_TOKEN];

             t++;

             *tp = t;

             *a = *l = 0;

@@ -314,7 +316,7 @@

         t->context_tree = cpi->common.fc.coef_probs [2] [0] [pt];

         t->skip_eob_node = 0;

-        ++cpi->coef_counts       [2] [0] [pt] [token];

+        ++x->coef_counts       [2] [0] [pt] [token];

         pt = vp8_prev_token_class[token];

         t++;

         c = 1;

@@ -333,7 +335,7 @@

             t->skip_eob_node = (pt == 0);

-            ++cpi->coef_counts       [2] [band] [pt] [token];

+            ++x->coef_counts       [2] [band] [pt] [token];

             pt = vp8_prev_token_class[token];

             t++;

@@ -346,7 +348,7 @@

             t->skip_eob_node = 0;

-            ++cpi->coef_counts       [2] [band] [pt] [DCT_EOB_TOKEN];

+            ++x->coef_counts       [2] [band] [pt] [DCT_EOB_TOKEN];

             t++;

@@ -374,16 +376,18 @@

-void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t)

+void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t)

+    MACROBLOCKD *xd = &x->e_mbd;

     int plane_type;

     int has_y2_block;

-    has_y2_block = (x->mode_info_context->mbmi.mode != B_PRED

-                    && x->mode_info_context->mbmi.mode != SPLITMV);

+    has_y2_block = (xd->mode_info_context->mbmi.mode != B_PRED

+                    && xd->mode_info_context->mbmi.mode != SPLITMV);

-    x->mode_info_context->mbmi.mb_skip_coeff = mb_is_skippable(x, has_y2_block);

-    if (x->mode_info_context->mbmi.mb_skip_coeff)

+    xd->mode_info_context->mbmi.mb_skip_coeff =

+        mb_is_skippable(xd, has_y2_block);

+    if (xd->mode_info_context->mbmi.mb_skip_coeff)

         if (!cpi->common.mb_no_coeff_skip)

@@ -391,8 +395,8 @@

         else

-            vp8_fix_contexts(x);

-            cpi->skip_true_count++;

+            vp8_fix_contexts(xd);

+            x->skip_true_count++;

         return;

@@ -488,7 +492,8 @@

     TOKENEXTRA **tp,

     ENTROPY_CONTEXT *a,

     ENTROPY_CONTEXT *l,

-    VP8_COMP *cpi

+    VP8_COMP *cpi,

+    MACROBLOCK *x

     int pt; /* near block/prev token context index */

@@ -498,13 +503,12 @@

     t->Token = DCT_EOB_TOKEN;

     t->context_tree = cpi->common.fc.coef_probs [1] [0] [pt];

     t->skip_eob_node = 0;

-    ++cpi->coef_counts       [1] [0] [pt] [DCT_EOB_TOKEN];

+    ++x->coef_counts       [1] [0] [pt] [DCT_EOB_TOKEN];

     ++t;

     *tp = t;

     pt = 0;

     *a = *l = pt;

 static void stuff1st_order_b

@@ -513,7 +517,8 @@

     ENTROPY_CONTEXT *a,

     ENTROPY_CONTEXT *l,

     int type,

-    VP8_COMP *cpi

+    VP8_COMP *cpi,

+    MACROBLOCK *x

     int pt; /* near block/prev token context index */

@@ -524,13 +529,13 @@

     t->Token = DCT_EOB_TOKEN;

     t->context_tree = cpi->common.fc.coef_probs [type] [band] [pt];

     t->skip_eob_node = 0;

-    ++cpi->coef_counts       [type] [band] [pt] [DCT_EOB_TOKEN];

+    ++x->coef_counts       [type] [band] [pt] [DCT_EOB_TOKEN];

     ++t;

     *tp = t;

     pt = 0; /* 0 <-> all coeff data is zero */

     *a = *l = pt;

 static

 void stuff1st_order_buv

@@ -537,7 +542,8 @@

     TOKENEXTRA **tp,

     ENTROPY_CONTEXT *a,

     ENTROPY_CONTEXT *l,

-    VP8_COMP *cpi

+    VP8_COMP *cpi,

+    MACROBLOCK *x

     int pt; /* near block/prev token context index */

@@ -547,26 +553,26 @@

     t->Token = DCT_EOB_TOKEN;

     t->context_tree = cpi->common.fc.coef_probs [2] [0] [pt];

     t->skip_eob_node = 0;

-    ++cpi->coef_counts[2] [0] [pt] [DCT_EOB_TOKEN];

+    ++x->coef_counts[2] [0] [pt] [DCT_EOB_TOKEN];

     ++t;

     *tp = t;

     pt = 0; /* 0 <-> all coeff data is zero */

     *a = *l = pt;

-void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t)

+void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t)

-    ENTROPY_CONTEXT * A = (ENTROPY_CONTEXT *)x->above_context;

-    ENTROPY_CONTEXT * L = (ENTROPY_CONTEXT *)x->left_context;

+    MACROBLOCKD *xd = &x->e_mbd;

+    ENTROPY_CONTEXT * A = (ENTROPY_CONTEXT *)xd->above_context;

+    ENTROPY_CONTEXT * L = (ENTROPY_CONTEXT *)xd->left_context;

     int plane_type;

     int b;

     plane_type = 3;

-    if((x->mode_info_context->mbmi.mode != B_PRED

-                        && x->mode_info_context->mbmi.mode != SPLITMV))

+    if((xd->mode_info_context->mbmi.mode != B_PRED

+                        && xd->mode_info_context->mbmi.mode != SPLITMV))

         stuff2nd_order_b(t,

-                     A + vp8_block2above[24], L + vp8_block2left[24], cpi);

+                     A + vp8_block2above[24], L + vp8_block2left[24], cpi, x);

         plane_type = 0;

@@ -573,12 +579,12 @@

     for (b = 0; b < 16; b++)

         stuff1st_order_b(t,

                          A + vp8_block2above[b],

-                         L + vp8_block2left[b], plane_type, cpi);

+                         L + vp8_block2left[b], plane_type, cpi, x);

     for (b = 16; b < 24; b++)

         stuff1st_order_buv(t,

                            A + vp8_block2above[b],

-                           L + vp8_block2left[b], cpi);

+                           L + vp8_block2left[b], cpi, x);

 void vp8_fix_contexts(MACROBLOCKD *x)

--- a/vp8/encoder/x86/denoising_sse2.c

+++ b/vp8/encoder/x86/denoising_sse2.c

@@ -15,6 +15,7 @@

 #include "vp8_rtcd.h"

 #include <emmintrin.h>

+#include "vpx_ports/emmintrin_compat.h"

 union sum_union {

     __m128i v;

--- a/vp8/vp8cx.mk

+++ b/vp8/vp8cx.mk

@@ -20,16 +20,9 @@

   include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8cx_arm.mk

 endif

-VP8_CX_SRCS-yes += vp8_cx_iface.c

+VP8_CX_SRCS-yes += vp8cx.mk

-# encoder

-#INCLUDES += algo/vpx_common/vpx_mem/include

-#INCLUDES += common

-#INCLUDES += common

-#INCLUDES += common

-#INCLUDES += algo/vpx_ref/cpu_id/include

-#INCLUDES += common

-#INCLUDES += encoder

+VP8_CX_SRCS-yes += vp8_cx_iface.c

 VP8_CX_SRCS-yes += encoder/asm_enc_offsets.c

 VP8_CX_SRCS-yes += encoder/defaultcoefcounts.h

--- a/vp8/vp8cx_arm.mk

+++ b/vp8/vp8cx_arm.mk

@@ -9,7 +9,7 @@

##

-#VP8_CX_SRCS list is modified according to different platforms.

+VP8_CX_SRCS-$(ARCH_ARM)  += vp8cx_arm.mk

 #File list for arm

 # encoder

--- a/vp8/vp8dx.mk

+++ b/vp8/vp8dx.mk

@@ -16,6 +16,8 @@

 VP8_DX_SRCS_REMOVE-yes += $(VP8_COMMON_SRCS_REMOVE-yes)

 VP8_DX_SRCS_REMOVE-no  += $(VP8_COMMON_SRCS_REMOVE-no)

+VP8_DX_SRCS-yes += vp8dx.mk

 VP8_DX_SRCS-yes += vp8_dx_iface.c

 VP8_DX_SRCS-yes += decoder/asm_dec_offsets.c

--- a/vp9/common/generic/vp9_systemdependent.c

+++ b/vp9/common/generic/vp9_systemdependent.c

@@ -9,7 +9,7 @@

*/

-#include "vpx_ports/config.h"

+#include "./vpx_config.h"

 #include "vp9_rtcd.h"

 #include "vp9/common/vp9_subpixel.h"

 #include "vp9/common/vp9_loopfilter.h"

--- a/vp9/common/vp9_alloccommon.c

+++ b/vp9/common/vp9_alloccommon.c

@@ -9,7 +9,7 @@

*/

-#include "vpx_ports/config.h"

+#include "./vpx_config.h"

 #include "vp9/common/vp9_blockd.h"

 #include "vpx_mem/vpx_mem.h"

 #include "vp9/common/vp9_onyxc_int.h"

--- a/vp9/common/vp9_blockd.h

+++ b/vp9/common/vp9_blockd.h

@@ -14,7 +14,7 @@

 void vpx_log(const char *format, ...);

-#include "vpx_ports/config.h"

+#include "./vpx_config.h"

 #include "vpx_scale/yv12config.h"

 #include "vp9/common/vp9_mv.h"

 #include "vp9/common/vp9_treecoder.h"

--- a/vp9/common/vp9_idctllm.c

+++ b/vp9/common/vp9_idctllm.c

@@ -24,7 +24,7 @@

  **************************************************************************/

 #include <assert.h>

 #include <math.h>

-#include "vpx_ports/config.h"

+#include "./vpx_config.h"

 #include "vp9/common/vp9_systemdependent.h"

 #include "vp9/common/vp9_blockd.h"

 #include "vp9/common/vp9_common.h"

@@ -33,60 +33,6 @@

 static const int sinpi8sqrt2      = 35468;

 static const int rounding = 0;

-// TODO: these transforms can be further converted into integer forms

-//       for complexity optimization

-static const float idct_4[16] = {

-  0.500000000000000,   0.653281482438188,   0.500000000000000,   0.270598050073099,

-  0.500000000000000,   0.270598050073099,  -0.500000000000000,  -0.653281482438188,

-  0.500000000000000,  -0.270598050073099,  -0.500000000000000,   0.653281482438188,

-  0.500000000000000,  -0.653281482438188,   0.500000000000000,  -0.270598050073099

-};

-static const float iadst_4[16] = {

-  0.228013428883779,   0.577350269189626,   0.656538502008139,   0.428525073124360,

-  0.428525073124360,   0.577350269189626,  -0.228013428883779,  -0.656538502008139,

-  0.577350269189626,                   0,  -0.577350269189626,   0.577350269189626,

-  0.656538502008139,  -0.577350269189626,   0.428525073124359,  -0.228013428883779

-};

-static const float idct_8[64] = {

-  0.353553390593274,   0.490392640201615,   0.461939766255643,   0.415734806151273,

-  0.353553390593274,   0.277785116509801,   0.191341716182545,   0.097545161008064,

-  0.353553390593274,   0.415734806151273,   0.191341716182545,  -0.097545161008064,

- -0.353553390593274,  -0.490392640201615,  -0.461939766255643,  -0.277785116509801,

-  0.353553390593274,   0.277785116509801,  -0.191341716182545,  -0.490392640201615,

- -0.353553390593274,   0.097545161008064,   0.461939766255643,   0.415734806151273,

-  0.353553390593274,   0.097545161008064,  -0.461939766255643,  -0.277785116509801,

-  0.353553390593274,   0.415734806151273,  -0.191341716182545,  -0.490392640201615,

-  0.353553390593274,  -0.097545161008064,  -0.461939766255643,   0.277785116509801,

-  0.353553390593274,  -0.415734806151273,  -0.191341716182545,   0.490392640201615,

-  0.353553390593274,  -0.277785116509801,  -0.191341716182545,   0.490392640201615,

- -0.353553390593274,  -0.097545161008064,   0.461939766255643,  -0.415734806151273,

-  0.353553390593274,  -0.415734806151273,   0.191341716182545,   0.097545161008064,

- -0.353553390593274,   0.490392640201615,  -0.461939766255643,   0.277785116509801,

-  0.353553390593274,  -0.490392640201615,   0.461939766255643,  -0.415734806151273,

-  0.353553390593274,  -0.277785116509801,   0.191341716182545,  -0.097545161008064

-};

-static const float iadst_8[64] = {

-  0.089131608307533,   0.255357107325376,   0.387095214016349,   0.466553967085785,

-  0.483002021635509,   0.434217976756762,   0.326790388032145,   0.175227946595735,

-  0.175227946595735,   0.434217976756762,   0.466553967085785,   0.255357107325376,

- -0.089131608307533,  -0.387095214016348,  -0.483002021635509,  -0.326790388032145,

-  0.255357107325376,   0.483002021635509,   0.175227946595735,  -0.326790388032145,

- -0.466553967085785,  -0.089131608307533,   0.387095214016349,   0.434217976756762,

-  0.326790388032145,   0.387095214016349,  -0.255357107325376,  -0.434217976756762,

-  0.175227946595735,   0.466553967085786,  -0.089131608307534,  -0.483002021635509,

-  0.387095214016349,   0.175227946595735,  -0.483002021635509,   0.089131608307533,

-  0.434217976756762,  -0.326790388032145,  -0.255357107325377,   0.466553967085785,

-  0.434217976756762,  -0.089131608307533,  -0.326790388032145,   0.483002021635509,

- -0.255357107325376,  -0.175227946595735,   0.466553967085785,  -0.387095214016348,

-  0.466553967085785,  -0.326790388032145,   0.089131608307533,   0.175227946595735,

- -0.387095214016348,   0.483002021635509,  -0.434217976756762,   0.255357107325376,

-  0.483002021635509,  -0.466553967085785,   0.434217976756762,  -0.387095214016348,

-  0.326790388032145,  -0.255357107325375,   0.175227946595736,  -0.089131608307532

-};

 static const int16_t idct_i4[16] = {

   8192,  10703,  8192,   4433,

   8192,   4433, -8192, -10703,

@@ -139,75 +85,7 @@

    5354, -4184,  2871, -1460

};

-static float idct_16[256] = {

-  0.250000,  0.351851,  0.346760,  0.338330,  0.326641,  0.311806,  0.293969,  0.273300,

-  0.250000,  0.224292,  0.196424,  0.166664,  0.135299,  0.102631,  0.068975,  0.034654,

-  0.250000,  0.338330,  0.293969,  0.224292,  0.135299,  0.034654, -0.068975, -0.166664,

- -0.250000, -0.311806, -0.346760, -0.351851, -0.326641, -0.273300, -0.196424, -0.102631,

-  0.250000,  0.311806,  0.196424,  0.034654, -0.135299, -0.273300, -0.346760, -0.338330,

- -0.250000, -0.102631,  0.068975,  0.224292,  0.326641,  0.351851,  0.293969,  0.166664,

-  0.250000,  0.273300,  0.068975, -0.166664, -0.326641, -0.338330, -0.196424,  0.034654,

-  0.250000,  0.351851,  0.293969,  0.102631, -0.135299, -0.311806, -0.346760, -0.224292,

-  0.250000,  0.224292, -0.068975, -0.311806, -0.326641, -0.102631,  0.196424,  0.351851,

-  0.250000, -0.034654, -0.293969, -0.338330, -0.135299,  0.166664,  0.346760,  0.273300,

-  0.250000,  0.166664, -0.196424, -0.351851, -0.135299,  0.224292,  0.346760,  0.102631,

- -0.250000, -0.338330, -0.068975,  0.273300,  0.326641,  0.034654, -0.293969, -0.311806,

-  0.250000,  0.102631, -0.293969, -0.273300,  0.135299,  0.351851,  0.068975, -0.311806,

- -0.250000,  0.166664,  0.346760,  0.034654, -0.326641, -0.224292,  0.196424,  0.338330,

-  0.250000,  0.034654, -0.346760, -0.102631,  0.326641,  0.166664, -0.293969, -0.224292,

-  0.250000,  0.273300, -0.196424, -0.311806,  0.135299,  0.338330, -0.068975, -0.351851,

-  0.250000, -0.034654, -0.346760,  0.102631,  0.326641, -0.166664, -0.293969,  0.224292,

-  0.250000, -0.273300, -0.196424,  0.311806,  0.135299, -0.338330, -0.068975,  0.351851,

-  0.250000, -0.102631, -0.293969,  0.273300,  0.135299, -0.351851,  0.068975,  0.311806,

- -0.250000, -0.166664,  0.346760, -0.034654, -0.326641,  0.224292,  0.196424, -0.338330,

-  0.250000, -0.166664, -0.196424,  0.351851, -0.135299, -0.224292,  0.346760, -0.102631,

- -0.250000,  0.338330, -0.068975, -0.273300,  0.326641, -0.034654, -0.293969,  0.311806,

-  0.250000, -0.224292, -0.068975,  0.311806, -0.326641,  0.102631,  0.196424, -0.351851,

-  0.250000,  0.034654, -0.293969,  0.338330, -0.135299, -0.166664,  0.346760, -0.273300,

-  0.250000, -0.273300,  0.068975,  0.166664, -0.326641,  0.338330, -0.196424, -0.034654,

-  0.250000, -0.351851,  0.293969, -0.102631, -0.135299,  0.311806, -0.346760,  0.224292,

-  0.250000, -0.311806,  0.196424, -0.034654, -0.135299,  0.273300, -0.346760,  0.338330,

- -0.250000,  0.102631,  0.068975, -0.224292,  0.326641, -0.351851,  0.293969, -0.166664,

-  0.250000, -0.338330,  0.293969, -0.224292,  0.135299, -0.034654, -0.068975,  0.166664,

- -0.250000,  0.311806, -0.346760,  0.351851, -0.326641,  0.273300, -0.196424,  0.102631,

-  0.250000, -0.351851,  0.346760, -0.338330,  0.326641, -0.311806,  0.293969, -0.273300,

-  0.250000, -0.224292,  0.196424, -0.166664,  0.135299, -0.102631,  0.068975, -0.034654

-};

-static float iadst_16[256] = {

-  0.033094,  0.098087,  0.159534,  0.215215,  0.263118,  0.301511,  0.329007,  0.344612,

-  0.347761,  0.338341,  0.316693,  0.283599,  0.240255,  0.188227,  0.129396,  0.065889,

-  0.065889,  0.188227,  0.283599,  0.338341,  0.344612,  0.301511,  0.215215,  0.098087,

- -0.033094, -0.159534, -0.263118, -0.329007, -0.347761, -0.316693, -0.240255, -0.129396,

-  0.098087,  0.263118,  0.344612,  0.316693,  0.188227,  0.000000, -0.188227, -0.316693,

- -0.344612, -0.263118, -0.098087,  0.098087,  0.263118,  0.344612,  0.316693,  0.188227,

-  0.129396,  0.316693,  0.329007,  0.159534, -0.098087, -0.301511, -0.338341, -0.188227,

-  0.065889,  0.283599,  0.344612,  0.215215, -0.033094, -0.263118, -0.347761, -0.240255,

-  0.159534,  0.344612,  0.240255, -0.065889, -0.316693, -0.301511, -0.033094,  0.263118,

-  0.338341,  0.129396, -0.188227, -0.347761, -0.215215,  0.098087,  0.329007,  0.283599,

-  0.188227,  0.344612,  0.098087, -0.263118, -0.316693, -0.000000,  0.316693,  0.263118,

- -0.098087, -0.344612, -0.188227,  0.188227,  0.344612,  0.098087, -0.263118, -0.316693,

-  0.215215,  0.316693, -0.065889, -0.347761, -0.098087,  0.301511,  0.240255, -0.188227,

- -0.329007,  0.033094,  0.344612,  0.129396, -0.283599, -0.263118,  0.159534,  0.338341,

-  0.240255,  0.263118, -0.215215, -0.283599,  0.188227,  0.301511, -0.159534, -0.316693,

-  0.129396,  0.329007, -0.098087, -0.338341,  0.065889,  0.344612, -0.033094, -0.347761,

-  0.263118,  0.188227, -0.316693, -0.098087,  0.344612,  0.000000, -0.344612,  0.098087,

-  0.316693, -0.188227, -0.263118,  0.263118,  0.188227, -0.316693, -0.098087,  0.344612,

-  0.283599,  0.098087, -0.347761,  0.129396,  0.263118, -0.301511, -0.065889,  0.344612,

- -0.159534, -0.240255,  0.316693,  0.033094, -0.338341,  0.188227,  0.215215, -0.329007,

-  0.301511,  0.000000, -0.301511,  0.301511,  0.000000, -0.301511,  0.301511,  0.000000,

- -0.301511,  0.301511,  0.000000, -0.301511,  0.301511,  0.000000, -0.301511,  0.301511,

-  0.316693, -0.098087, -0.188227,  0.344612, -0.263118, -0.000000,  0.263118, -0.344612,

-  0.188227,  0.098087, -0.316693,  0.316693, -0.098087, -0.188227,  0.344612, -0.263118,

-  0.329007, -0.188227, -0.033094,  0.240255, -0.344612,  0.301511, -0.129396, -0.098087,

-  0.283599, -0.347761,  0.263118, -0.065889, -0.159534,  0.316693, -0.338341,  0.215215,

-  0.338341, -0.263118,  0.129396,  0.033094, -0.188227,  0.301511, -0.347761,  0.316693,

- -0.215215,  0.065889,  0.098087, -0.240255,  0.329007, -0.344612,  0.283599, -0.159534,

-  0.344612, -0.316693,  0.263118, -0.188227,  0.098087,  0.000000, -0.098087,  0.188227,

- -0.263118,  0.316693, -0.344612,  0.344612, -0.316693,  0.263118, -0.188227,  0.098087,

-  0.347761, -0.344612,  0.338341, -0.329007,  0.316693, -0.301511,  0.283599, -0.263118,

-  0.240255, -0.215215,  0.188227, -0.159534,  0.129396, -0.098087,  0.065889, -0.033094

-};

 static const int16_t idct_i16[256] = {

    4096,  5765,  5681,  5543,  5352,  5109,  4816,  4478,

@@ -279,125 +157,7 @@

    3936, -3526,  3084, -2614,  2120, -1607,  1080,  -542

};

-void vp9_ihtllm_float_c(const int16_t *input, int16_t *output, int pitch,

-                  TX_TYPE tx_type, int tx_dim) {

-  vp9_clear_system_state();  // Make it simd safe : __asm emms;

-  {

-    int i, j, k;

-    float bufa[256], bufb[256];  // buffers are for floating-point test purpose

-                                 // the implementation could be simplified in

-                                 // conjunction with integer transform

-    const int16_t *ip = input;

-    int16_t *op = output;

-    int shortpitch = pitch >> 1;

-    float *pfa = &bufa[0];

-    float *pfb = &bufb[0];

-    // pointers to vertical and horizontal transforms

-    const float *ptv, *pth;

-    assert(tx_type != DCT_DCT);

-    // load and convert residual array into floating-point

-    for(j = 0; j < tx_dim; j++) {

-      for(i = 0; i < tx_dim; i++) {

-        pfa[i] = (float)ip[i];

-      }

-      pfa += tx_dim;

-      ip  += tx_dim;

-    }

-    // vertical transformation

-    pfa = &bufa[0];

-    pfb = &bufb[0];

-    switch(tx_type) {

-      case ADST_ADST :

-      case ADST_DCT  :

-        ptv = (tx_dim == 4) ? &iadst_4[0] :

-                              ((tx_dim == 8) ? &iadst_8[0] : &iadst_16[0]);

-        break;

-      default :

-        ptv = (tx_dim == 4) ? &idct_4[0] :

-                              ((tx_dim == 8) ? &idct_8[0] : &idct_16[0]);

-        break;

-    }

-    for(j = 0; j < tx_dim; j++) {

-      for(i = 0; i < tx_dim; i++) {

-        pfb[i] = 0 ;

-        for(k = 0; k < tx_dim; k++) {

-          pfb[i] += ptv[k] * pfa[(k * tx_dim)];

-        }

-        pfa += 1;

-      }

-      pfb += tx_dim;

-      ptv += tx_dim;

-      pfa = &bufa[0];

-    }

-    // horizontal transformation

-    pfa = &bufa[0];

-    pfb = &bufb[0];

-    switch(tx_type) {

-      case ADST_ADST :

-      case  DCT_ADST :

-        pth = (tx_dim == 4) ? &iadst_4[0] :

-                              ((tx_dim == 8) ? &iadst_8[0] : &iadst_16[0]);

-        break;

-      default :

-        pth = (tx_dim == 4) ? &idct_4[0] :

-                              ((tx_dim == 8) ? &idct_8[0] : &idct_16[0]);

-        break;

-    }

-    for(j = 0; j < tx_dim; j++) {

-      for(i = 0; i < tx_dim; i++) {

-        pfa[i] = 0;

-        for(k = 0; k < tx_dim; k++) {

-          pfa[i] += pfb[k] * pth[k];

-        }

-        pth += tx_dim;

-       }

-      pfa += tx_dim;

-      pfb += tx_dim;

-      switch(tx_type) {

-        case ADST_ADST :

-        case  DCT_ADST :

-          pth = (tx_dim == 4) ? &iadst_4[0] :

-                                ((tx_dim == 8) ? &iadst_8[0] : &iadst_16[0]);

-          break;

-        default :

-          pth = (tx_dim == 4) ? &idct_4[0] :

-                                ((tx_dim == 8) ? &idct_8[0] : &idct_16[0]);

-          break;

-      }

-    }

-    // convert to short integer format and load BLOCKD buffer

-    op  = output;

-    pfa = &bufa[0];

-    for(j = 0; j < tx_dim; j++) {

-      for(i = 0; i < tx_dim; i++) {

-        op[i] = (pfa[i] > 0 ) ? (int16_t)( pfa[i] / 8 + 0.49) :

-                               -(int16_t)( - pfa[i] / 8 + 0.49);

-      }

-      op += shortpitch;

-      pfa += tx_dim;

-    }

-  }

-  vp9_clear_system_state(); // Make it simd safe : __asm emms;

-}

 /* Converted the transforms to integer form. */

 #define HORIZONTAL_SHIFT 14  // 16

 #define HORIZONTAL_ROUNDING ((1 << (HORIZONTAL_SHIFT - 1)) - 1)

@@ -404,8 +164,9 @@

 #define VERTICAL_SHIFT 17  // 15

 #define VERTICAL_ROUNDING ((1 << (VERTICAL_SHIFT - 1)) - 1)

 void vp9_ihtllm_c(const int16_t *input, int16_t *output, int pitch,

-                      TX_TYPE tx_type, int tx_dim) {

+                      TX_TYPE tx_type, int tx_dim, uint16_t eobs) {

   int i, j, k;

+  int nz_dim;

   int16_t imbuf[256];

   const int16_t *ip = input;

@@ -444,6 +205,19 @@

       break;

+  nz_dim = tx_dim;

+  if(tx_dim > 4) {

+    if(eobs < 36) {

+      vpx_memset(im, 0, 512);

+      nz_dim = 8;

+      if(eobs < 3) {

+        nz_dim = 2;

+      } else if(eobs < 10) {

+        nz_dim = 4;

+      }

+    }

+  }

   /* 2-D inverse transform X = M1*Z*Transposed_M2 is calculated in 2 steps

    * from right to left:

    * 1. horizontal transform: Y= Z*Transposed_M2

@@ -453,10 +227,10 @@

*/

   /* Horizontal transformation */

   for (j = 0; j < tx_dim; j++) {

-    for (i = 0; i < tx_dim; i++) {

+    for (i = 0; i < nz_dim; i++) {

       int temp = 0;

-      for (k = 0; k < tx_dim; k++) {

+      for (k = 0; k < nz_dim; k++) {

         temp += ip[k] * pth[k];

@@ -476,7 +250,7 @@

     for (j = 0; j < tx_dim; j++) {

       int temp = 0;

-      for (k = 0; k < tx_dim; k++) {

+      for (k = 0; k < nz_dim; k++) {

         temp += ptv[k] * im[k];

--- a/vp9/common/vp9_invtrans.c

+++ b/vp9/common/vp9_invtrans.c

@@ -52,7 +52,7 @@

     TX_TYPE tx_type = get_tx_type_4x4(xd, &xd->block[i]);

     if (tx_type != DCT_DCT) {

       vp9_ihtllm(xd->block[i].dqcoeff, xd->block[i].diff, 32,

-                   tx_type, 4);

+                   tx_type, 4, xd->block[i].eob);

     } else {

       vp9_inverse_transform_b_4x4(xd, i, 32);

@@ -91,7 +91,8 @@

   for (i = 0; i < 9; i += 8) {

     TX_TYPE tx_type = get_tx_type_8x8(xd, &xd->block[i]);

     if (tx_type != DCT_DCT) {

-      vp9_ihtllm(xd->block[i].dqcoeff, xd->block[i].diff, 32, tx_type, 8);

+      vp9_ihtllm(xd->block[i].dqcoeff, xd->block[i].diff, 32, tx_type, 8,

+                 xd->block[i].eob);

     } else {

       vp9_inverse_transform_b_8x8(&blockd[i].dqcoeff[0],

                                   &blockd[i].diff[0], 32);

@@ -100,7 +101,8 @@

   for (i = 2; i < 11; i += 8) {

     TX_TYPE tx_type = get_tx_type_8x8(xd, &xd->block[i]);

     if (tx_type != DCT_DCT) {

-      vp9_ihtllm(xd->block[i + 2].dqcoeff, xd->block[i].diff, 32, tx_type, 8);

+      vp9_ihtllm(xd->block[i + 2].dqcoeff, xd->block[i].diff, 32, tx_type, 8,

+                 xd->block[i + 2].eob);

     } else {

       vp9_inverse_transform_b_8x8(&blockd[i + 2].dqcoeff[0],

                                   &blockd[i].diff[0], 32);

@@ -132,7 +134,7 @@

   BLOCKD *bd = &xd->block[0];

   TX_TYPE tx_type = get_tx_type_16x16(xd, bd);

   if (tx_type != DCT_DCT) {

-    vp9_ihtllm(bd->dqcoeff, bd->diff, 32, tx_type, 16);

+    vp9_ihtllm(bd->dqcoeff, bd->diff, 32, tx_type, 16, bd->eob);

   } else {

     vp9_inverse_transform_b_16x16(&xd->block[0].dqcoeff[0],

                                   &xd->block[0].diff[0], 32);

--- a/vp9/common/vp9_invtrans.h

+++ b/vp9/common/vp9_invtrans.h

@@ -11,7 +11,7 @@

 #ifndef VP9_COMMON_VP9_INVTRANS_H_

 #define VP9_COMMON_VP9_INVTRANS_H_

-#include "vpx_ports/config.h"

+#include "./vpx_config.h"

 #include "vpx/vpx_integer.h"

 #include "vp9/common/vp9_blockd.h"

--- a/vp9/common/vp9_mbpitch.c

+++ b/vp9/common/vp9_mbpitch.c

@@ -39,7 +39,7 @@

   int block;

   uint8_t **y, **u, **v;

-  uint8_t **y2, **u2, **v2;

+  uint8_t **y2 = NULL, **u2 = NULL, **v2 = NULL;

   BLOCKD *blockd = xd->block;

   int stride;

--- a/vp9/common/vp9_postproc.c

+++ b/vp9/common/vp9_postproc.c

@@ -9,7 +9,7 @@

*/

-#include "vpx_ports/config.h"

+#include "./vpx_config.h"

 #include "vpx_scale/yv12config.h"

 #include "vp9/common/vp9_postproc.h"

 #include "vp9/common/vp9_textblit.h"

@@ -32,7 +32,7 @@

     (0.071*(float)(t & 0xff)) + 128)

 /* global constants */

-#if CONFIG_POSTPROC_VISUALIZER

+#if 0 && CONFIG_POSTPROC_VISUALIZER

 static const unsigned char MB_PREDICTION_MODE_colors[MB_MODE_COUNT][3] = {

   { RGB_TO_YUV(0x98FB98) },   /* PaleGreen */

   { RGB_TO_YUV(0x00FF00) },   /* Green */

@@ -672,7 +672,7 @@

                         oci->post_proc_buffer.y_stride);

-#if CONFIG_POSTPROC_VISUALIZER

+#if 0 && CONFIG_POSTPROC_VISUALIZER

   if (flags & VP9D_DEBUG_TXT_FRAME_INFO) {

     char message[512];

     sprintf(message, "F%1dG%1dQ%3dF%3dP%d_s%dx%d",

--- a/vp9/common/vp9_recon.c

+++ b/vp9/common/vp9_recon.c

@@ -9,7 +9,7 @@

*/

-#include "vpx_ports/config.h"

+#include "./vpx_config.h"

 #include "vp9_rtcd.h"

 #include "vp9/common/vp9_blockd.h"

--- a/vp9/common/vp9_reconinter.c

+++ b/vp9/common/vp9_reconinter.c

@@ -9,7 +9,7 @@

*/

-#include "vpx_ports/config.h"

+#include "./vpx_config.h"

 #include "vpx/vpx_integer.h"

 #include "vp9/common/vp9_blockd.h"

 #include "vp9/common/vp9_reconinter.h"

--- a/vp9/common/vp9_reconintra.c

+++ b/vp9/common/vp9_reconintra.c

@@ -9,7 +9,7 @@

*/

 #include <stdio.h>

-#include "vpx_ports/config.h"

+#include "./vpx_config.h"

 #include "vp9_rtcd.h"

 #include "vp9/common/vp9_reconintra.h"

 #include "vpx_mem/vpx_mem.h"

--- a/vp9/common/vp9_reconintra4x4.c

+++ b/vp9/common/vp9_reconintra4x4.c

@@ -9,7 +9,7 @@

*/

-#include "vpx_ports/config.h"

+#include "./vpx_config.h"

 #include "vpx_mem/vpx_mem.h"

 #include "vp9/common/vp9_reconintra.h"

 #include "vp9_rtcd.h"

--- a/vp9/common/vp9_rtcd_defs.sh

+++ b/vp9/common/vp9_rtcd_defs.sh

@@ -42,7 +42,7 @@

 # Dequant

 prototype void vp9_dequantize_b "struct blockd *x"

-specialize vp9_dequantize_b mmx

+specialize vp9_dequantize_b

 prototype void vp9_dequantize_b_2x2 "struct blockd *x"

 specialize vp9_dequantize_b_2x2

@@ -69,13 +69,13 @@

 specialize vp9_dequant_dc_idct_add

 prototype void vp9_dequant_dc_idct_add_y_block "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dst, int stride, uint16_t *eobs, const int16_t *dcs"

-specialize vp9_dequant_dc_idct_add_y_block mmx

+specialize vp9_dequant_dc_idct_add_y_block

 prototype void vp9_dequant_idct_add_y_block "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dst, int stride, uint16_t *eobs"

-specialize vp9_dequant_idct_add_y_block mmx

+specialize vp9_dequant_idct_add_y_block

 prototype void vp9_dequant_idct_add_uv_block "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dstu, uint8_t *dstv, int stride, uint16_t *eobs"

-specialize vp9_dequant_idct_add_uv_block mmx

+specialize vp9_dequant_idct_add_uv_block

 # RECON

@@ -218,6 +218,7 @@

 # post proc

+if [ "$CONFIG_POSTPROC" = "yes" ]; then

 prototype void vp9_mbpost_proc_down "uint8_t *dst, int pitch, int rows, int cols, int flimit"

 specialize vp9_mbpost_proc_down mmx sse2

 vp9_mbpost_proc_down_sse2=vp9_mbpost_proc_down_xmm

@@ -233,6 +234,7 @@

 prototype void vp9_plane_add_noise "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch"

 specialize vp9_plane_add_noise mmx sse2

 vp9_plane_add_noise_sse2=vp9_plane_add_noise_wmt

+fi

 prototype void vp9_blend_mb_inner "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride"

 specialize vp9_blend_mb_inner

@@ -343,10 +345,10 @@

 # dct

 prototype void vp9_short_idct4x4llm_1 "int16_t *input, int16_t *output, int pitch"

-specialize vp9_short_idct4x4llm_1 mmx

+specialize vp9_short_idct4x4llm_1

 prototype void vp9_short_idct4x4llm "int16_t *input, int16_t *output, int pitch"

-specialize vp9_short_idct4x4llm mmx

+specialize vp9_short_idct4x4llm

 prototype void vp9_short_idct8x8 "int16_t *input, int16_t *output, int pitch"

 specialize vp9_short_idct8x8

@@ -366,7 +368,7 @@

 prototype void vp9_short_idct32x32 "int16_t *input, int16_t *output, int pitch"

 specialize vp9_short_idct32x32

-prototype void vp9_ihtllm "const int16_t *input, int16_t *output, int pitch, int tx_type, int tx_dim"

+prototype void vp9_ihtllm "const int16_t *input, int16_t *output, int pitch, int tx_type, int tx_dim, int16_t eobs"

 specialize vp9_ihtllm

--- a/vp9/common/vp9_systemdependent.h

+++ b/vp9/common/vp9_systemdependent.h

@@ -15,7 +15,7 @@

 #include <math.h>

 #endif

-#include "vpx_ports/config.h"

+#include "./vpx_config.h"

 #if ARCH_X86 || ARCH_X86_64

 void vpx_reset_mmx_state(void);

 #define vp9_clear_system_state() vpx_reset_mmx_state()

--- a/vp9/common/x86/vp9_asm_stubs.c

+++ b/vp9/common/x86/vp9_asm_stubs.c

@@ -9,11 +9,11 @@

*/

-#include "vpx_ports/config.h"

+#include "./vpx_config.h"

 #include "vpx_ports/mem.h"

 #include "vp9/common/vp9_subpixel.h"

-extern const short vp9_six_tap_mmx[16][6 * 8];

+extern const short vp9_six_tap_mmx[8][6 * 8];

 extern void vp9_filter_block1d_h6_mmx(unsigned char   *src_ptr,

                                       unsigned short  *output_ptr,

--- a/vp9/common/x86/vp9_filter_sse2.c

+++ b/vp9/common/x86/vp9_filter_sse2.c

@@ -11,6 +11,7 @@

 #include <assert.h> // for alignment checks

 #include <emmintrin.h> // SSE2

 #include "vp9/common/vp9_filter.h"

+#include "vpx_ports/emmintrin_compat.h"

 #include "vpx_ports/mem.h" // for DECLARE_ALIGNED

 #include "vp9_rtcd.h"

--- a/vp9/common/x86/vp9_idctllm_sse2.asm

+++ b/vp9/common/x86/vp9_idctllm_sse2.asm

@@ -21,7 +21,7 @@

 ;   int blk_stride      - 5

; )

-global sym(vp9_idct_dequant_0_2x_sse2)

+global sym(vp9_idct_dequant_0_2x_sse2) PRIVATE

 sym(vp9_idct_dequant_0_2x_sse2):

     push        rbp

     mov         rbp, rsp

@@ -97,7 +97,7 @@

     pop         rbp

ret

-global sym(vp9_idct_dequant_full_2x_sse2)

+global sym(vp9_idct_dequant_full_2x_sse2) PRIVATE

 sym(vp9_idct_dequant_full_2x_sse2):

     push        rbp

     mov         rbp, rsp

@@ -362,7 +362,7 @@

 ;   int dst_stride      - 4

 ;   short *dc           - 5

; )

-global sym(vp9_idct_dequant_dc_0_2x_sse2)

+global sym(vp9_idct_dequant_dc_0_2x_sse2) PRIVATE

 sym(vp9_idct_dequant_dc_0_2x_sse2):

     push        rbp

     mov         rbp, rsp

@@ -438,7 +438,7 @@

     pop         rbp

ret

-global sym(vp9_idct_dequant_dc_full_2x_sse2)

+global sym(vp9_idct_dequant_dc_full_2x_sse2) PRIVATE

 sym(vp9_idct_dequant_dc_full_2x_sse2):

     push        rbp

     mov         rbp, rsp

--- a/vp9/common/x86/vp9_iwalsh_mmx.asm

+++ b/vp9/common/x86/vp9_iwalsh_mmx.asm

@@ -12,7 +12,7 @@

 %include "vpx_ports/x86_abi_support.asm"

 ;void vp9_short_inv_walsh4x4_1_mmx(short *input, short *output)

-global sym(vp9_short_inv_walsh4x4_1_mmx)

+global sym(vp9_short_inv_walsh4x4_1_mmx) PRIVATE

 sym(vp9_short_inv_walsh4x4_1_mmx):

     push        rbp

     mov         rbp, rsp

@@ -48,7 +48,7 @@

ret

 ;void vp9_short_inv_walsh4x4_mmx(short *input, short *output)

-global sym(vp9_short_inv_walsh4x4_mmx)

+global sym(vp9_short_inv_walsh4x4_mmx) PRIVATE

 sym(vp9_short_inv_walsh4x4_mmx):

     push        rbp

     mov         rbp, rsp

--- a/vp9/common/x86/vp9_iwalsh_sse2.asm

+++ b/vp9/common/x86/vp9_iwalsh_sse2.asm

@@ -12,7 +12,7 @@

 %include "vpx_ports/x86_abi_support.asm"

 ;void vp9_short_inv_walsh4x4_sse2(short *input, short *output)

-global sym(vp9_short_inv_walsh4x4_sse2)

+global sym(vp9_short_inv_walsh4x4_sse2) PRIVATE

 sym(vp9_short_inv_walsh4x4_sse2):

     push        rbp

     mov         rbp, rsp

--- a/vp9/common/x86/vp9_loopfilter_mmx.asm

+++ b/vp9/common/x86/vp9_loopfilter_mmx.asm

@@ -21,7 +21,7 @@

 ;    const char *thresh,

 ;    int  count

;)

-global sym(vp9_loop_filter_horizontal_edge_mmx)

+global sym(vp9_loop_filter_horizontal_edge_mmx) PRIVATE

 sym(vp9_loop_filter_horizontal_edge_mmx):

     push        rbp

     mov         rbp, rsp

@@ -233,7 +233,7 @@

 ;    const char *thresh,

 ;    int count

;)

-global sym(vp9_loop_filter_vertical_edge_mmx)

+global sym(vp9_loop_filter_vertical_edge_mmx) PRIVATE

 sym(vp9_loop_filter_vertical_edge_mmx):

     push        rbp

     mov         rbp, rsp

@@ -600,7 +600,7 @@

 ;    int  src_pixel_step,

 ;    const char *blimit

;)

-global sym(vp9_loop_filter_simple_horizontal_edge_mmx)

+global sym(vp9_loop_filter_simple_horizontal_edge_mmx) PRIVATE

 sym(vp9_loop_filter_simple_horizontal_edge_mmx):

     push        rbp

     mov         rbp, rsp

@@ -716,7 +716,7 @@

 ;    int  src_pixel_step,

 ;    const char *blimit

;)

-global sym(vp9_loop_filter_simple_vertical_edge_mmx)

+global sym(vp9_loop_filter_simple_vertical_edge_mmx) PRIVATE

 sym(vp9_loop_filter_simple_vertical_edge_mmx):

     push        rbp

     mov         rbp, rsp

--- a/vp9/common/x86/vp9_loopfilter_sse2.asm

+++ b/vp9/common/x86/vp9_loopfilter_sse2.asm

@@ -281,7 +281,7 @@

 ;    const char    *thresh,

 ;    int            count

;)

-global sym(vp9_loop_filter_horizontal_edge_sse2)

+global sym(vp9_loop_filter_horizontal_edge_sse2) PRIVATE

 sym(vp9_loop_filter_horizontal_edge_sse2):

     push        rbp

     mov         rbp, rsp

@@ -331,7 +331,7 @@

 ;    const char    *thresh,

 ;    int            count

;)

-global sym(vp9_loop_filter_horizontal_edge_uv_sse2)

+global sym(vp9_loop_filter_horizontal_edge_uv_sse2) PRIVATE

 sym(vp9_loop_filter_horizontal_edge_uv_sse2):

     push        rbp

     mov         rbp, rsp

@@ -719,7 +719,7 @@

 ;    const char    *thresh,

 ;    int            count

;)

-global sym(vp9_loop_filter_vertical_edge_sse2)

+global sym(vp9_loop_filter_vertical_edge_sse2) PRIVATE

 sym(vp9_loop_filter_vertical_edge_sse2):

     push        rbp

     mov         rbp, rsp

@@ -786,7 +786,7 @@

 ;    const char    *thresh,

 ;    unsigned char *v

;)

-global sym(vp9_loop_filter_vertical_edge_uv_sse2)

+global sym(vp9_loop_filter_vertical_edge_uv_sse2) PRIVATE

 sym(vp9_loop_filter_vertical_edge_uv_sse2):

     push        rbp

     mov         rbp, rsp

@@ -851,7 +851,7 @@

 ;    int  src_pixel_step,

 ;    const char *blimit,

;)

-global sym(vp9_loop_filter_simple_horizontal_edge_sse2)

+global sym(vp9_loop_filter_simple_horizontal_edge_sse2) PRIVATE

 sym(vp9_loop_filter_simple_horizontal_edge_sse2):

     push        rbp

     mov         rbp, rsp

@@ -960,7 +960,7 @@

 ;    int  src_pixel_step,

 ;    const char *blimit,

;)

-global sym(vp9_loop_filter_simple_vertical_edge_sse2)

+global sym(vp9_loop_filter_simple_vertical_edge_sse2) PRIVATE

 sym(vp9_loop_filter_simple_vertical_edge_sse2):

     push        rbp         ; save old base pointer value.

     mov         rbp, rsp    ; set new base pointer value.

--- a/vp9/common/x86/vp9_loopfilter_x86.c

+++ b/vp9/common/x86/vp9_loopfilter_x86.c

@@ -11,6 +11,7 @@

 #include <emmintrin.h>  // SSE2

 #include "vpx_config.h"

 #include "vp9/common/vp9_loopfilter.h"

+#include "vpx_ports/emmintrin_compat.h"

 prototype_loopfilter(vp9_loop_filter_vertical_edge_mmx);

 prototype_loopfilter(vp9_loop_filter_horizontal_edge_mmx);

--- a/vp9/common/x86/vp9_mask_sse3.asm

+++ b/vp9/common/x86/vp9_mask_sse3.asm

@@ -25,7 +25,7 @@

 ;    int yt,

 ;    int ut,

 ;    int vt)

-global sym(vp8_makemask_sse3)

+global sym(vp8_makemask_sse3) PRIVATE

 sym(vp8_makemask_sse3):

     push        rbp

     mov         rbp, rsp

@@ -181,7 +181,7 @@

 ;void int vp8_growmaskmb_sse3(

 ;    unsigned char *om,

 ;    unsigned char *nm,

-global sym(vp8_growmaskmb_sse3)

+global sym(vp8_growmaskmb_sse3) PRIVATE

 sym(vp8_growmaskmb_sse3):

     push        rbp

     mov         rbp, rsp

@@ -234,7 +234,7 @@

 ;    unsigned char *ref_ptr,

 ;    int  ref_stride,

 ;    unsigned char *mask)

-global sym(vp8_sad16x16_masked_wmt)

+global sym(vp8_sad16x16_masked_wmt) PRIVATE

 sym(vp8_sad16x16_masked_wmt):

     push        rbp

     mov         rbp, rsp

@@ -288,7 +288,7 @@

 ;    unsigned char *ref_ptr,

 ;    int  ref_stride,

 ;    unsigned char *mask)

-global sym(vp8_sad16x16_unmasked_wmt)

+global sym(vp8_sad16x16_unmasked_wmt) PRIVATE

 sym(vp8_sad16x16_unmasked_wmt):

     push        rbp

     mov         rbp, rsp

@@ -343,7 +343,7 @@

 ;    unsigned char *dst_ptr,

 ;    int  dst_stride,

 ;    unsigned char *mask)

-global sym(vp8_masked_predictor_wmt)

+global sym(vp8_masked_predictor_wmt) PRIVATE

 sym(vp8_masked_predictor_wmt):

     push        rbp

     mov         rbp, rsp

@@ -395,7 +395,7 @@

 ;    unsigned char *dst_ptr,

 ;    int  dst_stride,

 ;    unsigned char *mask)

-global sym(vp8_masked_predictor_uv_wmt)

+global sym(vp8_masked_predictor_uv_wmt) PRIVATE

 sym(vp8_masked_predictor_uv_wmt):

     push        rbp

     mov         rbp, rsp

@@ -444,7 +444,7 @@

 ;unsigned int vp8_uv_from_y_mask(

 ;    unsigned char *ymask,

 ;    unsigned char *uvmask)

-global sym(vp8_uv_from_y_mask)

+global sym(vp8_uv_from_y_mask) PRIVATE

 sym(vp8_uv_from_y_mask):

     push        rbp

     mov         rbp, rsp

--- a/vp9/common/x86/vp9_postproc_mmx.asm

+++ b/vp9/common/x86/vp9_postproc_mmx.asm

@@ -24,7 +24,7 @@

 ;    int cols,

 ;    int flimit

;)

-global sym(vp9_post_proc_down_and_across_mmx)

+global sym(vp9_post_proc_down_and_across_mmx) PRIVATE

 sym(vp9_post_proc_down_and_across_mmx):

     push        rbp

     mov         rbp, rsp

@@ -265,7 +265,7 @@

 ;void vp9_mbpost_proc_down_mmx(unsigned char *dst,

 ;                             int pitch, int rows, int cols,int flimit)

 extern sym(vp9_rv)

-global sym(vp9_mbpost_proc_down_mmx)

+global sym(vp9_mbpost_proc_down_mmx) PRIVATE

 sym(vp9_mbpost_proc_down_mmx):

     push        rbp

     mov         rbp, rsp

@@ -465,7 +465,7 @@

 ;                            unsigned char bothclamp[16],

 ;                            unsigned int Width, unsigned int Height, int Pitch)

 extern sym(rand)

-global sym(vp9_plane_add_noise_mmx)

+global sym(vp9_plane_add_noise_mmx) PRIVATE

 sym(vp9_plane_add_noise_mmx):

     push        rbp

     mov         rbp, rsp

--- a/vp9/common/x86/vp9_postproc_sse2.asm

+++ b/vp9/common/x86/vp9_postproc_sse2.asm

@@ -21,7 +21,7 @@

 ;    int cols,

 ;    int flimit

;)

-global sym(vp9_post_proc_down_and_across_xmm)

+global sym(vp9_post_proc_down_and_across_xmm) PRIVATE

 sym(vp9_post_proc_down_and_across_xmm):

     push        rbp

     mov         rbp, rsp

@@ -251,7 +251,7 @@

 ;void vp9_mbpost_proc_down_xmm(unsigned char *dst,

 ;                            int pitch, int rows, int cols,int flimit)

 extern sym(vp9_rv)

-global sym(vp9_mbpost_proc_down_xmm)

+global sym(vp9_mbpost_proc_down_xmm) PRIVATE

 sym(vp9_mbpost_proc_down_xmm):

     push        rbp

     mov         rbp, rsp

@@ -451,7 +451,7 @@

 ;void vp9_mbpost_proc_across_ip_xmm(unsigned char *src,

 ;                                int pitch, int rows, int cols,int flimit)

-global sym(vp9_mbpost_proc_across_ip_xmm)

+global sym(vp9_mbpost_proc_across_ip_xmm) PRIVATE

 sym(vp9_mbpost_proc_across_ip_xmm):

     push        rbp

     mov         rbp, rsp

@@ -630,7 +630,7 @@

 ;                            unsigned char bothclamp[16],

 ;                            unsigned int Width, unsigned int Height, int Pitch)

 extern sym(rand)

-global sym(vp9_plane_add_noise_wmt)

+global sym(vp9_plane_add_noise_wmt) PRIVATE

 sym(vp9_plane_add_noise_wmt):

     push        rbp

     mov         rbp, rsp

--- a/vp9/common/x86/vp9_recon_mmx.asm

+++ b/vp9/common/x86/vp9_recon_mmx.asm

@@ -11,7 +11,7 @@

 %include "vpx_ports/x86_abi_support.asm"

 ;void vp9_recon_b_mmx(unsigned char *s, short *q, unsigned char *d, int stride)

-global sym(vp9_recon_b_mmx)

+global sym(vp9_recon_b_mmx) PRIVATE

 sym(vp9_recon_b_mmx):

     push        rbp

     mov         rbp, rsp

@@ -65,7 +65,7 @@

 ;    unsigned char *dst,

 ;    int dst_stride

 ;    )

-global sym(vp9_copy_mem8x8_mmx)

+global sym(vp9_copy_mem8x8_mmx) PRIVATE

 sym(vp9_copy_mem8x8_mmx):

     push        rbp

     mov         rbp, rsp

@@ -128,7 +128,7 @@

 ;    unsigned char *dst,

 ;    int dst_stride

 ;    )

-global sym(vp9_copy_mem8x4_mmx)

+global sym(vp9_copy_mem8x4_mmx) PRIVATE

 sym(vp9_copy_mem8x4_mmx):

     push        rbp

     mov         rbp, rsp

@@ -172,7 +172,7 @@

 ;    unsigned char *dst,

 ;    int dst_stride

 ;    )

-global sym(vp9_copy_mem16x16_mmx)

+global sym(vp9_copy_mem16x16_mmx) PRIVATE

 sym(vp9_copy_mem16x16_mmx):

     push        rbp

     mov         rbp, rsp

--- a/vp9/common/x86/vp9_recon_sse2.asm

+++ b/vp9/common/x86/vp9_recon_sse2.asm

@@ -11,7 +11,7 @@

 %include "vpx_ports/x86_abi_support.asm"

 ;void vp9_recon2b_sse2(unsigned char *s, short *q, unsigned char *d, int stride)

-global sym(vp9_recon2b_sse2)

+global sym(vp9_recon2b_sse2) PRIVATE

 sym(vp9_recon2b_sse2):

     push        rbp

     mov         rbp, rsp

@@ -62,7 +62,7 @@

 ;void vp9_recon4b_sse2(unsigned char *s, short *q, unsigned char *d, int stride)

-global sym(vp9_recon4b_sse2)

+global sym(vp9_recon4b_sse2) PRIVATE

 sym(vp9_recon4b_sse2):

     push        rbp

     mov         rbp, rsp

@@ -132,7 +132,7 @@

 ;    unsigned char *dst,

 ;    int dst_stride

 ;    )

-global sym(vp9_copy_mem16x16_sse2)

+global sym(vp9_copy_mem16x16_sse2) PRIVATE

 sym(vp9_copy_mem16x16_sse2):

     push        rbp

     mov         rbp, rsp

@@ -237,7 +237,7 @@

 ;    unsigned char *src,

 ;    int src_stride,

 ;    )

-global sym(vp9_intra_pred_uv_dc_mmx2)

+global sym(vp9_intra_pred_uv_dc_mmx2) PRIVATE

 sym(vp9_intra_pred_uv_dc_mmx2):

     push        rbp

     mov         rbp, rsp

@@ -310,7 +310,7 @@

 ;    unsigned char *src,

 ;    int src_stride,

 ;    )

-global sym(vp9_intra_pred_uv_dctop_mmx2)

+global sym(vp9_intra_pred_uv_dctop_mmx2) PRIVATE

 sym(vp9_intra_pred_uv_dctop_mmx2):

     push        rbp

     mov         rbp, rsp

@@ -363,7 +363,7 @@

 ;    unsigned char *src,

 ;    int src_stride,

 ;    )

-global sym(vp9_intra_pred_uv_dcleft_mmx2)

+global sym(vp9_intra_pred_uv_dcleft_mmx2) PRIVATE

 sym(vp9_intra_pred_uv_dcleft_mmx2):

     push        rbp

     mov         rbp, rsp

@@ -428,7 +428,7 @@

 ;    unsigned char *src,

 ;    int src_stride,

 ;    )

-global sym(vp9_intra_pred_uv_dc128_mmx)

+global sym(vp9_intra_pred_uv_dc128_mmx) PRIVATE

 sym(vp9_intra_pred_uv_dc128_mmx):

     push        rbp

     mov         rbp, rsp

@@ -465,7 +465,7 @@

 ;    int src_stride,

 ;    )

 %macro vp9_intra_pred_uv_tm 1

-global sym(vp9_intra_pred_uv_tm_%1)

+global sym(vp9_intra_pred_uv_tm_%1) PRIVATE

 sym(vp9_intra_pred_uv_tm_%1):

     push        rbp

     mov         rbp, rsp

@@ -545,7 +545,7 @@

 ;    unsigned char *src,

 ;    int src_stride,

 ;    )

-global sym(vp9_intra_pred_uv_ve_mmx)

+global sym(vp9_intra_pred_uv_ve_mmx) PRIVATE

 sym(vp9_intra_pred_uv_ve_mmx):

     push        rbp

     mov         rbp, rsp

@@ -585,7 +585,7 @@

 ;    int src_stride,

 ;    )

 %macro vp9_intra_pred_uv_ho 1

-global sym(vp9_intra_pred_uv_ho_%1)

+global sym(vp9_intra_pred_uv_ho_%1) PRIVATE

 sym(vp9_intra_pred_uv_ho_%1):

     push        rbp

     mov         rbp, rsp

--- a/vp9/common/x86/vp9_recon_wrapper_sse2.c

+++ b/vp9/common/x86/vp9_recon_wrapper_sse2.c

@@ -8,7 +8,7 @@

  *  be found in the AUTHORS file in the root of the source tree.

*/

-#include "vpx_ports/config.h"

+#include "./vpx_config.h"

 #include "vpx_mem/vpx_mem.h"

 #include "vp9/common/vp9_blockd.h"

--- a/vp9/common/x86/vp9_sadmxn_x86.c

+++ b/vp9/common/x86/vp9_sadmxn_x86.c

@@ -12,6 +12,7 @@

 #include "./vpx_config.h"

 #include "./vp9_rtcd.h"

 #include "vpx/vpx_integer.h"

+#include "vpx_ports/emmintrin_compat.h"

 #if HAVE_SSE2

 unsigned int vp9_sad16x3_sse2(

--- a/vp9/common/x86/vp9_subpixel_8t_ssse3.asm

+++ b/vp9/common/x86/vp9_subpixel_8t_ssse3.asm

@@ -30,7 +30,7 @@

 ;    unsigned int   output_height,

 ;    short *filter

;)

-global sym(vp9_filter_block1d8_v8_ssse3)

+global sym(vp9_filter_block1d8_v8_ssse3) PRIVATE

 sym(vp9_filter_block1d8_v8_ssse3):

     push        rbp

     mov         rbp, rsp

@@ -148,7 +148,7 @@

 ;    unsigned int   output_height,

 ;    short *filter

;)

-global sym(vp9_filter_block1d16_v8_ssse3)

+global sym(vp9_filter_block1d16_v8_ssse3) PRIVATE

 sym(vp9_filter_block1d16_v8_ssse3):

     push        rbp

     mov         rbp, rsp

@@ -298,7 +298,7 @@

 ;    unsigned int    output_height,

 ;    short *filter

;)

-global sym(vp9_filter_block1d8_h8_ssse3)

+global sym(vp9_filter_block1d8_h8_ssse3) PRIVATE

 sym(vp9_filter_block1d8_h8_ssse3):

     push        rbp

     mov         rbp, rsp

@@ -405,7 +405,7 @@

 ;    unsigned int    output_height,

 ;    short *filter

;)

-global sym(vp9_filter_block1d16_h8_ssse3)

+global sym(vp9_filter_block1d16_h8_ssse3) PRIVATE

 sym(vp9_filter_block1d16_h8_ssse3):

     push        rbp

     mov         rbp, rsp

--- a/vp9/common/x86/vp9_subpixel_mmx.asm

+++ b/vp9/common/x86/vp9_subpixel_mmx.asm

@@ -27,7 +27,7 @@

 ;    unsigned int    output_width,

 ;    short           * vp9_filter

;)

-global sym(vp9_filter_block1d_h6_mmx)

+global sym(vp9_filter_block1d_h6_mmx) PRIVATE

 sym(vp9_filter_block1d_h6_mmx):

     push        rbp

     mov         rbp, rsp

@@ -124,7 +124,7 @@

 ;   unsigned int output_width,

 ;   short * vp9_filter

;)

-global sym(vp9_filter_block1dc_v6_mmx)

+global sym(vp9_filter_block1dc_v6_mmx) PRIVATE

 sym(vp9_filter_block1dc_v6_mmx):

     push        rbp

     mov         rbp, rsp

--- a/vp9/common/x86/vp9_subpixel_sse2.asm

+++ b/vp9/common/x86/vp9_subpixel_sse2.asm

@@ -32,7 +32,7 @@

 ;    unsigned int    output_width,

 ;    short           *vp9_filter

;)

-global sym(vp9_filter_block1d8_h6_sse2)

+global sym(vp9_filter_block1d8_h6_sse2) PRIVATE

 sym(vp9_filter_block1d8_h6_sse2):

     push        rbp

     mov         rbp, rsp

@@ -152,7 +152,7 @@

 ; even number. This function handles 8 pixels in horizontal direction, calculating ONE

 ; rows each iteration to take advantage of the 128 bits operations.

 ;*************************************************************************************/

-global sym(vp9_filter_block1d16_h6_sse2)

+global sym(vp9_filter_block1d16_h6_sse2) PRIVATE

 sym(vp9_filter_block1d16_h6_sse2):

     push        rbp

     mov         rbp, rsp

@@ -328,7 +328,7 @@

 ; Notes: filter_block1d8_v6 applies a 6 tap filter vertically to the input pixels. The

 ; input pixel array has output_height rows.

 ;*************************************************************************************/

-global sym(vp9_filter_block1d8_v6_sse2)

+global sym(vp9_filter_block1d8_v6_sse2) PRIVATE

 sym(vp9_filter_block1d8_v6_sse2):

     push        rbp

     mov         rbp, rsp

@@ -423,7 +423,7 @@

 ; Notes: filter_block1d16_v6 applies a 6 tap filter vertically to the input pixels. The

 ; input pixel array has output_height rows.

 ;*************************************************************************************/

-global sym(vp9_filter_block1d16_v6_sse2)

+global sym(vp9_filter_block1d16_v6_sse2) PRIVATE

 sym(vp9_filter_block1d16_v6_sse2):

     push        rbp

     mov         rbp, rsp

@@ -533,7 +533,7 @@

 ;    const short    *vp9_filter

;)

 ; First-pass filter only when yoffset==0

-global sym(vp9_filter_block1d8_h6_only_sse2)

+global sym(vp9_filter_block1d8_h6_only_sse2) PRIVATE

 sym(vp9_filter_block1d8_h6_only_sse2):

     push        rbp

     mov         rbp, rsp

@@ -646,7 +646,7 @@

 ;    const short    *vp9_filter

;)

 ; First-pass filter only when yoffset==0

-global sym(vp9_filter_block1d16_h6_only_sse2)

+global sym(vp9_filter_block1d16_h6_only_sse2) PRIVATE

 sym(vp9_filter_block1d16_h6_only_sse2):

     push        rbp

     mov         rbp, rsp

@@ -811,7 +811,7 @@

 ;    const short    *vp9_filter

;)

 ; Second-pass filter only when xoffset==0

-global sym(vp9_filter_block1d8_v6_only_sse2)

+global sym(vp9_filter_block1d8_v6_only_sse2) PRIVATE

 sym(vp9_filter_block1d8_v6_only_sse2):

     push        rbp

     mov         rbp, rsp

@@ -903,7 +903,7 @@

 ;    unsigned int    output_height,

 ;    unsigned int    output_width

;)

-global sym(vp9_unpack_block1d16_h6_sse2)

+global sym(vp9_unpack_block1d16_h6_sse2) PRIVATE

 sym(vp9_unpack_block1d16_h6_sse2):

     push        rbp

     mov         rbp, rsp

@@ -962,7 +962,7 @@

 ;    int dst_pitch

;)

 extern sym(vp9_bilinear_filters_mmx)

-global sym(vp9_bilinear_predict16x16_sse2)

+global sym(vp9_bilinear_predict16x16_sse2) PRIVATE

 sym(vp9_bilinear_predict16x16_sse2):

     push        rbp

     mov         rbp, rsp

@@ -1231,7 +1231,7 @@

 ;    int dst_pitch

;)

 extern sym(vp9_bilinear_filters_mmx)

-global sym(vp9_bilinear_predict8x8_sse2)

+global sym(vp9_bilinear_predict8x8_sse2) PRIVATE

 sym(vp9_bilinear_predict8x8_sse2):

     push        rbp

     mov         rbp, rsp

--- a/vp9/common/x86/vp9_subpixel_ssse3.asm

+++ b/vp9/common/x86/vp9_subpixel_ssse3.asm

@@ -34,7 +34,7 @@

 ;    unsigned int    output_height,

 ;    unsigned int    vp9_filter_index

;)

-global sym(vp9_filter_block1d8_h6_ssse3)

+global sym(vp9_filter_block1d8_h6_ssse3) PRIVATE

 sym(vp9_filter_block1d8_h6_ssse3):

     push        rbp

     mov         rbp, rsp

@@ -177,7 +177,7 @@

 ;    unsigned int    output_height,

 ;    unsigned int    vp9_filter_index

;)

-global sym(vp9_filter_block1d16_h6_ssse3)

+global sym(vp9_filter_block1d16_h6_ssse3) PRIVATE

 sym(vp9_filter_block1d16_h6_ssse3):

     push        rbp

     mov         rbp, rsp

@@ -284,7 +284,7 @@

 ;    unsigned int    output_height,

 ;    unsigned int    vp9_filter_index

;)

-global sym(vp9_filter_block1d4_h6_ssse3)

+global sym(vp9_filter_block1d4_h6_ssse3) PRIVATE

 sym(vp9_filter_block1d4_h6_ssse3):

     push        rbp

     mov         rbp, rsp

@@ -413,7 +413,7 @@

 ;    unsigned int   output_height,

 ;    unsigned int   vp9_filter_index

;)

-global sym(vp9_filter_block1d16_v6_ssse3)

+global sym(vp9_filter_block1d16_v6_ssse3) PRIVATE

 sym(vp9_filter_block1d16_v6_ssse3):

     push        rbp

     mov         rbp, rsp

@@ -601,7 +601,7 @@

 ;    unsigned int   output_height,

 ;    unsigned int   vp9_filter_index

;)

-global sym(vp9_filter_block1d8_v6_ssse3)

+global sym(vp9_filter_block1d8_v6_ssse3) PRIVATE

 sym(vp9_filter_block1d8_v6_ssse3):

     push        rbp

     mov         rbp, rsp

@@ -741,7 +741,7 @@

 ;    unsigned int   output_height,

 ;    unsigned int   vp9_filter_index

;)

-global sym(vp9_filter_block1d4_v6_ssse3)

+global sym(vp9_filter_block1d4_v6_ssse3) PRIVATE

 sym(vp9_filter_block1d4_v6_ssse3):

     push        rbp

     mov         rbp, rsp

@@ -880,7 +880,7 @@

 ;    unsigned char *dst_ptr,

 ;    int dst_pitch

;)

-global sym(vp9_bilinear_predict16x16_ssse3)

+global sym(vp9_bilinear_predict16x16_ssse3) PRIVATE

 sym(vp9_bilinear_predict16x16_ssse3):

     push        rbp

     mov         rbp, rsp

@@ -1143,7 +1143,7 @@

 ;    unsigned char *dst_ptr,

 ;    int dst_pitch

;)

-global sym(vp9_bilinear_predict8x8_ssse3)

+global sym(vp9_bilinear_predict8x8_ssse3) PRIVATE

 sym(vp9_bilinear_predict8x8_ssse3):

     push        rbp

     mov         rbp, rsp

--- a/vp9/decoder/vp9_dboolhuff.h

+++ b/vp9/decoder/vp9_dboolhuff.h

@@ -13,7 +13,7 @@

 #include <stddef.h>

 #include <limits.h>

-#include "vpx_ports/config.h"

+#include "./vpx_config.h"

 #include "vpx_ports/mem.h"

 #include "vpx/vpx_integer.h"

--- a/vp9/decoder/vp9_decodframe.c

+++ b/vp9/decoder/vp9_decodframe.c

@@ -264,7 +264,8 @@

   if (tx_type != DCT_DCT) {

     vp9_ht_dequant_idct_add_16x16_c(tx_type, xd->qcoeff,

                                     xd->block[0].dequant, xd->predictor,

-                                    xd->dst.y_buffer, 16, xd->dst.y_stride);

+                                    xd->dst.y_buffer, 16, xd->dst.y_stride,

+                                    xd->eobs[0]);

   } else {

     vp9_dequant_idct_add_16x16(xd->qcoeff, xd->block[0].dequant,

                                xd->predictor, xd->dst.y_buffer,

@@ -310,7 +311,8 @@

       tx_type = get_tx_type_8x8(xd, &xd->block[ib]);

       if (tx_type != DCT_DCT) {

-        vp9_ht_dequant_idct_add_8x8_c(tx_type, q, dq, pre, dst, 16, stride);

+        vp9_ht_dequant_idct_add_8x8_c(tx_type, q, dq, pre, dst, 16, stride,

+                                      xd->eobs[idx]);

       } else {

         vp9_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride,

                                    0, xd->eobs[idx]);

@@ -409,7 +411,7 @@

           vp9_ht_dequant_idct_add_c(tx_type, b->qcoeff,

                                     b->dequant, b->predictor,

                                     *(b->base_dst) + b->dst, 16,

-                                    b->dst_stride);

+                                    b->dst_stride, b->eob);

         } else {

           vp9_dequant_idct_add(b->qcoeff, b->dequant, b->predictor,

                                *(b->base_dst) + b->dst, 16, b->dst_stride);

@@ -454,7 +456,8 @@

       if (tx_type != DCT_DCT) {

         vp9_ht_dequant_idct_add_c(tx_type, b->qcoeff,

                                   b->dequant, b->predictor,

-                                  *(b->base_dst) + b->dst, 16, b->dst_stride);

+                                  *(b->base_dst) + b->dst, 16, b->dst_stride,

+                                  b->eob);

       } else {

         vp9_dequant_idct_add(b->qcoeff, b->dequant, b->predictor,

                              *(b->base_dst) + b->dst, 16, b->dst_stride);

@@ -516,7 +519,7 @@

           vp9_ht_dequant_idct_add_c(tx_type, b->qcoeff,

                                     b->dequant, b->predictor,

                                     *(b->base_dst) + b->dst, 16,

-                                    b->dst_stride);

+                                    b->dst_stride, b->eob);

         } else {

           vp9_dequant_idct_add(b->qcoeff, b->dequant, b->predictor,

                                *(b->base_dst) + b->dst, 16, b->dst_stride);

@@ -570,7 +573,7 @@

         tx_type, xd->qcoeff, xd->block[0].dequant,

         xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,

         xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,

-        xd->dst.y_stride, xd->dst.y_stride);

+        xd->dst.y_stride, xd->dst.y_stride, xd->block[0].eob);

   } else {

     vp9_dequant_idct_add_16x16(

         xd->qcoeff, xd->block[0].dequant,

@@ -609,7 +612,7 @@

             + x_idx * 16 + (i & 1) * 8,

             xd->dst.y_buffer + (y_idx * 16 + (i / 2) * 8) * xd->dst.y_stride

             + x_idx * 16 + (i & 1) * 8,

-            stride, stride);

+            stride, stride, b->eob);

       } else {

         vp9_dequant_idct_add_8x8_c(

             q, dq,

@@ -666,7 +669,7 @@

             + x_idx * 16 + (i & 3) * 4,

             xd->dst.y_buffer + (y_idx * 16 + (i / 4) * 4) * xd->dst.y_stride

             + x_idx * 16 + (i & 3) * 4,

-            xd->dst.y_stride, xd->dst.y_stride);

+            xd->dst.y_stride, xd->dst.y_stride, b->eob);

       } else {

         vp9_dequant_idct_add_c(

             b->qcoeff, b->dequant,

--- a/vp9/decoder/vp9_dequantize.c

+++ b/vp9/decoder/vp9_dequantize.c

@@ -14,7 +14,6 @@

 #include "vpx_mem/vpx_mem.h"

 #include "vp9/decoder/vp9_onyxd_int.h"

 #include "vp9/common/vp9_common.h"

 static void add_residual(const int16_t *diff, const uint8_t *pred, int pitch,

                          uint8_t *dest, int stride, int width, int height) {

   int r, c;

@@ -61,7 +60,7 @@

 void vp9_ht_dequant_idct_add_c(TX_TYPE tx_type, int16_t *input,

                                const int16_t *dq,

                                uint8_t *pred, uint8_t *dest,

-                               int pitch, int stride) {

+                               int pitch, int stride, uint16_t eobs) {

   int16_t output[16];

   int16_t *diff_ptr = output;

   int i;

@@ -70,7 +69,7 @@

     input[i] = dq[i] * input[i];

-  vp9_ihtllm(input, output, 4 << 1, tx_type, 4);

+  vp9_ihtllm(input, output, 4 << 1, tx_type, 4, eobs);

   vpx_memset(input, 0, 32);

@@ -80,21 +79,25 @@

 void vp9_ht_dequant_idct_add_8x8_c(TX_TYPE tx_type, int16_t *input,

                                    const int16_t *dq,

                                    uint8_t *pred, uint8_t *dest,

-                                   int pitch, int stride) {

+                                   int pitch, int stride, uint16_t eobs) {

   int16_t output[64];

   int16_t *diff_ptr = output;

   int i;

+  if (eobs == 0) {

+    /* All 0 DCT coefficient */

+    vp9_copy_mem8x8(pred, pitch, dest, stride);

+  } else if (eobs > 0) {

+    input[0] = dq[0] * input[0];

+    for (i = 1; i < 64; i++) {

+      input[i] = dq[1] * input[i];

+    }

-  input[0] = dq[0] * input[0];

-  for (i = 1; i < 64; i++) {

-    input[i] = dq[1] * input[i];

-  }

+    vp9_ihtllm(input, output, 16, tx_type, 8, eobs);

-  vp9_ihtllm(input, output, 16, tx_type, 8);

+    vpx_memset(input, 0, 128);

-  vpx_memset(input, 0, 128);

-  add_residual(diff_ptr, pred, pitch, dest, stride, 8, 8);

+    add_residual(diff_ptr, pred, pitch, dest, stride, 8, 8);

+  }

 void vp9_dequant_idct_add_c(int16_t *input, const int16_t *dq, uint8_t *pred,

@@ -256,26 +259,31 @@

 void vp9_ht_dequant_idct_add_16x16_c(TX_TYPE tx_type, int16_t *input,

                                      const int16_t *dq, uint8_t *pred,

-                                     uint8_t *dest, int pitch, int stride) {

+                                     uint8_t *dest, int pitch, int stride,

+                                     uint16_t eobs) {

   int16_t output[256];

   int16_t *diff_ptr = output;

   int i;

+  if (eobs == 0) {

+    /* All 0 DCT coefficient */

+    vp9_copy_mem16x16(pred, pitch, dest, stride);

+  } else if (eobs > 0) {

+    input[0]= input[0] * dq[0];

-  input[0]= input[0] * dq[0];

+    // recover quantizer for 4 4x4 blocks

+    for (i = 1; i < 256; i++)

+      input[i] = input[i] * dq[1];

-  // recover quantizer for 4 4x4 blocks

-  for (i = 1; i < 256; i++)

-    input[i] = input[i] * dq[1];

+    // inverse hybrid transform

+    vp9_ihtllm(input, output, 32, tx_type, 16, eobs);

-  // inverse hybrid transform

-  vp9_ihtllm(input, output, 32, tx_type, 16);

+    // the idct halves ( >> 1) the pitch

+    // vp9_short_idct16x16_c(input, output, 32);

-  // the idct halves ( >> 1) the pitch

-  // vp9_short_idct16x16_c(input, output, 32);

+    vpx_memset(input, 0, 512);

-  vpx_memset(input, 0, 512);

-  add_residual(diff_ptr, pred, pitch, dest, stride, 16, 16);

+    add_residual(diff_ptr, pred, pitch, dest, stride, 16, 16);

+  }

 void vp9_dequant_idct_add_16x16_c(int16_t *input, const int16_t *dq,

--- a/vp9/decoder/vp9_dequantize.h

+++ b/vp9/decoder/vp9_dequantize.h

@@ -11,108 +11,93 @@

 #ifndef VP9_DECODER_VP9_DEQUANTIZE_H_

 #define VP9_DECODER_VP9_DEQUANTIZE_H_

 #include "vp9/common/vp9_blockd.h"

 #if CONFIG_LOSSLESS

-extern void vp9_dequant_idct_add_lossless_c(int16_t *input,

-                                            const int16_t *dq,

-                                            uint8_t *pred,

-                                            uint8_t *output,

+extern void vp9_dequant_idct_add_lossless_c(int16_t *input, const int16_t *dq,

+                                            unsigned char *pred,

+                                            unsigned char *output,

                                             int pitch, int stride);

-extern void vp9_dequant_dc_idct_add_lossless_c(int16_t *input,

-                                               const int16_t *dq,

-                                               uint8_t *pred,

-                                               uint8_t *output,

+extern void vp9_dequant_dc_idct_add_lossless_c(int16_t *input, const int16_t *dq,

+                                               unsigned char *pred,

+                                               unsigned char *output,

                                                int pitch, int stride, int dc);

 extern void vp9_dequant_dc_idct_add_y_block_lossless_c(int16_t *q,

                                                        const int16_t *dq,

-                                                       uint8_t *pre,

-                                                       uint8_t *dst,

+                                                       unsigned char *pre,

+                                                       unsigned char *dst,

                                                        int stride,

                                                        uint16_t *eobs,

                                                        const int16_t *dc);

-extern void vp9_dequant_idct_add_y_block_lossless_c(int16_t *q,

-                                                    const int16_t *dq,

-                                                    uint8_t *pre,

-                                                    uint8_t *dst,

+extern void vp9_dequant_idct_add_y_block_lossless_c(int16_t *q, const int16_t *dq,

+                                                    unsigned char *pre,

+                                                    unsigned char *dst,

                                                     int stride,

                                                     uint16_t *eobs);

-extern void vp9_dequant_idct_add_uv_block_lossless_c(int16_t *q,

-                                                     const int16_t *dq,

-                                                     uint8_t *pre,

-                                                     uint8_t *dst_u,

-                                                     uint8_t *dst_v,

+extern void vp9_dequant_idct_add_uv_block_lossless_c(int16_t *q, const int16_t *dq,

+                                                     unsigned char *pre,

+                                                     unsigned char *dst_u,

+                                                     unsigned char *dst_v,

                                                      int stride,

                                                      uint16_t *eobs);

-#endif  // CONFIG_LOSSLESS

+#endif

 typedef void (*vp9_dequant_idct_add_fn_t)(int16_t *input, const int16_t *dq,

-                                          uint8_t *pred, uint8_t *output,

-                                          int pitch, int stride);

+    unsigned char *pred, unsigned char *output, int pitch, int stride);

 typedef void(*vp9_dequant_dc_idct_add_fn_t)(int16_t *input, const int16_t *dq,

-                                            uint8_t *pred, uint8_t *output,

-                                            int pitch, int stride, int dc);

+    unsigned char *pred, unsigned char *output, int pitch, int stride, int dc);

-typedef void(*vp9_dequant_dc_idct_add_y_block_fn_t)(int16_t *q,

-                                                    const int16_t *dq,

-                                                    uint8_t *pre, uint8_t *dst,

-                                                    int stride, uint16_t *eobs,

-                                                    const int16_t *dc);

+typedef void(*vp9_dequant_dc_idct_add_y_block_fn_t)(int16_t *q, const int16_t *dq,

+    unsigned char *pre, unsigned char *dst, int stride, uint16_t *eobs,

+    const int16_t *dc);

 typedef void(*vp9_dequant_idct_add_y_block_fn_t)(int16_t *q, const int16_t *dq,

-                                                 uint8_t *pre, uint8_t *dst,

-                                                 int stride, uint16_t *eobs);

+    unsigned char *pre, unsigned char *dst, int stride, uint16_t *eobs);

 typedef void(*vp9_dequant_idct_add_uv_block_fn_t)(int16_t *q, const int16_t *dq,

-                                                  uint8_t *pre, uint8_t *dst_u,

-                                                  uint8_t *dst_v, int stride,

-                                                  uint16_t *eobs);

+    unsigned char *pre, unsigned char *dst_u, unsigned char *dst_v, int stride,

+    uint16_t *eobs);

-void vp9_ht_dequant_idct_add_c(TX_TYPE tx_type, int16_t *input,

-                               const int16_t *dq,

-                               uint8_t *pred, uint8_t *dest,

-                               int pitch, int stride);

+void vp9_ht_dequant_idct_add_c(TX_TYPE tx_type, int16_t *input, const int16_t *dq,

+                                    unsigned char *pred, unsigned char *dest,

+                                    int pitch, int stride, uint16_t eobs);

 void vp9_ht_dequant_idct_add_8x8_c(TX_TYPE tx_type, int16_t *input,

-                                   const int16_t *dq, uint8_t *pred,

-                                   uint8_t *dest, int pitch, int stride);

+                                   const int16_t *dq, unsigned char *pred,

+                                   unsigned char *dest, int pitch, int stride,

+                                   uint16_t eobs);

 void vp9_ht_dequant_idct_add_16x16_c(TX_TYPE tx_type, int16_t *input,

-                                     const int16_t *dq, uint8_t *pred,

-                                     uint8_t *dest,

-                                     int pitch, int stride);

+                                     const int16_t *dq, unsigned char *pred,

+                                     unsigned char *dest,

+                                     int pitch, int stride, uint16_t eobs);

 #if CONFIG_SUPERBLOCKS

-void vp9_dequant_dc_idct_add_y_block_8x8_inplace_c(int16_t *q,

-                                                   const int16_t *dq,

-                                                   uint8_t *dst,

+void vp9_dequant_dc_idct_add_y_block_8x8_inplace_c(int16_t *q, const int16_t *dq,

+                                                   unsigned char *dst,

                                                    int stride,

                                                    uint16_t *eobs,

                                                    const int16_t *dc,

                                                    MACROBLOCKD *xd);

-void vp9_dequant_dc_idct_add_y_block_4x4_inplace_c(int16_t *q,

-                                                   const int16_t *dq,

-                                                   uint8_t *dst,

+void vp9_dequant_dc_idct_add_y_block_4x4_inplace_c(int16_t *q, const int16_t *dq,

+                                                   unsigned char *dst,

                                                    int stride,

                                                    uint16_t *eobs,

                                                    const int16_t *dc,

                                                    MACROBLOCKD *xd);

-void vp9_dequant_idct_add_uv_block_8x8_inplace_c(int16_t *q,

-                                                 const int16_t *dq,

-                                                 uint8_t *dstu,

-                                                 uint8_t *dstv,

+void vp9_dequant_idct_add_uv_block_8x8_inplace_c(int16_t *q, const int16_t *dq,

+                                                 unsigned char *dstu,

+                                                 unsigned char *dstv,

                                                  int stride,

                                                  uint16_t *eobs,

                                                  MACROBLOCKD *xd);

-void vp9_dequant_idct_add_uv_block_4x4_inplace_c(int16_t *q,

-                                                 const int16_t *dq,

-                                                 uint8_t *dstu,

-                                                 uint8_t *dstv,

+void vp9_dequant_idct_add_uv_block_4x4_inplace_c(int16_t *q, const int16_t *dq,

+                                                 unsigned char *dstu,

+                                                 unsigned char *dstv,

                                                  int stride,

                                                  uint16_t *eobs,

                                                  MACROBLOCKD *xd);

-#endif  // CONFIG_SUPERBLOCKS

+#endif

-#endif  // VP9_DECODER_VP9_DEQUANTIZE_H_

+#endif

--- a/vp9/decoder/vp9_onyxd_int.h

+++ b/vp9/decoder/vp9_onyxd_int.h

@@ -10,8 +10,7 @@

 #ifndef VP9_DECODER_VP9_ONYXD_INT_H_

 #define VP9_DECODER_VP9_ONYXD_INT_H_

-#include "vpx_ports/config.h"

+#include "./vpx_config.h"

 #include "vp9/decoder/vp9_onyxd.h"

 #include "vp9/decoder/vp9_treereader.h"

 #include "vp9/common/vp9_onyxc_int.h"

--- a/vp9/decoder/x86/vp9_idct_blk_mmx.c

+++ b/vp9/decoder/x86/vp9_idct_blk_mmx.c

@@ -8,7 +8,7 @@

  *  be found in the AUTHORS file in the root of the source tree.

*/

-#include "vpx_ports/config.h"

+#include "./vpx_config.h"

 #include "vp9/common/vp9_blockd.h"

 #include "vp9/decoder/vp9_dequantize.h"

 #include "vp9/decoder/x86/vp9_idct_mmx.h"

--- a/vp9/decoder/x86/vp9_idct_blk_sse2.c

+++ b/vp9/decoder/x86/vp9_idct_blk_sse2.c

@@ -8,7 +8,7 @@

  *  be found in the AUTHORS file in the root of the source tree.

*/

-#include "vpx_ports/config.h"

+#include "./vpx_config.h"

 #include "vp9/common/vp9_blockd.h"

 #include "vp9/decoder/vp9_dequantize.h"

--- a/vp9/decoder/x86/vp9_x86_dsystemdependent.c

+++ b/vp9/decoder/x86/vp9_x86_dsystemdependent.c

@@ -8,7 +8,7 @@

  *  be found in the AUTHORS file in the root of the source tree.

*/

-#include "vpx_ports/config.h"

+#include "./vpx_config.h"

 #include "vpx_ports/x86.h"

 #include "vp9/decoder/vp9_onyxd_int.h"

--- a/vp9/encoder/vp9_dct.c

+++ b/vp9/encoder/vp9_dct.c

@@ -11,7 +11,7 @@

 #include <assert.h>

 #include <math.h>

-#include "vpx_ports/config.h"

+#include "./vpx_config.h"

 #include "vp9/common/vp9_systemdependent.h"

 #include "vp9/common/vp9_blockd.h"

--- a/vp9/encoder/vp9_encodeframe.c

+++ b/vp9/encoder/vp9_encodeframe.c

@@ -9,7 +9,7 @@

*/

-#include "vpx_ports/config.h"

+#include "./vpx_config.h"

 #include "vp9/encoder/vp9_encodeframe.h"

 #include "vp9/encoder/vp9_encodemb.h"

 #include "vp9/encoder/vp9_encodemv.h"

@@ -2123,8 +2123,6 @@

   MACROBLOCK *const x = &cpi->mb;

   MACROBLOCKD *const xd = &x->e_mbd;

   MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;

-  unsigned char *segment_id = &mbmi->segment_id;

-  int seg_ref_active;

   unsigned char ref_pred_flag;

 #if CONFIG_SUPERBLOCKS

@@ -2169,8 +2167,6 @@

     vp9_update_zbin_extra(cpi, x);

-    seg_ref_active = vp9_segfeature_active(xd, *segment_id, SEG_LVL_REF_FRAME);

     // SET VARIOUS PREDICTION FLAGS

--- a/vp9/encoder/vp9_encodeintra.c

+++ b/vp9/encoder/vp9_encodeintra.c

@@ -8,7 +8,7 @@

  *  be found in the AUTHORS file in the root of the source tree.

*/

-#include "vpx_ports/config.h"

+#include "./vpx_config.h"

 #include "vp9_rtcd.h"

 #include "vp9/encoder/vp9_quantize.h"

 #include "vp9/common/vp9_reconintra.h"

@@ -70,7 +70,7 @@

   if (tx_type != DCT_DCT) {

     vp9_fht(be->src_diff, 32, be->coeff, tx_type, 4);

     vp9_ht_quantize_b_4x4(be, b, tx_type);

-    vp9_ihtllm(b->dqcoeff, b->diff, 32, tx_type, 4);

+    vp9_ihtllm(b->dqcoeff, b->diff, 32, tx_type, 4, b->eob);

   } else {

     x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32);

     x->quantize_b_4x4(be, b) ;

@@ -191,7 +191,7 @@

                 tx_type, 8);

       x->quantize_b_8x8(x->block + idx, xd->block + idx);

       vp9_ihtllm(xd->block[idx].dqcoeff, xd->block[ib].diff, 32,

-                   tx_type, 8);

+                   tx_type, 8, xd->block[idx].eob);

     } else {

       x->vp9_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32);

       x->quantize_b_8x8(x->block + idx, xd->block + idx);

@@ -205,7 +205,7 @@

       if (tx_type != DCT_DCT) {

         vp9_fht_c(be->src_diff, 32, be->coeff, tx_type, 4);

         vp9_ht_quantize_b_4x4(be, b, tx_type);

-        vp9_ihtllm(b->dqcoeff, b->diff, 32, tx_type, 4);

+        vp9_ihtllm(b->dqcoeff, b->diff, 32, tx_type, 4, b->eob);

       } else {

         x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32);

         x->quantize_b_4x4(be, b);

--- a/vp9/encoder/vp9_encodemb.c

+++ b/vp9/encoder/vp9_encodemb.c

@@ -8,7 +8,7 @@

  *  be found in the AUTHORS file in the root of the source tree.

*/

-#include "vpx_ports/config.h"

+#include "./vpx_config.h"

 #include "vp9/encoder/vp9_encodemb.h"

 #include "vp9/common/vp9_reconinter.h"

 #include "vp9/encoder/vp9_quantize.h"

--- a/vp9/encoder/vp9_encodemb.h

+++ b/vp9/encoder/vp9_encodemb.h

@@ -11,7 +11,7 @@

 #ifndef VP9_ENCODER_VP9_ENCODEMB_H_

 #define VP9_ENCODER_VP9_ENCODEMB_H_

-#include "vpx_ports/config.h"

+#include "./vpx_config.h"

 #include "vp9/encoder/vp9_block.h"

 typedef struct {

--- a/vp9/encoder/vp9_firstpass.c

+++ b/vp9/encoder/vp9_firstpass.c

@@ -41,9 +41,10 @@

 #define RMAX       128.0

 #define GF_RMAX    96.0

 #define ERR_DIVISOR   150.0

+#define MIN_DECAY_FACTOR 0.1

-#define KF_MB_INTRA_MIN 300

-#define GF_MB_INTRA_MIN 200

+#define KF_MB_INTRA_MIN 150

+#define GF_MB_INTRA_MIN 100

 #define DOUBLE_DIVIDE_CHECK(X) ((X)<0?(X)-.000001:(X)+.000001)

@@ -800,6 +801,7 @@

 static long long estimate_modemvcost(VP9_COMP *cpi,

                                      FIRSTPASS_STATS *fpstats) {

+#if 0

   int mv_cost;

   int mode_cost;

@@ -828,6 +830,7 @@

   // return mv_cost + mode_cost;

   // TODO PGW Fix overhead costs for extended Q range

+#endif

   return 0;

@@ -1405,10 +1408,9 @@

     // Cumulative effect of prediction quality decay

     if (!flash_detected) {

       decay_accumulator =

-        decay_accumulator *

-        get_prediction_decay_rate(cpi, &this_frame);

-      decay_accumulator =

-        decay_accumulator < 0.1 ? 0.1 : decay_accumulator;

+        decay_accumulator * get_prediction_decay_rate(cpi, &this_frame);

+      decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR

+                          ? MIN_DECAY_FACTOR : decay_accumulator;

     boost_score += (decay_accumulator *

@@ -1443,10 +1445,9 @@

     // Cumulative effect of prediction quality decay

     if (!flash_detected) {

       decay_accumulator =

-        decay_accumulator *

-        get_prediction_decay_rate(cpi, &this_frame);

-      decay_accumulator =

-        decay_accumulator < 0.1 ? 0.1 : decay_accumulator;

+        decay_accumulator * get_prediction_decay_rate(cpi, &this_frame);

+      decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR

+                          ? MIN_DECAY_FACTOR : decay_accumulator;

     boost_score += (decay_accumulator *

@@ -1632,7 +1633,7 @@

         ((mv_ratio_accumulator > 100.0) ||

          (abs_mv_in_out_accumulator > 3.0) ||

          (mv_in_out_accumulator < -2.0) ||

-         ((boost_score - old_boost_score) < 12.5))

+         ((boost_score - old_boost_score) < IIFACTOR))

       )) {

       boost_score = old_boost_score;

       break;

@@ -1952,12 +1953,9 @@

   FIRSTPASS_STATS this_frame;

   FIRSTPASS_STATS this_frame_copy;

-  double this_frame_error;

   double this_frame_intra_error;

   double this_frame_coded_error;

-  FIRSTPASS_STATS *start_pos;

   int overhead_bits;

   if (!cpi->twopass.stats_in) {

@@ -1971,12 +1969,9 @@

   if (EOF == input_stats(cpi, &this_frame))

     return;

-  this_frame_error = this_frame.ssim_weighted_pred_err;

   this_frame_intra_error = this_frame.intra_error;

   this_frame_coded_error = this_frame.coded_error;

-  start_pos = cpi->twopass.stats_in;

   // keyframe and section processing !

   if (cpi->twopass.frames_to_key == 0) {

     // Define next KF group and assign bits to it

@@ -2396,7 +2391,8 @@

     if (!detect_flash(cpi, 0)) {

       loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame);

       decay_accumulator = decay_accumulator * loop_decay_rate;

-      decay_accumulator = decay_accumulator < 0.1 ? 0.1 : decay_accumulator;

+      decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR

+                            ? MIN_DECAY_FACTOR : decay_accumulator;

     boost_score += (decay_accumulator * r);

@@ -2436,14 +2432,11 @@

     int allocation_chunks;

     int alt_kf_bits;

-    if (kf_boost < 300) {

-      kf_boost += (cpi->twopass.frames_to_key * 3);

-      if (kf_boost > 300)

-        kf_boost = 300;

-    }

+    if (kf_boost < (cpi->twopass.frames_to_key * 5))

+      kf_boost = (cpi->twopass.frames_to_key * 5);

-    if (kf_boost < 250)                                                      // Min KF boost

-      kf_boost = 250;

+    if (kf_boost < 300) // Min KF boost

+      kf_boost = 300;

     // Make a note of baseline boost and the zero motion

     // accumulator value for use elsewhere.

--- a/vp9/encoder/vp9_mbgraph.c

+++ b/vp9/encoder/vp9_mbgraph.c

@@ -27,7 +27,7 @@

   BLOCKD *d = &xd->block[0];

   vp9_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[BLOCK_16X16];

   unsigned int best_err;

-  int step_param, further_steps;

+  int step_param;

   int tmp_col_min = x->mv_col_min;

   int tmp_col_max = x->mv_col_max;

@@ -38,10 +38,8 @@

   // Further step/diamond searches as necessary

   if (cpi->Speed < 8) {

     step_param = cpi->sf.first_step + ((cpi->Speed > 5) ? 1 : 0);

-    further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;

   } else {

     step_param = cpi->sf.first_step + 2;

-    further_steps = 0;

   vp9_clamp_mv_min_max(x, ref_mv);

--- a/vp9/encoder/vp9_mcomp.c

+++ b/vp9/encoder/vp9_mcomp.c

@@ -12,7 +12,7 @@

 #include "vp9/encoder/vp9_onyx_int.h"

 #include "vp9/encoder/vp9_mcomp.h"

 #include "vpx_mem/vpx_mem.h"

-#include "vpx_ports/config.h"

+#include "./vpx_config.h"

 #include <stdio.h>

 #include <limits.h>

 #include <math.h>

--- a/vp9/encoder/vp9_onyx_if.c

+++ b/vp9/encoder/vp9_onyx_if.c

@@ -148,7 +148,6 @@

                                 double x3, double x2, double x, double c) {

   int i;

   double minqtarget;

-  double thisq;

   minqtarget = ((x3 * maxq * maxq * maxq) +

                 (x2 * maxq * maxq) +

@@ -159,7 +158,6 @@

     minqtarget = maxq;

   for (i = 0; i < QINDEX_RANGE; i++) {

-    thisq = vp9_convert_qindex_to_q(i);

     if (minqtarget <= vp9_convert_qindex_to_q(i))

       return i;

@@ -2925,8 +2923,6 @@

   int Loop = FALSE;

   int loop_count;

-  int this_q;

-  int last_zbin_oq;

   int q_low;

   int q_high;

@@ -2940,8 +2936,6 @@

   int overshoot_seen = FALSE;

   int undershoot_seen = FALSE;

-  int loop_size_estimate = 0;

   SPEED_FEATURES *sf = &cpi->sf;

 #if RESET_FOREACH_FILTER

   int q_low0;

@@ -2949,6 +2943,7 @@

   int zbin_oq_high0;

   int zbin_oq_low0 = 0;

   int Q0;

+  int last_zbin_oq;

   int last_zbin_oq0;

   int active_best_quality0;

   int active_worst_quality0;

@@ -3163,7 +3158,9 @@

     // Determine initial Q to try

     Q = vp9_regulate_q(cpi, cpi->this_frame_target);

+#if RESET_FOREACH_FILTER

   last_zbin_oq = cpi->zbin_over_quant;

+#endif

   // Set highest allowed value for Zbin over quant

   if (cm->frame_type == KEY_FRAME)

@@ -3267,7 +3264,6 @@

     vp9_clear_system_state();  // __asm emms;

     vp9_set_quantizer(cpi, Q);

-    this_q = Q;

     if (loop_count == 0) {

@@ -3503,7 +3499,9 @@

       // Loop = ((Q != last_q) || (last_zbin_oq != cpi->zbin_over_quant)) ? TRUE : FALSE;

       Loop = ((Q != last_q)) ? TRUE : FALSE;

+#if RESET_FOREACH_FILTER

       last_zbin_oq = cpi->zbin_over_quant;

+#endif

     } else

       Loop = FALSE;

@@ -3692,9 +3690,6 @@

    * needed in motion search besides loopfilter */

   cm->last_frame_type = cm->frame_type;

-  // Keep a copy of the size estimate used in the loop

-  loop_size_estimate = cpi->projected_frame_size;

   // Update rate control heuristics

   cpi->total_byte_count += (*size);

   cpi->projected_frame_size = (*size) << 3;

@@ -3795,7 +3790,7 @@

               "%6d %5d %5d %5d %8d %8.2f %10d %10.3f"

               "%10.3f %8d %10d %10d %10d\n",

               cpi->common.current_video_frame, cpi->this_frame_target,

-              cpi->projected_frame_size, loop_size_estimate,

+              cpi->projected_frame_size, 0, //loop_size_estimate,

               (cpi->projected_frame_size - cpi->this_frame_target),

               (int)cpi->total_target_vs_actual,

               (cpi->oxcf.starting_buffer_level - cpi->bits_off_target),

@@ -3825,7 +3820,7 @@

               "%8d %10d %10d %10d\n",

               cpi->common.current_video_frame,

               cpi->this_frame_target, cpi->projected_frame_size,

-              loop_size_estimate,

+              0, //loop_size_estimate,

               (cpi->projected_frame_size - cpi->this_frame_target),

               (int)cpi->total_target_vs_actual,

               (cpi->oxcf.starting_buffer_level - cpi->bits_off_target),

--- a/vp9/encoder/vp9_onyx_int.h

+++ b/vp9/encoder/vp9_onyx_int.h

@@ -13,7 +13,7 @@

 #define VP9_ENCODER_VP9_ONYX_INT_H_

 #include <stdio.h>

-#include "vpx_ports/config.h"

+#include "./vpx_config.h"

 #include "vp9/common/vp9_onyx.h"

 #include "vp9/encoder/vp9_treewriter.h"

 #include "vp9/encoder/vp9_tokenize.h"

--- a/vp9/encoder/vp9_picklpf.c

+++ b/vp9/encoder/vp9_picklpf.c

@@ -24,11 +24,9 @@

   uint8_t *src_y, *dst_y;

   int yheight;

   int ystride;

-  int border;

   int yoffset;

   int linestocopy;

-  border   = src_ybc->border;

   yheight  = src_ybc->y_height;

   ystride  = src_ybc->y_stride;

--- a/vp9/encoder/vp9_rdopt.c

+++ b/vp9/encoder/vp9_rdopt.c

@@ -1328,7 +1328,7 @@

   // inverse transform

   if (best_tx_type != DCT_DCT)

-    vp9_ihtllm(best_dqcoeff, b->diff, 32, best_tx_type, 4);

+    vp9_ihtllm(best_dqcoeff, b->diff, 32, best_tx_type, 4, b->eob);

   else

     xd->inv_xform4x4_x8(best_dqcoeff, b->diff, 32);

@@ -1518,7 +1518,7 @@

                                           int *skippable,

                                           int64_t txfm_cache[NB_TXFM_MODES]) {

   MB_PREDICTION_MODE mode;

-  TX_SIZE UNINITIALIZED_IS_SAFE(txfm_size);

+  TX_SIZE txfm_size = 0;

   MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);

 #if CONFIG_COMP_INTRA_PRED

   MB_PREDICTION_MODE mode2;

@@ -1562,7 +1562,6 @@

       this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);

       if (this_rd < best_rd) {

         mode_selected = mode;

         txfm_size = mbmi->txfm_size;

@@ -1796,6 +1795,7 @@

     mic->bmi[ib].as_mode.second = best_second_mode;

 #endif

   *Rate = cost;

   *rate_y = tot_rate_y;

   *Distortion = distortion;

@@ -3889,6 +3889,9 @@

   unsigned int ref_costs[MAX_REF_FRAMES];

   int_mv seg_mvs[NB_PARTITIONINGS][16 /* n_blocks */][MAX_REF_FRAMES - 1];

+  int intra_cost_penalty = 20 * vp9_dc_quant(cpi->common.base_qindex,

+                                             cpi->common.y1dc_delta_q);

   vpx_memset(mode8x8, 0, sizeof(mode8x8));

   vpx_memset(&frame_mv, 0, sizeof(frame_mv));

   vpx_memset(&best_mbmode, 0, sizeof(best_mbmode));

@@ -4086,10 +4089,8 @@

     if (!mbmi->ref_frame) {

       switch (this_mode) {

         default:

-        case DC_PRED:

         case V_PRED:

         case H_PRED:

-        case TM_PRED:

         case D45_PRED:

         case D135_PRED:

         case D117_PRED:

@@ -4096,6 +4097,9 @@

         case D153_PRED:

         case D27_PRED:

         case D63_PRED:

+          rate2 += intra_cost_penalty;

+        case DC_PRED:

+        case TM_PRED:

           mbmi->ref_frame = INTRA_FRAME;

           // FIXME compound intra prediction

           vp9_build_intra_predictors_mby(&x->e_mbd);

@@ -4129,6 +4133,7 @@

 #endif

                                              cpi->update_context);

           rate2 += rate;

+          rate2 += intra_cost_penalty;

           distortion2 += distortion;

           if (tmp_rd < best_yrd) {

@@ -4221,6 +4226,7 @@

           rate2 += rate;

+          rate2 += intra_cost_penalty;

           distortion2 += distortion;

           /* TODO: uv rate maybe over-estimated here since there is UV intra

@@ -4730,7 +4736,7 @@

   int mode16x16;

   int mode8x8[2][4];

   int dist;

-  int modeuv, modeuv8x8, uv_intra_skippable, uv_intra_skippable_8x8;

+  int modeuv, uv_intra_skippable, uv_intra_skippable_8x8;

   int y_intra16x16_skippable = 0;

   int64_t txfm_cache[NB_TXFM_MODES];

   TX_SIZE txfm_size_16x16;

@@ -4743,13 +4749,11 @@

   if (cpi->common.txfm_mode != ONLY_4X4) {

     rd_pick_intra_mbuv_mode_8x8(cpi, x, &rateuv8x8, &rateuv8x8_tokenonly,

                                 &distuv8x8, &uv_intra_skippable_8x8);

-    modeuv8x8 = mbmi->uv_mode;

   } else {

     uv_intra_skippable_8x8 = uv_intra_skippable;

     rateuv8x8 = rateuv;

     distuv8x8 = distuv;

     rateuv8x8_tokenonly = rateuv_tokenonly;

-    modeuv8x8 = modeuv;

   // current macroblock under rate-distortion optimization test loop

--- a/vp9/encoder/vp9_sad_c.c

+++ b/vp9/encoder/vp9_sad_c.c

@@ -11,7 +11,7 @@

 #include <stdlib.h>

 #include "vp9/common/vp9_sadmxn.h"

-#include "vpx_ports/config.h"

+#include "./vpx_config.h"

 #include "vpx/vpx_integer.h"

 unsigned int vp9_sad64x64_c(const uint8_t *src_ptr,

--- a/vp9/encoder/vp9_temporal_filter.c

+++ b/vp9/encoder/vp9_temporal_filter.c

@@ -130,7 +130,6 @@

                                               int error_thresh) {

   MACROBLOCK *x = &cpi->mb;

   int step_param;

-  int further_steps;

   int sadpb = x->sadperbit16;

   int bestsme = INT_MAX;

@@ -164,11 +163,8 @@

   if (cpi->Speed < 8) {

     step_param = cpi->sf.first_step +

                  ((cpi->Speed > 5) ? 1 : 0);

-    further_steps =

-      (cpi->sf.max_step_search_steps - 1) - step_param;

   } else {

     step_param = cpi->sf.first_step + 2;

-    further_steps = 0;

   /*cpi->sf.search_method == HEX*/

--- a/vp9/encoder/x86/vp9_dct_mmx.asm

+++ b/vp9/encoder/x86/vp9_dct_mmx.asm

@@ -12,7 +12,7 @@

 %include "vpx_ports/x86_abi_support.asm"

 ;void vp9_short_fdct4x4_mmx(short *input, short *output, int pitch)

-global sym(vp9_short_fdct4x4_mmx)

+global sym(vp9_short_fdct4x4_mmx) PRIVATE

 sym(vp9_short_fdct4x4_mmx):

     push        rbp

     mov         rbp,        rsp

--- a/vp9/encoder/x86/vp9_dct_sse2.asm

+++ b/vp9/encoder/x86/vp9_dct_sse2.asm

@@ -61,7 +61,7 @@

 %endmacro

 ;void vp9_short_fdct4x4_sse2(short *input, short *output, int pitch)

-global sym(vp9_short_fdct4x4_sse2)

+global sym(vp9_short_fdct4x4_sse2) PRIVATE

 sym(vp9_short_fdct4x4_sse2):

     STACK_FRAME_CREATE

@@ -166,7 +166,7 @@

     STACK_FRAME_DESTROY

 ;void vp9_short_fdct8x4_sse2(short *input, short *output, int pitch)

-global sym(vp9_short_fdct8x4_sse2)

+global sym(vp9_short_fdct8x4_sse2) PRIVATE

 sym(vp9_short_fdct8x4_sse2):

     STACK_FRAME_CREATE

--- a/vp9/encoder/x86/vp9_encodeopt.asm

+++ b/vp9/encoder/x86/vp9_encodeopt.asm

@@ -12,7 +12,7 @@

 %include "vpx_ports/x86_abi_support.asm"

 ;int vp9_block_error_xmm(short *coeff_ptr,  short *dcoef_ptr)

-global sym(vp9_block_error_xmm)

+global sym(vp9_block_error_xmm) PRIVATE

 sym(vp9_block_error_xmm):

     push        rbp

     mov         rbp, rsp

@@ -60,7 +60,7 @@

ret

 ;int vp9_block_error_mmx(short *coeff_ptr,  short *dcoef_ptr)

-global sym(vp9_block_error_mmx)

+global sym(vp9_block_error_mmx) PRIVATE

 sym(vp9_block_error_mmx):

     push        rbp

     mov         rbp, rsp

@@ -126,7 +126,7 @@

 ;int vp9_mbblock_error_mmx_impl(short *coeff_ptr, short *dcoef_ptr, int dc);

-global sym(vp9_mbblock_error_mmx_impl)

+global sym(vp9_mbblock_error_mmx_impl) PRIVATE

 sym(vp9_mbblock_error_mmx_impl):

     push        rbp

     mov         rbp, rsp

@@ -203,7 +203,7 @@

 ;int vp9_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr, int dc);

-global sym(vp9_mbblock_error_xmm_impl)

+global sym(vp9_mbblock_error_xmm_impl) PRIVATE

 sym(vp9_mbblock_error_xmm_impl):

     push        rbp

     mov         rbp, rsp

@@ -273,7 +273,7 @@

 ;int vp9_mbuverror_mmx_impl(short *s_ptr, short *d_ptr);

-global sym(vp9_mbuverror_mmx_impl)

+global sym(vp9_mbuverror_mmx_impl) PRIVATE

 sym(vp9_mbuverror_mmx_impl):

     push        rbp

     mov         rbp, rsp

@@ -330,7 +330,7 @@

 ;int vp9_mbuverror_xmm_impl(short *s_ptr, short *d_ptr);

-global sym(vp9_mbuverror_xmm_impl)

+global sym(vp9_mbuverror_xmm_impl) PRIVATE

 sym(vp9_mbuverror_xmm_impl):

     push        rbp

     mov         rbp, rsp

--- a/vp9/encoder/x86/vp9_fwalsh_sse2.asm

+++ b/vp9/encoder/x86/vp9_fwalsh_sse2.asm

@@ -12,7 +12,7 @@

 %include "vpx_ports/x86_abi_support.asm"

 ;void vp9_short_walsh4x4_sse2(short *input, short *output, int pitch)

-global sym(vp9_short_walsh4x4_sse2)

+global sym(vp9_short_walsh4x4_sse2) PRIVATE

 sym(vp9_short_walsh4x4_sse2):

     push        rbp

     mov         rbp, rsp

--- a/vp9/encoder/x86/vp9_quantize_mmx.asm

+++ b/vp9/encoder/x86/vp9_quantize_mmx.asm

@@ -15,7 +15,7 @@

 ;                           short *qcoeff_ptr,short *dequant_ptr,

 ;                           short *scan_mask, short *round_ptr,

 ;                           short *quant_ptr, short *dqcoeff_ptr);

-global sym(vp9_fast_quantize_b_impl_mmx)

+global sym(vp9_fast_quantize_b_impl_mmx) PRIVATE

 sym(vp9_fast_quantize_b_impl_mmx):

     push        rbp

     mov         rbp, rsp

--- a/vp9/encoder/x86/vp9_quantize_sse2.asm

+++ b/vp9/encoder/x86/vp9_quantize_sse2.asm

@@ -16,7 +16,7 @@

 ;  (BLOCK  *b,                     |  0

 ;   BLOCKD *d)                     |  1

-global sym(vp9_regular_quantize_b_sse2)

+global sym(vp9_regular_quantize_b_sse2) PRIVATE

 sym(vp9_regular_quantize_b_sse2):

     push        rbp

     mov         rbp, rsp

@@ -237,7 +237,7 @@

 ;  (BLOCK  *b,                  |  0

 ;   BLOCKD *d)                  |  1

-global sym(vp9_fast_quantize_b_sse2)

+global sym(vp9_fast_quantize_b_sse2) PRIVATE

 sym(vp9_fast_quantize_b_sse2):

     push        rbp

     mov         rbp, rsp

--- a/vp9/encoder/x86/vp9_quantize_sse4.asm

+++ b/vp9/encoder/x86/vp9_quantize_sse4.asm

@@ -16,7 +16,7 @@

 ;  (BLOCK  *b,                     |  0

 ;   BLOCKD *d)                     |  1

-global sym(vp9_regular_quantize_b_sse4)

+global sym(vp9_regular_quantize_b_sse4) PRIVATE

 sym(vp9_regular_quantize_b_sse4):

 %if ABI_IS_32BIT

--- a/vp9/encoder/x86/vp9_quantize_ssse3.asm

+++ b/vp9/encoder/x86/vp9_quantize_ssse3.asm

@@ -17,7 +17,7 @@

 ;   BLOCKD *d)                   |  1

-global sym(vp9_fast_quantize_b_ssse3)

+global sym(vp9_fast_quantize_b_ssse3) PRIVATE

 sym(vp9_fast_quantize_b_ssse3):

     push        rbp

     mov         rbp, rsp

--- a/vp9/encoder/x86/vp9_sad_mmx.asm

+++ b/vp9/encoder/x86/vp9_sad_mmx.asm

@@ -11,11 +11,11 @@

 %include "vpx_ports/x86_abi_support.asm"

-global sym(vp9_sad16x16_mmx)

-global sym(vp9_sad8x16_mmx)

-global sym(vp9_sad8x8_mmx)

-global sym(vp9_sad4x4_mmx)

-global sym(vp9_sad16x8_mmx)

+global sym(vp9_sad16x16_mmx) PRIVATE

+global sym(vp9_sad8x16_mmx) PRIVATE

+global sym(vp9_sad8x8_mmx) PRIVATE

+global sym(vp9_sad4x4_mmx) PRIVATE

+global sym(vp9_sad16x8_mmx) PRIVATE

 ;unsigned int vp9_sad16x16_mmx(

 ;    unsigned char *src_ptr,

--- a/vp9/encoder/x86/vp9_sad_sse2.asm

+++ b/vp9/encoder/x86/vp9_sad_sse2.asm

@@ -16,7 +16,7 @@

 ;    int  src_stride,

 ;    unsigned char *ref_ptr,

 ;    int  ref_stride)

-global sym(vp9_sad16x16_wmt)

+global sym(vp9_sad16x16_wmt) PRIVATE

 sym(vp9_sad16x16_wmt):

     push        rbp

     mov         rbp, rsp

@@ -90,7 +90,7 @@

 ;    unsigned char *ref_ptr,

 ;    int  ref_stride,

 ;    int  max_err)

-global sym(vp9_sad8x16_wmt)

+global sym(vp9_sad8x16_wmt) PRIVATE

 sym(vp9_sad8x16_wmt):

     push        rbp

     mov         rbp, rsp

@@ -153,7 +153,7 @@

 ;    int  src_stride,

 ;    unsigned char *ref_ptr,

 ;    int  ref_stride)

-global sym(vp9_sad8x8_wmt)

+global sym(vp9_sad8x8_wmt) PRIVATE

 sym(vp9_sad8x8_wmt):

     push        rbp

     mov         rbp, rsp

@@ -206,7 +206,7 @@

 ;    int  src_stride,

 ;    unsigned char *ref_ptr,

 ;    int  ref_stride)

-global sym(vp9_sad4x4_wmt)

+global sym(vp9_sad4x4_wmt) PRIVATE

 sym(vp9_sad4x4_wmt):

     push        rbp

     mov         rbp, rsp

@@ -261,7 +261,7 @@

 ;    int  src_stride,

 ;    unsigned char *ref_ptr,

 ;    int  ref_stride)

-global sym(vp9_sad16x8_wmt)

+global sym(vp9_sad16x8_wmt) PRIVATE

 sym(vp9_sad16x8_wmt):

     push        rbp

     mov         rbp, rsp

@@ -335,7 +335,7 @@

 ;    unsigned char *dst_ptr,

 ;    int  dst_stride,

 ;    int height);

-global sym(vp9_copy32xn_sse2)

+global sym(vp9_copy32xn_sse2) PRIVATE

 sym(vp9_copy32xn_sse2):

     push        rbp

     mov         rbp, rsp

--- a/vp9/encoder/x86/vp9_sad_sse3.asm

+++ b/vp9/encoder/x86/vp9_sad_sse3.asm

@@ -380,7 +380,7 @@

 ;    unsigned char *ref_ptr,

 ;    int  ref_stride,

 ;    int  *results)

-global sym(vp9_sad16x16x3_sse3)

+global sym(vp9_sad16x16x3_sse3) PRIVATE

 sym(vp9_sad16x16x3_sse3):

     STACK_FRAME_CREATE_X3

@@ -422,7 +422,7 @@

 ;    unsigned char *ref_ptr,

 ;    int  ref_stride,

 ;    int  *results)

-global sym(vp9_sad16x8x3_sse3)

+global sym(vp9_sad16x8x3_sse3) PRIVATE

 sym(vp9_sad16x8x3_sse3):

     STACK_FRAME_CREATE_X3

@@ -460,7 +460,7 @@

 ;    unsigned char *ref_ptr,

 ;    int  ref_stride,

 ;    int  *results)

-global sym(vp9_sad8x16x3_sse3)

+global sym(vp9_sad8x16x3_sse3) PRIVATE

 sym(vp9_sad8x16x3_sse3):

     STACK_FRAME_CREATE_X3

@@ -489,7 +489,7 @@

 ;    unsigned char *ref_ptr,

 ;    int  ref_stride,

 ;    int  *results)

-global sym(vp9_sad8x8x3_sse3)

+global sym(vp9_sad8x8x3_sse3) PRIVATE

 sym(vp9_sad8x8x3_sse3):

     STACK_FRAME_CREATE_X3

@@ -514,7 +514,7 @@

 ;    unsigned char *ref_ptr,

 ;    int  ref_stride,

 ;    int  *results)

-global sym(vp9_sad4x4x3_sse3)

+global sym(vp9_sad4x4x3_sse3) PRIVATE

 sym(vp9_sad4x4x3_sse3):

     STACK_FRAME_CREATE_X3

@@ -589,7 +589,7 @@

 ;    int  ref_stride,

 ;    int  max_err)

 ;%define lddqu movdqu

-global sym(vp9_sad16x16_sse3)

+global sym(vp9_sad16x16_sse3) PRIVATE

 sym(vp9_sad16x16_sse3):

     STACK_FRAME_CREATE_X3

@@ -642,7 +642,7 @@

 ;    unsigned char *dst_ptr,

 ;    int  dst_stride,

 ;    int height);

-global sym(vp9_copy32xn_sse3)

+global sym(vp9_copy32xn_sse3) PRIVATE

 sym(vp9_copy32xn_sse3):

     STACK_FRAME_CREATE_X3

@@ -703,7 +703,7 @@

 ;    unsigned char *ref_ptr_base,

 ;    int  ref_stride,

 ;    int  *results)

-global sym(vp9_sad16x16x4d_sse3)

+global sym(vp9_sad16x16x4d_sse3) PRIVATE

 sym(vp9_sad16x16x4d_sse3):

     STACK_FRAME_CREATE_X4

@@ -754,7 +754,7 @@

 ;    unsigned char *ref_ptr_base,

 ;    int  ref_stride,

 ;    int  *results)

-global sym(vp9_sad16x8x4d_sse3)

+global sym(vp9_sad16x8x4d_sse3) PRIVATE

 sym(vp9_sad16x8x4d_sse3):

     STACK_FRAME_CREATE_X4

@@ -801,7 +801,7 @@

 ;    unsigned char *ref_ptr,

 ;    int  ref_stride,

 ;    int  *results)

-global sym(vp9_sad8x16x4d_sse3)

+global sym(vp9_sad8x16x4d_sse3) PRIVATE

 sym(vp9_sad8x16x4d_sse3):

     STACK_FRAME_CREATE_X4

@@ -834,7 +834,7 @@

 ;    unsigned char *ref_ptr,

 ;    int  ref_stride,

 ;    int  *results)

-global sym(vp9_sad8x8x4d_sse3)

+global sym(vp9_sad8x8x4d_sse3) PRIVATE

 sym(vp9_sad8x8x4d_sse3):

     STACK_FRAME_CREATE_X4

@@ -863,7 +863,7 @@

 ;    unsigned char *ref_ptr,

 ;    int  ref_stride,

 ;    int  *results)

-global sym(vp9_sad4x4x4d_sse3)

+global sym(vp9_sad4x4x4d_sse3) PRIVATE

 sym(vp9_sad4x4x4d_sse3):

     STACK_FRAME_CREATE_X4

--- a/vp9/encoder/x86/vp9_sad_sse4.asm

+++ b/vp9/encoder/x86/vp9_sad_sse4.asm

@@ -161,7 +161,7 @@

 ;    const unsigned char *ref_ptr,

 ;    int  ref_stride,

 ;    unsigned short *sad_array);

-global sym(vp9_sad16x16x8_sse4)

+global sym(vp9_sad16x16x8_sse4) PRIVATE

 sym(vp9_sad16x16x8_sse4):

     push        rbp

     mov         rbp, rsp

@@ -203,7 +203,7 @@

 ;    int  ref_stride,

 ;    unsigned short *sad_array

;);

-global sym(vp9_sad16x8x8_sse4)

+global sym(vp9_sad16x8x8_sse4) PRIVATE

 sym(vp9_sad16x8x8_sse4):

     push        rbp

     mov         rbp, rsp

@@ -241,7 +241,7 @@

 ;    int  ref_stride,

 ;    unsigned short *sad_array

;);

-global sym(vp9_sad8x8x8_sse4)

+global sym(vp9_sad8x8x8_sse4) PRIVATE

 sym(vp9_sad8x8x8_sse4):

     push        rbp

     mov         rbp, rsp

@@ -279,7 +279,7 @@

 ;    int  ref_stride,

 ;    unsigned short *sad_array

;);

-global sym(vp9_sad8x16x8_sse4)

+global sym(vp9_sad8x16x8_sse4) PRIVATE

 sym(vp9_sad8x16x8_sse4):

     push        rbp

     mov         rbp, rsp

@@ -320,7 +320,7 @@

 ;    int  ref_stride,

 ;    unsigned short *sad_array

;);

-global sym(vp9_sad4x4x8_sse4)

+global sym(vp9_sad4x4x8_sse4) PRIVATE

 sym(vp9_sad4x4x8_sse4):

     push        rbp

     mov         rbp, rsp

--- a/vp9/encoder/x86/vp9_sad_ssse3.asm

+++ b/vp9/encoder/x86/vp9_sad_ssse3.asm

@@ -152,7 +152,7 @@

 ;    unsigned char *ref_ptr,

 ;    int  ref_stride,

 ;    int  *results)

-global sym(vp9_sad16x16x3_ssse3)

+global sym(vp9_sad16x16x3_ssse3) PRIVATE

 sym(vp9_sad16x16x3_ssse3):

     push        rbp

     mov         rbp, rsp

@@ -265,7 +265,7 @@

 ;    unsigned char *ref_ptr,

 ;    int  ref_stride,

 ;    int  *results)

-global sym(vp9_sad16x8x3_ssse3)

+global sym(vp9_sad16x8x3_ssse3) PRIVATE

 sym(vp9_sad16x8x3_ssse3):

     push        rbp

     mov         rbp, rsp

--- a/vp9/encoder/x86/vp9_ssim_opt.asm

+++ b/vp9/encoder/x86/vp9_ssim_opt.asm

@@ -61,7 +61,7 @@

 ; or pavgb At this point this is just meant to be first pass for calculating

 ; all the parms needed for 16x16 ssim so we can play with dssim as distortion

 ; in mode selection code.

-global sym(vp9_ssim_parms_16x16_sse2)

+global sym(vp9_ssim_parms_16x16_sse2) PRIVATE

 sym(vp9_ssim_parms_16x16_sse2):

     push        rbp

     mov         rbp, rsp

@@ -151,7 +151,7 @@

 ; or pavgb At this point this is just meant to be first pass for calculating

 ; all the parms needed for 16x16 ssim so we can play with dssim as distortion

 ; in mode selection code.

-global sym(vp9_ssim_parms_8x8_sse2)

+global sym(vp9_ssim_parms_8x8_sse2) PRIVATE

 sym(vp9_ssim_parms_8x8_sse2):

     push        rbp

     mov         rbp, rsp

--- a/vp9/encoder/x86/vp9_subtract_mmx.asm

+++ b/vp9/encoder/x86/vp9_subtract_mmx.asm

@@ -14,7 +14,7 @@

 ;void vp9_subtract_b_mmx_impl(unsigned char *z,  int src_stride,

 ;                            short *diff, unsigned char *Predictor,

 ;                            int pitch);

-global sym(vp9_subtract_b_mmx_impl)

+global sym(vp9_subtract_b_mmx_impl) PRIVATE

 sym(vp9_subtract_b_mmx_impl):

     push        rbp

     mov         rbp, rsp

@@ -74,7 +74,7 @@

ret

 ;void vp9_subtract_mby_mmx(short *diff, unsigned char *src, unsigned char *pred, int stride)

-global sym(vp9_subtract_mby_mmx)

+global sym(vp9_subtract_mby_mmx) PRIVATE

 sym(vp9_subtract_mby_mmx):

     push        rbp

     mov         rbp, rsp

@@ -150,7 +150,7 @@

 ;void vp9_subtract_mbuv_mmx(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride)

-global sym(vp9_subtract_mbuv_mmx)

+global sym(vp9_subtract_mbuv_mmx) PRIVATE

 sym(vp9_subtract_mbuv_mmx):

     push        rbp

     mov         rbp, rsp

--- a/vp9/encoder/x86/vp9_subtract_sse2.asm

+++ b/vp9/encoder/x86/vp9_subtract_sse2.asm

@@ -14,7 +14,7 @@

 ;void vp9_subtract_b_sse2_impl(unsigned char *z,  int src_stride,

 ;                            short *diff, unsigned char *Predictor,

 ;                            int pitch);

-global sym(vp9_subtract_b_sse2_impl)

+global sym(vp9_subtract_b_sse2_impl) PRIVATE

 sym(vp9_subtract_b_sse2_impl):

     push        rbp

     mov         rbp, rsp

@@ -72,7 +72,7 @@

 ;void vp9_subtract_mby_sse2(short *diff, unsigned char *src, unsigned char *pred, int stride)

-global sym(vp9_subtract_mby_sse2)

+global sym(vp9_subtract_mby_sse2) PRIVATE

 sym(vp9_subtract_mby_sse2):

     push        rbp

     mov         rbp, rsp

@@ -146,7 +146,7 @@

 ;void vp9_subtract_mbuv_sse2(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride)

-global sym(vp9_subtract_mbuv_sse2)

+global sym(vp9_subtract_mbuv_sse2) PRIVATE

 sym(vp9_subtract_mbuv_sse2):

     push        rbp

     mov         rbp, rsp

--- a/vp9/encoder/x86/vp9_temporal_filter_apply_sse2.asm

+++ b/vp9/encoder/x86/vp9_temporal_filter_apply_sse2.asm

@@ -20,7 +20,7 @@

 ;   int             filter_weight,    |  5

 ;   unsigned int   *accumulator,      |  6

 ;   unsigned short *count)            |  7

-global sym(vp9_temporal_filter_apply_sse2)

+global sym(vp9_temporal_filter_apply_sse2) PRIVATE

 sym(vp9_temporal_filter_apply_sse2):

     push        rbp

--- a/vp9/encoder/x86/vp9_variance_impl_mmx.asm

+++ b/vp9/encoder/x86/vp9_variance_impl_mmx.asm

@@ -12,7 +12,7 @@

 %include "vpx_ports/x86_abi_support.asm"

 ;unsigned int vp9_get_mb_ss_mmx( short *src_ptr )

-global sym(vp9_get_mb_ss_mmx)

+global sym(vp9_get_mb_ss_mmx) PRIVATE

 sym(vp9_get_mb_ss_mmx):

     push        rbp

     mov         rbp, rsp

@@ -72,7 +72,7 @@

 ;    unsigned int *SSE,

 ;    int *Sum

;)

-global sym(vp9_get8x8var_mmx)

+global sym(vp9_get8x8var_mmx) PRIVATE

 sym(vp9_get8x8var_mmx):

     push        rbp

     mov         rbp, rsp

@@ -320,7 +320,7 @@

 ;    unsigned int *SSE,

 ;    int *Sum

;)

-global sym(vp9_get4x4var_mmx)

+global sym(vp9_get4x4var_mmx) PRIVATE

 sym(vp9_get4x4var_mmx):

     push        rbp

     mov         rbp, rsp

@@ -433,7 +433,7 @@

 ;    unsigned char *ref_ptr,

 ;    int  recon_stride

;)

-global sym(vp9_get4x4sse_cs_mmx)

+global sym(vp9_get4x4sse_cs_mmx) PRIVATE

 sym(vp9_get4x4sse_cs_mmx):

     push        rbp

     mov         rbp, rsp

@@ -522,7 +522,7 @@

 ;    int *sum,

 ;    unsigned int *sumsquared

;)

-global sym(vp9_filter_block2d_bil4x4_var_mmx)

+global sym(vp9_filter_block2d_bil4x4_var_mmx) PRIVATE

 sym(vp9_filter_block2d_bil4x4_var_mmx):

     push        rbp

     mov         rbp, rsp

@@ -667,7 +667,7 @@

 ;    int *sum,

 ;    unsigned int *sumsquared

;)

-global sym(vp9_filter_block2d_bil_var_mmx)

+global sym(vp9_filter_block2d_bil_var_mmx) PRIVATE

 sym(vp9_filter_block2d_bil_var_mmx):

     push        rbp

     mov         rbp, rsp

--- a/vp9/encoder/x86/vp9_variance_impl_sse2.asm

+++ b/vp9/encoder/x86/vp9_variance_impl_sse2.asm

@@ -17,7 +17,7 @@

;(

 ;    short *src_ptr

;)

-global sym(vp9_get_mb_ss_sse2)

+global sym(vp9_get_mb_ss_sse2) PRIVATE

 sym(vp9_get_mb_ss_sse2):

     push        rbp

     mov         rbp, rsp

@@ -80,7 +80,7 @@

 ;    unsigned int    *  SSE,

 ;    int             *  Sum

;)

-global sym(vp9_get16x16var_sse2)

+global sym(vp9_get16x16var_sse2) PRIVATE

 sym(vp9_get16x16var_sse2):

     push        rbp

     mov         rbp, rsp

@@ -224,7 +224,7 @@

 ;    unsigned int    *  SSE,

 ;    int             *  Sum

;)

-global sym(vp9_get8x8var_sse2)

+global sym(vp9_get8x8var_sse2) PRIVATE

 sym(vp9_get8x8var_sse2):

     push        rbp

     mov         rbp, rsp

@@ -413,7 +413,7 @@

 ;    unsigned int *sumsquared;;

;)

-global sym(vp9_filter_block2d_bil_var_sse2)

+global sym(vp9_filter_block2d_bil_var_sse2) PRIVATE

 sym(vp9_filter_block2d_bil_var_sse2):

     push        rbp

     mov         rbp, rsp

@@ -690,7 +690,7 @@

 ;    int *sum,

 ;    unsigned int *sumsquared

;)

-global sym(vp9_half_horiz_vert_variance8x_h_sse2)

+global sym(vp9_half_horiz_vert_variance8x_h_sse2) PRIVATE

 sym(vp9_half_horiz_vert_variance8x_h_sse2):

     push        rbp

     mov         rbp, rsp

@@ -812,7 +812,7 @@

 ;    int *sum,

 ;    unsigned int *sumsquared

;)

-global sym(vp9_half_horiz_vert_variance16x_h_sse2)

+global sym(vp9_half_horiz_vert_variance16x_h_sse2) PRIVATE

 sym(vp9_half_horiz_vert_variance16x_h_sse2):

     push        rbp

     mov         rbp, rsp

@@ -928,7 +928,7 @@

 ;    int *sum,

 ;    unsigned int *sumsquared

;)

-global sym(vp9_half_vert_variance8x_h_sse2)

+global sym(vp9_half_vert_variance8x_h_sse2) PRIVATE

 sym(vp9_half_vert_variance8x_h_sse2):

     push        rbp

     mov         rbp, rsp

@@ -1035,7 +1035,7 @@

 ;    int *sum,

 ;    unsigned int *sumsquared

;)

-global sym(vp9_half_vert_variance16x_h_sse2)

+global sym(vp9_half_vert_variance16x_h_sse2) PRIVATE

 sym(vp9_half_vert_variance16x_h_sse2):

     push        rbp

     mov         rbp, rsp

@@ -1143,7 +1143,7 @@

 ;    int *sum,

 ;    unsigned int *sumsquared

;)

-global sym(vp9_half_horiz_variance8x_h_sse2)

+global sym(vp9_half_horiz_variance8x_h_sse2) PRIVATE

 sym(vp9_half_horiz_variance8x_h_sse2):

     push        rbp

     mov         rbp, rsp

@@ -1248,7 +1248,7 @@

 ;    int *sum,

 ;    unsigned int *sumsquared

;)

-global sym(vp9_half_horiz_variance16x_h_sse2)

+global sym(vp9_half_horiz_variance16x_h_sse2) PRIVATE

 sym(vp9_half_horiz_variance16x_h_sse2):

     push        rbp

     mov         rbp, rsp

--- a/vp9/encoder/x86/vp9_variance_impl_ssse3.asm

+++ b/vp9/encoder/x86/vp9_variance_impl_ssse3.asm

@@ -29,7 +29,7 @@

;)

 ;Note: The filter coefficient at offset=0 is 128. Since the second register

 ;for Pmaddubsw is signed bytes, we must calculate zero offset seperately.

-global sym(vp9_filter_block2d_bil_var_ssse3)

+global sym(vp9_filter_block2d_bil_var_ssse3) PRIVATE

 sym(vp9_filter_block2d_bil_var_ssse3):

     push        rbp

     mov         rbp, rsp

--- a/vp9/encoder/x86/vp9_x86_csystemdependent.c

+++ b/vp9/encoder/x86/vp9_x86_csystemdependent.c

@@ -9,7 +9,7 @@

*/

-#include "vpx_ports/config.h"

+#include "./vpx_config.h"

 #include "vpx_ports/x86.h"

 #include "vp9/encoder/vp9_variance.h"

 #include "vp9/encoder/vp9_onyx_int.h"

--- a/vp9/vp9_common.mk

+++ b/vp9/vp9_common.mk

@@ -17,6 +17,7 @@

 VP9_COMMON_SRCS-yes += common/vp9_blockd.c

 VP9_COMMON_SRCS-yes += common/vp9_coefupdateprobs.h

 VP9_COMMON_SRCS-yes += common/vp9_debugmodes.c

+VP9_COMMON_SRCS-yes += common/vp9_default_coef_probs.h

 VP9_COMMON_SRCS-yes += common/vp9_entropy.c

 VP9_COMMON_SRCS-yes += common/vp9_entropymode.c

 VP9_COMMON_SRCS-yes += common/vp9_entropymv.c

@@ -56,6 +57,7 @@

 VP9_COMMON_SRCS-yes += common/vp9_subpixel.h

 VP9_COMMON_SRCS-yes += common/vp9_swapyv12buffer.h

 VP9_COMMON_SRCS-yes += common/vp9_systemdependent.h

+VP9_COMMON_SRCS-yes += common/vp9_textblit.h

 VP9_COMMON_SRCS-yes += common/vp9_treecoder.h

 VP9_COMMON_SRCS-yes += common/vp9_invtrans.c

 VP9_COMMON_SRCS-yes += common/vp9_loopfilter.c

@@ -84,7 +86,6 @@

 VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_loopfilter_x86.c

 VP9_COMMON_SRCS-$(CONFIG_POSTPROC) += common/vp9_postproc.h

 VP9_COMMON_SRCS-$(CONFIG_POSTPROC) += common/vp9_postproc.c

-VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_idctllm_mmx.asm

 VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_iwalsh_mmx.asm

 VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_recon_mmx.asm

 VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_subpixel_mmx.asm

--- a/vp9/vp9cx.mk

+++ b/vp9/vp9cx.mk

@@ -31,6 +31,7 @@

 VP9_CX_SRCS-yes += encoder/vp9_boolhuff.c

 VP9_CX_SRCS-yes += encoder/vp9_dct.c

 VP9_CX_SRCS-yes += encoder/vp9_encodeframe.c

+VP9_CX_SRCS-yes += encoder/vp9_encodeframe.h

 VP9_CX_SRCS-yes += encoder/vp9_encodeintra.c

 VP9_CX_SRCS-yes += encoder/vp9_encodemb.c

 VP9_CX_SRCS-yes += encoder/vp9_encodemv.c

@@ -58,6 +59,7 @@

 VP9_CX_SRCS-yes += encoder/vp9_modecosts.c

 VP9_CX_SRCS-yes += encoder/vp9_onyx_if.c

 VP9_CX_SRCS-yes += encoder/vp9_picklpf.c

+VP9_CX_SRCS-yes += encoder/vp9_picklpf.h

 VP9_CX_SRCS-yes += encoder/vp9_psnr.c

 VP9_CX_SRCS-yes += encoder/vp9_quantize.c

 VP9_CX_SRCS-yes += encoder/vp9_ratectrl.c

@@ -87,6 +89,7 @@

 VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_variance_impl_mmx.asm

 VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_sad_mmx.asm

 VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_dct_mmx.asm

+VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_dct_mmx.h

 VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_subtract_mmx.asm

 VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_sse2.asm

 VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_variance_sse2.c

--- a/vp9/vp9dx.mk

+++ b/vp9/vp9dx.mk

@@ -21,6 +21,7 @@

 VP9_DX_SRCS-yes += decoder/vp9_dboolhuff.c

 VP9_DX_SRCS-yes += decoder/vp9_decodemv.c

 VP9_DX_SRCS-yes += decoder/vp9_decodframe.c

+VP9_DX_SRCS-yes += decoder/vp9_decodframe.h

 VP9_DX_SRCS-yes += decoder/vp9_dequantize.c

 VP9_DX_SRCS-yes += decoder/vp9_detokenize.c

 VP9_DX_SRCS-yes += decoder/vp9_dboolhuff.h

@@ -35,9 +36,6 @@

 VP9_DX_SRCS-yes := $(filter-out $(VP9_DX_SRCS_REMOVE-yes),$(VP9_DX_SRCS-yes))

-VP9_DX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += decoder/x86/vp9_x86_dsystemdependent.c

-VP9_DX_SRCS-$(HAVE_MMX) += decoder/x86/vp9_dequantize_mmx.asm

-VP9_DX_SRCS-$(HAVE_MMX) += decoder/x86/vp9_idct_blk_mmx.c

 VP9_DX_SRCS-$(HAVE_SSE2) += decoder/x86/vp9_idct_blk_sse2.c

 $(eval $(call asm_offsets_template,\

--- a/vpx/vpx_codec.h

+++ b/vpx/vpx_codec.h

@@ -49,15 +49,22 @@

 #ifndef DEPRECATED

 #if defined(__GNUC__) && __GNUC__

 #define DEPRECATED          __attribute__ ((deprecated))

-#define DECLSPEC_DEPRECATED /**< \copydoc #DEPRECATED */

 #elif defined(_MSC_VER)

 #define DEPRECATED

-#define DECLSPEC_DEPRECATED __declspec(deprecated) /**< \copydoc #DEPRECATED */

 #else

 #define DEPRECATED

+#endif

+#endif  /* DEPRECATED */

+#ifndef DECLSPEC_DEPRECATED

+#if defined(__GNUC__) && __GNUC__

 #define DECLSPEC_DEPRECATED /**< \copydoc #DEPRECATED */

+#elif defined(_MSC_VER)

+#define DECLSPEC_DEPRECATED __declspec(deprecated) /**< \copydoc #DEPRECATED */

+#else

+#define DECLSPEC_DEPRECATED /**< \copydoc #DEPRECATED */

 #endif

-#endif

+#endif  /* DECLSPEC_DEPRECATED */

   /*!\brief Decorator indicating a function is potentially unused */

 #ifdef UNUSED

--- a/vpx/vpx_codec.mk

+++ b/vpx/vpx_codec.mk

@@ -11,6 +11,21 @@

 API_EXPORTS += exports

+API_SRCS-$(CONFIG_VP8_ENCODER) += vp8.h

+API_SRCS-$(CONFIG_VP8_ENCODER) += vp8cx.h

+API_DOC_SRCS-$(CONFIG_VP8_ENCODER) += vp8.h

+API_DOC_SRCS-$(CONFIG_VP8_ENCODER) += vp8cx.h

+API_SRCS-$(CONFIG_VP8_DECODER) += vp8.h

+API_SRCS-$(CONFIG_VP8_DECODER) += vp8dx.h

+API_DOC_SRCS-$(CONFIG_VP8_DECODER) += vp8.h

+API_DOC_SRCS-$(CONFIG_VP8_DECODER) += vp8dx.h

+API_DOC_SRCS-yes += vpx_codec.h

+API_DOC_SRCS-yes += vpx_decoder.h

+API_DOC_SRCS-yes += vpx_encoder.h

+API_DOC_SRCS-yes += vpx_image.h

 API_SRCS-yes                += src/vpx_decoder.c

 API_SRCS-yes                += vpx_decoder.h

 API_SRCS-yes                += src/vpx_encoder.c

@@ -23,3 +38,4 @@

 API_SRCS-yes                += vpx_codec_impl_bottom.h

 API_SRCS-yes                += vpx_codec_impl_top.h

 API_SRCS-yes                += vpx_image.h

+API_SRCS-$(BUILD_LIBVPX)    += vpx_integer.h

--- a/vpx_mem/include/vpx_mem_intrnl.h

+++ b/vpx_mem/include/vpx_mem_intrnl.h

@@ -11,7 +11,7 @@

 #ifndef __VPX_MEM_INTRNL_H__

 #define __VPX_MEM_INTRNL_H__

-#include "vpx_ports/config.h"

+#include "./vpx_config.h"

 #ifndef CONFIG_MEM_MANAGER

 # if defined(VXWORKS)

--- a/vpx_mem/vpx_mem_tracker.c

+++ b/vpx_mem/vpx_mem_tracker.c

@@ -22,7 +22,7 @@

    in the memory_tracker struct as well as calls to create/destroy/lock/unlock

    the mutex in vpx_memory_tracker_init/Destroy and memory_tracker_lock_mutex/unlock_mutex

*/

-#include "vpx_ports/config.h"

+#include "./vpx_config.h"

 #if defined(__uClinux__)

 # include <lddk.h>

--- a/vpx_ports/arm_cpudetect.c

+++ b/vpx_ports/arm_cpudetect.c

@@ -136,7 +136,6 @@

 #elif defined(__linux__) /* end __ANDROID__ */

-#elif defined(__linux__) /* end __ANDROID__ */

 #include <stdio.h>

 int arm_cpu_caps(void) {

--- /dev/null

+++ b/vpx_ports/emmintrin_compat.h

@@ -1,0 +1,55 @@

+/*

+ *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.

+ *

+ *  Use of this source code is governed by a BSD-style license

+ *  that can be found in the LICENSE file in the root of the source

+ *  tree. An additional intellectual property rights grant can be found

+ *  in the file PATENTS.  All contributing project authors may

+ *  be found in the AUTHORS file in the root of the source tree.

+ */

+#ifndef VPX_PORTS_EMMINTRIN_COMPAT_H

+#define VPX_PORTS_EMMINTRIN_COMPAT_H

+#if defined(__GNUC__) && __GNUC__ < 4

+/* From emmintrin.h (gcc 4.5.3) */

+/* Casts between various SP, DP, INT vector types.  Note that these do no

+   conversion of values, they just change the type.  */

+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))

+_mm_castpd_ps(__m128d __A)

+{

+  return (__m128) __A;

+}

+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))

+_mm_castpd_si128(__m128d __A)

+{

+  return (__m128i) __A;

+}

+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))

+_mm_castps_pd(__m128 __A)

+{

+  return (__m128d) __A;

+}

+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))

+_mm_castps_si128(__m128 __A)

+{

+  return (__m128i) __A;

+}

+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))

+_mm_castsi128_ps(__m128i __A)

+{

+  return (__m128) __A;

+}

+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))

+_mm_castsi128_pd(__m128i __A)

+{

+  return (__m128d) __A;

+}

+#endif

+#endif

--- /dev/null

+++ b/vpx_ports/vpx_ports.mk

@@ -1,0 +1,26 @@

+##

+##  Copyright (c) 2012 The WebM project authors. All Rights Reserved.

+##

+##  Use of this source code is governed by a BSD-style license

+##  that can be found in the LICENSE file in the root of the source

+##  tree. An additional intellectual property rights grant can be found

+##  in the file PATENTS.  All contributing project authors may

+##  be found in the AUTHORS file in the root of the source tree.

+##

+PORTS_SRCS-yes += vpx_ports.mk

+PORTS_SRCS-$(BUILD_LIBVPX) += asm_offsets.h

+PORTS_SRCS-$(BUILD_LIBVPX) += mem.h

+PORTS_SRCS-$(BUILD_LIBVPX) += vpx_timer.h

+ifeq ($(ARCH_X86)$(ARCH_X86_64),yes)

+PORTS_SRCS-$(BUILD_LIBVPX) += emms.asm

+PORTS_SRCS-$(BUILD_LIBVPX) += x86.h

+PORTS_SRCS-$(BUILD_LIBVPX) += x86_abi_support.asm

+PORTS_SRCS-$(BUILD_LIBVPX) += x86_cpuid.c

+endif

+PORTS_SRCS-$(ARCH_ARM) += arm_cpudetect.c

+PORTS_SRCS-$(ARCH_ARM) += arm.h

--- a/vpx_scale/vpx_scale.mk

+++ b/vpx_scale/vpx_scale.mk

@@ -5,7 +5,9 @@

 SCALE_SRCS-yes += generic/yv12config.c

 SCALE_SRCS-yes += generic/yv12extend.c

 SCALE_SRCS-$(CONFIG_SPATIAL_RESAMPLING) += generic/gen_scalers.c

+SCALE_SRCS-yes += vpx_scale_asm_offsets.c

 SCALE_SRCS-yes += vpx_scale_rtcd.c

+SCALE_SRCS-yes += vpx_scale_rtcd.sh

 #neon

 SCALE_SRCS-$(HAVE_NEON)  += arm/neon/vp8_vpxyv12_copyframe_func_neon$(ASM)

--- a/vpxenc.c

+++ b/vpxenc.c

@@ -23,7 +23,9 @@

 #include <limits.h>

 #include <assert.h>

 #include "vpx/vpx_encoder.h"

+#if CONFIG_DECODERS

 #include "vpx/vpx_decoder.h"

+#endif

 #if USE_POSIX_MMAP

 #include <sys/types.h>

 #include <sys/stat.h>

@@ -2174,6 +2176,7 @@

     ctx_exit_on_error(&stream->encoder, "Failed to control codec");

+#if CONFIG_DECODERS

   if (global->test_decode) {

     int width, height;

@@ -2186,6 +2189,7 @@

     stream->ref_enc.frame_type = VP8_LAST_FRAME;

     stream->ref_dec.frame_type = VP8_LAST_FRAME;

+#endif

@@ -2278,16 +2282,19 @@

         stream->nbytes += pkt->data.raw.sz;

         *got_data = 1;

+#if CONFIG_DECODERS

         if (global->test_decode) {

           vpx_codec_decode(&stream->decoder, pkt->data.frame.buf,

                            pkt->data.frame.sz, NULL, 0);

           ctx_exit_on_error(&stream->decoder, "Failed to decode frame");

+#endif

         break;

       case VPX_CODEC_STATS_PKT:

         stream->frames_out++;

-        fprintf(stderr, " %6luS",

-                (unsigned long)pkt->data.twopass_stats.sz);

+        if (!global->quiet)

+          fprintf(stderr, " %6luS",

+                  (unsigned long)pkt->data.twopass_stats.sz);

         stats_write(&stream->stats,

                     pkt->data.twopass_stats.buf,

                     pkt->data.twopass_stats.sz);

--

⑨