shithub: libvpx

--- a/build/make/Android.mk

+++ b/build/make/Android.mk

@@ -112,12 +112,12 @@

 # Use ads2gas script to convert from RVCT format to GAS format.  This passes

 #  puts the processed file under $(ASM_CNV_PATH).  Local clean rule

 #  to handle removing these

-ASM_CNV_OFFSETS_DEPEND = $(ASM_CNV_PATH)/asm_com_offsets.asm

+ASM_CNV_OFFSETS_DEPEND = $(ASM_CNV_PATH)/vp8_asm_com_offsets.asm

 ifeq ($(CONFIG_VP8_DECODER), yes)

-  ASM_CNV_OFFSETS_DEPEND += $(ASM_CNV_PATH)/asm_dec_offsets.asm

+  ASM_CNV_OFFSETS_DEPEND += $(ASM_CNV_PATH)/vp8_asm_dec_offsets.asm

 endif

 ifeq ($(CONFIG_VP8_ENCODER), yes)

-  ASM_CNV_OFFSETS_DEPEND += $(ASM_CNV_PATH)/asm_enc_offsets.asm

+  ASM_CNV_OFFSETS_DEPEND += $(ASM_CNV_PATH)/vp8_asm_enc_offsets.asm

 endif

 .PRECIOUS: %.asm.s

@@ -190,19 +190,19 @@

 include $(BUILD_SHARED_LIBRARY)

 $(eval $(call asm_offsets_template,\

-    $(ASM_CNV_PATH)/asm_com_offsets.asm, \

-    $(LIBVPX_PATH)/vp8/common/asm_com_offsets.c))

+    $(ASM_CNV_PATH)/vp8_asm_com_offsets.asm, \

+    $(LIBVPX_PATH)/vp8/common/vp8_asm_com_offsets.c))

 ifeq ($(CONFIG_VP8_DECODER), yes)

   $(eval $(call asm_offsets_template,\

-    $(ASM_CNV_PATH)/asm_dec_offsets.asm, \

-    $(LIBVPX_PATH)/vp8/decoder/asm_dec_offsets.c))

+    $(ASM_CNV_PATH)/vp8_asm_dec_offsets.asm, \

+    $(LIBVPX_PATH)/vp8/decoder/vp8_asm_dec_offsets.c))

 endif

 ifeq ($(CONFIG_VP8_ENCODER), yes)

   $(eval $(call asm_offsets_template,\

-    $(ASM_CNV_PATH)/asm_enc_offsets.asm, \

-    $(LIBVPX_PATH)/vp8/encoder/asm_enc_offsets.c))

+    $(ASM_CNV_PATH)/vp8_asm_enc_offsets.asm, \

+    $(LIBVPX_PATH)/vp8/encoder/vp8_asm_enc_offsets.c))

 endif

 ifeq ($(CONFIG_RUNTIME_CPU_DETECT),yes)

--- a/build/make/Makefile

+++ b/build/make/Makefile

@@ -377,7 +377,7 @@

     DIST-SRCS-$(CONFIG_MSVS)  += build/x86-msvs/yasm.rules

     DIST-SRCS-$(CONFIG_MSVS)  += build/x86-msvs/obj_int_extract.bat

     DIST-SRCS-$(CONFIG_RVCT) += build/make/armlink_adapter.sh

-    # Include obj_int_extract if we use offsets from asm_*_offsets

+    # Include obj_int_extract if we use offsets from *_asm_*_offsets

     DIST-SRCS-$(ARCH_ARM)$(ARCH_X86)$(ARCH_X86_64)    += build/make/obj_int_extract.c

     DIST-SRCS-$(ARCH_ARM)    += build/make/ads2gas.pl

     DIST-SRCS-$(ARCH_ARM)    += build/make/ads2gas_apple.pl

--- a/build/make/obj_int_extract.c

+++ b/build/make/obj_int_extract.c

@@ -144,7 +144,7 @@

           /* Location of string is cacluated each time from the

            * start of the string buffer.  On darwin the symbols

            * are prefixed by "_", so we bump the pointer by 1.

-           * The target value is defined as an int in asm_*_offsets.c,

+           * The target value is defined as an int in *_asm_*_offsets.c,

            * which is 4 bytes on all targets we currently use.

*/

           if (bits == 32) {

@@ -446,7 +446,7 @@

             if (strcmp(section_name, ".bss")) {

               if (sizeof(val) != sym.st_size) {

                 /* The target value is declared as an int in

-                 * asm_*_offsets.c, which is 4 bytes on all

+                 * *_asm_*_offsets.c, which is 4 bytes on all

                  * targets we currently use. Complain loudly if

                  * this is not true.

*/

@@ -528,7 +528,7 @@

             if ((strcmp(section_name, ".bss"))) {

               if (sizeof(val) != sym.st_size) {

                 /* The target value is declared as an int in

-                 * asm_*_offsets.c, which is 4 bytes on all

+                 * *_asm_*_offsets.c, which is 4 bytes on all

                  * targets we currently use. Complain loudly if

                  * this is not true.

*/

--- a/build/x86-msvs/obj_int_extract.bat

+++ b/build/x86-msvs/obj_int_extract.bat

@@ -14,10 +14,10 @@

 obj_int_extract.exe rvds "vp9_asm_dec_offsets.obj" > "vp9_asm_dec_offsets.asm"

 obj_int_extract.exe rvds "vp9_asm_enc_offsets.obj" > "vp9_asm_enc_offsets.asm"

-cl /I "./" /I "%1" /nologo /c "%1/vp8/common/asm_com_offsets.c"

-cl /I "./" /I "%1" /nologo /c "%1/vp8/decoder/asm_dec_offsets.c"

-cl /I "./" /I "%1" /nologo /c "%1/vp8/encoder/asm_enc_offsets.c"

-obj_int_extract.exe rvds "asm_com_offsets.obj" > "vp8_asm_com_offsets.asm"

-obj_int_extract.exe rvds "asm_dec_offsets.obj" > "vp8_asm_dec_offsets.asm"

-obj_int_extract.exe rvds "asm_enc_offsets.obj" > "vp8_asm_enc_offsets.asm"

+cl /I "./" /I "%1" /nologo /c "%1/vp8/common/vp8_asm_com_offsets.c"

+cl /I "./" /I "%1" /nologo /c "%1/vp8/decoder/vp8_asm_dec_offsets.c"

+cl /I "./" /I "%1" /nologo /c "%1/vp8/encoder/vp8_asm_enc_offsets.c"

+obj_int_extract.exe rvds "vp8_asm_com_offsets.obj" > "vp8_asm_com_offsets.asm"

+obj_int_extract.exe rvds "vp8_asm_dec_offsets.obj" > "vp8_asm_dec_offsets.asm"

+obj_int_extract.exe rvds "vp8_asm_enc_offsets.obj" > "vp8_asm_enc_offsets.asm"

--- a/configure

+++ b/configure

@@ -299,6 +299,7 @@

     multi_res_encoding

     temporal_denoising

     experimental

+    decrypt

     ${EXPERIMENT_LIST}

 CMDLINE_SELECT="

@@ -348,6 +349,7 @@

     multi_res_encoding

     temporal_denoising

     experimental

+    decrypt

 process_cmdline() {

--- a/test/decode_test_driver.cc

+++ b/test/decode_test_driver.cc

@@ -14,18 +14,13 @@

 #include "test/video_source.h"

 namespace libvpx_test {

-void Decoder::DecodeFrame(const uint8_t *cxdata, int size) {

-  if (!decoder_.priv) {

-    const vpx_codec_err_t res_init = vpx_codec_dec_init(&decoder_,

-                                                        CodecInterface(),

-                                                        &cfg_, 0);

-    ASSERT_EQ(VPX_CODEC_OK, res_init) << DecodeError();

-  }

+vpx_codec_err_t Decoder::DecodeFrame(const uint8_t *cxdata, int size) {

   vpx_codec_err_t res_dec;

+  InitOnce();

   REGISTER_STATE_CHECK(res_dec = vpx_codec_decode(&decoder_,

                                                   cxdata, size, NULL, 0));

-  ASSERT_EQ(VPX_CODEC_OK, res_dec) << DecodeError();

+  return res_dec;

 void DecoderTest::RunLoop(CompressedVideoSource *video) {

@@ -35,7 +30,9 @@

   // Decode frames.

   for (video->Begin(); video->cxdata(); video->Next()) {

-    decoder->DecodeFrame(video->cxdata(), video->frame_size());

+    vpx_codec_err_t res_dec = decoder->DecodeFrame(video->cxdata(),

+                                                   video->frame_size());

+    ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder->DecodeError();

     DxDataIterator dec_iter = decoder->GetDxData();

     const vpx_image_t *img = NULL;

--- a/test/decode_test_driver.h

+++ b/test/decode_test_driver.h

@@ -42,7 +42,7 @@

 class Decoder {

  public:

   Decoder(vpx_codec_dec_cfg_t cfg, unsigned long deadline)

-      : cfg_(cfg), deadline_(deadline) {

+      : cfg_(cfg), deadline_(deadline), init_done_(false) {

     memset(&decoder_, 0, sizeof(decoder_));

@@ -50,7 +50,7 @@

     vpx_codec_destroy(&decoder_);

-  void DecodeFrame(const uint8_t *cxdata, int size);

+  vpx_codec_err_t DecodeFrame(const uint8_t *cxdata, int size);

   DxDataIterator GetDxData() {

     return DxDataIterator(&decoder_);

@@ -61,12 +61,16 @@

   void Control(int ctrl_id, int arg) {

+    InitOnce();

     const vpx_codec_err_t res = vpx_codec_control_(&decoder_, ctrl_id, arg);

     ASSERT_EQ(VPX_CODEC_OK, res) << DecodeError();

- protected:

-  virtual const vpx_codec_iface_t* CodecInterface() const = 0;

+  void Control(int ctrl_id, const void *arg) {  // Pointer-argument overload: forwards ctrl_id/arg to vpx_codec_control_ on decoder_.

+    InitOnce();  // make sure vpx_codec_dec_init has been attempted before issuing the control call

+    const vpx_codec_err_t res = vpx_codec_control_(&decoder_, ctrl_id, arg);

+    ASSERT_EQ(VPX_CODEC_OK, res) << DecodeError();  // any non-OK result is a fatal test failure, reported with the decoder's error text

+  }

   const char* DecodeError() {

     const char *detail = vpx_codec_error_detail(&decoder_);

@@ -73,9 +77,23 @@

     return detail ? detail : vpx_codec_error(&decoder_);

+ protected:

+  virtual const vpx_codec_iface_t* CodecInterface() const = 0;

+  void InitOnce() {  // Initializes decoder_ via vpx_codec_dec_init the first time it is called; does nothing once init_done_ is true.

+    if (!init_done_) {

+      const vpx_codec_err_t res = vpx_codec_dec_init(&decoder_,

+                                                     CodecInterface(),

+                                                     &cfg_, 0);

+      ASSERT_EQ(VPX_CODEC_OK, res) << DecodeError();  // fatal gtest assertion: on failure it returns from InitOnce() only, leaving init_done_ false

+      init_done_ = true;  // reached only when initialization succeeded

+    }

+  }

   vpx_codec_ctx_t     decoder_;

   vpx_codec_dec_cfg_t cfg_;

   unsigned int        deadline_;

+  bool                init_done_;

};

 // Common test functionality for all Decoder tests.

--- a/test/encode_test_driver.cc

+++ b/test/encode_test_driver.cc

@@ -175,8 +175,9 @@

           case VPX_CODEC_CX_FRAME_PKT:

             has_cxdata = true;

             if (decoder && DoDecode()) {

-              decoder->DecodeFrame((const uint8_t*)pkt->data.frame.buf,

-                                   pkt->data.frame.sz);

+              vpx_codec_err_t res_dec = decoder->DecodeFrame(

+                  (const uint8_t*)pkt->data.frame.buf, pkt->data.frame.sz);

+              ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder->DecodeError();

               has_dxdata = true;

             ASSERT_GE(pkt->data.frame.pts, last_pts_);

--- a/test/test.mk

+++ b/test/test.mk

@@ -31,6 +31,7 @@

 LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += test_vector_test.cc

##

 ## WHITE BOX TESTS

##

@@ -55,6 +56,7 @@

 LIBVPX_TEST_SRCS-yes                   += sixtap_predict_test.cc

 LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += subtract_test.cc

 LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += variance_test.cc

+LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += vp8_decrypt_test.cc

 LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += vp8_fdct4x4_test.cc

 endif # VP8

--- a/test/vp8_boolcoder_test.cc

+++ b/test/vp8_boolcoder_test.cc

@@ -26,6 +26,20 @@

 namespace {

 const int num_tests = 10;

+void encrypt_buffer(uint8_t *buffer, int size, const uint8_t *key) {  // XORs the first `size` bytes of `buffer`, in place, with a repeating key.

+  for (int i = 0; i < size; ++i) {

+    buffer[i] ^= key[i % 32];  // key index wraps every 32 bytes, so only key[0..31] is ever read

+  }

+}

+const uint8_t secret_key[32] = {

+  234,  32,   2,  3,  4, 230,   6,  11,

+    0, 132,  22, 23, 45,  21, 124, 255,

+    0,  43,  52,  3, 23,  63,  99,   7,

+  120,   8, 252, 84,  4,  83,   6,  13

+};

 }  // namespace

 using libvpx_test::ACMRandom;

@@ -71,7 +85,12 @@

         vp8_stop_encode(&bw);

         BOOL_DECODER br;

-        vp8dx_start_decode(&br, bw_buffer, buffer_size);

+#if CONFIG_DECRYPT

+        encrypt_buffer(bw_buffer, buffer_size, secret_key);

+#endif

+        vp8dx_start_decode(&br, bw_buffer, buffer_size, bw_buffer, secret_key);

         bit_rnd.Reset(random_seed);

         for (int i = 0; i < bits_to_test; ++i) {

           if (bit_method == 2) {

--- /dev/null

+++ b/test/vp8_decrypt_test.cc

@@ -1,0 +1,65 @@

+/*

+ *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.

+ *

+ *  Use of this source code is governed by a BSD-style license

+ *  that can be found in the LICENSE file in the root of the source

+ *  tree. An additional intellectual property rights grant can be found

+ *  in the file PATENTS.  All contributing project authors may

+ *  be found in the AUTHORS file in the root of the source tree.

+ */

+#include <cstdio>

+#include <cstdlib>

+#include <string>

+#include "third_party/googletest/src/include/gtest/gtest.h"

+#include "test/decode_test_driver.h"

+#include "test/ivf_video_source.h"

+#if CONFIG_DECRYPT

+namespace {

+const uint8_t decrypt_key[32] = {

+  255, 0, 0, 0, 0, 0, 0, 0,

+    0, 0, 0, 0, 0, 0, 0, 0,

+    0, 0, 0, 0, 0, 0, 0, 0,

+    0, 0, 0, 0, 0, 0, 0, 0,

+};

+}  // namespace

+namespace libvpx_test {

+TEST(TestDecrypt, NullKey) {  // Checks that VP8_SET_DECRYPT_KEY with a NULL key is rejected with VPX_CODEC_INVALID_PARAM.

+  vpx_codec_dec_cfg_t cfg = {0};

+  vpx_codec_ctx_t decoder = {0};

+  vpx_codec_err_t res = vpx_codec_dec_init(&decoder, &vpx_codec_vp8_dx_algo,

+                                           &cfg, 0);

+  ASSERT_EQ(VPX_CODEC_OK, res);  // initialization itself must succeed before the control call is meaningful

+  res = vpx_codec_control(&decoder, VP8_SET_DECRYPT_KEY, NULL);

+  ASSERT_EQ(VPX_CODEC_INVALID_PARAM, res);  // NOTE(review): no vpx_codec_destroy(&decoder) call is visible in this test -- confirm whether cleanup is needed

+}

+TEST(TestDecrypt, DecryptWorks) {  // Decodes one frame before and one frame after installing a non-zero decrypt key.

+  libvpx_test::IVFVideoSource video("vp80-00-comprehensive-001.ivf");

+  video.Init();

+  vpx_codec_dec_cfg_t dec_cfg = {0};

+  Decoder decoder(dec_cfg, 0);  // NOTE(review): decode_test_driver.h in this patch declares CodecInterface() pure virtual on Decoder -- confirm this instantiation compiles

+  // Zero decrypt key (by default): the first frame must decode successfully.

+  video.Begin();

+  vpx_codec_err_t res = decoder.DecodeFrame(video.cxdata(), video.frame_size());

+  ASSERT_EQ(VPX_CODEC_OK, res) << decoder.DecodeError();

+  // Non-zero decrypt key: decoding the next frame is expected to fail.

+  video.Next();

+  decoder.Control(VP8_SET_DECRYPT_KEY, decrypt_key);

+  res = decoder.DecodeFrame(video.cxdata(), video.frame_size());

+  ASSERT_NE(VPX_CODEC_OK, res) << decoder.DecodeError();  // presumably fails because the test vector is not encrypted with decrypt_key -- TODO confirm

+}

+}  // namespace libvpx_test

+#endif  // CONFIG_DECRYPT

--- a/vp8/common/asm_com_offsets.c

+++ /dev/null

@@ -1,52 +1,0 @@

-/*

- *  Copyright (c) 2011 The WebM project authors. All Rights Reserved.

- *

- *  Use of this source code is governed by a BSD-style license

- *  that can be found in the LICENSE file in the root of the source

- *  tree. An additional intellectual property rights grant can be found

- *  in the file PATENTS.  All contributing project authors may

- *  be found in the AUTHORS file in the root of the source tree.

- */

-#include "vpx_config.h"

-#include "vpx/vpx_codec.h"

-#include "vpx_ports/asm_offsets.h"

-#include "vp8/common/blockd.h"

-#if CONFIG_POSTPROC

-#include "postproc.h"

-#endif /* CONFIG_POSTPROC */

-BEGIN

-#if CONFIG_POSTPROC

-/* mfqe.c / filter_by_weight */

-DEFINE(MFQE_PRECISION_VAL,                      MFQE_PRECISION);

-#endif /* CONFIG_POSTPROC */

-END

-/* add asserts for any offset that is not supported by assembly code */

-/* add asserts for any size that is not supported by assembly code */

-#if HAVE_MEDIA

-/* switch case in vp8_intra4x4_predict_armv6 is based on these enumerated values */

-ct_assert(B_DC_PRED, B_DC_PRED == 0);

-ct_assert(B_TM_PRED, B_TM_PRED == 1);

-ct_assert(B_VE_PRED, B_VE_PRED == 2);

-ct_assert(B_HE_PRED, B_HE_PRED == 3);

-ct_assert(B_LD_PRED, B_LD_PRED == 4);

-ct_assert(B_RD_PRED, B_RD_PRED == 5);

-ct_assert(B_VR_PRED, B_VR_PRED == 6);

-ct_assert(B_VL_PRED, B_VL_PRED == 7);

-ct_assert(B_HD_PRED, B_HD_PRED == 8);

-ct_assert(B_HU_PRED, B_HU_PRED == 9);

-#endif

-#if HAVE_SSE2

-#if CONFIG_POSTPROC

-/* vp8_filter_by_weight16x16 and 8x8 */

-ct_assert(MFQE_PRECISION_VAL, MFQE_PRECISION == 4)

-#endif /* CONFIG_POSTPROC */

-#endif /* HAVE_SSE2 */

--- a/vp8/common/loopfilter.c

+++ b/vp8/common/loopfilter.c

@@ -156,39 +156,38 @@

             continue;

-        lvl_ref = lvl_seg;

         /* INTRA_FRAME */

         ref = INTRA_FRAME;

         /* Apply delta for reference frame */

-        lvl_ref += mbd->ref_lf_deltas[ref];

+        lvl_ref = lvl_seg + mbd->ref_lf_deltas[ref];

         /* Apply delta for Intra modes */

         mode = 0; /* B_PRED */

         /* Only the split mode BPRED has a further special case */

-        lvl_mode = lvl_ref +  mbd->mode_lf_deltas[mode];

-        lvl_mode = (lvl_mode > 0) ? (lvl_mode > 63 ? 63 : lvl_mode) : 0; /* clamp */

+        lvl_mode = lvl_ref + mbd->mode_lf_deltas[mode];

+        /* clamp */

+        lvl_mode = (lvl_mode > 0) ? (lvl_mode > 63 ? 63 : lvl_mode) : 0;

         lfi->lvl[seg][ref][mode] = lvl_mode;

         mode = 1; /* all the rest of Intra modes */

-        lvl_mode = (lvl_ref > 0) ? (lvl_ref > 63 ? 63 : lvl_ref)  : 0; /* clamp */

+        /* clamp */

+        lvl_mode = (lvl_ref > 0) ? (lvl_ref > 63 ? 63 : lvl_ref) : 0;

         lfi->lvl[seg][ref][mode] = lvl_mode;

         /* LAST, GOLDEN, ALT */

         for(ref = 1; ref < MAX_REF_FRAMES; ref++)

-            int lvl_ref = lvl_seg;

             /* Apply delta for reference frame */

-            lvl_ref += mbd->ref_lf_deltas[ref];

+            lvl_ref = lvl_seg + mbd->ref_lf_deltas[ref];

             /* Apply delta for Inter modes */

             for (mode = 1; mode < 4; mode++)

                 lvl_mode = lvl_ref + mbd->mode_lf_deltas[mode];

-                lvl_mode = (lvl_mode > 0) ? (lvl_mode > 63 ? 63 : lvl_mode) : 0; /* clamp */

+                /* clamp */

+                lvl_mode = (lvl_mode > 0) ? (lvl_mode > 63 ? 63 : lvl_mode) : 0;

                 lfi->lvl[seg][ref][mode] = lvl_mode;

--- a/vp8/common/loopfilter_filters.c

+++ b/vp8/common/loopfilter_filters.c

@@ -54,7 +54,7 @@

     signed char ps0, qs0;

     signed char ps1, qs1;

-    signed char vp8_filter, Filter1, Filter2;

+    signed char filter_value, Filter1, Filter2;

     signed char u;

     ps1 = (signed char) * op1 ^ 0x80;

@@ -63,19 +63,19 @@

     qs1 = (signed char) * oq1 ^ 0x80;

     /* add outer taps if we have high edge variance */

-    vp8_filter = vp8_signed_char_clamp(ps1 - qs1);

-    vp8_filter &= hev;

+    filter_value = vp8_signed_char_clamp(ps1 - qs1);

+    filter_value &= hev;

     /* inner taps */

-    vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * (qs0 - ps0));

-    vp8_filter &= mask;

+    filter_value = vp8_signed_char_clamp(filter_value + 3 * (qs0 - ps0));

+    filter_value &= mask;

     /* save bottom 3 bits so that we round one side +4 and the other +3

      * if it equals 4 we'll set to adjust by -1 to account for the fact

      * we'd round 3 the other way

*/

-    Filter1 = vp8_signed_char_clamp(vp8_filter + 4);

-    Filter2 = vp8_signed_char_clamp(vp8_filter + 3);

+    Filter1 = vp8_signed_char_clamp(filter_value + 4);

+    Filter2 = vp8_signed_char_clamp(filter_value + 3);

     Filter1 >>= 3;

     Filter2 >>= 3;

     u = vp8_signed_char_clamp(qs0 - Filter1);

@@ -82,16 +82,16 @@

     *oq0 = u ^ 0x80;

     u = vp8_signed_char_clamp(ps0 + Filter2);

     *op0 = u ^ 0x80;

-    vp8_filter = Filter1;

+    filter_value = Filter1;

     /* outer tap adjustments */

-    vp8_filter += 1;

-    vp8_filter >>= 1;

-    vp8_filter &= ~hev;

+    filter_value += 1;

+    filter_value >>= 1;

+    filter_value &= ~hev;

-    u = vp8_signed_char_clamp(qs1 - vp8_filter);

+    u = vp8_signed_char_clamp(qs1 - filter_value);

     *oq1 = u ^ 0x80;

-    u = vp8_signed_char_clamp(ps1 + vp8_filter);

+    u = vp8_signed_char_clamp(ps1 + filter_value);

     *op1 = u ^ 0x80;

@@ -162,7 +162,7 @@

                            uc *op2, uc *op1, uc *op0, uc *oq0, uc *oq1, uc *oq2)

     signed char s, u;

-    signed char vp8_filter, Filter1, Filter2;

+    signed char filter_value, Filter1, Filter2;

     signed char ps2 = (signed char) * op2 ^ 0x80;

     signed char ps1 = (signed char) * op1 ^ 0x80;

     signed char ps0 = (signed char) * op0 ^ 0x80;

@@ -171,11 +171,11 @@

     signed char qs2 = (signed char) * oq2 ^ 0x80;

     /* add outer taps if we have high edge variance */

-    vp8_filter = vp8_signed_char_clamp(ps1 - qs1);

-    vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * (qs0 - ps0));

-    vp8_filter &= mask;

+    filter_value = vp8_signed_char_clamp(ps1 - qs1);

+    filter_value = vp8_signed_char_clamp(filter_value + 3 * (qs0 - ps0));

+    filter_value &= mask;

-    Filter2 = vp8_filter;

+    Filter2 = filter_value;

     Filter2 &= hev;

     /* save bottom 3 bits so that we round one side +4 and the other +3 */

@@ -188,8 +188,8 @@

     /* only apply wider filter if not high edge variance */

-    vp8_filter &= ~hev;

-    Filter2 = vp8_filter;

+    filter_value &= ~hev;

+    Filter2 = filter_value;

     /* roughly 3/7th difference across boundary */

     u = vp8_signed_char_clamp((63 + Filter2 * 27) >> 7);

@@ -291,7 +291,7 @@

 static void vp8_simple_filter(signed char mask, uc *op1, uc *op0, uc *oq0, uc *oq1)

-    signed char vp8_filter, Filter1, Filter2;

+    signed char filter_value, Filter1, Filter2;

     signed char p1 = (signed char) * op1 ^ 0x80;

     signed char p0 = (signed char) * op0 ^ 0x80;

     signed char q0 = (signed char) * oq0 ^ 0x80;

@@ -298,17 +298,17 @@

     signed char q1 = (signed char) * oq1 ^ 0x80;

     signed char u;

-    vp8_filter = vp8_signed_char_clamp(p1 - q1);

-    vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * (q0 - p0));

-    vp8_filter &= mask;

+    filter_value = vp8_signed_char_clamp(p1 - q1);

+    filter_value = vp8_signed_char_clamp(filter_value + 3 * (q0 - p0));

+    filter_value &= mask;

     /* save bottom 3 bits so that we round one side +4 and the other +3 */

-    Filter1 = vp8_signed_char_clamp(vp8_filter + 4);

+    Filter1 = vp8_signed_char_clamp(filter_value + 4);

     Filter1 >>= 3;

     u = vp8_signed_char_clamp(q0 - Filter1);

     *oq0  = u ^ 0x80;

-    Filter2 = vp8_signed_char_clamp(vp8_filter + 3);

+    Filter2 = vp8_signed_char_clamp(filter_value + 3);

     Filter2 >>= 3;

     u = vp8_signed_char_clamp(p0 + Filter2);

     *op0 = u ^ 0x80;

--- a/vp8/common/reconintra.c

+++ b/vp8/common/reconintra.c

@@ -36,7 +36,6 @@

     case DC_PRED:

         int expected_dc;

-        int i;

         int shift;

         int average = 0;

@@ -168,7 +167,6 @@

         int expected_udc;

         int expected_vdc;

-        int i;

         int shift;

         int Uaverage = 0;

         int Vaverage = 0;

@@ -217,8 +215,6 @@

     break;

     case V_PRED:

-        int i;

         for (i = 0; i < 8; i++)

             vpx_memcpy(upred_ptr, uabove_row, 8);

@@ -231,8 +227,6 @@

     break;

     case H_PRED:

-        int i;

         for (i = 0; i < 8; i++)

             vpx_memset(upred_ptr, uleft_col[i], 8);

@@ -245,8 +239,6 @@

     break;

     case TM_PRED:

-        int i;

         for (i = 0; i < 8; i++)

             for (j = 0; j < 8; j++)

--- a/vp8/common/rtcd_defs.sh

+++ b/vp8/common/rtcd_defs.sh

@@ -444,8 +444,9 @@

 # Quantizer

 prototype void vp8_regular_quantize_b "struct block *, struct blockd *"

-specialize vp8_regular_quantize_b sse2 sse4_1

-vp8_regular_quantize_b_sse4_1=vp8_regular_quantize_b_sse4

+specialize vp8_regular_quantize_b sse2 #sse4_1

+# TODO(johann) Update sse4 implementation and re-enable

+#vp8_regular_quantize_b_sse4_1=vp8_regular_quantize_b_sse4

 prototype void vp8_fast_quantize_b "struct block *, struct blockd *"

 specialize vp8_fast_quantize_b sse2 ssse3 media neon

--- /dev/null

+++ b/vp8/common/vp8_asm_com_offsets.c

@@ -1,0 +1,52 @@

+/*

+ *  Copyright (c) 2011 The WebM project authors. All Rights Reserved.

+ *

+ *  Use of this source code is governed by a BSD-style license

+ *  that can be found in the LICENSE file in the root of the source

+ *  tree. An additional intellectual property rights grant can be found

+ *  in the file PATENTS.  All contributing project authors may

+ *  be found in the AUTHORS file in the root of the source tree.

+ */

+#include "vpx_config.h"

+#include "vpx/vpx_codec.h"

+#include "vpx_ports/asm_offsets.h"

+#include "vp8/common/blockd.h"

+#if CONFIG_POSTPROC

+#include "postproc.h"

+#endif /* CONFIG_POSTPROC */

+BEGIN

+#if CONFIG_POSTPROC

+/* mfqe.c / filter_by_weight */

+DEFINE(MFQE_PRECISION_VAL,                      MFQE_PRECISION);

+#endif /* CONFIG_POSTPROC */

+END

+/* add asserts for any offset that is not supported by assembly code */

+/* add asserts for any size that is not supported by assembly code */

+#if HAVE_MEDIA

+/* switch case in vp8_intra4x4_predict_armv6 is based on these enumerated values */

+ct_assert(B_DC_PRED, B_DC_PRED == 0);

+ct_assert(B_TM_PRED, B_TM_PRED == 1);

+ct_assert(B_VE_PRED, B_VE_PRED == 2);

+ct_assert(B_HE_PRED, B_HE_PRED == 3);

+ct_assert(B_LD_PRED, B_LD_PRED == 4);

+ct_assert(B_RD_PRED, B_RD_PRED == 5);

+ct_assert(B_VR_PRED, B_VR_PRED == 6);

+ct_assert(B_VL_PRED, B_VL_PRED == 7);

+ct_assert(B_HD_PRED, B_HD_PRED == 8);

+ct_assert(B_HU_PRED, B_HU_PRED == 9);

+#endif

+#if HAVE_SSE2

+#if CONFIG_POSTPROC

+/* vp8_filter_by_weight16x16 and 8x8 */

+ct_assert(MFQE_PRECISION_VAL, MFQE_PRECISION == 4)

+#endif /* CONFIG_POSTPROC */

+#endif /* HAVE_SSE2 */

--- a/vp8/common/x86/postproc_mmx.asm

+++ b/vp8/common/x86/postproc_mmx.asm

@@ -61,7 +61,7 @@

             mov         rcx,        8

 .init_borderd                                                    ; initialize borders

             lea         rdi,        [rdi + rax]

-            movq        [rdi],      xmm1

+            movq        [rdi],      mm1

             dec         rcx

             jne         .init_borderd

@@ -193,7 +193,6 @@

             movq        mm4,        [sym(vp8_rv) + rcx*2]

 %endif

             paddw       mm1,        mm4

-            ;paddw     xmm1,       eight8s

             psraw       mm1,        4

             packuswb    mm1,        mm0

--- a/vp8/decoder/asm_dec_offsets.c

+++ /dev/null

@@ -1,26 +1,0 @@

-/*

- *  Copyright (c) 2011 The WebM project authors. All Rights Reserved.

- *

- *  Use of this source code is governed by a BSD-style license

- *  that can be found in the LICENSE file in the root of the source

- *  tree. An additional intellectual property rights grant can be found

- *  in the file PATENTS.  All contributing project authors may

- *  be found in the AUTHORS file in the root of the source tree.

- */

-#include "vpx_ports/asm_offsets.h"

-#include "onyxd_int.h"

-BEGIN

-DEFINE(bool_decoder_user_buffer_end,            offsetof(BOOL_DECODER, user_buffer_end));

-DEFINE(bool_decoder_user_buffer,                offsetof(BOOL_DECODER, user_buffer));

-DEFINE(bool_decoder_value,                      offsetof(BOOL_DECODER, value));

-DEFINE(bool_decoder_count,                      offsetof(BOOL_DECODER, count));

-DEFINE(bool_decoder_range,                      offsetof(BOOL_DECODER, range));

-END

-/* add asserts for any offset that is not supported by assembly code */

-/* add asserts for any size that is not supported by assembly code */

--- a/vp8/decoder/dboolhuff.c

+++ b/vp8/decoder/dboolhuff.c

@@ -10,12 +10,12 @@

 #include "dboolhuff.h"

-#include "vpx_ports/mem.h"

-#include "vpx_mem/vpx_mem.h"

 int vp8dx_start_decode(BOOL_DECODER *br,

                        const unsigned char *source,

-                       unsigned int source_sz)

+                       unsigned int source_sz,

+                       const unsigned char *origin,

+                       const unsigned char *key)

     br->user_buffer_end = source+source_sz;

     br->user_buffer     = source;

@@ -22,6 +22,8 @@

     br->value    = 0;

     br->count    = -8;

     br->range    = 255;

+    br->origin = origin;

+    br->key = key;

     if (source_sz && !source)

         return 1;

@@ -32,19 +34,34 @@

     return 0;

 void vp8dx_bool_decoder_fill(BOOL_DECODER *br)

-    const unsigned char *bufptr;

-    const unsigned char *bufend;

-    VP8_BD_VALUE         value;

-    int                  count;

-    bufend = br->user_buffer_end;

-    bufptr = br->user_buffer;

-    value = br->value;

-    count = br->count;

+    const unsigned char *bufptr = br->user_buffer;

+    const unsigned char *bufend = br->user_buffer_end;

+    VP8_BD_VALUE value = br->value;

+    int count = br->count;

+    int shift = VP8_BD_VALUE_SIZE - 8 - (count + 8);

+    size_t bits_left = (bufend - bufptr)*CHAR_BIT;

+    int x = (int)(shift + CHAR_BIT - bits_left);

+    int loop_end = 0;

-    VP8DX_BOOL_DECODER_FILL(count, value, bufptr, bufend);

+    if(x >= 0)

+    {

+        count += VP8_LOTS_OF_BITS;

+        loop_end = x;

+    }

+    if (x < 0 || bits_left)

+    {

+        while(shift >= loop_end)

+        {

+            count += CHAR_BIT;

+            value |= ((VP8_BD_VALUE)decrypt_byte(bufptr, br->origin,

+                                                 br->key)) << shift;

+            ++bufptr;

+            shift -= CHAR_BIT;

+        }

+    }

     br->user_buffer = bufptr;

     br->value = value;

--- a/vp8/decoder/dboolhuff.h

+++ b/vp8/decoder/dboolhuff.h

@@ -9,10 +9,12 @@

*/

-#ifndef DBOOLHUFF_H

-#define DBOOLHUFF_H

+#ifndef DBOOLHUFF_H_

+#define DBOOLHUFF_H_

 #include <stddef.h>

 #include <limits.h>

 #include "vpx_config.h"

 #include "vpx_ports/mem.h"

 #include "vpx/vpx_integer.h"

@@ -19,12 +21,25 @@

 typedef size_t VP8_BD_VALUE;

-# define VP8_BD_VALUE_SIZE ((int)sizeof(VP8_BD_VALUE)*CHAR_BIT)

+#define VP8_BD_VALUE_SIZE ((int)sizeof(VP8_BD_VALUE)*CHAR_BIT)

 /*This is meant to be a large, positive constant that can still be efficiently

    loaded as an immediate (on platforms like ARM, for example).

   Even relatively modest values like 100 would work fine.*/

-# define VP8_LOTS_OF_BITS (0x40000000)

+#define VP8_LOTS_OF_BITS (0x40000000)

+static unsigned char decrypt_byte(const unsigned char *ch,  /* address of the byte to read */

+                                  const unsigned char *origin,  /* base address that ch's key offset is measured from */

+                                  const unsigned char *key)  /* key bytes; only indices 0..31 are read */

+{  /* Returns *ch, XORed with a position-dependent key byte when CONFIG_DECRYPT is enabled. */

+#if CONFIG_DECRYPT

+    const int offset = (int)(ch - origin);  /* NOTE(review): assumes ch >= origin; a negative offset would index before key -- confirm callers */

+    return *ch ^ key[offset % 32];  /* 32 is VP8_DECRYPT_KEY_SIZE; the key repeats with that period */

+#else  /* decryption compiled out: the byte is passed through and origin/key are unused */

+    return *ch;

+#endif

+}

 typedef struct

     const unsigned char *user_buffer_end;

@@ -32,6 +47,8 @@

     VP8_BD_VALUE         value;

     int                  count;

     unsigned int         range;

+    const unsigned char *origin;

+    const unsigned char *key;

 } BOOL_DECODER;

 DECLARE_ALIGNED(16, extern const unsigned char, vp8_norm[256]);

@@ -38,41 +55,13 @@

 int vp8dx_start_decode(BOOL_DECODER *br,

                        const unsigned char *source,

-                       unsigned int source_sz);

+                       unsigned int source_sz,

+                       const unsigned char *origin,

+                       const unsigned char *key);

 void vp8dx_bool_decoder_fill(BOOL_DECODER *br);

-/*The refill loop is used in several places, so define it in a macro to make

-   sure they're all consistent.

-  An inline function would be cleaner, but has a significant penalty, because

-   multiple BOOL_DECODER fields must be modified, and the compiler is not smart

-   enough to eliminate the stores to those fields and the subsequent reloads

-   from them when inlining the function.*/

-#define VP8DX_BOOL_DECODER_FILL(_count,_value,_bufptr,_bufend) \

-    do \

-    { \

-        int shift = VP8_BD_VALUE_SIZE - 8 - ((_count) + 8); \

-        int loop_end, x; \

-        size_t bits_left = ((_bufend)-(_bufptr))*CHAR_BIT; \

-        \

-        x = (int)(shift + CHAR_BIT - bits_left); \

-        loop_end = 0; \

-        if(x >= 0) \

-        { \

-            (_count) += VP8_LOTS_OF_BITS; \

-            loop_end = x; \

-            if(!bits_left) break; \

-        } \

-        while(shift >= loop_end) \

-        { \

-            (_count) += CHAR_BIT; \

-            (_value) |= (VP8_BD_VALUE)*(_bufptr)++ << shift; \

-            shift -= CHAR_BIT; \

-        } \

-    } \

-    while(0) \

 static int vp8dx_decode_bool(BOOL_DECODER *br, int probability) {

     unsigned int bit = 0;

     VP8_BD_VALUE value;

@@ -151,4 +140,5 @@

     /* No error. */

     return 0;

-#endif

+#endif  // DBOOLHUFF_H_

--- a/vp8/decoder/decodemv.h

+++ b/vp8/decoder/decodemv.h

@@ -8,7 +8,11 @@

  *  be found in the AUTHORS file in the root of the source tree.

*/

+#ifndef DECODEMV_H_

+#define DECODEMV_H_

 #include "onyxd_int.h"

 void vp8_decode_mode_mvs(VP8D_COMP *);

+#endif  // DECODEMV_H_

--- a/vp8/decoder/decoderthreading.h

+++ b/vp8/decoder/decoderthreading.h

@@ -8,19 +8,15 @@

  *  be found in the AUTHORS file in the root of the source tree.

*/

+#ifndef DECODERTHREADING_H_

+#define DECODERTHREADING_H_

-#ifndef _DECODER_THREADING_H

-#define _DECODER_THREADING_H

 #if CONFIG_MULTITHREAD

-extern void vp8mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd);

-extern void vp8_decoder_remove_threads(VP8D_COMP *pbi);

-extern void vp8_decoder_create_threads(VP8D_COMP *pbi);

-extern void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows);

-extern void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows);

+void vp8mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd);

+void vp8_decoder_remove_threads(VP8D_COMP *pbi);

+void vp8_decoder_create_threads(VP8D_COMP *pbi);

+void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows);

+void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows);

 #endif

-#endif

+#endif  // DECODERTHREADING_H_

--- a/vp8/decoder/decodframe.c

+++ b/vp8/decoder/decodframe.c

@@ -893,7 +893,9 @@

         if (vp8dx_start_decode(bool_decoder,

                                pbi->fragments.ptrs[partition_idx],

-                               pbi->fragments.sizes[partition_idx]))

+                               pbi->fragments.sizes[partition_idx],

+                               pbi->fragments.ptrs[0],

+                               pbi->decrypt_key))

             vpx_internal_error(&pbi->common.error, VPX_CODEC_MEM_ERROR,

                                "Failed to allocate bool decoder %d",

                                partition_idx);

@@ -980,10 +982,11 @@

 int vp8_decode_frame(VP8D_COMP *pbi)

-    vp8_reader *const bc = & pbi->mbc[8];

-    VP8_COMMON *const pc = & pbi->common;

-    MACROBLOCKD *const xd  = & pbi->mb;

+    vp8_reader *const bc = &pbi->mbc[8];

+    VP8_COMMON *const pc = &pbi->common;

+    MACROBLOCKD *const xd  = &pbi->mb;

     const unsigned char *data = pbi->fragments.ptrs[0];

+    const unsigned char *const origin = data;

     const unsigned char *data_end =  data + pbi->fragments.sizes[0];

     ptrdiff_t first_partition_length_in_bytes;

@@ -1016,13 +1019,21 @@

     else

-        pc->frame_type = (FRAME_TYPE)(data[0] & 1);

-        pc->version = (data[0] >> 1) & 7;

-        pc->show_frame = (data[0] >> 4) & 1;

+        const unsigned char data0 = decrypt_byte(data + 0, origin,

+                                                 pbi->decrypt_key);

+        const unsigned char data1 = decrypt_byte(data + 1, origin,

+                                                 pbi->decrypt_key);

+        const unsigned char data2 = decrypt_byte(data + 2, origin,

+                                                 pbi->decrypt_key);

+        pc->frame_type = (FRAME_TYPE)(data0 & 1);

+        pc->version = (data0 >> 1) & 7;

+        pc->show_frame = (data0 >> 4) & 1;

         first_partition_length_in_bytes =

-            (data[0] | (data[1] << 8) | (data[2] << 16)) >> 5;

+            (data0 | (data1 << 8) | (data2 << 16)) >> 5;

-        if (!pbi->ec_active && (data + first_partition_length_in_bytes > data_end

+        if (!pbi->ec_active &&

+            (data + first_partition_length_in_bytes > data_end

             || data + first_partition_length_in_bytes < data))

             vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,

                                "Truncated packet or corrupt partition 0 length");

@@ -1040,7 +1051,13 @@

*/

             if (!pbi->ec_active || data + 3 < data_end)

-                if (data[0] != 0x9d || data[1] != 0x01 || data[2] != 0x2a)

+                const unsigned char data0 = decrypt_byte(data + 0, origin,

+                                                         pbi->decrypt_key);

+                const unsigned char data1 = decrypt_byte(data + 1, origin,

+                                                         pbi->decrypt_key);

+                const unsigned char data2 = decrypt_byte(data + 2, origin,

+                                                         pbi->decrypt_key);

+                if (data0 != 0x9d || data1 != 0x01 || data2 != 0x2a)

                     vpx_internal_error(&pc->error, VPX_CODEC_UNSUP_BITSTREAM,

                                    "Invalid frame sync code");

@@ -1051,10 +1068,19 @@

*/

             if (!pbi->ec_active || data + 6 < data_end)

-                pc->Width = (data[3] | (data[4] << 8)) & 0x3fff;

-                pc->horiz_scale = data[4] >> 6;

-                pc->Height = (data[5] | (data[6] << 8)) & 0x3fff;

-                pc->vert_scale = data[6] >> 6;

+                const unsigned char data3 = decrypt_byte(data + 3, origin,

+                                                         pbi->decrypt_key);

+                const unsigned char data4 = decrypt_byte(data + 4, origin,

+                                                         pbi->decrypt_key);

+                const unsigned char data5 = decrypt_byte(data + 5, origin,

+                                                         pbi->decrypt_key);

+                const unsigned char data6 = decrypt_byte(data + 6, origin,

+                                                         pbi->decrypt_key);

+                pc->Width = (data3 | (data4 << 8)) & 0x3fff;

+                pc->horiz_scale = data4 >> 6;

+                pc->Height = (data5 | (data6 << 8)) & 0x3fff;

+                pc->vert_scale = data6 >> 6;

             data += 7;

@@ -1072,7 +1098,11 @@

     init_frame(pbi);

-    if (vp8dx_start_decode(bc, data, (unsigned int)(data_end - data)))

+    if (vp8dx_start_decode(bc,

+                           data,

+                           (unsigned int)(data_end - data),

+                           pbi->fragments.ptrs[0],

+                           pbi->decrypt_key))

         vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR,

                            "Failed to allocate bool decoder 0");

     if (pc->frame_type == KEY_FRAME) {

--- a/vp8/decoder/detokenize.h

+++ b/vp8/decoder/detokenize.h

@@ -8,13 +8,12 @@

  *  be found in the AUTHORS file in the root of the source tree.

  */

+#ifndef DETOKENIZE_H_

+#define DETOKENIZE_H_

-#ifndef DETOKENIZE_H

-#define DETOKENIZE_H

 #include "onyxd_int.h"

 void vp8_reset_mb_tokens_context(MACROBLOCKD *x);

 int vp8_decode_mb_tokens(VP8D_COMP *, MACROBLOCKD *);

-#endif /* DETOKENIZE_H */

+#endif  // DETOKENIZE_H_

--- a/vp8/decoder/ec_types.h

+++ b/vp8/decoder/ec_types.h

@@ -14,7 +14,6 @@

 #define MAX_OVERLAPS 16

 /* The area (pixel area in Q6) the block pointed to by bmi overlaps

  * another block with.

*/

@@ -48,4 +47,4 @@

     MV_REFERENCE_FRAME ref_frame;

 } EC_BLOCK;

-#endif /* VP8_DEC_EC_TYPES_H */

+#endif  // VP8_DEC_EC_TYPES_H

--- a/vp8/decoder/error_concealment.c

+++ b/vp8/decoder/error_concealment.c

@@ -8,13 +8,13 @@

  *  be found in the AUTHORS file in the root of the source tree.

  */

+#include <assert.h>

 #include "error_concealment.h"

 #include "onyxd_int.h"

 #include "decodemv.h"

 #include "vpx_mem/vpx_mem.h"

 #include "vp8/common/findnearmv.h"

-#include <assert.h>

 #define MIN(x,y) (((x)<(y))?(x):(y))

 #define MAX(x,y) (((x)>(y))?(x):(y))

--- a/vp8/decoder/error_concealment.h

+++ b/vp8/decoder/error_concealment.h

@@ -9,8 +9,8 @@

*/

-#ifndef ERROR_CONCEALMENT_H

-#define ERROR_CONCEALMENT_H

+#ifndef ERROR_CONCEALMENT_H_

+#define ERROR_CONCEALMENT_H_

 #include "onyxd_int.h"

 #include "ec_types.h"

@@ -38,4 +38,4 @@

*/

 void vp8_conceal_corrupt_mb(MACROBLOCKD *xd);

-#endif

+#endif  // ERROR_CONCEALMENT_H_

--- a/vp8/decoder/onyxd_int.h

+++ b/vp8/decoder/onyxd_int.h

@@ -9,8 +9,9 @@

*/

-#ifndef __INC_VP8D_INT_H

-#define __INC_VP8D_INT_H

+#ifndef ONYXD_INT_H_

+#define ONYXD_INT_H_

 #include "vpx_config.h"

 #include "vp8/common/onyxd.h"

 #include "treereader.h"

@@ -121,6 +122,7 @@

     int independent_partitions;

     int frame_corrupt_residual;

+    const unsigned char *decrypt_key;

 } VP8D_COMP;

 int vp8_decode_frame(VP8D_COMP *cpi);

@@ -145,4 +147,4 @@

     } while(0)

 #endif

-#endif

+#endif  // ONYXD_INT_H_

--- a/vp8/decoder/threading.c

+++ b/vp8/decoder/threading.c

@@ -36,7 +36,7 @@

 } while (0)

-extern void vp8_mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd);

+void vp8_mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd);

 static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC *mbrd, int count)

--- a/vp8/decoder/treereader.h

+++ b/vp8/decoder/treereader.h

@@ -9,11 +9,10 @@

*/

-#ifndef tree_reader_h

-#define tree_reader_h 1

+#ifndef TREEREADER_H_

+#define TREEREADER_H_

 #include "vp8/common/treecoder.h"

 #include "dboolhuff.h"

 typedef BOOL_DECODER vp8_reader;

@@ -20,7 +19,7 @@

 #define vp8_read vp8dx_decode_bool

 #define vp8_read_literal vp8_decode_value

-#define vp8_read_bit( R) vp8_read( R, vp8_prob_half)

+#define vp8_read_bit(R) vp8_read(R, vp8_prob_half)

 /* Intent of tree data structure is to make decoding trivial. */

@@ -38,4 +37,4 @@

     return -i;

-#endif /* tree_reader_h */

+#endif  // TREEREADER_H_

--- /dev/null

+++ b/vp8/decoder/vp8_asm_dec_offsets.c

@@ -1,0 +1,26 @@

+/*

+ *  Copyright (c) 2011 The WebM project authors. All Rights Reserved.

+ *

+ *  Use of this source code is governed by a BSD-style license

+ *  that can be found in the LICENSE file in the root of the source

+ *  tree. An additional intellectual property rights grant can be found

+ *  in the file PATENTS.  All contributing project authors may

+ *  be found in the AUTHORS file in the root of the source tree.

+ */

+#include "vpx_ports/asm_offsets.h"

+#include "onyxd_int.h"

+BEGIN

+DEFINE(bool_decoder_user_buffer_end,            offsetof(BOOL_DECODER, user_buffer_end));

+DEFINE(bool_decoder_user_buffer,                offsetof(BOOL_DECODER, user_buffer));

+DEFINE(bool_decoder_value,                      offsetof(BOOL_DECODER, value));

+DEFINE(bool_decoder_count,                      offsetof(BOOL_DECODER, count));

+DEFINE(bool_decoder_range,                      offsetof(BOOL_DECODER, range));

+END

+/* add asserts for any offset that is not supported by assembly code */

+/* add asserts for any size that is not supported by assembly code */

--- a/vp8/encoder/asm_enc_offsets.c

+++ /dev/null

@@ -1,93 +1,0 @@

-/*

- *  Copyright (c) 2011 The WebM project authors. All Rights Reserved.

- *

- *  Use of this source code is governed by a BSD-style license

- *  that can be found in the LICENSE file in the root of the source

- *  tree. An additional intellectual property rights grant can be found

- *  in the file PATENTS.  All contributing project authors may

- *  be found in the AUTHORS file in the root of the source tree.

- */

-#include "vpx_ports/asm_offsets.h"

-#include "vpx_config.h"

-#include "block.h"

-#include "vp8/common/blockd.h"

-#include "onyx_int.h"

-#include "treewriter.h"

-#include "tokenize.h"

-BEGIN

-/* regular quantize */

-DEFINE(vp8_block_coeff,                         offsetof(BLOCK, coeff));

-DEFINE(vp8_block_zbin,                          offsetof(BLOCK, zbin));

-DEFINE(vp8_block_round,                         offsetof(BLOCK, round));

-DEFINE(vp8_block_quant,                         offsetof(BLOCK, quant));

-DEFINE(vp8_block_quant_fast,                    offsetof(BLOCK, quant_fast));

-DEFINE(vp8_block_zbin_extra,                    offsetof(BLOCK, zbin_extra));

-DEFINE(vp8_block_zrun_zbin_boost,               offsetof(BLOCK, zrun_zbin_boost));

-DEFINE(vp8_block_quant_shift,                   offsetof(BLOCK, quant_shift));

-DEFINE(vp8_blockd_qcoeff,                       offsetof(BLOCKD, qcoeff));

-DEFINE(vp8_blockd_dequant,                      offsetof(BLOCKD, dequant));

-DEFINE(vp8_blockd_dqcoeff,                      offsetof(BLOCKD, dqcoeff));

-DEFINE(vp8_blockd_eob,                          offsetof(BLOCKD, eob));

-/* subtract */

-DEFINE(vp8_block_base_src,                      offsetof(BLOCK, base_src));

-DEFINE(vp8_block_src,                           offsetof(BLOCK, src));

-DEFINE(vp8_block_src_diff,                      offsetof(BLOCK, src_diff));

-DEFINE(vp8_block_src_stride,                    offsetof(BLOCK, src_stride));

-DEFINE(vp8_blockd_predictor,                    offsetof(BLOCKD, predictor));

-/* pack tokens */

-DEFINE(vp8_writer_lowvalue,                     offsetof(vp8_writer, lowvalue));

-DEFINE(vp8_writer_range,                        offsetof(vp8_writer, range));

-DEFINE(vp8_writer_count,                        offsetof(vp8_writer, count));

-DEFINE(vp8_writer_pos,                          offsetof(vp8_writer, pos));

-DEFINE(vp8_writer_buffer,                       offsetof(vp8_writer, buffer));

-DEFINE(vp8_writer_buffer_end,                   offsetof(vp8_writer, buffer_end));

-DEFINE(vp8_writer_error,                        offsetof(vp8_writer, error));

-DEFINE(tokenextra_token,                        offsetof(TOKENEXTRA, Token));

-DEFINE(tokenextra_extra,                        offsetof(TOKENEXTRA, Extra));

-DEFINE(tokenextra_context_tree,                 offsetof(TOKENEXTRA, context_tree));

-DEFINE(tokenextra_skip_eob_node,                offsetof(TOKENEXTRA, skip_eob_node));

-DEFINE(TOKENEXTRA_SZ,                           sizeof(TOKENEXTRA));

-DEFINE(vp8_extra_bit_struct_sz,                 sizeof(vp8_extra_bit_struct));

-DEFINE(vp8_token_value,                         offsetof(vp8_token, value));

-DEFINE(vp8_token_len,                           offsetof(vp8_token, Len));

-DEFINE(vp8_extra_bit_struct_tree,               offsetof(vp8_extra_bit_struct, tree));

-DEFINE(vp8_extra_bit_struct_prob,               offsetof(vp8_extra_bit_struct, prob));

-DEFINE(vp8_extra_bit_struct_len,                offsetof(vp8_extra_bit_struct, Len));

-DEFINE(vp8_extra_bit_struct_base_val,           offsetof(vp8_extra_bit_struct, base_val));

-DEFINE(vp8_comp_tplist,                         offsetof(VP8_COMP, tplist));

-DEFINE(vp8_comp_common,                         offsetof(VP8_COMP, common));

-DEFINE(vp8_comp_bc ,                            offsetof(VP8_COMP, bc));

-DEFINE(vp8_writer_sz ,                          sizeof(vp8_writer));

-DEFINE(tokenlist_start,                         offsetof(TOKENLIST, start));

-DEFINE(tokenlist_stop,                          offsetof(TOKENLIST, stop));

-DEFINE(TOKENLIST_SZ,                            sizeof(TOKENLIST));

-DEFINE(vp8_common_mb_rows,                      offsetof(VP8_COMMON, mb_rows));

-END

-/* add asserts for any offset that is not supported by assembly code

- * add asserts for any size that is not supported by assembly code

- * These are used in vp8cx_pack_tokens.  They are hard coded so if their sizes

- * change they will have to be adjusted.

- */

-#if HAVE_EDSP

-ct_assert(TOKENEXTRA_SZ, sizeof(TOKENEXTRA) == 8)

-ct_assert(vp8_extra_bit_struct_sz, sizeof(vp8_extra_bit_struct) == 16)

-#endif

--- a/vp8/encoder/bitstream.c

+++ b/vp8/encoder/bitstream.c

@@ -980,6 +980,12 @@

                               int prob_garf

+    assert(prob_intra >= 0);

+    assert(prob_intra <= 255);

+    assert(prob_last >= 0);

+    assert(prob_last <= 255);

+    assert(prob_garf >= 0);

+    assert(prob_garf <= 255);

     ref_frame_cost[INTRA_FRAME]   = vp8_cost_zero(prob_intra);

     ref_frame_cost[LAST_FRAME]    = vp8_cost_one(prob_intra)

                                     + vp8_cost_zero(prob_last);

--- a/vp8/encoder/block.h

+++ b/vp8/encoder/block.h

@@ -37,7 +37,7 @@

     /* 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries */

     short *quant;

     short *quant_fast;

-    unsigned char *quant_shift;

+    short *quant_shift;

     short *zbin;

     short *zrun_zbin_boost;

     short *round;

--- a/vp8/encoder/onyx_if.c

+++ b/vp8/encoder/onyx_if.c

@@ -641,7 +641,6 @@

     for (i = 0; i < MAX_MODES; i ++)

         cpi->mode_check_freq[i] = 0;

-        cpi->mode_chosen_counts[i] = 0;

     cpi->mb.mbs_tested_so_far = 0;

@@ -2816,6 +2815,8 @@

         if (cpi->common.refresh_alt_ref_frame)

             cpi->prob_intra_coded += 40;

+            if (cpi->prob_intra_coded > 255)

+                cpi->prob_intra_coded = 255;

             cpi->prob_last_coded = 200;

             cpi->prob_gf_coded = 1;

@@ -4597,9 +4598,6 @@

                         cpi->common.current_video_frame,

                         cm->frame_type, cm->refresh_golden_frame,

                         cm->refresh_alt_ref_frame);

-            for (i = 0; i < MAX_MODES; i++)

-                fprintf(fmodes, "%5d ", cpi->mode_chosen_counts[i]);

             fprintf(fmodes, "\n");

--- a/vp8/encoder/onyx_int.h

+++ b/vp8/encoder/onyx_int.h

@@ -282,17 +282,17 @@

     DECLARE_ALIGNED(16, short, Y1quant[QINDEX_RANGE][16]);

-    DECLARE_ALIGNED(16, unsigned char, Y1quant_shift[QINDEX_RANGE][16]);

+    DECLARE_ALIGNED(16, short, Y1quant_shift[QINDEX_RANGE][16]);

     DECLARE_ALIGNED(16, short, Y1zbin[QINDEX_RANGE][16]);

     DECLARE_ALIGNED(16, short, Y1round[QINDEX_RANGE][16]);

     DECLARE_ALIGNED(16, short, Y2quant[QINDEX_RANGE][16]);

-    DECLARE_ALIGNED(16, unsigned char, Y2quant_shift[QINDEX_RANGE][16]);

+    DECLARE_ALIGNED(16, short, Y2quant_shift[QINDEX_RANGE][16]);

     DECLARE_ALIGNED(16, short, Y2zbin[QINDEX_RANGE][16]);

     DECLARE_ALIGNED(16, short, Y2round[QINDEX_RANGE][16]);

     DECLARE_ALIGNED(16, short, UVquant[QINDEX_RANGE][16]);

-    DECLARE_ALIGNED(16, unsigned char, UVquant_shift[QINDEX_RANGE][16]);

+    DECLARE_ALIGNED(16, short, UVquant_shift[QINDEX_RANGE][16]);

     DECLARE_ALIGNED(16, short, UVzbin[QINDEX_RANGE][16]);

     DECLARE_ALIGNED(16, short, UVround[QINDEX_RANGE][16]);

@@ -349,7 +349,6 @@

     int ambient_err;

     unsigned int mode_check_freq[MAX_MODES];

-    unsigned int mode_chosen_counts[MAX_MODES];

     int rd_baseline_thresh[MAX_MODES];

--- a/vp8/encoder/quantize.c

+++ b/vp8/encoder/quantize.c

@@ -50,8 +50,8 @@

         if (x >= zbin)

             x += round_ptr[rc];

-            y  = (((x * quant_ptr[rc]) >> 16) + x)

-                 >> quant_shift_ptr[rc];             /* quantize (x) */

+            y  = ((((x * quant_ptr[rc]) >> 16) + x)

+                 * quant_shift_ptr[rc]) >> 16;       /* quantize (x) */

             x  = (y ^ sz) - sz;                      /* get the sign back */

             qcoeff_ptr[rc] = x;                      /* write to destination */

             dqcoeff_ptr[rc] = x * dequant_ptr[rc];   /* dequantized value */

@@ -113,7 +113,7 @@

     short *zbin_ptr        = b->zbin;

     short *round_ptr       = b->round;

     short *quant_ptr       = b->quant;

-    unsigned char *quant_shift_ptr = b->quant_shift;

+    short *quant_shift_ptr = b->quant_shift;

     short *qcoeff_ptr      = d->qcoeff;

     short *dqcoeff_ptr     = d->dqcoeff;

     short *dequant_ptr     = d->dequant;

@@ -138,8 +138,8 @@

         if (x >= zbin)

             x += round_ptr[rc];

-            y  = (((x * quant_ptr[rc]) >> 16) + x)

-                 >> quant_shift_ptr[rc];             /* quantize (x) */

+            y  = ((((x * quant_ptr[rc]) >> 16) + x)

+                 * quant_shift_ptr[rc]) >> 16;       /* quantize (x) */

             x  = (y ^ sz) - sz;                      /* get the sign back */

             qcoeff_ptr[rc]  = x;                     /* write to destination */

             dqcoeff_ptr[rc] = x * dequant_ptr[rc];   /* dequantized value */

@@ -167,7 +167,7 @@

     int sz;

     short *coeff_ptr;

     short *quant_ptr;

-    unsigned char *quant_shift_ptr;

+    short *quant_shift_ptr;

     short *qcoeff_ptr;

     short *dqcoeff_ptr;

     short *dequant_ptr;

@@ -198,7 +198,7 @@

         if (x >= dq)

             /* Quantize x. */

-            y  = (((x * quant_ptr[rc]) >> 16) + x) >> quant_shift_ptr[rc];

+            y  = ((((x * quant_ptr[rc]) >> 16) + x) * quant_shift_ptr[rc]) >> 16;

             /* Put the sign back. */

             x = (y + sz) ^ sz;

             /* Save the coefficient and its dequantized value. */

@@ -406,7 +406,7 @@

 #define EXACT_QUANT

 #ifdef EXACT_QUANT

 static void invert_quant(int improved_quant, short *quant,

-                               unsigned char *shift, short d)

+                         short *shift, short d)

     if(improved_quant)

@@ -418,11 +418,15 @@

         t = 1 + (1<<(16+l))/d;

         *quant = (short)(t - (1<<16));

         *shift = l;

+        /* use multiplication and constant shift by 16 */

+        *shift = 1 << (16 - *shift);

     else

         *quant = (1 << 16) / d;

         *shift = 0;

+        /* use multiplication and constant shift by 16 */

+        *shift = 1 << (16 - *shift);

--- a/vp8/encoder/rdopt.c

+++ b/vp8/encoder/rdopt.c

@@ -2512,9 +2512,6 @@

                 x->rd_thresh_mult[best_mode_index];

-    /* Note how often each mode chosen as best */

-    cpi->mode_chosen_counts[best_mode_index] ++;

 #if CONFIG_TEMPORAL_DENOISING

     if (cpi->oxcf.noise_sensitivity)

--- /dev/null

+++ b/vp8/encoder/vp8_asm_enc_offsets.c

@@ -1,0 +1,93 @@

+/*

+ *  Copyright (c) 2011 The WebM project authors. All Rights Reserved.

+ *

+ *  Use of this source code is governed by a BSD-style license

+ *  that can be found in the LICENSE file in the root of the source

+ *  tree. An additional intellectual property rights grant can be found

+ *  in the file PATENTS.  All contributing project authors may

+ *  be found in the AUTHORS file in the root of the source tree.

+ */

+#include "vpx_ports/asm_offsets.h"

+#include "vpx_config.h"

+#include "block.h"

+#include "vp8/common/blockd.h"

+#include "onyx_int.h"

+#include "treewriter.h"

+#include "tokenize.h"

+BEGIN

+/* regular quantize */

+DEFINE(vp8_block_coeff,                         offsetof(BLOCK, coeff));

+DEFINE(vp8_block_zbin,                          offsetof(BLOCK, zbin));

+DEFINE(vp8_block_round,                         offsetof(BLOCK, round));

+DEFINE(vp8_block_quant,                         offsetof(BLOCK, quant));

+DEFINE(vp8_block_quant_fast,                    offsetof(BLOCK, quant_fast));

+DEFINE(vp8_block_zbin_extra,                    offsetof(BLOCK, zbin_extra));

+DEFINE(vp8_block_zrun_zbin_boost,               offsetof(BLOCK, zrun_zbin_boost));

+DEFINE(vp8_block_quant_shift,                   offsetof(BLOCK, quant_shift));

+DEFINE(vp8_blockd_qcoeff,                       offsetof(BLOCKD, qcoeff));

+DEFINE(vp8_blockd_dequant,                      offsetof(BLOCKD, dequant));

+DEFINE(vp8_blockd_dqcoeff,                      offsetof(BLOCKD, dqcoeff));

+DEFINE(vp8_blockd_eob,                          offsetof(BLOCKD, eob));

+/* subtract */

+DEFINE(vp8_block_base_src,                      offsetof(BLOCK, base_src));

+DEFINE(vp8_block_src,                           offsetof(BLOCK, src));

+DEFINE(vp8_block_src_diff,                      offsetof(BLOCK, src_diff));

+DEFINE(vp8_block_src_stride,                    offsetof(BLOCK, src_stride));

+DEFINE(vp8_blockd_predictor,                    offsetof(BLOCKD, predictor));

+/* pack tokens */

+DEFINE(vp8_writer_lowvalue,                     offsetof(vp8_writer, lowvalue));

+DEFINE(vp8_writer_range,                        offsetof(vp8_writer, range));

+DEFINE(vp8_writer_count,                        offsetof(vp8_writer, count));

+DEFINE(vp8_writer_pos,                          offsetof(vp8_writer, pos));

+DEFINE(vp8_writer_buffer,                       offsetof(vp8_writer, buffer));

+DEFINE(vp8_writer_buffer_end,                   offsetof(vp8_writer, buffer_end));

+DEFINE(vp8_writer_error,                        offsetof(vp8_writer, error));

+DEFINE(tokenextra_token,                        offsetof(TOKENEXTRA, Token));

+DEFINE(tokenextra_extra,                        offsetof(TOKENEXTRA, Extra));

+DEFINE(tokenextra_context_tree,                 offsetof(TOKENEXTRA, context_tree));

+DEFINE(tokenextra_skip_eob_node,                offsetof(TOKENEXTRA, skip_eob_node));

+DEFINE(TOKENEXTRA_SZ,                           sizeof(TOKENEXTRA));

+DEFINE(vp8_extra_bit_struct_sz,                 sizeof(vp8_extra_bit_struct));

+DEFINE(vp8_token_value,                         offsetof(vp8_token, value));

+DEFINE(vp8_token_len,                           offsetof(vp8_token, Len));

+DEFINE(vp8_extra_bit_struct_tree,               offsetof(vp8_extra_bit_struct, tree));

+DEFINE(vp8_extra_bit_struct_prob,               offsetof(vp8_extra_bit_struct, prob));

+DEFINE(vp8_extra_bit_struct_len,                offsetof(vp8_extra_bit_struct, Len));

+DEFINE(vp8_extra_bit_struct_base_val,           offsetof(vp8_extra_bit_struct, base_val));

+DEFINE(vp8_comp_tplist,                         offsetof(VP8_COMP, tplist));

+DEFINE(vp8_comp_common,                         offsetof(VP8_COMP, common));

+DEFINE(vp8_comp_bc ,                            offsetof(VP8_COMP, bc));

+DEFINE(vp8_writer_sz ,                          sizeof(vp8_writer));

+DEFINE(tokenlist_start,                         offsetof(TOKENLIST, start));

+DEFINE(tokenlist_stop,                          offsetof(TOKENLIST, stop));

+DEFINE(TOKENLIST_SZ,                            sizeof(TOKENLIST));

+DEFINE(vp8_common_mb_rows,                      offsetof(VP8_COMMON, mb_rows));

+END

+/* add asserts for any offset that is not supported by assembly code

+ * add asserts for any size that is not supported by assembly code

+ * These are used in vp8cx_pack_tokens.  They are hard coded so if their sizes

+ * change they will have to be adjusted.

+ */

+#if HAVE_EDSP

+ct_assert(TOKENEXTRA_SZ, sizeof(TOKENEXTRA) == 8)

+ct_assert(vp8_extra_bit_struct_sz, sizeof(vp8_extra_bit_struct) == 16)

+#endif

--- a/vp8/encoder/x86/quantize_sse2.asm

+++ /dev/null

@@ -1,245 +1,0 @@

-;

-;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.

-;

-;  Use of this source code is governed by a BSD-style license and patent

-;  grant that can be found in the LICENSE file in the root of the source

-;  tree. All contributing project authors may be found in the AUTHORS

-;  file in the root of the source tree.

-;

-%include "vpx_ports/x86_abi_support.asm"

-%include "vp8_asm_enc_offsets.asm"

-; void vp8_regular_quantize_b_sse2 | arg

-;  (BLOCK  *b,                     |  0

-;   BLOCKD *d)                     |  1

-global sym(vp8_regular_quantize_b_sse2) PRIVATE

-sym(vp8_regular_quantize_b_sse2):

-    push        rbp

-    mov         rbp, rsp

-    SAVE_XMM 7

-    GET_GOT     rbx

-%if ABI_IS_32BIT

-    push        rdi

-    push        rsi

-%else

-  %if LIBVPX_YASM_WIN64

-    push        rdi

-    push        rsi

-  %endif

-%endif

-    ALIGN_STACK 16, rax

-    %define zrun_zbin_boost   0  ;  8

-    %define abs_minus_zbin    8  ; 32

-    %define temp_qcoeff       40 ; 32

-    %define qcoeff            72 ; 32

-    %define stack_size        104

-    sub         rsp, stack_size

-    ; end prolog

-%if ABI_IS_32BIT

-    mov         rdi, arg(0)                 ; BLOCK *b

-    mov         rsi, arg(1)                 ; BLOCKD *d

-%else

-  %if LIBVPX_YASM_WIN64

-    mov         rdi, rcx                    ; BLOCK *b

-    mov         rsi, rdx                    ; BLOCKD *d

-  %else

-    ;mov         rdi, rdi                    ; BLOCK *b

-    ;mov         rsi, rsi                    ; BLOCKD *d

-  %endif

-%endif

-    mov         rdx, [rdi + vp8_block_coeff] ; coeff_ptr

-    mov         rcx, [rdi + vp8_block_zbin] ; zbin_ptr

-    movd        xmm7, [rdi + vp8_block_zbin_extra] ; zbin_oq_value

-    ; z

-    movdqa      xmm0, [rdx]

-    movdqa      xmm4, [rdx + 16]

-    mov         rdx, [rdi + vp8_block_round] ; round_ptr

-    pshuflw     xmm7, xmm7, 0

-    punpcklwd   xmm7, xmm7                  ; duplicated zbin_oq_value

-    movdqa      xmm1, xmm0

-    movdqa      xmm5, xmm4

-    ; sz

-    psraw       xmm0, 15

-    psraw       xmm4, 15

-    ; (z ^ sz)

-    pxor        xmm1, xmm0

-    pxor        xmm5, xmm4

-    ; x = abs(z)

-    psubw       xmm1, xmm0

-    psubw       xmm5, xmm4

-    movdqa      xmm2, [rcx]

-    movdqa      xmm3, [rcx + 16]

-    mov         rcx, [rdi + vp8_block_quant] ; quant_ptr

-    ; *zbin_ptr + zbin_oq_value

-    paddw       xmm2, xmm7

-    paddw       xmm3, xmm7

-    ; x - (*zbin_ptr + zbin_oq_value)

-    psubw       xmm1, xmm2

-    psubw       xmm5, xmm3

-    movdqa      [rsp + abs_minus_zbin], xmm1

-    movdqa      [rsp + abs_minus_zbin + 16], xmm5

-    ; add (zbin_ptr + zbin_oq_value) back

-    paddw       xmm1, xmm2

-    paddw       xmm5, xmm3

-    movdqa      xmm2, [rdx]

-    movdqa      xmm6, [rdx + 16]

-    movdqa      xmm3, [rcx]

-    movdqa      xmm7, [rcx + 16]

-    ; x + round

-    paddw       xmm1, xmm2

-    paddw       xmm5, xmm6

-    ; y = x * quant_ptr >> 16

-    pmulhw      xmm3, xmm1

-    pmulhw      xmm7, xmm5

-    ; y += x

-    paddw       xmm1, xmm3

-    paddw       xmm5, xmm7

-    movdqa      [rsp + temp_qcoeff], xmm1

-    movdqa      [rsp + temp_qcoeff + 16], xmm5

-    pxor        xmm6, xmm6

-    ; zero qcoeff

-    movdqa      [rsp + qcoeff], xmm6

-    movdqa      [rsp + qcoeff + 16], xmm6

-    mov         rdx, [rdi + vp8_block_zrun_zbin_boost] ; zbin_boost_ptr

-    mov         rax, [rdi + vp8_block_quant_shift] ; quant_shift_ptr

-    mov         [rsp + zrun_zbin_boost], rdx

-%macro ZIGZAG_LOOP 1

-    ; x

-    movsx       ecx, WORD PTR[rsp + abs_minus_zbin + %1 * 2]

-    ; if (x >= zbin)

-    sub         cx, WORD PTR[rdx]           ; x - zbin

-    lea         rdx, [rdx + 2]              ; zbin_boost_ptr++

-    jl          .rq_zigzag_loop_%1           ; x < zbin

-    movsx       edi, WORD PTR[rsp + temp_qcoeff + %1 * 2]

-    ; downshift by quant_shift[rc]

-    movsx       cx, BYTE PTR[rax + %1]      ; quant_shift_ptr[rc]

-    sar         edi, cl                     ; also sets Z bit

-    je          .rq_zigzag_loop_%1           ; !y

-    mov         WORD PTR[rsp + qcoeff + %1 * 2], di ;qcoeff_ptr[rc] = temp_qcoeff[rc]

-    mov         rdx, [rsp + zrun_zbin_boost] ; reset to b->zrun_zbin_boost

-.rq_zigzag_loop_%1:

-%endmacro

-; in vp8_default_zig_zag1d order: see vp8/common/entropy.c

-ZIGZAG_LOOP  0

-ZIGZAG_LOOP  1

-ZIGZAG_LOOP  4

-ZIGZAG_LOOP  8

-ZIGZAG_LOOP  5

-ZIGZAG_LOOP  2

-ZIGZAG_LOOP  3

-ZIGZAG_LOOP  6

-ZIGZAG_LOOP  9

-ZIGZAG_LOOP 12

-ZIGZAG_LOOP 13

-ZIGZAG_LOOP 10

-ZIGZAG_LOOP  7

-ZIGZAG_LOOP 11

-ZIGZAG_LOOP 14

-ZIGZAG_LOOP 15

-    movdqa      xmm2, [rsp + qcoeff]

-    movdqa      xmm3, [rsp + qcoeff + 16]

-    mov         rcx, [rsi + vp8_blockd_dequant] ; dequant_ptr

-    mov         rdi, [rsi + vp8_blockd_dqcoeff] ; dqcoeff_ptr

-    ; y ^ sz

-    pxor        xmm2, xmm0

-    pxor        xmm3, xmm4

-    ; x = (y ^ sz) - sz

-    psubw       xmm2, xmm0

-    psubw       xmm3, xmm4

-    ; dequant

-    movdqa      xmm0, [rcx]

-    movdqa      xmm1, [rcx + 16]

-    mov         rcx, [rsi + vp8_blockd_qcoeff] ; qcoeff_ptr

-    pmullw      xmm0, xmm2

-    pmullw      xmm1, xmm3

-    movdqa      [rcx], xmm2        ; store qcoeff

-    movdqa      [rcx + 16], xmm3

-    movdqa      [rdi], xmm0        ; store dqcoeff

-    movdqa      [rdi + 16], xmm1

-    mov         rcx, [rsi + vp8_blockd_eob]

-    ; select the last value (in zig_zag order) for EOB

-    pcmpeqw     xmm2, xmm6

-    pcmpeqw     xmm3, xmm6

-    ; !

-    pcmpeqw     xmm6, xmm6

-    pxor        xmm2, xmm6

-    pxor        xmm3, xmm6

-    ; mask inv_zig_zag

-    pand        xmm2, [GLOBAL(inv_zig_zag)]

-    pand        xmm3, [GLOBAL(inv_zig_zag + 16)]

-    ; select the max value

-    pmaxsw      xmm2, xmm3

-    pshufd      xmm3, xmm2, 00001110b

-    pmaxsw      xmm2, xmm3

-    pshuflw     xmm3, xmm2, 00001110b

-    pmaxsw      xmm2, xmm3

-    pshuflw     xmm3, xmm2, 00000001b

-    pmaxsw      xmm2, xmm3

-    movd        eax, xmm2

-    and         eax, 0xff

-    mov         BYTE PTR [rcx], al          ; store eob

-    ; begin epilog

-    add         rsp, stack_size

-    pop         rsp

-%if ABI_IS_32BIT

-    pop         rsi

-    pop         rdi

-%else

-  %if LIBVPX_YASM_WIN64

-    pop         rsi

-    pop         rdi

-  %endif

-%endif

-    RESTORE_GOT

-    RESTORE_XMM

-    pop         rbp

-    ret

-SECTION_RODATA

-align 16

-inv_zig_zag:

-  dw 0x0001, 0x0002, 0x0006, 0x0007

-  dw 0x0003, 0x0005, 0x0008, 0x000d

-  dw 0x0004, 0x0009, 0x000c, 0x000e

-  dw 0x000a, 0x000b, 0x000f, 0x0010

--- a/vp8/encoder/x86/quantize_sse2_intrinsics.c

+++ b/vp8/encoder/x86/quantize_sse2_intrinsics.c

@@ -9,13 +9,139 @@

*/

-#include "vp8/common/blockd.h"

-#include "vp8/common/entropy.h"

+#include "vpx_config.h"

+#include "vp8_rtcd.h"

+#include "vpx_ports/x86.h"

+#include "vpx_mem/vpx_mem.h"

 #include "vp8/encoder/block.h"

+#include "vp8/common/entropy.h" /* vp8_default_inv_zig_zag */

-#include <mmintrin.h> //MMX

-#include <xmmintrin.h> //SSE

-#include <emmintrin.h> //SSE2

+#include <mmintrin.h> /* MMX */

+#include <xmmintrin.h> /* SSE */

+#include <emmintrin.h> /* SSE2 */

+#define SELECT_EOB(i, z) \

+    do { \

+        short boost = *zbin_boost_ptr; \

+        int cmp = (x[z] < boost) | (y[z] == 0); \

+        zbin_boost_ptr++; \

+        if (cmp) \

+            goto select_eob_end_##i; \

+        qcoeff_ptr[z] = y[z]; \

+        eob = i; \

+        zbin_boost_ptr = b->zrun_zbin_boost; \

+        select_eob_end_##i:; \

+    } while (0)

+void vp8_regular_quantize_b_sse2(BLOCK *b, BLOCKD *d)

+{

+    char eob = 0;

+    short *zbin_boost_ptr  = b->zrun_zbin_boost;

+    short *qcoeff_ptr      = d->qcoeff;

+    DECLARE_ALIGNED_ARRAY(16, short, x, 16);

+    DECLARE_ALIGNED_ARRAY(16, short, y, 16);

+    __m128i sz0, x0, sz1, x1, y0, y1, x_minus_zbin0, x_minus_zbin1;

+    __m128i quant_shift0 = _mm_load_si128((__m128i *)(b->quant_shift));

+    __m128i quant_shift1 = _mm_load_si128((__m128i *)(b->quant_shift + 8));

+    __m128i z0 = _mm_load_si128((__m128i *)(b->coeff));

+    __m128i z1 = _mm_load_si128((__m128i *)(b->coeff+8));

+    __m128i zbin_extra = _mm_cvtsi32_si128(b->zbin_extra);

+    __m128i zbin0 = _mm_load_si128((__m128i *)(b->zbin));

+    __m128i zbin1 = _mm_load_si128((__m128i *)(b->zbin + 8));

+    __m128i round0 = _mm_load_si128((__m128i *)(b->round));

+    __m128i round1 = _mm_load_si128((__m128i *)(b->round + 8));

+    __m128i quant0 = _mm_load_si128((__m128i *)(b->quant));

+    __m128i quant1 = _mm_load_si128((__m128i *)(b->quant + 8));

+    __m128i dequant0 = _mm_load_si128((__m128i *)(d->dequant));

+    __m128i dequant1 = _mm_load_si128((__m128i *)(d->dequant + 8));

+    vpx_memset(qcoeff_ptr, 0, 32);

+    /* Duplicate to all lanes. */

+    zbin_extra = _mm_shufflelo_epi16(zbin_extra, 0);

+    zbin_extra = _mm_unpacklo_epi16(zbin_extra, zbin_extra);

+    /* Sign of z: z >> 15 */

+    sz0 = _mm_srai_epi16(z0, 15);

+    sz1 = _mm_srai_epi16(z1, 15);

+    /* x = abs(z): (z ^ sz) - sz */

+    x0 = _mm_xor_si128(z0, sz0);

+    x1 = _mm_xor_si128(z1, sz1);

+    x0 = _mm_sub_epi16(x0, sz0);

+    x1 = _mm_sub_epi16(x1, sz1);

+    /* zbin[] + zbin_extra */

+    zbin0 = _mm_add_epi16(zbin0, zbin_extra);

+    zbin1 = _mm_add_epi16(zbin1, zbin_extra);

+    /* In C x is compared to zbin where zbin = zbin[] + boost + extra. Rebalance

+     * the equation because boost is the only value which can change:

+     * x - (zbin[] + extra) >= boost */

+    x_minus_zbin0 = _mm_sub_epi16(x0, zbin0);

+    x_minus_zbin1 = _mm_sub_epi16(x1, zbin1);

+    _mm_store_si128((__m128i *)(x), x_minus_zbin0);

+    _mm_store_si128((__m128i *)(x + 8), x_minus_zbin1);

+    /* All the remaining calculations are valid whether they are done now with

+     * simd or later inside the loop one at a time. */

+    x0 = _mm_add_epi16(x0, round0);

+    x1 = _mm_add_epi16(x1, round1);

+    y0 = _mm_mulhi_epi16(x0, quant0);

+    y1 = _mm_mulhi_epi16(x1, quant1);

+    y0 = _mm_add_epi16(y0, x0);

+    y1 = _mm_add_epi16(y1, x1);

+    /* Instead of shifting each value independently we convert the scaling

+     * factor with 1 << (16 - shift) so we can use multiply/return high half. */

+    y0 = _mm_mulhi_epi16(y0, quant_shift0);

+    y1 = _mm_mulhi_epi16(y1, quant_shift1);

+    /* Return the sign: (y ^ sz) - sz */

+    y0 = _mm_xor_si128(y0, sz0);

+    y1 = _mm_xor_si128(y1, sz1);

+    y0 = _mm_sub_epi16(y0, sz0);

+    y1 = _mm_sub_epi16(y1, sz1);

+    _mm_store_si128((__m128i *)(y), y0);

+    _mm_store_si128((__m128i *)(y + 8), y1);

+    zbin_boost_ptr = b->zrun_zbin_boost;

+    /* The loop gets unrolled anyway. Avoid the vp8_default_zig_zag1d lookup. */

+    SELECT_EOB(1, 0);

+    SELECT_EOB(2, 1);

+    SELECT_EOB(3, 4);

+    SELECT_EOB(4, 8);

+    SELECT_EOB(5, 5);

+    SELECT_EOB(6, 2);

+    SELECT_EOB(7, 3);

+    SELECT_EOB(8, 6);

+    SELECT_EOB(9, 9);

+    SELECT_EOB(10, 12);

+    SELECT_EOB(11, 13);

+    SELECT_EOB(12, 10);

+    SELECT_EOB(13, 7);

+    SELECT_EOB(14, 11);

+    SELECT_EOB(15, 14);

+    SELECT_EOB(16, 15);

+    y0 = _mm_load_si128((__m128i *)(d->qcoeff));

+    y1 = _mm_load_si128((__m128i *)(d->qcoeff + 8));

+    /* dqcoeff = qcoeff * dequant */

+    y0 = _mm_mullo_epi16(y0, dequant0);

+    y1 = _mm_mullo_epi16(y1, dequant1);

+    _mm_store_si128((__m128i *)(d->dqcoeff), y0);

+    _mm_store_si128((__m128i *)(d->dqcoeff + 8), y1);

+    *d->eob = eob;

+}

 void vp8_fast_quantize_b_sse2(BLOCK *b, BLOCKD *d)

--- a/vp8/vp8_common.mk

+++ b/vp8/vp8_common.mk

@@ -14,7 +14,6 @@

 VP8_COMMON_SRCS-yes += common/onyx.h

 VP8_COMMON_SRCS-yes += common/onyxd.h

 VP8_COMMON_SRCS-yes += common/alloccommon.c

-VP8_COMMON_SRCS-yes += common/asm_com_offsets.c

 VP8_COMMON_SRCS-yes += common/blockd.c

 VP8_COMMON_SRCS-yes += common/coefupdateprobs.h

 VP8_COMMON_SRCS-yes += common/debugmodes.c

@@ -67,6 +66,7 @@

 VP8_COMMON_SRCS-yes += common/swapyv12buffer.c

 VP8_COMMON_SRCS-yes += common/variance_c.c

 VP8_COMMON_SRCS-yes += common/variance.h

+VP8_COMMON_SRCS-yes += common/vp8_asm_com_offsets.c

 VP8_COMMON_SRCS-yes += common/vp8_entropymodedata.h

@@ -193,6 +193,6 @@

 VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/vp8_subpixelvariance16x16s_neon$(ASM)

 $(eval $(call asm_offsets_template,\

-         vp8_asm_com_offsets.asm, $(VP8_PREFIX)common/asm_com_offsets.c))

+         vp8_asm_com_offsets.asm, $(VP8_PREFIX)common/vp8_asm_com_offsets.c))

 $(eval $(call rtcd_h_template,vp8_rtcd,vp8/common/rtcd_defs.sh))

--- a/vp8/vp8_dx_iface.c

+++ b/vp8/vp8_dx_iface.c

@@ -29,6 +29,8 @@

 #define VP8_CAP_ERROR_CONCEALMENT (CONFIG_ERROR_CONCEALMENT ? \

                                     VPX_CODEC_CAP_ERROR_CONCEALMENT : 0)

+#define VP8_DECRYPT_KEY_SIZE 32

 typedef vpx_codec_stream_info_t  vp8_stream_info_t;

 /* Structures for handling memory allocations */

@@ -73,6 +75,7 @@

     int                     dbg_color_b_modes_flag;

     int                     dbg_display_mv_flag;

 #endif

+    unsigned char           decrypt_key[VP8_DECRYPT_KEY_SIZE];

     vpx_image_t             img;

     int                     img_setup;

     struct frame_buffers    yv12_frame_buffers;

@@ -150,6 +153,8 @@

     return res;

+static const unsigned char fake_decrypt_key[VP8_DECRYPT_KEY_SIZE] = { 0 };

 static void vp8_init_ctx(vpx_codec_ctx_t *ctx, const vpx_codec_mmap_t *mmap)

     int i;

@@ -164,6 +169,8 @@

     ctx->priv->alg_priv->mmaps[0] = *mmap;

     ctx->priv->alg_priv->si.sz = sizeof(ctx->priv->alg_priv->si);

+    memcpy(ctx->priv->alg_priv->decrypt_key, fake_decrypt_key,

+           VP8_DECRYPT_KEY_SIZE);

     ctx->priv->init_flags = ctx->init_flags;

     if (ctx->config.dec)

@@ -211,21 +218,19 @@

         mmap.flags = vp8_mem_req_segs[0].flags;

         res = vp8_mmap_alloc(&mmap);

+        if (res != VPX_CODEC_OK) return res;

-        if (!res)

-        {

-            vp8_init_ctx(ctx, &mmap);

+        vp8_init_ctx(ctx, &mmap);

-            /* initialize number of fragments to zero */

-            ctx->priv->alg_priv->fragments.count = 0;

-            /* is input fragments enabled? */

-            ctx->priv->alg_priv->fragments.enabled =

-                    (ctx->priv->alg_priv->base.init_flags &

-                        VPX_CODEC_USE_INPUT_FRAGMENTS);

+        /* initialize number of fragments to zero */

+        ctx->priv->alg_priv->fragments.count = 0;

+        /* is input fragments enabled? */

+        ctx->priv->alg_priv->fragments.enabled =

+                (ctx->priv->alg_priv->base.init_flags &

+                    VPX_CODEC_USE_INPUT_FRAGMENTS);

-            ctx->priv->alg_priv->defer_alloc = 1;

-            /*post processing level initialized to do nothing */

-        }

+        ctx->priv->alg_priv->defer_alloc = 1;

+        /*post processing level initialized to do nothing */

     ctx->priv->alg_priv->yv12_frame_buffers.use_frame_threads =

@@ -264,14 +269,17 @@

     return VPX_CODEC_OK;

-static vpx_codec_err_t vp8_peek_si(const uint8_t         *data,

-                                   unsigned int           data_sz,

-                                   vpx_codec_stream_info_t *si)

+static vpx_codec_err_t vp8_peek_si_external(const uint8_t         *data,

+                                            unsigned int           data_sz,

+                                            vpx_codec_stream_info_t *si,

+                                            const unsigned char *decrypt_key)

     vpx_codec_err_t res = VPX_CODEC_OK;

     if(data + data_sz <= data)

+    {

         res = VPX_CODEC_INVALID_PARAM;

+    }

     else

         /* Parse uncompresssed part of key frame header.

@@ -280,19 +288,27 @@

          * 4 bytes:- including image width and height in the lowest 14 bits

          *           of each 2-byte value.

*/

-        si->is_kf = 0;

-        if (data_sz >= 10 && !(data[0] & 0x01))  /* I-Frame */

+        const uint8_t data0 = decrypt_byte(data, data, decrypt_key);

+        si->is_kf = 0;

+        if (data_sz >= 10 && !(data0 & 0x01))  /* I-Frame */

-            const uint8_t *c = data + 3;

+            const uint8_t data3 = decrypt_byte(data + 3, data, decrypt_key);

+            const uint8_t data4 = decrypt_byte(data + 4, data, decrypt_key);

+            const uint8_t data5 = decrypt_byte(data + 5, data, decrypt_key);

+            const uint8_t data6 = decrypt_byte(data + 6, data, decrypt_key);

+            const uint8_t data7 = decrypt_byte(data + 7, data, decrypt_key);

+            const uint8_t data8 = decrypt_byte(data + 8, data, decrypt_key);

+            const uint8_t data9 = decrypt_byte(data + 9, data, decrypt_key);

             si->is_kf = 1;

             /* vet via sync code */

-            if (c[0] != 0x9d || c[1] != 0x01 || c[2] != 0x2a)

+            if (data3 != 0x9d || data4 != 0x01 || data5 != 0x2a)

                 res = VPX_CODEC_UNSUP_BITSTREAM;

-            si->w = (c[3] | (c[4] << 8)) & 0x3fff;

-            si->h = (c[5] | (c[6] << 8)) & 0x3fff;

+            si->w = (data6 | (data7 << 8)) & 0x3fff;

+            si->h = (data8 | (data9 << 8)) & 0x3fff;

             /*printf("w=%d, h=%d\n", si->w, si->h);*/

             if (!(si->h | si->w))

@@ -299,11 +315,18 @@

                 res = VPX_CODEC_UNSUP_BITSTREAM;

         else

+        {

             res = VPX_CODEC_UNSUP_BITSTREAM;

+        }

     return res;

+}

+static vpx_codec_err_t vp8_peek_si(const uint8_t *data,

+                                   unsigned int data_sz,

+                                   vpx_codec_stream_info_t *si) {

+    return vp8_peek_si_external(data, data_sz, si, fake_decrypt_key);

 static vpx_codec_err_t vp8_get_si(vpx_codec_alg_priv_t    *ctx,

@@ -432,8 +455,10 @@

     w = ctx->si.w;

     h = ctx->si.h;

-    res = ctx->base.iface->dec.peek_si(ctx->fragments.ptrs[0],

-                                       ctx->fragments.sizes[0], &ctx->si);

+    res = vp8_peek_si_external(ctx->fragments.ptrs[0],

+                               ctx->fragments.sizes[0],

+                               &ctx->si,

+                               ctx->decrypt_key);

     if((res == VPX_CODEC_UNSUP_BITSTREAM) && !ctx->si.is_kf)

@@ -507,6 +532,7 @@

             res = vp8_create_decoder_instances(&ctx->yv12_frame_buffers, &oxcf);

+            ctx->yv12_frame_buffers.pbi[0]->decrypt_key = ctx->decrypt_key;

         ctx->decoder_init = 1;

@@ -928,6 +954,20 @@

+static vpx_codec_err_t vp8_set_decrypt_key(vpx_codec_alg_priv_t *ctx,

+                                           int ctr_id,

+                                           va_list args)

+{

+    const unsigned char *data = va_arg(args, const unsigned char *);

+    if (data == NULL) {

+        return VPX_CODEC_INVALID_PARAM;

+    }

+    memcpy(ctx->decrypt_key, data, VP8_DECRYPT_KEY_SIZE);

+    return VPX_CODEC_OK;

+}

 vpx_codec_ctrl_fn_map_t vp8_ctf_maps[] =

     {VP8_SET_REFERENCE,             vp8_set_reference},

@@ -940,6 +980,7 @@

     {VP8D_GET_LAST_REF_UPDATES,     vp8_get_last_ref_updates},

     {VP8D_GET_FRAME_CORRUPTED,      vp8_get_frame_corrupted},

     {VP8D_GET_LAST_REF_USED,        vp8_get_last_ref_frame},

+    {VP8_SET_DECRYPT_KEY,           vp8_set_decrypt_key},

     { -1, NULL},

};

--- a/vp8/vp8cx.mk

+++ b/vp8/vp8cx.mk

@@ -24,7 +24,6 @@

 VP8_CX_SRCS-yes += vp8_cx_iface.c

-VP8_CX_SRCS-yes += encoder/asm_enc_offsets.c

 VP8_CX_SRCS-yes += encoder/defaultcoefcounts.h

 VP8_CX_SRCS-yes += encoder/bitstream.c

 VP8_CX_SRCS-yes += encoder/boolhuff.c

@@ -78,6 +77,7 @@

 VP8_CX_SRCS-yes += encoder/temporal_filter.c

 VP8_CX_SRCS-$(CONFIG_MULTI_RES_ENCODING) += encoder/mr_dissim.c

 VP8_CX_SRCS-$(CONFIG_MULTI_RES_ENCODING) += encoder/mr_dissim.h

+VP8_CX_SRCS-yes += encoder/vp8_asm_enc_offsets.c

 ifeq ($(CONFIG_REALTIME_ONLY),yes)

 VP8_CX_SRCS_REMOVE-yes += encoder/firstpass.c

@@ -90,7 +90,6 @@

 VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_sse2.asm

 VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/fwalsh_sse2.asm

 VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2_intrinsics.c

-VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2.asm

 # TODO(johann) make this generic

 ifeq ($(HAVE_SSE2),yes)

@@ -122,4 +121,4 @@

 VP8_CX_SRCS-yes := $(filter-out $(VP8_CX_SRCS_REMOVE-yes),$(VP8_CX_SRCS-yes))

 $(eval $(call asm_offsets_template,\

-         vp8_asm_enc_offsets.asm, $(VP8_PREFIX)encoder/asm_enc_offsets.c))

+         vp8_asm_enc_offsets.asm, $(VP8_PREFIX)encoder/vp8_asm_enc_offsets.c))

--- a/vp8/vp8dx.mk

+++ b/vp8/vp8dx.mk

@@ -20,7 +20,6 @@

 VP8_DX_SRCS-yes += vp8_dx_iface.c

-VP8_DX_SRCS-yes += decoder/asm_dec_offsets.c

 VP8_DX_SRCS-yes += decoder/dboolhuff.c

 VP8_DX_SRCS-yes += decoder/decodemv.c

 VP8_DX_SRCS-yes += decoder/decodframe.c

@@ -36,8 +35,9 @@

 VP8_DX_SRCS-yes += decoder/treereader.h

 VP8_DX_SRCS-yes += decoder/onyxd_if.c

 VP8_DX_SRCS-$(CONFIG_MULTITHREAD) += decoder/threading.c

+VP8_DX_SRCS-yes += decoder/vp8_asm_dec_offsets.c

 VP8_DX_SRCS-yes := $(filter-out $(VP8_DX_SRCS_REMOVE-yes),$(VP8_DX_SRCS-yes))

 $(eval $(call asm_offsets_template,\

-         vp8_asm_dec_offsets.asm, $(VP8_PREFIX)decoder/asm_dec_offsets.c))

+         vp8_asm_dec_offsets.asm, $(VP8_PREFIX)decoder/vp8_asm_dec_offsets.c))

--- a/vpx/vp8dx.h

+++ b/vpx/vp8dx.h

@@ -63,6 +63,12 @@

*/

   VP8D_GET_LAST_REF_USED,

+  /** decryption key to protect encoded data buffer before decoding,

+   *  pointer to 32 byte array which is copied, so the array passed

+   *  does not need to be preserved

+   */

+  VP8_SET_DECRYPT_KEY,

   VP8_DECODER_CTRL_ID_MAX

};

@@ -78,6 +84,7 @@

 VPX_CTRL_USE_TYPE(VP8D_GET_LAST_REF_UPDATES,   int *)

 VPX_CTRL_USE_TYPE(VP8D_GET_FRAME_CORRUPTED,    int *)

 VPX_CTRL_USE_TYPE(VP8D_GET_LAST_REF_USED,      int *)

+VPX_CTRL_USE_TYPE(VP8_SET_DECRYPT_KEY,         const unsigned char *)

 /*! @} - end defgroup vp8_decoder */

--- a/vpxdec.c

+++ b/vpxdec.c

@@ -1024,7 +1024,7 @@

     if (!noblit) {

       if (do_scale) {

-        if (frame_out == 1) {

+        if (img && frame_out == 1) {

           stream_w = img->d_w;

           stream_h = img->d_h;

           scaled_img = vpx_img_alloc(NULL, VPX_IMG_FMT_I420,

--- a/vpxenc.c

+++ b/vpxenc.c

@@ -89,8 +89,8 @@

 static const char *exec_name;

-#define VP8_FOURCC (0x00385056)

-#define VP9_FOURCC (0x00395056)

+#define VP8_FOURCC (0x30385056)

+#define VP9_FOURCC (0x30395056)

 static const struct codec_item {

   char const              *name;

   const vpx_codec_iface_t *(*iface)(void);

@@ -2560,7 +2560,7 @@

     usage_exit();

   for (pass = global.pass ? global.pass - 1 : 0; pass < global.passes; pass++) {

-    int frames_in = 0;

+    int frames_in = 0, seen_frames = 0;

     int64_t estimated_time_left = -1;

     int64_t average_rate = -1;

     off_t lagged_count = 0;

@@ -2640,9 +2640,11 @@

         if (frame_avail)

           frames_in++;

+        seen_frames = frames_in > global.skip_frames ?

+                          frames_in - global.skip_frames : 0;

         if (!global.quiet) {

-          float fps = usec_to_fps(cx_time, frames_in);

+          float fps = usec_to_fps(cx_time, seen_frames);

           fprintf(stderr, "\rPass %d/%d ", pass + 1, global.passes);

           if (stream_cnt == 1)

@@ -2678,16 +2680,17 @@

         FOREACH_STREAM(get_cx_data(stream, &global, &got_data));

         if (!got_data && input.length && !streams->frames_out) {

-          lagged_count = global.limit ? frames_in : ftello(input.file);

+          lagged_count = global.limit ? seen_frames : ftello(input.file);

         } else if (input.length) {

           int64_t remaining;

           int64_t rate;

           if (global.limit) {

-            int frame_in_lagged = (frames_in - lagged_count) * 1000;

+            int frame_in_lagged = (seen_frames - lagged_count) * 1000;

             rate = cx_time ? frame_in_lagged * (int64_t)1000000 / cx_time : 0;

-            remaining = 1000 * (global.limit - frames_in + lagged_count);

+            remaining = 1000 * (global.limit - global.skip_frames

+                                - seen_frames + lagged_count);

           } else {

             off_t input_pos = ftello(input.file);

             off_t input_pos_lagged = input_pos - lagged_count;

@@ -2719,14 +2722,14 @@

                        "\rPass %d/%d frame %4d/%-4d %7"PRId64"B %7lub/f %7"PRId64"b/s"

                        " %7"PRId64" %s (%.2f fps)\033[K\n", pass + 1,

                        global.passes, frames_in, stream->frames_out, (int64_t)stream->nbytes,

-                       frames_in ? (unsigned long)(stream->nbytes * 8 / frames_in) : 0,

-                       frames_in ? (int64_t)stream->nbytes * 8

+                       seen_frames ? (unsigned long)(stream->nbytes * 8 / seen_frames) : 0,

+                       seen_frames ? (int64_t)stream->nbytes * 8

                        * (int64_t)global.framerate.num / global.framerate.den

-                       / frames_in

+                       / seen_frames

                        : 0,

                        stream->cx_time > 9999999 ? stream->cx_time / 1000 : stream->cx_time,

                        stream->cx_time > 9999999 ? "ms" : "us",

-                       usec_to_fps(stream->cx_time, frames_in));

+                       usec_to_fps(stream->cx_time, seen_frames));

);

     if (global.show_psnr)

--

⑨