shithub: libvpx

--- a/.mailmap

+++ b/.mailmap

@@ -12,6 +12,8 @@

 Elliott Karpilovsky <elliottk@google.com>

 Erik Niemeyer <erik.a.niemeyer@intel.com> <erik.a.niemeyer@gmail.com>

 Fyodor Kyslov <kyslov@google.com>

+Gregor Jasny <gjasny@gmail.com>

+Gregor Jasny <gjasny@gmail.com> <gjasny@googlemail.com>

 Guillaume Martres <gmartres@google.com> <smarter3@gmail.com>

 Hangyu Kuang <hkuang@google.com>

 Hui Su <huisu@google.com>

--- a/AUTHORS

+++ b/AUTHORS

@@ -3,6 +3,7 @@

 Aaron Watry <awatry@gmail.com>

 Abo Talib Mahfoodh <ab.mahfoodh@gmail.com>

+Adam B. Goode <adam.mckee84@gmail.com>

 Adrian Grange <agrange@google.com>

 Ahmad Sharif <asharif@google.com>

 Aidan Welch <aidansw@yahoo.com>

@@ -25,6 +26,7 @@

 Aron Rosenberg <arosenberg@logitech.com>

 Attila Nagy <attilanagy@google.com>

 Birk Magnussen <birk.magnussen@googlemail.com>

+Bohan Li <bohanli@google.com>

 Brian Foley <bpfoley@google.com>

 Brion Vibber <bvibber@wikimedia.org>

 changjun.yang <changjun.yang@intel.com>

@@ -34,9 +36,11 @@

 chm <chm@rock-chips.com>

 Chris Cunningham <chcunningham@chromium.org>

 Christian Duvivier <cduvivier@google.com>

+Chunbo Hua <chunbo.hua@intel.com>

 Clement Courbet <courbet@google.com>

 Daniele Castagna <dcastagna@chromium.org>

 Daniel Kang <ddkang@google.com>

+Daniel Sommermann <dcsommer@gmail.com>

 Dan Zhu <zxdan@google.com>

 Deb Mukherjee <debargha@google.com>

 Deepa K G <deepa.kg@ittiam.com>

@@ -67,6 +71,7 @@

 Harish Mahendrakar <harish.mahendrakar@ittiam.com>

 Henrik Lundin <hlundin@google.com>

 Hien Ho <hienho@google.com>

+Hirokazu Honda <hiroh@chromium.org>

 Hui Su <huisu@google.com>

 Ivan Krasin <krasin@chromium.org>

 Ivan Maltz <ivanmaltz@google.com>

@@ -73,6 +78,7 @@

 Jacek Caban <cjacek@gmail.com>

 Jacky Chen <jackychen@google.com>

 James Berry <jamesberry@google.com>

+James Touton <bekenn@gmail.com>

 James Yu <james.yu@linaro.org>

 James Zern <jzern@google.com>

 Jan Gerber <j@mailb.org>

@@ -82,16 +88,20 @@

 Jeff Faust <jfaust@google.com>

 Jeff Muizelaar <jmuizelaar@mozilla.com>

 Jeff Petkau <jpet@chromium.org>

+Jeremy Leconte <jleconte@google.com>

 Jerome Jiang <jianj@google.com>

 Jia Jia <jia.jia@linaro.org>

 Jian Zhou <zhoujian@google.com>

 Jim Bankoski <jimbankoski@google.com>

+jinbo <jinbo-hf@loongson.cn>

 Jingning Han <jingning@google.com>

+Joel Fernandes <joelaf@google.com>

 Joey Parrish <joeyparrish@google.com>

 Johann Koenig <johannkoenig@google.com>

 John Koleszar <jkoleszar@google.com>

 Johnny Klonaris <google@jawknee.com>

 John Stark <jhnstrk@gmail.com>

+Jonathan Wright <jonathan.wright@arm.com>

 Jon Kunkee <jkunkee@microsoft.com>

 Jorge E. Moreira <jemoreira@google.com>

 Joshua Bleecher Snyder <josh@treelinelabs.com>

@@ -141,6 +151,7 @@

 Peter Boström <pbos@chromium.org>

 Peter Collingbourne <pcc@chromium.org>

 Peter de Rivaz <peter.derivaz@gmail.com>

+Peter Kasting <pkasting@chromium.org>

 Philip Jägenstedt <philipj@opera.com>

 Priit Laes <plaes@plaes.org>

 Rafael Ávila de Espíndola <rafael.espindola@gmail.com>

--- a/CHANGELOG

+++ b/CHANGELOG

@@ -1,3 +1,59 @@

+2021-09-27 v1.11.0 "Smew Duck"

+  This maintenance release adds support for VBR mode in VP9 rate control

+  interface, new codec controls to get quantization parameters and loop filter

+  levels, and includes several improvements to NEON and numerous bug fixes.

+  - Upgrading:

+    New codec control is added to get quantization parameters and loop filter

+    levels.

+    VBR mode is supported in VP9 rate control library.

+  - Enhancement:

+    Numerous improvements for Neon optimizations.

+    Code clean-up and refactoring.

+    Calculation of rd multiplier is changed with BDRATE gains.

+  - Bug fixes:

+    Fix to overflow on duration.

+    Fix to several instances of -Wunused-but-set-variable.

+    Fix to avoid chroma resampling for 420mpeg2 input.

+    Fix to overflow in calc_iframe_target_size.

+    Fix to disallow skipping transform and quantization.

+    Fix some -Wsign-compare warnings in simple_encode.

+    Fix input file path in simple_encode_test.

+    Fix valid range for under/over_shoot pct.

+2021-03-09 v1.10.0 "Ruddy Duck"

+  This maintenance release adds support for darwin20 and new codec controls, as

+  well as numerous bug fixes.

+  - Upgrading:

+    New codec control is added to disable loopfilter for VP9.

+    New encoder control is added to disable feature to increase Q on overshoot

+    detection for CBR.

+    Configure support for darwin20 is added.

+    New codec control is added for VP9 rate control. The control ID of this

+    interface is VP9E_SET_EXTERNAL_RATE_CONTROL. To make VP9 use a customized

+    external rate control model, users will have to implement each callback

+    function in vpx_rc_funcs_t and register them using libvpx API

+    vpx_codec_control_() with the control ID.

+  - Enhancement:

+    Use -std=gnu++11 instead of -std=c++11 for c++ files.

+  - Bug fixes:

+    Override assembler with --as option of configure for MSVS.

+    Fix several compilation issues with gcc 4.8.5.

+    Fix to resetting rate control for temporal layers.

+    Fix to the rate control stats of SVC example encoder when number of spatial

+    layers is 1.

+    Fix to reusing motion vectors from the base spatial layer in SVC.

+    2 pass related flags removed from SVC example encoder.

 2020-07-29 v1.9.0 "Quacking Duck"

   This release adds support for NV12, a separate library for rate control, as

   well as incremental improvements.

--- a/CONTRIBUTING.md

+++ b/CONTRIBUTING.md

@@ -19,10 +19,9 @@

 All submissions, including submissions by project members, require review. We

 use a [Gerrit](https://www.gerritcodereview.com) instance hosted at

-https://chromium-review.googlesource.com for this purpose.

-See https://www.webmproject.org/code/contribute/submitting-patches for an

-example of a typical gerrit workflow.

+https://chromium-review.googlesource.com for this purpose. See the

+[WebM Project page](https://www.webmproject.org/code/contribute/submitting-patches/)

+for additional details.

 ## Community Guidelines

--- a/README

+++ b/README

@@ -1,4 +1,4 @@

-README - 20 July 2020

+README - 08 March 2021

 Welcome to the WebM VP8/VP9 Codec SDK!

@@ -10,14 +10,14 @@

   1. Prerequisites

     * All x86 targets require the Yasm[1] assembler be installed[2].

-    * All Windows builds require that Cygwin[3] be installed.

-    * Building the documentation requires Doxygen[4]. If you do not

+    * All Windows builds require that Cygwin[3] or MSYS2[4] be installed.

+    * Building the documentation requires Doxygen[5]. If you do not

       have this package, the install-docs option will be disabled.

-    * Downloading the data for the unit tests requires curl[5] and sha1sum.

+    * Downloading the data for the unit tests requires curl[6] and sha1sum.

       sha1sum is provided via the GNU coreutils, installed by default on

       many *nix platforms, as well as MinGW and Cygwin. If coreutils is not

       available, a compatible version of sha1sum can be built from

-      source[6]. These requirements are optional if not running the unit

+      source[7]. These requirements are optional if not running the unit

       tests.

     [1]: http://www.tortall.net/projects/yasm

@@ -26,9 +26,10 @@

          yasm-<version>-<arch>.exe to yasm.exe and place it in:

          Program Files (x86)/Microsoft Visual Studio/2017/<level>/Common7/Tools/

     [3]: http://www.cygwin.com

-    [4]: http://www.doxygen.org

-    [5]: http://curl.haxx.se

-    [6]: http://www.microbrew.org/tools/md5sha1sum/

+    [4]: http://www.msys2.org/

+    [5]: http://www.doxygen.org

+    [6]: http://curl.haxx.se

+    [7]: http://www.microbrew.org/tools/md5sha1sum/

   2. Out-of-tree builds

   Out of tree builds are a supported method of building the application. For

--- a/args.c

+++ b/args.c

@@ -18,8 +18,10 @@

 #include "vpx/vpx_integer.h"

 #include "vpx_ports/msvc.h"

-#if defined(__GNUC__) && __GNUC__

-extern void die(const char *fmt, ...) __attribute__((noreturn));

+#if defined(__GNUC__)

+__attribute__((noreturn)) extern void die(const char *fmt, ...);

+#elif defined(_MSC_VER)

+__declspec(noreturn) extern void die(const char *fmt, ...);

 #else

 extern void die(const char *fmt, ...);

 #endif

--- a/build/make/Android.mk

+++ b/build/make/Android.mk

@@ -166,6 +166,9 @@

     -I$(ASM_CNV_PATH)/libvpx

 LOCAL_MODULE := libvpx

+LOCAL_LICENSE_KINDS := SPDX-license-identifier-BSD

+LOCAL_LICENSE_CONDITIONS := notice

+LOCAL_NOTICE_FILE := $(LOCAL_PATH)/../../LICENSE $(LOCAL_PATH)/../../PATENTS

 ifeq ($(CONFIG_RUNTIME_CPU_DETECT),yes)

   LOCAL_STATIC_LIBRARIES := cpufeatures

--- a/build/make/configure.sh

+++ b/build/make/configure.sh

@@ -1296,10 +1296,6 @@

           enabled optimizations && disabled gprof && check_add_cflags -fomit-frame-pointer

;;

         vs*)

-          # When building with Microsoft Visual Studio the assembler is

-          # invoked directly. Checking at configure time is unnecessary.

-          # Skip the check by setting AS arbitrarily

-          AS=msvs

           msvs_arch_dir=x86-msvs

           case ${tgt_cc##vs} in

14)

--- a/build/make/gen_msvs_vcxproj.sh

+++ b/build/make/gen_msvs_vcxproj.sh

@@ -157,6 +157,8 @@

;;

         --lib) proj_kind="lib"

;;

+        --as=*) as="${optval}"

+        ;;

         --src-path-bare=*)

             src_path_bare=$(fix_path "$optval")

             src_path_bare=${src_path_bare%/}

@@ -247,13 +249,13 @@

 case "$target" in

     x86_64*)

         platforms[0]="x64"

-        asm_Debug_cmdline="yasm -Xvc -g cv8 -f win64 ${yasmincs} &quot;%(FullPath)&quot;"

-        asm_Release_cmdline="yasm -Xvc -f win64 ${yasmincs} &quot;%(FullPath)&quot;"

+        asm_Debug_cmdline="${as} -Xvc -gcv8 -f win64 ${yasmincs} &quot;%(FullPath)&quot;"

+        asm_Release_cmdline="${as} -Xvc -f win64 ${yasmincs} &quot;%(FullPath)&quot;"

;;

     x86*)

         platforms[0]="Win32"

-        asm_Debug_cmdline="yasm -Xvc -g cv8 -f win32 ${yasmincs} &quot;%(FullPath)&quot;"

-        asm_Release_cmdline="yasm -Xvc -f win32 ${yasmincs} &quot;%(FullPath)&quot;"

+        asm_Debug_cmdline="${as} -Xvc -gcv8 -f win32 ${yasmincs} &quot;%(FullPath)&quot;"

+        asm_Release_cmdline="${as} -Xvc -f win32 ${yasmincs} &quot;%(FullPath)&quot;"

;;

     arm64*)

         platforms[0]="ARM64"

--- a/build/make/msvs_common.sh

+++ b/build/make/msvs_common.sh

@@ -9,7 +9,8 @@

 ##  be found in the AUTHORS file in the root of the source tree.

##

-if [ "$(uname -o 2>/dev/null)" = "Cygwin" ] \

+shell_name="$(uname -o 2>/dev/null)"

+if [[ "$shell_name" = "Cygwin" || "$shell_name" = "Msys" ]] \

    && cygpath --help >/dev/null 2>&1; then

     FIXPATH='cygpath -m'

 else

--- a/configure

+++ b/configure

@@ -731,17 +731,17 @@

             soft_enable libyuv

;;

         *-android-*)

-            check_add_cxxflags -std=c++11 && soft_enable webm_io

+            check_add_cxxflags -std=gnu++11 && soft_enable webm_io

             soft_enable libyuv

             # GTestLog must be modified to use Android logging utilities.

;;

         *-darwin-*)

-            check_add_cxxflags -std=c++11

+            check_add_cxxflags -std=gnu++11

             # iOS/ARM builds do not work with gtest. This does not match

             # x86 targets.

;;

         *-iphonesimulator-*)

-            check_add_cxxflags -std=c++11 && soft_enable webm_io

+            check_add_cxxflags -std=gnu++11 && soft_enable webm_io

             soft_enable libyuv

;;

         *-win*)

@@ -748,7 +748,7 @@

             # Some mingw toolchains don't have pthread available by default.

             # Treat these more like visual studio where threading in gtest

             # would be disabled for the same reason.

-            check_add_cxxflags -std=c++11 && soft_enable unit_tests \

+            check_add_cxxflags -std=gnu++11 && soft_enable unit_tests \

               && soft_enable webm_io

             check_cxx "$@" <<EOF && soft_enable libyuv

 int z;

@@ -755,9 +755,9 @@

EOF

;;

*)

-            enabled pthread_h && check_add_cxxflags -std=c++11 \

+            enabled pthread_h && check_add_cxxflags -std=gnu++11 \

               && soft_enable unit_tests

-            check_add_cxxflags -std=c++11 && soft_enable webm_io

+            check_add_cxxflags -std=gnu++11 && soft_enable webm_io

             check_cxx "$@" <<EOF && soft_enable libyuv

 int z;

EOF

--- a/examples.mk

+++ b/examples.mk

@@ -376,6 +376,7 @@

             --ver=$$(CONFIG_VS_VERSION)\

             --proj-guid=$$($$(@:.$(VCPROJ_SFX)=).GUID)\

             --src-path-bare="$(SRC_PATH_BARE)" \

+            --as=$$(AS) \

             $$(if $$(CONFIG_STATIC_MSVCRT),--static-crt) \

             --out=$$@ $$(INTERNAL_CFLAGS) $$(CFLAGS) \

             $$(INTERNAL_LDFLAGS) $$(LDFLAGS) -l$$(CODEC_LIB) $$^

--- a/examples/vp9_spatial_svc_encoder.c

+++ b/examples/vp9_spatial_svc_encoder.c

@@ -66,12 +66,6 @@

     ARG_DEF("k", "kf-dist", 1, "number of frames between keyframes");

 static const arg_def_t scale_factors_arg =

     ARG_DEF("r", "scale-factors", 1, "scale factors (lowest to highest layer)");

-static const arg_def_t passes_arg =

-    ARG_DEF("p", "passes", 1, "Number of passes (1/2)");

-static const arg_def_t pass_arg =

-    ARG_DEF(NULL, "pass", 1, "Pass to execute (1/2)");

-static const arg_def_t fpf_name_arg =

-    ARG_DEF(NULL, "fpf", 1, "First pass statistics file name");

 static const arg_def_t min_q_arg =

     ARG_DEF(NULL, "min-q", 1, "Minimum quantizer");

 static const arg_def_t max_q_arg =

@@ -125,9 +119,6 @@

                                        &spatial_layers_arg,

                                        &kf_dist_arg,

                                        &scale_factors_arg,

-                                       &passes_arg,

-                                       &pass_arg,

-                                       &fpf_name_arg,

                                        &min_q_arg,

                                        &max_q_arg,

                                        &min_bitrate_arg,

@@ -173,8 +164,6 @@

   uint32_t frames_to_skip;

   struct VpxInputContext input_ctx;

   stats_io_t rc_stats;

-  int passes;

-  int pass;

   int tune_content;

   int inter_layer_pred;

 } AppInput;

@@ -197,9 +186,6 @@

   char **argi = NULL;

   char **argj = NULL;

   vpx_codec_err_t res;

-  int passes = 0;

-  int pass = 0;

-  const char *fpf_file_name = NULL;

   unsigned int min_bitrate = 0;

   unsigned int max_bitrate = 0;

   char string_options[1024] = { 0 };

@@ -289,18 +275,6 @@

               sizeof(string_options) - strlen(string_options) - 1);

       strncat(string_options, arg.val,

               sizeof(string_options) - strlen(string_options) - 1);

-    } else if (arg_match(&arg, &passes_arg, argi)) {

-      passes = arg_parse_uint(&arg);

-      if (passes < 1 || passes > 2) {

-        die("Error: Invalid number of passes (%d)\n", passes);

-      }

-    } else if (arg_match(&arg, &pass_arg, argi)) {

-      pass = arg_parse_uint(&arg);

-      if (pass < 1 || pass > 2) {

-        die("Error: Invalid pass selected (%d)\n", pass);

-      }

-    } else if (arg_match(&arg, &fpf_name_arg, argi)) {

-      fpf_file_name = arg.val;

     } else if (arg_match(&arg, &min_q_arg, argi)) {

       strncat(string_options, " min-quantizers=",

               sizeof(string_options) - strlen(string_options) - 1);

@@ -355,36 +329,8 @@

   if (strlen(string_options) > 0)

     vpx_svc_set_options(svc_ctx, string_options + 1);

-  if (passes == 0 || passes == 1) {

-    if (pass) {

-      fprintf(stderr, "pass is ignored since there's only one pass\n");

-    }

-    enc_cfg->g_pass = VPX_RC_ONE_PASS;

-  } else {

-    if (pass == 0) {

-      die("pass must be specified when passes is 2\n");

-    }

+  enc_cfg->g_pass = VPX_RC_ONE_PASS;

-    if (fpf_file_name == NULL) {

-      die("fpf must be specified when passes is 2\n");

-    }

-    if (pass == 1) {

-      enc_cfg->g_pass = VPX_RC_FIRST_PASS;

-      if (!stats_open_file(&app_input->rc_stats, fpf_file_name, 0)) {

-        fatal("Failed to open statistics store");

-      }

-    } else {

-      enc_cfg->g_pass = VPX_RC_LAST_PASS;

-      if (!stats_open_file(&app_input->rc_stats, fpf_file_name, 1)) {

-        fatal("Failed to open statistics store");

-      }

-      enc_cfg->rc_twopass_stats_in = stats_get(&app_input->rc_stats);

-    }

-    app_input->passes = passes;

-    app_input->pass = pass;

-  }

   if (enc_cfg->rc_target_bitrate > 0) {

     if (min_bitrate > 0) {

       enc_cfg->rc_2pass_vbr_minsection_pct =

@@ -1004,13 +950,11 @@

   info.time_base.numerator = enc_cfg.g_timebase.num;

   info.time_base.denominator = enc_cfg.g_timebase.den;

-  if (!(app_input.passes == 2 && app_input.pass == 1)) {

-    // We don't save the bitstream for the 1st pass on two pass rate control

-    writer =

-        vpx_video_writer_open(app_input.output_filename, kContainerIVF, &info);

-    if (!writer)

-      die("Failed to open %s for writing\n", app_input.output_filename);

-  }

+  writer =

+      vpx_video_writer_open(app_input.output_filename, kContainerIVF, &info);

+  if (!writer)

+    die("Failed to open %s for writing\n", app_input.output_filename);

 #if OUTPUT_RC_STATS

   // Write out spatial layer stream.

   // TODO(marpan/jianj): allow for writing each spatial and temporal stream.

@@ -1230,7 +1174,6 @@

 #endif

   if (vpx_codec_destroy(&encoder))

     die_codec(&encoder, "Failed to destroy codec");

-  if (app_input.passes == 2) stats_close(&app_input.rc_stats, 1);

   if (writer) {

     vpx_video_writer_close(writer);

--- a/examples/vpx_dec_fuzzer.cc

+++ b/examples/vpx_dec_fuzzer.cc

@@ -41,7 +41,7 @@

    $make -j32

  * Build vp9 fuzzer

-   $ $CXX $CXXFLAGS -std=c++11 -DDECODER=vp9 \

+   $ $CXX $CXXFLAGS -std=gnu++11 -DDECODER=vp9 \

    -fsanitize=fuzzer -I../libvpx -I. -Wl,--start-group \

    ../libvpx/examples/vpx_dec_fuzzer.cc -o ./vpx_dec_fuzzer_vp9 \

    ./libvpx.a -Wl,--end-group

--- a/examples/vpx_temporal_svc_encoder.c

+++ b/examples/vpx_temporal_svc_encoder.c

@@ -831,6 +831,7 @@

   } else if (strncmp(encoder->name, "vp9", 3) == 0) {

     vpx_svc_extra_cfg_t svc_params;

     memset(&svc_params, 0, sizeof(svc_params));

+    vpx_codec_control(&codec, VP9E_SET_POSTENCODE_DROP, 0);

     vpx_codec_control(&codec, VP9E_SET_DISABLE_OVERSHOOT_MAXQ_CBR, 0);

     vpx_codec_control(&codec, VP8E_SET_CPUUSED, speed);

     vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 3);

@@ -930,6 +931,7 @@

           // Update for short-time encoding bitrate states, for moving window

           // of size rc->window, shifted by rc->window / 2.

           // Ignore first window segment, due to key frame.

+          if (rc.window_size == 0) rc.window_size = 15;

           if (frame_cnt > rc.window_size) {

             sum_bitrate += 0.001 * 8.0 * pkt->data.frame.sz * framerate;

             if (frame_cnt % rc.window_size == 0) {

--- a/libs.mk

+++ b/libs.mk

@@ -94,15 +94,28 @@

   INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/$(VP9_PREFIX)/%

   CODEC_DOC_SRCS += vpx/vp8.h vpx/vp8cx.h vpx/vpx_ext_ratectrl.h

   CODEC_DOC_SECTIONS += vp9 vp9_encoder

+endif

-  RC_RTC_SRCS := $(addprefix $(VP9_PREFIX),$(call enabled,VP9_CX_SRCS))

-  RC_RTC_SRCS += $(VP9_PREFIX)vp9cx.mk vpx/vp8.h vpx/vp8cx.h

-  RC_RTC_SRCS += vpx/vpx_ext_ratectrl.h

+RC_RTC_SRCS := vpx/vp8.h vpx/vp8cx.h

+RC_RTC_SRCS += vpx/vpx_ext_ratectrl.h

+RC_RTC_SRCS += vpx/internal/vpx_ratectrl_rtc.h

+ifeq ($(CONFIG_VP9_ENCODER),yes)

+  VP9_PREFIX=vp9/

+  RC_RTC_SRCS += $(addprefix $(VP9_PREFIX),$(call enabled,VP9_CX_SRCS))

+  RC_RTC_SRCS += $(VP9_PREFIX)vp9cx.mk

   RC_RTC_SRCS += $(VP9_PREFIX)ratectrl_rtc.cc

   RC_RTC_SRCS += $(VP9_PREFIX)ratectrl_rtc.h

   INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += $(VP9_PREFIX)ratectrl_rtc.cc

   INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += $(VP9_PREFIX)ratectrl_rtc.h

 endif

+ifeq ($(CONFIG_VP8_ENCODER),yes)

+  VP8_PREFIX=vp8/

+  RC_RTC_SRCS += $(addprefix $(VP8_PREFIX),$(call enabled,VP8_CX_SRCS))

+  RC_RTC_SRCS += $(VP8_PREFIX)vp8_ratectrl_rtc.cc

+  RC_RTC_SRCS += $(VP8_PREFIX)vp8_ratectrl_rtc.h

+  INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += $(VP8_PREFIX)vp8_ratectrl_rtc.cc

+  INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += $(VP8_PREFIX)vp8_ratectrl_rtc.h

+endif

 ifeq ($(CONFIG_VP9_DECODER),yes)

   VP9_PREFIX=vp9/

@@ -126,7 +139,7 @@

 ifeq ($(CONFIG_MSVS),yes)

 CODEC_LIB=$(if $(CONFIG_STATIC_MSVCRT),vpxmt,vpxmd)

 GTEST_LIB=$(if $(CONFIG_STATIC_MSVCRT),gtestmt,gtestmd)

-RC_RTC_LIB=$(if $(CONFIG_STATIC_MSVCRT),vp9rcmt,vp9rcmd)

+RC_RTC_LIB=$(if $(CONFIG_STATIC_MSVCRT),vpxrcmt,vpxrcmd)

 # This variable uses deferred expansion intentionally, since the results of

 # $(wildcard) may change during the course of the Make.

 VS_PLATFORMS = $(foreach d,$(wildcard */Release/$(CODEC_LIB).lib),$(word 1,$(subst /, ,$(d))))

@@ -232,6 +245,7 @@

             --ver=$(CONFIG_VS_VERSION) \

             --src-path-bare="$(SRC_PATH_BARE)" \

             --out=$@ $(CFLAGS) \

+            --as=$(AS) \

             $(filter $(SRC_PATH_BARE)/vp8/%.c, $(VCPROJ_SRCS)) \

             $(filter $(SRC_PATH_BARE)/vp8/%.h, $(VCPROJ_SRCS)) \

             $(filter $(SRC_PATH_BARE)/vp9/%.c, $(VCPROJ_SRCS)) \

@@ -248,20 +262,21 @@

 vpx.$(VCPROJ_SFX): vpx_config.asm

 vpx.$(VCPROJ_SFX): $(RTCD)

-vp9rc.$(VCPROJ_SFX): \

+vpxrc.$(VCPROJ_SFX): \

     VCPROJ_SRCS=$(filter-out $(addprefix %, $(ASM_INCLUDES)), $^)

-vp9rc.$(VCPROJ_SFX): $(RC_RTC_SRCS)

+vpxrc.$(VCPROJ_SFX): $(RC_RTC_SRCS)

 	@echo "    [CREATE] $@"

 	$(qexec)$(GEN_VCPROJ) \

             $(if $(CONFIG_SHARED),--dll,--lib) \

             --target=$(TOOLCHAIN) \

             $(if $(CONFIG_STATIC_MSVCRT),--static-crt) \

-            --name=vp9rc \

+            --name=vpxrc \

             --proj-guid=C26FF952-9494-4838-9A3F-7F3D4F613385 \

             --ver=$(CONFIG_VS_VERSION) \

             --src-path-bare="$(SRC_PATH_BARE)" \

             --out=$@ $(CFLAGS) \

+            --as=$(AS) \

             $(filter $(SRC_PATH_BARE)/vp9/%.c, $(VCPROJ_SRCS)) \

             $(filter $(SRC_PATH_BARE)/vp9/%.cc, $(VCPROJ_SRCS)) \

             $(filter $(SRC_PATH_BARE)/vp9/%.h, $(VCPROJ_SRCS)) \

@@ -273,10 +288,10 @@

               $(VCPROJ_SRCS)) \

             --src-path-bare="$(SRC_PATH_BARE)" \

-PROJECTS-yes += vp9rc.$(VCPROJ_SFX)

+PROJECTS-yes += vpxrc.$(VCPROJ_SFX)

-vp9rc.$(VCPROJ_SFX): vpx_config.asm

-vp9rc.$(VCPROJ_SFX): $(RTCD)

+vpxrc.$(VCPROJ_SFX): vpx_config.asm

+vpxrc.$(VCPROJ_SFX): $(RTCD)

 endif # ifeq ($(CONFIG_MSVS),yes)

 else # ifeq ($(CONFIG_EXTERNAL_BUILD),yes)

@@ -285,8 +300,20 @@

 LIBS-$(if yes,$(CONFIG_STATIC)) += $(BUILD_PFX)libvpx.a $(BUILD_PFX)libvpx_g.a

 $(BUILD_PFX)libvpx_g.a: $(LIBVPX_OBJS)

-SO_VERSION_MAJOR := 6

-SO_VERSION_MINOR := 3

+# Updating version info.

+# https://www.gnu.org/software/libtool/manual/libtool.html#Updating-version-info

+# For libtool: c=<current>, a=<age>, r=<revision>

+# libtool generates .so file as .so.[c-a].a.r, while -version-info c:r:a is

+# passed to libtool.

+#

+# libvpx library file is generated as libvpx.so.<MAJOR>.<MINOR>.<PATCH>

+# MAJOR = c-a, MINOR = a, PATCH = r

+#

+# To determine SO_VERSION_{MAJOR,MINOR,PATCH}, calculate c,a,r with current

+# SO_VERSION_* then follow the rules in the link to detemine the new version

+# (c1, a1, r1) and set MAJOR to [c1-a1], MINOR to a1 and PATCH to r1

+SO_VERSION_MAJOR := 7

+SO_VERSION_MINOR := 0

 SO_VERSION_PATCH := 0

 ifeq ($(filter darwin%,$(TGT_OS)),$(TGT_OS))

 LIBVPX_SO               := libvpx.$(SO_VERSION_MAJOR).dylib

@@ -384,12 +411,11 @@

 INSTALL_MAPS += $(LIBSUBDIR)/pkgconfig/%.pc %.pc

 CLEAN-OBJS += vpx.pc

-ifeq ($(CONFIG_VP9_ENCODER),yes)

+ifeq ($(CONFIG_ENCODERS),yes)

   RC_RTC_OBJS=$(call objs,$(RC_RTC_SRCS))

-  RC_RTC_OBJS=$(call objs,$(RC_RTC_SRCS))

   OBJS-yes += $(RC_RTC_OBJS)

-  LIBS-yes += $(BUILD_PFX)libvp9rc.a $(BUILD_PFX)libvp9rc_g.a

-  $(BUILD_PFX)libvp9rc_g.a: $(RC_RTC_OBJS)

+  LIBS-yes += $(BUILD_PFX)libvpxrc.a $(BUILD_PFX)libvpxrc_g.a

+  $(BUILD_PFX)libvpxrc_g.a: $(RC_RTC_OBJS)

 endif

 ifeq ($(CONFIG_VP9_ENCODER)$(CONFIG_RATE_CTRL),yesyes)

@@ -479,10 +505,12 @@

                            $(call enabled,TEST_INTRA_PRED_SPEED_SRCS))

 TEST_INTRA_PRED_SPEED_OBJS := $(sort $(call objs,$(TEST_INTRA_PRED_SPEED_SRCS)))

+ifeq ($(CONFIG_ENCODERS),yes)

 RC_INTERFACE_TEST_BIN=./test_rc_interface$(EXE_SFX)

 RC_INTERFACE_TEST_SRCS=$(call addprefix_clean,test/,\

                        $(call enabled,RC_INTERFACE_TEST_SRCS))

 RC_INTERFACE_TEST_OBJS := $(sort $(call objs,$(RC_INTERFACE_TEST_SRCS)))

+endif

 SIMPLE_ENCODE_TEST_BIN=./test_simple_encode$(EXE_SFX)

 SIMPLE_ENCODE_TEST_SRCS=$(call addprefix_clean,test/,\

@@ -536,6 +564,7 @@

             --proj-guid=EC00E1EC-AF68-4D92-A255-181690D1C9B1 \

             --ver=$(CONFIG_VS_VERSION) \

             --src-path-bare="$(SRC_PATH_BARE)" \

+            --as=$(AS) \

             -D_VARIADIC_MAX=10 \

             --out=gtest.$(VCPROJ_SFX) $(SRC_PATH_BARE)/third_party/googletest/src/src/gtest-all.cc \

             -I. -I"$(SRC_PATH_BARE)/third_party/googletest/src/include" -I"$(SRC_PATH_BARE)/third_party/googletest/src"

@@ -552,6 +581,7 @@

             --proj-guid=CD837F5F-52D8-4314-A370-895D614166A7 \

             --ver=$(CONFIG_VS_VERSION) \

             --src-path-bare="$(SRC_PATH_BARE)" \

+            --as=$(AS) \

             $(if $(CONFIG_STATIC_MSVCRT),--static-crt) \

             --out=$@ $(INTERNAL_CFLAGS) $(CFLAGS) \

             -I. -I"$(SRC_PATH_BARE)/third_party/googletest/src/include" \

@@ -574,6 +604,7 @@

             --proj-guid=CD837F5F-52D8-4314-A370-895D614166A7 \

             --ver=$(CONFIG_VS_VERSION) \

             --src-path-bare="$(SRC_PATH_BARE)" \

+            --as=$(AS) \

             $(if $(CONFIG_STATIC_MSVCRT),--static-crt) \

             --out=$@ $(INTERNAL_CFLAGS) $(CFLAGS) \

             -I. -I"$(SRC_PATH_BARE)/third_party/googletest/src/include" \

@@ -580,10 +611,11 @@

             -L. -l$(CODEC_LIB) -l$(GTEST_LIB) $^

 endif  # TEST_INTRA_PRED_SPEED

+ifeq ($(CONFIG_ENCODERS),yes)

 ifneq ($(strip $(RC_INTERFACE_TEST_OBJS)),)

 PROJECTS-$(CONFIG_MSVS) += test_rc_interface.$(VCPROJ_SFX)

 test_rc_interface.$(VCPROJ_SFX): $(RC_INTERFACE_TEST_SRCS) vpx.$(VCPROJ_SFX) \

-	vp9rc.$(VCPROJ_SFX) gtest.$(VCPROJ_SFX)

+	vpxrc.$(VCPROJ_SFX) gtest.$(VCPROJ_SFX)

 	@echo "    [CREATE] $@"

 	$(qexec)$(GEN_VCPROJ) \

             --exe \

@@ -592,6 +624,7 @@

             -D_VARIADIC_MAX=10 \

             --proj-guid=30458F88-1BC6-4689-B41C-50F3737AAB27 \

             --ver=$(CONFIG_VS_VERSION) \

+            --as=$(AS) \

             --src-path-bare="$(SRC_PATH_BARE)" \

             $(if $(CONFIG_STATIC_MSVCRT),--static-crt) \

             --out=$@ $(INTERNAL_CFLAGS) $(CFLAGS) \

@@ -598,6 +631,7 @@

             -I. -I"$(SRC_PATH_BARE)/third_party/googletest/src/include" \

             -L. -l$(CODEC_LIB) -l$(RC_RTC_LIB) -l$(GTEST_LIB) $^

 endif  # RC_INTERFACE_TEST

+endif  # CONFIG_VP9_ENCODER

 endif

 else

@@ -639,6 +673,7 @@

               -L. -lvpx -lgtest $(extralibs) -lm))

 endif  # TEST_INTRA_PRED_SPEED

+ifeq ($(CONFIG_ENCODERS),yes)

 ifneq ($(strip $(RC_INTERFACE_TEST_OBJS)),)

 $(RC_INTERFACE_TEST_OBJS) $(RC_INTERFACE_TEST_OBJS:.o=.d): \

   CXXFLAGS += $(GTEST_INCLUDES)

@@ -645,11 +680,12 @@

 OBJS-yes += $(RC_INTERFACE_TEST_OBJS)

 BINS-yes += $(RC_INTERFACE_TEST_BIN)

-$(RC_INTERFACE_TEST_BIN): $(TEST_LIBS) libvp9rc.a

+$(RC_INTERFACE_TEST_BIN): $(TEST_LIBS) libvpxrc.a

 $(eval $(call linkerxx_template,$(RC_INTERFACE_TEST_BIN), \

               $(RC_INTERFACE_TEST_OBJS) \

-              -L. -lvpx -lgtest -lvp9rc $(extralibs) -lm))

+              -L. -lvpx -lgtest -lvpxrc $(extralibs) -lm))

 endif  # RC_INTERFACE_TEST

+endif  # CONFIG_ENCODERS

 ifneq ($(strip $(SIMPLE_ENCODE_TEST_OBJS)),)

 $(SIMPLE_ENCODE_TEST_OBJS) $(SIMPLE_ENCODE_TEST_OBJS:.o=.d): \

--- a/test/android/Android.mk

+++ b/test/android/Android.mk

@@ -34,6 +34,9 @@

 LOCAL_C_INCLUDES += $(LOCAL_PATH)/third_party/googletest/src/include/

 LOCAL_EXPORT_C_INCLUDES := $(LOCAL_PATH)/third_party/googletest/src/include/

 LOCAL_SRC_FILES := ./third_party/googletest/src/src/gtest-all.cc

+LOCAL_LICENSE_KINDS := SPDX-license-identifier-BSD

+LOCAL_LICENSE_CONDITIONS := notice

+LOCAL_NOTICE_FILE := $(LOCAL_PATH)/../../LICENSE $(LOCAL_PATH)/../../PATENTS

 include $(BUILD_STATIC_LIBRARY)

 #libvpx_test

@@ -48,6 +51,9 @@

   LOCAL_STATIC_LIBRARIES += vpx

 endif

+LOCAL_LICENSE_KINDS := SPDX-license-identifier-BSD

+LOCAL_LICENSE_CONDITIONS := notice

+LOCAL_NOTICE_FILE := $(LOCAL_PATH)/../../LICENSE $(LOCAL_PATH)/../../PATENTS

 include $(LOCAL_PATH)/test/test.mk

 LOCAL_C_INCLUDES := $(BINDINGS_DIR)

 FILTERED_SRC := $(sort $(filter %.cc %.c, $(LIBVPX_TEST_SRCS-yes)))

--- a/test/encode_api_test.cc

+++ b/test/encode_api_test.cc

@@ -8,6 +8,9 @@

  *  be found in the AUTHORS file in the root of the source tree.

*/

+#include <climits>

+#include <cstring>

 #include "third_party/googletest/src/include/gtest/gtest.h"

 #include "./vpx_config.h"

@@ -18,6 +21,12 @@

 #define NELEMENTS(x) static_cast<int>(sizeof(x) / sizeof(x[0]))

+bool IsVP9(const vpx_codec_iface_t *iface) {

+  static const char kVP9Name[] = "WebM Project VP9";

+  return strncmp(kVP9Name, vpx_codec_iface_name(iface), sizeof(kVP9Name) - 1) ==

+         0;

+}

 TEST(EncodeAPI, InvalidParams) {

   static const vpx_codec_iface_t *kCodecs[] = {

 #if CONFIG_VP8_ENCODER

@@ -184,15 +193,120 @@

     // VP9 should report incapable, VP8 invalid for all configurations.

-    const char kVP9Name[] = "WebM Project VP9";

-    const bool is_vp9 = strncmp(kVP9Name, vpx_codec_iface_name(iface),

-                                sizeof(kVP9Name) - 1) == 0;

-    EXPECT_EQ(is_vp9 ? VPX_CODEC_INCAPABLE : VPX_CODEC_INVALID_PARAM,

+    EXPECT_EQ(IsVP9(iface) ? VPX_CODEC_INCAPABLE : VPX_CODEC_INVALID_PARAM,

               vpx_codec_enc_init_multi(&enc[0], iface, &cfg[0], 2, 0, &dsf[0]));

     for (int i = 0; i < 2; i++) {

       vpx_codec_destroy(&enc[i]);

+  }

+}

+TEST(EncodeAPI, SetRoi) {

+  static struct {

+    const vpx_codec_iface_t *iface;

+    int ctrl_id;

+  } kCodecs[] = {

+#if CONFIG_VP8_ENCODER

+    { &vpx_codec_vp8_cx_algo, VP8E_SET_ROI_MAP },

+#endif

+#if CONFIG_VP9_ENCODER

+    { &vpx_codec_vp9_cx_algo, VP9E_SET_ROI_MAP },

+#endif

+  };

+  constexpr int kWidth = 64;

+  constexpr int kHeight = 64;

+  for (const auto &codec : kCodecs) {

+    SCOPED_TRACE(vpx_codec_iface_name(codec.iface));

+    vpx_codec_ctx_t enc;

+    vpx_codec_enc_cfg_t cfg;

+    EXPECT_EQ(vpx_codec_enc_config_default(codec.iface, &cfg, 0), VPX_CODEC_OK);

+    cfg.g_w = kWidth;

+    cfg.g_h = kHeight;

+    EXPECT_EQ(vpx_codec_enc_init(&enc, codec.iface, &cfg, 0), VPX_CODEC_OK);

+    vpx_roi_map_t roi = {};

+    uint8_t roi_map[kWidth * kHeight] = {};

+    if (IsVP9(codec.iface)) {

+      roi.rows = (cfg.g_w + 7) >> 3;

+      roi.cols = (cfg.g_h + 7) >> 3;

+    } else {

+      roi.rows = (cfg.g_w + 15) >> 4;

+      roi.cols = (cfg.g_h + 15) >> 4;

+    }

+    EXPECT_EQ(vpx_codec_control_(&enc, codec.ctrl_id, &roi), VPX_CODEC_OK);

+    roi.roi_map = roi_map;

+    // VP8 only. This value isn't range checked.

+    roi.static_threshold[1] = 1000;

+    roi.static_threshold[2] = INT_MIN;

+    roi.static_threshold[3] = INT_MAX;

+    for (const auto delta : { -63, -1, 0, 1, 63 }) {

+      for (int i = 0; i < 8; ++i) {

+        roi.delta_q[i] = delta;

+        roi.delta_lf[i] = delta;

+        // VP9 only.

+        roi.skip[i] ^= 1;

+        roi.ref_frame[i] = (roi.ref_frame[i] + 1) % 4;

+        EXPECT_EQ(vpx_codec_control_(&enc, codec.ctrl_id, &roi), VPX_CODEC_OK);

+      }

+    }

+    vpx_codec_err_t expected_error;

+    for (const auto delta : { -64, 64, INT_MIN, INT_MAX }) {

+      expected_error = VPX_CODEC_INVALID_PARAM;

+      for (int i = 0; i < 8; ++i) {

+        roi.delta_q[i] = delta;

+        // The max segment count for VP8 is 4, the remainder of the entries are

+        // ignored.

+        if (i >= 4 && !IsVP9(codec.iface)) expected_error = VPX_CODEC_OK;

+        EXPECT_EQ(vpx_codec_control_(&enc, codec.ctrl_id, &roi), expected_error)

+            << "delta_q[" << i << "]: " << delta;

+        roi.delta_q[i] = 0;

+        roi.delta_lf[i] = delta;

+        EXPECT_EQ(vpx_codec_control_(&enc, codec.ctrl_id, &roi), expected_error)

+            << "delta_lf[" << i << "]: " << delta;

+        roi.delta_lf[i] = 0;

+      }

+    }

+    // VP8 should ignore skip[] and ref_frame[] values.

+    expected_error =

+        IsVP9(codec.iface) ? VPX_CODEC_INVALID_PARAM : VPX_CODEC_OK;

+    for (const auto skip : { -2, 2, INT_MIN, INT_MAX }) {

+      for (int i = 0; i < 8; ++i) {

+        roi.skip[i] = skip;

+        EXPECT_EQ(vpx_codec_control_(&enc, codec.ctrl_id, &roi), expected_error)

+            << "skip[" << i << "]: " << skip;

+        roi.skip[i] = 0;

+      }

+    }

+    // VP9 allows negative values to be used to disable segmentation.

+    for (int ref_frame = -3; ref_frame < 0; ++ref_frame) {

+      for (int i = 0; i < 8; ++i) {

+        roi.ref_frame[i] = ref_frame;

+        EXPECT_EQ(vpx_codec_control_(&enc, codec.ctrl_id, &roi), VPX_CODEC_OK)

+            << "ref_frame[" << i << "]: " << ref_frame;

+        roi.ref_frame[i] = 0;

+      }

+    }

+    for (const auto ref_frame : { 4, INT_MIN, INT_MAX }) {

+      for (int i = 0; i < 8; ++i) {

+        roi.ref_frame[i] = ref_frame;

+        EXPECT_EQ(vpx_codec_control_(&enc, codec.ctrl_id, &roi), expected_error)

+            << "ref_frame[" << i << "]: " << ref_frame;

+        roi.ref_frame[i] = 0;

+      }

+    }

+    EXPECT_EQ(vpx_codec_destroy(&enc), VPX_CODEC_OK);

--- a/test/ratectrl_rtc_test.cc

+++ /dev/null

@@ -1,229 +1,0 @@

-/*

- *  Copyright (c) 2020 The WebM project authors. All Rights Reserved.

- *

- *  Use of this source code is governed by a BSD-style license

- *  that can be found in the LICENSE file in the root of the source

- *  tree. An additional intellectual property rights grant can be found

- *  in the file PATENTS.  All contributing project authors may

- *  be found in the AUTHORS file in the root of the source tree.

- */

-#include "vp9/ratectrl_rtc.h"

-#include <fstream>  // NOLINT

-#include <string>

-#include "./vpx_config.h"

-#include "third_party/googletest/src/include/gtest/gtest.h"

-#include "test/codec_factory.h"

-#include "test/encode_test_driver.h"

-#include "test/util.h"

-#include "test/video_source.h"

-#include "vpx/vpx_codec.h"

-#include "vpx_ports/bitops.h"

-namespace {

-const size_t kNumFrame = 850;

-struct FrameInfo {

-  friend std::istream &operator>>(std::istream &is, FrameInfo &info) {

-    is >> info.frame_id >> info.spatial_id >> info.temporal_id >> info.base_q >>

-        info.target_bandwidth >> info.buffer_level >> info.filter_level_ >>

-        info.bytes_used;

-    return is;

-  }

-  int frame_id;

-  int spatial_id;

-  int temporal_id;

-  // Base QP

-  int base_q;

-  size_t target_bandwidth;

-  size_t buffer_level;

-  // Loopfilter level

-  int filter_level_;

-  // Frame size for current frame, used for pose encode update

-  size_t bytes_used;

-};

-// This test runs the rate control interface and compare against ground truth

-// generated by encoders.

-// Settings for the encoder:

-// For 1 layer:

-//

-// examples/vpx_temporal_svc_encoder gipsrec_motion1.1280_720.yuv out vp9

-//    1280 720 1 30 7 0 0 1 0 1000

-//

-// For SVC (3 temporal layers, 3 spatial layers):

-//

-// examples/vp9_spatial_svc_encoder -f 10000 -w 1280 -h 720 -t 1/30 -sl 3

-// -k 10000 -bl 100,140,200,250,350,500,450,630,900 -b 1600 --rc-end-usage=1

-// --lag-in-frames=0 --passes=1 --speed=7 --threads=1

-// --temporal-layering-mode=3 -aq 1 -rcstat 1

-// gipsrec_motion1.1280_720.yuv -o out.webm

-//

-// - AQ_Mode 0

-// - Disable golden refresh

-// - Bitrate x 2 at frame/superframe 200

-// - Bitrate / 4 at frame/superframe 400

-//

-// The generated file includes:

-// frame number, spatial layer ID, temporal layer ID, base QP, target

-// bandwidth, buffer level, loopfilter level, encoded frame size

-// TODO(jianj): Remove golden files, and run actual encoding in this test.

-class RcInterfaceTest : public ::testing::Test {

- public:

-  explicit RcInterfaceTest() {}

-  virtual ~RcInterfaceTest() {}

- protected:

-  void RunOneLayer() {

-    SetConfigOneLayer();

-    rc_api_ = libvpx::VP9RateControlRTC::Create(rc_cfg_);

-    FrameInfo frame_info;

-    libvpx::VP9FrameParamsQpRTC frame_params;

-    frame_params.frame_type = KEY_FRAME;

-    frame_params.spatial_layer_id = 0;

-    frame_params.temporal_layer_id = 0;

-    std::ifstream one_layer_file;

-    one_layer_file.open(libvpx_test::GetDataPath() +

-                        "/rc_interface_test_one_layer");

-    ASSERT_TRUE(one_layer_file.good());

-    for (size_t i = 0; i < kNumFrame; i++) {

-      one_layer_file >> frame_info;

-      if (frame_info.frame_id > 0) frame_params.frame_type = INTER_FRAME;

-      if (frame_info.frame_id == 200) {

-        rc_cfg_.target_bandwidth = rc_cfg_.target_bandwidth * 2;

-        rc_api_->UpdateRateControl(rc_cfg_);

-      } else if (frame_info.frame_id == 400) {

-        rc_cfg_.target_bandwidth = rc_cfg_.target_bandwidth / 4;

-        rc_api_->UpdateRateControl(rc_cfg_);

-      }

-      ASSERT_EQ(frame_info.spatial_id, 0);

-      ASSERT_EQ(frame_info.temporal_id, 0);

-      rc_api_->ComputeQP(frame_params);

-      ASSERT_EQ(rc_api_->GetQP(), frame_info.base_q);

-      ASSERT_EQ(rc_api_->GetLoopfilterLevel(), frame_info.filter_level_);

-      rc_api_->PostEncodeUpdate(frame_info.bytes_used);

-    }

-  }

-  void RunSVC() {

-    SetConfigSVC();

-    rc_api_ = libvpx::VP9RateControlRTC::Create(rc_cfg_);

-    FrameInfo frame_info;

-    libvpx::VP9FrameParamsQpRTC frame_params;

-    frame_params.frame_type = KEY_FRAME;

-    std::ifstream svc_file;

-    svc_file.open(std::string(std::getenv("LIBVPX_TEST_DATA_PATH")) +

-                  "/rc_interface_test_svc");

-    ASSERT_TRUE(svc_file.good());

-    for (size_t i = 0; i < kNumFrame * rc_cfg_.ss_number_layers; i++) {

-      svc_file >> frame_info;

-      if (frame_info.frame_id > 0) frame_params.frame_type = INTER_FRAME;

-      if (frame_info.frame_id == 200 * rc_cfg_.ss_number_layers) {

-        for (int layer = 0;

-             layer < rc_cfg_.ss_number_layers * rc_cfg_.ts_number_layers;

-             layer++)

-          rc_cfg_.layer_target_bitrate[layer] *= 2;

-        rc_cfg_.target_bandwidth *= 2;

-        rc_api_->UpdateRateControl(rc_cfg_);

-      } else if (frame_info.frame_id == 400 * rc_cfg_.ss_number_layers) {

-        for (int layer = 0;

-             layer < rc_cfg_.ss_number_layers * rc_cfg_.ts_number_layers;

-             layer++)

-          rc_cfg_.layer_target_bitrate[layer] /= 4;

-        rc_cfg_.target_bandwidth /= 4;

-        rc_api_->UpdateRateControl(rc_cfg_);

-      }

-      frame_params.spatial_layer_id = frame_info.spatial_id;

-      frame_params.temporal_layer_id = frame_info.temporal_id;

-      rc_api_->ComputeQP(frame_params);

-      ASSERT_EQ(rc_api_->GetQP(), frame_info.base_q);

-      ASSERT_EQ(rc_api_->GetLoopfilterLevel(), frame_info.filter_level_);

-      rc_api_->PostEncodeUpdate(frame_info.bytes_used);

-    }

-  }

- private:

-  void SetConfigOneLayer() {

-    rc_cfg_.width = 1280;

-    rc_cfg_.height = 720;

-    rc_cfg_.max_quantizer = 52;

-    rc_cfg_.min_quantizer = 2;

-    rc_cfg_.target_bandwidth = 1000;

-    rc_cfg_.buf_initial_sz = 600;

-    rc_cfg_.buf_optimal_sz = 600;

-    rc_cfg_.buf_sz = 1000;

-    rc_cfg_.undershoot_pct = 50;

-    rc_cfg_.overshoot_pct = 50;

-    rc_cfg_.max_intra_bitrate_pct = 1000;

-    rc_cfg_.framerate = 30.0;

-    rc_cfg_.ss_number_layers = 1;

-    rc_cfg_.ts_number_layers = 1;

-    rc_cfg_.scaling_factor_num[0] = 1;

-    rc_cfg_.scaling_factor_den[0] = 1;

-    rc_cfg_.layer_target_bitrate[0] = 1000;

-    rc_cfg_.max_quantizers[0] = 52;

-    rc_cfg_.min_quantizers[0] = 2;

-  }

-  void SetConfigSVC() {

-    rc_cfg_.width = 1280;

-    rc_cfg_.height = 720;

-    rc_cfg_.max_quantizer = 56;

-    rc_cfg_.min_quantizer = 2;

-    rc_cfg_.target_bandwidth = 1600;

-    rc_cfg_.buf_initial_sz = 500;

-    rc_cfg_.buf_optimal_sz = 600;

-    rc_cfg_.buf_sz = 1000;

-    rc_cfg_.undershoot_pct = 50;

-    rc_cfg_.overshoot_pct = 50;

-    rc_cfg_.max_intra_bitrate_pct = 900;

-    rc_cfg_.framerate = 30.0;

-    rc_cfg_.ss_number_layers = 3;

-    rc_cfg_.ts_number_layers = 3;

-    rc_cfg_.scaling_factor_num[0] = 1;

-    rc_cfg_.scaling_factor_den[0] = 4;

-    rc_cfg_.scaling_factor_num[1] = 2;

-    rc_cfg_.scaling_factor_den[1] = 4;

-    rc_cfg_.scaling_factor_num[2] = 4;

-    rc_cfg_.scaling_factor_den[2] = 4;

-    rc_cfg_.ts_rate_decimator[0] = 4;

-    rc_cfg_.ts_rate_decimator[1] = 2;

-    rc_cfg_.ts_rate_decimator[2] = 1;

-    rc_cfg_.layer_target_bitrate[0] = 100;

-    rc_cfg_.layer_target_bitrate[1] = 140;

-    rc_cfg_.layer_target_bitrate[2] = 200;

-    rc_cfg_.layer_target_bitrate[3] = 250;

-    rc_cfg_.layer_target_bitrate[4] = 350;

-    rc_cfg_.layer_target_bitrate[5] = 500;

-    rc_cfg_.layer_target_bitrate[6] = 450;

-    rc_cfg_.layer_target_bitrate[7] = 630;

-    rc_cfg_.layer_target_bitrate[8] = 900;

-    for (int sl = 0; sl < rc_cfg_.ss_number_layers; ++sl) {

-      for (int tl = 0; tl < rc_cfg_.ts_number_layers; ++tl) {

-        const int i = sl * rc_cfg_.ts_number_layers + tl;

-        rc_cfg_.max_quantizers[i] = 56;

-        rc_cfg_.min_quantizers[i] = 2;

-      }

-    }

-  }

-  std::unique_ptr<libvpx::VP9RateControlRTC> rc_api_;

-  libvpx::VP9RateControlRtcConfig rc_cfg_;

-};

-TEST_F(RcInterfaceTest, OneLayer) { RunOneLayer(); }

-TEST_F(RcInterfaceTest, SVC) { RunSVC(); }

-}  // namespace

-int main(int argc, char **argv) {

-  ::testing::InitGoogleTest(&argc, argv);

-  return RUN_ALL_TESTS();

-}

--- a/test/sad_test.cc

+++ b/test/sad_test.cc

@@ -26,6 +26,10 @@

 #include "vpx_ports/msvc.h"

 #include "vpx_ports/vpx_timer.h"

+// const[expr] should be sufficient for DECLARE_ALIGNED but early

+// implementations of c++11 appear to have some issues with it.

+#define kDataAlignment 32

 template <typename Function>

 struct TestParams {

   TestParams(int w, int h, Function f, int bd = -1)

@@ -117,9 +121,6 @@

  protected:

   // Handle blocks up to 4 blocks 64x64 with stride up to 128

   // crbug.com/webm/1660

-  // const[expr] should be sufficient for DECLARE_ALIGNED but early

-  // implementations of c++11 appear to have some issues with it.

-  enum { kDataAlignment = 32 };

   static const int kDataBlockSize = 64 * 128;

   static const int kDataBufferSize = 4 * kDataBlockSize;

--- a/test/simple_encode_test.cc

+++ b/test/simple_encode_test.cc

@@ -13,6 +13,7 @@

 #include <string>

 #include <vector>

 #include "third_party/googletest/src/include/gtest/gtest.h"

+#include "test/video_source.h"

 #include "vp9/simple_encode.h"

 namespace vp9 {

@@ -36,7 +37,8 @@

   const int frame_rate_den_ = 1;

   const int target_bitrate_ = 1000;

   const int num_frames_ = 17;

-  const std::string in_file_path_str_ = "bus_352x288_420_f20_b8.yuv";

+  const std::string in_file_path_str_ =

+      libvpx_test::GetDataPath() + "/bus_352x288_420_f20_b8.yuv";

};

 TEST_F(SimpleEncodeTest, ComputeFirstPassStats) {

--- a/test/svc_datarate_test.cc

+++ b/test/svc_datarate_test.cc

@@ -84,6 +84,7 @@

       prev_frame_width[i] = 320;

       prev_frame_height[i] = 240;

+    ksvc_flex_noupd_tlenh_ = false;

   virtual void BeginPassHook(unsigned int /*pass*/) {}

@@ -91,9 +92,10 @@

   // bypass/flexible mode. The pattern corresponds to the pattern

   // VP9E_TEMPORAL_LAYERING_MODE_0101 (temporal_layering_mode == 2) used in

   // non-flexible mode, except that we disable inter-layer prediction.

-  void set_frame_flags_bypass_mode(

-      int tl, int num_spatial_layers, int is_key_frame,

-      vpx_svc_ref_frame_config_t *ref_frame_config) {

+  void set_frame_flags_bypass_mode(int tl, int num_spatial_layers,

+                                   int is_key_frame,

+                                   vpx_svc_ref_frame_config_t *ref_frame_config,

+                                   int noupdate_tlenh) {

     for (int sl = 0; sl < num_spatial_layers; ++sl)

       ref_frame_config->update_buffer_slot[sl] = 0;

@@ -151,9 +153,12 @@

           ref_frame_config->reference_last[sl] = 1;

           ref_frame_config->reference_golden[sl] = 0;

           ref_frame_config->reference_alt_ref[sl] = 0;

-          ref_frame_config->update_buffer_slot[sl] |=

-              1 << ref_frame_config->alt_fb_idx[sl];

+          // Non reference frame on top temporal top spatial.

+          ref_frame_config->update_buffer_slot[sl] = 0;

+        // Force no update on all spatial layers for temporal enhancement layer

+        // frames.

+        if (noupdate_tlenh) ref_frame_config->update_buffer_slot[sl] = 0;

@@ -244,6 +249,22 @@

+    if (ksvc_flex_noupd_tlenh_) {

+      vpx_svc_layer_id_t layer_id;

+      layer_id.spatial_layer_id = 0;

+      layer_id.temporal_layer_id = (video->frame() % 2 != 0);

+      temporal_layer_id_ = layer_id.temporal_layer_id;

+      for (int i = 0; i < number_spatial_layers_; i++) {

+        layer_id.temporal_layer_id_per_spatial[i] = temporal_layer_id_;

+        ref_frame_config.duration[i] = 1;

+      }

+      encoder->Control(VP9E_SET_SVC_LAYER_ID, &layer_id);

+      set_frame_flags_bypass_mode(layer_id.temporal_layer_id,

+                                  number_spatial_layers_, 0, &ref_frame_config,

+                                  1);

+      encoder->Control(VP9E_SET_SVC_REF_FRAME_CONFIG, &ref_frame_config);

+    }

     if (update_pattern_ && video->frame() >= 100) {

       vpx_svc_layer_id_t layer_id;

       if (video->frame() == 100) {

@@ -254,11 +275,14 @@

       layer_id.spatial_layer_id = 0;

       layer_id.temporal_layer_id = (video->frame() % 2 != 0);

       temporal_layer_id_ = layer_id.temporal_layer_id;

-      for (int i = 0; i < number_spatial_layers_; i++)

+      for (int i = 0; i < number_spatial_layers_; i++) {

         layer_id.temporal_layer_id_per_spatial[i] = temporal_layer_id_;

+        ref_frame_config.duration[i] = 1;

+      }

       encoder->Control(VP9E_SET_SVC_LAYER_ID, &layer_id);

       set_frame_flags_bypass_mode(layer_id.temporal_layer_id,

-                                  number_spatial_layers_, 0, &ref_frame_config);

+                                  number_spatial_layers_, 0, &ref_frame_config,

+                                  0);

       encoder->Control(VP9E_SET_SVC_REF_FRAME_CONFIG, &ref_frame_config);

@@ -557,9 +581,14 @@

   virtual void MismatchHook(const vpx_image_t *img1, const vpx_image_t *img2) {

-    double mismatch_psnr = compute_psnr(img1, img2);

-    mismatch_psnr_ += mismatch_psnr;

-    ++mismatch_nframes_;

+    // TODO(marpan): Look into why an assert is triggered in compute_psnr

+    // for mismatch frames for the special test case: ksvc_flex_noupd_tlenh.

+    // Has to do with dropped frames in bypass/flexible svc mode.

+    if (!ksvc_flex_noupd_tlenh_) {

+      double mismatch_psnr = compute_psnr(img1, img2);

+      mismatch_psnr_ += mismatch_psnr;

+      ++mismatch_nframes_;

+    }

   unsigned int GetMismatchFrames() { return mismatch_nframes_; }

@@ -604,6 +633,7 @@

   int num_resize_down_;

   unsigned int prev_frame_width[VPX_MAX_LAYERS];

   unsigned int prev_frame_height[VPX_MAX_LAYERS];

+  bool ksvc_flex_noupd_tlenh_;

  private:

   virtual void SetConfig(const int num_temporal_layer) {

@@ -722,8 +752,6 @@

   cfg_.g_threads = 1;

   cfg_.rc_dropframe_thresh = 30;

   cfg_.kf_max_dist = 9999;

-  // Change SVC pattern on the fly.

-  update_pattern_ = 1;

   ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,

                                        0, 400);

   top_sl_width_ = 640;

@@ -730,6 +758,8 @@

   top_sl_height_ = 480;

   cfg_.rc_target_bitrate = 800;

   ResetModel();

+  // Change SVC pattern on the fly.

+  update_pattern_ = 1;

   AssignLayerBitrates();

   ASSERT_NO_FATAL_FAILURE(RunLoop(&video));

   CheckLayerRateTargeting(number_spatial_layers_, number_temporal_layers_, 0.78,

@@ -1104,6 +1134,36 @@

   // encoder will avoid loopfilter on these frames.

   EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames());

 #endif

+}

+// Check basic rate targeting for 1 pass CBR SVC: 3 spatial layers and

+// 2 temporal layers, for KSVC in flexible mode with no update of reference

+// frames for all spatial layers on TL > 0 superframes.

+// Run HD clip with 4 threads.

+TEST_P(DatarateOnePassCbrSvcFrameDropMultiBR, OnePassCbrSvc3SL2TL4ThKSVCFlex) {

+  SetSvcConfig(3, 2);

+  cfg_.rc_buf_initial_sz = 500;

+  cfg_.rc_buf_optimal_sz = 500;

+  cfg_.rc_buf_sz = 1000;

+  cfg_.rc_min_quantizer = 0;

+  cfg_.rc_max_quantizer = 63;

+  cfg_.g_threads = 4;

+  cfg_.rc_dropframe_thresh = 30;

+  cfg_.kf_max_dist = 9999;

+  ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60);

+  top_sl_width_ = 1280;

+  top_sl_height_ = 720;

+  layer_framedrop_ = 0;

+  const int bitrates[3] = { 200, 400, 600 };

+  cfg_.rc_target_bitrate = bitrates[GET_PARAM(3)];

+  ResetModel();

+  layer_framedrop_ = GET_PARAM(2);

+  AssignLayerBitrates();

+  ksvc_flex_noupd_tlenh_ = true;

+  cfg_.temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS;

+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));

+  CheckLayerRateTargeting(number_spatial_layers_, number_temporal_layers_, 0.58,

+                          1.2);

 // Params: speed setting, inter-layer prediction mode.

--- a/test/test-data.mk

+++ b/test/test-data.mk

@@ -27,8 +27,6 @@

 LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += rush_hour_444.y4m

 LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += screendata.y4m

 LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += niklas_640_480_30.yuv

-LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += rc_interface_test_one_layer

-LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += rc_interface_test_svc

 LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += bus_352x288_420_f20_b8.yuv

 # Test vectors

--- a/test/test-data.sha1

+++ b/test/test-data.sha1

@@ -869,5 +869,3 @@

 518a0be998afece76d3df76047d51e256c591ff2 *invalid-bug-148271109.ivf

 d3964f9dad9f60363c81b688324d95b4ec7c8038 *invalid-bug-148271109.ivf.res

 ad18ca16f0a249fb3b7c38de0d9b327fed273f96 *hantro_collage_w352h288_nv12.yuv

-03f827c0e36ff9a6e23c5cc11936924e4f1827ab *rc_interface_test_one_layer

-99e4f4c2961d46dc286db230090a39d78460b25d *rc_interface_test_svc

--- a/test/test.mk

+++ b/test/test.mk

@@ -193,10 +193,8 @@

 endif

 ifeq ($(CONFIG_VP9_ENCODER)$(CONFIG_VP9_TEMPORAL_DENOISING),yesyes)

-ifneq (, $(filter yes, $(HAVE_SSE2) $(HAVE_AVX2)))

 LIBVPX_TEST_SRCS-yes += vp9_denoiser_test.cc

 endif

-endif

 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_arf_freq_test.cc

 ifeq ($(CONFIG_VP9_ENCODER),yes)

@@ -215,7 +213,14 @@

 TEST_INTRA_PRED_SPEED_SRCS-yes := test_intra_pred_speed.cc

 TEST_INTRA_PRED_SPEED_SRCS-yes += ../md5_utils.h ../md5_utils.c

-RC_INTERFACE_TEST_SRCS-$(CONFIG_VP9_ENCODER) := ratectrl_rtc_test.cc

+RC_INTERFACE_TEST_SRCS-yes := test_rc_interface.cc

+RC_INTERFACE_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_ratectrl_rtc_test.cc

+RC_INTERFACE_TEST_SRCS-$(CONFIG_VP8_ENCODER) += vp8_ratectrl_rtc_test.cc

+RC_INTERFACE_TEST_SRCS-$(CONFIG_ENCODERS) += encode_test_driver.cc

+RC_INTERFACE_TEST_SRCS-$(CONFIG_ENCODERS) += encode_test_driver.h

+RC_INTERFACE_TEST_SRCS-yes += decode_test_driver.cc

+RC_INTERFACE_TEST_SRCS-yes += decode_test_driver.h

+RC_INTERFACE_TEST_SRCS-yes += codec_factory.h

 endif # CONFIG_SHARED

--- /dev/null

+++ b/test/test_rc_interface.cc

@@ -1,0 +1,6 @@

+#include "third_party/googletest/src/include/gtest/gtest.h"

+int main(int argc, char **argv) {

+  ::testing::InitGoogleTest(&argc, argv);

+  return RUN_ALL_TESTS();

+}

--- a/test/vp8_denoiser_sse2_test.cc

+++ b/test/vp8_denoiser_sse2_test.cc

@@ -40,7 +40,12 @@

   int increase_denoising_;

};

+// TODO(https://crbug.com/webm/1718): This test fails with gcc 8-10.

+#if defined(__GNUC__) && __GNUC__ >= 8

+TEST_P(VP8DenoiserTest, DISABLED_BitexactCheck) {

+#else

 TEST_P(VP8DenoiserTest, BitexactCheck) {

+#endif

   ACMRandom rnd(ACMRandom::DeterministicSeed());

   const int count_test_block = 4000;

   const int stride = 16;

@@ -87,7 +92,7 @@

     // Check bitexactness.

     for (int h = 0; h < 16; ++h) {

       for (int w = 0; w < 16; ++w) {

-        EXPECT_EQ(avg_block_c[h * stride + w], avg_block_sse2[h * stride + w]);

+        ASSERT_EQ(avg_block_c[h * stride + w], avg_block_sse2[h * stride + w]);

@@ -103,7 +108,7 @@

     // Check bitexactness.

     for (int h = 0; h < 16; ++h) {

       for (int w = 0; w < 16; ++w) {

-        EXPECT_EQ(avg_block_c[h * stride + w], avg_block_sse2[h * stride + w]);

+        ASSERT_EQ(avg_block_c[h * stride + w], avg_block_sse2[h * stride + w]);

--- /dev/null

+++ b/test/vp8_ratectrl_rtc_test.cc

@@ -1,0 +1,343 @@

+/*

+ *  Copyright (c) 2021 The WebM project authors. All Rights Reserved.

+ *

+ *  Use of this source code is governed by a BSD-style license

+ *  that can be found in the LICENSE file in the root of the source

+ *  tree. An additional intellectual property rights grant can be found

+ *  in the file PATENTS.  All contributing project authors may

+ *  be found in the AUTHORS file in the root of the source tree.

+ */

+#include <fstream>  // NOLINT

+#include <string>

+#include "./vpx_config.h"

+#include "third_party/googletest/src/include/gtest/gtest.h"

+#include "test/codec_factory.h"

+#include "test/encode_test_driver.h"

+#include "test/i420_video_source.h"

+#include "test/util.h"

+#include "test/video_source.h"

+#include "vp8/vp8_ratectrl_rtc.h"

+#include "vpx/vpx_codec.h"

+#include "vpx_ports/bitops.h"

+namespace {

+struct Vp8RCTestVideo {

+  Vp8RCTestVideo() {}

+  Vp8RCTestVideo(const char *name_, int width_, int height_,

+                 unsigned int frames_)

+      : name(name_), width(width_), height(height_), frames(frames_) {}

+  friend std::ostream &operator<<(std::ostream &os,

+                                  const Vp8RCTestVideo &video) {

+    os << video.name << " " << video.width << " " << video.height << " "

+       << video.frames;

+    return os;

+  }

+  const char *name;

+  int width;

+  int height;

+  unsigned int frames;

+};

+const Vp8RCTestVideo kVp8RCTestVectors[] = {

+  Vp8RCTestVideo("niklas_640_480_30.yuv", 640, 480, 470),

+  Vp8RCTestVideo("desktop_office1.1280_720-020.yuv", 1280, 720, 300),

+};

+class Vp8RcInterfaceTest

+    : public ::libvpx_test::EncoderTest,

+      public ::libvpx_test::CodecTestWith2Params<int, Vp8RCTestVideo> {

+ public:

+  Vp8RcInterfaceTest()

+      : EncoderTest(GET_PARAM(0)), key_interval_(3000), encoder_exit_(false) {}

+  virtual ~Vp8RcInterfaceTest() {}

+ protected:

+  virtual void SetUp() {

+    InitializeConfig();

+    SetMode(::libvpx_test::kRealTime);

+  }

+  // From error_resilience_test.cc

+  int SetFrameFlags(int frame_num, int num_temp_layers) {

+    int frame_flags = 0;

+    if (num_temp_layers == 2) {

+      if (frame_num % 2 == 0) {

+        // Layer 0: predict from L and ARF, update L.

+        frame_flags =

+            VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;

+      } else {

+        // Layer 1: predict from L, G and ARF, and update G.

+        frame_flags = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST |

+                      VP8_EFLAG_NO_UPD_ENTROPY;

+      }

+    } else if (num_temp_layers == 3) {

+      if (frame_num % 4 == 0) {

+        // Layer 0: predict from L, update L.

+        frame_flags = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |

+                      VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF;

+      } else if ((frame_num - 2) % 4 == 0) {

+        // Layer 1: predict from L, G,  update G.

+        frame_flags =

+            VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_REF_ARF;

+      } else if ((frame_num - 1) % 2 == 0) {

+        // Layer 2: predict from L, G, ARF; update ARG.

+        frame_flags = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_LAST;

+      }

+    }

+    return frame_flags;

+  }

+  int SetLayerId(int frame_num, int num_temp_layers) {

+    int layer_id = 0;

+    if (num_temp_layers == 2) {

+      if (frame_num % 2 == 0) {

+        layer_id = 0;

+      } else {

+        layer_id = 1;

+      }

+    } else if (num_temp_layers == 3) {

+      if (frame_num % 4 == 0) {

+        layer_id = 0;

+      } else if ((frame_num - 2) % 4 == 0) {

+        layer_id = 1;

+      } else if ((frame_num - 1) % 2 == 0) {

+        layer_id = 2;

+      }

+    }

+    return layer_id;

+  }

+  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,

+                                  ::libvpx_test::Encoder *encoder) {

+    if (rc_cfg_.ts_number_layers > 1) {

+      const int layer_id = SetLayerId(video->frame(), cfg_.ts_number_layers);

+      const int frame_flags =

+          SetFrameFlags(video->frame(), cfg_.ts_number_layers);

+      frame_params_.temporal_layer_id = layer_id;

+      if (video->frame() > 0) {

+        encoder->Control(VP8E_SET_TEMPORAL_LAYER_ID, layer_id);

+        encoder->Control(VP8E_SET_FRAME_FLAGS, frame_flags);

+      }

+    } else {

+      if (video->frame() == 0) {

+        encoder->Control(VP8E_SET_CPUUSED, -6);

+        encoder->Control(VP8E_SET_RTC_EXTERNAL_RATECTRL, 1);

+        encoder->Control(VP8E_SET_MAX_INTRA_BITRATE_PCT, 1000);

+      }

+      if (frame_params_.frame_type == INTER_FRAME) {

+        // Disable golden frame update.

+        frame_flags_ |= VP8_EFLAG_NO_UPD_GF;

+        frame_flags_ |= VP8_EFLAG_NO_UPD_ARF;

+      }

+    }

+    frame_params_.frame_type =

+        video->frame() % key_interval_ == 0 ? KEY_FRAME : INTER_FRAME;

+    encoder_exit_ = video->frame() == test_video_.frames;

+  }

+  virtual void PostEncodeFrameHook(::libvpx_test::Encoder *encoder) {

+    if (encoder_exit_) {

+      return;

+    }

+    int qp;

+    encoder->Control(VP8E_GET_LAST_QUANTIZER, &qp);

+    rc_api_->ComputeQP(frame_params_);

+    ASSERT_EQ(rc_api_->GetQP(), qp);

+  }

+  virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {

+    rc_api_->PostEncodeUpdate(pkt->data.frame.sz);

+  }

+  void RunOneLayer() {

+    test_video_ = GET_PARAM(2);

+    target_bitrate_ = GET_PARAM(1);

+    if (test_video_.width == 1280 && target_bitrate_ == 200) return;

+    if (test_video_.width == 640 && target_bitrate_ == 1000) return;

+    SetConfig();

+    rc_api_ = libvpx::VP8RateControlRTC::Create(rc_cfg_);

+    rc_api_->UpdateRateControl(rc_cfg_);

+    ::libvpx_test::I420VideoSource video(test_video_.name, test_video_.width,

+                                         test_video_.height, 30, 1, 0,

+                                         test_video_.frames);

+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));

+  }

+  void RunPeriodicKey() {

+    test_video_ = GET_PARAM(2);

+    target_bitrate_ = GET_PARAM(1);

+    if (test_video_.width == 1280 && target_bitrate_ == 200) return;

+    if (test_video_.width == 640 && target_bitrate_ == 1000) return;

+    key_interval_ = 100;

+    SetConfig();

+    rc_api_ = libvpx::VP8RateControlRTC::Create(rc_cfg_);

+    rc_api_->UpdateRateControl(rc_cfg_);

+    ::libvpx_test::I420VideoSource video(test_video_.name, test_video_.width,

+                                         test_video_.height, 30, 1, 0,

+                                         test_video_.frames);

+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));

+  }

+  void RunTemporalLayers2TL() {

+    test_video_ = GET_PARAM(2);

+    target_bitrate_ = GET_PARAM(1);

+    if (test_video_.width == 1280 && target_bitrate_ == 200) return;

+    if (test_video_.width == 640 && target_bitrate_ == 1000) return;

+    SetConfigTemporalLayers(2);

+    rc_api_ = libvpx::VP8RateControlRTC::Create(rc_cfg_);

+    rc_api_->UpdateRateControl(rc_cfg_);

+    ::libvpx_test::I420VideoSource video(test_video_.name, test_video_.width,

+                                         test_video_.height, 30, 1, 0,

+                                         test_video_.frames);

+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));

+  }

+  void RunTemporalLayers3TL() {

+    test_video_ = GET_PARAM(2);

+    target_bitrate_ = GET_PARAM(1);

+    if (test_video_.width == 1280 && target_bitrate_ == 200) return;

+    if (test_video_.width == 640 && target_bitrate_ == 1000) return;

+    SetConfigTemporalLayers(3);

+    rc_api_ = libvpx::VP8RateControlRTC::Create(rc_cfg_);

+    rc_api_->UpdateRateControl(rc_cfg_);

+    ::libvpx_test::I420VideoSource video(test_video_.name, test_video_.width,

+                                         test_video_.height, 30, 1, 0,

+                                         test_video_.frames);

+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));

+  }

+ private:

+  void SetConfig() {

+    rc_cfg_.width = test_video_.width;

+    rc_cfg_.height = test_video_.height;

+    rc_cfg_.max_quantizer = 60;

+    rc_cfg_.min_quantizer = 2;

+    rc_cfg_.target_bandwidth = target_bitrate_;

+    rc_cfg_.buf_initial_sz = 600;

+    rc_cfg_.buf_optimal_sz = 600;

+    rc_cfg_.buf_sz = target_bitrate_;

+    rc_cfg_.undershoot_pct = 50;

+    rc_cfg_.overshoot_pct = 50;

+    rc_cfg_.max_intra_bitrate_pct = 1000;

+    rc_cfg_.framerate = 30.0;

+    rc_cfg_.layer_target_bitrate[0] = target_bitrate_;

+    // Encoder settings for ground truth.

+    cfg_.g_w = test_video_.width;

+    cfg_.g_h = test_video_.height;

+    cfg_.rc_undershoot_pct = 50;

+    cfg_.rc_overshoot_pct = 50;

+    cfg_.rc_buf_initial_sz = 600;

+    cfg_.rc_buf_optimal_sz = 600;

+    cfg_.rc_buf_sz = target_bitrate_;

+    cfg_.rc_dropframe_thresh = 0;

+    cfg_.rc_min_quantizer = 2;

+    cfg_.rc_max_quantizer = 60;

+    cfg_.rc_end_usage = VPX_CBR;

+    cfg_.g_lag_in_frames = 0;

+    cfg_.g_error_resilient = 1;

+    cfg_.rc_target_bitrate = target_bitrate_;

+    cfg_.kf_min_dist = key_interval_;

+    cfg_.kf_max_dist = key_interval_;

+  }

+  void SetConfigTemporalLayers(int temporal_layers) {

+    rc_cfg_.width = test_video_.width;

+    rc_cfg_.height = test_video_.height;

+    rc_cfg_.max_quantizer = 60;

+    rc_cfg_.min_quantizer = 2;

+    rc_cfg_.target_bandwidth = target_bitrate_;

+    rc_cfg_.buf_initial_sz = 600;

+    rc_cfg_.buf_optimal_sz = 600;

+    rc_cfg_.buf_sz = target_bitrate_;

+    rc_cfg_.undershoot_pct = 50;

+    rc_cfg_.overshoot_pct = 50;

+    rc_cfg_.max_intra_bitrate_pct = 1000;

+    rc_cfg_.framerate = 30.0;

+    if (temporal_layers == 2) {

+      rc_cfg_.layer_target_bitrate[0] = 60 * target_bitrate_ / 100;

+      rc_cfg_.layer_target_bitrate[1] = target_bitrate_;

+      rc_cfg_.ts_rate_decimator[0] = 2;

+      rc_cfg_.ts_rate_decimator[1] = 1;

+    } else if (temporal_layers == 3) {

+      rc_cfg_.layer_target_bitrate[0] = 40 * target_bitrate_ / 100;

+      rc_cfg_.layer_target_bitrate[1] = 60 * target_bitrate_ / 100;

+      rc_cfg_.layer_target_bitrate[2] = target_bitrate_;

+      rc_cfg_.ts_rate_decimator[0] = 4;

+      rc_cfg_.ts_rate_decimator[1] = 2;

+      rc_cfg_.ts_rate_decimator[2] = 1;

+    }

+    rc_cfg_.ts_number_layers = temporal_layers;

+    // Encoder settings for ground truth.

+    cfg_.g_w = test_video_.width;

+    cfg_.g_h = test_video_.height;

+    cfg_.rc_undershoot_pct = 50;

+    cfg_.rc_overshoot_pct = 50;

+    cfg_.rc_buf_initial_sz = 600;

+    cfg_.rc_buf_optimal_sz = 600;

+    cfg_.rc_buf_sz = target_bitrate_;

+    cfg_.rc_dropframe_thresh = 0;

+    cfg_.rc_min_quantizer = 2;

+    cfg_.rc_max_quantizer = 60;

+    cfg_.rc_end_usage = VPX_CBR;

+    cfg_.g_lag_in_frames = 0;

+    cfg_.g_error_resilient = 1;

+    cfg_.rc_target_bitrate = target_bitrate_;

+    cfg_.kf_min_dist = key_interval_;

+    cfg_.kf_max_dist = key_interval_;

+    // 2 Temporal layers, no spatial layers, CBR mode.

+    cfg_.ss_number_layers = 1;

+    cfg_.ts_number_layers = temporal_layers;

+    if (temporal_layers == 2) {

+      cfg_.ts_rate_decimator[0] = 2;

+      cfg_.ts_rate_decimator[1] = 1;

+      cfg_.ts_periodicity = 2;

+      cfg_.ts_target_bitrate[0] = 60 * cfg_.rc_target_bitrate / 100;

+      cfg_.ts_target_bitrate[1] = cfg_.rc_target_bitrate;

+    } else if (temporal_layers == 3) {

+      cfg_.ts_rate_decimator[0] = 4;

+      cfg_.ts_rate_decimator[1] = 2;

+      cfg_.ts_rate_decimator[2] = 1;

+      cfg_.ts_periodicity = 4;

+      cfg_.ts_target_bitrate[0] = 40 * cfg_.rc_target_bitrate / 100;

+      cfg_.ts_target_bitrate[1] = 60 * cfg_.rc_target_bitrate / 100;

+      cfg_.ts_target_bitrate[2] = cfg_.rc_target_bitrate;

+    }

+  }

+  std::unique_ptr<libvpx::VP8RateControlRTC> rc_api_;

+  libvpx::VP8RateControlRtcConfig rc_cfg_;

+  int key_interval_;

+  int target_bitrate_;

+  Vp8RCTestVideo test_video_;

+  libvpx::VP8FrameParamsQpRTC frame_params_;

+  bool encoder_exit_;

+};

+TEST_P(Vp8RcInterfaceTest, OneLayer) { RunOneLayer(); }

+TEST_P(Vp8RcInterfaceTest, OneLayerPeriodicKey) { RunPeriodicKey(); }

+TEST_P(Vp8RcInterfaceTest, TemporalLayers2TL) { RunTemporalLayers2TL(); }

+TEST_P(Vp8RcInterfaceTest, TemporalLayers3TL) { RunTemporalLayers3TL(); }

+VP8_INSTANTIATE_TEST_SUITE(Vp8RcInterfaceTest,

+                           ::testing::Values(200, 400, 1000),

+                           ::testing::ValuesIn(kVp8RCTestVectors));

+}  // namespace

--- a/test/vp9_end_to_end_test.cc

+++ b/test/vp9_end_to_end_test.cc

@@ -31,7 +31,7 @@

   { 36.0, 37.0, 37.0, 37.0, 37.0 }, { 35.0, 36.0, 36.0, 36.0, 36.0 },

   { 34.0, 35.0, 35.0, 35.0, 35.0 }, { 33.0, 34.0, 34.0, 34.0, 34.0 },

   { 32.0, 33.0, 33.0, 33.0, 33.0 }, { 28.0, 32.0, 32.0, 32.0, 32.0 },

-  { 28.5, 31.0, 31.0, 31.0, 31.0 }, { 27.5, 30.0, 30.0, 30.0, 30.0 },

+  { 28.4, 31.0, 31.0, 31.0, 31.0 }, { 27.5, 30.0, 30.0, 30.0, 30.0 },

};

 typedef struct {

@@ -342,7 +342,7 @@

 VP9_INSTANTIATE_TEST_SUITE(EndToEndNV12,

                            ::testing::Values(::libvpx_test::kRealTime),

                            ::testing::ValuesIn(kTestVectorsNv12),

-                           ::testing::ValuesIn({ 6, 7, 8 }));

+                           ::testing::Values(6, 7, 8));

 VP9_INSTANTIATE_TEST_SUITE(EndToEndTestAdaptiveRDThresh,

                            ::testing::Values(5, 6, 7), ::testing::Values(8, 9));

--- a/test/vp9_ext_ratectrl_test.cc

+++ b/test/vp9_ext_ratectrl_test.cc

@@ -8,6 +8,7 @@

  *  be found in the AUTHORS file in the root of the source tree.

*/

+#include <cstdint>

 #include <new>

 #include "test/codec_factory.h"

@@ -20,7 +21,7 @@

 namespace {

 constexpr int kModelMagicNumber = 51396;

-constexpr unsigned int PrivMagicNumber = 5566;

+constexpr uintptr_t PrivMagicNumber = 5566;

 constexpr int kFrameNum = 5;

 constexpr int kLosslessCodingIndex = 2;

@@ -73,6 +74,7 @@

   EXPECT_EQ(encode_frame_info->coding_index, toy_rate_ctrl->coding_index);

   if (encode_frame_info->coding_index == 0) {

+    EXPECT_EQ(encode_frame_info->show_index, 0);

     EXPECT_EQ(encode_frame_info->gop_index, 0);

     EXPECT_EQ(encode_frame_info->frame_type, 0 /*kFrameTypeKey*/);

     EXPECT_EQ(encode_frame_info->ref_frame_valid_list[0],

@@ -84,6 +86,7 @@

   if (encode_frame_info->coding_index == 1) {

+    EXPECT_EQ(encode_frame_info->show_index, 4);

     EXPECT_EQ(encode_frame_info->gop_index, 1);

     EXPECT_EQ(encode_frame_info->frame_type, 2 /*kFrameTypeAltRef*/);

     EXPECT_EQ(encode_frame_info->ref_frame_valid_list[0],

@@ -104,6 +107,7 @@

   if (encode_frame_info->coding_index == 5) {

+    EXPECT_EQ(encode_frame_info->show_index, 4);

     EXPECT_EQ(encode_frame_info->gop_index, 0);

     EXPECT_EQ(encode_frame_info->frame_type, 3 /*kFrameTypeOverlay*/);

     EXPECT_EQ(encode_frame_info->ref_frame_valid_list[0],

@@ -125,6 +129,7 @@

   } else {

     frame_decision->q_index = 100;

+  frame_decision->max_frame_size = 0;

   return VPX_RC_OK;

@@ -139,6 +144,11 @@

   EXPECT_EQ(encode_frame_result->pixel_count, ref_pixel_count);

   if (toy_rate_ctrl->coding_index == kLosslessCodingIndex) {

     EXPECT_EQ(encode_frame_result->sse, 0);

+  }

+  if (toy_rate_ctrl->coding_index == kLosslessCodingIndex) {

+    EXPECT_EQ(encode_frame_result->actual_encoding_qindex, 0);

+  } else {

+    EXPECT_EQ(encode_frame_result->actual_encoding_qindex, 100);

   return VPX_RC_OK;

--- /dev/null

+++ b/test/vp9_ratectrl_rtc_test.cc

@@ -1,0 +1,373 @@

+/*

+ *  Copyright (c) 2020 The WebM project authors. All Rights Reserved.

+ *

+ *  Use of this source code is governed by a BSD-style license

+ *  that can be found in the LICENSE file in the root of the source

+ *  tree. An additional intellectual property rights grant can be found

+ *  in the file PATENTS.  All contributing project authors may

+ *  be found in the AUTHORS file in the root of the source tree.

+ */

+#include "vp9/ratectrl_rtc.h"

+#include <fstream>  // NOLINT

+#include <string>

+#include "./vpx_config.h"

+#include "third_party/googletest/src/include/gtest/gtest.h"

+#include "test/codec_factory.h"

+#include "test/encode_test_driver.h"

+#include "test/i420_video_source.h"

+#include "test/util.h"

+#include "test/video_source.h"

+#include "vpx/vpx_codec.h"

+#include "vpx_ports/bitops.h"

+namespace {

+const size_t kNumFrames = 300;

+const int kTemporalId[4] = { 0, 2, 1, 2 };

+class RcInterfaceTest

+    : public ::libvpx_test::EncoderTest,

+      public ::libvpx_test::CodecTestWith2Params<int, vpx_rc_mode> {

+ public:

+  RcInterfaceTest()

+      : EncoderTest(GET_PARAM(0)), aq_mode_(GET_PARAM(1)), key_interval_(3000),

+        encoder_exit_(false) {}

+  virtual ~RcInterfaceTest() {}

+ protected:

+  virtual void SetUp() {

+    InitializeConfig();

+    SetMode(::libvpx_test::kRealTime);

+  }

+  virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,

+                                  libvpx_test::Encoder *encoder) {

+    if (video->frame() == 0) {

+      encoder->Control(VP8E_SET_CPUUSED, 7);

+      encoder->Control(VP9E_SET_AQ_MODE, aq_mode_);

+      encoder->Control(VP9E_SET_TUNE_CONTENT, 0);

+      encoder->Control(VP8E_SET_MAX_INTRA_BITRATE_PCT, 1000);

+      encoder->Control(VP9E_SET_RTC_EXTERNAL_RATECTRL, 1);

+    }

+    frame_params_.frame_type =

+        video->frame() % key_interval_ == 0 ? KEY_FRAME : INTER_FRAME;

+    if (rc_cfg_.rc_mode == VPX_CBR && frame_params_.frame_type == INTER_FRAME) {

+      // Disable golden frame update.

+      frame_flags_ |= VP8_EFLAG_NO_UPD_GF;

+      frame_flags_ |= VP8_EFLAG_NO_UPD_ARF;

+    }

+    encoder_exit_ = video->frame() == kNumFrames;

+  }

+  virtual void PostEncodeFrameHook(::libvpx_test::Encoder *encoder) {

+    if (encoder_exit_) {

+      return;

+    }

+    int loopfilter_level, qp;

+    encoder->Control(VP9E_GET_LOOPFILTER_LEVEL, &loopfilter_level);

+    encoder->Control(VP8E_GET_LAST_QUANTIZER, &qp);

+    rc_api_->ComputeQP(frame_params_);

+    ASSERT_EQ(rc_api_->GetQP(), qp);

+    ASSERT_EQ(rc_api_->GetLoopfilterLevel(), loopfilter_level);

+  }

+  virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {

+    rc_api_->PostEncodeUpdate(pkt->data.frame.sz);

+  }

+  void RunOneLayer() {

+    SetConfig(GET_PARAM(2));

+    rc_api_ = libvpx::VP9RateControlRTC::Create(rc_cfg_);

+    frame_params_.spatial_layer_id = 0;

+    frame_params_.temporal_layer_id = 0;

+    ::libvpx_test::I420VideoSource video("desktop_office1.1280_720-020.yuv",

+                                         1280, 720, 30, 1, 0, kNumFrames);

+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));

+  }

+  void RunOneLayerVBRPeriodicKey() {

+    if (GET_PARAM(2) != VPX_VBR) return;

+    key_interval_ = 100;

+    SetConfig(VPX_VBR);

+    rc_api_ = libvpx::VP9RateControlRTC::Create(rc_cfg_);

+    frame_params_.spatial_layer_id = 0;

+    frame_params_.temporal_layer_id = 0;

+    ::libvpx_test::I420VideoSource video("desktop_office1.1280_720-020.yuv",

+                                         1280, 720, 30, 1, 0, kNumFrames);

+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));

+  }

+ private:

+  void SetConfig(vpx_rc_mode rc_mode) {

+    rc_cfg_.width = 1280;

+    rc_cfg_.height = 720;

+    rc_cfg_.max_quantizer = 52;

+    rc_cfg_.min_quantizer = 2;

+    rc_cfg_.target_bandwidth = 1000;

+    rc_cfg_.buf_initial_sz = 600;

+    rc_cfg_.buf_optimal_sz = 600;

+    rc_cfg_.buf_sz = 1000;

+    rc_cfg_.undershoot_pct = 50;

+    rc_cfg_.overshoot_pct = 50;

+    rc_cfg_.max_intra_bitrate_pct = 1000;

+    rc_cfg_.framerate = 30.0;

+    rc_cfg_.ss_number_layers = 1;

+    rc_cfg_.ts_number_layers = 1;

+    rc_cfg_.scaling_factor_num[0] = 1;

+    rc_cfg_.scaling_factor_den[0] = 1;

+    rc_cfg_.layer_target_bitrate[0] = 1000;

+    rc_cfg_.max_quantizers[0] = 52;

+    rc_cfg_.min_quantizers[0] = 2;

+    rc_cfg_.rc_mode = rc_mode;

+    rc_cfg_.aq_mode = aq_mode_;

+    // Encoder settings for ground truth.

+    cfg_.g_w = 1280;

+    cfg_.g_h = 720;

+    cfg_.rc_undershoot_pct = 50;

+    cfg_.rc_overshoot_pct = 50;

+    cfg_.rc_buf_initial_sz = 600;

+    cfg_.rc_buf_optimal_sz = 600;

+    cfg_.rc_buf_sz = 1000;

+    cfg_.rc_dropframe_thresh = 0;

+    cfg_.rc_min_quantizer = 2;

+    cfg_.rc_max_quantizer = 52;

+    cfg_.rc_end_usage = rc_mode;

+    cfg_.g_lag_in_frames = 0;

+    cfg_.g_error_resilient = 0;

+    cfg_.rc_target_bitrate = 1000;

+    cfg_.kf_min_dist = key_interval_;

+    cfg_.kf_max_dist = key_interval_;

+  }

+  std::unique_ptr<libvpx::VP9RateControlRTC> rc_api_;

+  libvpx::VP9RateControlRtcConfig rc_cfg_;

+  int aq_mode_;

+  int key_interval_;

+  libvpx::VP9FrameParamsQpRTC frame_params_;

+  bool encoder_exit_;

+};

+class RcInterfaceSvcTest : public ::libvpx_test::EncoderTest,

+                           public ::libvpx_test::CodecTestWithParam<int> {

+ public:

+  RcInterfaceSvcTest() : EncoderTest(GET_PARAM(0)), aq_mode_(GET_PARAM(1)) {}

+  virtual ~RcInterfaceSvcTest() {}

+ protected:

+  virtual void SetUp() {

+    InitializeConfig();

+    SetMode(::libvpx_test::kRealTime);

+  }

+  virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,

+                                  ::libvpx_test::Encoder *encoder) {

+    if (video->frame() == 0) {

+      encoder->Control(VP8E_SET_CPUUSED, 7);

+      encoder->Control(VP9E_SET_AQ_MODE, aq_mode_);

+      encoder->Control(VP9E_SET_TUNE_CONTENT, 0);

+      encoder->Control(VP8E_SET_MAX_INTRA_BITRATE_PCT, 900);

+      encoder->Control(VP9E_SET_RTC_EXTERNAL_RATECTRL, 1);

+      encoder->Control(VP9E_SET_SVC, 1);

+      encoder->Control(VP9E_SET_SVC_PARAMETERS, &svc_params_);

+    }

+    frame_params_.frame_type = video->frame() == 0 ? KEY_FRAME : INTER_FRAME;

+    if (rc_cfg_.rc_mode == VPX_CBR && frame_params_.frame_type == INTER_FRAME) {

+      // Disable golden frame update.

+      frame_flags_ |= VP8_EFLAG_NO_UPD_GF;

+      frame_flags_ |= VP8_EFLAG_NO_UPD_ARF;

+    }

+    encoder_exit_ = video->frame() == kNumFrames;

+    current_superframe_ = video->frame();

+  }

+  virtual void PostEncodeFrameHook(::libvpx_test::Encoder *encoder) {

+    ::libvpx_test::CxDataIterator iter = encoder->GetCxData();

+    while (const vpx_codec_cx_pkt_t *pkt = iter.Next()) {

+      ParseSuperframeSizes(static_cast<const uint8_t *>(pkt->data.frame.buf),

+                           pkt->data.frame.sz);

+      for (int sl = 0; sl < rc_cfg_.ss_number_layers; sl++) {

+        frame_params_.spatial_layer_id = sl;

+        frame_params_.temporal_layer_id = kTemporalId[current_superframe_ % 4];

+        rc_api_->ComputeQP(frame_params_);

+        frame_params_.frame_type = INTER_FRAME;

+        rc_api_->PostEncodeUpdate(sizes_[sl]);

+      }

+    }

+    if (!encoder_exit_) {

+      int loopfilter_level, qp;

+      encoder->Control(VP9E_GET_LOOPFILTER_LEVEL, &loopfilter_level);

+      encoder->Control(VP8E_GET_LAST_QUANTIZER, &qp);

+      ASSERT_EQ(rc_api_->GetQP(), qp);

+      ASSERT_EQ(rc_api_->GetLoopfilterLevel(), loopfilter_level);

+    }

+  }

+  // This method needs to be overridden because non-reference frames are

+  // expected to be mismatched frames as the encoder will avoid loopfilter on

+  // these frames.

+  virtual void MismatchHook(const vpx_image_t * /*img1*/,

+                            const vpx_image_t * /*img2*/) {}

+  void RunSvc() {

+    SetConfigSvc();

+    rc_api_ = libvpx::VP9RateControlRTC::Create(rc_cfg_);

+    SetEncoderSvc();

+    ::libvpx_test::I420VideoSource video("desktop_office1.1280_720-020.yuv",

+                                         1280, 720, 30, 1, 0, kNumFrames);

+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));

+  }

+ private:

+  vpx_codec_err_t ParseSuperframeSizes(const uint8_t *data, size_t data_sz) {

+    uint8_t marker = *(data + data_sz - 1);

+    if ((marker & 0xe0) == 0xc0) {

+      const uint32_t frames = (marker & 0x7) + 1;

+      const uint32_t mag = ((marker >> 3) & 0x3) + 1;

+      const size_t index_sz = 2 + mag * frames;

+      // This chunk is marked as having a superframe index but doesn't have

+      // enough data for it, thus it's an invalid superframe index.

+      if (data_sz < index_sz) return VPX_CODEC_CORRUPT_FRAME;

+      {

+        const uint8_t marker2 = *(data + data_sz - index_sz);

+        // This chunk is marked as having a superframe index but doesn't have

+        // the matching marker byte at the front of the index therefore it's an

+        // invalid chunk.

+        if (marker != marker2) return VPX_CODEC_CORRUPT_FRAME;

+      }

+      const uint8_t *x = &data[data_sz - index_sz + 1];

+      for (uint32_t i = 0; i < frames; ++i) {

+        uint32_t this_sz = 0;

+        for (uint32_t j = 0; j < mag; ++j) this_sz |= (*x++) << (j * 8);

+        sizes_[i] = this_sz;

+      }

+    }

+    return VPX_CODEC_OK;

+  }

+  void SetEncoderSvc() {

+    cfg_.ss_number_layers = 3;

+    cfg_.ts_number_layers = 3;

+    cfg_.g_timebase.num = 1;

+    cfg_.g_timebase.den = 30;

+    svc_params_.scaling_factor_num[0] = 72;

+    svc_params_.scaling_factor_den[0] = 288;

+    svc_params_.scaling_factor_num[1] = 144;

+    svc_params_.scaling_factor_den[1] = 288;

+    svc_params_.scaling_factor_num[2] = 288;

+    svc_params_.scaling_factor_den[2] = 288;

+    for (int i = 0; i < VPX_MAX_LAYERS; ++i) {

+      svc_params_.max_quantizers[i] = 56;

+      svc_params_.min_quantizers[i] = 2;

+      svc_params_.speed_per_layer[i] = 7;

+    }

+    cfg_.rc_end_usage = VPX_CBR;

+    cfg_.g_lag_in_frames = 0;

+    cfg_.g_error_resilient = 0;

+    // 3 temporal layers

+    cfg_.ts_rate_decimator[0] = 4;

+    cfg_.ts_rate_decimator[1] = 2;

+    cfg_.ts_rate_decimator[2] = 1;

+    cfg_.temporal_layering_mode = 3;

+    cfg_.rc_buf_initial_sz = 500;

+    cfg_.rc_buf_optimal_sz = 600;

+    cfg_.rc_buf_sz = 1000;

+    cfg_.rc_min_quantizer = 2;

+    cfg_.rc_max_quantizer = 56;

+    cfg_.g_threads = 1;

+    cfg_.kf_max_dist = 9999;

+    cfg_.rc_target_bitrate = 1600;

+    cfg_.rc_overshoot_pct = 50;

+    cfg_.rc_undershoot_pct = 50;

+    cfg_.layer_target_bitrate[0] = 100;

+    cfg_.layer_target_bitrate[1] = 140;

+    cfg_.layer_target_bitrate[2] = 200;

+    cfg_.layer_target_bitrate[3] = 250;

+    cfg_.layer_target_bitrate[4] = 350;

+    cfg_.layer_target_bitrate[5] = 500;

+    cfg_.layer_target_bitrate[6] = 450;

+    cfg_.layer_target_bitrate[7] = 630;

+    cfg_.layer_target_bitrate[8] = 900;

+  }

+  void SetConfigSvc() {

+    rc_cfg_.width = 1280;

+    rc_cfg_.height = 720;

+    rc_cfg_.max_quantizer = 56;

+    rc_cfg_.min_quantizer = 2;

+    rc_cfg_.target_bandwidth = 1600;

+    rc_cfg_.buf_initial_sz = 500;

+    rc_cfg_.buf_optimal_sz = 600;

+    rc_cfg_.buf_sz = 1000;

+    rc_cfg_.undershoot_pct = 50;

+    rc_cfg_.overshoot_pct = 50;

+    rc_cfg_.max_intra_bitrate_pct = 900;

+    rc_cfg_.framerate = 30.0;

+    rc_cfg_.ss_number_layers = 3;

+    rc_cfg_.ts_number_layers = 3;

+    rc_cfg_.rc_mode = VPX_CBR;

+    rc_cfg_.aq_mode = aq_mode_;

+    rc_cfg_.scaling_factor_num[0] = 1;

+    rc_cfg_.scaling_factor_den[0] = 4;

+    rc_cfg_.scaling_factor_num[1] = 2;

+    rc_cfg_.scaling_factor_den[1] = 4;

+    rc_cfg_.scaling_factor_num[2] = 4;

+    rc_cfg_.scaling_factor_den[2] = 4;

+    rc_cfg_.ts_rate_decimator[0] = 4;

+    rc_cfg_.ts_rate_decimator[1] = 2;

+    rc_cfg_.ts_rate_decimator[2] = 1;

+    rc_cfg_.layer_target_bitrate[0] = 100;

+    rc_cfg_.layer_target_bitrate[1] = 140;

+    rc_cfg_.layer_target_bitrate[2] = 200;

+    rc_cfg_.layer_target_bitrate[3] = 250;

+    rc_cfg_.layer_target_bitrate[4] = 350;

+    rc_cfg_.layer_target_bitrate[5] = 500;

+    rc_cfg_.layer_target_bitrate[6] = 450;

+    rc_cfg_.layer_target_bitrate[7] = 630;

+    rc_cfg_.layer_target_bitrate[8] = 900;

+    for (int sl = 0; sl < rc_cfg_.ss_number_layers; ++sl) {

+      for (int tl = 0; tl < rc_cfg_.ts_number_layers; ++tl) {

+        const int i = sl * rc_cfg_.ts_number_layers + tl;

+        rc_cfg_.max_quantizers[i] = 56;

+        rc_cfg_.min_quantizers[i] = 2;

+      }

+    }

+  }

+  int aq_mode_;

+  std::unique_ptr<libvpx::VP9RateControlRTC> rc_api_;

+  libvpx::VP9RateControlRtcConfig rc_cfg_;

+  vpx_svc_extra_cfg_t svc_params_;

+  libvpx::VP9FrameParamsQpRTC frame_params_;

+  bool encoder_exit_;

+  int current_superframe_;

+  uint32_t sizes_[8];

+};

+TEST_P(RcInterfaceTest, OneLayer) { RunOneLayer(); }

+TEST_P(RcInterfaceTest, OneLayerVBRPeriodicKey) { RunOneLayerVBRPeriodicKey(); }

+TEST_P(RcInterfaceSvcTest, Svc) { RunSvc(); }

+VP9_INSTANTIATE_TEST_SUITE(RcInterfaceTest, ::testing::Values(0, 3),

+                           ::testing::Values(VPX_CBR, VPX_VBR));

+VP9_INSTANTIATE_TEST_SUITE(RcInterfaceSvcTest, ::testing::Values(0, 3));

+}  // namespace

--- a/test/y4m_test.cc

+++ b/test/y4m_test.cc

@@ -188,4 +188,55 @@

 INSTANTIATE_TEST_SUITE_P(C, Y4mVideoWriteTest,

                          ::testing::ValuesIn(kY4mTestVectors));

+static const char kY4MRegularHeader[] =

+    "YUV4MPEG2 W4 H4 F30:1 Ip A0:0 C420jpeg XYSCSS=420JPEG\n"

+    "FRAME\n"

+    "012345678912345601230123";

+TEST(Y4MHeaderTest, RegularHeader) {

+  libvpx_test::TempOutFile f;

+  fwrite(kY4MRegularHeader, 1, sizeof(kY4MRegularHeader), f.file());

+  fflush(f.file());

+  EXPECT_EQ(0, fseek(f.file(), 0, 0));

+  y4m_input y4m;

+  EXPECT_EQ(y4m_input_open(&y4m, f.file(), /*skip_buffer=*/NULL,

+                           /*num_skip=*/0, /*only_420=*/0),

+            0);

+  EXPECT_EQ(y4m.pic_w, 4);

+  EXPECT_EQ(y4m.pic_h, 4);

+  EXPECT_EQ(y4m.fps_n, 30);

+  EXPECT_EQ(y4m.fps_d, 1);

+  EXPECT_EQ(y4m.interlace, 'p');

+  EXPECT_EQ(strcmp("420jpeg", y4m.chroma_type), 0);

+  y4m_input_close(&y4m);

+}

+// Testing that headers over 100 characters can be parsed.

+static const char kY4MLongHeader[] =

+    "YUV4MPEG2 W4 H4 F30:1 Ip A0:0 C420jpeg XYSCSS=420JPEG "

+    "XCOLORRANGE=LIMITED XSOME_UNKNOWN_METADATA XOTHER_UNKNOWN_METADATA\n"

+    "FRAME\n"

+    "012345678912345601230123";

+TEST(Y4MHeaderTest, LongHeader) {

+  libvpx_test::TempOutFile f;

+  fwrite(kY4MLongHeader, 1, sizeof(kY4MLongHeader), f.file());

+  fflush(f.file());

+  EXPECT_EQ(fseek(f.file(), 0, 0), 0);

+  y4m_input y4m;

+  EXPECT_EQ(y4m_input_open(&y4m, f.file(), /*skip_buffer=*/NULL,

+                           /*num_skip=*/0, /*only_420=*/0),

+            0);

+  EXPECT_EQ(y4m.pic_w, 4);

+  EXPECT_EQ(y4m.pic_h, 4);

+  EXPECT_EQ(y4m.fps_n, 30);

+  EXPECT_EQ(y4m.fps_d, 1);

+  EXPECT_EQ(y4m.interlace, 'p');

+  EXPECT_EQ(strcmp("420jpeg", y4m.chroma_type), 0);

+  y4m_input_close(&y4m);

+}

 }  // namespace

--- a/third_party/libwebm/Android.mk

+++ b/third_party/libwebm/Android.mk

@@ -3,7 +3,7 @@

 include $(CLEAR_VARS)

 LOCAL_MODULE:= libwebm

 LOCAL_CPPFLAGS:=-D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS

-LOCAL_CPPFLAGS+=-D__STDC_LIMIT_MACROS -std=c++11

+LOCAL_CPPFLAGS+=-D__STDC_LIMIT_MACROS -std=gnu++11

 LOCAL_C_INCLUDES:= $(LOCAL_PATH)

 LOCAL_EXPORT_C_INCLUDES:= $(LOCAL_PATH)

--- a/tools.mk

+++ b/tools.mk

@@ -79,6 +79,7 @@

             --ver=$$(CONFIG_VS_VERSION)\

             --proj-guid=$$($$(@:.$(VCPROJ_SFX)=).GUID)\

             --src-path-bare="$(SRC_PATH_BARE)" \

+            --as=$$(AS) \

             $$(if $$(CONFIG_STATIC_MSVCRT),--static-crt) \

             --out=$$@ $$(INTERNAL_CFLAGS) $$(CFLAGS) \

             $$(INTERNAL_LDFLAGS) $$(LDFLAGS) $$^

--- a/tools_common.h

+++ b/tools_common.h

@@ -110,6 +110,8 @@

 #if defined(__GNUC__)

 #define VPX_NO_RETURN __attribute__((noreturn))

+#elif defined(_MSC_VER)

+#define VPX_NO_RETURN __declspec(noreturn)

 #else

 #define VPX_NO_RETURN

 #endif

@@ -117,14 +119,14 @@

 /* Sets a stdio stream into binary mode */

 FILE *set_binary_mode(FILE *stream);

-void die(const char *fmt, ...) VPX_NO_RETURN;

-void fatal(const char *fmt, ...) VPX_NO_RETURN;

+VPX_NO_RETURN void die(const char *fmt, ...);

+VPX_NO_RETURN void fatal(const char *fmt, ...);

 void warn(const char *fmt, ...);

-void die_codec(vpx_codec_ctx_t *ctx, const char *s) VPX_NO_RETURN;

+VPX_NO_RETURN void die_codec(vpx_codec_ctx_t *ctx, const char *s);

 /* The tool including this file must define usage_exit() */

-void usage_exit(void) VPX_NO_RETURN;

+VPX_NO_RETURN void usage_exit(void);

 #undef VPX_NO_RETURN

--- a/vp8/decoder/threading.c

+++ b/vp8/decoder/threading.c

@@ -10,7 +10,7 @@

 #include "vpx_config.h"

 #include "vp8_rtcd.h"

-#if !defined(WIN32) && CONFIG_OS_SUPPORT == 1

+#if !defined(_WIN32) && CONFIG_OS_SUPPORT == 1

 #include <unistd.h>

 #endif

 #include "onyxd_int.h"

--- a/vp8/encoder/bitstream.c

+++ b/vp8/encoder/bitstream.c

@@ -866,7 +866,6 @@

 #if !(CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING)

   vp8_writer *const w = cpi->bc;

 #endif

-  int savings = 0;

   vpx_clear_system_state();

@@ -940,8 +939,6 @@

 #if !(CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING)

             vp8_write_literal(w, newp, 8);

 #endif

-            savings += s;

         } while (++t < ENTROPY_NODES);

--- a/vp8/encoder/onyx_if.c

+++ b/vp8/encoder/onyx_if.c

@@ -183,7 +183,7 @@

 extern FILE *vpxlogc;

 #endif

-static void save_layer_context(VP8_COMP *cpi) {

+void vp8_save_layer_context(VP8_COMP *cpi) {

   LAYER_CONTEXT *lc = &cpi->layer_context[cpi->current_layer];

   /* Save layer dependent coding state */

@@ -222,7 +222,7 @@

          sizeof(cpi->mb.count_mb_ref_frame_usage));

-static void restore_layer_context(VP8_COMP *cpi, const int layer) {

+void vp8_restore_layer_context(VP8_COMP *cpi, const int layer) {

   LAYER_CONTEXT *lc = &cpi->layer_context[layer];

   /* Restore layer dependent coding state */

@@ -269,9 +269,9 @@

   return (int)(llval * llnum / llden);

-static void init_temporal_layer_context(VP8_COMP *cpi, VP8_CONFIG *oxcf,

-                                        const int layer,

-                                        double prev_layer_framerate) {

+void vp8_init_temporal_layer_context(VP8_COMP *cpi, VP8_CONFIG *oxcf,

+                                     const int layer,

+                                     double prev_layer_framerate) {

   LAYER_CONTEXT *lc = &cpi->layer_context[layer];

   lc->framerate = cpi->output_framerate / cpi->oxcf.rate_decimator[layer];

@@ -301,9 +301,9 @@

   /* Work out the average size of a frame within this layer */

   if (layer > 0) {

     lc->avg_frame_size_for_layer =

-        (int)((cpi->oxcf.target_bitrate[layer] -

-               cpi->oxcf.target_bitrate[layer - 1]) *

-              1000 / (lc->framerate - prev_layer_framerate));

+        (int)round((cpi->oxcf.target_bitrate[layer] -

+                    cpi->oxcf.target_bitrate[layer - 1]) *

+                   1000 / (lc->framerate - prev_layer_framerate));

   lc->active_worst_quality = cpi->oxcf.worst_allowed_q;

@@ -336,12 +336,12 @@

   // We need this to set the layer context for the new layers below.

   if (prev_num_layers == 1) {

     cpi->current_layer = 0;

-    save_layer_context(cpi);

+    vp8_save_layer_context(cpi);

   for (i = 0; i < curr_num_layers; ++i) {

     LAYER_CONTEXT *lc = &cpi->layer_context[i];

     if (i >= prev_num_layers) {

-      init_temporal_layer_context(cpi, oxcf, i, prev_layer_framerate);

+      vp8_init_temporal_layer_context(cpi, oxcf, i, prev_layer_framerate);

     // The initial buffer levels are set based on their starting levels.

     // We could set the buffer levels based on the previous state (normalized

@@ -356,7 +356,7 @@

     // state (to smooth-out quality dips/rate fluctuation at transition)?

     // We need to treat the 1 layer case separately: oxcf.target_bitrate[i]

-    // is not set for 1 layer, and the restore_layer_context/save_context()

+    // is not set for 1 layer, and the vp8_restore_layer_context/save_context()

     // are not called in the encoding loop, so we need to call it here to

     // pass the layer context state to |cpi|.

     if (curr_num_layers == 1) {

@@ -364,7 +364,7 @@

       lc->buffer_level =

           cpi->oxcf.starting_buffer_level_in_ms * lc->target_bandwidth / 1000;

       lc->bits_off_target = lc->buffer_level;

-      restore_layer_context(cpi, 0);

+      vp8_restore_layer_context(cpi, 0);

     prev_layer_framerate = cpi->output_framerate / cpi->oxcf.rate_decimator[i];

@@ -1274,7 +1274,7 @@

   cpi->framerate = framerate;

   cpi->output_framerate = framerate;

   cpi->per_frame_bandwidth =

-      (int)(cpi->oxcf.target_bandwidth / cpi->output_framerate);

+      (int)round(cpi->oxcf.target_bandwidth / cpi->output_framerate);

   cpi->av_per_frame_bandwidth = cpi->per_frame_bandwidth;

   cpi->min_frame_bandwidth = (int)(cpi->av_per_frame_bandwidth *

                                    cpi->oxcf.two_pass_vbrmin_section / 100);

@@ -1365,7 +1365,7 @@

     double prev_layer_framerate = 0;

     for (i = 0; i < cpi->oxcf.number_of_layers; ++i) {

-      init_temporal_layer_context(cpi, oxcf, i, prev_layer_framerate);

+      vp8_init_temporal_layer_context(cpi, oxcf, i, prev_layer_framerate);

       prev_layer_framerate =

           cpi->output_framerate / cpi->oxcf.rate_decimator[i];

@@ -1382,7 +1382,7 @@

 #endif

-static void update_layer_contexts(VP8_COMP *cpi) {

+void vp8_update_layer_contexts(VP8_COMP *cpi) {

   VP8_CONFIG *oxcf = &cpi->oxcf;

   /* Update snapshots of the layer contexts to reflect new parameters */

@@ -1417,8 +1417,8 @@

       /* Work out the average size of a frame within this layer */

       if (i > 0) {

         lc->avg_frame_size_for_layer =

-            (int)((oxcf->target_bitrate[i] - oxcf->target_bitrate[i - 1]) *

-                  1000 / (lc->framerate - prev_layer_framerate));

+            (int)round((oxcf->target_bitrate[i] - oxcf->target_bitrate[i - 1]) *

+                       1000 / (lc->framerate - prev_layer_framerate));

       prev_layer_framerate = lc->framerate;

@@ -1910,6 +1910,7 @@

   cpi->force_maxqp = 0;

   cpi->frames_since_last_drop_overshoot = 0;

+  cpi->rt_always_update_correction_factor = 0;

   cpi->b_calculate_psnr = CONFIG_INTERNAL_STATS;

 #if CONFIG_INTERNAL_STATS

@@ -3260,7 +3261,7 @@

 #endif  // !CONFIG_REALTIME_ONLY

     default:

       cpi->per_frame_bandwidth =

-          (int)(cpi->target_bandwidth / cpi->output_framerate);

+          (int)round(cpi->target_bandwidth / cpi->output_framerate);

       break;

@@ -3480,7 +3481,7 @@

    * Note that dropping a key frame can be problematic if spatial

    * resampling is also active

*/

-  if (cpi->decimation_factor > 0) {

+  if (cpi->decimation_factor > 0 && cpi->drop_frames_allowed) {

     switch (cpi->decimation_factor) {

       case 1:

         cpi->per_frame_bandwidth = cpi->per_frame_bandwidth * 3 / 2;

@@ -4016,7 +4017,8 @@

     if (frame_over_shoot_limit == 0) frame_over_shoot_limit = 1;

     /* Are we are overshooting and up against the limit of active max Q. */

-    if (((cpi->pass != 2) ||

+    if (!cpi->rt_always_update_correction_factor &&

+        ((cpi->pass != 2) ||

          (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)) &&

         (Q == cpi->active_worst_quality) &&

         (cpi->active_worst_quality < cpi->worst_quality) &&

@@ -4514,10 +4516,10 @@

     cpi->bits_off_target = cpi->oxcf.maximum_buffer_size;

-  // If the frame dropper is not enabled, don't let the buffer level go below

-  // some threshold, given here by -|maximum_buffer_size|. For now we only do

-  // this for screen content input.

-  if (cpi->drop_frames_allowed == 0 && cpi->oxcf.screen_content_mode &&

+  // Don't let the buffer level go below some threshold, given here

+  // by -|maximum_buffer_size|. For now we only do this for

+  // screen content input.

+  if (cpi->oxcf.screen_content_mode &&

       cpi->bits_off_target < -cpi->oxcf.maximum_buffer_size) {

     cpi->bits_off_target = -cpi->oxcf.maximum_buffer_size;

@@ -4552,8 +4554,8 @@

     for (i = cpi->current_layer + 1; i < cpi->oxcf.number_of_layers; ++i) {

       LAYER_CONTEXT *lc = &cpi->layer_context[i];

-      int bits_off_for_this_layer = (int)(lc->target_bandwidth / lc->framerate -

-                                          cpi->projected_frame_size);

+      int bits_off_for_this_layer = (int)round(

+          lc->target_bandwidth / lc->framerate - cpi->projected_frame_size);

       lc->bits_off_target += bits_off_for_this_layer;

@@ -4919,6 +4921,8 @@

       this_duration = cpi->source->ts_end - cpi->last_end_time_stamp_seen;

       last_duration = cpi->last_end_time_stamp_seen - cpi->last_time_stamp_seen;

+      // Cap this to avoid overflow of (this_duration - last_duration) * 10

+      this_duration = VPXMIN(this_duration, INT64_MAX / 10);

       /* do a step update if the duration changes by 10% */

       if (last_duration) {

         step = (int)(((this_duration - last_duration) * 10 / last_duration));

@@ -4988,7 +4992,7 @@

   if (cpi->oxcf.number_of_layers > 1) {

     int layer;

-    update_layer_contexts(cpi);

+    vp8_update_layer_contexts(cpi);

     /* Restore layer specific context & set frame rate */

     if (cpi->temporal_layer_id >= 0) {

@@ -4998,7 +5002,7 @@

           cpi->oxcf

               .layer_id[cpi->temporal_pattern_counter % cpi->oxcf.periodicity];

-    restore_layer_context(cpi, layer);

+    vp8_restore_layer_context(cpi, layer);

     vp8_new_framerate(cpi, cpi->layer_context[layer].framerate);

@@ -5129,7 +5133,7 @@

   /* Save layer specific state */

-  if (cpi->oxcf.number_of_layers > 1) save_layer_context(cpi);

+  if (cpi->oxcf.number_of_layers > 1) vp8_save_layer_context(cpi);

   vpx_usec_timer_mark(&cmptimer);

   cpi->time_compress_data += vpx_usec_timer_elapsed(&cmptimer);

@@ -5316,17 +5320,13 @@

     return -1;

-  // Range check the delta Q values and convert the external Q range values

-  // to internal ones.

-  if ((abs(delta_q[0]) > range) || (abs(delta_q[1]) > range) ||

-      (abs(delta_q[2]) > range) || (abs(delta_q[3]) > range)) {

-    return -1;

-  }

-  // Range check the delta lf values

-  if ((abs(delta_lf[0]) > range) || (abs(delta_lf[1]) > range) ||

-      (abs(delta_lf[2]) > range) || (abs(delta_lf[3]) > range)) {

-    return -1;

+  for (i = 0; i < MAX_MB_SEGMENTS; ++i) {

+    // Note abs() alone can't be used as the behavior of abs(INT_MIN) is

+    // undefined.

+    if (delta_q[i] > range || delta_q[i] < -range || delta_lf[i] > range ||

+        delta_lf[i] < -range) {

+      return -1;

+    }

   // Also disable segmentation if no deltas are specified.

--- a/vp8/encoder/onyx_int.h

+++ b/vp8/encoder/onyx_int.h

@@ -702,6 +702,10 @@

   int use_roi_static_threshold;

   int ext_refresh_frame_flags_pending;

+  // Always update correction factor used for rate control after each frame for

+  // realtime encoding.

+  int rt_always_update_correction_factor;

 } VP8_COMP;

 void vp8_initialize_enc(void);

@@ -708,6 +712,12 @@

 void vp8_alloc_compressor_data(VP8_COMP *cpi);

 int vp8_reverse_trans(int x);

+void vp8_init_temporal_layer_context(VP8_COMP *cpi, VP8_CONFIG *oxcf,

+                                     const int layer,

+                                     double prev_layer_framerate);

+void vp8_update_layer_contexts(VP8_COMP *cpi);

+void vp8_save_layer_context(VP8_COMP *cpi);

+void vp8_restore_layer_context(VP8_COMP *cpi, const int layer);

 void vp8_new_framerate(VP8_COMP *cpi, double framerate);

 void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm);

--- a/vp8/encoder/ratectrl.c

+++ b/vp8/encoder/ratectrl.c

@@ -327,7 +327,8 @@

     int initial_boost = 32; /* |3.0 * per_frame_bandwidth| */

     /* Boost depends somewhat on frame rate: only used for 1 layer case. */

     if (cpi->oxcf.number_of_layers == 1) {

-      kf_boost = VPXMAX(initial_boost, (int)(2 * cpi->output_framerate - 16));

+      kf_boost =

+          VPXMAX(initial_boost, (int)round(2 * cpi->output_framerate - 16));

     } else {

       /* Initial factor: set target size to: |3.0 * per_frame_bandwidth|. */

       kf_boost = initial_boost;

@@ -349,8 +350,12 @@

   if (cpi->oxcf.rc_max_intra_bitrate_pct) {

-    unsigned int max_rate =

-        cpi->per_frame_bandwidth * cpi->oxcf.rc_max_intra_bitrate_pct / 100;

+    unsigned int max_rate;

+    // This product may overflow unsigned int

+    uint64_t product = cpi->per_frame_bandwidth;

+    product *= cpi->oxcf.rc_max_intra_bitrate_pct;

+    product /= 100;

+    max_rate = (unsigned int)VPXMIN(INT_MAX, product);

     if (target > max_rate) target = max_rate;

--- a/vp8/vp8_cx_iface.c

+++ b/vp8/vp8_cx_iface.c

@@ -152,8 +152,8 @@

   RANGE_CHECK_HI(cfg, g_lag_in_frames, 25);

 #endif

   RANGE_CHECK(cfg, rc_end_usage, VPX_VBR, VPX_Q);

-  RANGE_CHECK_HI(cfg, rc_undershoot_pct, 1000);

-  RANGE_CHECK_HI(cfg, rc_overshoot_pct, 1000);

+  RANGE_CHECK_HI(cfg, rc_undershoot_pct, 100);

+  RANGE_CHECK_HI(cfg, rc_overshoot_pct, 100);

   RANGE_CHECK_HI(cfg, rc_2pass_vbr_bias_pct, 100);

   RANGE_CHECK(cfg, kf_mode, VPX_KF_DISABLED, VPX_KF_AUTO);

@@ -257,6 +257,23 @@

     ERROR("g_threads cannot be bigger than number of token partitions");

 #endif

+  // The range below shall be further tuned.

+  RANGE_CHECK(cfg, use_vizier_rc_params, 0, 1);

+  RANGE_CHECK(cfg, active_wq_factor.den, 1, 1000);

+  RANGE_CHECK(cfg, err_per_mb_factor.den, 1, 1000);

+  RANGE_CHECK(cfg, sr_default_decay_limit.den, 1, 1000);

+  RANGE_CHECK(cfg, sr_diff_factor.den, 1, 1000);

+  RANGE_CHECK(cfg, kf_err_per_mb_factor.den, 1, 1000);

+  RANGE_CHECK(cfg, kf_frame_min_boost_factor.den, 1, 1000);

+  RANGE_CHECK(cfg, kf_frame_max_boost_subs_factor.den, 1, 1000);

+  RANGE_CHECK(cfg, kf_max_total_boost_factor.den, 1, 1000);

+  RANGE_CHECK(cfg, gf_max_total_boost_factor.den, 1, 1000);

+  RANGE_CHECK(cfg, gf_frame_max_boost_factor.den, 1, 1000);

+  RANGE_CHECK(cfg, zm_factor.den, 1, 1000);

+  RANGE_CHECK(cfg, rd_mult_inter_qp_fac.den, 1, 1000);

+  RANGE_CHECK(cfg, rd_mult_arf_qp_fac.den, 1, 1000);

+  RANGE_CHECK(cfg, rd_mult_key_qp_fac.den, 1, 1000);

   return VPX_CODEC_OK;

@@ -378,6 +395,9 @@

 #endif

   oxcf->cpu_used = vp8_cfg.cpu_used;

+  if (cfg.g_pass == VPX_RC_FIRST_PASS) {

+    oxcf->cpu_used = VPXMAX(4, oxcf->cpu_used);

+  }

   oxcf->encode_breakout = vp8_cfg.static_thresh;

   oxcf->play_alternate = vp8_cfg.enable_auto_alt_ref;

   oxcf->noise_sensitivity = vp8_cfg.noise_sensitivity;

@@ -585,6 +605,17 @@

   return update_extracfg(ctx, &extra_cfg);

+static vpx_codec_err_t ctrl_set_rtc_external_ratectrl(vpx_codec_alg_priv_t *ctx,

+                                                      va_list args) {

+  VP8_COMP *cpi = ctx->cpi;

+  const unsigned int data = CAST(VP8E_SET_GF_CBR_BOOST_PCT, args);

+  if (data) {

+    cpi->cyclic_refresh_mode_enabled = 0;

+    cpi->rt_always_update_correction_factor = 1;

+  }

+  return VPX_CODEC_OK;

+}

 static vpx_codec_err_t vp8e_mr_alloc_mem(const vpx_codec_enc_cfg_t *cfg,

                                          void **mem_loc) {

   vpx_codec_err_t res = VPX_CODEC_OK;

@@ -1223,6 +1254,7 @@

   { VP8E_SET_MAX_INTRA_BITRATE_PCT, set_rc_max_intra_bitrate_pct },

   { VP8E_SET_SCREEN_CONTENT_MODE, set_screen_content_mode },

   { VP8E_SET_GF_CBR_BOOST_PCT, ctrl_set_rc_gf_cbr_boost_pct },

+  { VP8E_SET_RTC_EXTERNAL_RATECTRL, ctrl_set_rtc_external_ratectrl },

   { -1, NULL },

};

@@ -1256,7 +1288,7 @@

         VPX_VBR,     /* rc_end_usage */

         { NULL, 0 }, /* rc_twopass_stats_in */

         { NULL, 0 }, /* rc_firstpass_mb_stats_in */

-        256,         /* rc_target_bandwidth */

+        256,         /* rc_target_bitrate */

         4,           /* rc_min_quantizer */

         63,          /* rc_max_quantizer */

         100,         /* rc_undershoot_pct */

@@ -1278,14 +1310,30 @@

         VPX_SS_DEFAULT_LAYERS, /* ss_number_layers */

         { 0 },

-        { 0 }, /* ss_target_bitrate */

-        1,     /* ts_number_layers */

-        { 0 }, /* ts_target_bitrate */

-        { 0 }, /* ts_rate_decimator */

-        0,     /* ts_periodicity */

-        { 0 }, /* ts_layer_id */

-        { 0 }, /* layer_target_bitrate */

-        0      /* temporal_layering_mode */

+        { 0 },    /* ss_target_bitrate */

+        1,        /* ts_number_layers */

+        { 0 },    /* ts_target_bitrate */

+        { 0 },    /* ts_rate_decimator */

+        0,        /* ts_periodicity */

+        { 0 },    /* ts_layer_id */

+        { 0 },    /* layer_target_bitrate */

+        0,        /* temporal_layering_mode */

+        0,        /* use_vizier_rc_params */

+        { 1, 1 }, /* active_wq_factor */

+        { 1, 1 }, /* err_per_mb_factor */

+        { 1, 1 }, /* sr_default_decay_limit */

+        { 1, 1 }, /* sr_diff_factor */

+        { 1, 1 }, /* kf_err_per_mb_factor */

+        { 1, 1 }, /* kf_frame_min_boost_factor */

+        { 1, 1 }, /* kf_frame_max_boost_first_factor */

+        { 1, 1 }, /* kf_frame_max_boost_subs_factor */

+        { 1, 1 }, /* kf_max_total_boost_factor */

+        { 1, 1 }, /* gf_max_total_boost_factor */

+        { 1, 1 }, /* gf_frame_max_boost_factor */

+        { 1, 1 }, /* zm_factor */

+        { 1, 1 }, /* rd_mult_inter_qp_fac */

+        { 1, 1 }, /* rd_mult_arf_qp_fac */

+        { 1, 1 }, /* rd_mult_key_qp_fac */

     } },

};

--- /dev/null

+++ b/vp8/vp8_ratectrl_rtc.cc

@@ -1,0 +1,347 @@

+/*

+ *  Copyright (c) 2021 The WebM project authors. All Rights Reserved.

+ *

+ *  Use of this source code is governed by a BSD-style license

+ *  that can be found in the LICENSE file in the root of the source

+ *  tree. An additional intellectual property rights grant can be found

+ *  in the file PATENTS.  All contributing project authors may

+ *  be found in the AUTHORS file in the root of the source tree.

+ */

+#include <math.h>

+#include <new>

+#include "vp8/vp8_ratectrl_rtc.h"

+#include "vp8/encoder/ratectrl.h"

+#include "vpx_ports/system_state.h"

+namespace libvpx {

+/* Quant MOD */

+static const int kQTrans[] = {

+  0,  1,  2,  3,  4,  5,  7,   8,   9,   10,  12,  13,  15,  17,  18,  19,

+  20, 21, 23, 24, 25, 26, 27,  28,  29,  30,  31,  33,  35,  37,  39,  41,

+  43, 45, 47, 49, 51, 53, 55,  57,  59,  61,  64,  67,  70,  73,  76,  79,

+  82, 85, 88, 91, 94, 97, 100, 103, 106, 109, 112, 115, 118, 121, 124, 127,

+};

+static const unsigned char kf_high_motion_minq[QINDEX_RANGE] = {

+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,

+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  1,  1,  1,  1,

+  1,  1,  2,  2,  2,  2,  3,  3,  3,  3,  3,  3,  3,  3,  4,  4,  4,  4,  5,

+  5,  5,  5,  5,  5,  6,  6,  6,  6,  7,  7,  8,  8,  8,  8,  9,  9,  10, 10,

+  10, 10, 11, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 15, 15, 15, 15, 16,

+  16, 16, 16, 17, 17, 18, 18, 18, 18, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21,

+  22, 22, 23, 23, 24, 25, 25, 26, 26, 27, 28, 28, 29, 30

+};

+static const unsigned char inter_minq[QINDEX_RANGE] = {

+  0,  0,  1,  1,  2,  3,  3,  4,  4,  5,  6,  6,  7,  8,  8,  9,  9,  10, 11,

+  11, 12, 13, 13, 14, 15, 15, 16, 17, 17, 18, 19, 20, 20, 21, 22, 22, 23, 24,

+  24, 25, 26, 27, 27, 28, 29, 30, 30, 31, 32, 33, 33, 34, 35, 36, 36, 37, 38,

+  39, 39, 40, 41, 42, 42, 43, 44, 45, 46, 46, 47, 48, 49, 50, 50, 51, 52, 53,

+  54, 55, 55, 56, 57, 58, 59, 60, 60, 61, 62, 63, 64, 65, 66, 67, 67, 68, 69,

+  70, 71, 72, 73, 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 86,

+  87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100

+};

+static int rescale(int val, int num, int denom) {

+  int64_t llnum = num;

+  int64_t llden = denom;

+  int64_t llval = val;

+  return (int)(llval * llnum / llden);

+}

+std::unique_ptr<VP8RateControlRTC> VP8RateControlRTC::Create(

+    const VP8RateControlRtcConfig &cfg) {

+  std::unique_ptr<VP8RateControlRTC> rc_api(new (std::nothrow)

+                                                VP8RateControlRTC());

+  if (!rc_api) return nullptr;

+  rc_api->cpi_ = static_cast<VP8_COMP *>(vpx_memalign(32, sizeof(*cpi_)));

+  if (!rc_api->cpi_) return nullptr;

+  vp8_zero(*rc_api->cpi_);

+  rc_api->InitRateControl(cfg);

+  return rc_api;

+}

+void VP8RateControlRTC::InitRateControl(const VP8RateControlRtcConfig &rc_cfg) {

+  VP8_COMMON *cm = &cpi_->common;

+  VP8_CONFIG *oxcf = &cpi_->oxcf;

+  oxcf->end_usage = USAGE_STREAM_FROM_SERVER;

+  cpi_->pass = 0;

+  cm->show_frame = 1;

+  oxcf->drop_frames_water_mark = 0;

+  cm->current_video_frame = 0;

+  cpi_->auto_gold = 1;

+  cpi_->key_frame_count = 1;

+  cpi_->rate_correction_factor = 1.0;

+  cpi_->key_frame_rate_correction_factor = 1.0;

+  cpi_->cyclic_refresh_mode_enabled = 0;

+  cpi_->auto_worst_q = 1;

+  cpi_->kf_overspend_bits = 0;

+  cpi_->kf_bitrate_adjustment = 0;

+  cpi_->gf_overspend_bits = 0;

+  cpi_->non_gf_bitrate_adjustment = 0;

+  UpdateRateControl(rc_cfg);

+  cpi_->buffer_level = oxcf->starting_buffer_level;

+  cpi_->bits_off_target = oxcf->starting_buffer_level;

+}

+void VP8RateControlRTC::UpdateRateControl(

+    const VP8RateControlRtcConfig &rc_cfg) {

+  VP8_COMMON *cm = &cpi_->common;

+  VP8_CONFIG *oxcf = &cpi_->oxcf;

+  vpx_clear_system_state();

+  cm->Width = rc_cfg.width;

+  cm->Height = rc_cfg.height;

+  oxcf->Width = rc_cfg.width;

+  oxcf->Height = rc_cfg.height;

+  oxcf->worst_allowed_q = kQTrans[rc_cfg.max_quantizer];

+  oxcf->best_allowed_q = kQTrans[rc_cfg.min_quantizer];

+  cpi_->worst_quality = oxcf->worst_allowed_q;

+  cpi_->best_quality = oxcf->best_allowed_q;

+  cpi_->output_framerate = rc_cfg.framerate;

+  oxcf->target_bandwidth =

+      static_cast<unsigned int>(1000 * rc_cfg.target_bandwidth);

+  cpi_->ref_framerate = cpi_->output_framerate;

+  oxcf->fixed_q = -1;

+  oxcf->error_resilient_mode = 1;

+  oxcf->starting_buffer_level_in_ms = rc_cfg.buf_initial_sz;

+  oxcf->optimal_buffer_level_in_ms = rc_cfg.buf_optimal_sz;

+  oxcf->maximum_buffer_size_in_ms = rc_cfg.buf_sz;

+  oxcf->starting_buffer_level = rc_cfg.buf_initial_sz;

+  oxcf->optimal_buffer_level = rc_cfg.buf_optimal_sz;

+  oxcf->maximum_buffer_size = rc_cfg.buf_sz;

+  oxcf->number_of_layers = rc_cfg.ts_number_layers;

+  cpi_->buffered_mode = oxcf->optimal_buffer_level > 0;

+  oxcf->under_shoot_pct = rc_cfg.undershoot_pct;

+  oxcf->over_shoot_pct = rc_cfg.overshoot_pct;

+  cpi_->oxcf.rc_max_intra_bitrate_pct = rc_cfg.max_intra_bitrate_pct;

+  cpi_->framerate = rc_cfg.framerate;

+  for (int i = 0; i < KEY_FRAME_CONTEXT; ++i) {

+    cpi_->prior_key_frame_distance[i] =

+        static_cast<int>(cpi_->output_framerate);

+  }

+  if (oxcf->number_of_layers > 1) {

+    memcpy(oxcf->target_bitrate, rc_cfg.layer_target_bitrate,

+           sizeof(rc_cfg.layer_target_bitrate));

+    memcpy(oxcf->rate_decimator, rc_cfg.ts_rate_decimator,

+           sizeof(rc_cfg.ts_rate_decimator));

+    oxcf->periodicity = 2;

+    double prev_layer_framerate = 0;

+    for (unsigned int i = 0; i < oxcf->number_of_layers; ++i) {

+      vp8_init_temporal_layer_context(cpi_, oxcf, i, prev_layer_framerate);

+      prev_layer_framerate = cpi_->output_framerate / oxcf->rate_decimator[i];

+    }

+  }

+  cpi_->total_actual_bits = 0;

+  cpi_->total_target_vs_actual = 0;

+  cm->mb_rows = cm->Height >> 4;

+  cm->mb_cols = cm->Width >> 4;

+  cm->MBs = cm->mb_rows * cm->mb_cols;

+  cm->mode_info_stride = cm->mb_cols + 1;

+  oxcf->starting_buffer_level =

+      rescale((int)oxcf->starting_buffer_level, oxcf->target_bandwidth, 1000);

+  /* Set or reset optimal and maximum buffer levels. */

+  if (oxcf->optimal_buffer_level == 0) {

+    oxcf->optimal_buffer_level = oxcf->target_bandwidth / 8;

+  } else {

+    oxcf->optimal_buffer_level =

+        rescale((int)oxcf->optimal_buffer_level, oxcf->target_bandwidth, 1000);

+  }

+  if (oxcf->maximum_buffer_size == 0) {

+    oxcf->maximum_buffer_size = oxcf->target_bandwidth / 8;

+  } else {

+    oxcf->maximum_buffer_size =

+        rescale((int)oxcf->maximum_buffer_size, oxcf->target_bandwidth, 1000);

+  }

+  if (cpi_->bits_off_target > oxcf->maximum_buffer_size) {

+    cpi_->bits_off_target = oxcf->maximum_buffer_size;

+    cpi_->buffer_level = cpi_->bits_off_target;

+  }

+  vp8_new_framerate(cpi_, cpi_->framerate);

+  vpx_clear_system_state();

+}

+void VP8RateControlRTC::ComputeQP(const VP8FrameParamsQpRTC &frame_params) {

+  VP8_COMMON *const cm = &cpi_->common;

+  vpx_clear_system_state();

+  if (cpi_->oxcf.number_of_layers > 1) {

+    cpi_->temporal_layer_id = frame_params.temporal_layer_id;

+    const int layer = frame_params.temporal_layer_id;

+    vp8_update_layer_contexts(cpi_);

+    /* Restore layer specific context & set frame rate */

+    vp8_restore_layer_context(cpi_, layer);

+    vp8_new_framerate(cpi_, cpi_->layer_context[layer].framerate);

+  }

+  cm->frame_type = frame_params.frame_type;

+  cm->refresh_golden_frame = (cm->frame_type == KEY_FRAME) ? 1 : 0;

+  cm->refresh_alt_ref_frame = (cm->frame_type == KEY_FRAME) ? 1 : 0;

+  if (cm->frame_type == KEY_FRAME && cpi_->common.current_video_frame > 0) {

+    cpi_->common.frame_flags |= FRAMEFLAGS_KEY;

+  }

+  vp8_pick_frame_size(cpi_);

+  if (cpi_->buffer_level >= cpi_->oxcf.optimal_buffer_level &&

+      cpi_->buffered_mode) {

+    /* Max adjustment is 1/4 */

+    int Adjustment = cpi_->active_worst_quality / 4;

+    if (Adjustment) {

+      int buff_lvl_step;

+      if (cpi_->buffer_level < cpi_->oxcf.maximum_buffer_size) {

+        buff_lvl_step = (int)((cpi_->oxcf.maximum_buffer_size -

+                               cpi_->oxcf.optimal_buffer_level) /

+                              Adjustment);

+        if (buff_lvl_step) {

+          Adjustment =

+              (int)((cpi_->buffer_level - cpi_->oxcf.optimal_buffer_level) /

+                    buff_lvl_step);

+        } else {

+          Adjustment = 0;

+        }

+      }

+      cpi_->active_worst_quality -= Adjustment;

+      if (cpi_->active_worst_quality < cpi_->active_best_quality) {

+        cpi_->active_worst_quality = cpi_->active_best_quality;

+      }

+    }

+  }

+  if (cpi_->ni_frames > 150) {

+    int q = cpi_->active_worst_quality;

+    if (cm->frame_type == KEY_FRAME) {

+      cpi_->active_best_quality = kf_high_motion_minq[q];

+    } else {

+      cpi_->active_best_quality = inter_minq[q];

+    }

+    if (cpi_->buffer_level >= cpi_->oxcf.maximum_buffer_size) {

+      cpi_->active_best_quality = cpi_->best_quality;

+    } else if (cpi_->buffer_level > cpi_->oxcf.optimal_buffer_level) {

+      int Fraction =

+          (int)(((cpi_->buffer_level - cpi_->oxcf.optimal_buffer_level) * 128) /

+                (cpi_->oxcf.maximum_buffer_size -

+                 cpi_->oxcf.optimal_buffer_level));

+      int min_qadjustment =

+          ((cpi_->active_best_quality - cpi_->best_quality) * Fraction) / 128;

+      cpi_->active_best_quality -= min_qadjustment;

+    }

+  }

+  /* Clip the active best and worst quality values to limits */

+  if (cpi_->active_worst_quality > cpi_->worst_quality) {

+    cpi_->active_worst_quality = cpi_->worst_quality;

+  }

+  if (cpi_->active_best_quality < cpi_->best_quality) {

+    cpi_->active_best_quality = cpi_->best_quality;

+  }

+  if (cpi_->active_worst_quality < cpi_->active_best_quality) {

+    cpi_->active_worst_quality = cpi_->active_best_quality;

+  }

+  q_ = vp8_regulate_q(cpi_, cpi_->this_frame_target);

+  vp8_set_quantizer(cpi_, q_);

+  vpx_clear_system_state();

+}

+int VP8RateControlRTC::GetQP() const { return q_; }

+void VP8RateControlRTC::PostEncodeUpdate(uint64_t encoded_frame_size) {

+  VP8_COMMON *const cm = &cpi_->common;

+  vpx_clear_system_state();

+  cpi_->total_byte_count += encoded_frame_size;

+  cpi_->projected_frame_size = static_cast<int>(encoded_frame_size << 3);

+  if (cpi_->oxcf.number_of_layers > 1) {

+    for (unsigned int i = cpi_->current_layer + 1;

+         i < cpi_->oxcf.number_of_layers; ++i) {

+      cpi_->layer_context[i].total_byte_count += encoded_frame_size;

+    }

+  }

+  vp8_update_rate_correction_factors(cpi_, 2);

+  cpi_->last_q[cm->frame_type] = cm->base_qindex;

+  if (cm->frame_type == KEY_FRAME) {

+    vp8_adjust_key_frame_context(cpi_);

+  }

+  /* Keep a record of ambient average Q. */

+  if (cm->frame_type != KEY_FRAME) {

+    cpi_->avg_frame_qindex =

+        (2 + 3 * cpi_->avg_frame_qindex + cm->base_qindex) >> 2;

+  }

+  /* Keep a record from which we can calculate the average Q excluding

+   * key frames.

+   */

+  if (cm->frame_type != KEY_FRAME) {

+    cpi_->ni_frames++;

+    /* Damp value for first few frames */

+    if (cpi_->ni_frames > 150) {

+      cpi_->ni_tot_qi += q_;

+      cpi_->ni_av_qi = (cpi_->ni_tot_qi / cpi_->ni_frames);

+    } else {

+      cpi_->ni_tot_qi += q_;

+      cpi_->ni_av_qi =

+          ((cpi_->ni_tot_qi / cpi_->ni_frames) + cpi_->worst_quality + 1) / 2;

+    }

+    /* If the average Q is higher than what was used in the last

+     * frame (after going through the recode loop to keep the frame

+     * size within range) then use the last frame value - 1. The -1

+     * is designed to stop Q and hence the data rate, from

+     * progressively falling away during difficult sections, but at

+     * the same time reduce the number of itterations around the

+     * recode loop.

+     */

+    if (q_ > cpi_->ni_av_qi) cpi_->ni_av_qi = q_ - 1;

+  }

+  cpi_->bits_off_target +=

+      cpi_->av_per_frame_bandwidth - cpi_->projected_frame_size;

+  if (cpi_->bits_off_target > cpi_->oxcf.maximum_buffer_size) {

+    cpi_->bits_off_target = cpi_->oxcf.maximum_buffer_size;

+  }

+  cpi_->total_actual_bits += cpi_->projected_frame_size;

+  cpi_->buffer_level = cpi_->bits_off_target;

+  /* Propagate values to higher temporal layers */

+  if (cpi_->oxcf.number_of_layers > 1) {

+    for (unsigned int i = cpi_->current_layer + 1;

+         i < cpi_->oxcf.number_of_layers; ++i) {

+      LAYER_CONTEXT *lc = &cpi_->layer_context[i];

+      int bits_off_for_this_layer = (int)round(

+          lc->target_bandwidth / lc->framerate - cpi_->projected_frame_size);

+      lc->bits_off_target += bits_off_for_this_layer;

+      /* Clip buffer level to maximum buffer size for the layer */

+      if (lc->bits_off_target > lc->maximum_buffer_size) {

+        lc->bits_off_target = lc->maximum_buffer_size;

+      }

+      lc->total_actual_bits += cpi_->projected_frame_size;

+      lc->total_target_vs_actual += bits_off_for_this_layer;

+      lc->buffer_level = lc->bits_off_target;

+    }

+  }

+  cpi_->common.current_video_frame++;

+  cpi_->frames_since_key++;

+  if (cpi_->oxcf.number_of_layers > 1) vp8_save_layer_context(cpi_);

+  vpx_clear_system_state();

+}

+}  // namespace libvpx

--- /dev/null

+++ b/vp8/vp8_ratectrl_rtc.h

@@ -1,0 +1,63 @@

+/*

+ *  Copyright (c) 2021 The WebM project authors. All Rights Reserved.

+ *

+ *  Use of this source code is governed by a BSD-style license

+ *  that can be found in the LICENSE file in the root of the source

+ *  tree. An additional intellectual property rights grant can be found

+ *  in the file PATENTS.  All contributing project authors may

+ *  be found in the AUTHORS file in the root of the source tree.

+ */

+#ifndef VPX_VP8_RATECTRL_RTC_H_

+#define VPX_VP8_RATECTRL_RTC_H_

+#include <cstdint>

+#include <memory>

+#include "vp8/encoder/onyx_int.h"

+#include "vp8/common/common.h"

+#include "vpx/internal/vpx_ratectrl_rtc.h"

+namespace libvpx {

+struct VP8RateControlRtcConfig : public VpxRateControlRtcConfig {

+ public:

+  VP8RateControlRtcConfig() {

+    vp8_zero(layer_target_bitrate);

+    vp8_zero(ts_rate_decimator);

+  }

+};

+struct VP8FrameParamsQpRTC {

+  FRAME_TYPE frame_type;

+  int temporal_layer_id;

+};

+class VP8RateControlRTC {

+ public:

+  static std::unique_ptr<VP8RateControlRTC> Create(

+      const VP8RateControlRtcConfig &cfg);

+  ~VP8RateControlRTC() {

+    if (cpi_) {

+      vpx_free(cpi_->gf_active_flags);

+      vpx_free(cpi_);

+    }

+  }

+  void UpdateRateControl(const VP8RateControlRtcConfig &rc_cfg);

+  // GetQP() needs to be called after ComputeQP() to get the latest QP

+  int GetQP() const;

+  // int GetLoopfilterLevel() const;

+  void ComputeQP(const VP8FrameParamsQpRTC &frame_params);

+  // Feedback to rate control with the size of current encoded frame

+  void PostEncodeUpdate(uint64_t encoded_frame_size);

+ private:

+  VP8RateControlRTC() {}

+  void InitRateControl(const VP8RateControlRtcConfig &cfg);

+  VP8_COMP *cpi_;

+  int q_;

+};

+}  // namespace libvpx

+#endif  // VPX_VP8_RATECTRL_RTC_H_

--- a/vp9/encoder/arm/neon/vp9_denoiser_neon.c

+++ b/vp9/encoder/arm/neon/vp9_denoiser_neon.c

@@ -21,6 +21,9 @@

 // Compute the sum of all pixel differences of this MB.

 static INLINE int horizontal_add_s8x16(const int8x16_t v_sum_diff_total) {

+#if defined(__aarch64__)

+  return vaddlvq_s8(v_sum_diff_total);

+#else

   const int16x8_t fe_dc_ba_98_76_54_32_10 = vpaddlq_s8(v_sum_diff_total);

   const int32x4_t fedc_ba98_7654_3210 = vpaddlq_s16(fe_dc_ba_98_76_54_32_10);

   const int64x2_t fedcba98_76543210 = vpaddlq_s32(fedc_ba98_7654_3210);

@@ -28,6 +31,7 @@

                                 vget_low_s64(fedcba98_76543210));

   const int sum_diff = vget_lane_s32(vreinterpret_s32_s64(x), 0);

   return sum_diff;

+#endif

 // Denoise a 16x1 vector.

--- a/vp9/encoder/vp9_aq_cyclicrefresh.c

+++ b/vp9/encoder/vp9_aq_cyclicrefresh.c

@@ -48,6 +48,7 @@

   assert(MAXQ <= 255);

   memset(cr->last_coded_q_map, MAXQ, last_coded_q_map_size);

   cr->counter_encode_maxq_scene_change = 0;

+  cr->content_mode = 1;

   return cr;

@@ -326,7 +327,8 @@

   else

     rc->baseline_gf_interval = 40;

   if (cpi->oxcf.rc_mode == VPX_VBR) rc->baseline_gf_interval = 20;

-  if (rc->avg_frame_low_motion < 50 && rc->frames_since_key > 40)

+  if (rc->avg_frame_low_motion < 50 && rc->frames_since_key > 40 &&

+      cr->content_mode)

     rc->baseline_gf_interval = 10;

@@ -388,7 +390,8 @@

           ? vp9_get_qindex(&cm->seg, CR_SEGMENT_ID_BOOST2, cm->base_qindex)

           : vp9_get_qindex(&cm->seg, CR_SEGMENT_ID_BOOST1, cm->base_qindex);

   // More aggressive settings for noisy content.

-  if (cpi->noise_estimate.enabled && cpi->noise_estimate.level >= kMedium) {

+  if (cpi->noise_estimate.enabled && cpi->noise_estimate.level >= kMedium &&

+      cr->content_mode) {

     consec_zero_mv_thresh = 60;

     qindex_thresh =

         VPXMAX(vp9_get_qindex(&cm->seg, CR_SEGMENT_ID_BOOST1, cm->base_qindex),

@@ -409,7 +412,7 @@

 #if CONFIG_VP9_HIGHBITDEPTH

     if (cpi->common.use_highbitdepth) compute_content = 0;

 #endif

-    if (cpi->Last_Source == NULL ||

+    if (cr->content_mode == 0 || cpi->Last_Source == NULL ||

         cpi->Last_Source->y_width != cpi->Source->y_width ||

         cpi->Last_Source->y_height != cpi->Source->y_height)

       compute_content = 0;

@@ -430,7 +433,8 @@

         // reset to 0 later depending on the coding mode.

         if (cr->map[bl_index2] == 0) {

           count_tot++;

-          if (cr->last_coded_q_map[bl_index2] > qindex_thresh ||

+          if (cr->content_mode == 0 ||

+              cr->last_coded_q_map[bl_index2] > qindex_thresh ||

               cpi->consec_zero_mv[bl_index2] < consec_zero_mv_thresh_block) {

             sum_map++;

             count_sel++;

@@ -489,7 +493,8 @@

       rc->avg_frame_qindex[INTER_FRAME] < qp_thresh ||

       (cpi->use_svc &&

        cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame) ||

-      (!cpi->use_svc && rc->avg_frame_low_motion < thresh_low_motion &&

+      (!cpi->use_svc && cr->content_mode &&

+       rc->avg_frame_low_motion < thresh_low_motion &&

        rc->frames_since_key > 40) ||

       (!cpi->use_svc && rc->avg_frame_qindex[INTER_FRAME] > qp_max_thresh &&

        rc->frames_since_key > 20)) {

@@ -511,7 +516,8 @@

     cr->rate_ratio_qdelta = 3.0;

   } else {

     cr->rate_ratio_qdelta = 2.0;

-    if (cpi->noise_estimate.enabled && cpi->noise_estimate.level >= kMedium) {

+    if (cr->content_mode && cpi->noise_estimate.enabled &&

+        cpi->noise_estimate.level >= kMedium) {

       // Reduce the delta-qp if the estimated source noise is above threshold.

       cr->rate_ratio_qdelta = 1.7;

       cr->rate_boost_fac = 13;

@@ -528,7 +534,7 @@

     cr->percent_refresh = (cr->skip_flat_static_blocks) ? 5 : 10;

     // Increase the amount of refresh on scene change that is encoded at max Q,

     // increase for a few cycles of the refresh period (~100 / percent_refresh).

-    if (cr->counter_encode_maxq_scene_change < 30)

+    if (cr->content_mode && cr->counter_encode_maxq_scene_change < 30)

       cr->percent_refresh = (cr->skip_flat_static_blocks) ? 10 : 15;

     cr->rate_ratio_qdelta = 2.0;

     cr->rate_boost_fac = 10;

@@ -575,6 +581,12 @@

         (double)(cr->actual_num_seg1_blocks + cr->actual_num_seg2_blocks) /

         num8x8bl;

   cr->weight_segment = weight_segment;

+  if (cr->content_mode == 0) {

+    cr->actual_num_seg1_blocks =

+        cr->percent_refresh * cm->mi_rows * cm->mi_cols / 100;

+    cr->actual_num_seg2_blocks = 0;

+    cr->weight_segment = (double)(cr->actual_num_seg1_blocks) / num8x8bl;

+  }

 // Setup cyclic background refresh: set delta q and segmentation map.

--- a/vp9/encoder/vp9_aq_cyclicrefresh.h

+++ b/vp9/encoder/vp9_aq_cyclicrefresh.h

@@ -70,6 +70,7 @@

   int apply_cyclic_refresh;

   int counter_encode_maxq_scene_change;

   int skip_flat_static_blocks;

+  int content_mode;

};

 struct VP9_COMP;

--- a/vp9/encoder/vp9_block.h

+++ b/vp9/encoder/vp9_block.h

@@ -157,6 +157,9 @@

   // skip forward transform and quantization

   uint8_t skip_txfm[MAX_MB_PLANE << 2];

 #define SKIP_TXFM_NONE 0

+// TODO(chengchen): consider remove SKIP_TXFM_AC_DC from vp9 completely

+// since it increases risks of bad perceptual quality.

+// https://crbug.com/webm/1729

 #define SKIP_TXFM_AC_DC 1

 #define SKIP_TXFM_AC_ONLY 2

--- a/vp9/encoder/vp9_encodeframe.c

+++ b/vp9/encoder/vp9_encodeframe.c

@@ -159,37 +159,6 @@

 #endif  // CONFIG_VP9_HIGHBITDEPTH

-#if !CONFIG_REALTIME_ONLY

-static unsigned int get_sby_perpixel_diff_variance(VP9_COMP *cpi,

-                                                   const struct buf_2d *ref,

-                                                   int mi_row, int mi_col,

-                                                   BLOCK_SIZE bs) {

-  unsigned int sse, var;

-  uint8_t *last_y;

-  const YV12_BUFFER_CONFIG *last = get_ref_frame_buffer(cpi, LAST_FRAME);

-  assert(last != NULL);

-  last_y =

-      &last->y_buffer[mi_row * MI_SIZE * last->y_stride + mi_col * MI_SIZE];

-  var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride, last_y, last->y_stride, &sse);

-  return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);

-}

-static BLOCK_SIZE get_rd_var_based_fixed_partition(VP9_COMP *cpi, MACROBLOCK *x,

-                                                   int mi_row, int mi_col) {

-  unsigned int var = get_sby_perpixel_diff_variance(

-      cpi, &x->plane[0].src, mi_row, mi_col, BLOCK_64X64);

-  if (var < 8)

-    return BLOCK_64X64;

-  else if (var < 128)

-    return BLOCK_32X32;

-  else if (var < 2048)

-    return BLOCK_16X16;

-  else

-    return BLOCK_8X8;

-}

-#endif  // !CONFIG_REALTIME_ONLY

 static void set_segment_index(VP9_COMP *cpi, MACROBLOCK *const x, int mi_row,

                               int mi_col, BLOCK_SIZE bsize, int segment_index) {

   VP9_COMMON *const cm = &cpi->common;

@@ -815,8 +784,8 @@

 // Check if most of the superblock is skin content, and if so, force split to

 // 32x32, and set x->sb_is_skin for use in mode selection.

-static int skin_sb_split(VP9_COMP *cpi, MACROBLOCK *x, const int low_res,

-                         int mi_row, int mi_col, int *force_split) {

+static int skin_sb_split(VP9_COMP *cpi, const int low_res, int mi_row,

+                         int mi_col, int *force_split) {

   VP9_COMMON *const cm = &cpi->common;

 #if CONFIG_VP9_HIGHBITDEPTH

   if (cm->use_highbitdepth) return 0;

@@ -828,11 +797,6 @@

                    mi_row + 8 < cm->mi_rows)) {

     int num_16x16_skin = 0;

     int num_16x16_nonskin = 0;

-    uint8_t *ysignal = x->plane[0].src.buf;

-    uint8_t *usignal = x->plane[1].src.buf;

-    uint8_t *vsignal = x->plane[2].src.buf;

-    int sp = x->plane[0].src.stride;

-    int spuv = x->plane[1].src.stride;

     const int block_index = mi_row * cm->mi_cols + mi_col;

     const int bw = num_8x8_blocks_wide_lookup[BLOCK_64X64];

     const int bh = num_8x8_blocks_high_lookup[BLOCK_64X64];

@@ -851,13 +815,7 @@

           i = ymis;

           break;

-        ysignal += 16;

-        usignal += 8;

-        vsignal += 8;

-      ysignal += (sp << 4) - 64;

-      usignal += (spuv << 3) - 32;

-      vsignal += (spuv << 3) - 32;

     if (num_16x16_skin > 12) {

       *force_split = 1;

@@ -1534,8 +1492,7 @@

     vp9_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_64X64);

     if (cpi->use_skin_detection)

-      x->sb_is_skin =

-          skin_sb_split(cpi, x, low_res, mi_row, mi_col, force_split);

+      x->sb_is_skin = skin_sb_split(cpi, low_res, mi_row, mi_col, force_split);

     d = xd->plane[0].dst.buf;

     dp = xd->plane[0].dst.stride;

@@ -1842,7 +1799,8 @@

     // Else for cyclic refresh mode update the segment map, set the segment id

     // and then update the quantizer.

-    if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {

+    if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ &&

+        cpi->cyclic_refresh->content_mode) {

       vp9_cyclic_refresh_update_segment(cpi, xd->mi[0], mi_row, mi_col, bsize,

                                         ctx->rate, ctx->dist, x->skip, p);

@@ -2539,7 +2497,8 @@

   if (seg->enabled && (cpi->oxcf.aq_mode != NO_AQ || cpi->roi.enabled)) {

     // Setting segmentation map for cyclic_refresh.

-    if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {

+    if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ &&

+        cpi->cyclic_refresh->content_mode) {

       vp9_cyclic_refresh_update_segment(cpi, mi, mi_row, mi_col, bsize,

                                         ctx->rate, ctx->dist, x->skip, p);

     } else {

@@ -3119,54 +3078,6 @@

   memcpy(x->pred_mv, ctx->pred_mv, sizeof(x->pred_mv));

-#if CONFIG_FP_MB_STATS

-const int num_16x16_blocks_wide_lookup[BLOCK_SIZES] = { 1, 1, 1, 1, 1, 1, 1,

-                                                        1, 2, 2, 2, 4, 4 };

-const int num_16x16_blocks_high_lookup[BLOCK_SIZES] = { 1, 1, 1, 1, 1, 1, 1,

-                                                        2, 1, 2, 4, 2, 4 };

-const int qindex_skip_threshold_lookup[BLOCK_SIZES] = { 0,   10,  10, 30, 40,

-                                                        40,  60,  80, 80, 90,

-                                                        100, 100, 120 };

-const int qindex_split_threshold_lookup[BLOCK_SIZES] = { 0,  3,  3,  7,  15,

-                                                         15, 30, 40, 40, 60,

-                                                         80, 80, 120 };

-const int complexity_16x16_blocks_threshold[BLOCK_SIZES] = { 1, 1, 1, 1, 1,

-                                                             1, 1, 1, 1, 1,

-                                                             4, 4, 6 };

-typedef enum {

-  MV_ZERO = 0,

-  MV_LEFT = 1,

-  MV_UP = 2,

-  MV_RIGHT = 3,

-  MV_DOWN = 4,

-  MV_INVALID

-} MOTION_DIRECTION;

-static INLINE MOTION_DIRECTION get_motion_direction_fp(uint8_t fp_byte) {

-  if (fp_byte & FPMB_MOTION_ZERO_MASK) {

-    return MV_ZERO;

-  } else if (fp_byte & FPMB_MOTION_LEFT_MASK) {

-    return MV_LEFT;

-  } else if (fp_byte & FPMB_MOTION_RIGHT_MASK) {

-    return MV_RIGHT;

-  } else if (fp_byte & FPMB_MOTION_UP_MASK) {

-    return MV_UP;

-  } else {

-    return MV_DOWN;

-  }

-}

-static INLINE int get_motion_inconsistency(MOTION_DIRECTION this_mv,

-                                           MOTION_DIRECTION that_mv) {

-  if (this_mv == that_mv) {

-    return 0;

-  } else {

-    return abs(this_mv - that_mv) == 2 ? 2 : 1;

-  }

-}

-#endif

 // Calculate prediction based on the given input features and neural net config.

 // Assume there are no more than NN_MAX_NODES_PER_LAYER nodes in each hidden

 // layer.

@@ -4064,11 +3975,6 @@

   BLOCK_SIZE min_size = x->min_partition_size;

   BLOCK_SIZE max_size = x->max_partition_size;

-#if CONFIG_FP_MB_STATS

-  unsigned int src_diff_var = UINT_MAX;

-  int none_complexity = 0;

-#endif

   int partition_none_allowed = !force_horz_split && !force_vert_split;

   int partition_horz_allowed =

       !force_vert_split && yss <= xss && bsize >= BLOCK_8X8;

@@ -4155,65 +4061,6 @@

   save_context(x, mi_row, mi_col, a, l, sa, sl, bsize);

-#if CONFIG_FP_MB_STATS

-  if (cpi->use_fp_mb_stats) {

-    set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);

-    src_diff_var = get_sby_perpixel_diff_variance(cpi, &x->plane[0].src, mi_row,

-                                                  mi_col, bsize);

-  }

-#endif

-#if CONFIG_FP_MB_STATS

-  // Decide whether we shall split directly and skip searching NONE by using

-  // the first pass block statistics

-  if (cpi->use_fp_mb_stats && bsize >= BLOCK_32X32 && do_split &&

-      partition_none_allowed && src_diff_var > 4 &&

-      cm->base_qindex < qindex_split_threshold_lookup[bsize]) {

-    int mb_row = mi_row >> 1;

-    int mb_col = mi_col >> 1;

-    int mb_row_end =

-        VPXMIN(mb_row + num_16x16_blocks_high_lookup[bsize], cm->mb_rows);

-    int mb_col_end =

-        VPXMIN(mb_col + num_16x16_blocks_wide_lookup[bsize], cm->mb_cols);

-    int r, c;

-    // compute a complexity measure, basically measure inconsistency of motion

-    // vectors obtained from the first pass in the current block

-    for (r = mb_row; r < mb_row_end; r++) {

-      for (c = mb_col; c < mb_col_end; c++) {

-        const int mb_index = r * cm->mb_cols + c;

-        MOTION_DIRECTION this_mv;

-        MOTION_DIRECTION right_mv;

-        MOTION_DIRECTION bottom_mv;

-        this_mv =

-            get_motion_direction_fp(cpi->twopass.this_frame_mb_stats[mb_index]);

-        // to its right

-        if (c != mb_col_end - 1) {

-          right_mv = get_motion_direction_fp(

-              cpi->twopass.this_frame_mb_stats[mb_index + 1]);

-          none_complexity += get_motion_inconsistency(this_mv, right_mv);

-        }

-        // to its bottom

-        if (r != mb_row_end - 1) {

-          bottom_mv = get_motion_direction_fp(

-              cpi->twopass.this_frame_mb_stats[mb_index + cm->mb_cols]);

-          none_complexity += get_motion_inconsistency(this_mv, bottom_mv);

-        }

-        // do not count its left and top neighbors to avoid double counting

-      }

-    }

-    if (none_complexity > complexity_16x16_blocks_threshold[bsize]) {

-      partition_none_allowed = 0;

-    }

-  }

-#endif

   pc_tree->partitioning = PARTITION_NONE;

   if (cpi->sf.rd_ml_partition.var_pruning && !frame_is_intra_only(cm)) {

@@ -4291,53 +4138,6 @@

-#if CONFIG_FP_MB_STATS

-        // Check if every 16x16 first pass block statistics has zero

-        // motion and the corresponding first pass residue is small enough.

-        // If that is the case, check the difference variance between the

-        // current frame and the last frame. If the variance is small enough,

-        // stop further splitting in RD optimization

-        if (cpi->use_fp_mb_stats && do_split != 0 &&

-            cm->base_qindex > qindex_skip_threshold_lookup[bsize]) {

-          int mb_row = mi_row >> 1;

-          int mb_col = mi_col >> 1;

-          int mb_row_end =

-              VPXMIN(mb_row + num_16x16_blocks_high_lookup[bsize], cm->mb_rows);

-          int mb_col_end =

-              VPXMIN(mb_col + num_16x16_blocks_wide_lookup[bsize], cm->mb_cols);

-          int r, c;

-          int skip = 1;

-          for (r = mb_row; r < mb_row_end; r++) {

-            for (c = mb_col; c < mb_col_end; c++) {

-              const int mb_index = r * cm->mb_cols + c;

-              if (!(cpi->twopass.this_frame_mb_stats[mb_index] &

-                    FPMB_MOTION_ZERO_MASK) ||

-                  !(cpi->twopass.this_frame_mb_stats[mb_index] &

-                    FPMB_ERROR_SMALL_MASK)) {

-                skip = 0;

-                break;

-              }

-            }

-            if (skip == 0) {

-              break;

-            }

-          }

-          if (skip) {

-            if (src_diff_var == UINT_MAX) {

-              set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);

-              src_diff_var = get_sby_perpixel_diff_variance(

-                  cpi, &x->plane[0].src, mi_row, mi_col, bsize);

-            }

-            if (src_diff_var < 8) {

-              do_split = 0;

-              do_rect = 0;

-            }

-          }

-        }

-#endif

     restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);

@@ -4603,15 +4403,18 @@

     encode_sb(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, bsize,

               pc_tree);

 #if CONFIG_RATE_CTRL

-    // Store partition, motion vector of the superblock.

-    if (output_enabled) {

-      const int num_unit_rows = get_num_unit_4x4(cpi->frame_info.frame_height);

-      const int num_unit_cols = get_num_unit_4x4(cpi->frame_info.frame_width);

-      store_superblock_info(pc_tree, cm->mi_grid_visible, cm->mi_stride,

-                            num_4x4_blocks_wide_lookup[BLOCK_64X64],

-                            num_unit_rows, num_unit_cols, mi_row << 1,

-                            mi_col << 1, cpi->partition_info,

-                            cpi->motion_vector_info);

+    if (oxcf->use_simple_encode_api) {

+      // Store partition, motion vector of the superblock.

+      if (output_enabled) {

+        const int num_unit_rows =

+            get_num_unit_4x4(cpi->frame_info.frame_height);

+        const int num_unit_cols = get_num_unit_4x4(cpi->frame_info.frame_width);

+        store_superblock_info(pc_tree, cm->mi_grid_visible, cm->mi_stride,

+                              num_4x4_blocks_wide_lookup[BLOCK_64X64],

+                              num_unit_rows, num_unit_cols, mi_row << 1,

+                              mi_col << 1, cpi->partition_info,

+                              cpi->motion_vector_info);

+      }

 #endif  // CONFIG_RATE_CTRL

@@ -4700,13 +4503,6 @@

       set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);

       rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, BLOCK_64X64,

                        &dummy_rate, &dummy_dist, 1, td->pc_root);

-    } else if (cpi->partition_search_skippable_frame) {

-      BLOCK_SIZE bsize;

-      set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64);

-      bsize = get_rd_var_based_fixed_partition(cpi, x, mi_row, mi_col);

-      set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);

-      rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, BLOCK_64X64,

-                       &dummy_rate, &dummy_dist, 1, td->pc_root);

     } else if (sf->partition_search_type == VAR_BASED_PARTITION &&

                cm->frame_type != KEY_FRAME) {

       choose_partitioning(cpi, tile_info, x, mi_row, mi_col);

@@ -5981,9 +5777,14 @@

         for (i = 0; i < BLOCK_SIZES; ++i) {

           for (j = 0; j < MAX_MODES; ++j) {

             tile_data->thresh_freq_fact[i][j] = RD_THRESH_INIT_FACT;

-#if CONFIG_CONSISTENT_RECODE || CONFIG_RATE_CTRL

+#if CONFIG_RATE_CTRL

+            if (cpi->oxcf.use_simple_encode_api) {

+              tile_data->thresh_freq_fact_prev[i][j] = RD_THRESH_INIT_FACT;

+            }

+#endif  // CONFIG_RATE_CTRL

+#if CONFIG_CONSISTENT_RECODE

             tile_data->thresh_freq_fact_prev[i][j] = RD_THRESH_INIT_FACT;

-#endif  // CONFIG_CONSISTENT_RECODE || CONFIG_RATE_CTRL

+#endif  // CONFIG_CONSISTENT_RECODE

             tile_data->mode_map[i][j] = j;

@@ -6072,20 +5873,6 @@

       vp9_encode_tile(cpi, &cpi->td, tile_row, tile_col);

-#if CONFIG_FP_MB_STATS

-static int input_fpmb_stats(FIRSTPASS_MB_STATS *firstpass_mb_stats,

-                            VP9_COMMON *cm, uint8_t **this_frame_mb_stats) {

-  uint8_t *mb_stats_in = firstpass_mb_stats->mb_stats_start +

-                         cm->current_video_frame * cm->MBs * sizeof(uint8_t);

-  if (mb_stats_in > firstpass_mb_stats->mb_stats_end) return EOF;

-  *this_frame_mb_stats = mb_stats_in;

-  return 1;

-}

-#endif

 static int compare_kmeans_data(const void *a, const void *b) {

   if (((const KMEANS_DATA *)a)->value > ((const KMEANS_DATA *)b)->value) {

     return 1;

@@ -6292,13 +6079,6 @@

     struct vpx_usec_timer emr_timer;

     vpx_usec_timer_start(&emr_timer);

-#if CONFIG_FP_MB_STATS

-    if (cpi->use_fp_mb_stats) {

-      input_fpmb_stats(&cpi->twopass.firstpass_mb_stats, cm,

-                       &cpi->twopass.this_frame_mb_stats);

-    }

-#endif

     if (!cpi->row_mt) {

       cpi->row_mt_sync_read_ptr = vp9_row_mt_sync_read_dummy;

       cpi->row_mt_sync_write_ptr = vp9_row_mt_sync_write_dummy;

@@ -6406,7 +6186,12 @@

 void vp9_encode_frame(VP9_COMP *cpi) {

   VP9_COMMON *const cm = &cpi->common;

-#if CONFIG_CONSISTENT_RECODE || CONFIG_RATE_CTRL

+#if CONFIG_RATE_CTRL

+  if (cpi->oxcf.use_simple_encode_api) {

+    restore_encode_params(cpi);

+  }

+#endif  // CONFIG_RATE_CTRL

+#if CONFIG_CONSISTENT_RECODE

   restore_encode_params(cpi);

 #endif

@@ -6703,7 +6488,8 @@

     ++td->counts->tx.tx_totals[mi->tx_size];

     ++td->counts->tx.tx_totals[get_uv_tx_size(mi, &xd->plane[1])];

-    if (cm->seg.enabled && cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)

+    if (cm->seg.enabled && cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ &&

+        cpi->cyclic_refresh->content_mode)

       vp9_cyclic_refresh_update_sb_postencode(cpi, mi, mi_row, mi_col, bsize);

     if (cpi->oxcf.pass == 0 && cpi->svc.temporal_layer_id == 0 &&

         (!cpi->use_svc ||

--- a/vp9/encoder/vp9_encoder.c

+++ b/vp9/encoder/vp9_encoder.c

@@ -654,10 +654,15 @@

 static int check_seg_range(int seg_data[8], int range) {

-  return !(abs(seg_data[0]) > range || abs(seg_data[1]) > range ||

-           abs(seg_data[2]) > range || abs(seg_data[3]) > range ||

-           abs(seg_data[4]) > range || abs(seg_data[5]) > range ||

-           abs(seg_data[6]) > range || abs(seg_data[7]) > range);

+  int i;

+  for (i = 0; i < 8; ++i) {

+    // Note abs() alone can't be used as the behavior of abs(INT_MIN) is

+    // undefined.

+    if (seg_data[i] > range || seg_data[i] < -range) {

+      return 0;

+    }

+  }

+  return 1;

 VP9_LEVEL vp9_get_level(const Vp9LevelSpec *const level_spec) {

@@ -1022,10 +1027,12 @@

   cpi->mi_ssim_rdmult_scaling_factors = NULL;

 #if CONFIG_RATE_CTRL

-  free_partition_info(cpi);

-  free_motion_vector_info(cpi);

-  free_fp_motion_vector_info(cpi);

-  free_tpl_stats_info(cpi);

+  if (cpi->oxcf.use_simple_encode_api) {

+    free_partition_info(cpi);

+    free_motion_vector_info(cpi);

+    free_fp_motion_vector_info(cpi);

+    free_tpl_stats_info(cpi);

+  }

 #endif

   vp9_free_ref_frame_buffers(cm->buffer_pool);

@@ -2302,6 +2309,7 @@

       cm, cm->frame_contexts,

       (FRAME_CONTEXT *)vpx_calloc(FRAME_CONTEXTS, sizeof(*cm->frame_contexts)));

+  cpi->compute_frame_low_motion_onepass = 1;

   cpi->use_svc = 0;

   cpi->resize_state = ORIG;

   cpi->external_resize = 0;

@@ -2317,9 +2325,9 @@

   cpi->frame_info = vp9_get_frame_info(oxcf);

   vp9_rc_init(&cpi->oxcf, oxcf->pass, &cpi->rc);

+  vp9_init_rd_parameters(cpi);

   init_frame_indexes(cm);

-  cpi->partition_search_skippable_frame = 0;

   cpi->tile_data = NULL;

   realloc_segmentation_maps(cpi);

@@ -2360,17 +2368,6 @@

         vpx_calloc(cm->MBs * sizeof(*cpi->mbgraph_stats[i].mb_stats), 1));

-#if CONFIG_FP_MB_STATS

-  cpi->use_fp_mb_stats = 0;

-  if (cpi->use_fp_mb_stats) {

-    // a place holder used to store the first pass mb stats in the first pass

-    CHECK_MEM_ERROR(cm, cpi->twopass.frame_mb_stats_buf,

-                    vpx_calloc(cm->MBs * sizeof(uint8_t), 1));

-  } else {

-    cpi->twopass.frame_mb_stats_buf = NULL;

-  }

-#endif

   cpi->refresh_alt_ref_frame = 0;

   cpi->b_calculate_psnr = CONFIG_INTERNAL_STATS;

@@ -2464,7 +2461,12 @@

   cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED;

-  vp9_extrc_init(&cpi->ext_ratectrl);

+  {

+    vpx_codec_err_t codec_status = vp9_extrc_init(&cpi->ext_ratectrl);

+    if (codec_status != VPX_CODEC_OK) {

+      vpx_internal_error(&cm->error, codec_status, "vp9_extrc_init() failed");

+    }

+  }

 #if !CONFIG_REALTIME_ONLY

   if (oxcf->pass == 1) {

@@ -2518,19 +2520,7 @@

       vp9_init_second_pass_spatial_svc(cpi);

     } else {

       int num_frames;

-#if CONFIG_FP_MB_STATS

-      if (cpi->use_fp_mb_stats) {

-        const size_t psz = cpi->common.MBs * sizeof(uint8_t);

-        const int ps = (int)(oxcf->firstpass_mb_stats_in.sz / psz);

-        cpi->twopass.firstpass_mb_stats.mb_stats_start =

-            oxcf->firstpass_mb_stats_in.buf;

-        cpi->twopass.firstpass_mb_stats.mb_stats_end =

-            cpi->twopass.firstpass_mb_stats.mb_stats_start +

-            (ps - 1) * cpi->common.MBs * sizeof(uint8_t);

-      }

-#endif

       cpi->twopass.stats_in_start = oxcf->two_pass_stats_in.buf;

       cpi->twopass.stats_in = cpi->twopass.stats_in_start;

       cpi->twopass.stats_in_end = &cpi->twopass.stats_in[packets - 1];

@@ -2663,10 +2653,12 @@

 #if CONFIG_RATE_CTRL

   encode_command_init(&cpi->encode_command);

-  partition_info_init(cpi);

-  motion_vector_info_init(cpi);

-  fp_motion_vector_info_init(cpi);

-  tpl_stats_info_init(cpi);

+  if (oxcf->use_simple_encode_api) {

+    partition_info_init(cpi);

+    motion_vector_info_init(cpi);

+    fp_motion_vector_info_init(cpi);

+    tpl_stats_info_init(cpi);

+  }

 #endif

   return cpi;

@@ -2831,13 +2823,6 @@

     vpx_free(cpi->mbgraph_stats[i].mb_stats);

-#if CONFIG_FP_MB_STATS

-  if (cpi->use_fp_mb_stats) {

-    vpx_free(cpi->twopass.frame_mb_stats_buf);

-    cpi->twopass.frame_mb_stats_buf = NULL;

-  }

-#endif

   vp9_extrc_delete(&cpi->ext_ratectrl);

   vp9_remove_common(cm);

@@ -3698,6 +3683,10 @@

     cpi->rc.force_max_q = 0;

+  if (cpi->use_svc) {

+    cpi->svc.base_qindex[cpi->svc.spatial_layer_id] = *q;

+  }

   if (!frame_is_intra_only(cm)) {

     vp9_set_high_precision_mv(cpi, (*q) < HIGH_PRECISION_MV_QTHRESH);

@@ -4198,7 +4187,7 @@

   // Update some stats from cyclic refresh, and check for golden frame update.

   if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled &&

-      !frame_is_intra_only(cm))

+      !frame_is_intra_only(cm) && cpi->cyclic_refresh->content_mode)

     vp9_cyclic_refresh_postencode(cpi);

   // Update the skip mb flag probabilities based on the distribution

@@ -4392,11 +4381,24 @@

   int frame_over_shoot_limit;

   int frame_under_shoot_limit;

   int q = 0, q_low = 0, q_high = 0;

+  int last_q_attempt = 0;

   int enable_acl;

 #ifdef AGGRESSIVE_VBR

   int qrange_adj = 1;

 #endif

+  // A flag which indicates whether we are recoding the current frame

+  // when the current frame size is larger than the max frame size in the

+  // external rate control model.

+  // This flag doesn't have any impact when external rate control is not used.

+  int ext_rc_recode = 0;

+  // Maximal frame size allowed by the external rate control.

+  // case: 0, we ignore the max frame size limit, and encode with the qindex

+  // passed in by the external rate control model.

+  // case: -1, we take VP9's decision for the max frame size.

+  int ext_rc_max_frame_size = 0;

+  const int orig_rc_max_frame_bandwidth = rc->max_frame_bandwidth;

 #if CONFIG_RATE_CTRL

   const FRAME_UPDATE_TYPE update_type =

       cpi->twopass.gf_group.update_type[cpi->twopass.gf_group.index];

@@ -4451,11 +4453,6 @@

       loop_at_this_size = 0;

-#if CONFIG_RATE_CTRL

-    if (cpi->encode_command.use_external_target_frame_bits) {

-      q = rq_model_predict_q_index(rq_model, rq_history, rc->this_frame_target);

-    }

-#endif  // CONFIG_RATE_CTRL

     // Decide frame size bounds first time through.

     if (loop_count == 0) {

       vp9_rc_compute_frame_size_bounds(cpi, rc->this_frame_target,

@@ -4498,22 +4495,36 @@

 #if CONFIG_RATE_CTRL

     // TODO(angiebird): This is a hack for making sure the encoder use the

     // external_quantize_index exactly. Avoid this kind of hack later.

-    if (cpi->encode_command.use_external_quantize_index) {

-      q = cpi->encode_command.external_quantize_index;

+    if (cpi->oxcf.use_simple_encode_api) {

+      if (cpi->encode_command.use_external_target_frame_bits) {

+        q = rq_model_predict_q_index(rq_model, rq_history,

+                                     rc->this_frame_target);

+      }

+      if (cpi->encode_command.use_external_quantize_index) {

+        q = cpi->encode_command.external_quantize_index;

+      }

-#endif

-    if (cpi->ext_ratectrl.ready) {

+#endif  // CONFIG_RATE_CTRL

+    if (cpi->ext_ratectrl.ready && !ext_rc_recode) {

+      vpx_codec_err_t codec_status;

       const GF_GROUP *gf_group = &cpi->twopass.gf_group;

       vpx_rc_encodeframe_decision_t encode_frame_decision;

       FRAME_UPDATE_TYPE update_type = gf_group->update_type[gf_group->index];

       const int ref_frame_flags = get_ref_frame_flags(cpi);

       RefCntBuffer *ref_frame_bufs[MAX_INTER_REF_FRAMES];

+      const RefCntBuffer *curr_frame_buf =

+          get_ref_cnt_buffer(cm, cm->new_fb_idx);

       get_ref_frame_bufs(cpi, ref_frame_bufs);

-      vp9_extrc_get_encodeframe_decision(

-          &cpi->ext_ratectrl, cm->current_video_frame,

+      codec_status = vp9_extrc_get_encodeframe_decision(

+          &cpi->ext_ratectrl, curr_frame_buf->frame_index,

           cm->current_frame_coding_index, gf_group->index, update_type,

           ref_frame_bufs, ref_frame_flags, &encode_frame_decision);

+      if (codec_status != VPX_CODEC_OK) {

+        vpx_internal_error(&cm->error, codec_status,

+                           "vp9_extrc_get_encodeframe_decision() failed");

+      }

       q = encode_frame_decision.q_index;

+      ext_rc_max_frame_size = encode_frame_decision.max_frame_size;

     vp9_set_quantizer(cpi, q);

@@ -4555,36 +4566,61 @@

     if (cpi->ext_ratectrl.ready) {

-      break;

+      last_q_attempt = q;

+      // In general, for the external rate control, we take the qindex provided

+      // as input and encode the frame with this qindex faithfully. However,

+      // in some extreme scenarios, the provided qindex leads to a massive

+      // overshoot of frame size. In this case, we fall back to VP9's decision

+      // to pick a new qindex and recode the frame. We return the new qindex

+      // through the API to the external model.

+      if (ext_rc_max_frame_size == 0) {

+        break;

+      } else if (ext_rc_max_frame_size == -1) {

+        if (rc->projected_frame_size < rc->max_frame_bandwidth) {

+          break;

+        }

+      } else {

+        if (rc->projected_frame_size < ext_rc_max_frame_size) {

+          break;

+        }

+      }

+      rc->max_frame_bandwidth = ext_rc_max_frame_size;

+      // If the current frame size exceeds the ext_rc_max_frame_size,

+      // we adjust the worst qindex to meet the frame size constraint.

+      q_high = 255;

+      ext_rc_recode = 1;

 #if CONFIG_RATE_CTRL

-    // This part needs to be after save_coding_context() because

-    // restore_coding_context will be called in the end of this function.

-    // TODO(angiebird): This is a hack for making sure the encoder use the

-    // external_quantize_index exactly. Avoid this kind of hack later.

-    if (cpi->encode_command.use_external_quantize_index) {

-      break;

-    }

+    if (cpi->oxcf.use_simple_encode_api) {

+      // This part needs to be after save_coding_context() because

+      // restore_coding_context will be called in the end of this function.

+      // TODO(angiebird): This is a hack for making sure the encoder use the

+      // external_quantize_index exactly. Avoid this kind of hack later.

+      if (cpi->encode_command.use_external_quantize_index) {

+        break;

+      }

-    if (cpi->encode_command.use_external_target_frame_bits) {

-      const double percent_diff = get_bits_percent_diff(

-          rc->this_frame_target, rc->projected_frame_size);

-      update_rq_history(rq_history, rc->this_frame_target,

-                        rc->projected_frame_size, q);

-      loop_count += 1;

+      if (cpi->encode_command.use_external_target_frame_bits) {

+        const double percent_diff = get_bits_percent_diff(

+            rc->this_frame_target, rc->projected_frame_size);

+        update_rq_history(rq_history, rc->this_frame_target,

+                          rc->projected_frame_size, q);

+        loop_count += 1;

-      rq_model_update(rq_history, rc->this_frame_target, rq_model);

+        rq_model_update(rq_history, rc->this_frame_target, rq_model);

-      // Check if we hit the target bitrate.

-      if (percent_diff <= cpi->encode_command.target_frame_bits_error_percent ||

-          rq_history->recode_count >= RATE_CTRL_MAX_RECODE_NUM ||

-          rq_history->q_index_low >= rq_history->q_index_high) {

-        break;

-      }

+        // Check if we hit the target bitrate.

+        if (percent_diff <=

+                cpi->encode_command.target_frame_bits_error_percent ||

+            rq_history->recode_count >= RATE_CTRL_MAX_RECODE_NUM ||

+            rq_history->q_index_low >= rq_history->q_index_high) {

+          break;

+        }

-      loop = 1;

-      restore_coding_context(cpi);

-      continue;

+        loop = 1;

+        restore_coding_context(cpi);

+        continue;

+      }

 #endif  // CONFIG_RATE_CTRL

@@ -4754,6 +4790,23 @@

         rc->projected_frame_size < rc->max_frame_bandwidth)

       loop = 0;

+    // Special handling of external max frame size constraint

+    if (ext_rc_recode) {

+      // If the largest q is not able to meet the max frame size limit,

+      // do nothing.

+      if (rc->projected_frame_size > ext_rc_max_frame_size &&

+          last_q_attempt == 255) {

+        break;

+      }

+      // If VP9's q selection leads to a smaller q, we force it to use

+      // a larger q to better approximate the external max frame size

+      // constraint.

+      if (rc->projected_frame_size > ext_rc_max_frame_size &&

+          q <= last_q_attempt) {

+        q = VPXMIN(255, last_q_attempt + 1);

+      }

+    }

     if (loop) {

       ++loop_count;

       ++loop_at_this_size;

@@ -4767,6 +4820,8 @@

       if (loop) restore_coding_context(cpi);

   } while (loop);

+  rc->max_frame_bandwidth = orig_rc_max_frame_bandwidth;

 #ifdef AGGRESSIVE_VBR

   if (two_pass_first_group_inter(cpi)) {

     cpi->twopass.active_worst_quality =

@@ -5300,17 +5355,81 @@

 #if !CONFIG_REALTIME_ONLY

-static void update_encode_frame_result(

+static void update_encode_frame_result_basic(

+    FRAME_UPDATE_TYPE update_type, int show_idx, int quantize_index,

+    ENCODE_FRAME_RESULT *encode_frame_result) {

+  encode_frame_result->show_idx = show_idx;

+  encode_frame_result->update_type = update_type;

+  encode_frame_result->quantize_index = quantize_index;

+}

+#if CONFIG_RATE_CTRL

+static void yv12_buffer_to_image_buffer(const YV12_BUFFER_CONFIG *yv12_buffer,

+                                        IMAGE_BUFFER *image_buffer) {

+  const uint8_t *src_buf_ls[3] = { yv12_buffer->y_buffer, yv12_buffer->u_buffer,

+                                   yv12_buffer->v_buffer };

+  const int src_stride_ls[3] = { yv12_buffer->y_stride, yv12_buffer->uv_stride,

+                                 yv12_buffer->uv_stride };

+  const int w_ls[3] = { yv12_buffer->y_crop_width, yv12_buffer->uv_crop_width,

+                        yv12_buffer->uv_crop_width };

+  const int h_ls[3] = { yv12_buffer->y_crop_height, yv12_buffer->uv_crop_height,

+                        yv12_buffer->uv_crop_height };

+  int plane;

+  for (plane = 0; plane < 3; ++plane) {

+    const int src_stride = src_stride_ls[plane];

+    const int w = w_ls[plane];

+    const int h = h_ls[plane];

+    const uint8_t *src_buf = src_buf_ls[plane];

+    uint8_t *dst_buf = image_buffer->plane_buffer[plane];

+    int r;

+    assert(image_buffer->plane_width[plane] == w);

+    assert(image_buffer->plane_height[plane] == h);

+    for (r = 0; r < h; ++r) {

+      memcpy(dst_buf, src_buf, sizeof(*src_buf) * w);

+      src_buf += src_stride;

+      dst_buf += w;

+    }

+  }

+}

+// This function will update extra information specific for simple_encode APIs

+static void update_encode_frame_result_simple_encode(

     int ref_frame_flags, FRAME_UPDATE_TYPE update_type,

     const YV12_BUFFER_CONFIG *source_frame, const RefCntBuffer *coded_frame_buf,

-    RefCntBuffer *ref_frame_buf[MAX_INTER_REF_FRAMES], int quantize_index,

+    RefCntBuffer *ref_frame_bufs[MAX_INTER_REF_FRAMES], int quantize_index,

     uint32_t bit_depth, uint32_t input_bit_depth, const FRAME_COUNTS *counts,

-#if CONFIG_RATE_CTRL

     const PARTITION_INFO *partition_info,

     const MOTION_VECTOR_INFO *motion_vector_info,

     const TplDepStats *tpl_stats_info,

+    ENCODE_FRAME_RESULT *encode_frame_result) {

+  PSNR_STATS psnr;

+  update_encode_frame_result_basic(update_type, coded_frame_buf->frame_index,

+                                   quantize_index, encode_frame_result);

+#if CONFIG_VP9_HIGHBITDEPTH

+  vpx_calc_highbd_psnr(source_frame, &coded_frame_buf->buf, &psnr, bit_depth,

+                       input_bit_depth);

+#else   // CONFIG_VP9_HIGHBITDEPTH

+  (void)bit_depth;

+  (void)input_bit_depth;

+  vpx_calc_psnr(source_frame, &coded_frame_buf->buf, &psnr);

+#endif  // CONFIG_VP9_HIGHBITDEPTH

+  encode_frame_result->frame_coding_index = coded_frame_buf->frame_coding_index;

+  vp9_get_ref_frame_info(update_type, ref_frame_flags, ref_frame_bufs,

+                         encode_frame_result->ref_frame_coding_indexes,

+                         encode_frame_result->ref_frame_valid_list);

+  encode_frame_result->psnr = psnr.psnr[0];

+  encode_frame_result->sse = psnr.sse[0];

+  encode_frame_result->frame_counts = *counts;

+  encode_frame_result->partition_info = partition_info;

+  encode_frame_result->motion_vector_info = motion_vector_info;

+  encode_frame_result->tpl_stats_info = tpl_stats_info;

+  if (encode_frame_result->coded_frame.allocated) {

+    yv12_buffer_to_image_buffer(&coded_frame_buf->buf,

+                                &encode_frame_result->coded_frame);

+  }

+}

 #endif  // CONFIG_RATE_CTRL

-    ENCODE_FRAME_RESULT *encode_frame_result);

 #endif  // !CONFIG_REALTIME_ONLY

 static void encode_frame_to_data_rate(

@@ -5405,10 +5524,14 @@

   memset(cpi->mode_chosen_counts, 0,

          MAX_MODES * sizeof(*cpi->mode_chosen_counts));

 #endif

-#if CONFIG_CONSISTENT_RECODE || CONFIG_RATE_CTRL

+#if CONFIG_CONSISTENT_RECODE

   // Backup to ensure consistency between recodes

   save_encode_params(cpi);

-#endif  // CONFIG_CONSISTENT_RECODE || CONFIG_RATE_CTRL

+#elif CONFIG_RATE_CTRL

+  if (cpi->oxcf.use_simple_encode_api) {

+    save_encode_params(cpi);

+  }

+#endif

   if (cpi->sf.recode_loop == DISALLOW_RECODE) {

     if (!encode_without_recode_loop(cpi, size, dest)) return;

@@ -5487,9 +5610,13 @@

     const RefCntBuffer *coded_frame_buf =

         get_ref_cnt_buffer(cm, cm->new_fb_idx);

-    vp9_extrc_update_encodeframe_result(

+    vpx_codec_err_t codec_status = vp9_extrc_update_encodeframe_result(

         &cpi->ext_ratectrl, (*size) << 3, cpi->Source, &coded_frame_buf->buf,

-        cm->bit_depth, cpi->oxcf.input_bit_depth);

+        cm->bit_depth, cpi->oxcf.input_bit_depth, cm->base_qindex);

+    if (codec_status != VPX_CODEC_OK) {

+      vpx_internal_error(&cm->error, codec_status,

+                         "vp9_extrc_update_encodeframe_result() failed");

+    }

 #if CONFIG_REALTIME_ONLY

   (void)encode_frame_result;

@@ -5496,10 +5623,12 @@

   assert(encode_frame_result == NULL);

 #else  // CONFIG_REALTIME_ONLY

   if (encode_frame_result != NULL) {

-    const int ref_frame_flags = get_ref_frame_flags(cpi);

     const RefCntBuffer *coded_frame_buf =

         get_ref_cnt_buffer(cm, cm->new_fb_idx);

     RefCntBuffer *ref_frame_bufs[MAX_INTER_REF_FRAMES];

+    FRAME_UPDATE_TYPE update_type =

+        cpi->twopass.gf_group.update_type[cpi->twopass.gf_group.index];

+    int quantize_index = vp9_get_quantizer(cpi);

     get_ref_frame_bufs(cpi, ref_frame_bufs);

     // update_encode_frame_result() depends on twopass.gf_group.index and

     // cm->new_fb_idx, cpi->Source, cpi->lst_fb_idx, cpi->gld_fb_idx and

@@ -5517,15 +5646,21 @@

     // This function needs to be called before vp9_update_reference_frames().

     // TODO(angiebird): Improve the codebase to make the update of frame

     // dependent variables more robust.

-    update_encode_frame_result(

-        ref_frame_flags,

-        cpi->twopass.gf_group.update_type[cpi->twopass.gf_group.index],

-        cpi->Source, coded_frame_buf, ref_frame_bufs, vp9_get_quantizer(cpi),

-        cm->bit_depth, cpi->oxcf.input_bit_depth, cpi->td.counts,

+    update_encode_frame_result_basic(update_type, coded_frame_buf->frame_index,

+                                     quantize_index, encode_frame_result);

 #if CONFIG_RATE_CTRL

-        cpi->partition_info, cpi->motion_vector_info, cpi->tpl_stats_info,

+    if (cpi->oxcf.use_simple_encode_api) {

+      const int ref_frame_flags = get_ref_frame_flags(cpi);

+      update_encode_frame_result_simple_encode(

+          ref_frame_flags,

+          cpi->twopass.gf_group.update_type[cpi->twopass.gf_group.index],

+          cpi->Source, coded_frame_buf, ref_frame_bufs, quantize_index,

+          cm->bit_depth, cpi->oxcf.input_bit_depth, cpi->td.counts,

+          cpi->partition_info, cpi->motion_vector_info, cpi->tpl_stats_info,

+          encode_frame_result);

+    }

 #endif  // CONFIG_RATE_CTRL

-        encode_frame_result);

 #endif  // CONFIG_REALTIME_ONLY

@@ -5591,7 +5726,8 @@

   vp9_rc_postencode_update(cpi, *size);

-  if (oxcf->pass == 0 && !frame_is_intra_only(cm) &&

+  if (cpi->compute_frame_low_motion_onepass && oxcf->pass == 0 &&

+      !frame_is_intra_only(cm) &&

       (!cpi->use_svc ||

        (cpi->use_svc &&

         !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame &&

@@ -5680,8 +5816,13 @@

   cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED;

   if (cpi->common.current_frame_coding_index == 0) {

-    vp9_extrc_send_firstpass_stats(&cpi->ext_ratectrl,

-                                   &cpi->twopass.first_pass_info);

+    VP9_COMMON *cm = &cpi->common;

+    const vpx_codec_err_t codec_status = vp9_extrc_send_firstpass_stats(

+        &cpi->ext_ratectrl, &cpi->twopass.first_pass_info);

+    if (codec_status != VPX_CODEC_OK) {

+      vpx_internal_error(&cm->error, codec_status,

+                         "vp9_extrc_send_firstpass_stats() failed");

+    }

 #if CONFIG_MISMATCH_DEBUG

   mismatch_move_frame_idx_w();

@@ -7440,7 +7581,9 @@

 #endif  // CONFIG_NON_GREEDY_MV

 #if CONFIG_RATE_CTRL

-  accumulate_frame_tpl_stats(cpi);

+  if (cpi->oxcf.use_simple_encode_api) {

+    accumulate_frame_tpl_stats(cpi);

+  }

 #endif  // CONFIG_RATE_CTRL

@@ -7468,206 +7611,6 @@

-#if !CONFIG_REALTIME_ONLY

-#if CONFIG_RATE_CTRL

-static void copy_frame_counts(const FRAME_COUNTS *input_counts,

-                              FRAME_COUNTS *output_counts) {

-  int i, j, k, l, m, n;

-  for (i = 0; i < BLOCK_SIZE_GROUPS; ++i) {

-    for (j = 0; j < INTRA_MODES; ++j) {

-      output_counts->y_mode[i][j] = input_counts->y_mode[i][j];

-    }

-  }

-  for (i = 0; i < INTRA_MODES; ++i) {

-    for (j = 0; j < INTRA_MODES; ++j) {

-      output_counts->uv_mode[i][j] = input_counts->uv_mode[i][j];

-    }

-  }

-  for (i = 0; i < PARTITION_CONTEXTS; ++i) {

-    for (j = 0; j < PARTITION_TYPES; ++j) {

-      output_counts->partition[i][j] = input_counts->partition[i][j];

-    }

-  }

-  for (i = 0; i < TX_SIZES; ++i) {

-    for (j = 0; j < PLANE_TYPES; ++j) {

-      for (k = 0; k < REF_TYPES; ++k) {

-        for (l = 0; l < COEF_BANDS; ++l) {

-          for (m = 0; m < COEFF_CONTEXTS; ++m) {

-            output_counts->eob_branch[i][j][k][l][m] =

-                input_counts->eob_branch[i][j][k][l][m];

-            for (n = 0; n < UNCONSTRAINED_NODES + 1; ++n) {

-              output_counts->coef[i][j][k][l][m][n] =

-                  input_counts->coef[i][j][k][l][m][n];

-            }

-          }

-        }

-      }

-    }

-  }

-  for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) {

-    for (j = 0; j < SWITCHABLE_FILTERS; ++j) {

-      output_counts->switchable_interp[i][j] =

-          input_counts->switchable_interp[i][j];

-    }

-  }

-  for (i = 0; i < INTER_MODE_CONTEXTS; ++i) {

-    for (j = 0; j < INTER_MODES; ++j) {

-      output_counts->inter_mode[i][j] = input_counts->inter_mode[i][j];

-    }

-  }

-  for (i = 0; i < INTRA_INTER_CONTEXTS; ++i) {

-    for (j = 0; j < 2; ++j) {

-      output_counts->intra_inter[i][j] = input_counts->intra_inter[i][j];

-    }

-  }

-  for (i = 0; i < COMP_INTER_CONTEXTS; ++i) {

-    for (j = 0; j < 2; ++j) {

-      output_counts->comp_inter[i][j] = input_counts->comp_inter[i][j];

-    }

-  }

-  for (i = 0; i < REF_CONTEXTS; ++i) {

-    for (j = 0; j < 2; ++j) {

-      for (k = 0; k < 2; ++k) {

-        output_counts->single_ref[i][j][k] = input_counts->single_ref[i][j][k];

-      }

-    }

-  }

-  for (i = 0; i < REF_CONTEXTS; ++i) {

-    for (j = 0; j < 2; ++j) {

-      output_counts->comp_ref[i][j] = input_counts->comp_ref[i][j];

-    }

-  }

-  for (i = 0; i < SKIP_CONTEXTS; ++i) {

-    for (j = 0; j < 2; ++j) {

-      output_counts->skip[i][j] = input_counts->skip[i][j];

-    }

-  }

-  for (i = 0; i < TX_SIZE_CONTEXTS; i++) {

-    for (j = 0; j < TX_SIZES; j++) {

-      output_counts->tx.p32x32[i][j] = input_counts->tx.p32x32[i][j];

-    }

-    for (j = 0; j < TX_SIZES - 1; j++) {

-      output_counts->tx.p16x16[i][j] = input_counts->tx.p16x16[i][j];

-    }

-    for (j = 0; j < TX_SIZES - 2; j++) {

-      output_counts->tx.p8x8[i][j] = input_counts->tx.p8x8[i][j];

-    }

-  }

-  for (i = 0; i < TX_SIZES; i++) {

-    output_counts->tx.tx_totals[i] = input_counts->tx.tx_totals[i];

-  }

-  for (i = 0; i < MV_JOINTS; i++) {

-    output_counts->mv.joints[i] = input_counts->mv.joints[i];

-  }

-  for (k = 0; k < 2; k++) {

-    nmv_component_counts *const comps = &output_counts->mv.comps[k];

-    const nmv_component_counts *const comps_t = &input_counts->mv.comps[k];

-    for (i = 0; i < 2; i++) {

-      comps->sign[i] = comps_t->sign[i];

-      comps->class0_hp[i] = comps_t->class0_hp[i];

-      comps->hp[i] = comps_t->hp[i];

-    }

-    for (i = 0; i < MV_CLASSES; i++) {

-      comps->classes[i] = comps_t->classes[i];

-    }

-    for (i = 0; i < CLASS0_SIZE; i++) {

-      comps->class0[i] = comps_t->class0[i];

-      for (j = 0; j < MV_FP_SIZE; j++) {

-        comps->class0_fp[i][j] = comps_t->class0_fp[i][j];

-      }

-    }

-    for (i = 0; i < MV_OFFSET_BITS; i++) {

-      for (j = 0; j < 2; j++) {

-        comps->bits[i][j] = comps_t->bits[i][j];

-      }

-    }

-    for (i = 0; i < MV_FP_SIZE; i++) {

-      comps->fp[i] = comps_t->fp[i];

-    }

-  }

-}

-static void yv12_buffer_to_image_buffer(const YV12_BUFFER_CONFIG *yv12_buffer,

-                                        IMAGE_BUFFER *image_buffer) {

-  const uint8_t *src_buf_ls[3] = { yv12_buffer->y_buffer, yv12_buffer->u_buffer,

-                                   yv12_buffer->v_buffer };

-  const int src_stride_ls[3] = { yv12_buffer->y_stride, yv12_buffer->uv_stride,

-                                 yv12_buffer->uv_stride };

-  const int w_ls[3] = { yv12_buffer->y_crop_width, yv12_buffer->uv_crop_width,

-                        yv12_buffer->uv_crop_width };

-  const int h_ls[3] = { yv12_buffer->y_crop_height, yv12_buffer->uv_crop_height,

-                        yv12_buffer->uv_crop_height };

-  int plane;

-  for (plane = 0; plane < 3; ++plane) {

-    const int src_stride = src_stride_ls[plane];

-    const int w = w_ls[plane];

-    const int h = h_ls[plane];

-    const uint8_t *src_buf = src_buf_ls[plane];

-    uint8_t *dst_buf = image_buffer->plane_buffer[plane];

-    int r;

-    assert(image_buffer->plane_width[plane] == w);

-    assert(image_buffer->plane_height[plane] == h);

-    for (r = 0; r < h; ++r) {

-      memcpy(dst_buf, src_buf, sizeof(*src_buf) * w);

-      src_buf += src_stride;

-      dst_buf += w;

-    }

-  }

-}

-#endif  // CONFIG_RATE_CTRL

-static void update_encode_frame_result(

-    int ref_frame_flags, FRAME_UPDATE_TYPE update_type,

-    const YV12_BUFFER_CONFIG *source_frame, const RefCntBuffer *coded_frame_buf,

-    RefCntBuffer *ref_frame_bufs[MAX_INTER_REF_FRAMES], int quantize_index,

-    uint32_t bit_depth, uint32_t input_bit_depth, const FRAME_COUNTS *counts,

-#if CONFIG_RATE_CTRL

-    const PARTITION_INFO *partition_info,

-    const MOTION_VECTOR_INFO *motion_vector_info,

-    const TplDepStats *tpl_stats_info,

-#endif  // CONFIG_RATE_CTRL

-    ENCODE_FRAME_RESULT *encode_frame_result) {

-#if CONFIG_RATE_CTRL

-  PSNR_STATS psnr;

-#if CONFIG_VP9_HIGHBITDEPTH

-  vpx_calc_highbd_psnr(source_frame, &coded_frame_buf->buf, &psnr, bit_depth,

-                       input_bit_depth);

-#else   // CONFIG_VP9_HIGHBITDEPTH

-  (void)bit_depth;

-  (void)input_bit_depth;

-  vpx_calc_psnr(source_frame, &coded_frame_buf->buf, &psnr);

-#endif  // CONFIG_VP9_HIGHBITDEPTH

-  encode_frame_result->frame_coding_index = coded_frame_buf->frame_coding_index;

-  vp9_get_ref_frame_info(update_type, ref_frame_flags, ref_frame_bufs,

-                         encode_frame_result->ref_frame_coding_indexes,

-                         encode_frame_result->ref_frame_valid_list);

-  encode_frame_result->psnr = psnr.psnr[0];

-  encode_frame_result->sse = psnr.sse[0];

-  copy_frame_counts(counts, &encode_frame_result->frame_counts);

-  encode_frame_result->partition_info = partition_info;

-  encode_frame_result->motion_vector_info = motion_vector_info;

-  encode_frame_result->tpl_stats_info = tpl_stats_info;

-  if (encode_frame_result->coded_frame.allocated) {

-    yv12_buffer_to_image_buffer(&coded_frame_buf->buf,

-                                &encode_frame_result->coded_frame);

-  }

-#else   // CONFIG_RATE_CTRL

-  (void)ref_frame_flags;

-  (void)bit_depth;

-  (void)input_bit_depth;

-  (void)source_frame;

-  (void)coded_frame_buf;

-  (void)ref_frame_bufs;

-  (void)counts;

-#endif  // CONFIG_RATE_CTRL

-  encode_frame_result->show_idx = coded_frame_buf->frame_index;

-  encode_frame_result->update_type = update_type;

-  encode_frame_result->quantize_index = quantize_index;

-}

-#endif  // !CONFIG_REALTIME_ONLY

 void vp9_init_encode_frame_result(ENCODE_FRAME_RESULT *encode_frame_result) {

   encode_frame_result->show_idx = -1;  // Actual encoding doesn't happen.

 #if CONFIG_RATE_CTRL

@@ -7861,9 +7804,12 @@

   cm->new_fb_idx = get_free_fb(cm);

   if (cm->new_fb_idx == INVALID_IDX) return -1;

   cm->cur_frame = &pool->frame_bufs[cm->new_fb_idx];

+  // If the frame buffer for current frame is the same as previous frame, MV in

+  // the base layer shouldn't be used as it'll cause data race.

+  if (cpi->svc.spatial_layer_id > 0 && cm->cur_frame == cm->prev_frame) {

+    cpi->svc.use_base_mv = 0;

+  }

   // Start with a 0 size frame.

   *size = 0;

--- a/vp9/encoder/vp9_encoder.h

+++ b/vp9/encoder/vp9_encoder.h

@@ -273,10 +273,6 @@

   vpx_fixed_buf_t two_pass_stats_in;

-#if CONFIG_FP_MB_STATS

-  vpx_fixed_buf_t firstpass_mb_stats_in;

-#endif

   vp8e_tuning tuning;

   vp9e_tune_content content;

 #if CONFIG_VP9_HIGHBITDEPTH

@@ -291,6 +287,7 @@

   int row_mt;

   unsigned int motion_vector_unit_test;

   int delta_q_uv;

+  int use_simple_encode_api;  // Use SimpleEncode APIs or not

 } VP9EncoderConfig;

 static INLINE int is_lossless_requested(const VP9EncoderConfig *cfg) {

@@ -710,9 +707,6 @@

   TileDataEnc *tile_data;

   int allocated_tiles;  // Keep track of memory allocated for tiles.

-  // For a still frame, this flag is set to 1 to skip partition search.

-  int partition_search_skippable_frame;

   int scaled_ref_idx[REFS_PER_FRAME];

   int lst_fb_idx;

   int gld_fb_idx;

@@ -746,6 +740,7 @@

   // Ambient reconstruction err target for force key frames

   int64_t ambient_err;

+  RD_CONTROL rd_ctrl;

   RD_OPT rd;

   CODING_CONTEXT coding_context;

@@ -804,10 +799,6 @@

   uint64_t time_pick_lpf;

   uint64_t time_encode_sb_row;

-#if CONFIG_FP_MB_STATS

-  int use_fp_mb_stats;

-#endif

   TWO_PASS twopass;

   // Force recalculation of segment_ids for each mode info

@@ -958,6 +949,8 @@

   uint8_t *content_state_sb_fd;

   int compute_source_sad_onepass;

+  int compute_frame_low_motion_onepass;

   LevelConstraint level_constraint;

--- a/vp9/encoder/vp9_ext_ratectrl.c

+++ b/vp9/encoder/vp9_ext_ratectrl.c

@@ -13,31 +13,56 @@

 #include "vp9/common/vp9_common.h"

 #include "vpx_dsp/psnr.h"

-void vp9_extrc_init(EXT_RATECTRL *ext_ratectrl) { vp9_zero(*ext_ratectrl); }

+vpx_codec_err_t vp9_extrc_init(EXT_RATECTRL *ext_ratectrl) {

+  if (ext_ratectrl == NULL) {

+    return VPX_CODEC_INVALID_PARAM;

+  }

+  vp9_zero(*ext_ratectrl);

+  return VPX_CODEC_OK;

+}

-void vp9_extrc_create(vpx_rc_funcs_t funcs, vpx_rc_config_t ratectrl_config,

-                      EXT_RATECTRL *ext_ratectrl) {

+vpx_codec_err_t vp9_extrc_create(vpx_rc_funcs_t funcs,

+                                 vpx_rc_config_t ratectrl_config,

+                                 EXT_RATECTRL *ext_ratectrl) {

+  vpx_rc_status_t rc_status;

   vpx_rc_firstpass_stats_t *rc_firstpass_stats;

+  if (ext_ratectrl == NULL) {

+    return VPX_CODEC_INVALID_PARAM;

+  }

   vp9_extrc_delete(ext_ratectrl);

   ext_ratectrl->funcs = funcs;

   ext_ratectrl->ratectrl_config = ratectrl_config;

-  ext_ratectrl->funcs.create_model(ext_ratectrl->funcs.priv,

-                                   &ext_ratectrl->ratectrl_config,

-                                   &ext_ratectrl->model);

+  rc_status = ext_ratectrl->funcs.create_model(ext_ratectrl->funcs.priv,

+                                               &ext_ratectrl->ratectrl_config,

+                                               &ext_ratectrl->model);

+  if (rc_status == VPX_RC_ERROR) {

+    return VPX_CODEC_ERROR;

+  }

   rc_firstpass_stats = &ext_ratectrl->rc_firstpass_stats;

   rc_firstpass_stats->num_frames = ratectrl_config.show_frame_count;

   rc_firstpass_stats->frame_stats =

       vpx_malloc(sizeof(*rc_firstpass_stats->frame_stats) *

                  rc_firstpass_stats->num_frames);

+  if (rc_firstpass_stats->frame_stats == NULL) {

+    return VPX_CODEC_MEM_ERROR;

+  }

   ext_ratectrl->ready = 1;

+  return VPX_CODEC_OK;

-void vp9_extrc_delete(EXT_RATECTRL *ext_ratectrl) {

+vpx_codec_err_t vp9_extrc_delete(EXT_RATECTRL *ext_ratectrl) {

+  if (ext_ratectrl == NULL) {

+    return VPX_CODEC_INVALID_PARAM;

+  }

   if (ext_ratectrl->ready) {

-    ext_ratectrl->funcs.delete_model(ext_ratectrl->model);

+    vpx_rc_status_t rc_status =

+        ext_ratectrl->funcs.delete_model(ext_ratectrl->model);

+    if (rc_status == VPX_RC_ERROR) {

+      return VPX_CODEC_ERROR;

+    }

     vpx_free(ext_ratectrl->rc_firstpass_stats.frame_stats);

-  vp9_extrc_init(ext_ratectrl);

+  return vp9_extrc_init(ext_ratectrl);

 static void gen_rc_firstpass_stats(const FIRSTPASS_STATS *stats,

@@ -69,9 +94,13 @@

   rc_frame_stats->count = stats->count;

-void vp9_extrc_send_firstpass_stats(EXT_RATECTRL *ext_ratectrl,

-                                    const FIRST_PASS_INFO *first_pass_info) {

+vpx_codec_err_t vp9_extrc_send_firstpass_stats(

+    EXT_RATECTRL *ext_ratectrl, const FIRST_PASS_INFO *first_pass_info) {

+  if (ext_ratectrl == NULL) {

+    return VPX_CODEC_INVALID_PARAM;

+  }

   if (ext_ratectrl->ready) {

+    vpx_rc_status_t rc_status;

     vpx_rc_firstpass_stats_t *rc_firstpass_stats =

         &ext_ratectrl->rc_firstpass_stats;

     int i;

@@ -80,9 +109,13 @@

       gen_rc_firstpass_stats(&first_pass_info->stats[i],

                              &rc_firstpass_stats->frame_stats[i]);

-    ext_ratectrl->funcs.send_firstpass_stats(ext_ratectrl->model,

-                                             rc_firstpass_stats);

+    rc_status = ext_ratectrl->funcs.send_firstpass_stats(ext_ratectrl->model,

+                                                         rc_firstpass_stats);

+    if (rc_status == VPX_RC_ERROR) {

+      return VPX_CODEC_ERROR;

+    }

+  return VPX_CODEC_OK;

 static int extrc_get_frame_type(FRAME_UPDATE_TYPE update_type) {

@@ -102,12 +135,16 @@

-void vp9_extrc_get_encodeframe_decision(

+vpx_codec_err_t vp9_extrc_get_encodeframe_decision(

     EXT_RATECTRL *ext_ratectrl, int show_index, int coding_index, int gop_index,

     FRAME_UPDATE_TYPE update_type,

     RefCntBuffer *ref_frame_bufs[MAX_INTER_REF_FRAMES], int ref_frame_flags,

     vpx_rc_encodeframe_decision_t *encode_frame_decision) {

+  if (ext_ratectrl == NULL) {

+    return VPX_CODEC_INVALID_PARAM;

+  }

   if (ext_ratectrl->ready) {

+    vpx_rc_status_t rc_status;

     vpx_rc_encodeframe_info_t encode_frame_info;

     encode_frame_info.show_index = show_index;

     encode_frame_info.coding_index = coding_index;

@@ -118,24 +155,32 @@

                            encode_frame_info.ref_frame_coding_indexes,

                            encode_frame_info.ref_frame_valid_list);

-    ext_ratectrl->funcs.get_encodeframe_decision(

+    rc_status = ext_ratectrl->funcs.get_encodeframe_decision(

         ext_ratectrl->model, &encode_frame_info, encode_frame_decision);

+    if (rc_status == VPX_RC_ERROR) {

+      return VPX_CODEC_ERROR;

+    }

+  return VPX_CODEC_OK;

-void vp9_extrc_update_encodeframe_result(EXT_RATECTRL *ext_ratectrl,

-                                         int64_t bit_count,

-                                         const YV12_BUFFER_CONFIG *source_frame,

-                                         const YV12_BUFFER_CONFIG *coded_frame,

-                                         uint32_t bit_depth,

-                                         uint32_t input_bit_depth) {

+vpx_codec_err_t vp9_extrc_update_encodeframe_result(

+    EXT_RATECTRL *ext_ratectrl, int64_t bit_count,

+    const YV12_BUFFER_CONFIG *source_frame,

+    const YV12_BUFFER_CONFIG *coded_frame, uint32_t bit_depth,

+    uint32_t input_bit_depth, const int actual_encoding_qindex) {

+  if (ext_ratectrl == NULL) {

+    return VPX_CODEC_INVALID_PARAM;

+  }

   if (ext_ratectrl->ready) {

     PSNR_STATS psnr;

+    vpx_rc_status_t rc_status;

     vpx_rc_encodeframe_result_t encode_frame_result;

     encode_frame_result.bit_count = bit_count;

     encode_frame_result.pixel_count =

-        source_frame->y_width * source_frame->y_height +

-        2 * source_frame->uv_width * source_frame->uv_height;

+        source_frame->y_crop_width * source_frame->y_crop_height +

+        2 * source_frame->uv_crop_width * source_frame->uv_crop_height;

+    encode_frame_result.actual_encoding_qindex = actual_encoding_qindex;

 #if CONFIG_VP9_HIGHBITDEPTH

     vpx_calc_highbd_psnr(source_frame, coded_frame, &psnr, bit_depth,

                          input_bit_depth);

@@ -145,7 +190,11 @@

     vpx_calc_psnr(source_frame, coded_frame, &psnr);

 #endif

     encode_frame_result.sse = psnr.sse[0];

-    ext_ratectrl->funcs.update_encodeframe_result(ext_ratectrl->model,

-                                                  &encode_frame_result);

+    rc_status = ext_ratectrl->funcs.update_encodeframe_result(

+        ext_ratectrl->model, &encode_frame_result);

+    if (rc_status == VPX_RC_ERROR) {

+      return VPX_CODEC_ERROR;

+    }

+  return VPX_CODEC_OK;

--- a/vp9/encoder/vp9_ext_ratectrl.h

+++ b/vp9/encoder/vp9_ext_ratectrl.h

@@ -22,27 +22,27 @@

   vpx_rc_firstpass_stats_t rc_firstpass_stats;

 } EXT_RATECTRL;

-void vp9_extrc_init(EXT_RATECTRL *ext_ratectrl);

+vpx_codec_err_t vp9_extrc_init(EXT_RATECTRL *ext_ratectrl);

-void vp9_extrc_create(vpx_rc_funcs_t funcs, vpx_rc_config_t ratectrl_config,

-                      EXT_RATECTRL *ext_ratectrl);

+vpx_codec_err_t vp9_extrc_create(vpx_rc_funcs_t funcs,

+                                 vpx_rc_config_t ratectrl_config,

+                                 EXT_RATECTRL *ext_ratectrl);

-void vp9_extrc_delete(EXT_RATECTRL *ext_ratectrl);

+vpx_codec_err_t vp9_extrc_delete(EXT_RATECTRL *ext_ratectrl);

-void vp9_extrc_send_firstpass_stats(EXT_RATECTRL *ext_ratectrl,

-                                    const FIRST_PASS_INFO *first_pass_info);

+vpx_codec_err_t vp9_extrc_send_firstpass_stats(

+    EXT_RATECTRL *ext_ratectrl, const FIRST_PASS_INFO *first_pass_info);

-void vp9_extrc_get_encodeframe_decision(

+vpx_codec_err_t vp9_extrc_get_encodeframe_decision(

     EXT_RATECTRL *ext_ratectrl, int show_index, int coding_index, int gop_index,

     FRAME_UPDATE_TYPE update_type,

     RefCntBuffer *ref_frame_bufs[MAX_INTER_REF_FRAMES], int ref_frame_flags,

     vpx_rc_encodeframe_decision_t *encode_frame_decision);

-void vp9_extrc_update_encodeframe_result(EXT_RATECTRL *ext_ratectrl,

-                                         int64_t bit_count,

-                                         const YV12_BUFFER_CONFIG *source_frame,

-                                         const YV12_BUFFER_CONFIG *coded_frame,

-                                         uint32_t bit_depth,

-                                         uint32_t input_bit_depth);

+vpx_codec_err_t vp9_extrc_update_encodeframe_result(

+    EXT_RATECTRL *ext_ratectrl, int64_t bit_count,

+    const YV12_BUFFER_CONFIG *source_frame,

+    const YV12_BUFFER_CONFIG *coded_frame, uint32_t bit_depth,

+    uint32_t input_bit_depth, const int actual_encoding_qindex);

 #endif  // VPX_VP9_ENCODER_VP9_EXT_RATECTRL_H_

--- a/vp9/encoder/vp9_firstpass.c

+++ b/vp9/encoder/vp9_firstpass.c

@@ -54,6 +54,30 @@

 #define NCOUNT_INTRA_THRESH 8192

 #define NCOUNT_INTRA_FACTOR 3

+#define INTRA_PART 0.005

+#define DEFAULT_DECAY_LIMIT 0.75

+#define LOW_SR_DIFF_TRHESH 0.1

+#define LOW_CODED_ERR_PER_MB 10.0

+#define NCOUNT_FRAME_II_THRESH 6.0

+#define BASELINE_ERR_PER_MB 12500.0

+#define GF_MAX_FRAME_BOOST 96.0

+#ifdef AGGRESSIVE_VBR

+#define KF_MIN_FRAME_BOOST 40.0

+#define KF_MAX_FRAME_BOOST 80.0

+#define MAX_KF_TOT_BOOST 4800

+#else

+#define KF_MIN_FRAME_BOOST 40.0

+#define KF_MAX_FRAME_BOOST 96.0

+#define MAX_KF_TOT_BOOST 5400

+#endif

+#define DEFAULT_ZM_FACTOR 0.5

+#define MINQ_ADJ_LIMIT 48

+#define MINQ_ADJ_LIMIT_CQ 20

+#define HIGH_UNDERSHOOT_RATIO 2

+#define AV_WQ_FACTOR 4.0

 #define DOUBLE_DIVIDE_CHECK(x) ((x) < 0 ? (x)-0.000001 : (x) + 0.000001)

 #if ARF_STATS_OUTPUT

@@ -111,17 +135,6 @@

 #endif

-#if CONFIG_FP_MB_STATS

-static void output_fpmb_stats(uint8_t *this_frame_mb_stats, VP9_COMMON *cm,

-                              struct vpx_codec_pkt_list *pktlist) {

-  struct vpx_codec_cx_pkt pkt;

-  pkt.kind = VPX_CODEC_FPMB_STATS_PKT;

-  pkt.data.firstpass_mb_stats.buf = this_frame_mb_stats;

-  pkt.data.firstpass_mb_stats.sz = cm->initial_mbs * sizeof(uint8_t);

-  vpx_codec_pkt_list_add(pktlist, &pkt);

-}

-#endif

 static void zero_stats(FIRSTPASS_STATS *section) {

   section->frame = 0.0;

   section->weight = 0.0;

@@ -929,10 +942,6 @@

     int level_sample;

     const int mb_index = mb_row * cm->mb_cols + mb_col;

-#if CONFIG_FP_MB_STATS

-    const int mb_index = mb_row * cm->mb_cols + mb_col;

-#endif

     (*(cpi->row_mt_sync_read_ptr))(&tile_data->row_mt_sync, mb_row, c);

     // Adjust to the next column of MBs.

@@ -1068,13 +1077,6 @@

     // Accumulate the intra error.

     fp_acc_data->intra_error += (int64_t)this_error;

-#if CONFIG_FP_MB_STATS

-    if (cpi->use_fp_mb_stats) {

-      // initialization

-      cpi->twopass.frame_mb_stats_buf[mb_index] = 0;

-    }

-#endif

     // Set up limit values for motion vectors to prevent them extending

     // outside the UMV borders.

     x->mv_limits.col_min = -((mb_col * 16) + BORDER_MV_PIXELS_B16);

@@ -1081,8 +1083,8 @@

     x->mv_limits.col_max =

         ((cm->mb_cols - 1 - mb_col) * 16) + BORDER_MV_PIXELS_B16;

-    // Other than for the first frame do a motion search.

-    if (cm->current_video_frame > 0) {

+    // Other than for intra-only frame do a motion search.

+    if (!frame_is_intra_only(cm)) {

       int tmp_err, motion_error, this_motion_error, raw_motion_error;

       // Assume 0,0 motion with no mv overhead.

       MV mv = { 0, 0 }, tmp_mv = { 0, 0 };

@@ -1090,8 +1092,10 @@

       vp9_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[bsize];

 #if CONFIG_RATE_CTRL

-      // Store zero mv as default

-      store_fp_motion_vector(cpi, &mv, mb_row, mb_col, LAST_FRAME, 0);

+      if (cpi->oxcf.use_simple_encode_api) {

+        // Store zero mv as default

+        store_fp_motion_vector(cpi, &mv, mb_row, mb_col, LAST_FRAME, 0);

+      }

 #endif  // CONFIG_RAGE_CTRL

       xd->plane[0].pre[0].buf = first_ref_buf->y_buffer + recon_yoffset;

@@ -1159,7 +1163,9 @@

 #if CONFIG_RATE_CTRL

-        store_fp_motion_vector(cpi, &mv, mb_row, mb_col, LAST_FRAME, 0);

+        if (cpi->oxcf.use_simple_encode_api) {

+          store_fp_motion_vector(cpi, &mv, mb_row, mb_col, LAST_FRAME, 0);

+        }

 #endif  // CONFIG_RAGE_CTRL

         // Search in an older reference frame.

@@ -1183,7 +1189,10 @@

           first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv, &gf_motion_error);

 #if CONFIG_RATE_CTRL

-          store_fp_motion_vector(cpi, &tmp_mv, mb_row, mb_col, GOLDEN_FRAME, 1);

+          if (cpi->oxcf.use_simple_encode_api) {

+            store_fp_motion_vector(cpi, &tmp_mv, mb_row, mb_col, GOLDEN_FRAME,

+                                   1);

+          }

 #endif  // CONFIG_RAGE_CTRL

           if (gf_motion_error < motion_error && gf_motion_error < this_error)

@@ -1213,20 +1222,6 @@

       best_ref_mv->row = 0;

       best_ref_mv->col = 0;

-#if CONFIG_FP_MB_STATS

-      if (cpi->use_fp_mb_stats) {

-        // intra prediction statistics

-        cpi->twopass.frame_mb_stats_buf[mb_index] = 0;

-        cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_DCINTRA_MASK;

-        cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_MOTION_ZERO_MASK;

-        if (this_error > FPMB_ERROR_LARGE_TH) {

-          cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_ERROR_LARGE_MASK;

-        } else if (this_error < FPMB_ERROR_SMALL_TH) {

-          cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_ERROR_SMALL_MASK;

-        }

-      }

-#endif

       if (motion_error <= this_error) {

         vpx_clear_system_state();

@@ -1271,47 +1266,9 @@

         *best_ref_mv = mv;

-#if CONFIG_FP_MB_STATS

-        if (cpi->use_fp_mb_stats) {

-          // inter prediction statistics

-          cpi->twopass.frame_mb_stats_buf[mb_index] = 0;

-          cpi->twopass.frame_mb_stats_buf[mb_index] &= ~FPMB_DCINTRA_MASK;

-          cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_MOTION_ZERO_MASK;

-          if (this_error > FPMB_ERROR_LARGE_TH) {

-            cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_ERROR_LARGE_MASK;

-          } else if (this_error < FPMB_ERROR_SMALL_TH) {

-            cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_ERROR_SMALL_MASK;

-          }

-        }

-#endif

         if (!is_zero_mv(&mv)) {

           ++(fp_acc_data->mvcount);

-#if CONFIG_FP_MB_STATS

-          if (cpi->use_fp_mb_stats) {

-            cpi->twopass.frame_mb_stats_buf[mb_index] &= ~FPMB_MOTION_ZERO_MASK;

-            // check estimated motion direction

-            if (mv.as_mv.col > 0 && mv.as_mv.col >= abs(mv.as_mv.row)) {

-              // right direction

-              cpi->twopass.frame_mb_stats_buf[mb_index] |=

-                  FPMB_MOTION_RIGHT_MASK;

-            } else if (mv.as_mv.row < 0 &&

-                       abs(mv.as_mv.row) >= abs(mv.as_mv.col)) {

-              // up direction

-              cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_MOTION_UP_MASK;

-            } else if (mv.as_mv.col < 0 &&

-                       abs(mv.as_mv.col) >= abs(mv.as_mv.row)) {

-              // left direction

-              cpi->twopass.frame_mb_stats_buf[mb_index] |=

-                  FPMB_MOTION_LEFT_MASK;

-            } else {

-              // down direction

-              cpi->twopass.frame_mb_stats_buf[mb_index] |=

-                  FPMB_MOTION_DOWN_MASK;

-            }

-          }

-#endif

           // Does the row vector point inwards or outwards?

           if (mb_row < cm->mb_rows / 2) {

             if (mv.row > 0)

@@ -1359,7 +1316,9 @@

     } else {

       fp_acc_data->sr_coded_error += (int64_t)this_error;

 #if CONFIG_RATE_CTRL

-      store_fp_motion_vector(cpi, NULL, mb_row, mb_col, INTRA_FRAME, 0);

+      if (cpi->oxcf.use_simple_encode_api) {

+        store_fp_motion_vector(cpi, NULL, mb_row, mb_col, INTRA_FRAME, 0);

+      }

 #endif  // CONFIG_RAGE_CTRL

     fp_acc_data->coded_error += (int64_t)this_error;

@@ -1388,9 +1347,11 @@

   vp9_tile_init(tile, cm, 0, 0);

 #if CONFIG_RATE_CTRL

-  fp_motion_vector_info_reset(cpi->frame_info.frame_width,

-                              cpi->frame_info.frame_height,

-                              cpi->fp_motion_vector_info);

+  if (cpi->oxcf.use_simple_encode_api) {

+    fp_motion_vector_info_reset(cpi->frame_info.frame_width,

+                                cpi->frame_info.frame_height,

+                                cpi->fp_motion_vector_info);

+  }

 #endif

   for (mb_row = 0; mb_row < cm->mb_rows; ++mb_row) {

@@ -1424,12 +1385,6 @@

   assert(new_yv12 != NULL);

   assert(frame_is_intra_only(cm) || (lst_yv12 != NULL));

-#if CONFIG_FP_MB_STATS

-  if (cpi->use_fp_mb_stats) {

-    vp9_zero_array(cpi->twopass.frame_mb_stats_buf, cm->initial_mbs);

-  }

-#endif

   set_first_pass_params(cpi);

   vp9_set_quantizer(cpi, find_fp_qindex(cm->bit_depth));

@@ -1490,12 +1445,6 @@

     twopass->this_frame_stats = fps;

     output_stats(&twopass->this_frame_stats);

     accumulate_stats(&twopass->total_stats, &fps);

-#if CONFIG_FP_MB_STATS

-    if (cpi->use_fp_mb_stats) {

-      output_fpmb_stats(twopass->frame_mb_stats_buf, cm, cpi->output_pkt_list);

-    }

-#endif

   // Copy the previous Last Frame back into gf and and arf buffers if

@@ -1807,61 +1756,60 @@

   twopass->arnr_strength_adjustment = 0;

-#define SR_DIFF_PART 0.0015

-#define INTRA_PART 0.005

-#define DEFAULT_DECAY_LIMIT 0.75

-#define LOW_SR_DIFF_TRHESH 0.1

-#define SR_DIFF_MAX 128.0

-#define LOW_CODED_ERR_PER_MB 10.0

-#define NCOUNT_FRAME_II_THRESH 6.0

-static double get_sr_decay_rate(const FRAME_INFO *frame_info,

+/* This function considers how the quality of prediction may be deteriorating

+ * with distance. It compares the coded error for the last frame and the

+ * second reference frame (usually two frames old) and also applies a factor

+ * based on the extent of INTRA coding.

+ *

+ * The decay factor is then used to reduce the contribution of frames further

+ * from the alt-ref or golden frame, to the bitrate boost calculation for that

+ * alt-ref or golden frame.

+ */

+static double get_sr_decay_rate(const TWO_PASS *const twopass,

                                 const FIRSTPASS_STATS *frame) {

   double sr_diff = (frame->sr_coded_error - frame->coded_error);

   double sr_decay = 1.0;

-  double modified_pct_inter;

-  double modified_pcnt_intra;

-  const double motion_amplitude_part =

-      frame->pcnt_motion *

-      ((frame->mvc_abs + frame->mvr_abs) /

-       (frame_info->frame_height + frame_info->frame_width));

-  modified_pct_inter = frame->pcnt_inter;

-  if ((frame->coded_error > LOW_CODED_ERR_PER_MB) &&

-      ((frame->intra_error / DOUBLE_DIVIDE_CHECK(frame->coded_error)) <

-       (double)NCOUNT_FRAME_II_THRESH)) {

-    modified_pct_inter =

-        frame->pcnt_inter + frame->pcnt_intra_low - frame->pcnt_neutral;

-  }

-  modified_pcnt_intra = 100 * (1.0 - modified_pct_inter);

+  // Do nothing if the second ref to last frame error difference is

+  // very small or even negative.

   if ((sr_diff > LOW_SR_DIFF_TRHESH)) {

-    sr_diff = VPXMIN(sr_diff, SR_DIFF_MAX);

-    sr_decay = 1.0 - (SR_DIFF_PART * sr_diff) - motion_amplitude_part -

-               (INTRA_PART * modified_pcnt_intra);

+    const double sr_diff_part =

+        twopass->sr_diff_factor * ((sr_diff * 0.25) / frame->intra_error);

+    double modified_pct_inter = frame->pcnt_inter;

+    double modified_pcnt_intra;

+    if ((frame->coded_error > LOW_CODED_ERR_PER_MB) &&

+        ((frame->intra_error / DOUBLE_DIVIDE_CHECK(frame->coded_error)) <

+         (double)NCOUNT_FRAME_II_THRESH)) {

+      modified_pct_inter =

+          frame->pcnt_inter + frame->pcnt_intra_low - frame->pcnt_neutral;

+    }

+    modified_pcnt_intra = 100 * (1.0 - modified_pct_inter);

+    sr_decay = 1.0 - sr_diff_part - (INTRA_PART * modified_pcnt_intra);

-  return VPXMAX(sr_decay, DEFAULT_DECAY_LIMIT);

+  return VPXMAX(sr_decay, twopass->sr_default_decay_limit);

 // This function gives an estimate of how badly we believe the prediction

 // quality is decaying from frame to frame.

-static double get_zero_motion_factor(const FRAME_INFO *frame_info,

+static double get_zero_motion_factor(const TWO_PASS *const twopass,

                                      const FIRSTPASS_STATS *frame_stats) {

   const double zero_motion_pct =

       frame_stats->pcnt_inter - frame_stats->pcnt_motion;

-  double sr_decay = get_sr_decay_rate(frame_info, frame_stats);

+  double sr_decay = get_sr_decay_rate(twopass, frame_stats);

   return VPXMIN(sr_decay, zero_motion_pct);

-#define ZM_POWER_FACTOR 0.75

-static double get_prediction_decay_rate(const FRAME_INFO *frame_info,

+static double get_prediction_decay_rate(const TWO_PASS *const twopass,

                                         const FIRSTPASS_STATS *frame_stats) {

-  const double sr_decay_rate = get_sr_decay_rate(frame_info, frame_stats);

-  const double zero_motion_factor =

-      (0.95 * pow((frame_stats->pcnt_inter - frame_stats->pcnt_motion),

-                  ZM_POWER_FACTOR));

+  const double sr_decay_rate = get_sr_decay_rate(twopass, frame_stats);

+  double zero_motion_factor =

+      twopass->zm_factor * (frame_stats->pcnt_inter - frame_stats->pcnt_motion);

+  // Check that the zero motion factor is valid

+  assert(zero_motion_factor >= 0.0 && zero_motion_factor <= 1.0);

   return VPXMAX(zero_motion_factor,

                 (sr_decay_rate + ((1.0 - sr_decay_rate) * zero_motion_factor)));

@@ -1942,10 +1890,9 @@

-#define BASELINE_ERR_PER_MB 12500.0

-#define GF_MAX_BOOST 96.0

 static double calc_frame_boost(const FRAME_INFO *frame_info,

                                const FIRSTPASS_STATS *this_frame,

+                               const TWO_PASS *const twopass,

                                int avg_frame_qindex,

                                double this_frame_mv_in_out) {

   double frame_boost;

@@ -1954,8 +1901,8 @@

   const double boost_q_correction = VPXMIN((0.5 + (lq * 0.015)), 1.5);

   const double active_area = calculate_active_area(frame_info, this_frame);

-  // Underlying boost factor is based on inter error ratio.

-  frame_boost = (BASELINE_ERR_PER_MB * active_area) /

+  // Frame booost is based on inter error.

+  frame_boost = (twopass->err_per_mb * active_area) /

                 DOUBLE_DIVIDE_CHECK(this_frame->coded_error);

   // Small adjustment for cases where there is a zoom out

@@ -1965,28 +1912,15 @@

   // Q correction and scalling

   frame_boost = frame_boost * boost_q_correction;

-  return VPXMIN(frame_boost, GF_MAX_BOOST * boost_q_correction);

+  return VPXMIN(frame_boost, twopass->gf_frame_max_boost * boost_q_correction);

-static double kf_err_per_mb(VP9_COMP *cpi) {

-  const VP9_COMMON *const cm = &cpi->common;

-  unsigned int screen_area = (cm->width * cm->height);

-  // Use a different error per mb factor for calculating boost for

-  //  different formats.

-  if (screen_area < 1280 * 720) {

-    return 2000.0;

-  } else if (screen_area < 1920 * 1080) {

-    return 500.0;

-  }

-  return 250.0;

-}

 static double calc_kf_frame_boost(VP9_COMP *cpi,

                                   const FIRSTPASS_STATS *this_frame,

                                   double *sr_accumulator,

                                   double this_frame_mv_in_out,

-                                  double max_boost) {

+                                  double zm_factor) {

+  TWO_PASS *const twopass = &cpi->twopass;

   double frame_boost;

   const double lq = vp9_convert_qindex_to_q(

       cpi->rc.avg_frame_qindex[INTER_FRAME], cpi->common.bit_depth);

@@ -1993,9 +1927,10 @@

   const double boost_q_correction = VPXMIN((0.50 + (lq * 0.015)), 2.00);

   const double active_area =

       calculate_active_area(&cpi->frame_info, this_frame);

+  double max_boost;

-  // Underlying boost factor is based on inter error ratio.

-  frame_boost = (kf_err_per_mb(cpi) * active_area) /

+  // Frame booost is based on inter error.

+  frame_boost = (twopass->kf_err_per_mb * active_area) /

                 DOUBLE_DIVIDE_CHECK(this_frame->coded_error + *sr_accumulator);

   // Update the accumulator for second ref error difference.

@@ -2012,15 +1947,23 @@

   // The 40.0 value here is an experimentally derived baseline minimum.

   // This value is in line with the minimum per frame boost in the alt_ref

   // boost calculation.

-  frame_boost = ((frame_boost + 40.0) * boost_q_correction);

+  frame_boost =

+      (frame_boost + twopass->kf_frame_min_boost) * boost_q_correction;

-  return VPXMIN(frame_boost, max_boost * boost_q_correction);

+  // Maximum allowed boost this frame. May be different for first vs subsequent

+  // key frames.

+  max_boost = (cpi->common.current_video_frame == 0)

+                  ? twopass->kf_frame_max_boost_first

+                  : twopass->kf_frame_max_boost_subs;

+  max_boost *= zm_factor * boost_q_correction;

+  return VPXMIN(frame_boost, max_boost);

 static int compute_arf_boost(const FRAME_INFO *frame_info,

-                             const FIRST_PASS_INFO *first_pass_info,

-                             int arf_show_idx, int f_frames, int b_frames,

-                             int avg_frame_qindex) {

+                             TWO_PASS *const twopass, int arf_show_idx,

+                             int f_frames, int b_frames, int avg_frame_qindex) {

+  const FIRST_PASS_INFO *first_pass_info = &twopass->first_pass_info;

   int i;

   double boost_score = 0.0;

   double mv_ratio_accumulator = 0.0;

@@ -2051,14 +1994,14 @@

     // Accumulate the effect of prediction quality decay.

     if (!flash_detected) {

-      decay_accumulator *= get_prediction_decay_rate(frame_info, this_frame);

+      decay_accumulator *= get_prediction_decay_rate(twopass, this_frame);

       decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR

                               ? MIN_DECAY_FACTOR

                               : decay_accumulator;

-    boost_score += decay_accumulator * calc_frame_boost(frame_info, this_frame,

-                                                        avg_frame_qindex,

-                                                        this_frame_mv_in_out);

+    boost_score += decay_accumulator *

+                   calc_frame_boost(frame_info, this_frame, twopass,

+                                    avg_frame_qindex, this_frame_mv_in_out);

   arf_boost = (int)boost_score;

@@ -2091,14 +2034,14 @@

     // Cumulative effect of prediction quality decay.

     if (!flash_detected) {

-      decay_accumulator *= get_prediction_decay_rate(frame_info, this_frame);

+      decay_accumulator *= get_prediction_decay_rate(twopass, this_frame);

       decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR

                               ? MIN_DECAY_FACTOR

                               : decay_accumulator;

-    boost_score += decay_accumulator * calc_frame_boost(frame_info, this_frame,

-                                                        avg_frame_qindex,

-                                                        this_frame_mv_in_out);

+    boost_score += decay_accumulator *

+                   calc_frame_boost(frame_info, this_frame, twopass,

+                                    avg_frame_qindex, this_frame_mv_in_out);

   arf_boost += (int)boost_score;

@@ -2114,8 +2057,8 @@

   TWO_PASS *const twopass = &cpi->twopass;

   const int avg_inter_frame_qindex = cpi->rc.avg_frame_qindex[INTER_FRAME];

   int arf_show_idx = get_show_idx(twopass);

-  return compute_arf_boost(frame_info, &twopass->first_pass_info, arf_show_idx,

-                           f_frames, b_frames, avg_inter_frame_qindex);

+  return compute_arf_boost(frame_info, twopass, arf_show_idx, f_frames,

+                           b_frames, avg_inter_frame_qindex);

 // Calculate a section intra ratio used in setting max loop filter.

@@ -2530,6 +2473,9 @@

  * (The following fields will remain unchanged after initialization of encoder.)

  *   rc->static_scene_max_gf_interval

  *   rc->min_gf_interval

+ *   twopass->sr_diff_factor

+ *   twopass->sr_default_decay_limit

+ *   twopass->zm_factor

  * Dynamic fields:

  * (The following fields will be updated before or after coding each frame.)

@@ -2545,9 +2491,10 @@

*/

 static int get_gop_coding_frame_num(

     int *use_alt_ref, const FRAME_INFO *frame_info,

-    const FIRST_PASS_INFO *first_pass_info, const RATE_CONTROL *rc,

+    const TWO_PASS *const twopass, const RATE_CONTROL *rc,

     int gf_start_show_idx, const RANGE *active_gf_interval,

     double gop_intra_factor, int lag_in_frames) {

+  const FIRST_PASS_INFO *first_pass_info = &twopass->first_pass_info;

   double loop_decay_rate = 1.00;

   double mv_ratio_accumulator = 0.0;

   double this_frame_mv_in_out = 0.0;

@@ -2588,15 +2535,14 @@

     // Monitor for static sections.

     if ((rc->frames_since_key + gop_coding_frames - 1) > 1) {

-      zero_motion_accumulator =

-          VPXMIN(zero_motion_accumulator,

-                 get_zero_motion_factor(frame_info, next_frame));

+      zero_motion_accumulator = VPXMIN(

+          zero_motion_accumulator, get_zero_motion_factor(twopass, next_frame));

     // Accumulate the effect of prediction quality decay.

     if (!flash_detected) {

       double last_loop_decay_rate = loop_decay_rate;

-      loop_decay_rate = get_prediction_decay_rate(frame_info, next_frame);

+      loop_decay_rate = get_prediction_decay_rate(twopass, next_frame);

       // Break clause to detect very still sections after motion. For example,

       // a static image after a fade or other transition.

@@ -2656,25 +2602,25 @@

   return gop_coding_frames;

-static RANGE get_active_gf_inverval_range(

-    const FRAME_INFO *frame_info, const RATE_CONTROL *rc, int arf_active_or_kf,

-    int gf_start_show_idx, int active_worst_quality, int last_boosted_qindex) {

+static RANGE get_active_gf_inverval_range_simple(int min_gf_interval,

+                                                 int arf_active_or_kf,

+                                                 int frames_to_key) {

   RANGE active_gf_interval;

-#if CONFIG_RATE_CTRL

-  (void)frame_info;

-  (void)gf_start_show_idx;

-  (void)active_worst_quality;

-  (void)last_boosted_qindex;

-  active_gf_interval.min = rc->min_gf_interval + arf_active_or_kf + 2;

+  active_gf_interval.min = min_gf_interval + arf_active_or_kf + 2;

   active_gf_interval.max = 16 + arf_active_or_kf;

-  if ((active_gf_interval.max <= rc->frames_to_key) &&

-      (active_gf_interval.max >= (rc->frames_to_key - rc->min_gf_interval))) {

-    active_gf_interval.min = rc->frames_to_key / 2;

-    active_gf_interval.max = rc->frames_to_key / 2;

+  if ((active_gf_interval.max <= frames_to_key) &&

+      (active_gf_interval.max >= (frames_to_key - min_gf_interval))) {

+    active_gf_interval.min = frames_to_key / 2;

+    active_gf_interval.max = frames_to_key / 2;

-#else

+  return active_gf_interval;

+}

+static RANGE get_active_gf_inverval_range(

+    const FRAME_INFO *frame_info, const RATE_CONTROL *rc, int arf_active_or_kf,

+    int gf_start_show_idx, int active_worst_quality, int last_boosted_qindex) {

+  RANGE active_gf_interval;

   int int_max_q = (int)(vp9_convert_qindex_to_q(active_worst_quality,

                                                 frame_info->bit_depth));

   int q_term = (gf_start_show_idx == 0)

@@ -2712,7 +2658,6 @@

   active_gf_interval.max =

       VPXMAX(active_gf_interval.max, active_gf_interval.min);

-#endif

   return active_gf_interval;

@@ -2773,9 +2718,14 @@

   vpx_clear_system_state();

-  active_gf_interval = get_active_gf_inverval_range(

-      frame_info, rc, arf_active_or_kf, gf_start_show_idx,

-      twopass->active_worst_quality, rc->last_boosted_qindex);

+  if (oxcf->use_simple_encode_api) {

+    active_gf_interval = get_active_gf_inverval_range_simple(

+        rc->min_gf_interval, arf_active_or_kf, rc->frames_to_key);

+  } else {

+    active_gf_interval = get_active_gf_inverval_range(

+        frame_info, rc, arf_active_or_kf, gf_start_show_idx,

+        twopass->active_worst_quality, rc->last_boosted_qindex);

+  }

   if (cpi->multi_layer_arf) {

     int arf_layers = get_arf_layers(cpi->multi_layer_arf, oxcf->enable_auto_arf,

@@ -2785,25 +2735,21 @@

     gop_intra_factor = 1.0;

+  gop_coding_frames = get_gop_coding_frame_num(

+      &use_alt_ref, frame_info, twopass, rc, gf_start_show_idx,

+      &active_gf_interval, gop_intra_factor, cpi->oxcf.lag_in_frames);

+  use_alt_ref &= allow_alt_ref;

 #if CONFIG_RATE_CTRL

-  {

+  // If the external gop_command is on, we will override the decisions

+  // of gop_coding_frames and use_alt_ref.

+  if (cpi->oxcf.use_simple_encode_api) {

     const GOP_COMMAND *gop_command = &cpi->encode_command.gop_command;

     assert(allow_alt_ref == 1);

     if (gop_command->use) {

       gop_coding_frames = gop_command_coding_frame_count(gop_command);

       use_alt_ref = gop_command->use_alt_ref;

-    } else {

-      gop_coding_frames = get_gop_coding_frame_num(

-          &use_alt_ref, frame_info, first_pass_info, rc, gf_start_show_idx,

-          &active_gf_interval, gop_intra_factor, cpi->oxcf.lag_in_frames);

-      use_alt_ref &= allow_alt_ref;

-#else

-  gop_coding_frames = get_gop_coding_frame_num(

-      &use_alt_ref, frame_info, first_pass_info, rc, gf_start_show_idx,

-      &active_gf_interval, gop_intra_factor, cpi->oxcf.lag_in_frames);

-  use_alt_ref &= allow_alt_ref;

 #endif

   // Was the group length constrained by the requirement for a new KF?

@@ -2823,8 +2769,8 @@

     // Calculate the boost for alt ref.

     rc->gfu_boost =

-        compute_arf_boost(frame_info, first_pass_info, arf_show_idx, f_frames,

-                          b_frames, avg_inter_frame_qindex);

+        compute_arf_boost(frame_info, twopass, arf_show_idx, f_frames, b_frames,

+                          avg_inter_frame_qindex);

     rc->source_alt_ref_pending = 1;

   } else {

     const int f_frames = gop_coding_frames - 1;

@@ -2834,9 +2780,9 @@

     const int gld_show_idx =

         VPXMIN(gf_start_show_idx + 1, fps_get_num_frames(first_pass_info));

     const int arf_boost =

-        compute_arf_boost(frame_info, first_pass_info, gld_show_idx, f_frames,

-                          b_frames, avg_inter_frame_qindex);

-    rc->gfu_boost = VPXMIN(MAX_GF_BOOST, arf_boost);

+        compute_arf_boost(frame_info, twopass, gld_show_idx, f_frames, b_frames,

+                          avg_inter_frame_qindex);

+    rc->gfu_boost = VPXMIN((int)twopass->gf_max_total_boost, arf_boost);

     rc->source_alt_ref_pending = 0;

@@ -2939,7 +2885,9 @@

         cpi, group_av_err, (group_av_skip_pct + group_av_inactive_zone),

         group_av_noise, vbr_group_bits_per_frame);

     twopass->active_worst_quality =

-        (tmp_q + (twopass->active_worst_quality * 3)) >> 2;

+        (int)((tmp_q + (twopass->active_worst_quality *

+                        (twopass->active_wq_factor - 1))) /

+              twopass->active_wq_factor);

 #if CONFIG_ALWAYS_ADJUST_BPM

     // Reset rolling actual and target bits counters for ARF groups.

@@ -3159,18 +3107,10 @@

 #define MIN_SCAN_FRAMES_FOR_KF_BOOST 32

 #define KF_ABS_ZOOM_THRESH 6.0

-#ifdef AGGRESSIVE_VBR

-#define KF_MAX_FRAME_BOOST 80.0

-#define MAX_KF_TOT_BOOST 4800

-#else

-#define KF_MAX_FRAME_BOOST 96.0

-#define MAX_KF_TOT_BOOST 5400

-#endif

 int vp9_get_frames_to_next_key(const VP9EncoderConfig *oxcf,

-                               const FRAME_INFO *frame_info,

-                               const FIRST_PASS_INFO *first_pass_info,

-                               int kf_show_idx, int min_gf_interval) {

+                               const TWO_PASS *const twopass, int kf_show_idx,

+                               int min_gf_interval) {

+  const FIRST_PASS_INFO *first_pass_info = &twopass->first_pass_info;

   double recent_loop_decay[FRAMES_TO_CHECK_DECAY];

   int j;

   int frames_to_key;

@@ -3197,7 +3137,7 @@

           break;

         // How fast is the prediction quality decaying?

-        loop_decay_rate = get_prediction_decay_rate(frame_info, next_frame);

+        loop_decay_rate = get_prediction_decay_rate(twopass, next_frame);

         // We want to know something about the recent past... rather than

         // as used elsewhere where we are concerned with decay in prediction

@@ -3283,8 +3223,8 @@

   kf_mod_err = calc_norm_frame_score(oxcf, frame_info, keyframe_stats,

                                      mean_mod_score, av_err);

-  rc->frames_to_key = vp9_get_frames_to_next_key(

-      oxcf, frame_info, first_pass_info, kf_show_idx, rc->min_gf_interval);

+  rc->frames_to_key = vp9_get_frames_to_next_key(oxcf, twopass, kf_show_idx,

+                                                 rc->min_gf_interval);

   // If there is a max kf interval set by the user we must obey it.

   // We already breakout of the loop above at 2x max.

@@ -3366,7 +3306,7 @@

       if (i > 0) {

         zero_motion_accumulator =

             VPXMIN(zero_motion_accumulator,

-                   get_zero_motion_factor(&cpi->frame_info, &next_frame));

+                   get_zero_motion_factor(twopass, &next_frame));

       } else {

         zero_motion_accumulator =

             next_frame.pcnt_inter - next_frame.pcnt_motion;

@@ -3380,8 +3320,8 @@

       // the first key frame or it points to a refernce before the new key

       // frame.

       if (i < 2) sr_accumulator = 0.0;

-      frame_boost = calc_kf_frame_boost(cpi, &next_frame, &sr_accumulator, 0,

-                                        KF_MAX_FRAME_BOOST * zm_factor);

+      frame_boost =

+          calc_kf_frame_boost(cpi, &next_frame, &sr_accumulator, 0, zm_factor);

       boost_score += frame_boost;

@@ -3410,12 +3350,12 @@

   // Special case for static / slide show content but dont apply

   // if the kf group is very short.

   if ((zero_motion_accumulator > 0.99) && (rc->frames_to_key > 8)) {

-    rc->kf_boost = MAX_KF_TOT_BOOST;

+    rc->kf_boost = (int)(twopass->kf_max_total_boost);

   } else {

-    // Apply various clamps for min and max boost

+    // Apply various clamps for min and max oost

     rc->kf_boost = VPXMAX((int)boost_score, (rc->frames_to_key * 3));

     rc->kf_boost = VPXMAX(rc->kf_boost, MIN_KF_TOT_BOOST);

-    rc->kf_boost = VPXMIN(rc->kf_boost, MAX_KF_TOT_BOOST);

+    rc->kf_boost = VPXMIN(rc->kf_boost, (int)(twopass->kf_max_total_boost));

   // Work out how many bits to allocate for the key frame itself.

@@ -3451,23 +3391,66 @@

-static int is_skippable_frame(const VP9_COMP *cpi) {

-  // If the current frame does not have non-zero motion vector detected in the

-  // first  pass, and so do its previous and forward frames, then this frame

-  // can be skipped for partition check, and the partition size is assigned

-  // according to the variance

-  const TWO_PASS *const twopass = &cpi->twopass;

+// Configure image size specific vizier parameters.

+// Later these will be set via additional command line options

+void vp9_init_vizier_params(TWO_PASS *const twopass, int screen_area) {

+  // When |use_vizier_rc_params| is 1, we expect the rc parameters below to

+  // have been initialised on the command line as adjustment factors such

+  // that a factor of 1.0 will match the default behavior when

+  // |use_vizier_rc_params| is 0

+  if (twopass->use_vizier_rc_params) {

+    twopass->active_wq_factor *= AV_WQ_FACTOR;

+    twopass->err_per_mb *= BASELINE_ERR_PER_MB;

+    twopass->sr_default_decay_limit *= DEFAULT_DECAY_LIMIT;

+    if (twopass->sr_default_decay_limit > 1.0)  // > 1.0 here makes no sense

+      twopass->sr_default_decay_limit = 1.0;

+    twopass->sr_diff_factor *= 1.0;

+    twopass->gf_frame_max_boost *= GF_MAX_FRAME_BOOST;

+    twopass->gf_max_total_boost *= MAX_GF_BOOST;

+    // NOTE: In use max boost has precedence over min boost. So even if min is

+    // somehow set higher than max the final boost value will be clamped to the

+    // appropriate maximum.

+    twopass->kf_frame_min_boost *= KF_MIN_FRAME_BOOST;

+    twopass->kf_frame_max_boost_first *= KF_MAX_FRAME_BOOST;

+    twopass->kf_frame_max_boost_subs *= KF_MAX_FRAME_BOOST;

+    twopass->kf_max_total_boost *= MAX_KF_TOT_BOOST;

+    twopass->zm_factor *= DEFAULT_ZM_FACTOR;

+    if (twopass->zm_factor > 1.0)  // > 1.0 here makes no sense

+      twopass->zm_factor = 1.0;

-  return (!frame_is_intra_only(&cpi->common) &&

-          twopass->stats_in - 2 > twopass->stats_in_start &&

-          twopass->stats_in < twopass->stats_in_end &&

-          (twopass->stats_in - 1)->pcnt_inter -

-                  (twopass->stats_in - 1)->pcnt_motion ==

-              1 &&

-          (twopass->stats_in - 2)->pcnt_inter -

-                  (twopass->stats_in - 2)->pcnt_motion ==

-              1 &&

-          twopass->stats_in->pcnt_inter - twopass->stats_in->pcnt_motion == 1);

+    // Correction for the fact that the kf_err_per_mb_factor default is

+    // already different for different video formats and ensures that a passed

+    // in value of 1.0 on the vizier command line will still match the current

+    // default.

+    if (screen_area < 1280 * 720) {

+      twopass->kf_err_per_mb *= 2000.0;

+    } else if (screen_area < 1920 * 1080) {

+      twopass->kf_err_per_mb *= 500.0;

+    } else {

+      twopass->kf_err_per_mb *= 250.0;

+    }

+  } else {

+    // When |use_vizier_rc_params| is 0, use defaults.

+    twopass->active_wq_factor = AV_WQ_FACTOR;

+    twopass->err_per_mb = BASELINE_ERR_PER_MB;

+    twopass->sr_default_decay_limit = DEFAULT_DECAY_LIMIT;

+    twopass->sr_diff_factor = 1.0;

+    twopass->gf_frame_max_boost = GF_MAX_FRAME_BOOST;

+    twopass->gf_max_total_boost = MAX_GF_BOOST;

+    twopass->kf_frame_min_boost = KF_MIN_FRAME_BOOST;

+    twopass->kf_frame_max_boost_first = KF_MAX_FRAME_BOOST;

+    twopass->kf_frame_max_boost_subs = KF_MAX_FRAME_BOOST;

+    twopass->kf_max_total_boost = MAX_KF_TOT_BOOST;

+    twopass->zm_factor = DEFAULT_ZM_FACTOR;

+    if (screen_area < 1280 * 720) {

+      twopass->kf_err_per_mb = 2000.0;

+    } else if (screen_area < 1920 * 1080) {

+      twopass->kf_err_per_mb = 500.0;

+    } else {

+      twopass->kf_err_per_mb = 250.0;

+    }

+  }

 void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {

@@ -3480,6 +3463,13 @@

   if (!twopass->stats_in) return;

+  // Configure image size specific vizier parameters

+  if (cm->current_video_frame == 0) {

+    unsigned int screen_area = (cm->width * cm->height);

+    vp9_init_vizier_params(twopass, screen_area);

+  }

   // If this is an arf frame then we dont want to read the stats file or

   // advance the input pointer as we already have what we need.

   if (gf_group->update_type[gf_group->index] == ARF_UPDATE) {

@@ -3498,13 +3488,6 @@

     cm->frame_type = INTER_FRAME;

-    // Do the firstpass stats indicate that this frame is skippable for the

-    // partition search?

-    if (cpi->sf.allow_partition_search_skip && cpi->oxcf.pass == 2 &&

-        !cpi->use_svc) {

-      cpi->partition_search_skippable_frame = is_skippable_frame(cpi);

-    }

     // The multiplication by 256 reverses a scaling factor of (>> 8)

     // applied when combining MB error values for the frame.

     twopass->mb_av_energy = log((this_frame.intra_error * 256.0) + 1.0);

@@ -3587,13 +3570,6 @@

   vp9_configure_buffer_updates(cpi, gf_group->index);

-  // Do the firstpass stats indicate that this frame is skippable for the

-  // partition search?

-  if (cpi->sf.allow_partition_search_skip && cpi->oxcf.pass == 2 &&

-      !cpi->use_svc) {

-    cpi->partition_search_skippable_frame = is_skippable_frame(cpi);

-  }

   rc->base_frame_target = gf_group->bit_allocation[gf_group->index];

   // The multiplication by 256 reverses a scaling factor of (>> 8)

@@ -3605,9 +3581,6 @@

   subtract_stats(&twopass->total_left_stats, &this_frame);

-#define MINQ_ADJ_LIMIT 48

-#define MINQ_ADJ_LIMIT_CQ 20

-#define HIGH_UNDERSHOOT_RATIO 2

 void vp9_twopass_postencode_update(VP9_COMP *cpi) {

   TWO_PASS *const twopass = &cpi->twopass;

   RATE_CONTROL *const rc = &cpi->rc;

@@ -3747,8 +3720,7 @@

   *first_is_key_frame = 0;

   if (rc.frames_to_key == 0) {

     rc.frames_to_key = vp9_get_frames_to_next_key(

-        &cpi->oxcf, &cpi->frame_info, &cpi->twopass.first_pass_info,

-        *first_show_idx, rc.min_gf_interval);

+        &cpi->oxcf, &cpi->twopass, *first_show_idx, rc.min_gf_interval);

     rc.frames_since_key = 0;

     *first_is_key_frame = 1;

@@ -3756,18 +3728,18 @@

   if (gop_command->use) {

     *coding_frame_count = gop_command_coding_frame_count(gop_command);

     *use_alt_ref = gop_command->use_alt_ref;

-    assert(*coding_frame_count < rc.frames_to_key);

+    assert(gop_command->show_frame_count <= rc.frames_to_key);

   } else {

     *coding_frame_count = vp9_get_gop_coding_frame_count(

-        &cpi->oxcf, &cpi->frame_info, &cpi->twopass.first_pass_info, &rc,

-        *first_show_idx, multi_layer_arf, allow_alt_ref, *first_is_key_frame,

+        &cpi->oxcf, &cpi->twopass, &cpi->frame_info, &rc, *first_show_idx,

+        multi_layer_arf, allow_alt_ref, *first_is_key_frame,

         *last_gop_use_alt_ref, use_alt_ref);

 int vp9_get_gop_coding_frame_count(const VP9EncoderConfig *oxcf,

+                                   const TWO_PASS *const twopass,

                                    const FRAME_INFO *frame_info,

-                                   const FIRST_PASS_INFO *first_pass_info,

                                    const RATE_CONTROL *rc, int show_idx,

                                    int multi_layer_arf, int allow_alt_ref,

                                    int first_is_key_frame,

@@ -3775,12 +3747,19 @@

   int frame_count;

   double gop_intra_factor;

   const int arf_active_or_kf = last_gop_use_alt_ref || first_is_key_frame;

-  RANGE active_gf_interval = get_active_gf_inverval_range(

-      frame_info, rc, arf_active_or_kf, show_idx, /*active_worst_quality=*/0,

-      /*last_boosted_qindex=*/0);

+  RANGE active_gf_interval;

+  int arf_layers;

+  if (oxcf->use_simple_encode_api) {

+    active_gf_interval = get_active_gf_inverval_range_simple(

+        rc->min_gf_interval, arf_active_or_kf, rc->frames_to_key);

+  } else {

+    active_gf_interval = get_active_gf_inverval_range(

+        frame_info, rc, arf_active_or_kf, show_idx, /*active_worst_quality=*/0,

+        /*last_boosted_qindex=*/0);

+  }

-  const int arf_layers = get_arf_layers(multi_layer_arf, oxcf->enable_auto_arf,

-                                        active_gf_interval.max);

+  arf_layers = get_arf_layers(multi_layer_arf, oxcf->enable_auto_arf,

+                              active_gf_interval.max);

   if (multi_layer_arf) {

     gop_intra_factor = 1.0 + 0.25 * arf_layers;

   } else {

@@ -3787,9 +3766,9 @@

     gop_intra_factor = 1.0;

-  frame_count = get_gop_coding_frame_num(

-      use_alt_ref, frame_info, first_pass_info, rc, show_idx,

-      &active_gf_interval, gop_intra_factor, oxcf->lag_in_frames);

+  frame_count = get_gop_coding_frame_num(use_alt_ref, frame_info, twopass, rc,

+                                         show_idx, &active_gf_interval,

+                                         gop_intra_factor, oxcf->lag_in_frames);

   *use_alt_ref &= allow_alt_ref;

   return frame_count;

@@ -3797,9 +3776,10 @@

 // Under CONFIG_RATE_CTRL, once the first_pass_info is ready, the number of

 // coding frames (including show frame and alt ref) can be determined.

 int vp9_get_coding_frame_num(const VP9EncoderConfig *oxcf,

-                             const FRAME_INFO *frame_info,

-                             const FIRST_PASS_INFO *first_pass_info,

-                             int multi_layer_arf, int allow_alt_ref) {

+                             const TWO_PASS *const twopass,

+                             const FRAME_INFO *frame_info, int multi_layer_arf,

+                             int allow_alt_ref) {

+  const FIRST_PASS_INFO *first_pass_info = &twopass->first_pass_info;

   int coding_frame_num = 0;

   RATE_CONTROL rc;

   int gop_coding_frame_count;

@@ -3812,14 +3792,14 @@

     int use_alt_ref;

     int first_is_key_frame = 0;

     if (rc.frames_to_key == 0) {

-      rc.frames_to_key = vp9_get_frames_to_next_key(

-          oxcf, frame_info, first_pass_info, show_idx, rc.min_gf_interval);

+      rc.frames_to_key = vp9_get_frames_to_next_key(oxcf, twopass, show_idx,

+                                                    rc.min_gf_interval);

       rc.frames_since_key = 0;

       first_is_key_frame = 1;

     gop_coding_frame_count = vp9_get_gop_coding_frame_count(

-        oxcf, frame_info, first_pass_info, &rc, show_idx, multi_layer_arf,

+        oxcf, twopass, frame_info, &rc, show_idx, multi_layer_arf,

         allow_alt_ref, first_is_key_frame, last_gop_use_alt_ref, &use_alt_ref);

     rc.source_alt_ref_active = use_alt_ref;

@@ -3834,9 +3814,8 @@

 void vp9_get_key_frame_map(const VP9EncoderConfig *oxcf,

-                           const FRAME_INFO *frame_info,

-                           const FIRST_PASS_INFO *first_pass_info,

-                           int *key_frame_map) {

+                           const TWO_PASS *const twopass, int *key_frame_map) {

+  const FIRST_PASS_INFO *first_pass_info = &twopass->first_pass_info;

   int show_idx = 0;

   RATE_CONTROL rc;

   vp9_rc_init(oxcf, 1, &rc);

@@ -3849,8 +3828,8 @@

   while (show_idx < first_pass_info->num_frames) {

     int key_frame_group_size;

     key_frame_map[show_idx] = 1;

-    key_frame_group_size = vp9_get_frames_to_next_key(

-        oxcf, frame_info, first_pass_info, show_idx, rc.min_gf_interval);

+    key_frame_group_size =

+        vp9_get_frames_to_next_key(oxcf, twopass, show_idx, rc.min_gf_interval);

     assert(key_frame_group_size > 0);

     show_idx += key_frame_group_size;

--- a/vp9/encoder/vp9_firstpass.h

+++ b/vp9/encoder/vp9_firstpass.h

@@ -21,27 +21,6 @@

 extern "C" {

 #endif

-#if CONFIG_FP_MB_STATS

-#define FPMB_DCINTRA_MASK 0x01

-#define FPMB_MOTION_ZERO_MASK 0x02

-#define FPMB_MOTION_LEFT_MASK 0x04

-#define FPMB_MOTION_RIGHT_MASK 0x08

-#define FPMB_MOTION_UP_MASK 0x10

-#define FPMB_MOTION_DOWN_MASK 0x20

-#define FPMB_ERROR_SMALL_MASK 0x40

-#define FPMB_ERROR_LARGE_MASK 0x80

-#define FPMB_ERROR_SMALL_TH 2000

-#define FPMB_ERROR_LARGE_TH 48000

-typedef struct {

-  uint8_t *mb_stats_start;

-  uint8_t *mb_stats_end;

-} FIRSTPASS_MB_STATS;

-#endif

 #define INVALID_ROW (-1)

 #define MAX_ARF_LAYERS 6

@@ -188,12 +167,6 @@

   double mb_av_energy;

   double mb_smooth_pct;

-#if CONFIG_FP_MB_STATS

-  uint8_t *frame_mb_stats_buf;

-  uint8_t *this_frame_mb_stats;

-  FIRSTPASS_MB_STATS firstpass_mb_stats;

-#endif

   FP_MB_FLOAT_STATS *fp_mb_float_stats;

   // An indication of the content type of the current frame

@@ -221,6 +194,24 @@

   int last_qindex_of_arf_layer[MAX_ARF_LAYERS];

   GF_GROUP gf_group;

+  // Vizeir project experimental two pass rate control parameters.

+  // When |use_vizier_rc_params| is 1, the following parameters will

+  // be overwritten by pass in values. Otherwise, they are initialized

+  // by default values.

+  int use_vizier_rc_params;

+  double active_wq_factor;

+  double err_per_mb;

+  double sr_default_decay_limit;

+  double sr_diff_factor;

+  double kf_err_per_mb;

+  double kf_frame_min_boost;

+  double kf_frame_max_boost_first;  // Max for first kf in a chunk.

+  double kf_frame_max_boost_subs;   // Max for subsequent mid chunk kfs.

+  double kf_max_total_boost;

+  double gf_max_total_boost;

+  double gf_frame_max_boost;

+  double zm_factor;

 } TWO_PASS;

 struct VP9_COMP;

@@ -239,6 +230,7 @@

 void vp9_init_second_pass(struct VP9_COMP *cpi);

 void vp9_rc_get_second_pass_params(struct VP9_COMP *cpi);

+void vp9_init_vizier_params(TWO_PASS *const twopass, int screen_area);

 // Post encode update of the rate control parameters for 2-pass

 void vp9_twopass_postencode_update(struct VP9_COMP *cpi);

@@ -248,9 +240,8 @@

 struct VP9EncoderConfig;

 int vp9_get_frames_to_next_key(const struct VP9EncoderConfig *oxcf,

-                               const FRAME_INFO *frame_info,

-                               const FIRST_PASS_INFO *first_pass_info,

-                               int kf_show_idx, int min_gf_interval);

+                               const TWO_PASS *const twopass, int kf_show_idx,

+                               int min_gf_interval);

 #if CONFIG_RATE_CTRL

 /* Call this function to get info about the next group of pictures.

  * This function should be called after vp9_create_compressor() when encoding

@@ -265,8 +256,8 @@

 /*!\brief Call this function before coding a new group of pictures to get

  * information about it.

  * \param[in] oxcf                 Encoder config

+ * \param[in] twopass              Twopass info

  * \param[in] frame_info           Frame info

- * \param[in] first_pass_info      First pass stats

  * \param[in] rc                   Rate control state

  * \param[in] show_idx             Show index of the first frame in the group

  * \param[in] multi_layer_arf      Is multi-layer alternate reference used

@@ -279,8 +270,8 @@

  * \return Returns coding frame count

*/

 int vp9_get_gop_coding_frame_count(const struct VP9EncoderConfig *oxcf,

+                                   const TWO_PASS *const twopass,

                                    const FRAME_INFO *frame_info,

-                                   const FIRST_PASS_INFO *first_pass_info,

                                    const RATE_CONTROL *rc, int show_idx,

                                    int multi_layer_arf, int allow_alt_ref,

                                    int first_is_key_frame,

@@ -287,19 +278,17 @@

                                    int last_gop_use_alt_ref, int *use_alt_ref);

 int vp9_get_coding_frame_num(const struct VP9EncoderConfig *oxcf,

-                             const FRAME_INFO *frame_info,

-                             const FIRST_PASS_INFO *first_pass_info,

-                             int multi_layer_arf, int allow_alt_ref);

+                             const TWO_PASS *const twopass,

+                             const FRAME_INFO *frame_info, int multi_layer_arf,

+                             int allow_alt_ref);

 /*!\brief Compute a key frame binary map indicates whether key frames appear

  * in the corresponding positions. The passed in key_frame_map must point to an

- * integer array with length equal to first_pass_info->num_frames, which is the

- * number of show frames in the video.

+ * integer array with length equal to twopass->first_pass_info.num_frames,

+ * which is the number of show frames in the video.

*/

 void vp9_get_key_frame_map(const struct VP9EncoderConfig *oxcf,

-                           const FRAME_INFO *frame_info,

-                           const FIRST_PASS_INFO *first_pass_info,

-                           int *key_frame_map);

+                           const TWO_PASS *const twopass, int *key_frame_map);

 #endif  // CONFIG_RATE_CTRL

 FIRSTPASS_STATS vp9_get_frame_stats(const TWO_PASS *twopass);

--- a/vp9/encoder/vp9_lookahead.h

+++ b/vp9/encoder/vp9_lookahead.h

@@ -82,15 +82,11 @@

  * This function will copy the source image into a new framebuffer with

  * the expected stride/border.

- * If active_map is non-NULL and there is only one frame in the queue, then copy

- * only active macroblocks.

- *

  * \param[in] ctx         Pointer to the lookahead context

  * \param[in] src         Pointer to the image to enqueue

  * \param[in] ts_start    Timestamp for the start of this frame

  * \param[in] ts_end      Timestamp for the end of this frame

  * \param[in] flags       Flags set on this frame

- * \param[in] active_map  Map that specifies which macroblock is active

*/

 int vp9_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src,

                        int64_t ts_start, int64_t ts_end, int use_highbitdepth,

--- a/vp9/encoder/vp9_ratectrl.c

+++ b/vp9/encoder/vp9_ratectrl.c

@@ -39,9 +39,6 @@

 #define MAX_MB_RATE 250

 #define MAXRATE_1080P 4000000

-#define DEFAULT_KF_BOOST 2000

-#define DEFAULT_GF_BOOST 2000

 #define LIMIT_QRANGE_FOR_ALTREF_AND_KEY 1

 #define MIN_BPB_FACTOR 0.005

@@ -280,9 +277,9 @@

         svc->current_superframe > 0) {

       // TODO(marpan): This may need to be modified for temporal layers.

       const double framerate_pts = 10000000.0 / ts_delta;

-      lrc->bits_off_target += (int)(lc->target_bandwidth / framerate_pts);

+      lrc->bits_off_target += (int)round(lc->target_bandwidth / framerate_pts);

     } else {

-      lrc->bits_off_target += (int)(lc->target_bandwidth / lc->framerate);

+      lrc->bits_off_target += (int)round(lc->target_bandwidth / lc->framerate);

     // Clip buffer level to maximum buffer size for the layer.

     lrc->bits_off_target =

@@ -410,6 +407,7 @@

   rc->source_alt_ref_active = 0;

   rc->frames_till_gf_update_due = 0;

+  rc->constrain_gf_key_freq_onepass_vbr = 1;

   rc->ni_av_qi = oxcf->worst_allowed_q;

   rc->ni_tot_qi = 0;

   rc->ni_frames = 0;

@@ -1720,10 +1718,12 @@

 #if CONFIG_RATE_CTRL

-  if (cpi->encode_command.use_external_target_frame_bits) {

-    rc->this_frame_target = cpi->encode_command.target_frame_bits;

+  if (cpi->oxcf.use_simple_encode_api) {

+    if (cpi->encode_command.use_external_target_frame_bits) {

+      rc->this_frame_target = cpi->encode_command.target_frame_bits;

+    }

-#endif

+#endif  // CONFIG_RATE_CTRL

   // Target rate per SB64 (including partial SB64s.

   rc->sb64_target_rate = (int)(((int64_t)rc->this_frame_target * 64 * 64) /

@@ -2009,7 +2009,7 @@

-static int calc_pframe_target_size_one_pass_vbr(const VP9_COMP *const cpi) {

+int vp9_calc_pframe_target_size_one_pass_vbr(const VP9_COMP *cpi) {

   const RATE_CONTROL *const rc = &cpi->rc;

   const int af_ratio = rc->af_ratio_onepass_vbr;

   int64_t target =

@@ -2024,7 +2024,7 @@

   return vp9_rc_clamp_pframe_target_size(cpi, (int)target);

-static int calc_iframe_target_size_one_pass_vbr(const VP9_COMP *const cpi) {

+int vp9_calc_iframe_target_size_one_pass_vbr(const VP9_COMP *cpi) {

   static const int kf_ratio = 25;

   const RATE_CONTROL *rc = &cpi->rc;

   const int target = rc->avg_frame_bandwidth * kf_ratio;

@@ -2050,22 +2050,9 @@

-void vp9_rc_get_one_pass_vbr_params(VP9_COMP *cpi) {

-  VP9_COMMON *const cm = &cpi->common;

+void vp9_set_gf_update_one_pass_vbr(VP9_COMP *const cpi) {

   RATE_CONTROL *const rc = &cpi->rc;

-  int target;

-  if (!cpi->refresh_alt_ref_frame &&

-      (cm->current_video_frame == 0 || (cpi->frame_flags & FRAMEFLAGS_KEY) ||

-       rc->frames_to_key == 0)) {

-    cm->frame_type = KEY_FRAME;

-    rc->this_key_frame_forced =

-        cm->current_video_frame != 0 && rc->frames_to_key == 0;

-    rc->frames_to_key = cpi->oxcf.key_freq;

-    rc->kf_boost = DEFAULT_KF_BOOST;

-    rc->source_alt_ref_active = 0;

-  } else {

-    cm->frame_type = INTER_FRAME;

-  }

+  VP9_COMMON *const cm = &cpi->common;

   if (rc->frames_till_gf_update_due == 0) {

     double rate_err = 1.0;

     rc->gfu_boost = DEFAULT_GF_BOOST;

@@ -2084,18 +2071,23 @@

           rate_err > 3.5) {

         rc->baseline_gf_interval =

             VPXMIN(15, (3 * rc->baseline_gf_interval) >> 1);

-      } else if (rc->avg_frame_low_motion < 20) {

+      } else if (rc->avg_frame_low_motion > 0 &&

+                 rc->avg_frame_low_motion < 20) {

         // Decrease gf interval for high motion case.

         rc->baseline_gf_interval = VPXMAX(6, rc->baseline_gf_interval >> 1);

-      // Adjust boost and af_ratio based on avg_frame_low_motion, which varies

-      // between 0 and 100 (stationary, 100% zero/small motion).

-      rc->gfu_boost =

-          VPXMAX(500, DEFAULT_GF_BOOST * (rc->avg_frame_low_motion << 1) /

-                          (rc->avg_frame_low_motion + 100));

+      // Adjust boost and af_ratio based on avg_frame_low_motion, which

+      // varies between 0 and 100 (stationary, 100% zero/small motion).

+      if (rc->avg_frame_low_motion > 0)

+        rc->gfu_boost =

+            VPXMAX(500, DEFAULT_GF_BOOST * (rc->avg_frame_low_motion << 1) /

+                            (rc->avg_frame_low_motion + 100));

+      else if (rc->avg_frame_low_motion == 0 && rate_err > 1.0)

+        rc->gfu_boost = DEFAULT_GF_BOOST >> 1;

       rc->af_ratio_onepass_vbr = VPXMIN(15, VPXMAX(5, 3 * rc->gfu_boost / 400));

-    adjust_gfint_frame_constraint(cpi, rc->frames_to_key);

+    if (rc->constrain_gf_key_freq_onepass_vbr)

+      adjust_gfint_frame_constraint(cpi, rc->frames_to_key);

     rc->frames_till_gf_update_due = rc->baseline_gf_interval;

     cpi->refresh_golden_frame = 1;

     rc->source_alt_ref_pending = 0;

@@ -2105,10 +2097,29 @@

       rc->alt_ref_gf_group = 1;

+}

+void vp9_rc_get_one_pass_vbr_params(VP9_COMP *cpi) {

+  VP9_COMMON *const cm = &cpi->common;

+  RATE_CONTROL *const rc = &cpi->rc;

+  int target;

+  if (!cpi->refresh_alt_ref_frame &&

+      (cm->current_video_frame == 0 || (cpi->frame_flags & FRAMEFLAGS_KEY) ||

+       rc->frames_to_key == 0)) {

+    cm->frame_type = KEY_FRAME;

+    rc->this_key_frame_forced =

+        cm->current_video_frame != 0 && rc->frames_to_key == 0;

+    rc->frames_to_key = cpi->oxcf.key_freq;

+    rc->kf_boost = DEFAULT_KF_BOOST;

+    rc->source_alt_ref_active = 0;

+  } else {

+    cm->frame_type = INTER_FRAME;

+  }

+  vp9_set_gf_update_one_pass_vbr(cpi);

   if (cm->frame_type == KEY_FRAME)

-    target = calc_iframe_target_size_one_pass_vbr(cpi);

+    target = vp9_calc_iframe_target_size_one_pass_vbr(cpi);

   else

-    target = calc_pframe_target_size_one_pass_vbr(cpi);

+    target = vp9_calc_pframe_target_size_one_pass_vbr(cpi);

   vp9_rc_set_frame_target(cpi, target);

   if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cpi->oxcf.pass == 0)

     vp9_cyclic_refresh_update_parameters(cpi);

@@ -2526,26 +2537,25 @@

     rc->min_gf_interval = FIXED_GF_INTERVAL;

     rc->static_scene_max_gf_interval = FIXED_GF_INTERVAL;

   } else {

+    double framerate = cpi->framerate;

     // Set Maximum gf/arf interval

     rc->max_gf_interval = oxcf->max_gf_interval;

     rc->min_gf_interval = oxcf->min_gf_interval;

 #if CONFIG_RATE_CTRL

+    if (oxcf->use_simple_encode_api) {

+      // In this experiment, we avoid framerate being changed dynamically during

+      // encoding.

+      framerate = oxcf->init_framerate;

+    }

+#endif  // CONFIG_RATE_CTRL

     if (rc->min_gf_interval == 0) {

       rc->min_gf_interval = vp9_rc_get_default_min_gf_interval(

-          oxcf->width, oxcf->height, oxcf->init_framerate);

+          oxcf->width, oxcf->height, framerate);

     if (rc->max_gf_interval == 0) {

-      rc->max_gf_interval = vp9_rc_get_default_max_gf_interval(

-          oxcf->init_framerate, rc->min_gf_interval);

+      rc->max_gf_interval =

+          vp9_rc_get_default_max_gf_interval(framerate, rc->min_gf_interval);

-#else

-    if (rc->min_gf_interval == 0)

-      rc->min_gf_interval = vp9_rc_get_default_min_gf_interval(

-          oxcf->width, oxcf->height, cpi->framerate);

-    if (rc->max_gf_interval == 0)

-      rc->max_gf_interval = vp9_rc_get_default_max_gf_interval(

-          cpi->framerate, rc->min_gf_interval);

-#endif

     // Extended max interval for genuinely static scenes like slide shows.

     rc->static_scene_max_gf_interval = MAX_STATIC_GF_GROUP_LENGTH;

@@ -2953,7 +2963,7 @@

-    target = calc_pframe_target_size_one_pass_vbr(cpi);

+    target = vp9_calc_pframe_target_size_one_pass_vbr(cpi);

     vp9_rc_set_frame_target(cpi, target);

   rc->prev_avg_source_sad_lag = avg_source_sad_lag;

@@ -3163,7 +3173,7 @@

           VPXMIN(20, VPXMAX(10, rc->baseline_gf_interval));

       adjust_gfint_frame_constraint(cpi, rc->frames_to_key);

       rc->frames_till_gf_update_due = rc->baseline_gf_interval;

-      target = calc_pframe_target_size_one_pass_vbr(cpi);

+      target = vp9_calc_pframe_target_size_one_pass_vbr(cpi);

       vp9_rc_set_frame_target(cpi, target);

       rc->count_last_scene_change = 0;

     } else {

--- a/vp9/encoder/vp9_ratectrl.h

+++ b/vp9/encoder/vp9_ratectrl.h

@@ -27,6 +27,9 @@

 // Bits Per MB at different Q (Multiplied by 512)

 #define BPER_MB_NORMBITS 9

+#define DEFAULT_KF_BOOST 2000

+#define DEFAULT_GF_BOOST 2000

 #define MIN_GF_INTERVAL 4

 #define MAX_GF_INTERVAL 16

 #define FIXED_GF_INTERVAL 8  // Used in some testing modes only

@@ -204,6 +207,10 @@

   int preserve_arf_as_gld;

   int preserve_next_arf_as_gld;

   int show_arf_as_gld;

+  // Flag to constrain golden frame interval on key frame frequency for 1 pass

+  // VBR.

+  int constrain_gf_key_freq_onepass_vbr;

 } RATE_CONTROL;

 struct VP9_COMP;

@@ -255,6 +262,9 @@

 void vp9_rc_get_one_pass_cbr_params(struct VP9_COMP *cpi);

 int vp9_calc_pframe_target_size_one_pass_cbr(const struct VP9_COMP *cpi);

 int vp9_calc_iframe_target_size_one_pass_cbr(const struct VP9_COMP *cpi);

+int vp9_calc_pframe_target_size_one_pass_vbr(const struct VP9_COMP *cpi);

+int vp9_calc_iframe_target_size_one_pass_vbr(const struct VP9_COMP *cpi);

+void vp9_set_gf_update_one_pass_vbr(struct VP9_COMP *const cpi);

 void vp9_update_buffer_level_preencode(struct VP9_COMP *cpi);

 void vp9_rc_get_svc_params(struct VP9_COMP *cpi);

--- a/vp9/encoder/vp9_rd.c

+++ b/vp9/encoder/vp9_rd.c

@@ -197,28 +197,68 @@

 static const int rd_frame_type_factor[FRAME_UPDATE_TYPES] = { 128, 144, 128,

                                                               128, 144, 144 };

+// Configure Vizier RD parameters.

+// Later this function will use passed in command line values.

+void vp9_init_rd_parameters(VP9_COMP *cpi) {

+  RD_CONTROL *const rdc = &cpi->rd_ctrl;

+  // When |use_vizier_rc_params| is 1, we expect the rd parameters have been

+  // initialized by the pass in values.

+  // Be careful that parameters below are only initialized to 1, if we do not

+  // pass values to them. It is desired to take care of each parameter when

+  // using |use_vizier_rc_params|.

+  if (cpi->twopass.use_vizier_rc_params) return;

+  // Make sure this function is floating point safe.

+  vpx_clear_system_state();

+  rdc->rd_mult_inter_qp_fac = 1.0;

+  rdc->rd_mult_arf_qp_fac = 1.0;

+  rdc->rd_mult_key_qp_fac = 1.0;

+}

+// Returns the default rd multiplier for inter frames for a given qindex.

+// The function here is a first pass estimate based on data from

+// a previous Vizer run

+static double def_inter_rd_multiplier(int qindex) {

+  return 4.15 + (0.001 * (double)qindex);

+}

+// Returns the default rd multiplier for ARF/Golden Frames for a given qindex.

+// The function here is a first pass estimate based on data from

+// a previous Vizer run

+static double def_arf_rd_multiplier(int qindex) {

+  return 4.25 + (0.001 * (double)qindex);

+}

+// Returns the default rd multiplier for key frames for a given qindex.

+// The function here is a first pass estimate based on data from

+// a previous Vizer run

+static double def_kf_rd_multiplier(int qindex) {

+  return 4.35 + (0.001 * (double)qindex);

+}

 int vp9_compute_rd_mult_based_on_qindex(const VP9_COMP *cpi, int qindex) {

-  // largest dc_quant is 21387, therefore rdmult should always fit in int32_t

+  const RD_CONTROL *rdc = &cpi->rd_ctrl;

   const int q = vp9_dc_quant(qindex, 0, cpi->common.bit_depth);

-  uint32_t rdmult = q * q;

+  // largest dc_quant is 21387, therefore rdmult should fit in int32_t

+  int rdmult = q * q;

-  if (cpi->common.frame_type != KEY_FRAME) {

-    if (qindex < 128)

-      rdmult = rdmult * 4;

-    else if (qindex < 190)

-      rdmult = rdmult * 4 + rdmult / 2;

-    else

-      rdmult = rdmult * 3;

+  // Make sure this function is floating point safe.

+  vpx_clear_system_state();

+  if (cpi->common.frame_type == KEY_FRAME) {

+    double def_rd_q_mult = def_kf_rd_multiplier(qindex);

+    rdmult = (int)((double)rdmult * def_rd_q_mult * rdc->rd_mult_key_qp_fac);

+  } else if (!cpi->rc.is_src_frame_alt_ref &&

+             (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {

+    double def_rd_q_mult = def_arf_rd_multiplier(qindex);

+    rdmult = (int)((double)rdmult * def_rd_q_mult * rdc->rd_mult_arf_qp_fac);

   } else {

-    if (qindex < 64)

-      rdmult = rdmult * 4;

-    else if (qindex <= 128)

-      rdmult = rdmult * 3 + rdmult / 2;

-    else if (qindex < 190)

-      rdmult = rdmult * 4 + rdmult / 2;

-    else

-      rdmult = rdmult * 7 + rdmult / 2;

+    double def_rd_q_mult = def_inter_rd_multiplier(qindex);

+    rdmult = (int)((double)rdmult * def_rd_q_mult * rdc->rd_mult_inter_qp_fac);

 #if CONFIG_VP9_HIGHBITDEPTH

   switch (cpi->common.bit_depth) {

     case VPX_BITS_10: rdmult = ROUND_POWER_OF_TWO(rdmult, 4); break;

--- a/vp9/encoder/vp9_rd.h

+++ b/vp9/encoder/vp9_rd.h

@@ -101,6 +101,13 @@

   THR_INTRA,

 } THR_MODES_SUB8X8;

+typedef struct {

+  // RD multiplier control factors added for Vizier project.

+  double rd_mult_inter_qp_fac;

+  double rd_mult_arf_qp_fac;

+  double rd_mult_key_qp_fac;

+} RD_CONTROL;

 typedef struct RD_OPT {

   // Thresh_mult is used to set a threshold for the rd score. A higher value

   // means that we will accept the best mode so far more often. This number

@@ -143,6 +150,8 @@

 struct TileDataEnc;

 struct VP9_COMP;

 struct macroblock;

+void vp9_init_rd_parameters(struct VP9_COMP *cpi);

 int vp9_compute_rd_mult_based_on_qindex(const struct VP9_COMP *cpi, int qindex);

--- a/vp9/encoder/vp9_rdopt.c

+++ b/vp9/encoder/vp9_rdopt.c

@@ -745,8 +745,8 @@

   MODE_INFO *const mi = xd->mi[0];

   int64_t rd1, rd2, rd;

   int rate;

-  int64_t dist;

-  int64_t sse;

+  int64_t dist = INT64_MAX;

+  int64_t sse = INT64_MAX;

   const int coeff_ctx =

       combine_entropy_contexts(args->t_left[blk_row], args->t_above[blk_col]);

   struct buf_2d *recon = args->this_recon;

@@ -799,6 +799,13 @@

     if (max_txsize_lookup[plane_bsize] == tx_size)

       skip_txfm_flag = x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))];

+    // This reduces the risk of bad perceptual quality due to bad prediction.

+    // We always force the encoder to perform transform and quantization.

+    if (!args->cpi->sf.allow_skip_txfm_ac_dc &&

+        skip_txfm_flag == SKIP_TXFM_AC_DC) {

+      skip_txfm_flag = SKIP_TXFM_NONE;

+    }

     if (skip_txfm_flag == SKIP_TXFM_NONE ||

         (recon && skip_txfm_flag == SKIP_TXFM_AC_ONLY)) {

       // full forward transform and quantization

@@ -827,17 +834,7 @@

         dist = VPXMAX(0, sse - dc_correct);

     } else {

-      // SKIP_TXFM_AC_DC

-      // skip forward transform. Because this is handled here, the quantization

-      // does not need to do it.

-      x->plane[plane].eobs[block] = 0;

-      sse = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;

-      dist = sse;

-      if (recon) {

-        uint8_t *rec_ptr = &recon->buf[4 * (blk_row * recon->stride + blk_col)];

-        copy_block_visible(xd, pd, dst, dst_stride, rec_ptr, recon->stride,

-                           blk_row, blk_col, plane_bsize, tx_bsize);

-      }

+      assert(0 && "allow_skip_txfm_ac_dc does not allow SKIP_TXFM_AC_DC.");

--- a/vp9/encoder/vp9_speed_features.c

+++ b/vp9/encoder/vp9_speed_features.c

@@ -345,7 +345,6 @@

     sf->intra_y_mode_mask[TX_32X32] = INTRA_DC;

     sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC;

     sf->adaptive_interp_filter_search = 1;

-    sf->allow_partition_search_skip = 1;

     if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) {

       for (i = 0; i < MAX_MESH_STEP; ++i) {

@@ -931,7 +930,6 @@

   sf->max_delta_qindex = 0;

   sf->disable_filter_search_var_thresh = 0;

   sf->adaptive_interp_filter_search = 0;

-  sf->allow_partition_search_skip = 0;

   sf->allow_txfm_domain_distortion = 0;

   sf->tx_domain_thresh = 99.0;

   sf->allow_quant_coeff_opt = sf->optimize_coefficients;

@@ -940,6 +938,7 @@

   sf->enable_tpl_model = oxcf->enable_tpl_model;

   sf->prune_ref_frame_for_rect_partitions = 0;

   sf->temporal_filter_search_method = MESH;

+  sf->allow_skip_txfm_ac_dc = 0;

   for (i = 0; i < TX_SIZES; i++) {

     sf->intra_y_mode_mask[i] = INTRA_ALL;

--- a/vp9/encoder/vp9_speed_features.h

+++ b/vp9/encoder/vp9_speed_features.h

@@ -525,9 +525,6 @@

     int prune_rect_thresh[4];

   } rd_ml_partition;

-  // Allow skipping partition search for still image frame

-  int allow_partition_search_skip;

   // Fast approximation of vp9_model_rd_from_var_lapndz

   int simple_model_rd_from_var;

@@ -612,6 +609,12 @@

   // For real-time mode: force DC only under intra search when content

   // does not have high souce SAD.

   int rt_intra_dc_only_low_content;

+  // The encoder has a feature that skips forward transform and quantization

+  // based on a model rd estimation to reduce encoding time.

+  // However, this feature is dangerous since it could lead to bad perceptual

+  // quality. This flag is added to guard the feature.

+  int allow_skip_txfm_ac_dc;

 } SPEED_FEATURES;

 struct VP9_COMP;

--- a/vp9/encoder/vp9_svc_layercontext.c

+++ b/vp9/encoder/vp9_svc_layercontext.c

@@ -322,8 +322,8 @@

     const int prev_layer_target_bandwidth =

         oxcf->layer_target_bitrate[st_idx - 1];

     lc->avg_frame_size =

-        (int)((lc->target_bandwidth - prev_layer_target_bandwidth) /

-              (lc->framerate - prev_layer_framerate));

+        (int)round((lc->target_bandwidth - prev_layer_target_bandwidth) /

+                   (lc->framerate - prev_layer_framerate));

@@ -956,7 +956,7 @@

   if (cpi->common.frame_type != KEY_FRAME && !cpi->ext_refresh_last_frame &&

       !cpi->ext_refresh_golden_frame && !cpi->ext_refresh_alt_ref_frame)

     svc->non_reference_frame = 1;

-  // For non-flexible mode, where update_buffer_slot is used, need to check if

+  // For flexible mode, where update_buffer_slot is used, need to check if

   // all buffer slots are not refreshed.

   if (svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {

     if (svc->update_buffer_slot[svc->spatial_layer_id] != 0)

--- a/vp9/encoder/vp9_svc_layercontext.h

+++ b/vp9/encoder/vp9_svc_layercontext.h

@@ -173,6 +173,8 @@

   uint8_t fb_idx_temporal_layer_id[REF_FRAMES];

   int spatial_layer_sync[VPX_SS_MAX_LAYERS];

+  // Quantizer for each spatial layer.

+  int base_qindex[VPX_SS_MAX_LAYERS];

   uint8_t set_intra_only_frame;

   uint8_t previous_frame_is_intra_only;

   uint8_t superframe_has_layer_sync;

--- a/vp9/ratectrl_rtc.cc

+++ b/vp9/ratectrl_rtc.cc

@@ -11,6 +11,7 @@

 #include <new>

+#include "vp9/common/vp9_common.h"

 #include "vp9/encoder/vp9_encoder.h"

 #include "vp9/encoder/vp9_picklpf.h"

 #include "vpx/vp8cx.h"

@@ -24,10 +25,19 @@

                                                 VP9RateControlRTC());

   if (!rc_api) return nullptr;

   rc_api->cpi_ = static_cast<VP9_COMP *>(vpx_memalign(32, sizeof(*cpi_)));

-  if (rc_api->cpi_ == nullptr) {

-    return nullptr;

-  }

+  if (!rc_api->cpi_) return nullptr;

+  vp9_zero(*rc_api->cpi_);

   rc_api->InitRateControl(cfg);

+  if (cfg.aq_mode) {

+    VP9_COMP *const cpi = rc_api->cpi_;

+    cpi->segmentation_map = static_cast<uint8_t *>(

+        vpx_calloc(cpi->common.mi_rows * cpi->common.mi_cols,

+                   sizeof(*cpi->segmentation_map)));

+    cpi->cyclic_refresh =

+        vp9_cyclic_refresh_alloc(cpi->common.mi_rows, cpi->common.mi_cols);

+    cpi->cyclic_refresh->content_mode = 0;

+  }

   return rc_api;

@@ -38,13 +48,18 @@

   cm->profile = PROFILE_0;

   cm->bit_depth = VPX_BITS_8;

   cm->show_frame = 1;

-  oxcf->rc_mode = VPX_CBR;

+  oxcf->profile = cm->profile;

+  oxcf->bit_depth = cm->bit_depth;

+  oxcf->rc_mode = rc_cfg.rc_mode;

   oxcf->pass = 0;

-  oxcf->aq_mode = NO_AQ;

+  oxcf->aq_mode = rc_cfg.aq_mode ? CYCLIC_REFRESH_AQ : NO_AQ;

   oxcf->content = VP9E_CONTENT_DEFAULT;

   oxcf->drop_frames_water_mark = 0;

+  cm->current_video_frame = 0;

+  rc->kf_boost = DEFAULT_KF_BOOST;

   UpdateRateControl(rc_cfg);

+  vp9_set_mb_mi(cm, cm->width, cm->height);

   cpi_->use_svc = (cpi_->svc.number_spatial_layers > 1 ||

                    cpi_->svc.number_temporal_layers > 1)

@@ -55,8 +70,8 @@

   rc->rc_2_frame = 0;

   vp9_rc_init_minq_luts();

   vp9_rc_init(oxcf, 0, rc);

+  rc->constrain_gf_key_freq_onepass_vbr = 0;

   cpi_->sf.use_nonrd_pick_mode = 1;

-  cm->current_video_frame = 0;

 void VP9RateControlRTC::UpdateRateControl(

@@ -73,6 +88,7 @@

   oxcf->best_allowed_q = vp9_quantizer_to_qindex(rc_cfg.min_quantizer);

   rc->worst_quality = oxcf->worst_allowed_q;

   rc->best_quality = oxcf->best_allowed_q;

+  oxcf->init_framerate = rc_cfg.framerate;

   oxcf->target_bandwidth = 1000 * rc_cfg.target_bandwidth;

   oxcf->starting_buffer_level_ms = rc_cfg.buf_initial_sz;

   oxcf->optimal_buffer_level_ms = rc_cfg.buf_optimal_sz;

@@ -85,10 +101,11 @@

       (rc_cfg.ts_number_layers > 1) ? rc_cfg.ts_number_layers : 0);

   cpi_->oxcf.rc_max_intra_bitrate_pct = rc_cfg.max_intra_bitrate_pct;

+  cpi_->oxcf.rc_max_inter_bitrate_pct = rc_cfg.max_inter_bitrate_pct;

   cpi_->framerate = rc_cfg.framerate;

   cpi_->svc.number_spatial_layers = rc_cfg.ss_number_layers;

   cpi_->svc.number_temporal_layers = rc_cfg.ts_number_layers;

+  vp9_set_mb_mi(cm, cm->width, cm->height);

   for (int sl = 0; sl < cpi_->svc.number_spatial_layers; ++sl) {

     for (int tl = 0; tl < cpi_->svc.number_temporal_layers; ++tl) {

       const int layer =

@@ -138,11 +155,27 @@

   cpi_->sf.use_nonrd_pick_mode = 1;

   if (cpi_->svc.number_spatial_layers == 1 &&

       cpi_->svc.number_temporal_layers == 1) {

-    int target;

-    if (frame_is_intra_only(cm))

-      target = vp9_calc_iframe_target_size_one_pass_cbr(cpi_);

-    else

-      target = vp9_calc_pframe_target_size_one_pass_cbr(cpi_);

+    int target = 0;

+    if (cpi_->oxcf.rc_mode == VPX_CBR) {

+      if (cpi_->oxcf.aq_mode == CYCLIC_REFRESH_AQ)

+        vp9_cyclic_refresh_update_parameters(cpi_);

+      if (frame_is_intra_only(cm))

+        target = vp9_calc_iframe_target_size_one_pass_cbr(cpi_);

+      else

+        target = vp9_calc_pframe_target_size_one_pass_cbr(cpi_);

+    } else if (cpi_->oxcf.rc_mode == VPX_VBR) {

+      if (cm->frame_type == KEY_FRAME) {

+        cpi_->rc.this_key_frame_forced = cm->current_video_frame != 0;

+        cpi_->rc.frames_to_key = cpi_->oxcf.key_freq;

+      }

+      vp9_set_gf_update_one_pass_vbr(cpi_);

+      if (cpi_->oxcf.aq_mode == CYCLIC_REFRESH_AQ)

+        vp9_cyclic_refresh_update_parameters(cpi_);

+      if (frame_is_intra_only(cm))

+        target = vp9_calc_iframe_target_size_one_pass_vbr(cpi_);

+      else

+        target = vp9_calc_pframe_target_size_one_pass_vbr(cpi_);

+    }

     vp9_rc_set_frame_target(cpi_, target);

     vp9_update_buffer_level_preencode(cpi_);

   } else {

@@ -153,6 +186,8 @@

   int bottom_index, top_index;

   cpi_->common.base_qindex =

       vp9_rc_pick_q_and_bounds(cpi_, &bottom_index, &top_index);

+  if (cpi_->oxcf.aq_mode == CYCLIC_REFRESH_AQ) vp9_cyclic_refresh_setup(cpi_);

 int VP9RateControlRTC::GetQP() const { return cpi_->common.base_qindex; }

@@ -161,6 +196,14 @@

   struct loopfilter *const lf = &cpi_->common.lf;

   vp9_pick_filter_level(nullptr, cpi_, LPF_PICK_FROM_Q);

   return lf->filter_level;

+}

+signed char *VP9RateControlRTC::GetCyclicRefreshMap() const {

+  return cpi_->cyclic_refresh->map;

+}

+int *VP9RateControlRTC::GetDeltaQ() const {

+  return cpi_->cyclic_refresh->qindex_delta;

 void VP9RateControlRTC::PostEncodeUpdate(uint64_t encoded_frame_size) {

--- a/vp9/ratectrl_rtc.h

+++ b/vp9/ratectrl_rtc.h

@@ -18,27 +18,30 @@

 #include "vp9/common/vp9_enums.h"

 #include "vp9/common/vp9_onyxc_int.h"

 #include "vp9/vp9_iface_common.h"

+#include "vp9/encoder/vp9_aq_cyclicrefresh.h"

 #include "vp9/encoder/vp9_encoder.h"

 #include "vp9/encoder/vp9_firstpass.h"

 #include "vp9/vp9_cx_iface.h"

+#include "vpx/internal/vpx_ratectrl_rtc.h"

 #include "vpx_mem/vpx_mem.h"

 namespace libvpx {

-struct VP9RateControlRtcConfig {

-  int width;

-  int height;

-  // 0-63

-  int max_quantizer;

-  int min_quantizer;

-  int64_t target_bandwidth;

-  int64_t buf_initial_sz;

-  int64_t buf_optimal_sz;

-  int64_t buf_sz;

-  int undershoot_pct;

-  int overshoot_pct;

-  int max_intra_bitrate_pct;

-  double framerate;

+struct VP9RateControlRtcConfig : public VpxRateControlRtcConfig {

+ public:

+  VP9RateControlRtcConfig() {

+    vp9_zero(max_quantizers);

+    vp9_zero(min_quantizers);

+    vp9_zero(scaling_factor_den);

+    vp9_zero(scaling_factor_num);

+    vp9_zero(layer_target_bitrate);

+    vp9_zero(ts_rate_decimator);

+    scaling_factor_num[0] = 1;

+    scaling_factor_den[0] = 1;

+    max_quantizers[0] = max_quantizer;

+    min_quantizers[0] = min_quantizer;

+  }

   // Number of spatial layers

   int ss_number_layers;

   // Number of temporal layers

@@ -47,8 +50,6 @@

   int min_quantizers[VPX_MAX_LAYERS];

   int scaling_factor_num[VPX_SS_MAX_LAYERS];

   int scaling_factor_den[VPX_SS_MAX_LAYERS];

-  int layer_target_bitrate[VPX_MAX_LAYERS];

-  int ts_rate_decimator[VPX_TS_MAX_LAYERS];

};

 struct VP9FrameParamsQpRTC {

@@ -58,7 +59,7 @@

};

 // This interface allows using VP9 real-time rate control without initializing

-// the encoder. To use this interface, you need to link with libvp9rc.a.

+// the encoder. To use this interface, you need to link with libvpxrc.a.

//

 // #include "vp9/ratectrl_rtc.h"

 // VP9RateControlRTC rc_api;

@@ -84,15 +85,23 @@

       const VP9RateControlRtcConfig &cfg);

   ~VP9RateControlRTC() {

     if (cpi_) {

-      for (int sl = 0; sl < cpi_->svc.number_spatial_layers; sl++) {

-        for (int tl = 0; tl < cpi_->svc.number_temporal_layers; tl++) {

-          int layer = LAYER_IDS_TO_IDX(sl, tl, cpi_->oxcf.ts_number_layers);

-          LAYER_CONTEXT *const lc = &cpi_->svc.layer_context[layer];

-          vpx_free(lc->map);

-          vpx_free(lc->last_coded_q_map);

-          vpx_free(lc->consec_zero_mv);

+      if (cpi_->svc.number_spatial_layers > 1 ||

+          cpi_->svc.number_temporal_layers > 1) {

+        for (int sl = 0; sl < cpi_->svc.number_spatial_layers; sl++) {

+          for (int tl = 0; tl < cpi_->svc.number_temporal_layers; tl++) {

+            int layer = LAYER_IDS_TO_IDX(sl, tl, cpi_->oxcf.ts_number_layers);

+            LAYER_CONTEXT *const lc = &cpi_->svc.layer_context[layer];

+            vpx_free(lc->map);

+            vpx_free(lc->last_coded_q_map);

+            vpx_free(lc->consec_zero_mv);

+          }

+      if (cpi_->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {

+        vpx_free(cpi_->segmentation_map);

+        cpi_->segmentation_map = NULL;

+        vp9_cyclic_refresh_free(cpi_->cyclic_refresh);

+      }

       vpx_free(cpi_);

@@ -101,6 +110,8 @@

   // GetQP() needs to be called after ComputeQP() to get the latest QP

   int GetQP() const;

   int GetLoopfilterLevel() const;

+  signed char *GetCyclicRefreshMap() const;

+  int *GetDeltaQ() const;

   void ComputeQP(const VP9FrameParamsQpRTC &frame_params);

   // Feedback to rate control with the size of current encoded frame

   void PostEncodeUpdate(uint64_t encoded_frame_size);

--- a/vp9/simple_encode.cc

+++ b/vp9/simple_encode.cc

@@ -793,6 +793,7 @@

   if (enc_pass == VPX_RC_FIRST_PASS) {

     oxcf.lag_in_frames = 0;

+  oxcf.use_simple_encode_api = 1;

   return oxcf;

@@ -872,14 +873,14 @@

   const VP9EncoderConfig oxcf = GetEncodeConfig(

       frame_width_, frame_height_, frame_rate, target_bitrate_, encode_speed_,

       VPX_RC_FIRST_PASS, impl_ptr_->encode_config_list);

-  VP9_COMP *cpi = init_encoder(&oxcf, impl_ptr_->img_fmt);

-  struct lookahead_ctx *lookahead = cpi->lookahead;

+  impl_ptr_->cpi = init_encoder(&oxcf, impl_ptr_->img_fmt);

+  struct lookahead_ctx *lookahead = impl_ptr_->cpi->lookahead;

   int i;

   int use_highbitdepth = 0;

   const int num_rows_16x16 = get_num_unit_16x16(frame_height_);

   const int num_cols_16x16 = get_num_unit_16x16(frame_width_);

 #if CONFIG_VP9_HIGHBITDEPTH

-  use_highbitdepth = cpi->common.use_highbitdepth;

+  use_highbitdepth = impl_ptr_->cpi->common.use_highbitdepth;

 #endif

   vpx_image_t img;

   vpx_img_alloc(&img, impl_ptr_->img_fmt, frame_width_, frame_height_, 1);

@@ -905,30 +906,35 @@

         ENCODE_FRAME_RESULT encode_frame_info;

         vp9_init_encode_frame_result(&encode_frame_info);

         // TODO(angiebird): Call vp9_first_pass directly

-        vp9_get_compressed_data(cpi, &frame_flags, &size, nullptr, &time_stamp,

-                                &time_end, flush, &encode_frame_info);

+        vp9_get_compressed_data(impl_ptr_->cpi, &frame_flags, &size, nullptr,

+                                &time_stamp, &time_end, flush,

+                                &encode_frame_info);

         // vp9_get_compressed_data only generates first pass stats not

         // compresses data

         assert(size == 0);

         // Get vp9 first pass motion vector info.

         std::vector<MotionVectorInfo> mv_info(num_rows_16x16 * num_cols_16x16);

-        update_motion_vector_info(cpi->fp_motion_vector_info, num_rows_16x16,

-                                  num_cols_16x16, mv_info.data(),

-                                  kMotionVectorFullPixelPrecision);

+        update_motion_vector_info(

+            impl_ptr_->cpi->fp_motion_vector_info, num_rows_16x16,

+            num_cols_16x16, mv_info.data(), kMotionVectorFullPixelPrecision);

         fp_motion_vector_info_.push_back(mv_info);

-      impl_ptr_->first_pass_stats.push_back(vp9_get_frame_stats(&cpi->twopass));

+      impl_ptr_->first_pass_stats.push_back(

+          vp9_get_frame_stats(&impl_ptr_->cpi->twopass));

-  vp9_end_first_pass(cpi);

   // TODO(angiebird): Store the total_stats apart form first_pass_stats

-  impl_ptr_->first_pass_stats.push_back(vp9_get_total_stats(&cpi->twopass));

-  free_encoder(cpi);

-  rewind(in_file_);

-  vpx_img_free(&img);

+  impl_ptr_->first_pass_stats.push_back(

+      vp9_get_total_stats(&impl_ptr_->cpi->twopass));

+  vp9_end_first_pass(impl_ptr_->cpi);

   // Generate key_frame_map based on impl_ptr_->first_pass_stats.

   key_frame_map_ = ComputeKeyFrameMap();

+  free_encoder(impl_ptr_->cpi);

+  impl_ptr_->cpi = nullptr;

+  rewind(in_file_);

+  vpx_img_free(&img);

 std::vector<std::vector<double>> SimpleEncode::ObserveFirstPassStats() {

@@ -1004,8 +1010,7 @@

 static GOP_COMMAND GetGopCommand(const std::vector<int> &gop_map,

                                  int start_show_index) {

   GOP_COMMAND gop_command;

-  if (gop_map.size() > 0) {

-    assert(static_cast<size_t>(start_show_index) < gop_map.size());

+  if (static_cast<size_t>(start_show_index) < gop_map.size()) {

     assert((gop_map[start_show_index] & kGopMapFlagStart) != 0);

     int end_show_index = start_show_index + 1;

     // gop_map[end_show_index] & kGopMapFlagStart == 0 means this is

@@ -1049,6 +1054,11 @@

   frame_coding_index_ = 0;

   show_frame_count_ = 0;

+  assert(impl_ptr_->cpi != nullptr);

+  FRAME_INFO frame_info = vp9_get_frame_info(&oxcf);

+  unsigned int screen_area = frame_info.frame_width * frame_info.frame_height;

+  vp9_init_vizier_params(&impl_ptr_->cpi->twopass, screen_area);

   UpdateKeyFrameGroup(show_frame_count_);

   const GOP_COMMAND gop_command = GetGopCommand(gop_map_, show_frame_count_);

@@ -1084,8 +1094,7 @@

   const VP9_COMP *cpi = impl_ptr_->cpi;

   key_frame_group_index_ = 0;

   key_frame_group_size_ = vp9_get_frames_to_next_key(

-      &cpi->oxcf, &cpi->frame_info, &cpi->twopass.first_pass_info,

-      key_frame_show_index, cpi->rc.min_gf_interval);

+      &cpi->oxcf, &cpi->twopass, key_frame_show_index, cpi->rc.min_gf_interval);

   assert(key_frame_group_size_ > 0);

   // Init the reference frame info when a new key frame group appears.

   InitRefFrameInfo(&ref_frame_info_);

@@ -1239,7 +1248,7 @@

     start_show_index += gop_command.show_frame_count;

     coding_frame_count += gop_command_coding_frame_count(&gop_command);

-  assert(start_show_index == gop_map.size());

+  assert(static_cast<size_t>(start_show_index) == gop_map.size());

   return coding_frame_count;

@@ -1250,6 +1259,7 @@

   // These are the default settings for now.

+  TWO_PASS twopass;

   const int multi_layer_arf = 0;

   const int allow_alt_ref = 1;

   vpx_rational_t frame_rate =

@@ -1258,30 +1268,30 @@

       frame_width_, frame_height_, frame_rate, target_bitrate_, encode_speed_,

       VPX_RC_LAST_PASS, impl_ptr_->encode_config_list);

   FRAME_INFO frame_info = vp9_get_frame_info(&oxcf);

-  FIRST_PASS_INFO first_pass_info;

-  fps_init_first_pass_info(&first_pass_info,

+  fps_init_first_pass_info(&twopass.first_pass_info,

                            GetVectorData(impl_ptr_->first_pass_stats),

                            num_frames_);

-  return vp9_get_coding_frame_num(&oxcf, &frame_info, &first_pass_info,

-                                  multi_layer_arf, allow_alt_ref);

+  unsigned int screen_area = frame_info.frame_width * frame_info.frame_height;

+  vp9_init_vizier_params(&twopass, screen_area);

+  return vp9_get_coding_frame_num(&oxcf, &twopass, &frame_info, multi_layer_arf,

+                                  allow_alt_ref);

 std::vector<int> SimpleEncode::ComputeKeyFrameMap() const {

   // The last entry of first_pass_stats is the overall stats.

-  assert(impl_ptr_->first_pass_stats.size() == num_frames_ + 1);

+  assert(impl_ptr_->first_pass_stats.size() ==

+         static_cast<size_t>(num_frames_) + 1);

   vpx_rational_t frame_rate =

       make_vpx_rational(frame_rate_num_, frame_rate_den_);

   const VP9EncoderConfig oxcf = GetEncodeConfig(

       frame_width_, frame_height_, frame_rate, target_bitrate_, encode_speed_,

       VPX_RC_LAST_PASS, impl_ptr_->encode_config_list);

-  FRAME_INFO frame_info = vp9_get_frame_info(&oxcf);

-  FIRST_PASS_INFO first_pass_info;

-  fps_init_first_pass_info(&first_pass_info,

+  TWO_PASS twopass;

+  fps_init_first_pass_info(&twopass.first_pass_info,

                            GetVectorData(impl_ptr_->first_pass_stats),

                            num_frames_);

   std::vector<int> key_frame_map(num_frames_, 0);

-  vp9_get_key_frame_map(&oxcf, &frame_info, &first_pass_info,

-                        GetVectorData(key_frame_map));

+  vp9_get_key_frame_map(&oxcf, &twopass, GetVectorData(key_frame_map));

   return key_frame_map;

--- a/vp9/vp9_cx_iface.c

+++ b/vp9/vp9_cx_iface.c

@@ -348,6 +348,24 @@

   RANGE_CHECK(extra_cfg, color_space, VPX_CS_UNKNOWN, VPX_CS_SRGB);

   RANGE_CHECK(extra_cfg, color_range, VPX_CR_STUDIO_RANGE, VPX_CR_FULL_RANGE);

+  // The range below shall be further tuned.

+  RANGE_CHECK(cfg, use_vizier_rc_params, 0, 1);

+  RANGE_CHECK(cfg, active_wq_factor.den, 1, 1000);

+  RANGE_CHECK(cfg, err_per_mb_factor.den, 1, 1000);

+  RANGE_CHECK(cfg, sr_default_decay_limit.den, 1, 1000);

+  RANGE_CHECK(cfg, sr_diff_factor.den, 1, 1000);

+  RANGE_CHECK(cfg, kf_err_per_mb_factor.den, 1, 1000);

+  RANGE_CHECK(cfg, kf_frame_min_boost_factor.den, 1, 1000);

+  RANGE_CHECK(cfg, kf_frame_max_boost_subs_factor.den, 1, 1000);

+  RANGE_CHECK(cfg, kf_max_total_boost_factor.den, 1, 1000);

+  RANGE_CHECK(cfg, gf_max_total_boost_factor.den, 1, 1000);

+  RANGE_CHECK(cfg, gf_frame_max_boost_factor.den, 1, 1000);

+  RANGE_CHECK(cfg, zm_factor.den, 1, 1000);

+  RANGE_CHECK(cfg, rd_mult_inter_qp_fac.den, 1, 1000);

+  RANGE_CHECK(cfg, rd_mult_arf_qp_fac.den, 1, 1000);

+  RANGE_CHECK(cfg, rd_mult_key_qp_fac.den, 1, 1000);

   return VPX_CODEC_OK;

@@ -565,10 +583,6 @@

   vp9_set_first_pass_stats(oxcf, &cfg->rc_twopass_stats_in);

-#if CONFIG_FP_MB_STATS

-  oxcf->firstpass_mb_stats_in = cfg->rc_firstpass_mb_stats_in;

-#endif

   oxcf->color_space = extra_cfg->color_space;

   oxcf->color_range = extra_cfg->color_range;

   oxcf->render_width = extra_cfg->render_width;

@@ -634,10 +648,135 @@

   if (get_level_index(oxcf->target_level) >= 0) config_target_level(oxcf);

+  oxcf->use_simple_encode_api = 0;

   // vp9_dump_encoder_config(oxcf, stderr);

   return VPX_CODEC_OK;

+static vpx_codec_err_t set_twopass_params_from_config(

+    const vpx_codec_enc_cfg_t *const cfg, struct VP9_COMP *cpi) {

+  if (!cfg->use_vizier_rc_params) return VPX_CODEC_OK;

+  if (cpi == NULL) return VPX_CODEC_ERROR;

+  cpi->twopass.use_vizier_rc_params = cfg->use_vizier_rc_params;

+  // The values set here are factors that will be applied to default values

+  // to get the final value used in the two pass code. Hence 1.0 will

+  // match the default behaviour when not using passed in values.

+  // We also apply limits here to prevent the user from applying settings

+  // that make no sense.

+  cpi->twopass.active_wq_factor =

+      (double)cfg->active_wq_factor.num / (double)cfg->active_wq_factor.den;

+  if (cpi->twopass.active_wq_factor < 0.25)

+    cpi->twopass.active_wq_factor = 0.25;

+  else if (cpi->twopass.active_wq_factor > 16.0)

+    cpi->twopass.active_wq_factor = 16.0;

+  cpi->twopass.err_per_mb =

+      (double)cfg->err_per_mb_factor.num / (double)cfg->err_per_mb_factor.den;

+  if (cpi->twopass.err_per_mb < 0.25)

+    cpi->twopass.err_per_mb = 0.25;

+  else if (cpi->twopass.err_per_mb > 4.0)

+    cpi->twopass.err_per_mb = 4.0;

+  cpi->twopass.sr_default_decay_limit =

+      (double)cfg->sr_default_decay_limit.num /

+      (double)cfg->sr_default_decay_limit.den;

+  if (cpi->twopass.sr_default_decay_limit < 0.25)

+    cpi->twopass.sr_default_decay_limit = 0.25;

+  // If the default changes this will need to change.

+  else if (cpi->twopass.sr_default_decay_limit > 1.33)

+    cpi->twopass.sr_default_decay_limit = 1.33;

+  cpi->twopass.sr_diff_factor =

+      (double)cfg->sr_diff_factor.num / (double)cfg->sr_diff_factor.den;

+  if (cpi->twopass.sr_diff_factor < 0.25)

+    cpi->twopass.sr_diff_factor = 0.25;

+  else if (cpi->twopass.sr_diff_factor > 4.0)

+    cpi->twopass.sr_diff_factor = 4.0;

+  cpi->twopass.kf_err_per_mb = (double)cfg->kf_err_per_mb_factor.num /

+                               (double)cfg->kf_err_per_mb_factor.den;

+  if (cpi->twopass.kf_err_per_mb < 0.25)

+    cpi->twopass.kf_err_per_mb = 0.25;

+  else if (cpi->twopass.kf_err_per_mb > 4.0)

+    cpi->twopass.kf_err_per_mb = 4.0;

+  cpi->twopass.kf_frame_min_boost = (double)cfg->kf_frame_min_boost_factor.num /

+                                    (double)cfg->kf_frame_min_boost_factor.den;

+  if (cpi->twopass.kf_frame_min_boost < 0.25)

+    cpi->twopass.kf_frame_min_boost = 0.25;

+  else if (cpi->twopass.kf_frame_min_boost > 4.0)

+    cpi->twopass.kf_frame_min_boost = 4.0;

+  cpi->twopass.kf_frame_max_boost_first =

+      (double)cfg->kf_frame_max_boost_first_factor.num /

+      (double)cfg->kf_frame_max_boost_first_factor.den;

+  if (cpi->twopass.kf_frame_max_boost_first < 0.25)

+    cpi->twopass.kf_frame_max_boost_first = 0.25;

+  else if (cpi->twopass.kf_frame_max_boost_first > 4.0)

+    cpi->twopass.kf_frame_max_boost_first = 4.0;

+  cpi->twopass.kf_frame_max_boost_subs =

+      (double)cfg->kf_frame_max_boost_subs_factor.num /

+      (double)cfg->kf_frame_max_boost_subs_factor.den;

+  if (cpi->twopass.kf_frame_max_boost_subs < 0.25)

+    cpi->twopass.kf_frame_max_boost_subs = 0.25;

+  else if (cpi->twopass.kf_frame_max_boost_subs > 4.0)

+    cpi->twopass.kf_frame_max_boost_subs = 4.0;

+  cpi->twopass.kf_max_total_boost = (double)cfg->kf_max_total_boost_factor.num /

+                                    (double)cfg->kf_max_total_boost_factor.den;

+  if (cpi->twopass.kf_max_total_boost < 0.25)

+    cpi->twopass.kf_max_total_boost = 0.25;

+  else if (cpi->twopass.kf_max_total_boost > 4.0)

+    cpi->twopass.kf_max_total_boost = 4.0;

+  cpi->twopass.gf_max_total_boost = (double)cfg->gf_max_total_boost_factor.num /

+                                    (double)cfg->gf_max_total_boost_factor.den;

+  if (cpi->twopass.gf_max_total_boost < 0.25)

+    cpi->twopass.gf_max_total_boost = 0.25;

+  else if (cpi->twopass.gf_max_total_boost > 4.0)

+    cpi->twopass.gf_max_total_boost = 4.0;

+  cpi->twopass.gf_frame_max_boost = (double)cfg->gf_frame_max_boost_factor.num /

+                                    (double)cfg->gf_frame_max_boost_factor.den;

+  if (cpi->twopass.gf_frame_max_boost < 0.25)

+    cpi->twopass.gf_frame_max_boost = 0.25;

+  else if (cpi->twopass.gf_frame_max_boost > 4.0)

+    cpi->twopass.gf_frame_max_boost = 4.0;

+  cpi->twopass.zm_factor =

+      (double)cfg->zm_factor.num / (double)cfg->zm_factor.den;

+  if (cpi->twopass.zm_factor < 0.25)

+    cpi->twopass.zm_factor = 0.25;

+  else if (cpi->twopass.zm_factor > 2.0)

+    cpi->twopass.zm_factor = 2.0;

+  cpi->rd_ctrl.rd_mult_inter_qp_fac = (double)cfg->rd_mult_inter_qp_fac.num /

+                                      (double)cfg->rd_mult_inter_qp_fac.den;

+  if (cpi->rd_ctrl.rd_mult_inter_qp_fac < 0.25)

+    cpi->rd_ctrl.rd_mult_inter_qp_fac = 0.25;

+  else if (cpi->rd_ctrl.rd_mult_inter_qp_fac > 4.0)

+    cpi->rd_ctrl.rd_mult_inter_qp_fac = 4.0;

+  cpi->rd_ctrl.rd_mult_arf_qp_fac =

+      (double)cfg->rd_mult_arf_qp_fac.num / (double)cfg->rd_mult_arf_qp_fac.den;

+  if (cpi->rd_ctrl.rd_mult_arf_qp_fac < 0.25)

+    cpi->rd_ctrl.rd_mult_arf_qp_fac = 0.25;

+  else if (cpi->rd_ctrl.rd_mult_arf_qp_fac > 4.0)

+    cpi->rd_ctrl.rd_mult_arf_qp_fac = 4.0;

+  cpi->rd_ctrl.rd_mult_key_qp_fac =

+      (double)cfg->rd_mult_key_qp_fac.num / (double)cfg->rd_mult_key_qp_fac.den;

+  if (cpi->rd_ctrl.rd_mult_key_qp_fac < 0.25)

+    cpi->rd_ctrl.rd_mult_key_qp_fac = 0.25;

+  else if (cpi->rd_ctrl.rd_mult_key_qp_fac > 4.0)

+    cpi->rd_ctrl.rd_mult_key_qp_fac = 4.0;

+  return VPX_CODEC_OK;

+}

 static vpx_codec_err_t encoder_set_config(vpx_codec_alg_priv_t *ctx,

                                           const vpx_codec_enc_cfg_t *cfg) {

   vpx_codec_err_t res;

@@ -664,6 +803,7 @@

   if (res == VPX_CODEC_OK) {

     ctx->cfg = *cfg;

     set_encoder_config(&ctx->oxcf, &ctx->cfg, &ctx->extra_cfg);

+    set_twopass_params_from_config(&ctx->cfg, ctx->cpi);

     // On profile change, request a key frame

     force_key |= ctx->cpi->common.profile != ctx->oxcf.profile;

     vp9_change_config(ctx->cpi, &ctx->oxcf);

@@ -690,6 +830,25 @@

   return VPX_CODEC_OK;

+static vpx_codec_err_t ctrl_get_quantizer_svc_layers(vpx_codec_alg_priv_t *ctx,

+                                                     va_list args) {

+  int *const arg = va_arg(args, int *);

+  int i;

+  if (arg == NULL) return VPX_CODEC_INVALID_PARAM;

+  for (i = 0; i < VPX_SS_MAX_LAYERS; i++) {

+    arg[i] = ctx->cpi->svc.base_qindex[i];

+  }

+  return VPX_CODEC_OK;

+}

+static vpx_codec_err_t ctrl_get_loopfilter_level(vpx_codec_alg_priv_t *ctx,

+                                                 va_list args) {

+  int *const arg = va_arg(args, int *);

+  if (arg == NULL) return VPX_CODEC_INVALID_PARAM;

+  *arg = ctx->cpi->common.lf.filter_level;

+  return VPX_CODEC_OK;

+}

 static vpx_codec_err_t update_extra_cfg(vpx_codec_alg_priv_t *ctx,

                                         const struct vp9_extracfg *extra_cfg) {

   const vpx_codec_err_t res = validate_config(ctx, &ctx->cfg, extra_cfg);

@@ -696,6 +855,7 @@

   if (res == VPX_CODEC_OK) {

     ctx->extra_cfg = *extra_cfg;

     set_encoder_config(&ctx->oxcf, &ctx->cfg, &ctx->extra_cfg);

+    set_twopass_params_from_config(&ctx->cfg, ctx->cpi);

     vp9_change_config(ctx->cpi, &ctx->oxcf);

   return res;

@@ -886,6 +1046,18 @@

   return update_extra_cfg(ctx, &extra_cfg);

+static vpx_codec_err_t ctrl_set_rtc_external_ratectrl(vpx_codec_alg_priv_t *ctx,

+                                                      va_list args) {

+  VP9_COMP *const cpi = ctx->cpi;

+  const unsigned int data = va_arg(args, unsigned int);

+  if (data) {

+    cpi->compute_frame_low_motion_onepass = 0;

+    cpi->rc.constrain_gf_key_freq_onepass_vbr = 0;

+    cpi->cyclic_refresh->content_mode = 0;

+  }

+  return VPX_CODEC_OK;

+}

 static vpx_codec_err_t ctrl_enable_motion_vector_unit_test(

     vpx_codec_alg_priv_t *ctx, va_list args) {

   struct vp9_extracfg extra_cfg = ctx->extra_cfg;

@@ -940,6 +1112,7 @@

 #endif

       priv->cpi = vp9_create_compressor(&priv->oxcf, priv->buffer_pool);

       if (priv->cpi == NULL) res = VPX_CODEC_MEM_ERROR;

+      set_twopass_params_from_config(&priv->cfg, priv->cpi);

@@ -1744,6 +1917,7 @@

   if (oxcf->pass == 2) {

     const FRAME_INFO *frame_info = &cpi->frame_info;

     vpx_rc_config_t ratectrl_config;

+    vpx_codec_err_t codec_status;

     ratectrl_config.frame_width = frame_info->frame_width;

     ratectrl_config.frame_height = frame_info->frame_height;

@@ -1755,7 +1929,10 @@

     ratectrl_config.frame_rate_num = oxcf->g_timebase.den;

     ratectrl_config.frame_rate_den = oxcf->g_timebase.num;

-    vp9_extrc_create(funcs, ratectrl_config, ext_ratectrl);

+    codec_status = vp9_extrc_create(funcs, ratectrl_config, ext_ratectrl);

+    if (codec_status != VPX_CODEC_OK) {

+      return codec_status;

+    }

   return VPX_CODEC_OK;

@@ -1812,11 +1989,14 @@

   { VP9E_SET_SVC_SPATIAL_LAYER_SYNC, ctrl_set_svc_spatial_layer_sync },

   { VP9E_SET_DELTA_Q_UV, ctrl_set_delta_q_uv },

   { VP9E_SET_DISABLE_LOOPFILTER, ctrl_set_disable_loopfilter },

+  { VP9E_SET_RTC_EXTERNAL_RATECTRL, ctrl_set_rtc_external_ratectrl },

   { VP9E_SET_EXTERNAL_RATE_CONTROL, ctrl_set_external_rate_control },

   // Getters

   { VP8E_GET_LAST_QUANTIZER, ctrl_get_quantizer },

   { VP8E_GET_LAST_QUANTIZER_64, ctrl_get_quantizer64 },

+  { VP9E_GET_LAST_QUANTIZER_SVC_LAYERS, ctrl_get_quantizer_svc_layers },

+  { VP9E_GET_LOOPFILTER_LEVEL, ctrl_get_loopfilter_level },

   { VP9_GET_REFERENCE, ctrl_get_reference },

   { VP9E_GET_SVC_LAYER_ID, ctrl_get_svc_layer_id },

   { VP9E_GET_ACTIVEMAP, ctrl_get_active_map },

@@ -1879,14 +2059,30 @@

         VPX_SS_DEFAULT_LAYERS,  // ss_number_layers

         { 0 },

-        { 0 },  // ss_target_bitrate

-        1,      // ts_number_layers

-        { 0 },  // ts_target_bitrate

-        { 0 },  // ts_rate_decimator

-        0,      // ts_periodicity

-        { 0 },  // ts_layer_id

-        { 0 },  // layer_taget_bitrate

-        0       // temporal_layering_mode

+        { 0 },     // ss_target_bitrate

+        1,         // ts_number_layers

+        { 0 },     // ts_target_bitrate

+        { 0 },     // ts_rate_decimator

+        0,         // ts_periodicity

+        { 0 },     // ts_layer_id

+        { 0 },     // layer_taget_bitrate

+        0,         // temporal_layering_mode

+        0,         // use_vizier_rc_params

+        { 1, 1 },  // active_wq_factor

+        { 1, 1 },  // err_per_mb_factor

+        { 1, 1 },  // sr_default_decay_limit

+        { 1, 1 },  // sr_diff_factor

+        { 1, 1 },  // kf_err_per_mb_factor

+        { 1, 1 },  // kf_frame_min_boost_factor

+        { 1, 1 },  // kf_frame_max_boost_first_factor

+        { 1, 1 },  // kf_frame_max_boost_subs_factor

+        { 1, 1 },  // kf_max_total_boost_factor

+        { 1, 1 },  // gf_max_total_boost_factor

+        { 1, 1 },  // gf_frame_max_boost_factor

+        { 1, 1 },  // zm_factor

+        { 1, 1 },  // rd_mult_inter_qp_fac

+        { 1, 1 },  // rd_mult_arf_qp_fac

+        { 1, 1 },  // rd_mult_key_qp_fac

     } },

};

@@ -2105,11 +2301,6 @@

   DUMP_STRUCT_VALUE(fp, oxcf, target_level);

   // TODO(angiebird): dump two_pass_stats_in

-#if CONFIG_FP_MB_STATS

-  // TODO(angiebird): dump firstpass_mb_stats_in

-#endif

   DUMP_STRUCT_VALUE(fp, oxcf, tuning);

   DUMP_STRUCT_VALUE(fp, oxcf, content);

 #if CONFIG_VP9_HIGHBITDEPTH

@@ -2123,6 +2314,8 @@

   DUMP_STRUCT_VALUE(fp, oxcf, row_mt);

   DUMP_STRUCT_VALUE(fp, oxcf, motion_vector_unit_test);

+  DUMP_STRUCT_VALUE(fp, oxcf, delta_q_uv);

+  DUMP_STRUCT_VALUE(fp, oxcf, use_simple_encode_api);

 FRAME_INFO vp9_get_frame_info(const VP9EncoderConfig *oxcf) {

--- a/vpx/internal/vpx_codec_internal.h

+++ b/vpx/internal/vpx_codec_internal.h

@@ -283,7 +283,7 @@

   vpx_codec_enc_cfg_t cfg;

 } vpx_codec_enc_cfg_map_t;

-/*!\brief Decoder algorithm interface interface

+/*!\brief Decoder algorithm interface

  * All decoders \ref MUST expose a variable of this type.

*/

--- /dev/null

+++ b/vpx/internal/vpx_ratectrl_rtc.h

@@ -1,0 +1,62 @@

+/*

+ *  Copyright (c) 2021 The WebM project authors. All Rights Reserved.

+ *

+ *  Use of this source code is governed by a BSD-style license

+ *  that can be found in the LICENSE file in the root of the source

+ *  tree. An additional intellectual property rights grant can be found

+ *  in the file PATENTS.  All contributing project authors may

+ *  be found in the AUTHORS file in the root of the source tree.

+ */

+#ifndef VPX_VPX_RATECTRL_RTC_H_

+#define VPX_VPX_RATECTRL_RTC_H_

+#include "vpx/vpx_encoder.h"

+namespace libvpx {

+struct VpxRateControlRtcConfig {

+ public:

+  VpxRateControlRtcConfig() {

+    width = 1280;

+    height = 720;

+    max_quantizer = 63;

+    min_quantizer = 2;

+    target_bandwidth = 1000;

+    buf_initial_sz = 600;

+    buf_optimal_sz = 600;

+    buf_sz = 1000;

+    undershoot_pct = overshoot_pct = 50;

+    max_intra_bitrate_pct = 50;

+    max_inter_bitrate_pct = 0;

+    framerate = 30.0;

+    ts_number_layers = 1;

+    rc_mode = VPX_CBR;

+    aq_mode = 0;

+    layer_target_bitrate[0] = static_cast<int>(target_bandwidth);

+    ts_rate_decimator[0] = 1;

+  }

+  int width;

+  int height;

+  // 0-63

+  int max_quantizer;

+  int min_quantizer;

+  int64_t target_bandwidth;

+  int64_t buf_initial_sz;

+  int64_t buf_optimal_sz;

+  int64_t buf_sz;

+  int undershoot_pct;

+  int overshoot_pct;

+  int max_intra_bitrate_pct;

+  int max_inter_bitrate_pct;

+  double framerate;

+  // Number of temporal layers

+  int ts_number_layers;

+  int layer_target_bitrate[VPX_MAX_LAYERS];

+  int ts_rate_decimator[VPX_TS_MAX_LAYERS];

+  // vbr, cbr

+  enum vpx_rc_mode rc_mode;

+  int aq_mode;

+};

+}  // namespace libvpx

+#endif

--- a/vpx/src/vpx_image.c

+++ b/vpx/src/vpx_image.c

@@ -8,6 +8,7 @@

  *  be found in the AUTHORS file in the root of the source tree.

*/

+#include <limits.h>

 #include <stdlib.h>

 #include <string.h>

@@ -22,8 +23,10 @@

                                      unsigned char *img_data) {

   unsigned int h, w, s, xcs, ycs, bps;

   unsigned int stride_in_bytes;

-  int align;

+  unsigned int align;

+  if (img != NULL) memset(img, 0, sizeof(vpx_image_t));

   /* Treat align==0 like align==1 */

   if (!buf_align) buf_align = 1;

@@ -88,8 +91,6 @@

     if (!img) goto fail;

     img->self_allocd = 1;

-  } else {

-    memset(img, 0, sizeof(vpx_image_t));

   img->img_data = img_data;

@@ -152,9 +153,8 @@

 int vpx_img_set_rect(vpx_image_t *img, unsigned int x, unsigned int y,

                      unsigned int w, unsigned int h) {

-  unsigned char *data;

-  if (x + w <= img->w && y + h <= img->h) {

+  if (x <= UINT_MAX - w && x + w <= img->w && y <= UINT_MAX - h &&

+      y + h <= img->h) {

     img->d_w = w;

     img->d_h = h;

@@ -165,7 +165,7 @@

     } else {

       const int bytes_per_sample =

           (img->fmt & VPX_IMG_FMT_HIGHBITDEPTH) ? 2 : 1;

-      data = img->img_data;

+      unsigned char *data = img->img_data;

       if (img->fmt & VPX_IMG_FMT_HAS_ALPHA) {

         img->planes[VPX_PLANE_ALPHA] =

--- a/vpx/vp8cx.h

+++ b/vpx/vp8cx.h

@@ -712,6 +712,47 @@

    * Supported in codecs: VP9

*/

   VP9E_SET_EXTERNAL_RATE_CONTROL,

+  /*!\brief Codec control to disable internal features in rate control.

+   *

+   * This will do 3 things, only for 1 pass:

+   *  - Turn off low motion computation

+   *  - Turn off gf update constraint on key frame frequency

+   *  - Turn off content mode for cyclic refresh

+   *

+   * With those, the rate control is expected to work exactly the same as the

+   * interface provided in ratectrl_rtc.cc/h

+   *

+   * Supported in codecs: VP9

+   */

+  VP9E_SET_RTC_EXTERNAL_RATECTRL,

+  /*!\brief Codec control function to get loopfilter level in the encoder.

+   *

+   * Supported in codecs: VP9

+   */

+  VP9E_GET_LOOPFILTER_LEVEL,

+  /*!\brief Codec control to get last quantizers for all spatial layers.

+   *

+   * Return value uses an array of internal quantizers scale defined by the

+   * codec, for all spatial layers.

+   * The size of the array passed in should be #VPX_SS_MAX_LAYERS.

+   *

+   * Supported in codecs: VP9

+   */

+  VP9E_GET_LAST_QUANTIZER_SVC_LAYERS,

+  /*!\brief Codec control to disable internal features in rate control.

+   *

+   * This will turn off cyclic refresh for vp8.

+   *

+   * With this, the rate control is expected to work exactly the same as the

+   * interface provided in vp8_ratectrl_rtc.cc/h

+   *

+   * Supported in codecs: VP8

+   */

+  VP8E_SET_RTC_EXTERNAL_RATECTRL,

};

 /*!\brief vpx 1-D scaling mode

@@ -767,8 +808,8 @@

   unsigned int rows; /**< Number of rows. */

   unsigned int cols; /**< Number of columns. */

   /*! VP8 only uses the first 4 segments. VP9 uses 8 segments. */

-  int delta_q[8];  /**< Quantizer deltas. */

-  int delta_lf[8]; /**< Loop filter deltas. */

+  int delta_q[8];  /**< Quantizer deltas. Valid range: [-63, 63].*/

+  int delta_lf[8]; /**< Loop filter deltas. Valid range: [-63, 63].*/

   /*! skip and ref frame segment is only used in VP9. */

   int skip[8];      /**< Skip this block. */

   int ref_frame[8]; /**< Reference frame for this block. */

@@ -969,6 +1010,9 @@

 #define VPX_CTRL_VP8E_GET_LAST_QUANTIZER

 VPX_CTRL_USE_TYPE(VP8E_GET_LAST_QUANTIZER_64, int *)

 #define VPX_CTRL_VP8E_GET_LAST_QUANTIZER_64

+VPX_CTRL_USE_TYPE(VP9E_GET_LAST_QUANTIZER_SVC_LAYERS, int *)

+#define VPX_CTRL_VP9E_GET_LAST_QUANTIZER_SVC_LAYERS

 VPX_CTRL_USE_TYPE(VP9E_GET_SVC_LAYER_ID, vpx_svc_layer_id_t *)

 #define VPX_CTRL_VP9E_GET_SVC_LAYER_ID

@@ -1037,6 +1081,9 @@

 VPX_CTRL_USE_TYPE(VP9E_GET_LEVEL, int *)

 #define VPX_CTRL_VP9E_GET_LEVEL

+VPX_CTRL_USE_TYPE(VP9E_GET_LOOPFILTER_LEVEL, int *)

+#define VPX_CTRL_VP9E_GET_LOOPFILTER_LEVEL

 VPX_CTRL_USE_TYPE(VP9E_ENABLE_MOTION_VECTOR_UNIT_TEST, unsigned int)

 #define VPX_CTRL_VP9E_ENABLE_MOTION_VECTOR_UNIT_TEST

@@ -1067,6 +1114,12 @@

 VPX_CTRL_USE_TYPE(VP9E_SET_DISABLE_LOOPFILTER, int)

 #define VPX_CTRL_VP9E_SET_DISABLE_LOOPFILTER

+VPX_CTRL_USE_TYPE(VP9E_SET_RTC_EXTERNAL_RATECTRL, int)

+#define VPX_CTRL_VP9E_SET_RTC_EXTERNAL_RATECTRL

+VPX_CTRL_USE_TYPE(VP8E_SET_RTC_EXTERNAL_RATECTRL, int)

+#define VPX_CTRL_VP8E_SET_RTC_EXTERNAL_RATECTRL

 VPX_CTRL_USE_TYPE(VP9E_SET_EXTERNAL_RATE_CONTROL, vpx_rc_funcs_t *)

 #define VPX_CTRL_VP9E_SET_EXTERNAL_RATE_CONTROL

--- a/vpx/vpx_codec.mk

+++ b/vpx/vpx_codec.mk

@@ -33,6 +33,7 @@

 API_SRCS-yes += src/vpx_encoder.c

 API_SRCS-yes += vpx_encoder.h

 API_SRCS-yes += internal/vpx_codec_internal.h

+API_SRCS-yes += internal/vpx_ratectrl_rtc.h

 API_SRCS-yes += src/vpx_codec.c

 API_SRCS-yes += src/vpx_image.c

 API_SRCS-yes += vpx_codec.h

--- a/vpx/vpx_encoder.h

+++ b/vpx/vpx_encoder.h

@@ -58,7 +58,7 @@

  * fields to structures

*/

 #define VPX_ENCODER_ABI_VERSION \

-  (14 + VPX_CODEC_ABI_VERSION + \

+  (15 + VPX_CODEC_ABI_VERSION + \

    VPX_EXT_RATECTRL_ABI_VERSION) /**<\hideinitializer*/

 /*! \brief Encoder capabilities bitfield

@@ -457,7 +457,7 @@

   /*!\brief Target data rate

-   * Target bandwidth to use for this stream, in kilobits per second.

+   * Target bitrate to use for this stream, in kilobits per second.

*/

   unsigned int rc_target_bitrate;

@@ -498,7 +498,7 @@

    * undershoot level (current rate vs target) beyond which more aggressive

    * corrective measures are taken.

    *   *

-   * Valid values in the range VP8:0-1000 VP9: 0-100.

+   * Valid values in the range VP8:0-100 VP9: 0-100.

*/

   unsigned int rc_undershoot_pct;

@@ -513,7 +513,7 @@

    * overshoot level (current rate vs target) beyond which more aggressive

    * corrective measures are taken.

-   * Valid values in the range VP8:0-1000 VP9: 0-100.

+   * Valid values in the range VP8:0-100 VP9: 0-100.

*/

   unsigned int rc_overshoot_pct;

@@ -693,6 +693,151 @@

*/

   int temporal_layering_mode;

+  /*!\brief A flag indicating whether to use external rate control parameters.

+   * By default is 0. If set to 1, the following parameters will be used in the

+   * rate control system.

+   */

+  int use_vizier_rc_params;

+  /*!\brief Active worst quality factor.

+   *

+   * Rate control parameters, set from external experiment results.

+   * Only when |use_vizier_rc_params| is set to 1, the pass in value will be

+   * used. Otherwise, the default value is used.

+   *

+   */

+  vpx_rational_t active_wq_factor;

+  /*!\brief Error per macroblock adjustment factor.

+   *

+   * Rate control parameters, set from external experiment results.

+   * Only when |use_vizier_rc_params| is set to 1, the pass in value will be

+   * used. Otherwise, the default value is used.

+   *

+   */

+  vpx_rational_t err_per_mb_factor;

+  /*!\brief Second reference default decay limit.

+   *

+   * Rate control parameters, set from external experiment results.

+   * Only when |use_vizier_rc_params| is set to 1, the pass in value will be

+   * used. Otherwise, the default value is used.

+   *

+   */

+  vpx_rational_t sr_default_decay_limit;

+  /*!\brief Second reference difference factor.

+   *

+   * Rate control parameters, set from external experiment results.

+   * Only when |use_vizier_rc_params| is set to 1, the pass in value will be

+   * used. Otherwise, the default value is used.

+   *

+   */

+  vpx_rational_t sr_diff_factor;

+  /*!\brief Keyframe error per macroblock adjustment factor.

+   *

+   * Rate control parameters, set from external experiment results.

+   * Only when |use_vizier_rc_params| is set to 1, the pass in value will be

+   * used. Otherwise, the default value is used.

+   *

+   */

+  vpx_rational_t kf_err_per_mb_factor;

+  /*!\brief Keyframe minimum boost adjustment factor.

+   *

+   * Rate control parameters, set from external experiment results.

+   * Only when |use_vizier_rc_params| is set to 1, the pass in value will be

+   * used. Otherwise, the default value is used.

+   *

+   */

+  vpx_rational_t kf_frame_min_boost_factor;

+  /*!\brief Keyframe maximum boost adjustment factor, for the first keyframe

+   * in a chunk.

+   *

+   * Rate control parameters, set from external experiment results.

+   * Only when |use_vizier_rc_params| is set to 1, the pass in value will be

+   * used. Otherwise, the default value is used.

+   *

+   */

+  vpx_rational_t kf_frame_max_boost_first_factor;

+  /*!\brief Keyframe maximum boost adjustment factor, for subsequent keyframes.

+   *

+   * Rate control parameters, set from external experiment results.

+   * Only when |use_vizier_rc_params| is set to 1, the pass in value will be

+   * used. Otherwise, the default value is used.

+   *

+   */

+  vpx_rational_t kf_frame_max_boost_subs_factor;

+  /*!\brief Keyframe maximum total boost adjustment factor.

+   *

+   * Rate control parameters, set from external experiment results.

+   * Only when |use_vizier_rc_params| is set to 1, the pass in value will be

+   * used. Otherwise, the default value is used.

+   *

+   */

+  vpx_rational_t kf_max_total_boost_factor;

+  /*!\brief Golden frame maximum total boost adjustment factor.

+   *

+   * Rate control parameters, set from external experiment results.

+   * Only when |use_vizier_rc_params| is set to 1, the pass in value will be

+   * used. Otherwise, the default value is used.

+   *

+   */

+  vpx_rational_t gf_max_total_boost_factor;

+  /*!\brief Golden frame maximum boost adjustment factor.

+   *

+   * Rate control parameters, set from external experiment results.

+   * Only when |use_vizier_rc_params| is set to 1, the pass in value will be

+   * used. Otherwise, the default value is used.

+   *

+   */

+  vpx_rational_t gf_frame_max_boost_factor;

+  /*!\brief Zero motion power factor.

+   *

+   * Rate control parameters, set from external experiment results.

+   * Only when |use_vizier_rc_params| is set to 1, the pass in value will be

+   * used. Otherwise, the default value is used.

+   *

+   */

+  vpx_rational_t zm_factor;

+  /*!\brief Rate-distortion multiplier for inter frames.

+   * The multiplier is a crucial parameter in the calculation of rate distortion

+   * cost. It is often related to the qp (qindex) value.

+   * Rate control parameters, could be set from external experiment results.

+   * Only when |use_vizier_rc_params| is set to 1, the pass in value will be

+   * used. Otherwise, the default value is used.

+   *

+   */

+  vpx_rational_t rd_mult_inter_qp_fac;

+  /*!\brief Rate-distortion multiplier for alt-ref frames.

+   * The multiplier is a crucial parameter in the calculation of rate distortion

+   * cost. It is often related to the qp (qindex) value.

+   * Rate control parameters, could be set from external experiment results.

+   * Only when |use_vizier_rc_params| is set to 1, the pass in value will be

+   * used. Otherwise, the default value is used.

+   *

+   */

+  vpx_rational_t rd_mult_arf_qp_fac;

+  /*!\brief Rate-distortion multiplier for key frames.

+   * The multiplier is a crucial parameter in the calculation of rate distortion

+   * cost. It is often related to the qp (qindex) value.

+   * Rate control parameters, could be set from external experiment results.

+   * Only when |use_vizier_rc_params| is set to 1, the pass in value will be

+   * used. Otherwise, the default value is used.

+   *

+   */

+  vpx_rational_t rd_mult_key_qp_fac;

 } vpx_codec_enc_cfg_t; /**< alias for struct vpx_codec_enc_cfg */

 /*!\brief  vp9 svc extra configure parameters

--- a/vpx/vpx_ext_ratectrl.h

+++ b/vpx/vpx_ext_ratectrl.h

@@ -38,9 +38,15 @@

  * The encoder will receive the decision from the external rate control model

  * through get_encodeframe_decision() defined in vpx_rc_funcs_t.

+ *

+ * If max_frame_size = 0, the encoding ignores max frame size limit.

+ * If max_frame_size = -1, the encoding uses VP9's max frame size as the limit.

+ * If the encoded frame size is larger than max_frame_size, the frame is

+ * recoded to meet the size limit, following VP9's recoding principles.

*/

 typedef struct vpx_rc_encodeframe_decision {

-  int q_index; /**< Quantizer step index [0..255]*/

+  int q_index;        /**< Quantizer step index [0..255]*/

+  int max_frame_size; /**< Maximal frame size allowed to encode a frame*/

 } vpx_rc_encodeframe_decision_t;

 /*!\brief Information for the frame to be encoded.

@@ -82,6 +88,7 @@

   int64_t sse;         /**< sum of squared error of the reconstructed frame */

   int64_t bit_count;   /**< number of bits spent on coding the frame*/

   int64_t pixel_count; /**< number of pixels in YUV planes of the frame*/

+  int actual_encoding_qindex; /**< the actual qindex used to encode the frame*/

 } vpx_rc_encodeframe_result_t;

 /*!\brief Status returned by rate control callback functions.

--- a/vpx/vpx_image.h

+++ b/vpx/vpx_image.h

@@ -171,7 +171,8 @@

 /*!\brief Set the rectangle identifying the displayed portion of the image

  * Updates the displayed rectangle (aka viewport) on the image surface to

- * match the specified coordinates and size.

+ * match the specified coordinates and size. Specifically, sets img->d_w,

+ * img->d_h, and elements of the img->planes[] array.

  * \param[in]    img       Image descriptor

  * \param[in]    x         leftmost column

@@ -179,7 +180,7 @@

  * \param[in]    w         width

  * \param[in]    h         height

- * \return 0 if the requested rectangle is valid, nonzero otherwise.

+ * \return 0 if the requested rectangle is valid, nonzero (-1) otherwise.

*/

 int vpx_img_set_rect(vpx_image_t *img, unsigned int x, unsigned int y,

                      unsigned int w, unsigned int h);

--- a/vpx_dsp/arm/avg_neon.c

+++ b/vpx_dsp/arm/avg_neon.c

@@ -22,8 +22,7 @@

 uint32_t vpx_avg_4x4_neon(const uint8_t *a, int a_stride) {

   const uint8x16_t b = load_unaligned_u8q(a, a_stride);

   const uint16x8_t c = vaddl_u8(vget_low_u8(b), vget_high_u8(b));

-  const uint32x2_t d = horizontal_add_uint16x8(c);

-  return vget_lane_u32(vrshr_n_u32(d, 4), 0);

+  return (horizontal_add_uint16x8(c) + (1 << 3)) >> 4;

 uint32_t vpx_avg_8x8_neon(const uint8_t *a, int a_stride) {

@@ -30,7 +29,6 @@

   int i;

   uint8x8_t b, c;

   uint16x8_t sum;

-  uint32x2_t d;

   b = vld1_u8(a);

   a += a_stride;

   c = vld1_u8(a);

@@ -43,9 +41,7 @@

     sum = vaddw_u8(sum, d);

-  d = horizontal_add_uint16x8(sum);

-  return vget_lane_u32(vrshr_n_u32(d, 6), 0);

+  return (horizontal_add_uint16x8(sum) + (1 << 5)) >> 6;

 // coeff: 16 bits, dynamic range [-32640, 32640].

@@ -139,8 +135,7 @@

     ref += 16;

-  return vget_lane_s16(vreinterpret_s16_u32(horizontal_add_uint16x8(vec_sum)),

-                       0);

+  return (int16_t)horizontal_add_uint16x8(vec_sum);

 // ref, src = [0, 510] - max diff = 16-bits

--- a/vpx_dsp/arm/fdct_partial_neon.c

+++ b/vpx_dsp/arm/fdct_partial_neon.c

@@ -15,19 +15,10 @@

 #include "vpx_dsp/arm/mem_neon.h"

 #include "vpx_dsp/arm/sum_neon.h"

-static INLINE tran_low_t get_lane(const int32x2_t a) {

-#if CONFIG_VP9_HIGHBITDEPTH

-  return vget_lane_s32(a, 0);

-#else

-  return vget_lane_s16(vreinterpret_s16_s32(a), 0);

-#endif  // CONFIG_VP9_HIGHBITDETPH

-}

 void vpx_fdct4x4_1_neon(const int16_t *input, tran_low_t *output, int stride) {

   int16x4_t a0, a1, a2, a3;

   int16x8_t b0, b1;

   int16x8_t c;

-  int32x2_t d;

   a0 = vld1_s16(input);

   input += stride;

@@ -42,9 +33,7 @@

   c = vaddq_s16(b0, b1);

-  d = horizontal_add_int16x8(c);

-  output[0] = get_lane(vshl_n_s32(d, 1));

+  output[0] = (tran_low_t)(horizontal_add_int16x8(c) << 1);

   output[1] = 0;

@@ -57,7 +46,7 @@

     sum = vaddq_s16(sum, input_00);

-  output[0] = get_lane(horizontal_add_int16x8(sum));

+  output[0] = (tran_low_t)horizontal_add_int16x8(sum);

   output[1] = 0;

@@ -66,7 +55,7 @@

   int r;

   int16x8_t left = vld1q_s16(input);

   int16x8_t right = vld1q_s16(input + 8);

-  int32x2_t sum;

+  int32_t sum;

   input += stride;

   for (r = 1; r < 16; ++r) {

@@ -77,9 +66,9 @@

     right = vaddq_s16(right, b);

-  sum = vadd_s32(horizontal_add_int16x8(left), horizontal_add_int16x8(right));

+  sum = horizontal_add_int16x8(left) + horizontal_add_int16x8(right);

-  output[0] = get_lane(vshr_n_s32(sum, 1));

+  output[0] = (tran_low_t)(sum >> 1);

   output[1] = 0;

@@ -90,7 +79,7 @@

   int16x8_t a1 = vld1q_s16(input + 8);

   int16x8_t a2 = vld1q_s16(input + 16);

   int16x8_t a3 = vld1q_s16(input + 24);

-  int32x2_t sum;

+  int32_t sum;

   input += stride;

   for (r = 1; r < 32; ++r) {

@@ -105,9 +94,10 @@

     a3 = vaddq_s16(a3, b3);

-  sum = vadd_s32(horizontal_add_int16x8(a0), horizontal_add_int16x8(a1));

-  sum = vadd_s32(sum, horizontal_add_int16x8(a2));

-  sum = vadd_s32(sum, horizontal_add_int16x8(a3));

-  output[0] = get_lane(vshr_n_s32(sum, 3));

+  sum = horizontal_add_int16x8(a0);

+  sum += horizontal_add_int16x8(a1);

+  sum += horizontal_add_int16x8(a2);

+  sum += horizontal_add_int16x8(a3);

+  output[0] = (tran_low_t)(sum >> 3);

   output[1] = 0;

--- a/vpx_dsp/arm/highbd_loopfilter_neon.c

+++ b/vpx_dsp/arm/highbd_loopfilter_neon.c

@@ -661,6 +661,17 @@

   vpx_highbd_lpf_vertical_8_neon(s + 8 * p, p, blimit1, limit1, thresh1, bd);

+// Quiet warnings of the form: 'vpx_dsp/arm/highbd_loopfilter_neon.c|675 col 67|

+// warning: 'oq1' may be used uninitialized in this function

+// [-Wmaybe-uninitialized]', for oq1-op1. Without reworking the code or adding

+// an additional branch this warning cannot be silenced otherwise. The

+// loopfilter is only called when needed for a block so these output pixels

+// will be set.

+#if defined(__GNUC__) && __GNUC__ >= 4 && !defined(__clang__)

+#pragma GCC diagnostic push

+#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"

+#endif

 static void lpf_horizontal_16_kernel(uint16_t *s, int p,

                                      const uint16x8_t blimit_vec,

                                      const uint16x8_t limit_vec,

@@ -722,6 +733,10 @@

     store_4x8(s - 2, p, op1, op0, oq0, oq1);

+#if defined(__GNUC__) && __GNUC__ >= 4 && !defined(__clang__)

+#pragma GCC diagnostic pop

+#endif

 void vpx_highbd_lpf_horizontal_16_neon(uint16_t *s, int p,

                                        const uint8_t *blimit,

--- a/vpx_dsp/arm/loopfilter_neon.c

+++ b/vpx_dsp/arm/loopfilter_neon.c

@@ -975,6 +975,17 @@

 FUN_LPF_16_KERNEL(_dual_, 16)  // lpf_16_dual_kernel

 #undef FUN_LPF_16_KERNEL

+// Quiet warnings of the form: 'vpx_dsp/arm/loopfilter_neon.c|981 col 42|

+// warning: 'oq1' may be used uninitialized in this function

+// [-Wmaybe-uninitialized]', for oq1-op1. Without reworking the code or adding

+// an additional branch this warning cannot be silenced otherwise. The

+// loopfilter is only called when needed for a block so these output pixels

+// will be set.

+#if defined(__GNUC__) && __GNUC__ >= 4 && !defined(__clang__)

+#pragma GCC diagnostic push

+#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"

+#endif

 void vpx_lpf_horizontal_16_neon(uint8_t *s, int p, const uint8_t *blimit,

                                 const uint8_t *limit, const uint8_t *thresh) {

   uint8x8_t p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7, op6,

@@ -1090,3 +1101,7 @@

               vget_high_u8(oq0), vget_high_u8(oq1));

+#if defined(__GNUC__) && __GNUC__ >= 4 && !defined(__clang__)

+#pragma GCC diagnostic pop

+#endif

--- a/vpx_dsp/arm/mem_neon.h

+++ b/vpx_dsp/arm/mem_neon.h

@@ -19,6 +19,24 @@

 #include "vpx/vpx_integer.h"

 #include "vpx_dsp/vpx_dsp_common.h"

+// Support for these xN intrinsics is lacking in older versions of GCC.

+#if defined(__GNUC__) && !defined(__clang__)

+#if __GNUC__ < 8 || defined(__arm__)

+static INLINE uint8x16x2_t vld1q_u8_x2(uint8_t const *ptr) {

+  uint8x16x2_t res = { { vld1q_u8(ptr + 0 * 16), vld1q_u8(ptr + 1 * 16) } };

+  return res;

+}

+#endif

+#if __GNUC__ < 9 || defined(__arm__)

+static INLINE uint8x16x3_t vld1q_u8_x3(uint8_t const *ptr) {

+  uint8x16x3_t res = { { vld1q_u8(ptr + 0 * 16), vld1q_u8(ptr + 1 * 16),

+                         vld1q_u8(ptr + 2 * 16) } };

+  return res;

+}

+#endif

+#endif

 static INLINE int16x4_t create_s16x4_neon(const int16_t c0, const int16_t c1,

                                           const int16_t c2, const int16_t c3) {

   return vcreate_s16((uint16_t)c0 | ((uint32_t)c1 << 16) |

@@ -95,7 +113,8 @@

 // Load 2 sets of 4 bytes when alignment is not guaranteed.

-static INLINE uint8x8_t load_unaligned_u8(const uint8_t *buf, int stride) {

+static INLINE uint8x8_t load_unaligned_u8(const uint8_t *buf,

+                                          ptrdiff_t stride) {

   uint32_t a;

   uint32x2_t a_u32 = vdup_n_u32(0);

   if (stride == 4) return vld1_u8(buf);

@@ -108,7 +127,7 @@

 // Store 2 sets of 4 bytes when alignment is not guaranteed.

-static INLINE void store_unaligned_u8(uint8_t *buf, int stride,

+static INLINE void store_unaligned_u8(uint8_t *buf, ptrdiff_t stride,

                                       const uint8x8_t a) {

   const uint32x2_t a_u32 = vreinterpret_u32_u8(a);

   if (stride == 4) {

@@ -121,7 +140,8 @@

 // Load 4 sets of 4 bytes when alignment is not guaranteed.

-static INLINE uint8x16_t load_unaligned_u8q(const uint8_t *buf, int stride) {

+static INLINE uint8x16_t load_unaligned_u8q(const uint8_t *buf,

+                                            ptrdiff_t stride) {

   uint32_t a;

   uint32x4_t a_u32 = vdupq_n_u32(0);

   if (stride == 4) return vld1q_u8(buf);

@@ -141,7 +161,7 @@

 // Store 4 sets of 4 bytes when alignment is not guaranteed.

-static INLINE void store_unaligned_u8q(uint8_t *buf, int stride,

+static INLINE void store_unaligned_u8q(uint8_t *buf, ptrdiff_t stride,

                                        const uint8x16_t a) {

   const uint32x4_t a_u32 = vreinterpretq_u32_u8(a);

   if (stride == 4) {

@@ -158,7 +178,7 @@

 // Load 2 sets of 4 bytes when alignment is guaranteed.

-static INLINE uint8x8_t load_u8(const uint8_t *buf, int stride) {

+static INLINE uint8x8_t load_u8(const uint8_t *buf, ptrdiff_t stride) {

   uint32x2_t a = vdup_n_u32(0);

   assert(!((intptr_t)buf % sizeof(uint32_t)));

@@ -171,7 +191,7 @@

 // Store 2 sets of 4 bytes when alignment is guaranteed.

-static INLINE void store_u8(uint8_t *buf, int stride, const uint8x8_t a) {

+static INLINE void store_u8(uint8_t *buf, ptrdiff_t stride, const uint8x8_t a) {

   uint32x2_t a_u32 = vreinterpret_u32_u8(a);

   assert(!((intptr_t)buf % sizeof(uint32_t)));

--- a/vpx_dsp/arm/sad4d_neon.c

+++ b/vpx_dsp/arm/sad4d_neon.c

@@ -34,7 +34,9 @@

                             uint32_t *const res) {

   int i;

   uint16x8_t abs[2] = { vdupq_n_u16(0), vdupq_n_u16(0) };

+#if !defined(__aarch64__)

   uint16x4_t a[2];

+#endif

   uint32x4_t r;

   assert(!((intptr_t)src_ptr % sizeof(uint32_t)));

@@ -51,9 +53,14 @@

     abs[1] = vabal_u8(abs[1], s, ref23);

+#if defined(__aarch64__)

+  abs[0] = vpaddq_u16(abs[0], abs[1]);

+  r = vpaddlq_u16(abs[0]);

+#else

   a[0] = vpadd_u16(vget_low_u16(abs[0]), vget_high_u16(abs[0]));

   a[1] = vpadd_u16(vget_low_u16(abs[1]), vget_high_u16(abs[1]));

   r = vpaddlq_u16(vcombine_u16(a[0], a[1]));

+#endif

   vst1q_u32(res, r);

@@ -74,6 +81,12 @@

 // Can handle 512 pixels' sad sum (such as 16x32 or 32x16)

 static INLINE void sad_512_pel_final_neon(const uint16x8_t *sum /*[4]*/,

                                           uint32_t *const res) {

+#if defined(__aarch64__)

+  const uint16x8_t a0 = vpaddq_u16(sum[0], sum[1]);

+  const uint16x8_t a1 = vpaddq_u16(sum[2], sum[3]);

+  const uint16x8_t b0 = vpaddq_u16(a0, a1);

+  const uint32x4_t r = vpaddlq_u16(b0);

+#else

   const uint16x4_t a0 = vadd_u16(vget_low_u16(sum[0]), vget_high_u16(sum[0]));

   const uint16x4_t a1 = vadd_u16(vget_low_u16(sum[1]), vget_high_u16(sum[1]));

   const uint16x4_t a2 = vadd_u16(vget_low_u16(sum[2]), vget_high_u16(sum[2]));

@@ -81,12 +94,23 @@

   const uint16x4_t b0 = vpadd_u16(a0, a1);

   const uint16x4_t b1 = vpadd_u16(a2, a3);

   const uint32x4_t r = vpaddlq_u16(vcombine_u16(b0, b1));

+#endif

   vst1q_u32(res, r);

+#if defined(__arm__) || !defined(__ARM_FEATURE_DOTPROD)

 // Can handle 1024 pixels' sad sum (such as 32x32)

 static INLINE void sad_1024_pel_final_neon(const uint16x8_t *sum /*[4]*/,

                                            uint32_t *const res) {

+#if defined(__aarch64__)

+  const uint16x8_t a0 = vpaddq_u16(sum[0], sum[1]);

+  const uint16x8_t a1 = vpaddq_u16(sum[2], sum[3]);

+  const uint32x4_t b0 = vpaddlq_u16(a0);

+  const uint32x4_t b1 = vpaddlq_u16(a1);

+  const uint32x4_t r = vpaddq_u32(b0, b1);

+  vst1q_u32(res, r);

+#else

   const uint16x4_t a0 = vpadd_u16(vget_low_u16(sum[0]), vget_high_u16(sum[0]));

   const uint16x4_t a1 = vpadd_u16(vget_low_u16(sum[1]), vget_high_u16(sum[1]));

   const uint16x4_t a2 = vpadd_u16(vget_low_u16(sum[2]), vget_high_u16(sum[2]));

@@ -96,15 +120,26 @@

   const uint32x2_t c0 = vpadd_u32(vget_low_u32(b0), vget_high_u32(b0));

   const uint32x2_t c1 = vpadd_u32(vget_low_u32(b1), vget_high_u32(b1));

   vst1q_u32(res, vcombine_u32(c0, c1));

+#endif

 // Can handle 2048 pixels' sad sum (such as 32x64 or 64x32)

 static INLINE void sad_2048_pel_final_neon(const uint16x8_t *sum /*[4]*/,

                                            uint32_t *const res) {

+#if defined(__aarch64__)

   const uint32x4_t a0 = vpaddlq_u16(sum[0]);

   const uint32x4_t a1 = vpaddlq_u16(sum[1]);

   const uint32x4_t a2 = vpaddlq_u16(sum[2]);

   const uint32x4_t a3 = vpaddlq_u16(sum[3]);

+  const uint32x4_t b0 = vpaddq_u32(a0, a1);

+  const uint32x4_t b1 = vpaddq_u32(a2, a3);

+  const uint32x4_t r = vpaddq_u32(b0, b1);

+  vst1q_u32(res, r);

+#else

+  const uint32x4_t a0 = vpaddlq_u16(sum[0]);

+  const uint32x4_t a1 = vpaddlq_u16(sum[1]);

+  const uint32x4_t a2 = vpaddlq_u16(sum[2]);

+  const uint32x4_t a3 = vpaddlq_u16(sum[3]);

   const uint32x2_t b0 = vadd_u32(vget_low_u32(a0), vget_high_u32(a0));

   const uint32x2_t b1 = vadd_u32(vget_low_u32(a1), vget_high_u32(a1));

   const uint32x2_t b2 = vadd_u32(vget_low_u32(a2), vget_high_u32(a2));

@@ -112,11 +147,13 @@

   const uint32x2_t c0 = vpadd_u32(b0, b1);

   const uint32x2_t c1 = vpadd_u32(b2, b3);

   vst1q_u32(res, vcombine_u32(c0, c1));

+#endif

 // Can handle 4096 pixels' sad sum (such as 64x64)

 static INLINE void sad_4096_pel_final_neon(const uint16x8_t *sum /*[8]*/,

                                            uint32_t *const res) {

+#if defined(__aarch64__)

   const uint32x4_t a0 = vpaddlq_u16(sum[0]);

   const uint32x4_t a1 = vpaddlq_u16(sum[1]);

   const uint32x4_t a2 = vpaddlq_u16(sum[2]);

@@ -129,6 +166,23 @@

   const uint32x4_t b1 = vaddq_u32(a2, a3);

   const uint32x4_t b2 = vaddq_u32(a4, a5);

   const uint32x4_t b3 = vaddq_u32(a6, a7);

+  const uint32x4_t c0 = vpaddq_u32(b0, b1);

+  const uint32x4_t c1 = vpaddq_u32(b2, b3);

+  const uint32x4_t r = vpaddq_u32(c0, c1);

+  vst1q_u32(res, r);

+#else

+  const uint32x4_t a0 = vpaddlq_u16(sum[0]);

+  const uint32x4_t a1 = vpaddlq_u16(sum[1]);

+  const uint32x4_t a2 = vpaddlq_u16(sum[2]);

+  const uint32x4_t a3 = vpaddlq_u16(sum[3]);

+  const uint32x4_t a4 = vpaddlq_u16(sum[4]);

+  const uint32x4_t a5 = vpaddlq_u16(sum[5]);

+  const uint32x4_t a6 = vpaddlq_u16(sum[6]);

+  const uint32x4_t a7 = vpaddlq_u16(sum[7]);

+  const uint32x4_t b0 = vaddq_u32(a0, a1);

+  const uint32x4_t b1 = vaddq_u32(a2, a3);

+  const uint32x4_t b2 = vaddq_u32(a4, a5);

+  const uint32x4_t b3 = vaddq_u32(a6, a7);

   const uint32x2_t c0 = vadd_u32(vget_low_u32(b0), vget_high_u32(b0));

   const uint32x2_t c1 = vadd_u32(vget_low_u32(b1), vget_high_u32(b1));

   const uint32x2_t c2 = vadd_u32(vget_low_u32(b2), vget_high_u32(b2));

@@ -136,8 +190,11 @@

   const uint32x2_t d0 = vpadd_u32(c0, c1);

   const uint32x2_t d1 = vpadd_u32(c2, c3);

   vst1q_u32(res, vcombine_u32(d0, d1));

+#endif

+#endif

 static INLINE void sad8x_4d(const uint8_t *src_ptr, int src_stride,

                             const uint8_t *const ref_array[4], int ref_stride,

                             uint32_t *res, const int height) {

@@ -180,7 +237,42 @@

 ////////////////////////////////////////////////////////////////////////////////

+#if defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD) && \

+    (__ARM_FEATURE_DOTPROD == 1)

 static INLINE void sad16_neon(const uint8_t *ref_ptr, const uint8x16_t src_ptr,

+                              uint32x4_t *const sum) {

+  const uint8x16_t r = vld1q_u8(ref_ptr);

+  const uint8x16_t diff = vabdq_u8(src_ptr, r);

+  *sum = vdotq_u32(*sum, diff, vdupq_n_u8(1));

+}

+static INLINE void sad16x_4d(const uint8_t *src_ptr, int src_stride,

+                             const uint8_t *const ref_array[4], int ref_stride,

+                             uint32_t *res, const int height) {

+  int i;

+  uint32x4_t r0, r1;

+  const uint8_t *ref_loop[4] = { ref_array[0], ref_array[1], ref_array[2],

+                                 ref_array[3] };

+  uint32x4_t sum[4] = { vdupq_n_u32(0), vdupq_n_u32(0), vdupq_n_u32(0),

+                        vdupq_n_u32(0) };

+  for (i = 0; i < height; ++i) {

+    const uint8x16_t s = vld1q_u8(src_ptr + i * src_stride);

+    sad16_neon(ref_loop[0] + i * ref_stride, s, &sum[0]);

+    sad16_neon(ref_loop[1] + i * ref_stride, s, &sum[1]);

+    sad16_neon(ref_loop[2] + i * ref_stride, s, &sum[2]);

+    sad16_neon(ref_loop[3] + i * ref_stride, s, &sum[3]);

+  }

+  r0 = vpaddq_u32(sum[0], sum[1]);

+  r1 = vpaddq_u32(sum[2], sum[3]);

+  vst1q_u32(res, vpaddq_u32(r0, r1));

+}

+#else

+static INLINE void sad16_neon(const uint8_t *ref_ptr, const uint8x16_t src_ptr,

                               uint16x8_t *const sum) {

   const uint8x16_t r = vld1q_u8(ref_ptr);

   *sum = vabal_u8(*sum, vget_low_u8(src_ptr), vget_low_u8(r));

@@ -190,7 +282,7 @@

 static INLINE void sad16x_4d(const uint8_t *src_ptr, int src_stride,

                              const uint8_t *const ref_array[4], int ref_stride,

                              uint32_t *res, const int height) {

-  int i, j;

+  int i;

   const uint8_t *ref_loop[4] = { ref_array[0], ref_array[1], ref_array[2],

                                  ref_array[3] };

   uint16x8_t sum[4] = { vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0),

@@ -199,15 +291,22 @@

   for (i = 0; i < height; ++i) {

     const uint8x16_t s = vld1q_u8(src_ptr);

     src_ptr += src_stride;

-    for (j = 0; j < 4; ++j) {

-      sad16_neon(ref_loop[j], s, &sum[j]);

-      ref_loop[j] += ref_stride;

-    }

+    /* Manual unrolling here stops the compiler from getting confused. */

+    sad16_neon(ref_loop[0], s, &sum[0]);

+    ref_loop[0] += ref_stride;

+    sad16_neon(ref_loop[1], s, &sum[1]);

+    ref_loop[1] += ref_stride;

+    sad16_neon(ref_loop[2], s, &sum[2]);

+    ref_loop[2] += ref_stride;

+    sad16_neon(ref_loop[3], s, &sum[3]);

+    ref_loop[3] += ref_stride;

   sad_512_pel_final_neon(sum, res);

+#endif

 void vpx_sad16x8x4d_neon(const uint8_t *src_ptr, int src_stride,

                          const uint8_t *const ref_array[4], int ref_stride,

                          uint32_t *res) {

@@ -228,8 +327,69 @@

 ////////////////////////////////////////////////////////////////////////////////

+#if defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD) && \

+    (__ARM_FEATURE_DOTPROD == 1)

 static INLINE void sad32x_4d(const uint8_t *src_ptr, int src_stride,

                              const uint8_t *const ref_array[4], int ref_stride,

+                             uint32_t *res, const int height) {

+  int i;

+  uint32x4_t r0, r1;

+  const uint8_t *ref_loop[4] = { ref_array[0], ref_array[1], ref_array[2],

+                                 ref_array[3] };

+  uint32x4_t sum[4] = { vdupq_n_u32(0), vdupq_n_u32(0), vdupq_n_u32(0),

+                        vdupq_n_u32(0) };

+  for (i = 0; i < height; ++i) {

+    uint8x16_t s;

+    s = vld1q_u8(src_ptr + 0 * 16);

+    sad16_neon(ref_loop[0] + 0 * 16, s, &sum[0]);

+    sad16_neon(ref_loop[1] + 0 * 16, s, &sum[1]);

+    sad16_neon(ref_loop[2] + 0 * 16, s, &sum[2]);

+    sad16_neon(ref_loop[3] + 0 * 16, s, &sum[3]);

+    s = vld1q_u8(src_ptr + 1 * 16);

+    sad16_neon(ref_loop[0] + 1 * 16, s, &sum[0]);

+    sad16_neon(ref_loop[1] + 1 * 16, s, &sum[1]);

+    sad16_neon(ref_loop[2] + 1 * 16, s, &sum[2]);

+    sad16_neon(ref_loop[3] + 1 * 16, s, &sum[3]);

+    src_ptr += src_stride;

+    ref_loop[0] += ref_stride;

+    ref_loop[1] += ref_stride;

+    ref_loop[2] += ref_stride;

+    ref_loop[3] += ref_stride;

+  }

+  r0 = vpaddq_u32(sum[0], sum[1]);

+  r1 = vpaddq_u32(sum[2], sum[3]);

+  vst1q_u32(res, vpaddq_u32(r0, r1));

+}

+void vpx_sad32x16x4d_neon(const uint8_t *src_ptr, int src_stride,

+                          const uint8_t *const ref_array[4], int ref_stride,

+                          uint32_t *res) {

+  sad32x_4d(src_ptr, src_stride, ref_array, ref_stride, res, 16);

+}

+void vpx_sad32x32x4d_neon(const uint8_t *src_ptr, int src_stride,

+                          const uint8_t *const ref_array[4], int ref_stride,

+                          uint32_t *res) {

+  sad32x_4d(src_ptr, src_stride, ref_array, ref_stride, res, 32);

+}

+void vpx_sad32x64x4d_neon(const uint8_t *src_ptr, int src_stride,

+                          const uint8_t *const ref_array[4], int ref_stride,

+                          uint32_t *res) {

+  sad32x_4d(src_ptr, src_stride, ref_array, ref_stride, res, 64);

+}

+#else

+static INLINE void sad32x_4d(const uint8_t *src_ptr, int src_stride,

+                             const uint8_t *const ref_array[4], int ref_stride,

                              const int height, uint16x8_t *const sum) {

   int i;

   const uint8_t *ref_loop[4] = { ref_array[0], ref_array[1], ref_array[2],

@@ -284,14 +444,124 @@

   sad_2048_pel_final_neon(sum, res);

-////////////////////////////////////////////////////////////////////////////////

+#endif

+////////////////////////////////////////////////////////////////////////////////

+#if defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD) && \

+    (__ARM_FEATURE_DOTPROD == 1)

 void vpx_sad64x32x4d_neon(const uint8_t *src_ptr, int src_stride,

                           const uint8_t *const ref_array[4], int ref_stride,

                           uint32_t *res) {

   int i;

+  uint32x4_t r0, r1;

   const uint8_t *ref_loop[4] = { ref_array[0], ref_array[1], ref_array[2],

                                  ref_array[3] };

+  uint32x4_t sum[4] = { vdupq_n_u32(0), vdupq_n_u32(0), vdupq_n_u32(0),

+                        vdupq_n_u32(0) };

+  for (i = 0; i < 32; ++i) {

+    uint8x16_t s;

+    s = vld1q_u8(src_ptr + 0 * 16);

+    sad16_neon(ref_loop[0] + 0 * 16, s, &sum[0]);

+    sad16_neon(ref_loop[1] + 0 * 16, s, &sum[1]);

+    sad16_neon(ref_loop[2] + 0 * 16, s, &sum[2]);

+    sad16_neon(ref_loop[3] + 0 * 16, s, &sum[3]);

+    s = vld1q_u8(src_ptr + 1 * 16);

+    sad16_neon(ref_loop[0] + 1 * 16, s, &sum[0]);

+    sad16_neon(ref_loop[1] + 1 * 16, s, &sum[1]);

+    sad16_neon(ref_loop[2] + 1 * 16, s, &sum[2]);

+    sad16_neon(ref_loop[3] + 1 * 16, s, &sum[3]);

+    s = vld1q_u8(src_ptr + 2 * 16);

+    sad16_neon(ref_loop[0] + 2 * 16, s, &sum[0]);

+    sad16_neon(ref_loop[1] + 2 * 16, s, &sum[1]);

+    sad16_neon(ref_loop[2] + 2 * 16, s, &sum[2]);

+    sad16_neon(ref_loop[3] + 2 * 16, s, &sum[3]);

+    s = vld1q_u8(src_ptr + 3 * 16);

+    sad16_neon(ref_loop[0] + 3 * 16, s, &sum[0]);

+    sad16_neon(ref_loop[1] + 3 * 16, s, &sum[1]);

+    sad16_neon(ref_loop[2] + 3 * 16, s, &sum[2]);

+    sad16_neon(ref_loop[3] + 3 * 16, s, &sum[3]);

+    src_ptr += src_stride;

+    ref_loop[0] += ref_stride;

+    ref_loop[1] += ref_stride;

+    ref_loop[2] += ref_stride;

+    ref_loop[3] += ref_stride;

+  }

+  r0 = vpaddq_u32(sum[0], sum[1]);

+  r1 = vpaddq_u32(sum[2], sum[3]);

+  vst1q_u32(res, vpaddq_u32(r0, r1));

+}

+void vpx_sad64x64x4d_neon(const uint8_t *src_ptr, int src_stride,

+                          const uint8_t *const ref_array[4], int ref_stride,

+                          uint32_t *res) {

+  int i;

+  uint32x4_t r0, r1, r2, r3;

+  const uint8_t *ref_loop[4] = { ref_array[0], ref_array[1], ref_array[2],

+                                 ref_array[3] };

+  uint32x4_t sum[8] = { vdupq_n_u32(0), vdupq_n_u32(0), vdupq_n_u32(0),

+                        vdupq_n_u32(0), vdupq_n_u32(0), vdupq_n_u32(0),

+                        vdupq_n_u32(0), vdupq_n_u32(0) };

+  for (i = 0; i < 64; ++i) {

+    uint8x16_t s;

+    s = vld1q_u8(src_ptr + 0 * 16);

+    sad16_neon(ref_loop[0] + 0 * 16, s, &sum[0]);

+    sad16_neon(ref_loop[1] + 0 * 16, s, &sum[2]);

+    sad16_neon(ref_loop[2] + 0 * 16, s, &sum[4]);

+    sad16_neon(ref_loop[3] + 0 * 16, s, &sum[6]);

+    s = vld1q_u8(src_ptr + 1 * 16);

+    sad16_neon(ref_loop[0] + 1 * 16, s, &sum[0]);

+    sad16_neon(ref_loop[1] + 1 * 16, s, &sum[2]);

+    sad16_neon(ref_loop[2] + 1 * 16, s, &sum[4]);

+    sad16_neon(ref_loop[3] + 1 * 16, s, &sum[6]);

+    s = vld1q_u8(src_ptr + 2 * 16);

+    sad16_neon(ref_loop[0] + 2 * 16, s, &sum[1]);

+    sad16_neon(ref_loop[1] + 2 * 16, s, &sum[3]);

+    sad16_neon(ref_loop[2] + 2 * 16, s, &sum[5]);

+    sad16_neon(ref_loop[3] + 2 * 16, s, &sum[7]);

+    s = vld1q_u8(src_ptr + 3 * 16);

+    sad16_neon(ref_loop[0] + 3 * 16, s, &sum[1]);

+    sad16_neon(ref_loop[1] + 3 * 16, s, &sum[3]);

+    sad16_neon(ref_loop[2] + 3 * 16, s, &sum[5]);

+    sad16_neon(ref_loop[3] + 3 * 16, s, &sum[7]);

+    src_ptr += src_stride;

+    ref_loop[0] += ref_stride;

+    ref_loop[1] += ref_stride;

+    ref_loop[2] += ref_stride;

+    ref_loop[3] += ref_stride;

+  }

+  r0 = vpaddq_u32(sum[0], sum[1]);

+  r1 = vpaddq_u32(sum[2], sum[3]);

+  r2 = vpaddq_u32(sum[4], sum[5]);

+  r3 = vpaddq_u32(sum[6], sum[7]);

+  r0 = vpaddq_u32(r0, r1);

+  r1 = vpaddq_u32(r2, r3);

+  vst1q_u32(res, vpaddq_u32(r0, r1));

+}

+#else

+void vpx_sad64x32x4d_neon(const uint8_t *src_ptr, int src_stride,

+                          const uint8_t *const ref_array[4], int ref_stride,

+                          uint32_t *res) {

+  int i;

+  const uint8_t *ref_loop[4] = { ref_array[0], ref_array[1], ref_array[2],

+                                 ref_array[3] };

   uint16x8_t sum[4] = { vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0),

                         vdupq_n_u16(0) };

@@ -378,3 +648,5 @@

   sad_4096_pel_final_neon(sum, res);

+#endif

--- a/vpx_dsp/arm/sad_neon.c

+++ b/vpx_dsp/arm/sad_neon.c

@@ -23,7 +23,7 @@

   const uint8x16_t ref_u8 = load_unaligned_u8q(ref_ptr, ref_stride);

   uint16x8_t abs = vabdl_u8(vget_low_u8(src_u8), vget_low_u8(ref_u8));

   abs = vabal_u8(abs, vget_high_u8(src_u8), vget_high_u8(ref_u8));

-  return vget_lane_u32(horizontal_add_uint16x8(abs), 0);

+  return horizontal_add_uint16x8(abs);

 uint32_t vpx_sad4x4_avg_neon(const uint8_t *src_ptr, int src_stride,

@@ -35,7 +35,7 @@

   const uint8x16_t avg = vrhaddq_u8(ref_u8, second_pred_u8);

   uint16x8_t abs = vabdl_u8(vget_low_u8(src_u8), vget_low_u8(avg));

   abs = vabal_u8(abs, vget_high_u8(src_u8), vget_high_u8(avg));

-  return vget_lane_u32(horizontal_add_uint16x8(abs), 0);

+  return horizontal_add_uint16x8(abs);

 uint32_t vpx_sad4x8_neon(const uint8_t *src_ptr, int src_stride,

@@ -51,7 +51,7 @@

     abs = vabal_u8(abs, vget_high_u8(src_u8), vget_high_u8(ref_u8));

-  return vget_lane_u32(horizontal_add_uint16x8(abs), 0);

+  return horizontal_add_uint16x8(abs);

 uint32_t vpx_sad4x8_avg_neon(const uint8_t *src_ptr, int src_stride,

@@ -71,7 +71,7 @@

     abs = vabal_u8(abs, vget_high_u8(src_u8), vget_high_u8(avg));

-  return vget_lane_u32(horizontal_add_uint16x8(abs), 0);

+  return horizontal_add_uint16x8(abs);

 static INLINE uint16x8_t sad8x(const uint8_t *src_ptr, int src_stride,

@@ -114,7 +114,7 @@

   uint32_t vpx_sad8x##n##_neon(const uint8_t *src_ptr, int src_stride,         \

                                const uint8_t *ref_ptr, int ref_stride) {       \

     const uint16x8_t abs = sad8x(src_ptr, src_stride, ref_ptr, ref_stride, n); \

-    return vget_lane_u32(horizontal_add_uint16x8(abs), 0);                     \

+    return horizontal_add_uint16x8(abs);                                       \

   }                                                                            \

   uint32_t vpx_sad8x##n##_avg_neon(const uint8_t *src_ptr, int src_stride,     \

@@ -122,7 +122,7 @@

                                    const uint8_t *second_pred) {               \

     const uint16x8_t abs =                                                     \

         sad8x_avg(src_ptr, src_stride, ref_ptr, ref_stride, second_pred, n);   \

-    return vget_lane_u32(horizontal_add_uint16x8(abs), 0);                     \

+    return horizontal_add_uint16x8(abs);                                       \

 sad8xN(4);

@@ -172,7 +172,7 @@

                                 const uint8_t *ref_ptr, int ref_stride) {     \

     const uint16x8_t abs =                                                    \

         sad16x(src_ptr, src_stride, ref_ptr, ref_stride, n);                  \

-    return vget_lane_u32(horizontal_add_uint16x8(abs), 0);                    \

+    return horizontal_add_uint16x8(abs);                                      \

   }                                                                           \

   uint32_t vpx_sad16x##n##_avg_neon(const uint8_t *src_ptr, int src_stride,   \

@@ -180,7 +180,7 @@

                                     const uint8_t *second_pred) {             \

     const uint16x8_t abs =                                                    \

         sad16x_avg(src_ptr, src_stride, ref_ptr, ref_stride, second_pred, n); \

-    return vget_lane_u32(horizontal_add_uint16x8(abs), 0);                    \

+    return horizontal_add_uint16x8(abs);                                      \

 sad16xN(8);

@@ -240,7 +240,7 @@

                                 const uint8_t *ref_ptr, int ref_stride) {     \

     const uint16x8_t abs =                                                    \

         sad32x(src_ptr, src_stride, ref_ptr, ref_stride, n);                  \

-    return vget_lane_u32(horizontal_add_uint16x8(abs), 0);                    \

+    return horizontal_add_uint16x8(abs);                                      \

   }                                                                           \

   uint32_t vpx_sad32x##n##_avg_neon(const uint8_t *src_ptr, int src_stride,   \

@@ -248,7 +248,7 @@

                                     const uint8_t *second_pred) {             \

     const uint16x8_t abs =                                                    \

         sad32x_avg(src_ptr, src_stride, ref_ptr, ref_stride, second_pred, n); \

-    return vget_lane_u32(horizontal_add_uint16x8(abs), 0);                    \

+    return horizontal_add_uint16x8(abs);                                      \

 sad32xN(16);

@@ -338,7 +338,7 @@

                                 const uint8_t *ref_ptr, int ref_stride) {     \

     const uint32x4_t abs =                                                    \

         sad64x(src_ptr, src_stride, ref_ptr, ref_stride, n);                  \

-    return vget_lane_u32(horizontal_add_uint32x4(abs), 0);                    \

+    return horizontal_add_uint32x4(abs);                                      \

   }                                                                           \

   uint32_t vpx_sad64x##n##_avg_neon(const uint8_t *src_ptr, int src_stride,   \

@@ -346,7 +346,7 @@

                                     const uint8_t *second_pred) {             \

     const uint32x4_t abs =                                                    \

         sad64x_avg(src_ptr, src_stride, ref_ptr, ref_stride, second_pred, n); \

-    return vget_lane_u32(horizontal_add_uint32x4(abs), 0);                    \

+    return horizontal_add_uint32x4(abs);                                      \

 sad64xN(32);

--- a/vpx_dsp/arm/sum_neon.h

+++ b/vpx_dsp/arm/sum_neon.h

@@ -16,23 +16,65 @@

 #include "./vpx_config.h"

 #include "vpx/vpx_integer.h"

-static INLINE int32x2_t horizontal_add_int16x8(const int16x8_t a) {

+static INLINE int32_t horizontal_add_int16x8(const int16x8_t a) {

+#if defined(__aarch64__)

+  return vaddlvq_s16(a);

+#else

   const int32x4_t b = vpaddlq_s16(a);

   const int64x2_t c = vpaddlq_s32(b);

-  return vadd_s32(vreinterpret_s32_s64(vget_low_s64(c)),

-                  vreinterpret_s32_s64(vget_high_s64(c)));

+  const int32x2_t d = vadd_s32(vreinterpret_s32_s64(vget_low_s64(c)),

+                               vreinterpret_s32_s64(vget_high_s64(c)));

+  return vget_lane_s32(d, 0);

+#endif

-static INLINE uint32x2_t horizontal_add_uint16x8(const uint16x8_t a) {

+static INLINE uint32_t horizontal_add_uint16x8(const uint16x8_t a) {

+#if defined(__aarch64__)

+  return vaddlvq_u16(a);

+#else

   const uint32x4_t b = vpaddlq_u16(a);

   const uint64x2_t c = vpaddlq_u32(b);

-  return vadd_u32(vreinterpret_u32_u64(vget_low_u64(c)),

-                  vreinterpret_u32_u64(vget_high_u64(c)));

+  const uint32x2_t d = vadd_u32(vreinterpret_u32_u64(vget_low_u64(c)),

+                                vreinterpret_u32_u64(vget_high_u64(c)));

+  return vget_lane_u32(d, 0);

+#endif

-static INLINE uint32x2_t horizontal_add_uint32x4(const uint32x4_t a) {

+static INLINE int32_t horizontal_add_int32x2(const int32x2_t a) {

+#if defined(__aarch64__)

+  return vaddv_s32(a);

+#else

+  return vget_lane_s32(a, 0) + vget_lane_s32(a, 1);

+#endif

+}

+static INLINE uint32_t horizontal_add_uint32x2(const uint32x2_t a) {

+#if defined(__aarch64__)

+  return vaddv_u32(a);

+#else

+  return vget_lane_u32(a, 0) + vget_lane_u32(a, 1);

+#endif

+}

+static INLINE int32_t horizontal_add_int32x4(const int32x4_t a) {

+#if defined(__aarch64__)

+  return vaddvq_s32(a);

+#else

+  const int64x2_t b = vpaddlq_s32(a);

+  const int32x2_t c = vadd_s32(vreinterpret_s32_s64(vget_low_s64(b)),

+                               vreinterpret_s32_s64(vget_high_s64(b)));

+  return vget_lane_s32(c, 0);

+#endif

+}

+static INLINE uint32_t horizontal_add_uint32x4(const uint32x4_t a) {

+#if defined(__aarch64__)

+  return vaddvq_u32(a);

+#else

   const uint64x2_t b = vpaddlq_u32(a);

-  return vadd_u32(vreinterpret_u32_u64(vget_low_u64(b)),

-                  vreinterpret_u32_u64(vget_high_u64(b)));

+  const uint32x2_t c = vadd_u32(vreinterpret_u32_u64(vget_low_u64(b)),

+                                vreinterpret_u32_u64(vget_high_u64(b)));

+  return vget_lane_u32(c, 0);

+#endif

 #endif  // VPX_VPX_DSP_ARM_SUM_NEON_H_

--- a/vpx_dsp/arm/variance_neon.c

+++ b/vpx_dsp/arm/variance_neon.c

@@ -19,6 +19,100 @@

 #include "vpx_dsp/arm/sum_neon.h"

 #include "vpx_ports/mem.h"

+#if defined(__ARM_FEATURE_DOTPROD) && (__ARM_FEATURE_DOTPROD == 1)

+// Process a block of width 4 four rows at a time.

+static void variance_neon_w4x4(const uint8_t *src_ptr, int src_stride,

+                               const uint8_t *ref_ptr, int ref_stride, int h,

+                               uint32_t *sse, int *sum) {

+  int i;

+  uint32x4_t sum_a = vdupq_n_u32(0);

+  uint32x4_t sum_b = vdupq_n_u32(0);

+  uint32x4_t sse_u32 = vdupq_n_u32(0);

+  for (i = 0; i < h; i += 4) {

+    const uint8x16_t a = load_unaligned_u8q(src_ptr, src_stride);

+    const uint8x16_t b = load_unaligned_u8q(ref_ptr, ref_stride);

+    const uint8x16_t abs_diff = vabdq_u8(a, b);

+    sse_u32 = vdotq_u32(sse_u32, abs_diff, abs_diff);

+    sum_a = vdotq_u32(sum_a, a, vdupq_n_u8(1));

+    sum_b = vdotq_u32(sum_b, b, vdupq_n_u8(1));

+    src_ptr += 4 * src_stride;

+    ref_ptr += 4 * ref_stride;

+  }

+  *sum = horizontal_add_int32x4(vreinterpretq_s32_u32(vsubq_u32(sum_a, sum_b)));

+  *sse = horizontal_add_uint32x4(sse_u32);

+}

+// Process a block of any size where the width is divisible by 16.

+static void variance_neon_w16(const uint8_t *src_ptr, int src_stride,

+                              const uint8_t *ref_ptr, int ref_stride, int w,

+                              int h, uint32_t *sse, int *sum) {

+  int i, j;

+  uint32x4_t sum_a = vdupq_n_u32(0);

+  uint32x4_t sum_b = vdupq_n_u32(0);

+  uint32x4_t sse_u32 = vdupq_n_u32(0);

+  for (i = 0; i < h; ++i) {

+    for (j = 0; j < w; j += 16) {

+      const uint8x16_t a = vld1q_u8(src_ptr + j);

+      const uint8x16_t b = vld1q_u8(ref_ptr + j);

+      const uint8x16_t abs_diff = vabdq_u8(a, b);

+      sse_u32 = vdotq_u32(sse_u32, abs_diff, abs_diff);

+      sum_a = vdotq_u32(sum_a, a, vdupq_n_u8(1));

+      sum_b = vdotq_u32(sum_b, b, vdupq_n_u8(1));

+    }

+    src_ptr += src_stride;

+    ref_ptr += ref_stride;

+  }

+  *sum = horizontal_add_int32x4(vreinterpretq_s32_u32(vsubq_u32(sum_a, sum_b)));

+  *sse = horizontal_add_uint32x4(sse_u32);

+}

+// Process a block of width 8 two rows at a time.

+static void variance_neon_w8x2(const uint8_t *src_ptr, int src_stride,

+                               const uint8_t *ref_ptr, int ref_stride, int h,

+                               uint32_t *sse, int *sum) {

+  int i = 0;

+  uint32x2_t sum_a = vdup_n_u32(0);

+  uint32x2_t sum_b = vdup_n_u32(0);

+  uint32x2_t sse_lo_u32 = vdup_n_u32(0);

+  uint32x2_t sse_hi_u32 = vdup_n_u32(0);

+  do {

+    const uint8x8_t a_0 = vld1_u8(src_ptr);

+    const uint8x8_t a_1 = vld1_u8(src_ptr + src_stride);

+    const uint8x8_t b_0 = vld1_u8(ref_ptr);

+    const uint8x8_t b_1 = vld1_u8(ref_ptr + ref_stride);

+    const uint8x8_t abs_diff_0 = vabd_u8(a_0, b_0);

+    const uint8x8_t abs_diff_1 = vabd_u8(a_1, b_1);

+    sse_lo_u32 = vdot_u32(sse_lo_u32, abs_diff_0, abs_diff_0);

+    sse_hi_u32 = vdot_u32(sse_hi_u32, abs_diff_1, abs_diff_1);

+    sum_a = vdot_u32(sum_a, a_0, vdup_n_u8(1));

+    sum_b = vdot_u32(sum_b, b_0, vdup_n_u8(1));

+    sum_a = vdot_u32(sum_a, a_1, vdup_n_u8(1));

+    sum_b = vdot_u32(sum_b, b_1, vdup_n_u8(1));

+    src_ptr += src_stride + src_stride;

+    ref_ptr += ref_stride + ref_stride;

+    i += 2;

+  } while (i < h);

+  *sum = horizontal_add_int32x2(vreinterpret_s32_u32(vsub_u32(sum_a, sum_b)));

+  *sse = horizontal_add_uint32x2(vadd_u32(sse_lo_u32, sse_hi_u32));

+}

+#else

 // The variance helper functions use int16_t for sum. 8 values are accumulated

 // and then added (at which point they expand up to int32_t). To avoid overflow,

 // there can be no more than 32767 / 255 ~= 128 values accumulated in each

@@ -66,10 +160,9 @@

     ref_ptr += 4 * ref_stride;

-  *sum = vget_lane_s32(horizontal_add_int16x8(sum_s16), 0);

-  *sse = vget_lane_u32(horizontal_add_uint32x4(vreinterpretq_u32_s32(

-                           vaddq_s32(sse_lo_s32, sse_hi_s32))),

-                       0);

+  *sum = horizontal_add_int16x8(sum_s16);

+  *sse = horizontal_add_uint32x4(

+      vreinterpretq_u32_s32(vaddq_s32(sse_lo_s32, sse_hi_s32)));

 // Process a block of any size where the width is divisible by 16.

@@ -115,10 +208,9 @@

     ref_ptr += ref_stride;

-  *sum = vget_lane_s32(horizontal_add_int16x8(sum_s16), 0);

-  *sse = vget_lane_u32(horizontal_add_uint32x4(vreinterpretq_u32_s32(

-                           vaddq_s32(sse_lo_s32, sse_hi_s32))),

-                       0);

+  *sum = horizontal_add_int16x8(sum_s16);

+  *sse = horizontal_add_uint32x4(

+      vreinterpretq_u32_s32(vaddq_s32(sse_lo_s32, sse_hi_s32)));

 // Process a block of width 8 two rows at a time.

@@ -157,12 +249,13 @@

     i += 2;

   } while (i < h);

-  *sum = vget_lane_s32(horizontal_add_int16x8(sum_s16), 0);

-  *sse = vget_lane_u32(horizontal_add_uint32x4(vreinterpretq_u32_s32(

-                           vaddq_s32(sse_lo_s32, sse_hi_s32))),

-                       0);

+  *sum = horizontal_add_int16x8(sum_s16);

+  *sse = horizontal_add_uint32x4(

+      vreinterpretq_u32_s32(vaddq_s32(sse_lo_s32, sse_hi_s32)));

+#endif

 void vpx_get8x8var_neon(const uint8_t *src_ptr, int src_stride,

                         const uint8_t *ref_ptr, int ref_stride,

                         unsigned int *sse, int *sum) {

@@ -264,117 +357,165 @@

   return *sse - (unsigned int)(((int64_t)sum1 * sum1) >> 12);

+#if defined(__ARM_FEATURE_DOTPROD) && (__ARM_FEATURE_DOTPROD == 1)

 unsigned int vpx_mse16x16_neon(const unsigned char *src_ptr, int src_stride,

                                const unsigned char *ref_ptr, int ref_stride,

                                unsigned int *sse) {

   int i;

-  int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16;

-  int64x1_t d0s64;

-  uint8x16_t q0u8, q1u8, q2u8, q3u8;

-  int32x4_t q7s32, q8s32, q9s32, q10s32;

-  uint16x8_t q11u16, q12u16, q13u16, q14u16;

-  int64x2_t q1s64;

+  uint8x16_t a[2], b[2], abs_diff[2];

+  uint32x4_t sse_vec[2] = { vdupq_n_u32(0), vdupq_n_u32(0) };

-  q7s32 = vdupq_n_s32(0);

-  q8s32 = vdupq_n_s32(0);

-  q9s32 = vdupq_n_s32(0);

-  q10s32 = vdupq_n_s32(0);

-  for (i = 0; i < 8; i++) {  // mse16x16_neon_loop

-    q0u8 = vld1q_u8(src_ptr);

+  for (i = 0; i < 8; i++) {

+    a[0] = vld1q_u8(src_ptr);

     src_ptr += src_stride;

-    q1u8 = vld1q_u8(src_ptr);

+    a[1] = vld1q_u8(src_ptr);

     src_ptr += src_stride;

-    q2u8 = vld1q_u8(ref_ptr);

+    b[0] = vld1q_u8(ref_ptr);

     ref_ptr += ref_stride;

-    q3u8 = vld1q_u8(ref_ptr);

+    b[1] = vld1q_u8(ref_ptr);

     ref_ptr += ref_stride;

-    q11u16 = vsubl_u8(vget_low_u8(q0u8), vget_low_u8(q2u8));

-    q12u16 = vsubl_u8(vget_high_u8(q0u8), vget_high_u8(q2u8));

-    q13u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q3u8));

-    q14u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q3u8));

+    abs_diff[0] = vabdq_u8(a[0], b[0]);

+    abs_diff[1] = vabdq_u8(a[1], b[1]);

-    d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));

-    d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16));

-    q7s32 = vmlal_s16(q7s32, d22s16, d22s16);

-    q8s32 = vmlal_s16(q8s32, d23s16, d23s16);

+    sse_vec[0] = vdotq_u32(sse_vec[0], abs_diff[0], abs_diff[0]);

+    sse_vec[1] = vdotq_u32(sse_vec[1], abs_diff[1], abs_diff[1]);

+  }

-    d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));

-    d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));

-    q9s32 = vmlal_s16(q9s32, d24s16, d24s16);

-    q10s32 = vmlal_s16(q10s32, d25s16, d25s16);

+  *sse = horizontal_add_uint32x4(vaddq_u32(sse_vec[0], sse_vec[1]));

+  return horizontal_add_uint32x4(vaddq_u32(sse_vec[0], sse_vec[1]));

+}

-    d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16));

-    d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16));

-    q7s32 = vmlal_s16(q7s32, d26s16, d26s16);

-    q8s32 = vmlal_s16(q8s32, d27s16, d27s16);

+unsigned int vpx_get4x4sse_cs_neon(const unsigned char *src_ptr, int src_stride,

+                                   const unsigned char *ref_ptr,

+                                   int ref_stride) {

+  uint8x8_t a[4], b[4], abs_diff[4];

+  uint32x2_t sse = vdup_n_u32(0);

+  a[0] = vld1_u8(src_ptr);

+  src_ptr += src_stride;

+  b[0] = vld1_u8(ref_ptr);

+  ref_ptr += ref_stride;

+  a[1] = vld1_u8(src_ptr);

+  src_ptr += src_stride;

+  b[1] = vld1_u8(ref_ptr);

+  ref_ptr += ref_stride;

+  a[2] = vld1_u8(src_ptr);

+  src_ptr += src_stride;

+  b[2] = vld1_u8(ref_ptr);

+  ref_ptr += ref_stride;

+  a[3] = vld1_u8(src_ptr);

+  b[3] = vld1_u8(ref_ptr);

+  abs_diff[0] = vabd_u8(a[0], b[0]);

+  abs_diff[1] = vabd_u8(a[1], b[1]);

+  abs_diff[2] = vabd_u8(a[2], b[2]);

+  abs_diff[3] = vabd_u8(a[3], b[3]);

+  sse = vdot_u32(sse, abs_diff[0], abs_diff[0]);

+  sse = vdot_u32(sse, abs_diff[1], abs_diff[1]);

+  sse = vdot_u32(sse, abs_diff[2], abs_diff[2]);

+  sse = vdot_u32(sse, abs_diff[3], abs_diff[3]);

-    d28s16 = vreinterpret_s16_u16(vget_low_u16(q14u16));

-    d29s16 = vreinterpret_s16_u16(vget_high_u16(q14u16));

-    q9s32 = vmlal_s16(q9s32, d28s16, d28s16);

-    q10s32 = vmlal_s16(q10s32, d29s16, d29s16);

-  }

+  return vget_lane_u32(sse, 0);

+}

-  q7s32 = vaddq_s32(q7s32, q8s32);

-  q9s32 = vaddq_s32(q9s32, q10s32);

-  q10s32 = vaddq_s32(q7s32, q9s32);

+#else

-  q1s64 = vpaddlq_s32(q10s32);

-  d0s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));

+unsigned int vpx_mse16x16_neon(const unsigned char *src_ptr, int src_stride,

+                               const unsigned char *ref_ptr, int ref_stride,

+                               unsigned int *sse) {

+  int i;

+  uint8x16_t a[2], b[2];

+  int16x4_t diff_lo[4], diff_hi[4];

+  uint16x8_t diff[4];

+  int32x4_t sse_vec[4] = { vdupq_n_s32(0), vdupq_n_s32(0), vdupq_n_s32(0),

+                           vdupq_n_s32(0) };

-  vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d0s64), 0);

-  return vget_lane_u32(vreinterpret_u32_s64(d0s64), 0);

+  for (i = 0; i < 8; i++) {

+    a[0] = vld1q_u8(src_ptr);

+    src_ptr += src_stride;

+    a[1] = vld1q_u8(src_ptr);

+    src_ptr += src_stride;

+    b[0] = vld1q_u8(ref_ptr);

+    ref_ptr += ref_stride;

+    b[1] = vld1q_u8(ref_ptr);

+    ref_ptr += ref_stride;

+    diff[0] = vsubl_u8(vget_low_u8(a[0]), vget_low_u8(b[0]));

+    diff[1] = vsubl_u8(vget_high_u8(a[0]), vget_high_u8(b[0]));

+    diff[2] = vsubl_u8(vget_low_u8(a[1]), vget_low_u8(b[1]));

+    diff[3] = vsubl_u8(vget_high_u8(a[1]), vget_high_u8(b[1]));

+    diff_lo[0] = vreinterpret_s16_u16(vget_low_u16(diff[0]));

+    diff_lo[1] = vreinterpret_s16_u16(vget_low_u16(diff[1]));

+    sse_vec[0] = vmlal_s16(sse_vec[0], diff_lo[0], diff_lo[0]);

+    sse_vec[1] = vmlal_s16(sse_vec[1], diff_lo[1], diff_lo[1]);

+    diff_lo[2] = vreinterpret_s16_u16(vget_low_u16(diff[2]));

+    diff_lo[3] = vreinterpret_s16_u16(vget_low_u16(diff[3]));

+    sse_vec[2] = vmlal_s16(sse_vec[2], diff_lo[2], diff_lo[2]);

+    sse_vec[3] = vmlal_s16(sse_vec[3], diff_lo[3], diff_lo[3]);

+    diff_hi[0] = vreinterpret_s16_u16(vget_high_u16(diff[0]));

+    diff_hi[1] = vreinterpret_s16_u16(vget_high_u16(diff[1]));

+    sse_vec[0] = vmlal_s16(sse_vec[0], diff_hi[0], diff_hi[0]);

+    sse_vec[1] = vmlal_s16(sse_vec[1], diff_hi[1], diff_hi[1]);

+    diff_hi[2] = vreinterpret_s16_u16(vget_high_u16(diff[2]));

+    diff_hi[3] = vreinterpret_s16_u16(vget_high_u16(diff[3]));

+    sse_vec[2] = vmlal_s16(sse_vec[2], diff_hi[2], diff_hi[2]);

+    sse_vec[3] = vmlal_s16(sse_vec[3], diff_hi[3], diff_hi[3]);

+  }

+  sse_vec[0] = vaddq_s32(sse_vec[0], sse_vec[1]);

+  sse_vec[2] = vaddq_s32(sse_vec[2], sse_vec[3]);

+  sse_vec[0] = vaddq_s32(sse_vec[0], sse_vec[2]);

+  *sse = horizontal_add_uint32x4(vreinterpretq_u32_s32(sse_vec[0]));

+  return horizontal_add_uint32x4(vreinterpretq_u32_s32(sse_vec[0]));

 unsigned int vpx_get4x4sse_cs_neon(const unsigned char *src_ptr, int src_stride,

                                    const unsigned char *ref_ptr,

                                    int ref_stride) {

-  int16x4_t d22s16, d24s16, d26s16, d28s16;

-  int64x1_t d0s64;

-  uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8;

-  int32x4_t q7s32, q8s32, q9s32, q10s32;

-  uint16x8_t q11u16, q12u16, q13u16, q14u16;

-  int64x2_t q1s64;

+  uint8x8_t a[4], b[4];

+  int16x4_t diff_lo[4];

+  uint16x8_t diff[4];

+  int32x4_t sse;

-  d0u8 = vld1_u8(src_ptr);

+  a[0] = vld1_u8(src_ptr);

   src_ptr += src_stride;

-  d4u8 = vld1_u8(ref_ptr);

+  b[0] = vld1_u8(ref_ptr);

   ref_ptr += ref_stride;

-  d1u8 = vld1_u8(src_ptr);

+  a[1] = vld1_u8(src_ptr);

   src_ptr += src_stride;

-  d5u8 = vld1_u8(ref_ptr);

+  b[1] = vld1_u8(ref_ptr);

   ref_ptr += ref_stride;

-  d2u8 = vld1_u8(src_ptr);

+  a[2] = vld1_u8(src_ptr);

   src_ptr += src_stride;

-  d6u8 = vld1_u8(ref_ptr);

+  b[2] = vld1_u8(ref_ptr);

   ref_ptr += ref_stride;

-  d3u8 = vld1_u8(src_ptr);

-  src_ptr += src_stride;

-  d7u8 = vld1_u8(ref_ptr);

-  ref_ptr += ref_stride;

+  a[3] = vld1_u8(src_ptr);

+  b[3] = vld1_u8(ref_ptr);

-  q11u16 = vsubl_u8(d0u8, d4u8);

-  q12u16 = vsubl_u8(d1u8, d5u8);

-  q13u16 = vsubl_u8(d2u8, d6u8);

-  q14u16 = vsubl_u8(d3u8, d7u8);

+  diff[0] = vsubl_u8(a[0], b[0]);

+  diff[1] = vsubl_u8(a[1], b[1]);

+  diff[2] = vsubl_u8(a[2], b[2]);

+  diff[3] = vsubl_u8(a[3], b[3]);

-  d22s16 = vget_low_s16(vreinterpretq_s16_u16(q11u16));

-  d24s16 = vget_low_s16(vreinterpretq_s16_u16(q12u16));

-  d26s16 = vget_low_s16(vreinterpretq_s16_u16(q13u16));

-  d28s16 = vget_low_s16(vreinterpretq_s16_u16(q14u16));

+  diff_lo[0] = vget_low_s16(vreinterpretq_s16_u16(diff[0]));

+  diff_lo[1] = vget_low_s16(vreinterpretq_s16_u16(diff[1]));

+  diff_lo[2] = vget_low_s16(vreinterpretq_s16_u16(diff[2]));

+  diff_lo[3] = vget_low_s16(vreinterpretq_s16_u16(diff[3]));

-  q7s32 = vmull_s16(d22s16, d22s16);

-  q8s32 = vmull_s16(d24s16, d24s16);

-  q9s32 = vmull_s16(d26s16, d26s16);

-  q10s32 = vmull_s16(d28s16, d28s16);

+  sse = vmull_s16(diff_lo[0], diff_lo[0]);

+  sse = vmlal_s16(sse, diff_lo[1], diff_lo[1]);

+  sse = vmlal_s16(sse, diff_lo[2], diff_lo[2]);

+  sse = vmlal_s16(sse, diff_lo[3], diff_lo[3]);

-  q7s32 = vaddq_s32(q7s32, q8s32);

-  q9s32 = vaddq_s32(q9s32, q10s32);

-  q9s32 = vaddq_s32(q7s32, q9s32);

-  q1s64 = vpaddlq_s32(q9s32);

-  d0s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));

-  return vget_lane_u32(vreinterpret_u32_s64(d0s64), 0);

+  return horizontal_add_uint32x4(vreinterpretq_u32_s32(sse));

+#endif

--- a/vpx_dsp/arm/vpx_convolve8_neon.c

+++ b/vpx_dsp/arm/vpx_convolve8_neon.c

@@ -14,6 +14,7 @@

 #include "./vpx_config.h"

 #include "./vpx_dsp_rtcd.h"

 #include "vpx/vpx_integer.h"

+#include "vpx_dsp/arm/mem_neon.h"

 #include "vpx_dsp/arm/transpose_neon.h"

 #include "vpx_dsp/arm/vpx_convolve8_neon.h"

 #include "vpx_ports/mem.h"

@@ -30,6 +31,741 @@

 // instructions. This optimization is much faster in speed unit test, but slowed

 // down the whole decoder by 5%.

+#if defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD) && \

+    (__ARM_FEATURE_DOTPROD == 1)

+DECLARE_ALIGNED(16, static const uint8_t, dot_prod_permute_tbl[48]) = {

+  0, 1, 2,  3,  1, 2,  3,  4,  2,  3,  4,  5,  3,  4,  5,  6,

+  4, 5, 6,  7,  5, 6,  7,  8,  6,  7,  8,  9,  7,  8,  9,  10,

+  8, 9, 10, 11, 9, 10, 11, 12, 10, 11, 12, 13, 11, 12, 13, 14

+};

+DECLARE_ALIGNED(16, static const uint8_t, dot_prod_tran_concat_tbl[32]) = {

+  0, 8,  16, 24, 1, 9,  17, 25, 2, 10, 18, 26, 3, 11, 19, 27,

+  4, 12, 20, 28, 5, 13, 21, 29, 6, 14, 22, 30, 7, 15, 23, 31

+};

+DECLARE_ALIGNED(16, static const uint8_t, dot_prod_merge_block_tbl[48]) = {

+  /* Shift left and insert new last column in transposed 4x4 block. */

+  1, 2, 3, 16, 5, 6, 7, 20, 9, 10, 11, 24, 13, 14, 15, 28,

+  /* Shift left and insert two new columns in transposed 4x4 block. */

+  2, 3, 16, 17, 6, 7, 20, 21, 10, 11, 24, 25, 14, 15, 28, 29,

+  /* Shift left and insert three new columns in transposed 4x4 block. */

+  3, 16, 17, 18, 7, 20, 21, 22, 11, 24, 25, 26, 15, 28, 29, 30

+};

+static INLINE void transpose_concat_4x4(int8x8_t *a0, int8x8_t *a1,

+                                        int8x8_t *a2, int8x8_t *a3,

+                                        int8x16_t *b,

+                                        const uint8x16_t permute_tbl) {

+  /* Transpose 8-bit elements and concatenate result rows as follows:

+   * a0: 00, 01, 02, 03, XX, XX, XX, XX

+   * a1: 10, 11, 12, 13, XX, XX, XX, XX

+   * a2: 20, 21, 22, 23, XX, XX, XX, XX

+   * a3: 30, 31, 32, 33, XX, XX, XX, XX

+   *

+   * b: 00, 10, 20, 30, 01, 11, 21, 31, 02, 12, 22, 32, 03, 13, 23, 33

+   *

+   * The 'permute_tbl' is always 'dot_prod_tran_concat_tbl' above. Passing it

+   * as an argument is preferable to loading it directly from memory as this

+   * inline helper is called many times from the same parent function.

+   */

+  int8x16x2_t samples = { { vcombine_s8(*a0, *a1), vcombine_s8(*a2, *a3) } };

+  *b = vqtbl2q_s8(samples, permute_tbl);

+}

+static INLINE void transpose_concat_8x4(int8x8_t *a0, int8x8_t *a1,

+                                        int8x8_t *a2, int8x8_t *a3,

+                                        int8x16_t *b0, int8x16_t *b1,

+                                        const uint8x16x2_t permute_tbl) {

+  /* Transpose 8-bit elements and concatenate result rows as follows:

+   * a0: 00, 01, 02, 03, 04, 05, 06, 07

+   * a1: 10, 11, 12, 13, 14, 15, 16, 17

+   * a2: 20, 21, 22, 23, 24, 25, 26, 27

+   * a3: 30, 31, 32, 33, 34, 35, 36, 37

+   *

+   * b0: 00, 10, 20, 30, 01, 11, 21, 31, 02, 12, 22, 32, 03, 13, 23, 33

+   * b1: 04, 14, 24, 34, 05, 15, 25, 35, 06, 16, 26, 36, 07, 17, 27, 37

+   *

+   * The 'permute_tbl' is always 'dot_prod_tran_concat_tbl' above. Passing it

+   * as an argument is preferable to loading it directly from memory as this

+   * inline helper is called many times from the same parent function.

+   */

+  int8x16x2_t samples = { { vcombine_s8(*a0, *a1), vcombine_s8(*a2, *a3) } };

+  *b0 = vqtbl2q_s8(samples, permute_tbl.val[0]);

+  *b1 = vqtbl2q_s8(samples, permute_tbl.val[1]);

+}

+void vpx_convolve8_horiz_neon(const uint8_t *src, ptrdiff_t src_stride,

+                              uint8_t *dst, ptrdiff_t dst_stride,

+                              const InterpKernel *filter, int x0_q4,

+                              int x_step_q4, int y0_q4, int y_step_q4, int w,

+                              int h) {

+  const int8x8_t filters = vmovn_s16(vld1q_s16(filter[x0_q4]));

+  const int16x8_t correct_tmp = vmulq_n_s16(vld1q_s16(filter[x0_q4]), 128);

+  const int32x4_t correction = vdupq_n_s32((int32_t)vaddvq_s16(correct_tmp));

+  const uint8x16_t range_limit = vdupq_n_u8(128);

+  uint8x16_t s0, s1, s2, s3;

+  assert(!((intptr_t)dst & 3));

+  assert(!(dst_stride & 3));

+  assert(x_step_q4 == 16);

+  (void)x_step_q4;

+  (void)y0_q4;

+  (void)y_step_q4;

+  src -= 3;

+  if (w == 4) {

+    const uint8x16x2_t permute_tbl = vld1q_u8_x2(dot_prod_permute_tbl);

+    do {

+      int32x4_t t0, t1, t2, t3;

+      int16x8_t t01, t23;

+      uint8x8_t d01, d23;

+      s0 = vld1q_u8(src);

+      src += src_stride;

+      s1 = vld1q_u8(src);

+      src += src_stride;

+      s2 = vld1q_u8(src);

+      src += src_stride;

+      s3 = vld1q_u8(src);

+      src += src_stride;

+      t0 = convolve8_4_dot(s0, filters, correction, range_limit, permute_tbl);

+      t1 = convolve8_4_dot(s1, filters, correction, range_limit, permute_tbl);

+      t2 = convolve8_4_dot(s2, filters, correction, range_limit, permute_tbl);

+      t3 = convolve8_4_dot(s3, filters, correction, range_limit, permute_tbl);

+      t01 = vcombine_s16(vqmovn_s32(t0), vqmovn_s32(t1));

+      t23 = vcombine_s16(vqmovn_s32(t2), vqmovn_s32(t3));

+      d01 = vqrshrun_n_s16(t01, 7);

+      d23 = vqrshrun_n_s16(t23, 7);

+      vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d01), 0);

+      dst += dst_stride;

+      vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d01), 1);

+      dst += dst_stride;

+      vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d23), 0);

+      dst += dst_stride;

+      vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d23), 1);

+      dst += dst_stride;

+      h -= 4;

+    } while (h > 0);

+  } else {

+    const uint8x16x3_t permute_tbl = vld1q_u8_x3(dot_prod_permute_tbl);

+    const uint8_t *s;

+    uint8_t *d;

+    int width;

+    uint8x8_t d0, d1, d2, d3;

+    do {

+      width = w;

+      s = src;

+      d = dst;

+      do {

+        s0 = vld1q_u8(s + 0 * src_stride);

+        s1 = vld1q_u8(s + 1 * src_stride);

+        s2 = vld1q_u8(s + 2 * src_stride);

+        s3 = vld1q_u8(s + 3 * src_stride);

+        d0 = convolve8_8_dot(s0, filters, correction, range_limit, permute_tbl);

+        d1 = convolve8_8_dot(s1, filters, correction, range_limit, permute_tbl);

+        d2 = convolve8_8_dot(s2, filters, correction, range_limit, permute_tbl);

+        d3 = convolve8_8_dot(s3, filters, correction, range_limit, permute_tbl);

+        vst1_u8(d + 0 * dst_stride, d0);

+        vst1_u8(d + 1 * dst_stride, d1);

+        vst1_u8(d + 2 * dst_stride, d2);

+        vst1_u8(d + 3 * dst_stride, d3);

+        s += 8;

+        d += 8;

+        width -= 8;

+      } while (width > 0);

+      src += 4 * src_stride;

+      dst += 4 * dst_stride;

+      h -= 4;

+    } while (h > 0);

+  }

+}

+void vpx_convolve8_avg_horiz_neon(const uint8_t *src, ptrdiff_t src_stride,

+                                  uint8_t *dst, ptrdiff_t dst_stride,

+                                  const InterpKernel *filter, int x0_q4,

+                                  int x_step_q4, int y0_q4, int y_step_q4,

+                                  int w, int h) {

+  const int8x8_t filters = vmovn_s16(vld1q_s16(filter[x0_q4]));

+  const int16x8_t correct_tmp = vmulq_n_s16(vld1q_s16(filter[x0_q4]), 128);

+  const int32x4_t correction = vdupq_n_s32((int32_t)vaddvq_s16(correct_tmp));

+  const uint8x16_t range_limit = vdupq_n_u8(128);

+  uint8x16_t s0, s1, s2, s3;

+  assert(!((intptr_t)dst & 3));

+  assert(!(dst_stride & 3));

+  assert(x_step_q4 == 16);

+  (void)x_step_q4;

+  (void)y0_q4;

+  (void)y_step_q4;

+  src -= 3;

+  if (w == 4) {

+    const uint8x16x2_t permute_tbl = vld1q_u8_x2(dot_prod_permute_tbl);

+    do {

+      int32x4_t t0, t1, t2, t3;

+      int16x8_t t01, t23;

+      uint8x8_t d01, d23, dd01, dd23;

+      dd01 = vdup_n_u8(0);

+      dd23 = vdup_n_u8(0);

+      s0 = vld1q_u8(src);

+      src += src_stride;

+      s1 = vld1q_u8(src);

+      src += src_stride;

+      s2 = vld1q_u8(src);

+      src += src_stride;

+      s3 = vld1q_u8(src);

+      src += src_stride;

+      t0 = convolve8_4_dot(s0, filters, correction, range_limit, permute_tbl);

+      t1 = convolve8_4_dot(s1, filters, correction, range_limit, permute_tbl);

+      t2 = convolve8_4_dot(s2, filters, correction, range_limit, permute_tbl);

+      t3 = convolve8_4_dot(s3, filters, correction, range_limit, permute_tbl);

+      t01 = vcombine_s16(vqmovn_s32(t0), vqmovn_s32(t1));

+      t23 = vcombine_s16(vqmovn_s32(t2), vqmovn_s32(t3));

+      d01 = vqrshrun_n_s16(t01, 7);

+      d23 = vqrshrun_n_s16(t23, 7);

+      dd01 = load_u8(dst + 0 * dst_stride, dst_stride);

+      dd23 = load_u8(dst + 2 * dst_stride, dst_stride);

+      d01 = vrhadd_u8(d01, dd01);

+      d23 = vrhadd_u8(d23, dd23);

+      vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d01), 0);

+      dst += dst_stride;

+      vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d01), 1);

+      dst += dst_stride;

+      vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d23), 0);

+      dst += dst_stride;

+      vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d23), 1);

+      dst += dst_stride;

+      h -= 4;

+    } while (h > 0);

+  } else {

+    const uint8x16x3_t permute_tbl = vld1q_u8_x3(dot_prod_permute_tbl);

+    const uint8_t *s;

+    uint8_t *d;

+    int width;

+    uint8x8_t d0, d1, d2, d3, dd0, dd1, dd2, dd3;

+    do {

+      width = w;

+      s = src;

+      d = dst;

+      do {

+        s0 = vld1q_u8(s + 0 * src_stride);

+        s1 = vld1q_u8(s + 1 * src_stride);

+        s2 = vld1q_u8(s + 2 * src_stride);

+        s3 = vld1q_u8(s + 3 * src_stride);

+        d0 = convolve8_8_dot(s0, filters, correction, range_limit, permute_tbl);

+        d1 = convolve8_8_dot(s1, filters, correction, range_limit, permute_tbl);

+        d2 = convolve8_8_dot(s2, filters, correction, range_limit, permute_tbl);

+        d3 = convolve8_8_dot(s3, filters, correction, range_limit, permute_tbl);

+        dd0 = vld1_u8(d + 0 * dst_stride);

+        dd1 = vld1_u8(d + 1 * dst_stride);

+        dd2 = vld1_u8(d + 2 * dst_stride);

+        dd3 = vld1_u8(d + 3 * dst_stride);

+        d0 = vrhadd_u8(d0, dd0);

+        d1 = vrhadd_u8(d1, dd1);

+        d2 = vrhadd_u8(d2, dd2);

+        d3 = vrhadd_u8(d3, dd3);

+        vst1_u8(d + 0 * dst_stride, d0);

+        vst1_u8(d + 1 * dst_stride, d1);

+        vst1_u8(d + 2 * dst_stride, d2);

+        vst1_u8(d + 3 * dst_stride, d3);

+        s += 8;

+        d += 8;

+        width -= 8;

+      } while (width > 0);

+      src += 4 * src_stride;

+      dst += 4 * dst_stride;

+      h -= 4;

+    } while (h > 0);

+  }

+}

+void vpx_convolve8_vert_neon(const uint8_t *src, ptrdiff_t src_stride,

+                             uint8_t *dst, ptrdiff_t dst_stride,

+                             const InterpKernel *filter, int x0_q4,

+                             int x_step_q4, int y0_q4, int y_step_q4, int w,

+                             int h) {

+  const int8x8_t filters = vmovn_s16(vld1q_s16(filter[y0_q4]));

+  const int16x8_t correct_tmp = vmulq_n_s16(vld1q_s16(filter[y0_q4]), 128);

+  const int32x4_t correction = vdupq_n_s32((int32_t)vaddvq_s16(correct_tmp));

+  const uint8x8_t range_limit = vdup_n_u8(128);

+  const uint8x16x3_t merge_block_tbl = vld1q_u8_x3(dot_prod_merge_block_tbl);

+  uint8x8_t t0, t1, t2, t3, t4, t5, t6;

+  int8x8_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10;

+  int8x16x2_t samples_LUT;

+  assert(!((intptr_t)dst & 3));

+  assert(!(dst_stride & 3));

+  assert(y_step_q4 == 16);

+  (void)x0_q4;

+  (void)x_step_q4;

+  (void)y_step_q4;

+  src -= 3 * src_stride;

+  if (w == 4) {

+    const uint8x16_t tran_concat_tbl = vld1q_u8(dot_prod_tran_concat_tbl);

+    int8x16_t s0123, s1234, s2345, s3456, s4567, s5678, s6789, s78910;

+    int32x4_t d0, d1, d2, d3;

+    uint8x8_t d01, d23;

+    load_u8_8x4(src, src_stride, &t0, &t1, &t2, &t3);

+    src += 4 * src_stride;

+    t4 = vld1_u8(src);

+    src += src_stride;

+    t5 = vld1_u8(src);

+    src += src_stride;

+    t6 = vld1_u8(src);

+    src += src_stride;

+    /* Clamp sample range to [-128, 127] for 8-bit signed dot product. */

+    s0 = vreinterpret_s8_u8(vsub_u8(t0, range_limit));

+    s1 = vreinterpret_s8_u8(vsub_u8(t1, range_limit));

+    s2 = vreinterpret_s8_u8(vsub_u8(t2, range_limit));

+    s3 = vreinterpret_s8_u8(vsub_u8(t3, range_limit));

+    s4 = vreinterpret_s8_u8(vsub_u8(t4, range_limit));

+    s5 = vreinterpret_s8_u8(vsub_u8(t5, range_limit));

+    s6 = vreinterpret_s8_u8(vsub_u8(t6, range_limit));

+    s7 = vdup_n_s8(0);

+    s8 = vdup_n_s8(0);

+    s9 = vdup_n_s8(0);

+    /* This operation combines a conventional transpose and the sample permute

+     * (see horizontal case) required before computing the dot product.

+     */

+    transpose_concat_4x4(&s0, &s1, &s2, &s3, &s0123, tran_concat_tbl);

+    transpose_concat_4x4(&s1, &s2, &s3, &s4, &s1234, tran_concat_tbl);

+    transpose_concat_4x4(&s2, &s3, &s4, &s5, &s2345, tran_concat_tbl);

+    transpose_concat_4x4(&s3, &s4, &s5, &s6, &s3456, tran_concat_tbl);

+    transpose_concat_4x4(&s4, &s5, &s6, &s7, &s4567, tran_concat_tbl);

+    transpose_concat_4x4(&s5, &s6, &s7, &s8, &s5678, tran_concat_tbl);

+    transpose_concat_4x4(&s6, &s7, &s8, &s9, &s6789, tran_concat_tbl);

+    do {

+      uint8x8_t t7, t8, t9, t10;

+      load_u8_8x4(src, src_stride, &t7, &t8, &t9, &t10);

+      s7 = vreinterpret_s8_u8(vsub_u8(t7, range_limit));

+      s8 = vreinterpret_s8_u8(vsub_u8(t8, range_limit));

+      s9 = vreinterpret_s8_u8(vsub_u8(t9, range_limit));

+      s10 = vreinterpret_s8_u8(vsub_u8(t10, range_limit));

+      transpose_concat_4x4(&s7, &s8, &s9, &s10, &s78910, tran_concat_tbl);

+      /* Merge new data into block from previous iteration. */

+      samples_LUT.val[0] = s3456;

+      samples_LUT.val[1] = s78910;

+      s4567 = vqtbl2q_s8(samples_LUT, merge_block_tbl.val[0]);

+      s5678 = vqtbl2q_s8(samples_LUT, merge_block_tbl.val[1]);

+      s6789 = vqtbl2q_s8(samples_LUT, merge_block_tbl.val[2]);

+      d0 = convolve8_4_dot_partial(s0123, s4567, correction, filters);

+      d1 = convolve8_4_dot_partial(s1234, s5678, correction, filters);

+      d2 = convolve8_4_dot_partial(s2345, s6789, correction, filters);

+      d3 = convolve8_4_dot_partial(s3456, s78910, correction, filters);

+      d01 = vqrshrun_n_s16(vcombine_s16(vqmovn_s32(d0), vqmovn_s32(d1)), 7);

+      d23 = vqrshrun_n_s16(vcombine_s16(vqmovn_s32(d2), vqmovn_s32(d3)), 7);

+      vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d01), 0);

+      dst += dst_stride;

+      vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d01), 1);

+      dst += dst_stride;

+      vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d23), 0);

+      dst += dst_stride;

+      vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d23), 1);

+      dst += dst_stride;

+      /* Prepare block for next iteration - re-using as much as possible. */

+      /* Shuffle everything up four rows. */

+      s0123 = s4567;

+      s1234 = s5678;

+      s2345 = s6789;

+      s3456 = s78910;

+      src += 4 * src_stride;

+      h -= 4;

+    } while (h > 0);

+  } else {

+    const uint8x16x2_t tran_concat_tbl = vld1q_u8_x2(dot_prod_tran_concat_tbl);

+    int8x16_t s0123_lo, s0123_hi, s1234_lo, s1234_hi, s2345_lo, s2345_hi,

+        s3456_lo, s3456_hi, s4567_lo, s4567_hi, s5678_lo, s5678_hi, s6789_lo,

+        s6789_hi, s78910_lo, s78910_hi;

+    uint8x8_t d0, d1, d2, d3;

+    const uint8_t *s;

+    uint8_t *d;

+    int height;

+    do {

+      height = h;

+      s = src;

+      d = dst;

+      load_u8_8x4(s, src_stride, &t0, &t1, &t2, &t3);

+      s += 4 * src_stride;

+      t4 = vld1_u8(s);

+      s += src_stride;

+      t5 = vld1_u8(s);

+      s += src_stride;

+      t6 = vld1_u8(s);

+      s += src_stride;

+      /* Clamp sample range to [-128, 127] for 8-bit signed dot product. */

+      s0 = vreinterpret_s8_u8(vsub_u8(t0, range_limit));

+      s1 = vreinterpret_s8_u8(vsub_u8(t1, range_limit));

+      s2 = vreinterpret_s8_u8(vsub_u8(t2, range_limit));

+      s3 = vreinterpret_s8_u8(vsub_u8(t3, range_limit));

+      s4 = vreinterpret_s8_u8(vsub_u8(t4, range_limit));

+      s5 = vreinterpret_s8_u8(vsub_u8(t5, range_limit));

+      s6 = vreinterpret_s8_u8(vsub_u8(t6, range_limit));

+      s7 = vdup_n_s8(0);

+      s8 = vdup_n_s8(0);

+      s9 = vdup_n_s8(0);

+      /* This operation combines a conventional transpose and the sample permute

+       * (see horizontal case) required before computing the dot product.

+       */

+      transpose_concat_8x4(&s0, &s1, &s2, &s3, &s0123_lo, &s0123_hi,

+                           tran_concat_tbl);

+      transpose_concat_8x4(&s1, &s2, &s3, &s4, &s1234_lo, &s1234_hi,

+                           tran_concat_tbl);

+      transpose_concat_8x4(&s2, &s3, &s4, &s5, &s2345_lo, &s2345_hi,

+                           tran_concat_tbl);

+      transpose_concat_8x4(&s3, &s4, &s5, &s6, &s3456_lo, &s3456_hi,

+                           tran_concat_tbl);

+      transpose_concat_8x4(&s4, &s5, &s6, &s7, &s4567_lo, &s4567_hi,

+                           tran_concat_tbl);

+      transpose_concat_8x4(&s5, &s6, &s7, &s8, &s5678_lo, &s5678_hi,

+                           tran_concat_tbl);

+      transpose_concat_8x4(&s6, &s7, &s8, &s9, &s6789_lo, &s6789_hi,

+                           tran_concat_tbl);

+      do {

+        uint8x8_t t7, t8, t9, t10;

+        load_u8_8x4(s, src_stride, &t7, &t8, &t9, &t10);

+        s7 = vreinterpret_s8_u8(vsub_u8(t7, range_limit));

+        s8 = vreinterpret_s8_u8(vsub_u8(t8, range_limit));

+        s9 = vreinterpret_s8_u8(vsub_u8(t9, range_limit));

+        s10 = vreinterpret_s8_u8(vsub_u8(t10, range_limit));

+        transpose_concat_8x4(&s7, &s8, &s9, &s10, &s78910_lo, &s78910_hi,

+                             tran_concat_tbl);

+        /* Merge new data into block from previous iteration. */

+        samples_LUT.val[0] = s3456_lo;

+        samples_LUT.val[1] = s78910_lo;

+        s4567_lo = vqtbl2q_s8(samples_LUT, merge_block_tbl.val[0]);

+        s5678_lo = vqtbl2q_s8(samples_LUT, merge_block_tbl.val[1]);

+        s6789_lo = vqtbl2q_s8(samples_LUT, merge_block_tbl.val[2]);

+        samples_LUT.val[0] = s3456_hi;

+        samples_LUT.val[1] = s78910_hi;

+        s4567_hi = vqtbl2q_s8(samples_LUT, merge_block_tbl.val[0]);

+        s5678_hi = vqtbl2q_s8(samples_LUT, merge_block_tbl.val[1]);

+        s6789_hi = vqtbl2q_s8(samples_LUT, merge_block_tbl.val[2]);

+        d0 = convolve8_8_dot_partial(s0123_lo, s4567_lo, s0123_hi, s4567_hi,

+                                     correction, filters);

+        d1 = convolve8_8_dot_partial(s1234_lo, s5678_lo, s1234_hi, s5678_hi,

+                                     correction, filters);

+        d2 = convolve8_8_dot_partial(s2345_lo, s6789_lo, s2345_hi, s6789_hi,

+                                     correction, filters);

+        d3 = convolve8_8_dot_partial(s3456_lo, s78910_lo, s3456_hi, s78910_hi,

+                                     correction, filters);

+        vst1_u8(d + 0 * dst_stride, d0);

+        vst1_u8(d + 1 * dst_stride, d1);

+        vst1_u8(d + 2 * dst_stride, d2);

+        vst1_u8(d + 3 * dst_stride, d3);

+        /* Prepare block for next iteration - re-using as much as possible. */

+        /* Shuffle everything up four rows. */

+        s0123_lo = s4567_lo;

+        s0123_hi = s4567_hi;

+        s1234_lo = s5678_lo;

+        s1234_hi = s5678_hi;

+        s2345_lo = s6789_lo;

+        s2345_hi = s6789_hi;

+        s3456_lo = s78910_lo;

+        s3456_hi = s78910_hi;

+        s += 4 * src_stride;

+        d += 4 * dst_stride;

+        height -= 4;

+      } while (height > 0);

+      src += 8;

+      dst += 8;

+      w -= 8;

+    } while (w > 0);

+  }

+}

+void vpx_convolve8_avg_vert_neon(const uint8_t *src, ptrdiff_t src_stride,

+                                 uint8_t *dst, ptrdiff_t dst_stride,

+                                 const InterpKernel *filter, int x0_q4,

+                                 int x_step_q4, int y0_q4, int y_step_q4, int w,

+                                 int h) {

+  const int8x8_t filters = vmovn_s16(vld1q_s16(filter[y0_q4]));

+  const int16x8_t correct_tmp = vmulq_n_s16(vld1q_s16(filter[y0_q4]), 128);

+  const int32x4_t correction = vdupq_n_s32((int32_t)vaddvq_s16(correct_tmp));

+  const uint8x8_t range_limit = vdup_n_u8(128);

+  const uint8x16x3_t merge_block_tbl = vld1q_u8_x3(dot_prod_merge_block_tbl);

+  uint8x8_t t0, t1, t2, t3, t4, t5, t6;

+  int8x8_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10;

+  int8x16x2_t samples_LUT;

+  assert(!((intptr_t)dst & 3));

+  assert(!(dst_stride & 3));

+  assert(y_step_q4 == 16);

+  (void)x0_q4;

+  (void)x_step_q4;

+  (void)y_step_q4;

+  src -= 3 * src_stride;

+  if (w == 4) {

+    const uint8x16_t tran_concat_tbl = vld1q_u8(dot_prod_tran_concat_tbl);

+    int8x16_t s0123, s1234, s2345, s3456, s4567, s5678, s6789, s78910;

+    int32x4_t d0, d1, d2, d3;

+    uint8x8_t d01, d23, dd01, dd23;

+    load_u8_8x4(src, src_stride, &t0, &t1, &t2, &t3);

+    src += 4 * src_stride;

+    t4 = vld1_u8(src);

+    src += src_stride;

+    t5 = vld1_u8(src);

+    src += src_stride;

+    t6 = vld1_u8(src);

+    src += src_stride;

+    /* Clamp sample range to [-128, 127] for 8-bit signed dot product. */

+    s0 = vreinterpret_s8_u8(vsub_u8(t0, range_limit));

+    s1 = vreinterpret_s8_u8(vsub_u8(t1, range_limit));

+    s2 = vreinterpret_s8_u8(vsub_u8(t2, range_limit));

+    s3 = vreinterpret_s8_u8(vsub_u8(t3, range_limit));

+    s4 = vreinterpret_s8_u8(vsub_u8(t4, range_limit));

+    s5 = vreinterpret_s8_u8(vsub_u8(t5, range_limit));

+    s6 = vreinterpret_s8_u8(vsub_u8(t6, range_limit));

+    s7 = vdup_n_s8(0);

+    s8 = vdup_n_s8(0);

+    s9 = vdup_n_s8(0);

+    /* This operation combines a conventional transpose and the sample permute

+     * (see horizontal case) required before computing the dot product.

+     */

+    transpose_concat_4x4(&s0, &s1, &s2, &s3, &s0123, tran_concat_tbl);

+    transpose_concat_4x4(&s1, &s2, &s3, &s4, &s1234, tran_concat_tbl);

+    transpose_concat_4x4(&s2, &s3, &s4, &s5, &s2345, tran_concat_tbl);

+    transpose_concat_4x4(&s3, &s4, &s5, &s6, &s3456, tran_concat_tbl);

+    transpose_concat_4x4(&s4, &s5, &s6, &s7, &s4567, tran_concat_tbl);

+    transpose_concat_4x4(&s5, &s6, &s7, &s8, &s5678, tran_concat_tbl);

+    transpose_concat_4x4(&s6, &s7, &s8, &s9, &s6789, tran_concat_tbl);

+    do {

+      uint8x8_t t7, t8, t9, t10;

+      load_u8_8x4(src, src_stride, &t7, &t8, &t9, &t10);

+      s7 = vreinterpret_s8_u8(vsub_u8(t7, range_limit));

+      s8 = vreinterpret_s8_u8(vsub_u8(t8, range_limit));

+      s9 = vreinterpret_s8_u8(vsub_u8(t9, range_limit));

+      s10 = vreinterpret_s8_u8(vsub_u8(t10, range_limit));

+      transpose_concat_4x4(&s7, &s8, &s9, &s10, &s78910, tran_concat_tbl);

+      /* Merge new data into block from previous iteration. */

+      samples_LUT.val[0] = s3456;

+      samples_LUT.val[1] = s78910;

+      s4567 = vqtbl2q_s8(samples_LUT, merge_block_tbl.val[0]);

+      s5678 = vqtbl2q_s8(samples_LUT, merge_block_tbl.val[1]);

+      s6789 = vqtbl2q_s8(samples_LUT, merge_block_tbl.val[2]);

+      d0 = convolve8_4_dot_partial(s0123, s4567, correction, filters);

+      d1 = convolve8_4_dot_partial(s1234, s5678, correction, filters);

+      d2 = convolve8_4_dot_partial(s2345, s6789, correction, filters);

+      d3 = convolve8_4_dot_partial(s3456, s78910, correction, filters);

+      d01 = vqrshrun_n_s16(vcombine_s16(vqmovn_s32(d0), vqmovn_s32(d1)), 7);

+      d23 = vqrshrun_n_s16(vcombine_s16(vqmovn_s32(d2), vqmovn_s32(d3)), 7);

+      dd01 = load_u8(dst + 0 * dst_stride, dst_stride);

+      dd23 = load_u8(dst + 2 * dst_stride, dst_stride);

+      d01 = vrhadd_u8(d01, dd01);

+      d23 = vrhadd_u8(d23, dd23);

+      vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d01), 0);

+      dst += dst_stride;

+      vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d01), 1);

+      dst += dst_stride;

+      vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d23), 0);

+      dst += dst_stride;

+      vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d23), 1);

+      dst += dst_stride;

+      /* Prepare block for next iteration - re-using as much as possible. */

+      /* Shuffle everything up four rows. */

+      s0123 = s4567;

+      s1234 = s5678;

+      s2345 = s6789;

+      s3456 = s78910;

+      src += 4 * src_stride;

+      h -= 4;

+    } while (h > 0);

+  } else {

+    const uint8x16x2_t tran_concat_tbl = vld1q_u8_x2(dot_prod_tran_concat_tbl);

+    int8x16_t s0123_lo, s0123_hi, s1234_lo, s1234_hi, s2345_lo, s2345_hi,

+        s3456_lo, s3456_hi, s4567_lo, s4567_hi, s5678_lo, s5678_hi, s6789_lo,

+        s6789_hi, s78910_lo, s78910_hi;

+    uint8x8_t d0, d1, d2, d3, dd0, dd1, dd2, dd3;

+    const uint8_t *s;

+    uint8_t *d;

+    int height;

+    do {

+      height = h;

+      s = src;

+      d = dst;

+      load_u8_8x4(s, src_stride, &t0, &t1, &t2, &t3);

+      s += 4 * src_stride;

+      t4 = vld1_u8(s);

+      s += src_stride;

+      t5 = vld1_u8(s);

+      s += src_stride;

+      t6 = vld1_u8(s);

+      s += src_stride;

+      /* Clamp sample range to [-128, 127] for 8-bit signed dot product. */

+      s0 = vreinterpret_s8_u8(vsub_u8(t0, range_limit));

+      s1 = vreinterpret_s8_u8(vsub_u8(t1, range_limit));

+      s2 = vreinterpret_s8_u8(vsub_u8(t2, range_limit));

+      s3 = vreinterpret_s8_u8(vsub_u8(t3, range_limit));

+      s4 = vreinterpret_s8_u8(vsub_u8(t4, range_limit));

+      s5 = vreinterpret_s8_u8(vsub_u8(t5, range_limit));

+      s6 = vreinterpret_s8_u8(vsub_u8(t6, range_limit));

+      s7 = vdup_n_s8(0);

+      s8 = vdup_n_s8(0);

+      s9 = vdup_n_s8(0);

+      /* This operation combines a conventional transpose and the sample permute

+       * (see horizontal case) required before computing the dot product.

+       */

+      transpose_concat_8x4(&s0, &s1, &s2, &s3, &s0123_lo, &s0123_hi,

+                           tran_concat_tbl);

+      transpose_concat_8x4(&s1, &s2, &s3, &s4, &s1234_lo, &s1234_hi,

+                           tran_concat_tbl);

+      transpose_concat_8x4(&s2, &s3, &s4, &s5, &s2345_lo, &s2345_hi,

+                           tran_concat_tbl);

+      transpose_concat_8x4(&s3, &s4, &s5, &s6, &s3456_lo, &s3456_hi,

+                           tran_concat_tbl);

+      transpose_concat_8x4(&s4, &s5, &s6, &s7, &s4567_lo, &s4567_hi,

+                           tran_concat_tbl);

+      transpose_concat_8x4(&s5, &s6, &s7, &s8, &s5678_lo, &s5678_hi,

+                           tran_concat_tbl);

+      transpose_concat_8x4(&s6, &s7, &s8, &s9, &s6789_lo, &s6789_hi,

+                           tran_concat_tbl);

+      do {

+        uint8x8_t t7, t8, t9, t10;

+        load_u8_8x4(s, src_stride, &t7, &t8, &t9, &t10);

+        s7 = vreinterpret_s8_u8(vsub_u8(t7, range_limit));

+        s8 = vreinterpret_s8_u8(vsub_u8(t8, range_limit));

+        s9 = vreinterpret_s8_u8(vsub_u8(t9, range_limit));

+        s10 = vreinterpret_s8_u8(vsub_u8(t10, range_limit));

+        transpose_concat_8x4(&s7, &s8, &s9, &s10, &s78910_lo, &s78910_hi,

+                             tran_concat_tbl);

+        /* Merge new data into block from previous iteration. */

+        samples_LUT.val[0] = s3456_lo;

+        samples_LUT.val[1] = s78910_lo;

+        s4567_lo = vqtbl2q_s8(samples_LUT, merge_block_tbl.val[0]);

+        s5678_lo = vqtbl2q_s8(samples_LUT, merge_block_tbl.val[1]);

+        s6789_lo = vqtbl2q_s8(samples_LUT, merge_block_tbl.val[2]);

+        samples_LUT.val[0] = s3456_hi;

+        samples_LUT.val[1] = s78910_hi;

+        s4567_hi = vqtbl2q_s8(samples_LUT, merge_block_tbl.val[0]);

+        s5678_hi = vqtbl2q_s8(samples_LUT, merge_block_tbl.val[1]);

+        s6789_hi = vqtbl2q_s8(samples_LUT, merge_block_tbl.val[2]);

+        d0 = convolve8_8_dot_partial(s0123_lo, s4567_lo, s0123_hi, s4567_hi,

+                                     correction, filters);

+        d1 = convolve8_8_dot_partial(s1234_lo, s5678_lo, s1234_hi, s5678_hi,

+                                     correction, filters);

+        d2 = convolve8_8_dot_partial(s2345_lo, s6789_lo, s2345_hi, s6789_hi,

+                                     correction, filters);

+        d3 = convolve8_8_dot_partial(s3456_lo, s78910_lo, s3456_hi, s78910_hi,

+                                     correction, filters);

+        dd0 = vld1_u8(d + 0 * dst_stride);

+        dd1 = vld1_u8(d + 1 * dst_stride);

+        dd2 = vld1_u8(d + 2 * dst_stride);

+        dd3 = vld1_u8(d + 3 * dst_stride);

+        d0 = vrhadd_u8(d0, dd0);

+        d1 = vrhadd_u8(d1, dd1);

+        d2 = vrhadd_u8(d2, dd2);

+        d3 = vrhadd_u8(d3, dd3);

+        vst1_u8(d + 0 * dst_stride, d0);

+        vst1_u8(d + 1 * dst_stride, d1);

+        vst1_u8(d + 2 * dst_stride, d2);

+        vst1_u8(d + 3 * dst_stride, d3);

+        /* Prepare block for next iteration - re-using as much as possible. */

+        /* Shuffle everything up four rows. */

+        s0123_lo = s4567_lo;

+        s0123_hi = s4567_hi;

+        s1234_lo = s5678_lo;

+        s1234_hi = s5678_hi;

+        s2345_lo = s6789_lo;

+        s2345_hi = s6789_hi;

+        s3456_lo = s78910_lo;

+        s3456_hi = s78910_hi;

+        s += 4 * src_stride;

+        d += 4 * dst_stride;

+        height -= 4;

+      } while (height > 0);

+      src += 8;

+      dst += 8;

+      w -= 8;

+    } while (w > 0);

+  }

+}

+#else

 static INLINE void store_u8_8x8(uint8_t *s, const ptrdiff_t p,

                                 const uint8x8_t s0, const uint8x8_t s1,

                                 const uint8x8_t s2, const uint8x8_t s3,

@@ -145,7 +881,7 @@

       src += 4;

       dst += 4;

       w -= 4;

-    } while (w > 0);

+    } while (w != 0);

   } else {

     const int16x8_t filter3 = vdupq_lane_s16(vget_low_s16(filters), 3);

     const int16x8_t filter4 = vdupq_lane_s16(vget_high_s16(filters), 0);

@@ -296,7 +1032,7 @@

           s += 8;

           d += 8;

           width -= 8;

-        } while (width > 0);

+        } while (width != 0);

         src += 8 * src_stride;

         dst += 8 * dst_stride;

         h -= 8;

@@ -402,7 +1138,7 @@

       src += 4;

       dst += 4;

       w -= 4;

-    } while (w > 0);

+    } while (w != 0);

   } else {

     const int16x8_t filter3 = vdupq_lane_s16(vget_low_s16(filters), 3);

     const int16x8_t filter4 = vdupq_lane_s16(vget_high_s16(filters), 0);

@@ -586,7 +1322,7 @@

           s += 8;

           d += 8;

           width -= 8;

-        } while (width > 0);

+        } while (width != 0);

         src += 8 * src_stride;

         dst += 8 * dst_stride;

         h -= 8;

@@ -679,7 +1415,7 @@

       s5 = s9;

       s6 = s10;

       h -= 4;

-    } while (h > 0);

+    } while (h != 0);

   } else {

     const int16x8_t filter3 = vdupq_lane_s16(vget_low_s16(filters), 3);

     const int16x8_t filter4 = vdupq_lane_s16(vget_high_s16(filters), 0);

@@ -759,11 +1495,11 @@

         s5 = s9;

         s6 = s10;

         height -= 4;

-      } while (height > 0);

+      } while (height != 0);

       src += 8;

       dst += 8;

       w -= 8;

-    } while (w > 0);

+    } while (w != 0);

@@ -860,7 +1596,7 @@

       s5 = s9;

       s6 = s10;

       h -= 4;

-    } while (h > 0);

+    } while (h != 0);

   } else {

     const int16x8_t filter3 = vdupq_lane_s16(vget_low_s16(filters), 3);

     const int16x8_t filter4 = vdupq_lane_s16(vget_high_s16(filters), 0);

@@ -950,10 +1686,12 @@

         s5 = s9;

         s6 = s10;

         height -= 4;

-      } while (height > 0);

+      } while (height != 0);

       src += 8;

       dst += 8;

       w -= 8;

-    } while (w > 0);

+    } while (w != 0);

+#endif

--- a/vpx_dsp/arm/vpx_convolve8_neon.h

+++ b/vpx_dsp/arm/vpx_convolve8_neon.h

@@ -72,6 +72,107 @@

   *s7 = vld1q_u8(s);

+#if defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD) && \

+    (__ARM_FEATURE_DOTPROD == 1)

+static INLINE int32x4_t convolve8_4_dot_partial(const int8x16_t samples_lo,

+                                                const int8x16_t samples_hi,

+                                                const int32x4_t correction,

+                                                const int8x8_t filters) {

+  /* Sample range-clamping and permutation are performed by the caller. */

+  int32x4_t sum;

+  /* Accumulate dot product into 'correction' to account for range clamp. */

+  sum = vdotq_lane_s32(correction, samples_lo, filters, 0);

+  sum = vdotq_lane_s32(sum, samples_hi, filters, 1);

+  /* Narrowing and packing is performed by the caller. */

+  return sum;

+}

+static INLINE int32x4_t convolve8_4_dot(uint8x16_t samples,

+                                        const int8x8_t filters,

+                                        const int32x4_t correction,

+                                        const uint8x16_t range_limit,

+                                        const uint8x16x2_t permute_tbl) {

+  int8x16_t clamped_samples, permuted_samples[2];

+  int32x4_t sum;

+  /* Clamp sample range to [-128, 127] for 8-bit signed dot product. */

+  clamped_samples = vreinterpretq_s8_u8(vsubq_u8(samples, range_limit));

+  /* Permute samples ready for dot product. */

+  /* { 0,  1,  2,  3,  1,  2,  3,  4,  2,  3,  4,  5,  3,  4,  5,  6 } */

+  permuted_samples[0] = vqtbl1q_s8(clamped_samples, permute_tbl.val[0]);

+  /* { 4,  5,  6,  7,  5,  6,  7,  8,  6,  7,  8,  9,  7,  8,  9, 10 } */

+  permuted_samples[1] = vqtbl1q_s8(clamped_samples, permute_tbl.val[1]);

+  /* Accumulate dot product into 'correction' to account for range clamp. */

+  sum = vdotq_lane_s32(correction, permuted_samples[0], filters, 0);

+  sum = vdotq_lane_s32(sum, permuted_samples[1], filters, 1);

+  /* Narrowing and packing is performed by the caller. */

+  return sum;

+}

+static INLINE uint8x8_t convolve8_8_dot_partial(const int8x16_t samples0_lo,

+                                                const int8x16_t samples0_hi,

+                                                const int8x16_t samples1_lo,

+                                                const int8x16_t samples1_hi,

+                                                const int32x4_t correction,

+                                                const int8x8_t filters) {

+  /* Sample range-clamping and permutation are performed by the caller. */

+  int32x4_t sum0, sum1;

+  int16x8_t sum;

+  /* Accumulate dot product into 'correction' to account for range clamp. */

+  /* First 4 output values. */

+  sum0 = vdotq_lane_s32(correction, samples0_lo, filters, 0);

+  sum0 = vdotq_lane_s32(sum0, samples0_hi, filters, 1);

+  /* Second 4 output values. */

+  sum1 = vdotq_lane_s32(correction, samples1_lo, filters, 0);

+  sum1 = vdotq_lane_s32(sum1, samples1_hi, filters, 1);

+  /* Narrow and re-pack. */

+  sum = vcombine_s16(vqmovn_s32(sum0), vqmovn_s32(sum1));

+  return vqrshrun_n_s16(sum, 7);

+}

+static INLINE uint8x8_t convolve8_8_dot(uint8x16_t samples,

+                                        const int8x8_t filters,

+                                        const int32x4_t correction,

+                                        const uint8x16_t range_limit,

+                                        const uint8x16x3_t permute_tbl) {

+  int8x16_t clamped_samples, permuted_samples[3];

+  int32x4_t sum0, sum1;

+  int16x8_t sum;

+  /* Clamp sample range to [-128, 127] for 8-bit signed dot product. */

+  clamped_samples = vreinterpretq_s8_u8(vsubq_u8(samples, range_limit));

+  /* Permute samples ready for dot product. */

+  /* { 0,  1,  2,  3,  1,  2,  3,  4,  2,  3,  4,  5,  3,  4,  5,  6 } */

+  permuted_samples[0] = vqtbl1q_s8(clamped_samples, permute_tbl.val[0]);

+  /* { 4,  5,  6,  7,  5,  6,  7,  8,  6,  7,  8,  9,  7,  8,  9, 10 } */

+  permuted_samples[1] = vqtbl1q_s8(clamped_samples, permute_tbl.val[1]);

+  /* { 8,  9, 10, 11,  9, 10, 11, 12, 10, 11, 12, 13, 11, 12, 13, 14 } */

+  permuted_samples[2] = vqtbl1q_s8(clamped_samples, permute_tbl.val[2]);

+  /* Accumulate dot product into 'correction' to account for range clamp. */

+  /* First 4 output values. */

+  sum0 = vdotq_lane_s32(correction, permuted_samples[0], filters, 0);

+  sum0 = vdotq_lane_s32(sum0, permuted_samples[1], filters, 1);

+  /* Second 4 output values. */

+  sum1 = vdotq_lane_s32(correction, permuted_samples[1], filters, 0);

+  sum1 = vdotq_lane_s32(sum1, permuted_samples[2], filters, 1);

+  /* Narrow and re-pack. */

+  sum = vcombine_s16(vqmovn_s32(sum0), vqmovn_s32(sum1));

+  return vqrshrun_n_s16(sum, 7);

+}

+#endif

 static INLINE int16x4_t convolve8_4(const int16x4_t s0, const int16x4_t s1,

                                     const int16x4_t s2, const int16x4_t s3,

                                     const int16x4_t s4, const int16x4_t s5,

--- a/vpx_dsp/arm/vpx_convolve_avg_neon.c

+++ b/vpx_dsp/arm/vpx_convolve_avg_neon.c

@@ -43,7 +43,7 @@

       vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(dd0), 1);

       dst += dst_stride;

       h -= 2;

-    } while (h > 0);

+    } while (h != 0);

   } else if (w == 8) {  // avg8

     uint8x8_t s0, s1, d0, d1;

     uint8x16_t s01, d01;

@@ -64,7 +64,7 @@

       vst1_u8(dst, vget_high_u8(d01));

       dst += dst_stride;

       h -= 2;

-    } while (h > 0);

+    } while (h != 0);

   } else if (w < 32) {  // avg16

     uint8x16_t s0, s1, d0, d1;

     do {

@@ -83,7 +83,7 @@

       vst1q_u8(dst, d1);

       dst += dst_stride;

       h -= 2;

-    } while (h > 0);

+    } while (h != 0);

   } else if (w == 32) {  // avg32

     uint8x16_t s0, s1, s2, s3, d0, d1, d2, d3;

     do {

@@ -110,7 +110,7 @@

       vst1q_u8(dst + 16, d3);

       dst += dst_stride;

       h -= 2;

-    } while (h > 0);

+    } while (h != 0);

   } else {  // avg64

     uint8x16_t s0, s1, s2, s3, d0, d1, d2, d3;

     do {

--- a/vpx_dsp/arm/vpx_convolve_copy_neon.c

+++ b/vpx_dsp/arm/vpx_convolve_copy_neon.c

@@ -33,7 +33,7 @@

       src += src_stride;

       dst += dst_stride;

       h -= 2;

-    } while (h > 0);

+    } while (h != 0);

   } else if (w == 8) {  // copy8

     uint8x8_t s0, s1;

     do {

@@ -47,7 +47,7 @@

       vst1_u8(dst, s1);

       dst += dst_stride;

       h -= 2;

-    } while (h > 0);

+    } while (h != 0);

   } else if (w < 32) {  // copy16

     uint8x16_t s0, s1;

     do {

@@ -61,7 +61,7 @@

       vst1q_u8(dst, s1);

       dst += dst_stride;

       h -= 2;

-    } while (h > 0);

+    } while (h != 0);

   } else if (w == 32) {  // copy32

     uint8x16_t s0, s1, s2, s3;

     do {

@@ -79,7 +79,7 @@

       vst1q_u8(dst + 16, s3);

       dst += dst_stride;

       h -= 2;

-    } while (h > 0);

+    } while (h != 0);

   } else {  // copy64

     uint8x16_t s0, s1, s2, s3;

     do {

--- a/vpx_ports/x86.h

+++ b/vpx_ports/x86.h

@@ -223,6 +223,8 @@

+  (void)reg_eax;  // Avoid compiler warning on unused-but-set variable.

   return flags & mask;

@@ -240,7 +242,7 @@

 // x86_readtsc directly, but prevent the CPU's out-of-order execution from

 // affecting the measurement (by having earlier/later instructions be evaluated

 // in the time interval). See the white paper, "How to Benchmark Code

-// Execution Times on Intel® IA-32 and IA-64 Instruction Set Architectures" by

+// Execution Times on Intel(R) IA-32 and IA-64 Instruction Set Architectures" by

 // Gabriele Paoloni for more information.

//

 // If you are timing a large function (CPU time > a couple of seconds), use

@@ -306,7 +308,13 @@

 static INLINE unsigned int x86_tsc_start(void) {

   unsigned int reg_eax, reg_ebx, reg_ecx, reg_edx;

+  // This call should not be removed. See function notes above.

   cpuid(0, 0, reg_eax, reg_ebx, reg_ecx, reg_edx);

+  // Avoid compiler warnings on unused-but-set variables.

+  (void)reg_eax;

+  (void)reg_ebx;

+  (void)reg_ecx;

+  (void)reg_edx;

   return x86_readtsc();

@@ -313,7 +321,13 @@

 static INLINE unsigned int x86_tsc_end(void) {

   uint32_t v = x86_readtscp();

   unsigned int reg_eax, reg_ebx, reg_ecx, reg_edx;

+  // This call should not be removed. See function notes above.

   cpuid(0, 0, reg_eax, reg_ebx, reg_ecx, reg_edx);

+  // Avoid compiler warnings on unused-but-set variables.

+  (void)reg_eax;

+  (void)reg_ebx;

+  (void)reg_ecx;

+  (void)reg_edx;

   return v;

--- a/vpx_scale/generic/yv12config.c

+++ b/vpx_scale/generic/yv12config.c

@@ -64,6 +64,10 @@

     if (!ybf->buffer_alloc) {

       ybf->buffer_alloc = (uint8_t *)vpx_memalign(32, frame_size);

+      if (!ybf->buffer_alloc) {

+        ybf->buffer_alloc_sz = 0;

+        return -1;

+      }

 #if defined(__has_feature)

 #if __has_feature(memory_sanitizer)

       // This memset is needed for fixing the issue of using uninitialized

@@ -75,7 +79,7 @@

       ybf->buffer_alloc_sz = frame_size;

-    if (!ybf->buffer_alloc || ybf->buffer_alloc_sz < frame_size) return -1;

+    if (ybf->buffer_alloc_sz < frame_size) return -1;

     /* Only support allocating buffers that have a border that's a multiple

      * of 32. The border restriction is required to get 16-byte alignment of

--- a/vpxenc.c

+++ b/vpxenc.c

@@ -114,10 +114,6 @@

     ARG_DEF(NULL, "pass", 1, "Pass to execute (1/2)");

 static const arg_def_t fpf_name =

     ARG_DEF(NULL, "fpf", 1, "First pass statistics file name");

-#if CONFIG_FP_MB_STATS

-static const arg_def_t fpmbf_name =

-    ARG_DEF(NULL, "fpmbf", 1, "First pass block statistics file name");

-#endif

 static const arg_def_t limit =

     ARG_DEF(NULL, "limit", 1, "Stop encoding after n input frames");

 static const arg_def_t skip =

@@ -287,6 +283,64 @@

   &buf_sz,           &buf_initial_sz,     &buf_optimal_sz, NULL

};

+#if CONFIG_VP9_ENCODER

+static const arg_def_t use_vizier_rc_params =

+    ARG_DEF(NULL, "use-vizier-rc-params", 1, "Use vizier rc params");

+static const arg_def_t active_wq_factor =

+    ARG_DEF(NULL, "active-wq-factor", 1, "Active worst quality factor");

+static const arg_def_t err_per_mb_factor =

+    ARG_DEF(NULL, "err-per-mb-factor", 1, "Error per macroblock factor");

+static const arg_def_t sr_default_decay_limit = ARG_DEF(

+    NULL, "sr-default-decay-limit", 1, "Second reference default decay limit");

+static const arg_def_t sr_diff_factor =

+    ARG_DEF(NULL, "sr-diff-factor", 1, "Second reference diff factor");

+static const arg_def_t kf_err_per_mb_factor = ARG_DEF(

+    NULL, "kf-err-per-mb-factor", 1, "Keyframe error per macroblock factor");

+static const arg_def_t kf_frame_min_boost_factor =

+    ARG_DEF(NULL, "kf-frame-min-boost-factor", 1, "Keyframe min boost");

+static const arg_def_t kf_frame_max_boost_first_factor =

+    ARG_DEF(NULL, "kf-frame-max-boost-first-factor", 1,

+            "Max keyframe boost adjustment factor for first frame");

+static const arg_def_t kf_frame_max_boost_subs_factor =

+    ARG_DEF(NULL, "kf-frame-max-boost-subs-factor", 1,

+            "Max boost adjustment factor for subsequent KFs");

+static const arg_def_t kf_max_total_boost_factor = ARG_DEF(

+    NULL, "kf-max-total-boost-factor", 1, "Keyframe max total boost factor");

+static const arg_def_t gf_max_total_boost_factor =

+    ARG_DEF(NULL, "gf-max-total-boost-factor", 1,

+            "Golden frame max total boost factor");

+static const arg_def_t gf_frame_max_boost_factor =

+    ARG_DEF(NULL, "gf-frame-max-boost-factor", 1,

+            "Golden frame max per frame boost factor");

+static const arg_def_t zm_factor =

+    ARG_DEF(NULL, "zm-factor", 1, "Zero motion power factor");

+static const arg_def_t rd_mult_inter_qp_fac =

+    ARG_DEF(NULL, "rd-mult-inter-qp-fac", 1,

+            "RD multiplier adjustment for inter frames");

+static const arg_def_t rd_mult_arf_qp_fac =

+    ARG_DEF(NULL, "rd-mult-arf-qp-fac", 1,

+            "RD multiplier adjustment for alt-ref frames");

+static const arg_def_t rd_mult_key_qp_fac = ARG_DEF(

+    NULL, "rd-mult-key-qp-fac", 1, "RD multiplier adjustment for key frames");

+static const arg_def_t *vizier_rc_args[] = { &use_vizier_rc_params,

+                                             &active_wq_factor,

+                                             &err_per_mb_factor,

+                                             &sr_default_decay_limit,

+                                             &sr_diff_factor,

+                                             &kf_err_per_mb_factor,

+                                             &kf_frame_min_boost_factor,

+                                             &kf_frame_max_boost_first_factor,

+                                             &kf_frame_max_boost_subs_factor,

+                                             &kf_max_total_boost_factor,

+                                             &gf_max_total_boost_factor,

+                                             &gf_frame_max_boost_factor,

+                                             &zm_factor,

+                                             &rd_mult_inter_qp_fac,

+                                             &rd_mult_arf_qp_fac,

+                                             &rd_mult_key_qp_fac,

+                                             NULL };

+#endif

 static const arg_def_t bias_pct =

     ARG_DEF(NULL, "bias-pct", 1, "CBR/VBR bias (0=CBR, 100=VBR)");

 static const arg_def_t minsection_pct =

@@ -573,6 +627,8 @@

 #if CONFIG_VP9_ENCODER

   fprintf(fout, "\nVP9 Specific Options:\n");

   arg_show_usage(fout, vp9_args);

+  fprintf(fout, "\nVizier Rate Control Options:\n");

+  arg_show_usage(fout, vizier_rc_args);

 #endif

   fprintf(fout,

           "\nStream timebase (--timebase):\n"

@@ -614,9 +670,6 @@

   struct vpx_codec_enc_cfg cfg;

   const char *out_fn;

   const char *stats_fn;

-#if CONFIG_FP_MB_STATS

-  const char *fpmb_stats_fn;

-#endif

   stereo_format_t stereo_fmt;

   int arg_ctrls[ARG_CTRL_CNT_MAX][2];

   int arg_ctrl_cnt;

@@ -644,9 +697,6 @@

   uint64_t cx_time;

   size_t nbytes;

   stats_io_t stats;

-#if CONFIG_FP_MB_STATS

-  stats_io_t fpmb_stats;

-#endif

   struct vpx_image *img;

   vpx_codec_ctx_t decoder;

   int mismatch_seen;

@@ -883,10 +933,6 @@

       config->out_fn = arg.val;

     } else if (arg_match(&arg, &fpf_name, argi)) {

       config->stats_fn = arg.val;

-#if CONFIG_FP_MB_STATS

-    } else if (arg_match(&arg, &fpmbf_name, argi)) {

-      config->fpmb_stats_fn = arg.val;

-#endif

     } else if (arg_match(&arg, &use_webm, argi)) {

 #if CONFIG_WEBM_IO

       config->write_webm = 1;

@@ -983,6 +1029,40 @@

       config->cfg.kf_max_dist = arg_parse_uint(&arg);

     } else if (arg_match(&arg, &kf_disabled, argi)) {

       config->cfg.kf_mode = VPX_KF_DISABLED;

+#if CONFIG_VP9_ENCODER

+    } else if (arg_match(&arg, &use_vizier_rc_params, argi)) {

+      config->cfg.use_vizier_rc_params = arg_parse_int(&arg);

+    } else if (arg_match(&arg, &active_wq_factor, argi)) {

+      config->cfg.active_wq_factor = arg_parse_rational(&arg);

+    } else if (arg_match(&arg, &err_per_mb_factor, argi)) {

+      config->cfg.err_per_mb_factor = arg_parse_rational(&arg);

+    } else if (arg_match(&arg, &sr_default_decay_limit, argi)) {

+      config->cfg.sr_default_decay_limit = arg_parse_rational(&arg);

+    } else if (arg_match(&arg, &sr_diff_factor, argi)) {

+      config->cfg.sr_diff_factor = arg_parse_rational(&arg);

+    } else if (arg_match(&arg, &kf_err_per_mb_factor, argi)) {

+      config->cfg.kf_err_per_mb_factor = arg_parse_rational(&arg);

+    } else if (arg_match(&arg, &kf_frame_min_boost_factor, argi)) {

+      config->cfg.kf_frame_min_boost_factor = arg_parse_rational(&arg);

+    } else if (arg_match(&arg, &kf_frame_max_boost_first_factor, argi)) {

+      config->cfg.kf_frame_max_boost_first_factor = arg_parse_rational(&arg);

+    } else if (arg_match(&arg, &kf_frame_max_boost_subs_factor, argi)) {

+      config->cfg.kf_frame_max_boost_subs_factor = arg_parse_rational(&arg);

+    } else if (arg_match(&arg, &kf_max_total_boost_factor, argi)) {

+      config->cfg.kf_max_total_boost_factor = arg_parse_rational(&arg);

+    } else if (arg_match(&arg, &gf_max_total_boost_factor, argi)) {

+      config->cfg.gf_max_total_boost_factor = arg_parse_rational(&arg);

+    } else if (arg_match(&arg, &gf_frame_max_boost_factor, argi)) {

+      config->cfg.gf_frame_max_boost_factor = arg_parse_rational(&arg);

+    } else if (arg_match(&arg, &zm_factor, argi)) {

+      config->cfg.zm_factor = arg_parse_rational(&arg);

+    } else if (arg_match(&arg, &rd_mult_inter_qp_fac, argi)) {

+      config->cfg.rd_mult_inter_qp_fac = arg_parse_rational(&arg);

+    } else if (arg_match(&arg, &rd_mult_arf_qp_fac, argi)) {

+      config->cfg.rd_mult_arf_qp_fac = arg_parse_rational(&arg);

+    } else if (arg_match(&arg, &rd_mult_key_qp_fac, argi)) {

+      config->cfg.rd_mult_key_qp_fac = arg_parse_rational(&arg);

+#endif

 #if CONFIG_VP9_HIGHBITDEPTH

     } else if (arg_match(&arg, &test16bitinternalarg, argi)) {

       if (strcmp(global->codec->name, "vp9") == 0) {

@@ -1075,17 +1155,6 @@

         fatal("Stream %d: duplicate stats file (from stream %d)",

               streami->index, stream->index);

-#if CONFIG_FP_MB_STATS

-    /* Check for two streams sharing a mb stats file. */

-    if (streami != stream) {

-      const char *a = stream->config.fpmb_stats_fn;

-      const char *b = streami->config.fpmb_stats_fn;

-      if (a && b && !strcmp(a, b))

-        fatal("Stream %d: duplicate mb stats file (from stream %d)",

-              streami->index, stream->index);

-    }

-#endif

@@ -1177,6 +1246,10 @@

   SHOW(kf_mode);

   SHOW(kf_min_dist);

   SHOW(kf_max_dist);

+  // Temporary use for debug

+  SHOW(use_vizier_rc_params);

+  SHOW(active_wq_factor.num);

+  SHOW(active_wq_factor.den);

 static void open_output_file(struct stream_state *stream,

@@ -1240,26 +1313,11 @@

       fatal("Failed to open statistics store");

-#if CONFIG_FP_MB_STATS

-  if (stream->config.fpmb_stats_fn) {

-    if (!stats_open_file(&stream->fpmb_stats, stream->config.fpmb_stats_fn,

-                         pass))

-      fatal("Failed to open mb statistics store");

-  } else {

-    if (!stats_open_mem(&stream->fpmb_stats, pass))

-      fatal("Failed to open mb statistics store");

-  }

-#endif

   stream->config.cfg.g_pass = global->passes == 2

                                   ? pass ? VPX_RC_LAST_PASS : VPX_RC_FIRST_PASS

                                   : VPX_RC_ONE_PASS;

   if (pass) {

     stream->config.cfg.rc_twopass_stats_in = stats_get(&stream->stats);

-#if CONFIG_FP_MB_STATS

-    stream->config.cfg.rc_firstpass_mb_stats_in =

-        stats_get(&stream->fpmb_stats);

-#endif

   stream->cx_time = 0;

@@ -1471,13 +1529,6 @@

                     pkt->data.twopass_stats.sz);

         stream->nbytes += pkt->data.raw.sz;

         break;

-#if CONFIG_FP_MB_STATS

-      case VPX_CODEC_FPMB_STATS_PKT:

-        stats_write(&stream->fpmb_stats, pkt->data.firstpass_mb_stats.buf,

-                    pkt->data.firstpass_mb_stats.sz);

-        stream->nbytes += pkt->data.raw.sz;

-        break;

-#endif

       case VPX_CODEC_PSNR_PKT:

         if (global->show_psnr) {

@@ -1636,6 +1687,7 @@

   int res = 0;

   memset(&input, 0, sizeof(input));

+  memset(&raw, 0, sizeof(raw));

   exec_name = argv_[0];

   /* Setup default input stream settings */

@@ -1781,14 +1833,10 @@

       FOREACH_STREAM(show_stream_config(stream, &global, &input));

     if (pass == (global.pass ? global.pass - 1 : 0)) {

-      if (input.file_type == FILE_TYPE_Y4M)

-        /*The Y4M reader does its own allocation.

-          Just initialize this here to avoid problems if we never read any

-           frames.*/

-        memset(&raw, 0, sizeof(raw));

-      else

+      // The Y4M reader does its own allocation.

+      if (input.file_type != FILE_TYPE_Y4M) {

         vpx_img_alloc(&raw, input.fmt, input.width, input.height, 32);

+      }

       FOREACH_STREAM(stream->rate_hist = init_rate_histogram(

                          &stream->config.cfg, &global.framerate));

@@ -1973,10 +2021,6 @@

     FOREACH_STREAM(close_output_file(stream, global.codec->fourcc));

     FOREACH_STREAM(stats_close(&stream->stats, global.passes - 1));

-#if CONFIG_FP_MB_STATS

-    FOREACH_STREAM(stats_close(&stream->fpmb_stats, global.passes - 1));

-#endif

     if (global.pass) break;

--- a/y4minput.c

+++ b/y4minput.c

@@ -10,6 +10,7 @@

  *  Based on code from the OggTheora software codec source code,

  *  Copyright (C) 2002-2010 The Xiph.Org Foundation and contributors.

*/

+#include <assert.h>

 #include <errno.h>

 #include <stdlib.h>

 #include <string.h>

@@ -52,15 +53,8 @@

 static int y4m_parse_tags(y4m_input *_y4m, char *_tags) {

-  int got_w;

-  int got_h;

-  int got_fps;

-  int got_interlace;

-  int got_par;

-  int got_chroma;

   char *p;

   char *q;

-  got_w = got_h = got_fps = got_interlace = got_par = got_chroma = 0;

   for (p = _tags;; p = q) {

     /*Skip any leading spaces.*/

     while (*p == ' ') p++;

@@ -73,12 +67,10 @@

     switch (p[0]) {

       case 'W': {

         if (sscanf(p + 1, "%d", &_y4m->pic_w) != 1) return -1;

-        got_w = 1;

         break;

       case 'H': {

         if (sscanf(p + 1, "%d", &_y4m->pic_h) != 1) return -1;

-        got_h = 1;

         break;

       case 'F': {

@@ -85,12 +77,10 @@

         if (sscanf(p + 1, "%d:%d", &_y4m->fps_n, &_y4m->fps_d) != 2) {

           return -1;

-        got_fps = 1;

         break;

       case 'I': {

         _y4m->interlace = p[1];

-        got_interlace = 1;

         break;

       case 'A': {

@@ -97,7 +87,6 @@

         if (sscanf(p + 1, "%d:%d", &_y4m->par_n, &_y4m->par_d) != 2) {

           return -1;

-        got_par = 1;

         break;

       case 'C': {

@@ -104,21 +93,93 @@

         if (q - p > 16) return -1;

         memcpy(_y4m->chroma_type, p + 1, q - p - 1);

         _y4m->chroma_type[q - p - 1] = '\0';

-        got_chroma = 1;

         break;

         /*Ignore unknown tags.*/

-  if (!got_w || !got_h || !got_fps) return -1;

-  if (!got_interlace) _y4m->interlace = '?';

-  if (!got_par) _y4m->par_n = _y4m->par_d = 0;

-  /*Chroma-type is not specified in older files, e.g., those generated by

-     mplayer.*/

-  if (!got_chroma) strcpy(_y4m->chroma_type, "420");

   return 0;

+// Copy a single tag into the buffer, along with a null character.

+// Returns 0 if any file IO errors occur.

+static int copy_tag(char *buf, size_t buf_len, char *end_tag, FILE *file) {

+  size_t i;

+  assert(buf_len >= 1);

+  // Skip leading space characters.

+  do {

+    if (!file_read(buf, 1, file)) {

+      return 0;

+    }

+  } while (buf[0] == ' ');

+  // If we hit the newline, treat this as the "empty" tag.

+  if (buf[0] == '\n') {

+    buf[0] = '\0';

+    *end_tag = '\n';

+    return 1;

+  }

+  // Copy over characters until a space is hit, or the buffer is exhausted.

+  for (i = 1; i < buf_len; ++i) {

+    if (!file_read(buf + i, 1, file)) {

+      return 0;

+    }

+    if (buf[i] == ' ' || buf[i] == '\n') {

+      break;

+    }

+  }

+  if (i == buf_len) {

+    fprintf(stderr, "Error: Y4M header tags must be less than %lu characters\n",

+            (unsigned long)i);

+    return 0;

+  }

+  *end_tag = buf[i];

+  buf[i] = '\0';

+  return 1;

+}

+/* Returns 1 if tags were parsed successfully, 0 otherwise. */

+static int parse_tags(y4m_input *y4m_ctx, FILE *file) {

+  char tag[256];

+  char end; /* Character denoting the end of the tag, ' ' or '\n'. */

+  /* Set Y4M tags to defaults, updating them as processing occurs. Mandatory

+     fields are marked with -1 and will be checked after the tags are parsed. */

+  y4m_ctx->pic_w = -1;

+  y4m_ctx->pic_h = -1;

+  y4m_ctx->fps_n = -1; /* Also serves as marker for fps_d */

+  y4m_ctx->par_n = 0;

+  y4m_ctx->par_d = 0;

+  y4m_ctx->interlace = '?';

+  snprintf(y4m_ctx->chroma_type, sizeof(y4m_ctx->chroma_type), "420");

+  /* Find one tag at a time. */

+  do {

+    if (!copy_tag(tag, sizeof(tag), &end, file)) {

+      return 0;

+    }

+    /* y4m_parse_tags returns 0 on success. */

+    if (y4m_parse_tags(y4m_ctx, tag)) {

+      return 0;

+    }

+  } while (end != '\n');

+  /* Check the mandatory fields. */

+  if (y4m_ctx->pic_w == -1) {

+    fprintf(stderr, "Width field missing\n");

+    return 0;

+  }

+  if (y4m_ctx->pic_h == -1) {

+    fprintf(stderr, "Height field missing\n");

+    return 0;

+  }

+  if (y4m_ctx->fps_n == -1) {

+    fprintf(stderr, "FPS field missing\n");

+    return 0;

+  }

+  return 1;

+}

 /*All anti-aliasing filters in the following conversion functions are based on

    one of two window functions:

   The 6-tap Lanczos window (for down-sampling and shifts):

@@ -225,26 +286,6 @@

-/*Handles both 422 and 420mpeg2 to 422jpeg and 420jpeg, respectively.*/

-static void y4m_convert_42xmpeg2_42xjpeg(y4m_input *_y4m, unsigned char *_dst,

-                                         unsigned char *_aux) {

-  int c_w;

-  int c_h;

-  int c_sz;

-  int pli;

-  /*Skip past the luma data.*/

-  _dst += _y4m->pic_w * _y4m->pic_h;

-  /*Compute the size of each chroma plane.*/

-  c_w = (_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h;

-  c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v;

-  c_sz = c_w * c_h;

-  for (pli = 1; pli < 3; pli++) {

-    y4m_42xmpeg2_42xjpeg_helper(_dst, _aux, c_w, c_h);

-    _dst += c_sz;

-    _aux += c_sz;

-  }

-}

 /*This format is only used for interlaced content, but is included for

    completeness.

@@ -785,277 +826,271 @@

   (void)_aux;

-int y4m_input_open(y4m_input *_y4m, FILE *_fin, char *_skip, int _nskip,

-                   int only_420) {

-  char buffer[80] = { 0 };

-  int ret;

-  int i;

-  /*Read until newline, or 80 cols, whichever happens first.*/

-  for (i = 0; i < 79; i++) {

-    if (_nskip > 0) {

-      buffer[i] = *_skip++;

-      _nskip--;

-    } else {

-      if (!file_read(buffer + i, 1, _fin)) return -1;

-    }

-    if (buffer[i] == '\n') break;

+static const char TAG[] = "YUV4MPEG2";

+int y4m_input_open(y4m_input *y4m_ctx, FILE *file, char *skip_buffer,

+                   int num_skip, int only_420) {

+  // File must start with |TAG|.

+  char tag_buffer[9];  // 9 == strlen(TAG)

+  // Read as much as possible from |skip_buffer|, which were characters

+  // that were previously read from the file to do input-type detection.

+  assert(num_skip >= 0 && num_skip <= 8);

+  if (num_skip > 0) {

+    memcpy(tag_buffer, skip_buffer, num_skip);

-  /*We skipped too much header data.*/

-  if (_nskip > 0) return -1;

-  if (i == 79) {

-    fprintf(stderr, "Error parsing header; not a YUV2MPEG2 file?\n");

+  // Start reading from the file now that the |skip_buffer| is depleted.

+  if (!file_read(tag_buffer + num_skip, 9 - num_skip, file)) {

     return -1;

-  buffer[i] = '\0';

-  if (memcmp(buffer, "YUV4MPEG", 8)) {

-    fprintf(stderr, "Incomplete magic for YUV4MPEG file.\n");

+  if (memcmp(TAG, tag_buffer, 9) != 0) {

+    fprintf(stderr, "Error parsing header: must start with %s\n", TAG);

     return -1;

-  if (buffer[8] != '2') {

-    fprintf(stderr, "Incorrect YUV input file version; YUV4MPEG2 required.\n");

+  // Next character must be a space.

+  if (!file_read(tag_buffer, 1, file) || tag_buffer[0] != ' ') {

+    fprintf(stderr, "Error parsing header: space must follow %s\n", TAG);

+    return -1;

-  ret = y4m_parse_tags(_y4m, buffer + 5);

-  if (ret < 0) {

-    fprintf(stderr, "Error parsing YUV4MPEG2 header.\n");

-    return ret;

+  if (!parse_tags(y4m_ctx, file)) {

+    fprintf(stderr, "Error parsing %s header.\n", TAG);

-  if (_y4m->interlace == '?') {

+  if (y4m_ctx->interlace == '?') {

     fprintf(stderr,

             "Warning: Input video interlacing format unknown; "

             "assuming progressive scan.\n");

-  } else if (_y4m->interlace != 'p') {

+  } else if (y4m_ctx->interlace != 'p') {

     fprintf(stderr,

             "Input video is interlaced; "

             "Only progressive scan handled.\n");

     return -1;

-  _y4m->vpx_fmt = VPX_IMG_FMT_I420;

-  _y4m->bps = 12;

-  _y4m->bit_depth = 8;

-  if (strcmp(_y4m->chroma_type, "420") == 0 ||

-      strcmp(_y4m->chroma_type, "420jpeg") == 0) {

-    _y4m->src_c_dec_h = _y4m->dst_c_dec_h = _y4m->src_c_dec_v =

-        _y4m->dst_c_dec_v = 2;

-    _y4m->dst_buf_read_sz =

-        _y4m->pic_w * _y4m->pic_h +

-        2 * ((_y4m->pic_w + 1) / 2) * ((_y4m->pic_h + 1) / 2);

+  y4m_ctx->vpx_fmt = VPX_IMG_FMT_I420;

+  y4m_ctx->bps = 12;

+  y4m_ctx->bit_depth = 8;

+  y4m_ctx->aux_buf = NULL;

+  y4m_ctx->dst_buf = NULL;

+  if (strcmp(y4m_ctx->chroma_type, "420") == 0 ||

+      strcmp(y4m_ctx->chroma_type, "420jpeg") == 0 ||

+      strcmp(y4m_ctx->chroma_type, "420mpeg2") == 0) {

+    y4m_ctx->src_c_dec_h = y4m_ctx->dst_c_dec_h = y4m_ctx->src_c_dec_v =

+        y4m_ctx->dst_c_dec_v = 2;

+    y4m_ctx->dst_buf_read_sz =

+        y4m_ctx->pic_w * y4m_ctx->pic_h +

+        2 * ((y4m_ctx->pic_w + 1) / 2) * ((y4m_ctx->pic_h + 1) / 2);

     /* Natively supported: no conversion required. */

-    _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;

-    _y4m->convert = y4m_convert_null;

-  } else if (strcmp(_y4m->chroma_type, "420p10") == 0) {

-    _y4m->src_c_dec_h = 2;

-    _y4m->dst_c_dec_h = 2;

-    _y4m->src_c_dec_v = 2;

-    _y4m->dst_c_dec_v = 2;

-    _y4m->dst_buf_read_sz =

-        2 * (_y4m->pic_w * _y4m->pic_h +

-             2 * ((_y4m->pic_w + 1) / 2) * ((_y4m->pic_h + 1) / 2));

+    y4m_ctx->aux_buf_sz = y4m_ctx->aux_buf_read_sz = 0;

+    y4m_ctx->convert = y4m_convert_null;

+  } else if (strcmp(y4m_ctx->chroma_type, "420p10") == 0) {

+    y4m_ctx->src_c_dec_h = 2;

+    y4m_ctx->dst_c_dec_h = 2;

+    y4m_ctx->src_c_dec_v = 2;

+    y4m_ctx->dst_c_dec_v = 2;

+    y4m_ctx->dst_buf_read_sz =

+        2 * (y4m_ctx->pic_w * y4m_ctx->pic_h +

+             2 * ((y4m_ctx->pic_w + 1) / 2) * ((y4m_ctx->pic_h + 1) / 2));

     /* Natively supported: no conversion required. */

-    _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;

-    _y4m->convert = y4m_convert_null;

-    _y4m->bit_depth = 10;

-    _y4m->bps = 15;

-    _y4m->vpx_fmt = VPX_IMG_FMT_I42016;

+    y4m_ctx->aux_buf_sz = y4m_ctx->aux_buf_read_sz = 0;

+    y4m_ctx->convert = y4m_convert_null;

+    y4m_ctx->bit_depth = 10;

+    y4m_ctx->bps = 15;

+    y4m_ctx->vpx_fmt = VPX_IMG_FMT_I42016;

     if (only_420) {

       fprintf(stderr, "Unsupported conversion from 420p10 to 420jpeg\n");

       return -1;

-  } else if (strcmp(_y4m->chroma_type, "420p12") == 0) {

-    _y4m->src_c_dec_h = 2;

-    _y4m->dst_c_dec_h = 2;

-    _y4m->src_c_dec_v = 2;

-    _y4m->dst_c_dec_v = 2;

-    _y4m->dst_buf_read_sz =

-        2 * (_y4m->pic_w * _y4m->pic_h +

-             2 * ((_y4m->pic_w + 1) / 2) * ((_y4m->pic_h + 1) / 2));

+  } else if (strcmp(y4m_ctx->chroma_type, "420p12") == 0) {

+    y4m_ctx->src_c_dec_h = 2;

+    y4m_ctx->dst_c_dec_h = 2;

+    y4m_ctx->src_c_dec_v = 2;

+    y4m_ctx->dst_c_dec_v = 2;

+    y4m_ctx->dst_buf_read_sz =

+        2 * (y4m_ctx->pic_w * y4m_ctx->pic_h +

+             2 * ((y4m_ctx->pic_w + 1) / 2) * ((y4m_ctx->pic_h + 1) / 2));

     /* Natively supported: no conversion required. */

-    _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;

-    _y4m->convert = y4m_convert_null;

-    _y4m->bit_depth = 12;

-    _y4m->bps = 18;

-    _y4m->vpx_fmt = VPX_IMG_FMT_I42016;

+    y4m_ctx->aux_buf_sz = y4m_ctx->aux_buf_read_sz = 0;

+    y4m_ctx->convert = y4m_convert_null;

+    y4m_ctx->bit_depth = 12;

+    y4m_ctx->bps = 18;

+    y4m_ctx->vpx_fmt = VPX_IMG_FMT_I42016;

     if (only_420) {

       fprintf(stderr, "Unsupported conversion from 420p12 to 420jpeg\n");

       return -1;

-  } else if (strcmp(_y4m->chroma_type, "420mpeg2") == 0) {

-    _y4m->src_c_dec_h = _y4m->dst_c_dec_h = _y4m->src_c_dec_v =

-        _y4m->dst_c_dec_v = 2;

-    _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;

-    /*Chroma filter required: read into the aux buf first.*/

-    _y4m->aux_buf_sz = _y4m->aux_buf_read_sz =

-        2 * ((_y4m->pic_w + 1) / 2) * ((_y4m->pic_h + 1) / 2);

-    _y4m->convert = y4m_convert_42xmpeg2_42xjpeg;

-  } else if (strcmp(_y4m->chroma_type, "420paldv") == 0) {

-    _y4m->src_c_dec_h = _y4m->dst_c_dec_h = _y4m->src_c_dec_v =

-        _y4m->dst_c_dec_v = 2;

-    _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;

+  } else if (strcmp(y4m_ctx->chroma_type, "420paldv") == 0) {

+    y4m_ctx->src_c_dec_h = y4m_ctx->dst_c_dec_h = y4m_ctx->src_c_dec_v =

+        y4m_ctx->dst_c_dec_v = 2;

+    y4m_ctx->dst_buf_read_sz = y4m_ctx->pic_w * y4m_ctx->pic_h;

     /*Chroma filter required: read into the aux buf first.

       We need to make two filter passes, so we need some extra space in the

        aux buffer.*/

-    _y4m->aux_buf_sz = 3 * ((_y4m->pic_w + 1) / 2) * ((_y4m->pic_h + 1) / 2);

-    _y4m->aux_buf_read_sz =

-        2 * ((_y4m->pic_w + 1) / 2) * ((_y4m->pic_h + 1) / 2);

-    _y4m->convert = y4m_convert_42xpaldv_42xjpeg;

-  } else if (strcmp(_y4m->chroma_type, "422jpeg") == 0) {

-    _y4m->src_c_dec_h = _y4m->dst_c_dec_h = 2;

-    _y4m->src_c_dec_v = 1;

-    _y4m->dst_c_dec_v = 2;

-    _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;

+    y4m_ctx->aux_buf_sz =

+        3 * ((y4m_ctx->pic_w + 1) / 2) * ((y4m_ctx->pic_h + 1) / 2);

+    y4m_ctx->aux_buf_read_sz =

+        2 * ((y4m_ctx->pic_w + 1) / 2) * ((y4m_ctx->pic_h + 1) / 2);

+    y4m_ctx->convert = y4m_convert_42xpaldv_42xjpeg;

+  } else if (strcmp(y4m_ctx->chroma_type, "422jpeg") == 0) {

+    y4m_ctx->src_c_dec_h = y4m_ctx->dst_c_dec_h = 2;

+    y4m_ctx->src_c_dec_v = 1;

+    y4m_ctx->dst_c_dec_v = 2;

+    y4m_ctx->dst_buf_read_sz = y4m_ctx->pic_w * y4m_ctx->pic_h;

     /*Chroma filter required: read into the aux buf first.*/

-    _y4m->aux_buf_sz = _y4m->aux_buf_read_sz =

-        2 * ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;

-    _y4m->convert = y4m_convert_422jpeg_420jpeg;

-  } else if (strcmp(_y4m->chroma_type, "422") == 0) {

-    _y4m->src_c_dec_h = 2;

-    _y4m->src_c_dec_v = 1;

+    y4m_ctx->aux_buf_sz = y4m_ctx->aux_buf_read_sz =

+        2 * ((y4m_ctx->pic_w + 1) / 2) * y4m_ctx->pic_h;

+    y4m_ctx->convert = y4m_convert_422jpeg_420jpeg;

+  } else if (strcmp(y4m_ctx->chroma_type, "422") == 0) {

+    y4m_ctx->src_c_dec_h = 2;

+    y4m_ctx->src_c_dec_v = 1;

     if (only_420) {

-      _y4m->dst_c_dec_h = 2;

-      _y4m->dst_c_dec_v = 2;

-      _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;

+      y4m_ctx->dst_c_dec_h = 2;

+      y4m_ctx->dst_c_dec_v = 2;

+      y4m_ctx->dst_buf_read_sz = y4m_ctx->pic_w * y4m_ctx->pic_h;

       /*Chroma filter required: read into the aux buf first.

         We need to make two filter passes, so we need some extra space in the

          aux buffer.*/

-      _y4m->aux_buf_read_sz = 2 * ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;

-      _y4m->aux_buf_sz =

-          _y4m->aux_buf_read_sz + ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;

-      _y4m->convert = y4m_convert_422_420jpeg;

+      y4m_ctx->aux_buf_read_sz =

+          2 * ((y4m_ctx->pic_w + 1) / 2) * y4m_ctx->pic_h;

+      y4m_ctx->aux_buf_sz = y4m_ctx->aux_buf_read_sz +

+                            ((y4m_ctx->pic_w + 1) / 2) * y4m_ctx->pic_h;

+      y4m_ctx->convert = y4m_convert_422_420jpeg;

     } else {

-      _y4m->vpx_fmt = VPX_IMG_FMT_I422;

-      _y4m->bps = 16;

-      _y4m->dst_c_dec_h = _y4m->src_c_dec_h;

-      _y4m->dst_c_dec_v = _y4m->src_c_dec_v;

-      _y4m->dst_buf_read_sz =

-          _y4m->pic_w * _y4m->pic_h + 2 * ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;

+      y4m_ctx->vpx_fmt = VPX_IMG_FMT_I422;

+      y4m_ctx->bps = 16;

+      y4m_ctx->dst_c_dec_h = y4m_ctx->src_c_dec_h;

+      y4m_ctx->dst_c_dec_v = y4m_ctx->src_c_dec_v;

+      y4m_ctx->dst_buf_read_sz =

+          y4m_ctx->pic_w * y4m_ctx->pic_h +

+          2 * ((y4m_ctx->pic_w + 1) / 2) * y4m_ctx->pic_h;

       /*Natively supported: no conversion required.*/

-      _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;

-      _y4m->convert = y4m_convert_null;

+      y4m_ctx->aux_buf_sz = y4m_ctx->aux_buf_read_sz = 0;

+      y4m_ctx->convert = y4m_convert_null;

-  } else if (strcmp(_y4m->chroma_type, "422p10") == 0) {

-    _y4m->src_c_dec_h = 2;

-    _y4m->src_c_dec_v = 1;

-    _y4m->vpx_fmt = VPX_IMG_FMT_I42216;

-    _y4m->bps = 20;

-    _y4m->bit_depth = 10;

-    _y4m->dst_c_dec_h = _y4m->src_c_dec_h;

-    _y4m->dst_c_dec_v = _y4m->src_c_dec_v;

-    _y4m->dst_buf_read_sz = 2 * (_y4m->pic_w * _y4m->pic_h +

-                                 2 * ((_y4m->pic_w + 1) / 2) * _y4m->pic_h);

-    _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;

-    _y4m->convert = y4m_convert_null;

+  } else if (strcmp(y4m_ctx->chroma_type, "422p10") == 0) {

+    y4m_ctx->src_c_dec_h = 2;

+    y4m_ctx->src_c_dec_v = 1;

+    y4m_ctx->vpx_fmt = VPX_IMG_FMT_I42216;

+    y4m_ctx->bps = 20;

+    y4m_ctx->bit_depth = 10;

+    y4m_ctx->dst_c_dec_h = y4m_ctx->src_c_dec_h;

+    y4m_ctx->dst_c_dec_v = y4m_ctx->src_c_dec_v;

+    y4m_ctx->dst_buf_read_sz =

+        2 * (y4m_ctx->pic_w * y4m_ctx->pic_h +

+             2 * ((y4m_ctx->pic_w + 1) / 2) * y4m_ctx->pic_h);

+    y4m_ctx->aux_buf_sz = y4m_ctx->aux_buf_read_sz = 0;

+    y4m_ctx->convert = y4m_convert_null;

     if (only_420) {

       fprintf(stderr, "Unsupported conversion from 422p10 to 420jpeg\n");

       return -1;

-  } else if (strcmp(_y4m->chroma_type, "422p12") == 0) {

-    _y4m->src_c_dec_h = 2;

-    _y4m->src_c_dec_v = 1;

-    _y4m->vpx_fmt = VPX_IMG_FMT_I42216;

-    _y4m->bps = 24;

-    _y4m->bit_depth = 12;

-    _y4m->dst_c_dec_h = _y4m->src_c_dec_h;

-    _y4m->dst_c_dec_v = _y4m->src_c_dec_v;

-    _y4m->dst_buf_read_sz = 2 * (_y4m->pic_w * _y4m->pic_h +

-                                 2 * ((_y4m->pic_w + 1) / 2) * _y4m->pic_h);

-    _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;

-    _y4m->convert = y4m_convert_null;

+  } else if (strcmp(y4m_ctx->chroma_type, "422p12") == 0) {

+    y4m_ctx->src_c_dec_h = 2;

+    y4m_ctx->src_c_dec_v = 1;

+    y4m_ctx->vpx_fmt = VPX_IMG_FMT_I42216;

+    y4m_ctx->bps = 24;

+    y4m_ctx->bit_depth = 12;

+    y4m_ctx->dst_c_dec_h = y4m_ctx->src_c_dec_h;

+    y4m_ctx->dst_c_dec_v = y4m_ctx->src_c_dec_v;

+    y4m_ctx->dst_buf_read_sz =

+        2 * (y4m_ctx->pic_w * y4m_ctx->pic_h +

+             2 * ((y4m_ctx->pic_w + 1) / 2) * y4m_ctx->pic_h);

+    y4m_ctx->aux_buf_sz = y4m_ctx->aux_buf_read_sz = 0;

+    y4m_ctx->convert = y4m_convert_null;

     if (only_420) {

       fprintf(stderr, "Unsupported conversion from 422p12 to 420jpeg\n");

       return -1;

-  } else if (strcmp(_y4m->chroma_type, "411") == 0) {

-    _y4m->src_c_dec_h = 4;

-    _y4m->dst_c_dec_h = 2;

-    _y4m->src_c_dec_v = 1;

-    _y4m->dst_c_dec_v = 2;

-    _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;

+  } else if (strcmp(y4m_ctx->chroma_type, "411") == 0) {

+    y4m_ctx->src_c_dec_h = 4;

+    y4m_ctx->dst_c_dec_h = 2;

+    y4m_ctx->src_c_dec_v = 1;

+    y4m_ctx->dst_c_dec_v = 2;

+    y4m_ctx->dst_buf_read_sz = y4m_ctx->pic_w * y4m_ctx->pic_h;

     /*Chroma filter required: read into the aux buf first.

       We need to make two filter passes, so we need some extra space in the

        aux buffer.*/

-    _y4m->aux_buf_read_sz = 2 * ((_y4m->pic_w + 3) / 4) * _y4m->pic_h;

-    _y4m->aux_buf_sz =

-        _y4m->aux_buf_read_sz + ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;

-    _y4m->convert = y4m_convert_411_420jpeg;

+    y4m_ctx->aux_buf_read_sz = 2 * ((y4m_ctx->pic_w + 3) / 4) * y4m_ctx->pic_h;

+    y4m_ctx->aux_buf_sz =

+        y4m_ctx->aux_buf_read_sz + ((y4m_ctx->pic_w + 1) / 2) * y4m_ctx->pic_h;

+    y4m_ctx->convert = y4m_convert_411_420jpeg;

     fprintf(stderr, "Unsupported conversion from yuv 411\n");

     return -1;

-  } else if (strcmp(_y4m->chroma_type, "444") == 0) {

-    _y4m->src_c_dec_h = 1;

-    _y4m->src_c_dec_v = 1;

+  } else if (strcmp(y4m_ctx->chroma_type, "444") == 0) {

+    y4m_ctx->src_c_dec_h = 1;

+    y4m_ctx->src_c_dec_v = 1;

     if (only_420) {

-      _y4m->dst_c_dec_h = 2;

-      _y4m->dst_c_dec_v = 2;

-      _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;

+      y4m_ctx->dst_c_dec_h = 2;

+      y4m_ctx->dst_c_dec_v = 2;

+      y4m_ctx->dst_buf_read_sz = y4m_ctx->pic_w * y4m_ctx->pic_h;

       /*Chroma filter required: read into the aux buf first.

         We need to make two filter passes, so we need some extra space in the

          aux buffer.*/

-      _y4m->aux_buf_read_sz = 2 * _y4m->pic_w * _y4m->pic_h;

-      _y4m->aux_buf_sz =

-          _y4m->aux_buf_read_sz + ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;

-      _y4m->convert = y4m_convert_444_420jpeg;

+      y4m_ctx->aux_buf_read_sz = 2 * y4m_ctx->pic_w * y4m_ctx->pic_h;

+      y4m_ctx->aux_buf_sz = y4m_ctx->aux_buf_read_sz +

+                            ((y4m_ctx->pic_w + 1) / 2) * y4m_ctx->pic_h;

+      y4m_ctx->convert = y4m_convert_444_420jpeg;

     } else {

-      _y4m->vpx_fmt = VPX_IMG_FMT_I444;

-      _y4m->bps = 24;

-      _y4m->dst_c_dec_h = _y4m->src_c_dec_h;

-      _y4m->dst_c_dec_v = _y4m->src_c_dec_v;

-      _y4m->dst_buf_read_sz = 3 * _y4m->pic_w * _y4m->pic_h;

+      y4m_ctx->vpx_fmt = VPX_IMG_FMT_I444;

+      y4m_ctx->bps = 24;

+      y4m_ctx->dst_c_dec_h = y4m_ctx->src_c_dec_h;

+      y4m_ctx->dst_c_dec_v = y4m_ctx->src_c_dec_v;

+      y4m_ctx->dst_buf_read_sz = 3 * y4m_ctx->pic_w * y4m_ctx->pic_h;

       /*Natively supported: no conversion required.*/

-      _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;

-      _y4m->convert = y4m_convert_null;

+      y4m_ctx->aux_buf_sz = y4m_ctx->aux_buf_read_sz = 0;

+      y4m_ctx->convert = y4m_convert_null;

-  } else if (strcmp(_y4m->chroma_type, "444p10") == 0) {

-    _y4m->src_c_dec_h = 1;

-    _y4m->src_c_dec_v = 1;

-    _y4m->vpx_fmt = VPX_IMG_FMT_I44416;

-    _y4m->bps = 30;

-    _y4m->bit_depth = 10;

-    _y4m->dst_c_dec_h = _y4m->src_c_dec_h;

-    _y4m->dst_c_dec_v = _y4m->src_c_dec_v;

-    _y4m->dst_buf_read_sz = 2 * 3 * _y4m->pic_w * _y4m->pic_h;

-    _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;

-    _y4m->convert = y4m_convert_null;

+  } else if (strcmp(y4m_ctx->chroma_type, "444p10") == 0) {

+    y4m_ctx->src_c_dec_h = 1;

+    y4m_ctx->src_c_dec_v = 1;

+    y4m_ctx->vpx_fmt = VPX_IMG_FMT_I44416;

+    y4m_ctx->bps = 30;

+    y4m_ctx->bit_depth = 10;

+    y4m_ctx->dst_c_dec_h = y4m_ctx->src_c_dec_h;

+    y4m_ctx->dst_c_dec_v = y4m_ctx->src_c_dec_v;

+    y4m_ctx->dst_buf_read_sz = 2 * 3 * y4m_ctx->pic_w * y4m_ctx->pic_h;

+    y4m_ctx->aux_buf_sz = y4m_ctx->aux_buf_read_sz = 0;

+    y4m_ctx->convert = y4m_convert_null;

     if (only_420) {

       fprintf(stderr, "Unsupported conversion from 444p10 to 420jpeg\n");

       return -1;

-  } else if (strcmp(_y4m->chroma_type, "444p12") == 0) {

-    _y4m->src_c_dec_h = 1;

-    _y4m->src_c_dec_v = 1;

-    _y4m->vpx_fmt = VPX_IMG_FMT_I44416;

-    _y4m->bps = 36;

-    _y4m->bit_depth = 12;

-    _y4m->dst_c_dec_h = _y4m->src_c_dec_h;

-    _y4m->dst_c_dec_v = _y4m->src_c_dec_v;

-    _y4m->dst_buf_read_sz = 2 * 3 * _y4m->pic_w * _y4m->pic_h;

-    _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;

-    _y4m->convert = y4m_convert_null;

+  } else if (strcmp(y4m_ctx->chroma_type, "444p12") == 0) {

+    y4m_ctx->src_c_dec_h = 1;

+    y4m_ctx->src_c_dec_v = 1;

+    y4m_ctx->vpx_fmt = VPX_IMG_FMT_I44416;

+    y4m_ctx->bps = 36;

+    y4m_ctx->bit_depth = 12;

+    y4m_ctx->dst_c_dec_h = y4m_ctx->src_c_dec_h;

+    y4m_ctx->dst_c_dec_v = y4m_ctx->src_c_dec_v;

+    y4m_ctx->dst_buf_read_sz = 2 * 3 * y4m_ctx->pic_w * y4m_ctx->pic_h;

+    y4m_ctx->aux_buf_sz = y4m_ctx->aux_buf_read_sz = 0;

+    y4m_ctx->convert = y4m_convert_null;

     if (only_420) {

       fprintf(stderr, "Unsupported conversion from 444p12 to 420jpeg\n");

       return -1;

-  } else if (strcmp(_y4m->chroma_type, "mono") == 0) {

-    _y4m->src_c_dec_h = _y4m->src_c_dec_v = 0;

-    _y4m->dst_c_dec_h = _y4m->dst_c_dec_v = 2;

-    _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;

+  } else if (strcmp(y4m_ctx->chroma_type, "mono") == 0) {

+    y4m_ctx->src_c_dec_h = y4m_ctx->src_c_dec_v = 0;

+    y4m_ctx->dst_c_dec_h = y4m_ctx->dst_c_dec_v = 2;

+    y4m_ctx->dst_buf_read_sz = y4m_ctx->pic_w * y4m_ctx->pic_h;

     /*No extra space required, but we need to clear the chroma planes.*/

-    _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;

-    _y4m->convert = y4m_convert_mono_420jpeg;

+    y4m_ctx->aux_buf_sz = y4m_ctx->aux_buf_read_sz = 0;

+    y4m_ctx->convert = y4m_convert_mono_420jpeg;

   } else {

-    fprintf(stderr, "Unknown chroma sampling type: %s\n", _y4m->chroma_type);

+    fprintf(stderr, "Unknown chroma sampling type: %s\n", y4m_ctx->chroma_type);

     return -1;

   /*The size of the final frame buffers is always computed from the

      destination chroma decimation type.*/

-  _y4m->dst_buf_sz =

-      _y4m->pic_w * _y4m->pic_h +

-      2 * ((_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h) *

-          ((_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v);

-  if (_y4m->bit_depth == 8)

-    _y4m->dst_buf = (unsigned char *)malloc(_y4m->dst_buf_sz);

+  y4m_ctx->dst_buf_sz =

+      y4m_ctx->pic_w * y4m_ctx->pic_h +

+      2 * ((y4m_ctx->pic_w + y4m_ctx->dst_c_dec_h - 1) / y4m_ctx->dst_c_dec_h) *

+          ((y4m_ctx->pic_h + y4m_ctx->dst_c_dec_v - 1) / y4m_ctx->dst_c_dec_v);

+  if (y4m_ctx->bit_depth == 8)

+    y4m_ctx->dst_buf = (unsigned char *)malloc(y4m_ctx->dst_buf_sz);

   else

-    _y4m->dst_buf = (unsigned char *)malloc(2 * _y4m->dst_buf_sz);

+    y4m_ctx->dst_buf = (unsigned char *)malloc(2 * y4m_ctx->dst_buf_sz);

-  if (_y4m->aux_buf_sz > 0)

-    _y4m->aux_buf = (unsigned char *)malloc(_y4m->aux_buf_sz);

+  if (y4m_ctx->aux_buf_sz > 0)

+    y4m_ctx->aux_buf = (unsigned char *)malloc(y4m_ctx->aux_buf_sz);

   return 0;

--- a/y4minput.h

+++ b/y4minput.h

@@ -56,8 +56,16 @@

   unsigned int bit_depth;

};

-int y4m_input_open(y4m_input *_y4m, FILE *_fin, char *_skip, int _nskip,

-                   int only_420);

+/**

+ * Open the input file, treating it as Y4M. |y4m_ctx| is filled in after

+ * reading it. The |skip_buffer| indicates bytes that were previously read

+ * from |file|, to do input-type detection; this buffer will be read before

+ * the |file| is read. It is of size |num_skip|, which *must* be 8 or less.

+ *

+ * Returns 0 on success, -1 on failure.

+ */

+int y4m_input_open(y4m_input *y4m_ctx, FILE *file, char *skip_buffer,

+                   int num_skip, int only_420);

 void y4m_input_close(y4m_input *_y4m);

 int y4m_input_fetch_frame(y4m_input *_y4m, FILE *_fin, vpx_image_t *img);