shithub: libvpx

--- a/examples/vp9_spatial_svc_encoder.c

+++ b/examples/vp9_spatial_svc_encoder.c

@@ -503,10 +503,8 @@

   printf("Average, rms-variance, and percent-fluct: %f %f %f \n",

          rc->avg_st_encoding_bitrate, sqrt(rc->variance_st_encoding_bitrate),

          perc_fluctuation);

-  if (frame_cnt != tot_num_frames)

-    die("Error: Number of input frames not equal to output encoded frames != "

-        "%d tot_num_frames = %d\n",

-        frame_cnt, tot_num_frames);

+  printf("Num of input, num of encoded (super) frames: %d %d \n", frame_cnt,

+         tot_num_frames);

 vpx_codec_err_t parse_superframe_index(const uint8_t *data, size_t data_sz,

@@ -784,8 +782,10 @@

           if (cx_pkt->data.frame.sz > 0) {

 #if OUTPUT_RC_STATS

             uint64_t sizes[8];

+            uint64_t sizes_parsed[8];

             int count = 0;

             vp9_zero(sizes);

+            vp9_zero(sizes_parsed);

 #endif

             vpx_video_writer_write_frame(writer, cx_pkt->data.frame.buf,

                                          cx_pkt->data.frame.sz,

@@ -795,18 +795,21 @@

             if (svc_ctx.output_rc_stat) {

               vpx_codec_control(&codec, VP9E_GET_SVC_LAYER_ID, &layer_id);

               parse_superframe_index(cx_pkt->data.frame.buf,

-                                     cx_pkt->data.frame.sz, sizes, &count);

+                                     cx_pkt->data.frame.sz, sizes_parsed,

+                                     &count);

               if (enc_cfg.ss_number_layers == 1)

                 sizes[0] = cx_pkt->data.frame.sz;

-              // Note computing input_layer_frames here won't account for frame

-              // drops in rate control stats.

-              // TODO(marpan): Fix this for non-bypass mode so we can get stats

-              // for dropped frames.

               if (svc_ctx.temporal_layering_mode !=

                   VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {

+                int num_layers_encoded = 0;

                 for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {

                   ++rc.layer_input_frames[sl * enc_cfg.ts_number_layers +

                                           layer_id.temporal_layer_id];

+                  sizes[sl] = 0;

+                  if (cx_pkt->data.frame.spatial_layer_encoded[sl]) {

+                    sizes[sl] = sizes_parsed[num_layers_encoded];

+                    num_layers_encoded++;

+                  }

               for (tl = layer_id.temporal_layer_id;

@@ -817,20 +820,22 @@

               for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {

-                for (tl = layer_id.temporal_layer_id;

-                     tl < enc_cfg.ts_number_layers; ++tl) {

-                  const int layer = sl * enc_cfg.ts_number_layers + tl;

-                  ++rc.layer_tot_enc_frames[layer];

-                  rc.layer_encoding_bitrate[layer] += 8.0 * sizes[sl];

-                  // Keep count of rate control stats per layer, for non-key

-                  // frames.

-                  if (tl == (unsigned int)layer_id.temporal_layer_id &&

-                      !(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY)) {

-                    rc.layer_avg_frame_size[layer] += 8.0 * sizes[sl];

-                    rc.layer_avg_rate_mismatch[layer] +=

-                        fabs(8.0 * sizes[sl] - rc.layer_pfb[layer]) /

-                        rc.layer_pfb[layer];

-                    ++rc.layer_enc_frames[layer];

+                if (cx_pkt->data.frame.spatial_layer_encoded[sl]) {

+                  for (tl = layer_id.temporal_layer_id;

+                       tl < enc_cfg.ts_number_layers; ++tl) {

+                    const int layer = sl * enc_cfg.ts_number_layers + tl;

+                    ++rc.layer_tot_enc_frames[layer];

+                    rc.layer_encoding_bitrate[layer] += 8.0 * sizes[sl];

+                    // Keep count of rate control stats per layer, for non-key

+                    // frames.

+                    if (tl == (unsigned int)layer_id.temporal_layer_id &&

+                        !(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY)) {

+                      rc.layer_avg_frame_size[layer] += 8.0 * sizes[sl];

+                      rc.layer_avg_rate_mismatch[layer] +=

+                          fabs(8.0 * sizes[sl] - rc.layer_pfb[layer]) /

+                          rc.layer_pfb[layer];

+                      ++rc.layer_enc_frames[layer];

+                    }

@@ -840,7 +845,8 @@

               // Ignore first window segment, due to key frame.

               if (frame_cnt > (unsigned int)rc.window_size) {

                 for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {

-                  sum_bitrate += 0.001 * 8.0 * sizes[sl] * framerate;

+                  if (cx_pkt->data.frame.spatial_layer_encoded[sl])

+                    sum_bitrate += 0.001 * 8.0 * sizes[sl] * framerate;

                 if (frame_cnt % rc.window_size == 0) {

                   rc.window_count += 1;

--- a/test/datarate_test.cc

+++ b/test/datarate_test.cc

@@ -1368,6 +1368,7 @@

     top_bitrate_ = 0;

     superframe_count_ = -1;

     key_frame_spacing_ = 9999;

+    num_nonref_frames_ = 0;

   virtual void BeginPassHook(unsigned int /*pass*/) {}

@@ -1588,7 +1589,9 @@

   virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {

     uint32_t sizes[8] = { 0 };

+    uint32_t sizes_parsed[8] = { 0 };

     int count = 0;

+    int num_layers_encoded = 0;

     last_pts_ = pkt->data.frame.pts;

     const bool key_frame =

         (pkt->data.frame.flags & VPX_FRAME_IS_KEY) ? true : false;

@@ -1597,10 +1600,23 @@

       superframe_count_ = 0;

     parse_superframe_index(static_cast<const uint8_t *>(pkt->data.frame.buf),

-                           pkt->data.frame.sz, sizes, &count);

+                           pkt->data.frame.sz, sizes_parsed, &count);

     // Count may be less than number of spatial layers because of frame drops.

-    ASSERT_LE(count, number_spatial_layers_);

     for (int sl = 0; sl < number_spatial_layers_; ++sl) {

+      if (pkt->data.frame.spatial_layer_encoded[sl]) {

+        sizes[sl] = sizes_parsed[num_layers_encoded];

+        num_layers_encoded++;

+      }

+    }

+    ASSERT_EQ(count, num_layers_encoded);

+    // Keep track of number of non-reference frames, needed for mismatch check.

+    // Non-reference frames are top spatial and temporal layer frames,

+    // for TL > 0.

+    if (temporal_layer_id_ == number_temporal_layers_ - 1 &&

+        temporal_layer_id_ > 0 &&

+        pkt->data.frame.spatial_layer_encoded[number_spatial_layers_ - 1])

+      num_nonref_frames_++;

+    for (int sl = 0; sl < number_spatial_layers_; ++sl) {

       sizes[sl] = sizes[sl] << 3;

       // Update the total encoded bits per layer.

       // For temporal layers, update the cumulative encoded bits per layer.

@@ -1678,6 +1694,7 @@

   int top_bitrate_;

   int superframe_count_;

   int key_frame_spacing_;

+  unsigned int num_nonref_frames_;

};

 // Check basic rate targeting for 1 pass CBR SVC: 2 spatial layers and 1

@@ -1718,7 +1735,11 @@

   ASSERT_NO_FATAL_FAILURE(RunLoop(&video));

   CheckLayerRateTargeting(&cfg_, number_spatial_layers_,

                           number_temporal_layers_, file_datarate_, 0.78, 1.15);

-  EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());

+#if CONFIG_VP9_DECODER

+  // The non-reference frames are expected to be mismatched frames as the

+  // encoder will avoid loopfilter on these frames.

+  EXPECT_EQ(num_nonref_frames_, GetMismatchFrames());

+#endif

 // Check basic rate targeting for 1 pass CBR SVC: 2 spatial layers and

@@ -1761,14 +1782,11 @@

                           layer_target_avg_bandwidth_, bits_in_buffer_model_);

     ASSERT_NO_FATAL_FAILURE(RunLoop(&video));

     CheckLayerRateTargeting(&cfg_, number_spatial_layers_,

-                            number_temporal_layers_, file_datarate_, 0.78,

-                            1.15);

+                            number_temporal_layers_, file_datarate_, 0.75, 1.2);

 #if CONFIG_VP9_DECODER

-    // Number of temporal layers > 1, so half of the frames in this SVC pattern

-    // will be non-reference frame and hence encoder will avoid loopfilter.

-    // Since frame dropper is off, we can expect 200 (half of the sequence)

-    // mismatched frames.

-    EXPECT_EQ(static_cast<unsigned int>(200), GetMismatchFrames());

+    // The non-reference frames are expected to be mismatched frames as the

+    // encoder will avoid loopfilter on these frames.

+    EXPECT_EQ(num_nonref_frames_, GetMismatchFrames());

 #endif

@@ -1820,12 +1838,9 @@

                               number_temporal_layers_, file_datarate_, 0.78,

                               1.15);

 #if CONFIG_VP9_DECODER

-      // Number of temporal layers > 1, so half of the frames in this SVC

-      // pattern

-      // will be non-reference frame and hence encoder will avoid loopfilter.

-      // Since frame dropper is off, we can expect 200 (half of the sequence)

-      // mismatched frames.

-      EXPECT_EQ(static_cast<unsigned int>(200), GetMismatchFrames());

+      // The non-reference frames are expected to be mismatched frames as the

+      // encoder will avoid loopfilter on these frames.

+      EXPECT_EQ(num_nonref_frames_, GetMismatchFrames());

 #endif

@@ -1874,6 +1889,11 @@

     CheckLayerRateTargeting(&cfg_, number_spatial_layers_,

                             number_temporal_layers_, file_datarate_, 0.78,

                             1.15);

+#if CONFIG_VP9_DECODER

+    // The non-reference frames are expected to be mismatched frames as the

+    // encoder will avoid loopfilter on these frames.

+    EXPECT_EQ(num_nonref_frames_, GetMismatchFrames());

+#endif

@@ -1906,21 +1926,21 @@

   ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60);

   top_sl_width_ = 1280;

   top_sl_height_ = 720;

-  cfg_.rc_target_bitrate = 800;

-  ResetModel();

-  assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,

-                        cfg_.ts_number_layers, cfg_.temporal_layering_mode,

-                        layer_target_avg_bandwidth_, bits_in_buffer_model_);

-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));

-  CheckLayerRateTargeting(&cfg_, number_spatial_layers_,

-                          number_temporal_layers_, file_datarate_, 0.78, 1.15);

+  for (int i = 200; i <= 800; i += 300) {

+    cfg_.rc_target_bitrate = i;

+    ResetModel();

+    assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,

+                          cfg_.ts_number_layers, cfg_.temporal_layering_mode,

+                          layer_target_avg_bandwidth_, bits_in_buffer_model_);

+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));

+    CheckLayerRateTargeting(&cfg_, number_spatial_layers_,

+                            number_temporal_layers_, file_datarate_, 0.75, 1.2);

 #if CONFIG_VP9_DECODER

-  // Number of temporal layers > 1, so half of the frames in this SVC pattern

-  // will be non-reference frame and hence encoder will avoid loopfilter.

-  // Since frame dropper is off, we can expect 30 (half of the sequence)

-  // mismatched frames.

-  EXPECT_EQ(static_cast<unsigned int>(30), GetMismatchFrames());

+    // The non-reference frames are expected to be mismatched frames as the

+    // encoder will avoid loopfilter on these frames.

+    EXPECT_EQ(num_nonref_frames_, GetMismatchFrames());

 #endif

+  }

 // Check basic rate targeting for 1 pass CBR SVC: 3 spatial layers and

@@ -1964,11 +1984,9 @@

   CheckLayerRateTargeting(&cfg_, number_spatial_layers_,

                           number_temporal_layers_, file_datarate_, 0.78, 1.15);

 #if CONFIG_VP9_DECODER

-  // Number of temporal layers > 1, so half of the frames in this SVC pattern

-  // will be non-reference frame and hence encoder will avoid loopfilter.

-  // Since frame dropper is off, we can expect 200 (half of the sequence)

-  // mismatched frames.

-  EXPECT_EQ(static_cast<unsigned int>(200), GetMismatchFrames());

+  // The non-reference frames are expected to be mismatched frames as the

+  // encoder will avoid loopfilter on these frames.

+  EXPECT_EQ(num_nonref_frames_, GetMismatchFrames());

 #endif

@@ -2014,11 +2032,9 @@

   CheckLayerRateTargeting(&cfg_, number_spatial_layers_,

                           number_temporal_layers_, file_datarate_, 0.78, 1.15);

 #if CONFIG_VP9_DECODER

-  // Number of temporal layers > 1, so half of the frames in this SVC pattern

-  // will be non-reference frame and hence encoder will avoid loopfilter.

-  // Since frame dropper is off, we can expect 200 (half of the sequence)

-  // mismatched frames.

-  EXPECT_EQ(static_cast<unsigned int>(200), GetMismatchFrames());

+  // The non-reference frames are expected to be mismatched frames as the

+  // encoder will avoid loopfilter on these frames.

+  EXPECT_EQ(num_nonref_frames_, GetMismatchFrames());

 #endif

@@ -2066,11 +2082,9 @@

   CheckLayerRateTargeting(&cfg_, number_spatial_layers_,

                           number_temporal_layers_, file_datarate_, 0.78, 1.15);

 #if CONFIG_VP9_DECODER

-  // Number of temporal layers > 1, so half of the frames in this SVC pattern

-  // will be non-reference frame and hence encoder will avoid loopfilter.

-  // Since frame dropper is off, we can expect 200 (half of the sequence)

-  // mismatched frames.

-  EXPECT_EQ(static_cast<unsigned int>(200), GetMismatchFrames());

+  // The non-reference frames are expected to be mismatched frames as the

+  // encoder will avoid loopfilter on these frames.

+  EXPECT_EQ(num_nonref_frames_, GetMismatchFrames());

 #endif

@@ -2117,6 +2131,11 @@

   // for part of the sequence.

   CheckLayerRateTargeting(&cfg_, number_spatial_layers_ - 1,

                           number_temporal_layers_, file_datarate_, 0.78, 1.15);

+#if CONFIG_VP9_DECODER

+  // The non-reference frames are expected to be mismatched frames as the

+  // encoder will avoid loopfilter on these frames.

+  EXPECT_EQ(num_nonref_frames_, GetMismatchFrames());

+#endif

 // Check basic rate targeting for 1 pass CBR SVC: 3 spatial layers and 3

@@ -2164,12 +2183,17 @@

     CheckLayerRateTargeting(&cfg_, number_spatial_layers_,

                             number_temporal_layers_, file_datarate_, 0.78,

                             1.15);

+#if CONFIG_VP9_DECODER

+    // The non-reference frames are expected to be mismatched frames as the

+    // encoder will avoid loopfilter on these frames.

+    EXPECT_EQ(num_nonref_frames_, GetMismatchFrames());

+#endif

 // Check basic rate targeting for 1 pass CBR SVC: 3 spatial layers and

 // 3 temporal layers. Run HD clip with 4 threads.

-TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SL3TL4threads) {

+TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SL3TL4Threads) {

   cfg_.rc_buf_initial_sz = 500;

   cfg_.rc_buf_optimal_sz = 500;

   cfg_.rc_buf_sz = 1000;

@@ -2198,21 +2222,21 @@

   ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60);

   top_sl_width_ = 1280;

   top_sl_height_ = 720;

-  cfg_.rc_target_bitrate = 800;

-  ResetModel();

-  assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,

-                        cfg_.ts_number_layers, cfg_.temporal_layering_mode,

-                        layer_target_avg_bandwidth_, bits_in_buffer_model_);

-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));

-  CheckLayerRateTargeting(&cfg_, number_spatial_layers_,

-                          number_temporal_layers_, file_datarate_, 0.78, 1.15);

+  for (int i = 200; i <= 800; i += 300) {

+    cfg_.rc_target_bitrate = i;

+    ResetModel();

+    assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,

+                          cfg_.ts_number_layers, cfg_.temporal_layering_mode,

+                          layer_target_avg_bandwidth_, bits_in_buffer_model_);

+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));

+    CheckLayerRateTargeting(&cfg_, number_spatial_layers_,

+                            number_temporal_layers_, file_datarate_, 0.75, 1.2);

 #if CONFIG_VP9_DECODER

-  // Number of temporal layers > 1, so half of the frames in this SVC pattern

-  // will be non-reference frame and hence encoder will avoid loopfilter.

-  // Since frame dropper is off, we can expect 30 (half of the sequence)

-  // mismatched frames.

-  EXPECT_EQ(static_cast<unsigned int>(30), GetMismatchFrames());

+    // The non-reference frames are expected to be mismatched frames as the

+    // encoder will avoid loopfilter on these frames.

+    EXPECT_EQ(num_nonref_frames_, GetMismatchFrames());

 #endif

+  }

 // Run SVC encoder for 1 temporal layer, 2 spatial layers, with spatial

@@ -2258,7 +2282,11 @@

   ASSERT_NO_FATAL_FAILURE(RunLoop(&video));

   CheckLayerRateTargeting(&cfg_, number_spatial_layers_,

                           number_temporal_layers_, file_datarate_, 0.78, 1.15);

-  EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());

+#if CONFIG_VP9_DECODER

+  // The non-reference frames are expected to be mismatched frames as the

+  // encoder will avoid loopfilter on these frames.

+  EXPECT_EQ(num_nonref_frames_, GetMismatchFrames());

+#endif

 VP8_INSTANTIATE_TEST_CASE(DatarateTestLarge, ALL_TEST_MODES,

--- a/vp8/vp8_cx_iface.c

+++ b/vp8/vp8_cx_iface.c

@@ -917,7 +917,7 @@

         pkt.data.frame.flags = lib_flags << 16;

         pkt.data.frame.width[0] = cpi->common.Width;

         pkt.data.frame.height[0] = cpi->common.Height;

-        pkt.data.frame.last_spatial_layer_encoded = 0;

+        pkt.data.frame.spatial_layer_encoded[0] = 1;

         if (lib_flags & FRAMEFLAGS_KEY) {

           pkt.data.frame.flags |= VPX_FRAME_IS_KEY;

--- a/vp9/encoder/vp9_encoder.c

+++ b/vp9/encoder/vp9_encoder.c

@@ -3751,6 +3751,24 @@

   suppress_active_map(cpi);

+  // For SVC on non-zero spatial layer: if the previous spatial layer

+  // was dropped then disable the prediction from this (scaled) reference.

+  if (cpi->use_svc && cpi->svc.spatial_layer_id > 0 &&

+      cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id - 1]) {

+    MV_REFERENCE_FRAME ref_frame;

+    static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,

+                                      VP9_ALT_FLAG };

+    for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {

+      const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);

+      if (yv12 != NULL && (cpi->ref_frame_flags & flag_list[ref_frame])) {

+        const struct scale_factors *const scale_fac =

+            &cm->frame_refs[ref_frame - 1].sf;

+        if (vp9_is_scaled(scale_fac))

+          cpi->ref_frame_flags &= (~flag_list[ref_frame]);

+      }

+    }

+  }

   // Variance adaptive and in frame q adjustment experiments are mutually

   // exclusive.

   if (cpi->oxcf.aq_mode == VARIANCE_AQ) {

@@ -4504,6 +4522,9 @@

     vp9_rc_postencode_update_drop_frame(cpi);

     vp9_inc_frame_in_layer(cpi);

     cpi->ext_refresh_frame_flags_pending = 0;

+    cpi->last_frame_dropped = 1;

+    cpi->svc.last_layer_dropped[cpi->svc.spatial_layer_id] = 1;

+    cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id] = 1;

     return;

@@ -4591,28 +4612,31 @@

   // For 1 pass CBR, check if we are dropping this frame.

-  // For spatial layers, for now if we decide to drop current spatial

-  // layer then we will also drop all upper spatial layers.

-  // TODO(marpan): Allow for the case of dropping single layer only without

-  // dropping all upper layers.

+  // Never drop on key frame, or if base layer is key for svc.

   if (oxcf->pass == 0 && oxcf->rc_mode == VPX_CBR &&

-      cm->frame_type != KEY_FRAME) {

-    if (vp9_rc_drop_frame(cpi) ||

-        (is_one_pass_cbr_svc(cpi) &&

-         cpi->svc.rc_drop_spatial_layer[cpi->svc.spatial_layer_id] == 1)) {

+      cm->frame_type != KEY_FRAME &&

+      (!cpi->use_svc ||

+       !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)) {

+    if (vp9_rc_drop_frame(cpi)) {

       vp9_rc_postencode_update_drop_frame(cpi);

       cpi->ext_refresh_frame_flags_pending = 0;

       cpi->last_frame_dropped = 1;

+      cpi->svc.last_layer_dropped[cpi->svc.spatial_layer_id] = 1;

       if (cpi->use_svc) {

-        int i;

-        // If we are dropping this spatial layer, then we will drop all

-        // upper spatial layers.

-        for (i = cpi->svc.spatial_layer_id; i < cpi->svc.number_spatial_layers;

-             i++)

-          cpi->svc.rc_drop_spatial_layer[i] = 1;

+        cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id] = 1;

         vp9_inc_frame_in_layer(cpi);

-        if (cpi->svc.rc_drop_spatial_layer[0] == 0)

-          cpi->svc.skip_enhancement_layer = 1;

+        cpi->svc.skip_enhancement_layer = 1;

+        if (cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1) {

+          int i;

+          int all_layers_drop = 1;

+          for (i = 0; i < cpi->svc.spatial_layer_id; i++) {

+            if (cpi->svc.drop_spatial_layer[i] == 0) {

+              all_layers_drop = 0;

+              break;

+            }

+          }

+          if (all_layers_drop == 1) cpi->svc.skip_enhancement_layer = 0;

+        }

       return;

@@ -4632,7 +4656,7 @@

   cpi->last_frame_dropped = 0;

-  cpi->svc.last_layer_encoded = cpi->svc.spatial_layer_id;

+  cpi->svc.last_layer_dropped[cpi->svc.spatial_layer_id] = 0;

   // Disable segmentation if it decrease rate/distortion ratio

   if (cpi->oxcf.aq_mode == LOOKAHEAD_AQ)

--- a/vp9/encoder/vp9_ratectrl.c

+++ b/vp9/encoder/vp9_ratectrl.c

@@ -396,9 +396,7 @@

 int vp9_rc_drop_frame(VP9_COMP *cpi) {

   const VP9EncoderConfig *oxcf = &cpi->oxcf;

   RATE_CONTROL *const rc = &cpi->rc;

-  if (!oxcf->drop_frames_water_mark ||

-      (is_one_pass_cbr_svc(cpi) &&

-       cpi->svc.rc_drop_spatial_layer[cpi->svc.spatial_layer_id] == 1)) {

+  if (!oxcf->drop_frames_water_mark) {

     return 0;

   } else {

     if (rc->buffer_level < 0) {

--- a/vp9/encoder/vp9_speed_features.c

+++ b/vp9/encoder/vp9_speed_features.c

@@ -596,7 +596,8 @@

     if (!cpi->last_frame_dropped && cpi->resize_state == ORIG &&

         !cpi->external_resize &&

         (!cpi->use_svc ||

-         cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)) {

+         (cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1 &&

+          !cpi->svc.last_layer_dropped[cpi->svc.number_spatial_layers - 1]))) {

       sf->copy_partition_flag = 1;

       cpi->max_copied_frame = 2;

       // The top temporal enhancement layer (for number of temporal layers > 1)

@@ -666,6 +667,11 @@

           (uint8_t *)vpx_calloc((cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1),

                                 sizeof(*cpi->count_lastgolden_frame_usage));

+  // Disable adaptive_rd_thresh for row_mt for SVC with frame dropping.

+  // This is causing some tests to fail.

+  // TODO(marpan/jianj): Look into this failure and re-enable later.

+  if (cpi->use_svc && cpi->oxcf.drop_frames_water_mark)

+    sf->adaptive_rd_thresh_row_mt = 0;

 void vp9_set_speed_features_framesize_dependent(VP9_COMP *cpi) {

--- a/vp9/encoder/vp9_svc_layercontext.c

+++ b/vp9/encoder/vp9_svc_layercontext.c

@@ -38,11 +38,11 @@

   svc->current_superframe = 0;

   svc->non_reference_frame = 0;

   svc->skip_enhancement_layer = 0;

-  svc->last_layer_encoded = 0;

   for (i = 0; i < REF_FRAMES; ++i) svc->ref_frame_index[i] = -1;

   for (sl = 0; sl < oxcf->ss_number_layers; ++sl) {

-    svc->rc_drop_spatial_layer[sl] = 0;

+    svc->last_layer_dropped[sl] = 0;

+    svc->drop_spatial_layer[sl] = 0;

     svc->ext_frame_flags[sl] = 0;

     svc->ext_lst_fb_idx[sl] = 0;

     svc->ext_gld_fb_idx[sl] = 1;

@@ -649,11 +649,12 @@

-  // Reset the drop flags for all spatial lauyers, on the base layer.

+  // Reset the drop flags for all spatial layers, on the base layer.

   if (cpi->svc.spatial_layer_id == 0) {

     int i;

-    for (i = 0; i < cpi->svc.number_spatial_layers; i++)

-      cpi->svc.rc_drop_spatial_layer[i] = 0;

+    for (i = 0; i < cpi->svc.number_spatial_layers; i++) {

+      cpi->svc.drop_spatial_layer[i] = 0;

+    }

   lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id *

@@ -701,6 +702,13 @@

         cpi->svc.use_partition_reuse = 0;

         break;

+    }

+    // For non-zero spatial layers: if the previous spatial layer was dropped

+    // disable the base_mv and partition_reuse features.

+    if (cpi->svc.spatial_layer_id > 0 &&

+        cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id - 1]) {

+      cpi->svc.use_base_mv = 0;

+      cpi->svc.use_partition_reuse = 0;

--- a/vp9/encoder/vp9_svc_layercontext.h

+++ b/vp9/encoder/vp9_svc_layercontext.h

@@ -57,7 +57,6 @@

   int spatial_layer_to_encode;

   int first_spatial_layer_to_encode;

-  int rc_drop_spatial_layer[VPX_MAX_LAYERS];

   // Workaround for multiple frame contexts

   enum { ENCODED = 0, ENCODING, NEED_TO_ENCODE } encode_empty_frame_state;

@@ -107,7 +106,8 @@

   int lower_layer_qindex;

-  int last_layer_encoded;

+  int last_layer_dropped[VPX_MAX_LAYERS];

+  int drop_spatial_layer[VPX_MAX_LAYERS];

 } SVC;

 struct VP9_COMP;

--- a/vp9/vp9_cx_iface.c

+++ b/vp9/vp9_cx_iface.c

@@ -1205,8 +1205,8 @@

           cx_data_sz -= size;

           pkt.data.frame.width[cpi->svc.spatial_layer_id] = cpi->common.width;

           pkt.data.frame.height[cpi->svc.spatial_layer_id] = cpi->common.height;

-          pkt.data.frame.last_spatial_layer_encoded =

-              cpi->svc.last_layer_encoded;

+          pkt.data.frame.spatial_layer_encoded[cpi->svc.spatial_layer_id] =

+              1 - cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id];

           if (ctx->output_cx_pkt_cb.output_cx_pkt) {

             pkt.kind = VPX_CODEC_CX_FRAME_PKT;

@@ -1235,7 +1235,8 @@

         pkt.data.frame.flags = get_frame_pkt_flags(cpi, lib_flags);

         pkt.data.frame.width[cpi->svc.spatial_layer_id] = cpi->common.width;

         pkt.data.frame.height[cpi->svc.spatial_layer_id] = cpi->common.height;

-        pkt.data.frame.last_spatial_layer_encoded = cpi->svc.last_layer_encoded;

+        pkt.data.frame.spatial_layer_encoded[cpi->svc.spatial_layer_id] =

+            1 - cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id];

         if (ctx->pending_cx_data) {

           if (size) ctx->pending_frame_sizes[ctx->pending_frame_count++] = size;

--- a/vpx/vpx_encoder.h

+++ b/vpx/vpx_encoder.h

@@ -63,7 +63,7 @@

  * fields to structures

*/

 #define VPX_ENCODER_ABI_VERSION \

-  (8 + VPX_CODEC_ABI_VERSION) /**<\hideinitializer*/

+  (9 + VPX_CODEC_ABI_VERSION) /**<\hideinitializer*/

 /*! \brief Encoder capabilities bitfield

@@ -181,9 +181,9 @@

        * first one.*/

       unsigned int width[VPX_SS_MAX_LAYERS];  /**< frame width */

       unsigned int height[VPX_SS_MAX_LAYERS]; /**< frame height */

-      /*!\brief Last spatial layer frame in this packet. VP8 will always be set

-       * to 0.*/

-      unsigned int last_spatial_layer_encoded;

+      /*!\brief Flag to indicate if spatial layer frame in this packet is

+       * encoded or dropped. For VP8, entry 0 is always set to 1.*/

+      uint8_t spatial_layer_encoded[VPX_SS_MAX_LAYERS];

     } frame;                            /**< data for compressed frame packet */

     vpx_fixed_buf_t twopass_stats;      /**< data for two-pass packet */

     vpx_fixed_buf_t firstpass_mb_stats; /**< first pass mb packet */

--

⑨