shithub: libvpx

--- a/examples/vpx_temporal_svc_encoder.c

+++ b/examples/vpx_temporal_svc_encoder.c

@@ -32,15 +32,23 @@

 void usage_exit(void) { exit(EXIT_FAILURE); }

-// Denoiser states, for temporal denoising.

-enum denoiserState {

-  kDenoiserOff,

-  kDenoiserOnYOnly,

-  kDenoiserOnYUV,

-  kDenoiserOnYUVAggressive,

-  kDenoiserOnAdaptive

+// Denoiser states for vp8, for temporal denoising.

+// The enumerators take the implicit values 0..4 in declaration order.

+// kVp8DenoiserOff is the value this example passes to the

+// VP8E_SET_NOISE_SENSITIVITY control.

+enum denoiserStateVp8 {

+  kVp8DenoiserOff,

+  kVp8DenoiserOnYOnly,

+  kVp8DenoiserOnYUV,

+  kVp8DenoiserOnYUVAggressive,

+  kVp8DenoiserOnAdaptive

};

+// Denoiser states for vp9, for temporal denoising.

+// The implicit values 0, 1, 2 line up with the noise sensitivity settings

+// documented in vpx/vp8cx.h: "0: off, 1: On(YOnly), 2: For SVC only, on top

+// two spatial layers(YOnly)". kVp9DenoiserOff is the value this example

+// passes to the VP9E_SET_NOISE_SENSITIVITY control.

+enum denoiserStateVp9 {

+  kVp9DenoiserOff,  // 0

+  kVp9DenoiserOnYOnly,  // 1

+  // For SVC: denoise the top two spatial layers.

+  kVp9DenoiserOnYTwoSpatialLayers  // 2

+};

 static int mode_to_num_layers[13] = { 1, 2, 2, 3, 3, 3, 3, 5, 2, 3, 3, 3, 3 };

 // For rate control encoding stats.

@@ -755,7 +763,7 @@

   if (strncmp(encoder->name, "vp8", 3) == 0) {

     vpx_codec_control(&codec, VP8E_SET_CPUUSED, -speed);

-    vpx_codec_control(&codec, VP8E_SET_NOISE_SENSITIVITY, kDenoiserOff);

+    vpx_codec_control(&codec, VP8E_SET_NOISE_SENSITIVITY, kVp8DenoiserOff);

     vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1);

     vpx_codec_control(&codec, VP8E_SET_GF_CBR_BOOST_PCT, 0);

 #if VP8_ROI_MAP

@@ -772,7 +780,7 @@

     vpx_codec_control(&codec, VP9E_SET_GF_CBR_BOOST_PCT, 0);

     vpx_codec_control(&codec, VP9E_SET_FRAME_PARALLEL_DECODING, 0);

     vpx_codec_control(&codec, VP9E_SET_FRAME_PERIODIC_BOOST, 0);

-    vpx_codec_control(&codec, VP9E_SET_NOISE_SENSITIVITY, kDenoiserOff);

+    vpx_codec_control(&codec, VP9E_SET_NOISE_SENSITIVITY, kVp9DenoiserOff);

     vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1);

     vpx_codec_control(&codec, VP9E_SET_TUNE_CONTENT, 0);

     vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (cfg.g_threads >> 1));

--- a/test/datarate_test.cc

+++ b/test/datarate_test.cc

@@ -1449,24 +1449,29 @@

   ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 300);

   // TODO(marpan): Check that effective_datarate for each layer hits the

   // layer target_bitrate.

-  for (int i = 600; i <= 1000; i += 200) {

-    cfg_.rc_target_bitrate = i;

-    ResetModel();

-    denoiser_on_ = 1;

-    assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,

-                          cfg_.ts_number_layers, cfg_.temporal_layering_mode);

-    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));

-    ASSERT_GE(cfg_.rc_target_bitrate, file_datarate_ * 0.78)

-        << " The datarate for the file exceeds the target by too much!";

-    ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.15)

-        << " The datarate for the file is lower than the target by too much!";

+  // For SVC, noise_sen = 1 means denoising only the top spatial layer;

+  // noise_sen = 2 means denoising the top two spatial layers.

+  for (int noise_sen = 1; noise_sen <= 2; noise_sen++) {

+    for (int i = 600; i <= 1000; i += 200) {

+      cfg_.rc_target_bitrate = i;

+      ResetModel();

+      denoiser_on_ = noise_sen;

+      assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,

+                            cfg_.ts_number_layers, cfg_.temporal_layering_mode);

+      ASSERT_NO_FATAL_FAILURE(RunLoop(&video));

+      ASSERT_GE(cfg_.rc_target_bitrate, file_datarate_ * 0.78)

+          << " The datarate for the file exceeds the target by too much!";

+      ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.15)

+          << " The datarate for the file is lower than the target by too much!";

 #if CONFIG_VP9_DECODER

-    // Number of temporal layers > 1, so half of the frames in this SVC pattern

-    // will be non-reference frame and hence encoder will avoid loopfilter.

-    // Since frame dropper is off, we can expcet 150 (half of the sequence)

-    // mismatched frames.

-    EXPECT_EQ(static_cast<unsigned int>(150), GetMismatchFrames());

+      // Number of temporal layers > 1, so half of the frames in this SVC

+      // pattern will be non-reference frames and hence the encoder will

+      // avoid the loopfilter. Since the frame dropper is off, we can expect

+      // 150 (half of the sequence) mismatched frames.

+      EXPECT_EQ(static_cast<unsigned int>(150), GetMismatchFrames());

 #endif

+    }

--- a/vp9/encoder/vp9_denoiser.c

+++ b/vp9/encoder/vp9_denoiser.c

@@ -189,11 +189,12 @@

     int increase_denoising, int mi_row, int mi_col, PICK_MODE_CONTEXT *ctx,

     int motion_magnitude, int is_skin, int *zeromv_filter, int consec_zeromv,

     int num_spatial_layers, int width, int lst_fb_idx, int gld_fb_idx,

-    int use_svc) {

+    int use_svc, int spatial_layer) {

   const int sse_diff = (ctx->newmv_sse == UINT_MAX)

? 0

                            : ((int)ctx->zeromv_sse - (int)ctx->newmv_sse);

   int frame;

+  int denoise_layer_idx = 0;

   MACROBLOCKD *filter_mbd = &mb->e_mbd;

   MODE_INFO *mi = filter_mbd->mi[0];

   MODE_INFO saved_mi;

@@ -254,6 +255,10 @@

       frame = lst_fb_idx + 1;

     else if (frame == GOLDEN_FRAME)

       frame = gld_fb_idx + 1;

+    // Shift for the second spatial layer.

+    if (num_spatial_layers - spatial_layer == 2)

+      frame = frame + denoiser->num_ref_frames;

+    denoise_layer_idx = num_spatial_layers - spatial_layer - 1;

   if (ctx->newmv_sse > sse_thresh(bs, increase_denoising)) {

@@ -289,18 +294,21 @@

                   denoiser->running_avg_y[frame].uv_stride, mi_row, mi_col);

   filter_mbd->plane[2].pre[0].stride = denoiser->running_avg_y[frame].uv_stride;

-  filter_mbd->plane[0].dst.buf =

-      block_start(denoiser->mc_running_avg_y.y_buffer,

-                  denoiser->mc_running_avg_y.y_stride, mi_row, mi_col);

-  filter_mbd->plane[0].dst.stride = denoiser->mc_running_avg_y.y_stride;

-  filter_mbd->plane[1].dst.buf =

-      block_start(denoiser->mc_running_avg_y.u_buffer,

-                  denoiser->mc_running_avg_y.uv_stride, mi_row, mi_col);

-  filter_mbd->plane[1].dst.stride = denoiser->mc_running_avg_y.uv_stride;

-  filter_mbd->plane[2].dst.buf =

-      block_start(denoiser->mc_running_avg_y.v_buffer,

-                  denoiser->mc_running_avg_y.uv_stride, mi_row, mi_col);

-  filter_mbd->plane[2].dst.stride = denoiser->mc_running_avg_y.uv_stride;

+  filter_mbd->plane[0].dst.buf = block_start(

+      denoiser->mc_running_avg_y[denoise_layer_idx].y_buffer,

+      denoiser->mc_running_avg_y[denoise_layer_idx].y_stride, mi_row, mi_col);

+  filter_mbd->plane[0].dst.stride =

+      denoiser->mc_running_avg_y[denoise_layer_idx].y_stride;

+  filter_mbd->plane[1].dst.buf = block_start(

+      denoiser->mc_running_avg_y[denoise_layer_idx].u_buffer,

+      denoiser->mc_running_avg_y[denoise_layer_idx].uv_stride, mi_row, mi_col);

+  filter_mbd->plane[1].dst.stride =

+      denoiser->mc_running_avg_y[denoise_layer_idx].uv_stride;

+  filter_mbd->plane[2].dst.buf = block_start(

+      denoiser->mc_running_avg_y[denoise_layer_idx].v_buffer,

+      denoiser->mc_running_avg_y[denoise_layer_idx].uv_stride, mi_row, mi_col);

+  filter_mbd->plane[2].dst.stride =

+      denoiser->mc_running_avg_y[denoise_layer_idx].uv_stride;

   set_ref_ptrs(cm, filter_mbd, saved_frame, NONE);

   vp9_build_inter_predictors_sby(filter_mbd, mi_row, mi_col, bs);

@@ -324,9 +332,17 @@

   int zeromv_filter = 0;

   VP9_DENOISER *denoiser = &cpi->denoiser;

   VP9_DENOISER_DECISION decision = COPY_BLOCK;

-  YV12_BUFFER_CONFIG avg = denoiser->running_avg_y[INTRA_FRAME];

-  YV12_BUFFER_CONFIG mc_avg = denoiser->mc_running_avg_y;

+  const int shift =

+      cpi->svc.number_spatial_layers - cpi->svc.spatial_layer_id == 2

+          ? denoiser->num_ref_frames

+          : 0;

+  YV12_BUFFER_CONFIG avg = denoiser->running_avg_y[INTRA_FRAME + shift];

+  const int denoise_layer_index =

+      cpi->svc.number_spatial_layers - cpi->svc.spatial_layer_id - 1;

+  YV12_BUFFER_CONFIG mc_avg = denoiser->mc_running_avg_y[denoise_layer_index];

   uint8_t *avg_start = block_start(avg.y_buffer, avg.y_stride, mi_row, mi_col);

   uint8_t *mc_avg_start =

       block_start(mc_avg.y_buffer, mc_avg.y_stride, mi_row, mi_col);

   struct buf_2d src = mb->plane[0].src;

@@ -381,7 +397,7 @@

         &cpi->common, denoiser, mb, bs, increase_denoising, mi_row, mi_col, ctx,

         motion_magnitude, is_skin, &zeromv_filter, consec_zeromv,

         cpi->svc.number_spatial_layers, cpi->Source->y_width, cpi->lst_fb_idx,

-        cpi->gld_fb_idx, cpi->use_svc);

+        cpi->gld_fb_idx, cpi->use_svc, cpi->svc.spatial_layer_id);

   if (decision == FILTER_BLOCK) {

     decision = vp9_denoiser_filter(src.buf, src.stride, mc_avg_start,

@@ -432,7 +448,8 @@

     VP9_DENOISER *denoiser, YV12_BUFFER_CONFIG src, FRAME_TYPE frame_type,

     int refresh_alt_ref_frame, int refresh_golden_frame, int refresh_last_frame,

     int alt_fb_idx, int gld_fb_idx, int lst_fb_idx, int resized,

-    int svc_base_is_key) {

+    int svc_base_is_key, int second_spatial_layer) {

+  const int shift = second_spatial_layer ? denoiser->num_ref_frames : 0;

   // Copy source into denoised reference buffers on KEY_FRAME or

   // if the just encoded frame was resized. For SVC, copy source if the base

   // spatial layer was key frame.

@@ -441,8 +458,8 @@

     int i;

     // Start at 1 so as not to overwrite the INTRA_FRAME

     for (i = 1; i < denoiser->num_ref_frames; ++i) {

-      if (denoiser->running_avg_y[i].buffer_alloc != NULL)

-        copy_frame(&denoiser->running_avg_y[i], &src);

+      if (denoiser->running_avg_y[i + shift].buffer_alloc != NULL)

+        copy_frame(&denoiser->running_avg_y[i + shift], &src);

     denoiser->reset = 0;

     return;

@@ -451,29 +468,29 @@

   // If more than one refresh occurs, must copy frame buffer.

   if ((refresh_alt_ref_frame + refresh_golden_frame + refresh_last_frame) > 1) {

     if (refresh_alt_ref_frame) {

-      copy_frame(&denoiser->running_avg_y[alt_fb_idx + 1],

-                 &denoiser->running_avg_y[INTRA_FRAME]);

+      copy_frame(&denoiser->running_avg_y[alt_fb_idx + 1 + shift],

+                 &denoiser->running_avg_y[INTRA_FRAME + shift]);

     if (refresh_golden_frame) {

-      copy_frame(&denoiser->running_avg_y[gld_fb_idx + 1],

-                 &denoiser->running_avg_y[INTRA_FRAME]);

+      copy_frame(&denoiser->running_avg_y[gld_fb_idx + 1 + shift],

+                 &denoiser->running_avg_y[INTRA_FRAME + shift]);

     if (refresh_last_frame) {

-      copy_frame(&denoiser->running_avg_y[lst_fb_idx + 1],

-                 &denoiser->running_avg_y[INTRA_FRAME]);

+      copy_frame(&denoiser->running_avg_y[lst_fb_idx + 1 + shift],

+                 &denoiser->running_avg_y[INTRA_FRAME + shift]);

   } else {

     if (refresh_alt_ref_frame) {

-      swap_frame_buffer(&denoiser->running_avg_y[alt_fb_idx + 1],

-                        &denoiser->running_avg_y[INTRA_FRAME]);

+      swap_frame_buffer(&denoiser->running_avg_y[alt_fb_idx + 1 + shift],

+                        &denoiser->running_avg_y[INTRA_FRAME + shift]);

     if (refresh_golden_frame) {

-      swap_frame_buffer(&denoiser->running_avg_y[gld_fb_idx + 1],

-                        &denoiser->running_avg_y[INTRA_FRAME]);

+      swap_frame_buffer(&denoiser->running_avg_y[gld_fb_idx + 1 + shift],

+                        &denoiser->running_avg_y[INTRA_FRAME + shift]);

     if (refresh_last_frame) {

-      swap_frame_buffer(&denoiser->running_avg_y[lst_fb_idx + 1],

-                        &denoiser->running_avg_y[INTRA_FRAME]);

+      swap_frame_buffer(&denoiser->running_avg_y[lst_fb_idx + 1 + shift],

+                        &denoiser->running_avg_y[INTRA_FRAME + shift]);

@@ -522,45 +539,91 @@

 int vp9_denoiser_realloc_svc(VP9_COMMON *cm, VP9_DENOISER *denoiser,

-                             int refresh_alt, int refresh_gld, int refresh_lst,

-                             int alt_fb_idx, int gld_fb_idx, int lst_fb_idx) {

+                             int svc_buf_shift, int refresh_alt,

+                             int refresh_gld, int refresh_lst, int alt_fb_idx,

+                             int gld_fb_idx, int lst_fb_idx) {

   int fail = 0;

   if (refresh_alt) {

     // Increase the frame buffer index by 1 to map it to the buffer index in the

     // denoiser.

-    fail = vp9_denoiser_realloc_svc_helper(cm, denoiser, alt_fb_idx + 1);

+    fail = vp9_denoiser_realloc_svc_helper(cm, denoiser,

+                                           alt_fb_idx + 1 + svc_buf_shift);

     if (fail) return 1;

   if (refresh_gld) {

-    fail = vp9_denoiser_realloc_svc_helper(cm, denoiser, gld_fb_idx + 1);

+    fail = vp9_denoiser_realloc_svc_helper(cm, denoiser,

+                                           gld_fb_idx + 1 + svc_buf_shift);

     if (fail) return 1;

   if (refresh_lst) {

-    fail = vp9_denoiser_realloc_svc_helper(cm, denoiser, lst_fb_idx + 1);

+    fail = vp9_denoiser_realloc_svc_helper(cm, denoiser,

+                                           lst_fb_idx + 1 + svc_buf_shift);

     if (fail) return 1;

   return 0;

-int vp9_denoiser_alloc(VP9_COMMON *cm, int use_svc, VP9_DENOISER *denoiser,

-                       int width, int height, int ssx, int ssy,

+int vp9_denoiser_alloc(VP9_COMMON *cm, struct SVC *svc, VP9_DENOISER *denoiser,

+                       int use_svc, int noise_sen, int width, int height,

+                       int ssx, int ssy,

 #if CONFIG_VP9_HIGHBITDEPTH

                        int use_highbitdepth,

 #endif

                        int border) {

-  int i, fail, init_num_ref_frames;

+  int i, layer, fail, init_num_ref_frames;

   const int legacy_byte_alignment = 0;

+  int num_layers = 1;

+  int scaled_width = width;

+  int scaled_height = height;

+  if (use_svc) {

+    LAYER_CONTEXT *lc = &svc->layer_context[svc->spatial_layer_id *

+                                                svc->number_temporal_layers +

+                                            svc->temporal_layer_id];

+    get_layer_resolution(width, height, lc->scaling_factor_num,

+                         lc->scaling_factor_den, &scaled_width, &scaled_height);

+    // For SVC: only denoise at most 2 spatial (highest) layers.

+    if (noise_sen >= 2)

+      // Denoise from one spatial layer below the top.

+      svc->first_layer_denoise = VPXMAX(svc->number_spatial_layers - 2, 0);

+    else

+      // Only denoise the top spatial layer.

+      svc->first_layer_denoise = VPXMAX(svc->number_spatial_layers - 1, 0);

+    num_layers = svc->number_spatial_layers - svc->first_layer_denoise;

+  }

   assert(denoiser != NULL);

   denoiser->num_ref_frames = use_svc ? SVC_REF_FRAMES : NONSVC_REF_FRAMES;

   init_num_ref_frames = use_svc ? MAX_REF_FRAMES : NONSVC_REF_FRAMES;

+  denoiser->num_layers = num_layers;

+  CHECK_MEM_ERROR(cm, denoiser->running_avg_y,

+                  vpx_calloc(denoiser->num_ref_frames * num_layers,

+                             sizeof(denoiser->running_avg_y[0])));

   CHECK_MEM_ERROR(

-      cm, denoiser->running_avg_y,

-      vpx_calloc(denoiser->num_ref_frames, sizeof(denoiser->running_avg_y[0])));

-  for (i = 0; i < init_num_ref_frames; ++i) {

-    fail = vpx_alloc_frame_buffer(&denoiser->running_avg_y[i], width, height,

-                                  ssx, ssy,

+      cm, denoiser->mc_running_avg_y,

+      vpx_calloc(num_layers, sizeof(denoiser->mc_running_avg_y[0])));

+  for (layer = 0; layer < num_layers; ++layer) {

+    const int denoise_width = (layer == 0) ? width : scaled_width;

+    const int denoise_height = (layer == 0) ? height : scaled_height;

+    // Allocate this layer's running-average buffers. Buffers for layer k

+    // start at index k * denoiser->num_ref_frames in running_avg_y.

+    for (i = 0; i < init_num_ref_frames; ++i) {

+      fail = vpx_alloc_frame_buffer(

+          &denoiser->running_avg_y[i + denoiser->num_ref_frames * layer],

+          denoise_width, denoise_height, ssx, ssy,

 #if CONFIG_VP9_HIGHBITDEPTH

+          use_highbitdepth,

+#endif

+          border, legacy_byte_alignment);

+      if (fail) {

+        vp9_denoiser_free(denoiser);

+        return 1;

+      }

+#ifdef OUTPUT_YUV_DENOISED

+      // Use the same per-layer index as the allocation above, so that

+      // make_grayscale is applied to the buffer that was just allocated

+      // rather than to layer 0's buffer on every pass.

+      make_grayscale(

+          &denoiser->running_avg_y[i + denoiser->num_ref_frames * layer]);

+#endif

+    }

+    fail = vpx_alloc_frame_buffer(&denoiser->mc_running_avg_y[layer],

+                                  denoise_width, denoise_height, ssx, ssy,

+#if CONFIG_VP9_HIGHBITDEPTH

                                   use_highbitdepth,

 #endif

                                   border, legacy_byte_alignment);

@@ -568,22 +631,10 @@

       vp9_denoiser_free(denoiser);

       return 1;

-#ifdef OUTPUT_YUV_DENOISED

-    make_grayscale(&denoiser->running_avg_y[i]);

-#endif

-  fail = vpx_alloc_frame_buffer(&denoiser->mc_running_avg_y, width, height, ssx,

-                                ssy,

-#if CONFIG_VP9_HIGHBITDEPTH

-                                use_highbitdepth,

-#endif

-                                border, legacy_byte_alignment);

-  if (fail) {

-    vp9_denoiser_free(denoiser);

-    return 1;

-  }

+  // denoiser->last_source only used for noise_estimation, so only for top

+  // layer.

   fail = vpx_alloc_frame_buffer(&denoiser->last_source, width, height, ssx, ssy,

 #if CONFIG_VP9_HIGHBITDEPTH

                                 use_highbitdepth,

@@ -609,12 +660,18 @@

     return;

   denoiser->frame_buffer_initialized = 0;

-  for (i = 0; i < denoiser->num_ref_frames; ++i) {

+  for (i = 0; i < denoiser->num_ref_frames * denoiser->num_layers; ++i) {

     vpx_free_frame_buffer(&denoiser->running_avg_y[i]);

   vpx_free(denoiser->running_avg_y);

   denoiser->running_avg_y = NULL;

-  vpx_free_frame_buffer(&denoiser->mc_running_avg_y);

+  for (i = 0; i < denoiser->num_layers; ++i) {

+    vpx_free_frame_buffer(&denoiser->mc_running_avg_y[i]);

+  }

+  vpx_free(denoiser->mc_running_avg_y);

+  denoiser->mc_running_avg_y = NULL;

   vpx_free_frame_buffer(&denoiser->last_source);

--- a/vp9/encoder/vp9_denoiser.h

+++ b/vp9/encoder/vp9_denoiser.h

@@ -44,11 +44,12 @@

 typedef struct vp9_denoiser {

   YV12_BUFFER_CONFIG *running_avg_y;

-  YV12_BUFFER_CONFIG mc_running_avg_y;

+  YV12_BUFFER_CONFIG *mc_running_avg_y;

   YV12_BUFFER_CONFIG last_source;

   int frame_buffer_initialized;

   int reset;

   int num_ref_frames;

+  int num_layers;

   VP9_DENOISER_LEVEL denoising_level;

   VP9_DENOISER_LEVEL prev_denoising_level;

 } VP9_DENOISER;

@@ -66,12 +67,13 @@

 } VP9_PICKMODE_CTX_DEN;

 struct VP9_COMP;

+struct SVC;

 void vp9_denoiser_update_frame_info(

     VP9_DENOISER *denoiser, YV12_BUFFER_CONFIG src, FRAME_TYPE frame_type,

     int refresh_alt_ref_frame, int refresh_golden_frame, int refresh_last_frame,

     int alt_fb_idx, int gld_fb_idx, int lst_fb_idx, int resized,

-    int svc_base_is_key);

+    int svc_base_is_key, int second_spatial_layer);

 void vp9_denoiser_denoise(struct VP9_COMP *cpi, MACROBLOCK *mb, int mi_row,

                           int mi_col, BLOCK_SIZE bs, PICK_MODE_CONTEXT *ctx,

@@ -84,11 +86,13 @@

                                      PICK_MODE_CONTEXT *ctx);

 int vp9_denoiser_realloc_svc(VP9_COMMON *cm, VP9_DENOISER *denoiser,

-                             int refresh_alt, int refresh_gld, int refresh_lst,

-                             int alt_fb_idx, int gld_fb_idx, int lst_fb_idx);

+                             int svc_buf_shift, int refresh_alt,

+                             int refresh_gld, int refresh_lst, int alt_fb_idx,

+                             int gld_fb_idx, int lst_fb_idx);

-int vp9_denoiser_alloc(VP9_COMMON *cm, int use_svc, VP9_DENOISER *denoiser,

-                       int width, int height, int ssx, int ssy,

+int vp9_denoiser_alloc(VP9_COMMON *cm, struct SVC *svc, VP9_DENOISER *denoiser,

+                       int use_svc, int noise_sen, int width, int height,

+                       int ssx, int ssy,

 #if CONFIG_VP9_HIGHBITDEPTH

                        int use_highbitdepth,

 #endif

--- a/vp9/encoder/vp9_encoder.c

+++ b/vp9/encoder/vp9_encoder.c

@@ -2861,18 +2861,26 @@

   if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) &&

       cpi->denoiser.denoising_level > kDenLowLow) {

     int svc_base_is_key = 0;

+    int denoise_svc_second_layer = 0;

     if (cpi->use_svc) {

       int realloc_fail = 0;

+      const int svc_buf_shift =

+          cpi->svc.number_spatial_layers - cpi->svc.spatial_layer_id == 2

+              ? cpi->denoiser.num_ref_frames

+              : 0;

       int layer = LAYER_IDS_TO_IDX(cpi->svc.spatial_layer_id,

                                    cpi->svc.temporal_layer_id,

                                    cpi->svc.number_temporal_layers);

       LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer];

       svc_base_is_key = lc->is_key_frame;

-      // Check if we need to allocate extra buffers in the denoiser for

+      denoise_svc_second_layer =

+          cpi->svc.number_spatial_layers - cpi->svc.spatial_layer_id == 2 ? 1

+                                                                          : 0;

+      // Check if we need to allocate extra buffers in the denoiser

+      // for

       // refreshed frames.

       realloc_fail = vp9_denoiser_realloc_svc(

-          cm, &cpi->denoiser, cpi->refresh_alt_ref_frame,

+          cm, &cpi->denoiser, svc_buf_shift, cpi->refresh_alt_ref_frame,

           cpi->refresh_golden_frame, cpi->refresh_last_frame, cpi->alt_fb_idx,

           cpi->gld_fb_idx, cpi->lst_fb_idx);

       if (realloc_fail)

@@ -2883,7 +2891,8 @@

         &cpi->denoiser, *cpi->Source, cpi->common.frame_type,

         cpi->refresh_alt_ref_frame, cpi->refresh_golden_frame,

         cpi->refresh_last_frame, cpi->alt_fb_idx, cpi->gld_fb_idx,

-        cpi->lst_fb_idx, cpi->resize_pending, svc_base_is_key);

+        cpi->lst_fb_idx, cpi->resize_pending, svc_base_is_key,

+        denoise_svc_second_layer);

 #endif

   if (is_one_pass_cbr_svc(cpi)) {

@@ -3318,8 +3327,9 @@

   VP9_COMMON *const cm = &cpi->common;

   if (cpi->oxcf.noise_sensitivity > 0 &&

       !cpi->denoiser.frame_buffer_initialized) {

-    if (vp9_denoiser_alloc(cm, cpi->use_svc, &cpi->denoiser, cm->width,

-                           cm->height, cm->subsampling_x, cm->subsampling_y,

+    if (vp9_denoiser_alloc(cm, &cpi->svc, &cpi->denoiser, cpi->use_svc,

+                           cpi->oxcf.noise_sensitivity, cm->width, cm->height,

+                           cm->subsampling_x, cm->subsampling_y,

 #if CONFIG_VP9_HIGHBITDEPTH

                            cm->use_highbitdepth,

 #endif

--- a/vp9/encoder/vp9_encoder.h

+++ b/vp9/encoder/vp9_encoder.h

@@ -870,7 +870,7 @@

 static INLINE int denoise_svc(const struct VP9_COMP *const cpi) {

   return (!cpi->use_svc ||

           (cpi->use_svc &&

-           cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1));

+           cpi->svc.spatial_layer_id >= cpi->svc.first_layer_denoise));

 #endif

--- a/vp9/encoder/vp9_noise_estimate.c

+++ b/vp9/encoder/vp9_noise_estimate.c

@@ -21,6 +21,15 @@

 #include "vp9/encoder/vp9_noise_estimate.h"

 #include "vp9/encoder/vp9_encoder.h"

+#if CONFIG_VP9_TEMPORAL_DENOISING

+// Returns 1 when noise estimation applies to the current frame: always when

+// SVC is not in use, and for SVC only on the top spatial layer.

+static INLINE int noise_est_svc(const struct VP9_COMP *const cpi) {

+  if (!cpi->use_svc) return 1;

+  return cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1;

+}

+#endif

 void vp9_noise_estimate_init(NOISE_ESTIMATE *const ne, int width, int height) {

   ne->enabled = 0;

   ne->level = kLowLow;

@@ -45,7 +54,7 @@

 #endif

 // Enable noise estimation if denoising is on.

 #if CONFIG_VP9_TEMPORAL_DENOISING

-  if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) &&

+  if (cpi->oxcf.noise_sensitivity > 0 && noise_est_svc(cpi) &&

       cpi->common.width >= 320 && cpi->common.height >= 180)

     return 1;

 #endif

@@ -111,7 +120,7 @@

   // Estimate is between current source and last source.

   YV12_BUFFER_CONFIG *last_source = cpi->Last_Source;

 #if CONFIG_VP9_TEMPORAL_DENOISING

-  if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi)) {

+  if (cpi->oxcf.noise_sensitivity > 0 && noise_est_svc(cpi)) {

     last_source = &cpi->denoiser.last_source;

     // Tune these thresholds for different resolutions when denoising is

     // enabled.

@@ -131,7 +140,7 @@

       (cpi->svc.number_spatial_layers == 1 &&

        (ne->last_w != cm->width || ne->last_h != cm->height))) {

 #if CONFIG_VP9_TEMPORAL_DENOISING

-    if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi))

+    if (cpi->oxcf.noise_sensitivity > 0 && noise_est_svc(cpi))

       copy_frame(&cpi->denoiser.last_source, cpi->Source);

 #endif

     if (last_source != NULL) {

@@ -146,7 +155,7 @@

     ne->count = 0;

     ne->num_frames_estimate = 10;

 #if CONFIG_VP9_TEMPORAL_DENOISING

-    if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) &&

+    if (cpi->oxcf.noise_sensitivity > 0 && noise_est_svc(cpi) &&

         cpi->svc.current_superframe > 1) {

       vp9_denoiser_set_noise_level(&cpi->denoiser, ne->level);

       copy_frame(&cpi->denoiser.last_source, cpi->Source);

@@ -257,7 +266,7 @@

         ne->count = 0;

         ne->level = vp9_noise_estimate_extract_level(ne);

 #if CONFIG_VP9_TEMPORAL_DENOISING

-        if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi))

+        if (cpi->oxcf.noise_sensitivity > 0 && noise_est_svc(cpi))

           vp9_denoiser_set_noise_level(&cpi->denoiser, ne->level);

 #endif

@@ -264,7 +273,7 @@

 #if CONFIG_VP9_TEMPORAL_DENOISING

-  if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi))

+  if (cpi->oxcf.noise_sensitivity > 0 && noise_est_svc(cpi))

     copy_frame(&cpi->denoiser.last_source, cpi->Source);

 #endif

--- a/vp9/encoder/vp9_svc_layercontext.c

+++ b/vp9/encoder/vp9_svc_layercontext.c

@@ -37,6 +37,7 @@

   svc->scaled_one_half = 0;

   svc->current_superframe = 0;

   svc->non_reference_frame = 0;

   for (i = 0; i < REF_FRAMES; ++i) svc->ref_frame_index[i] = -1;

   for (sl = 0; sl < oxcf->ss_number_layers; ++sl) {

     svc->ext_frame_flags[sl] = 0;

@@ -389,9 +390,9 @@

              .is_key_frame;

-static void get_layer_resolution(const int width_org, const int height_org,

-                                 const int num, const int den, int *width_out,

-                                 int *height_out) {

+void get_layer_resolution(const int width_org, const int height_org,

+                          const int num, const int den, int *width_out,

+                          int *height_out) {

   int w, h;

   if (width_out == NULL || height_out == NULL || den == 0) return;

--- a/vp9/encoder/vp9_svc_layercontext.h

+++ b/vp9/encoder/vp9_svc_layercontext.h

@@ -49,7 +49,7 @@

   uint8_t speed;

 } LAYER_CONTEXT;

-typedef struct {

+typedef struct SVC {

   int spatial_layer_id;

   int temporal_layer_id;

   int number_spatial_layers;

@@ -99,6 +99,8 @@

   BLOCK_SIZE *prev_partition_svc;

   int mi_stride[VPX_MAX_LAYERS];

+  int first_layer_denoise;

 } SVC;

 struct VP9_COMP;

@@ -127,6 +129,10 @@

 // Initialize second pass rc for spatial svc.

 void vp9_init_second_pass_spatial_svc(struct VP9_COMP *cpi);

+void get_layer_resolution(const int width_org, const int height_org,

+                          const int num, const int den, int *width_out,

+                          int *height_out);

 // Increment number of video frames in layer

 void vp9_inc_frame_in_layer(struct VP9_COMP *const cpi);

--- a/vpx/vp8cx.h

+++ b/vpx/vp8cx.h

@@ -408,7 +408,7 @@

   /*!\brief Codec control function to set noise sensitivity.

-   *  0: off, 1: On(YOnly)

+   *  0: off, 1: On(YOnly), 2: For SVC only, on top two spatial layers(YOnly)

    * Supported in codecs: VP9

*/

--

⑨