shithub: libvpx

Download patch

ref: 475d1d60b8fbddf03c34f7950bd8ebf7c05135ce
parent: 584c72992a972ba124a3f05ba9fc683da8190680
author: Adrian Grange <agrange@google.com>
date: Wed Nov 20 04:49:33 EST 2013

Modified spatial scalable encoder & unit tests

The spatial scalable encoder was modified to match the
changes made to the scaling code in the decoder.

In particular, the use of a dummy first frame was removed
now that the decoder is able to handle a smaller first
frame.

The SvcTest.FirstFrameHasLayers unit test was re-enabled.

Change-Id: Ic2e91fbe4eadf95895569947670d36d68abaf458

--- a/test/svc_test.cc
+++ b/test/svc_test.cc
@@ -41,7 +41,6 @@
   virtual ~SvcTest() {}
 
   virtual void SetUp() {
-    svc_.first_frame_full_size = 1;
     svc_.encoding_mode = INTER_LAYER_PREDICTION_IP;
     svc_.log_level = SVC_LOG_DEBUG;
     svc_.log_print = 0;
@@ -185,7 +184,6 @@
   res = vpx_svc_set_quantizers(&svc_, NULL);
   EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
 
-  svc_.first_frame_full_size = 0;
   svc_.spatial_layers = 2;
   res = vpx_svc_set_quantizers(&svc_, "40");
   EXPECT_EQ(VPX_CODEC_OK, res);
@@ -206,7 +204,6 @@
   res = vpx_svc_set_scale_factors(&svc_, NULL);
   EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
 
-  svc_.first_frame_full_size = 0;
   svc_.spatial_layers = 2;
   res = vpx_svc_set_scale_factors(&svc_, "4/16");
   EXPECT_EQ(VPX_CODEC_OK, res);
@@ -220,11 +217,8 @@
   codec_initialized_ = true;
 }
 
-// test that decoder can handle an SVC frame as the first frame in a sequence
-// this test is disabled since it always fails because of a decoder issue
-// https://code.google.com/p/webm/issues/detail?id=654
-TEST_F(SvcTest, DISABLED_FirstFrameHasLayers) {
-  svc_.first_frame_full_size = 0;
+// Test that decoder can handle an SVC frame as the first frame in a sequence.
+TEST_F(SvcTest, FirstFrameHasLayers) {
   svc_.spatial_layers = 2;
   vpx_svc_set_scale_factors(&svc_, "4/16,16/16");
   vpx_svc_set_quantizers(&svc_, "40,30");
@@ -252,7 +246,6 @@
 }
 
 TEST_F(SvcTest, EncodeThreeFrames) {
-  svc_.first_frame_full_size = 1;
   svc_.spatial_layers = 2;
   vpx_svc_set_scale_factors(&svc_, "4/16,16/16");
   vpx_svc_set_quantizers(&svc_, "40,30");
@@ -265,9 +258,9 @@
   libvpx_test::I420VideoSource video(test_file_name_, kWidth, kHeight,
                                      codec_enc_.g_timebase.den,
                                      codec_enc_.g_timebase.num, 0, 30);
-  // FRAME 1
+  // FRAME 0
   video.Begin();
-  // this frame is full size, with only one layer
+  // This frame is a keyframe.
   res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
                        video.duration(), VPX_DL_REALTIME);
   ASSERT_EQ(VPX_CODEC_OK, res);
@@ -278,13 +271,13 @@
       vpx_svc_get_frame_size(&svc_));
   ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();
 
-  // FRAME 2
+  // FRAME 1
   video.Next();
-  // this is an I-frame
+  // This is a P-frame.
   res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
                        video.duration(), VPX_DL_REALTIME);
   ASSERT_EQ(VPX_CODEC_OK, res);
-  EXPECT_EQ(1, vpx_svc_is_keyframe(&svc_));
+  EXPECT_EQ(0, vpx_svc_is_keyframe(&svc_));
 
   res_dec = decoder_->DecodeFrame(
       static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)),
@@ -293,7 +286,7 @@
 
   // FRAME 2
   video.Next();
-  // this is a P-frame
+  // This is a P-frame.
   res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
                        video.duration(), VPX_DL_REALTIME);
   ASSERT_EQ(VPX_CODEC_OK, res);
@@ -306,7 +299,6 @@
 }
 
 TEST_F(SvcTest, GetLayerResolution) {
-  svc_.first_frame_full_size = 0;
   svc_.spatial_layers = 2;
   vpx_svc_set_scale_factors(&svc_, "4/16,8/16");
   vpx_svc_set_quantizers(&svc_, "40,30");
--- a/vp9_spatial_scalable_encoder.c
+++ b/vp9_spatial_scalable_encoder.c
@@ -53,14 +53,11 @@
     ARG_DEF("r", "scale-factors", 1, "scale factors (lowest to highest layer)");
 static const arg_def_t quantizers_arg =
     ARG_DEF("q", "quantizers", 1, "quantizers (lowest to highest layer)");
-static const arg_def_t dummy_frame_arg =
-    ARG_DEF("z", "dummy-frame", 1, "make first frame blank and full size");
 
 static const arg_def_t *svc_args[] = {
   &encoding_mode_arg, &frames_arg,        &width_arg,       &height_arg,
   &timebase_arg,      &bitrate_arg,       &skip_frames_arg, &layers_arg,
-  &kf_dist_arg,       &scale_factors_arg, &quantizers_arg,  &dummy_frame_arg,
-  NULL
+  &kf_dist_arg,       &scale_factors_arg, &quantizers_arg,  NULL
 };
 
 static const SVC_ENCODING_MODE default_encoding_mode =
@@ -74,7 +71,6 @@
 static const uint32_t default_bitrate = 1000;
 static const uint32_t default_spatial_layers = 5;
 static const uint32_t default_kf_dist = 100;
-static const int default_use_dummy_frame = 1;
 
 typedef struct {
   char *output_filename;
@@ -116,8 +112,6 @@
   svc_ctx->log_level = SVC_LOG_DEBUG;
   svc_ctx->spatial_layers = default_spatial_layers;
   svc_ctx->encoding_mode = default_encoding_mode;
-  // when using a dummy frame, that frame is only encoded to be full size
-  svc_ctx->first_frame_full_size = default_use_dummy_frame;
 
   // start with default encoder configuration
   res = vpx_codec_enc_config_default(vpx_codec_vp9_cx(), enc_cfg, 0);
@@ -150,8 +144,6 @@
       enc_cfg->g_w = arg_parse_uint(&arg);
     } else if (arg_match(&arg, &height_arg, argi)) {
       enc_cfg->g_h = arg_parse_uint(&arg);
-    } else if (arg_match(&arg, &height_arg, argi)) {
-      enc_cfg->g_h = arg_parse_uint(&arg);
     } else if (arg_match(&arg, &timebase_arg, argi)) {
       enc_cfg->g_timebase = arg_parse_rational(&arg);
     } else if (arg_match(&arg, &bitrate_arg, argi)) {
@@ -167,8 +159,6 @@
       vpx_svc_set_scale_factors(svc_ctx, arg.val);
     } else if (arg_match(&arg, &quantizers_arg, argi)) {
       vpx_svc_set_quantizers(svc_ctx, arg.val);
-    } else if (arg_match(&arg, &dummy_frame_arg, argi)) {
-      svc_ctx->first_frame_full_size = arg_parse_int(&arg);
     } else {
       ++argj;
     }
@@ -195,13 +185,12 @@
       "mode: %d, layers: %d\n"
       "width %d, height: %d,\n"
       "num: %d, den: %d, bitrate: %d,\n"
-      "gop size: %d, use_dummy_frame: %d\n",
+      "gop size: %d\n",
       vpx_codec_iface_name(vpx_codec_vp9_cx()), app_input->frames_to_code,
       app_input->frames_to_skip, svc_ctx->encoding_mode,
       svc_ctx->spatial_layers, enc_cfg->g_w, enc_cfg->g_h,
       enc_cfg->g_timebase.num, enc_cfg->g_timebase.den,
-      enc_cfg->rc_target_bitrate, enc_cfg->kf_max_dist,
-      svc_ctx->first_frame_full_size);
+      enc_cfg->rc_target_bitrate, enc_cfg->kf_max_dist);
 }
 
 int main(int argc, const char **argv) {
@@ -246,12 +235,9 @@
   }
 
   // Encode frames
-  while (frame_cnt <= app_input.frames_to_code) {
-    if (frame_cnt == 0 && svc_ctx.first_frame_full_size) {
-      create_dummy_frame(&raw);
-    } else {
-      if (!read_yuv_frame(&app_input.input_ctx, &raw)) break;
-    }
+  while (frame_cnt < app_input.frames_to_code) {
+    if (read_yuv_frame(&app_input.input_ctx, &raw)) break;
+
     res = vpx_svc_encode(&svc_ctx, &codec, &raw, pts, frame_duration,
                          VPX_DL_REALTIME);
     printf("%s", vpx_svc_get_message(&svc_ctx));
@@ -269,7 +255,7 @@
     pts += frame_duration;
   }
 
-  printf("Processed %d frames\n", frame_cnt - svc_ctx.first_frame_full_size);
+  printf("Processed %d frames\n", frame_cnt);
 
   fclose(app_input.input_ctx.file);
   if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec");
--- a/vpx/src/svc_encodeframe.c
+++ b/vpx/src/svc_encodeframe.c
@@ -584,14 +584,6 @@
   return flags;
 }
 
-/**
- * Helper to check if the current frame is the first, full resolution dummy.
- */
-static int vpx_svc_dummy_frame(SvcContext *svc_ctx) {
-  SvcInternal *const si = get_svc_internal(svc_ctx);
-  return svc_ctx->first_frame_full_size == 1 && si->encode_frame_count == 0;
-}
-
 static void calculate_enc_frame_flags(SvcContext *svc_ctx) {
   vpx_enc_frame_flags_t flags = VPX_EFLAG_FORCE_KF;
   SvcInternal *const si = get_svc_internal(svc_ctx);
@@ -598,7 +590,7 @@
   const int is_keyframe = (si->frame_within_gop == 0);
 
   // keyframe layer zero is identical for all modes
-  if ((is_keyframe && si->layer == 0) || vpx_svc_dummy_frame(svc_ctx)) {
+  if (is_keyframe && si->layer == 0) {
     si->enc_frame_flags = VPX_EFLAG_FORCE_KF;
     return;
   }
@@ -783,10 +775,9 @@
   memset(&superframe, 0, sizeof(superframe));
   svc_log_reset(svc_ctx);
 
-  si->layers = vpx_svc_dummy_frame(svc_ctx) ? 1 : svc_ctx->spatial_layers;
+  si->layers = svc_ctx->spatial_layers;
   if (si->frame_within_gop >= si->kf_dist ||
-      si->encode_frame_count == 0 ||
-      (si->encode_frame_count == 1 && svc_ctx->first_frame_full_size == 1)) {
+      si->encode_frame_count == 0) {
     si->frame_within_gop = 0;
   }
   si->is_keyframe = (si->frame_within_gop == 0);
@@ -805,12 +796,8 @@
     }
     calculate_enc_frame_flags(svc_ctx);
 
-    if (vpx_svc_dummy_frame(svc_ctx)) {
-      // do not set svc parameters, use normal encode
-      svc_log(svc_ctx, SVC_LOG_DEBUG, "encoding full size first frame\n");
-    } else {
-      set_svc_parameters(svc_ctx, codec_ctx);
-    }
+    set_svc_parameters(svc_ctx, codec_ctx);
+
     res = vpx_codec_encode(codec_ctx, rawimg, pts, (uint32_t)duration,
                            si->enc_frame_flags, deadline);
     if (res != VPX_CODEC_OK) {
@@ -822,12 +809,10 @@
       switch (cx_pkt->kind) {
         case VPX_CODEC_CX_FRAME_PKT: {
           const uint32_t frame_pkt_size = (uint32_t)(cx_pkt->data.frame.sz);
-          if (!vpx_svc_dummy_frame(svc_ctx)) {
-            si->bytes_in_layer[si->layer] += frame_pkt_size;
-            svc_log(svc_ctx, SVC_LOG_DEBUG,
-                    "SVC frame: %d, layer: %d, size: %u\n",
-                    si->encode_frame_count, si->layer, frame_pkt_size);
-          }
+          si->bytes_in_layer[si->layer] += frame_pkt_size;
+          svc_log(svc_ctx, SVC_LOG_DEBUG,
+                  "SVC frame: %d, layer: %d, size: %u\n",
+                  si->encode_frame_count, si->layer, frame_pkt_size);
           layer_data =
               ld_create(cx_pkt->data.frame.buf, (size_t)frame_pkt_size);
           if (layer_data == NULL) {
@@ -842,15 +827,13 @@
           break;
         }
         case VPX_CODEC_PSNR_PKT: {
-          if (!vpx_svc_dummy_frame(svc_ctx)) {
-            svc_log(svc_ctx, SVC_LOG_DEBUG,
-                    "SVC frame: %d, layer: %d, PSNR(Total/Y/U/V): "
-                    "%2.3f  %2.3f  %2.3f  %2.3f \n",
-                    si->encode_frame_count, si->layer,
-                    cx_pkt->data.psnr.psnr[0], cx_pkt->data.psnr.psnr[1],
-                    cx_pkt->data.psnr.psnr[2], cx_pkt->data.psnr.psnr[3]);
-            si->psnr_in_layer[si->layer] += cx_pkt->data.psnr.psnr[0];
-          }
+          svc_log(svc_ctx, SVC_LOG_DEBUG,
+                  "SVC frame: %d, layer: %d, PSNR(Total/Y/U/V): "
+                  "%2.3f  %2.3f  %2.3f  %2.3f \n",
+                  si->encode_frame_count, si->layer,
+                  cx_pkt->data.psnr.psnr[0], cx_pkt->data.psnr.psnr[1],
+                  cx_pkt->data.psnr.psnr[2], cx_pkt->data.psnr.psnr[3]);
+          si->psnr_in_layer[si->layer] += cx_pkt->data.psnr.psnr[0];
           break;
         }
         default: {
@@ -860,11 +843,10 @@
     }
   }
   // add superframe index to layer data list
-  if (!vpx_svc_dummy_frame(svc_ctx)) {
-    sf_create_index(&superframe);
-    layer_data = ld_create(superframe.buffer, superframe.index_size);
-    ld_list_add(&cx_layer_list, layer_data);
-  }
+  sf_create_index(&superframe);
+  layer_data = ld_create(superframe.buffer, superframe.index_size);
+  ld_list_add(&cx_layer_list, layer_data);
+
   // get accumulated size of layer data
   si->frame_size = ld_list_get_buffer_size(cx_layer_list);
   if (si->frame_size == 0) return VPX_CODEC_ERROR;
@@ -940,7 +922,6 @@
   svc_log_reset(svc_ctx);
 
   encode_frame_count = si->encode_frame_count;
-  if (svc_ctx->first_frame_full_size) encode_frame_count--;
   if (si->encode_frame_count <= 0) return vpx_svc_get_message(svc_ctx);
 
   svc_log(svc_ctx, SVC_LOG_INFO, "\n");
--- a/vpx/svc_context.h
+++ b/vpx/svc_context.h
@@ -39,7 +39,6 @@
 typedef struct {
   // public interface to svc_command options
   int spatial_layers;               // number of layers
-  int first_frame_full_size;        // set to one to force first frame full size
   SVC_ENCODING_MODE encoding_mode;  // svc encoding strategy
   SVC_LOG_LEVEL log_level;  // amount of information to display
   int log_print;  // when set, printf log messages instead of returning the
--- a/vpxdec.c
+++ b/vpxdec.c
@@ -697,7 +697,7 @@
 
 
   if (arg_skip)
-    fprintf(stderr, "Skiping first %d frames.\n", arg_skip);
+    fprintf(stderr, "Skipping first %d frames.\n", arg_skip);
   while (arg_skip) {
     if (read_frame(&input, &buf, &bytes_in_buffer, &buffer_size))
       break;