shithub: libvpx

Download patch

ref: e479379abb071050d45273c614c37253522bf7b0
parent: 9c2ca8c1ca87f219965f3a528190a57a1ce72da8
author: Adrian Grange <agrange@google.com>
date: Fri Jan 13 09:09:40 EST 2012

Fixed bugs in multi-layer code related to changing params

When running multi-layer (ML) encodes and dynamically
changing coding parameters on the fly (e.g. frame
duration/rate, bandwidths allocated to each layer)
the encoder would not produce sensible output.

In certain cases the rate targeting would be
hideously inaccurate.

These fixes make it possible to change these coding
parameters correctly and to maintain accurate control
of the rate targeting.

I also added the specification of the input timebase
into the test program, vp8_scalable_patterns.c.

(Patch 2: Moved declaration to appease MS compiler)

Change-Id: Ic8bb5a16daa924bb64974e740696e040d07ae363

--- a/vp8/common/onyx.h
+++ b/vp8/common/onyx.h
@@ -147,9 +147,13 @@
         int over_shoot_pct;
 
         // buffering parameters
-        int64_t starting_buffer_level;  // in seconds
+        int64_t starting_buffer_level;  // in bytes
         int64_t optimal_buffer_level;
         int64_t maximum_buffer_size;
+
+        int64_t starting_buffer_level_in_ms;  // in milli-seconds
+        int64_t optimal_buffer_level_in_ms;
+        int64_t maximum_buffer_size_in_ms;
 
         // controlling quality
         int fixed_q;
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -250,6 +250,9 @@
     lc->starting_buffer_level            = cpi->oxcf.starting_buffer_level;
     lc->optimal_buffer_level             = cpi->oxcf.optimal_buffer_level;
     lc->maximum_buffer_size              = cpi->oxcf.maximum_buffer_size;
+    lc->starting_buffer_level_in_ms      = cpi->oxcf.starting_buffer_level_in_ms;
+    lc->optimal_buffer_level_in_ms       = cpi->oxcf.optimal_buffer_level_in_ms;
+    lc->maximum_buffer_size_in_ms        = cpi->oxcf.maximum_buffer_size_in_ms;
     lc->buffer_level                     = cpi->buffer_level;
     lc->bits_off_target                  = cpi->bits_off_target;
     lc->total_actual_bits                = cpi->total_actual_bits;
@@ -287,6 +290,9 @@
     cpi->oxcf.starting_buffer_level       = lc->starting_buffer_level;
     cpi->oxcf.optimal_buffer_level        = lc->optimal_buffer_level;
     cpi->oxcf.maximum_buffer_size         = lc->maximum_buffer_size;
+    cpi->oxcf.starting_buffer_level_in_ms = lc->starting_buffer_level_in_ms;
+    cpi->oxcf.optimal_buffer_level_in_ms  = lc->optimal_buffer_level_in_ms;
+    cpi->oxcf.maximum_buffer_size_in_ms   = lc->maximum_buffer_size_in_ms;
     cpi->buffer_level                     = lc->buffer_level;
     cpi->bits_off_target                  = lc->bits_off_target;
     cpi->total_actual_bits                = lc->total_actual_bits;
@@ -1254,6 +1260,8 @@
     if (cpi->frame_rate > 180)
         cpi->frame_rate = 30;
 
+    cpi->ref_frame_rate = cpi->frame_rate;
+
     // change includes all joint functionality
     vp8_change_config(cpi, oxcf);
 
@@ -1289,6 +1297,10 @@
                         cpi->output_frame_rate / cpi->oxcf.rate_decimator[i];
             lc->target_bandwidth = cpi->oxcf.target_bitrate[i] * 1000;
 
+            lc->starting_buffer_level_in_ms = oxcf->starting_buffer_level;
+            lc->optimal_buffer_level_in_ms  = oxcf->optimal_buffer_level;
+            lc->maximum_buffer_size_in_ms   = oxcf->maximum_buffer_size;
+
             lc->starting_buffer_level =
               rescale(oxcf->starting_buffer_level,
                           lc->target_bandwidth, 1000);
@@ -1345,7 +1357,57 @@
 #endif
 }
 
+void update_layer_contexts (VP8_COMP *cpi)
+{
+    VP8_CONFIG *oxcf = &cpi->oxcf;
 
+    /* Update snapshots of the layer contexts to reflect new parameters */
+    if (oxcf->number_of_layers > 1)
+    {
+        unsigned int i;
+        double prev_layer_frame_rate=0;
+
+        for (i=0; i<oxcf->number_of_layers; i++)
+        {
+            LAYER_CONTEXT *lc = &cpi->layer_context[i];
+
+            lc->frame_rate =
+                cpi->ref_frame_rate / oxcf->rate_decimator[i];
+            lc->target_bandwidth = oxcf->target_bitrate[i] * 1000;
+
+            lc->starting_buffer_level = rescale(
+                          oxcf->starting_buffer_level_in_ms,
+                          lc->target_bandwidth, 1000);
+
+            if (oxcf->optimal_buffer_level == 0)
+                lc->optimal_buffer_level = lc->target_bandwidth / 8;
+            else
+                lc->optimal_buffer_level = rescale(
+                          oxcf->optimal_buffer_level_in_ms,
+                          lc->target_bandwidth, 1000);
+
+            if (oxcf->maximum_buffer_size == 0)
+                lc->maximum_buffer_size = lc->target_bandwidth / 8;
+            else
+                lc->maximum_buffer_size = rescale(
+                          oxcf->maximum_buffer_size_in_ms,
+                          lc->target_bandwidth, 1000);
+
+            // Work out the average size of a frame within this layer
+            if (i > 0)
+                lc->avg_frame_size_for_layer = (oxcf->target_bitrate[i] -
+                    oxcf->target_bitrate[i-1]) * 1000 /
+                    (lc->frame_rate - prev_layer_frame_rate);
+
+            lc->active_worst_quality         = oxcf->worst_allowed_q;
+            lc->active_best_quality          = oxcf->best_allowed_q;
+            lc->avg_frame_qindex             = oxcf->worst_allowed_q;
+
+            prev_layer_frame_rate = lc->frame_rate;
+        }
+    }
+}
+
 void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
 {
     VP8_COMMON *cm = &cpi->common;
@@ -1485,9 +1547,12 @@
     // local file playback mode == really big buffer
     if (cpi->oxcf.end_usage == USAGE_LOCAL_FILE_PLAYBACK)
     {
-        cpi->oxcf.starting_buffer_level   = 60000;
-        cpi->oxcf.optimal_buffer_level    = 60000;
-        cpi->oxcf.maximum_buffer_size     = 240000;
+        cpi->oxcf.starting_buffer_level       = 60000;
+        cpi->oxcf.optimal_buffer_level        = 60000;
+        cpi->oxcf.maximum_buffer_size         = 240000;
+        cpi->oxcf.starting_buffer_level_in_ms = 60000;
+        cpi->oxcf.optimal_buffer_level_in_ms  = 60000;
+        cpi->oxcf.maximum_buffer_size_in_ms   = 240000;
     }
 
     // Convert target bandwidth from Kbit/s to Bit/s
@@ -4256,8 +4321,8 @@
 
         vp8_clear_system_state();  //__asm emms;
 
-        if (cpi->twopass.total_left_stats->coded_error != 0.0)
-            fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d %6d %6d"
+        if (cpi->twopass.total_left_stats.coded_error != 0.0)
+            fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d %10d %6d %6d"
                        "%6d %6d %6d %5d %5d %5d %8d %8.2f %10d %10.3f"
                        "%10.3f %8d\n",
                        cpi->common.current_video_frame, cpi->this_frame_target,
@@ -4264,6 +4329,7 @@
                        cpi->projected_frame_size,
                        (cpi->projected_frame_size - cpi->this_frame_target),
                        (int)cpi->total_target_vs_actual,
+                       cpi->buffer_level,
                        (cpi->oxcf.starting_buffer_level-cpi->bits_off_target),
                        (int)cpi->total_actual_bits, cm->base_qindex,
                        cpi->active_best_quality, cpi->active_worst_quality,
@@ -4274,12 +4340,12 @@
                        cm->frame_type, cpi->gfu_boost,
                        cpi->twopass.est_max_qcorrection_factor,
                        (int)cpi->twopass.bits_left,
-                       cpi->twopass.total_left_stats->coded_error,
+                       cpi->twopass.total_left_stats.coded_error,
                        (double)cpi->twopass.bits_left /
-                           cpi->twopass.total_left_stats->coded_error,
+                           cpi->twopass.total_left_stats.coded_error,
                        cpi->tot_recode_hits);
         else
-            fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d %6d %6d"
+            fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d %10d %6d %6d"
                        "%6d %6d %6d %5d %5d %5d %8d %8.2f %10d %10.3f"
                        "%8d\n",
                        cpi->common.current_video_frame,
@@ -4286,6 +4352,7 @@
                        cpi->this_frame_target, cpi->projected_frame_size,
                        (cpi->projected_frame_size - cpi->this_frame_target),
                        (int)cpi->total_target_vs_actual,
+                       cpi->buffer_level,
                        (cpi->oxcf.starting_buffer_level-cpi->bits_off_target),
                        (int)cpi->total_actual_bits, cm->base_qindex,
                        cpi->active_best_quality, cpi->active_worst_quality,
@@ -4296,7 +4363,7 @@
                        cm->frame_type, cpi->gfu_boost,
                        cpi->twopass.est_max_qcorrection_factor,
                        (int)cpi->twopass.bits_left,
-                       cpi->twopass.total_left_stats->coded_error,
+                       cpi->twopass.total_left_stats.coded_error,
                        cpi->tot_recode_hits);
 
         fclose(f);
@@ -4669,13 +4736,6 @@
         return -1;
     }
 
-    // Restore layer specific context if necessary
-    if (cpi->oxcf.number_of_layers > 1)
-    {
-        restore_layer_context (cpi,
-           cpi->oxcf.layer_id[cm->current_video_frame % cpi->oxcf.periodicity]);
-    }
-
     if (cpi->source->ts_start < cpi->first_time_stamp_ever)
     {
         cpi->first_time_stamp_ever = cpi->source->ts_start;
@@ -4683,17 +4743,8 @@
     }
 
     // adjust frame rates based on timestamps given
-    if (cpi->oxcf.number_of_layers > 1 )
+    if (!cm->refresh_alt_ref_frame || (cpi->oxcf.number_of_layers > 1))
     {
-        vp8_new_frame_rate (
-              cpi, cpi->layer_context[cpi->current_layer].frame_rate);
-
-        cpi->last_time_stamp_seen = cpi->source->ts_start;
-        cpi->last_end_time_stamp_seen = cpi->source->ts_end;
-
-    }
-    else if (!cm->refresh_alt_ref_frame)
-    {
         int64_t this_duration;
         int step = 0;
 
@@ -4717,7 +4768,7 @@
         if (this_duration)
         {
             if (step)
-                vp8_new_frame_rate(cpi, 10000000.0 / this_duration);
+                cpi->ref_frame_rate = 10000000.0 / this_duration;
             else
             {
                 double avg_duration, interval;
@@ -4730,16 +4781,44 @@
                 if(interval > 10000000.0)
                     interval = 10000000;
 
-                avg_duration = 10000000.0 / cpi->frame_rate;
+                avg_duration = 10000000.0 / cpi->ref_frame_rate;
                 avg_duration *= (interval - avg_duration + this_duration);
                 avg_duration /= interval;
 
-                vp8_new_frame_rate(cpi, 10000000.0 / avg_duration);
+                cpi->ref_frame_rate = 10000000.0 / avg_duration;
             }
+
+            if (cpi->oxcf.number_of_layers > 1)
+            {
+                int i;
+
+                // Update frame rates for each layer
+                for (i=0; i<cpi->oxcf.number_of_layers; i++)
+                {
+                    LAYER_CONTEXT *lc = &cpi->layer_context[i];
+                    lc->frame_rate = cpi->ref_frame_rate /
+                                  cpi->oxcf.rate_decimator[i];
+                }
+            }
+            else
+                vp8_new_frame_rate(cpi, cpi->ref_frame_rate);
         }
 
         cpi->last_time_stamp_seen = cpi->source->ts_start;
         cpi->last_end_time_stamp_seen = cpi->source->ts_end;
+    }
+
+    if (cpi->oxcf.number_of_layers > 1)
+    {
+        int layer;
+
+        update_layer_contexts (cpi);
+
+        // Restore layer specific context & set frame rate
+        layer = cpi->oxcf.layer_id[
+                            cm->current_video_frame % cpi->oxcf.periodicity];
+        restore_layer_context (cpi, layer);
+        vp8_new_frame_rate (cpi, cpi->layer_context[layer].frame_rate);
     }
 
     if (cpi->compressor_speed == 2)
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -253,6 +253,9 @@
     int starting_buffer_level;
     int optimal_buffer_level;
     int maximum_buffer_size;
+    int starting_buffer_level_in_ms;
+    int optimal_buffer_level_in_ms;
+    int maximum_buffer_size_in_ms;
 
     int avg_frame_size_for_layer;
 
@@ -421,6 +424,7 @@
     int buffered_mode;
 
     double frame_rate;
+    double ref_frame_rate;
     int64_t buffer_level;
     int bits_off_target;
 
--- a/vp8/vp8_cx_iface.c
+++ b/vp8/vp8_cx_iface.c
@@ -335,6 +335,10 @@
     oxcf->under_shoot_pct          = cfg.rc_undershoot_pct;
     oxcf->over_shoot_pct           = cfg.rc_overshoot_pct;
 
+    oxcf->maximum_buffer_size_in_ms   = cfg.rc_buf_sz;
+    oxcf->starting_buffer_level_in_ms = cfg.rc_buf_initial_sz;
+    oxcf->optimal_buffer_level_in_ms  = cfg.rc_buf_optimal_sz;
+
     oxcf->maximum_buffer_size      = cfg.rc_buf_sz;
     oxcf->starting_buffer_level    = cfg.rc_buf_initial_sz;
     oxcf->optimal_buffer_level     = cfg.rc_buf_optimal_sz;
--- a/vp8_scalable_patterns.c
+++ b/vp8_scalable_patterns.c
@@ -129,6 +129,8 @@
     int                  got_data;
     int                  flags = 0;
     int                  i;
+    int                  pts = 0;              // PTS starts at 0
+    int                  frame_duration = 1;   // 1 timebase tick per frame
 
     int                  layering_mode = 0;
     int                  frames_in_layer[MAX_LAYERS] = {0};
@@ -135,9 +137,9 @@
     int                  layer_flags[MAX_PERIODICITY] = {0};
 
     // Check usage and arguments
-    if (argc < 7)
-        die("Usage: %s <infile> <outfile> <width> <height> <mode> "
-            "<Rate_0> ... <Rate_nlayers-1>\n", argv[0]);
+    if (argc < 9)
+        die("Usage: %s <infile> <outfile> <width> <height> <rate_num> "
+            " <rate_den> <mode> <Rate_0> ... <Rate_nlayers-1>\n", argv[0]);
 
     width  = strtol (argv[3], NULL, 0);
     height = strtol (argv[4], NULL, 0);
@@ -144,12 +146,12 @@
     if (width < 16 || width%2 || height <16 || height%2)
         die ("Invalid resolution: %d x %d", width, height);
 
-    if (!sscanf(argv[5], "%d", &layering_mode))
-        die ("Invalid mode %s", argv[5]);
+    if (!sscanf(argv[7], "%d", &layering_mode))
+        die ("Invalid mode %s", argv[7]);
     if (layering_mode<0 || layering_mode>6)
-        die ("Invalid mode (0..6) %s", argv[5]);
+        die ("Invalid mode (0..6) %s", argv[7]);
 
-    if (argc != 6+mode_to_num_layers[layering_mode])
+    if (argc != 8+mode_to_num_layers[layering_mode])
         die ("Invalid number of arguments");
 
     if (!vpx_img_alloc (&raw, VPX_IMG_FMT_I420, width, height, 1))
@@ -168,8 +170,14 @@
     cfg.g_w = width;
     cfg.g_h = height;
 
-    for (i=6; i<6+mode_to_num_layers[layering_mode]; i++)
-        if (!sscanf(argv[i], "%d", &cfg.ts_target_bitrate[i-6]))
+    // Timebase format e.g. 30fps: numerator=1, denominator=30
+    if (!sscanf (argv[5], "%d", &cfg.g_timebase.num ))
+        die ("Invalid timebase numerator %s", argv[5]);
+    if (!sscanf (argv[6], "%d", &cfg.g_timebase.den ))
+        die ("Invalid timebase denominator %s", argv[6]);
+
+    for (i=8; i<8+mode_to_num_layers[layering_mode]; i++)
+        if (!sscanf(argv[i], "%d", &cfg.ts_target_bitrate[i-8]))
             die ("Invalid data rate %s", argv[i]);
 
     // Real time parameters
@@ -193,7 +201,7 @@
     cfg.kf_min_dist = cfg.kf_max_dist = 1000;
 
     // Temporal scaling parameters:
-    // NOTE: The 3 prediction frames cannot be used interchangebly due to
+    // NOTE: The 3 prediction frames cannot be used interchangeably due to
     // differences in the way they are handled throughout the code. The
     // frames should be allocated to layers in the order LAST, GF, ARF.
     // Other combinations work, but may produce slightly inferior results.
@@ -210,6 +218,7 @@
         cfg.ts_rate_decimator[1] = 1;
         memcpy(cfg.ts_layer_id, ids, sizeof(ids));
 
+#if 1
         // 0=L, 1=GF, Intra-layer prediction enabled
         layer_flags[0] = VPX_EFLAG_FORCE_KF  |
                          VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
@@ -216,8 +225,8 @@
                          VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF;
         layer_flags[1] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST |
                          VP8_EFLAG_NO_REF_ARF;
-#if 0
-        // 0=L, 1=GF, Intra-layer 1 prediction disabled
+#else
+        // 0=L, 1=GF, Intra-layer prediction disabled
         layer_flags[0] = VPX_EFLAG_FORCE_KF  |
                          VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
                          VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF;
@@ -275,7 +284,7 @@
     case 3:
     {
         // 3-layers, 4-frame period
-        int ids[6] = {0,2,1,2};
+        int ids[4] = {0,2,1,2};
         cfg.ts_number_layers     = 3;
         cfg.ts_periodicity       = 4;
         cfg.ts_rate_decimator[0] = 4;
@@ -295,13 +304,12 @@
                          VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF |
                          VP8_EFLAG_NO_UPD_ARF;
         break;
-        cfg.ts_rate_decimator[2] = 1;
     }
 
     case 4:
     {
         // 3-layers, 4-frame period
-        int ids[6] = {0,2,1,2};
+        int ids[4] = {0,2,1,2};
         cfg.ts_number_layers     = 3;
         cfg.ts_periodicity       = 4;
         cfg.ts_rate_decimator[0] = 4;
@@ -326,7 +334,7 @@
     case 5:
     {
         // 3-layers, 4-frame period
-        int ids[6] = {0,2,1,2};
+        int ids[4] = {0,2,1,2};
         cfg.ts_number_layers     = 3;
         cfg.ts_periodicity       = 4;
         cfg.ts_rate_decimator[0] = 4;
@@ -417,7 +425,7 @@
         flags = layer_flags[frame_cnt % cfg.ts_periodicity];
 
         frame_avail = read_frame(infile, &raw);
-        if (vpx_codec_encode(&codec, frame_avail? &raw : NULL, frame_cnt,
+        if (vpx_codec_encode(&codec, frame_avail? &raw : NULL, pts,
                             1, flags, VPX_DL_REALTIME))
             die_codec(&codec, "Failed to encode frame");
 
@@ -446,6 +454,7 @@
             fflush (stdout);
         }
         frame_cnt++;
+        pts += frame_duration;
     }
     printf ("\n");
     fclose (infile);
--