shithub: libvpx

Download patch

ref: 0ba1542f1242f2b076180aa0a8ebf279941eb520
parent: 08d86bc9043f55d86f20f4bab74bc4ca949b3a4c
author: Deb Mukherjee <debargha@google.com>
date: Fri Nov 7 06:01:53 EST 2014

Vidyo: Support for one-pass rc-enabled SVC encoder

Adds support for a one-pass rc-enabled SVC encoder with callbacks for
getting per-layer packets.
- the callback function registration is implemented as an encoder
control function.
- if the callback function is not registered, the existing behavior of
aggregating packets into a superframe takes effect.
- one more control function, "VP9E_GET_SVC_LAYER_ID", has been
implemented to get the temporal/spatial ids from the encoder
within the callback. These ids can then be placed in the RTP
packet.

Change-Id: I1a90e00135dde65da128b758e6c00b57299a111a

--- a/examples/vp9_spatial_svc_encoder.c
+++ b/examples/vp9_spatial_svc_encoder.c
@@ -125,7 +125,7 @@
 }
 
 static void parse_command_line(int argc, const char **argv_,
-                               AppInput *app_input, SvcContext *svc_ctx,
+                               AppInput *app_input, SvcContext_t *svc_ctx,
                                vpx_codec_enc_cfg_t *enc_cfg) {
   struct arg arg = {0};
   char **argv = NULL;
@@ -322,7 +322,7 @@
   VpxVideoInfo info = {0};
   vpx_codec_ctx_t codec;
   vpx_codec_enc_cfg_t enc_cfg;
-  SvcContext svc_ctx;
+  SvcContext_t svc_ctx;
   uint32_t i;
   uint32_t frame_cnt = 0;
   vpx_image_t raw;
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -83,6 +83,7 @@
   vp8_postproc_cfg_t      preview_ppcfg;
   vpx_codec_pkt_list_decl(256) pkt_list;
   unsigned int                 fixed_kf_cntr;
+  vpx_codec_priv_output_cx_pkt_cb_pair_t output_cx_pkt_cb;
 };
 
 static VP9_REFFRAME ref_frame_to_vp9_reframe(vpx_ref_frame_type_t frame) {
@@ -994,6 +995,24 @@
           ctx->pending_frame_magnitude |= size;
           cx_data += size;
           cx_data_sz -= size;
+
+          if (ctx->output_cx_pkt_cb.output_cx_pkt) {
+            pkt.kind = VPX_CODEC_CX_FRAME_PKT;
+            pkt.data.frame.pts = ticks_to_timebase_units(timebase,
+                                                         dst_time_stamp);
+            pkt.data.frame.duration =
+               (unsigned long)ticks_to_timebase_units(timebase,
+                   dst_end_time_stamp - dst_time_stamp);
+            pkt.data.frame.flags = get_frame_pkt_flags(cpi, lib_flags);
+            pkt.data.frame.buf = ctx->pending_cx_data;
+            pkt.data.frame.sz  = size;
+            ctx->pending_cx_data = NULL;
+            ctx->pending_cx_data_sz = 0;
+            ctx->pending_frame_count = 0;
+            ctx->pending_frame_magnitude = 0;
+            ctx->output_cx_pkt_cb.output_cx_pkt(
+                &pkt, ctx->output_cx_pkt_cb.user_priv);
+          }
           continue;
         }
 
@@ -1009,7 +1028,9 @@
           ctx->pending_frame_sizes[ctx->pending_frame_count++] = size;
           ctx->pending_frame_magnitude |= size;
           ctx->pending_cx_data_sz += size;
-          size += write_superframe_index(ctx);
+          // write the superframe index only when no callback is registered
+          if (!ctx->output_cx_pkt_cb.output_cx_pkt)
+            size += write_superframe_index(ctx);
           pkt.data.frame.buf = ctx->pending_cx_data;
           pkt.data.frame.sz  = ctx->pending_cx_data_sz;
           ctx->pending_cx_data = NULL;
@@ -1021,11 +1042,16 @@
           pkt.data.frame.sz  = size;
         }
         pkt.data.frame.partition_id = -1;
-        vpx_codec_pkt_list_add(&ctx->pkt_list.head, &pkt);
+
+        if(ctx->output_cx_pkt_cb.output_cx_pkt)
+          ctx->output_cx_pkt_cb.output_cx_pkt(&pkt, ctx->output_cx_pkt_cb.user_priv);
+        else
+          vpx_codec_pkt_list_add(&ctx->pkt_list.head, &pkt);
+
         cx_data += size;
         cx_data_sz -= size;
 #if CONFIG_SPATIAL_SVC
-        if (is_two_pass_svc(cpi)) {
+        if (is_two_pass_svc(cpi) && !ctx->output_cx_pkt_cb.output_cx_pkt) {
           vpx_codec_cx_pkt_t pkt_sizes, pkt_psnr;
           int i;
           vp9_zero(pkt_sizes);
@@ -1038,7 +1064,9 @@
             pkt_psnr.data.layer_psnr[i] = lc->psnr_pkt;
             lc->layer_size = 0;
           }
+
           vpx_codec_pkt_list_add(&ctx->pkt_list.head, &pkt_sizes);
+
           vpx_codec_pkt_list_add(&ctx->pkt_list.head, &pkt_psnr);
         }
 #endif
@@ -1239,6 +1267,18 @@
   return VPX_CODEC_OK;
 }
 
+static vpx_codec_err_t ctrl_get_svc_layer_id(vpx_codec_alg_priv_t *ctx,
+                                             va_list args) {
+  vpx_svc_layer_id_t *data = va_arg(args, vpx_svc_layer_id_t *);
+  VP9_COMP *const cpi = (VP9_COMP *)ctx->cpi;
+  SVC *const svc = &cpi->svc;
+
+  data->spatial_layer_id = svc->spatial_layer_id;
+  data->temporal_layer_id = svc->temporal_layer_id;
+
+  return VPX_CODEC_OK;
+}
+
 static vpx_codec_err_t ctrl_set_svc_parameters(vpx_codec_alg_priv_t *ctx,
                                                va_list args) {
   VP9_COMP *const cpi = ctx->cpi;
@@ -1257,6 +1297,16 @@
   return VPX_CODEC_OK;
 }
 
+static vpx_codec_err_t ctrl_register_cx_callback(vpx_codec_alg_priv_t *ctx,
+                                                 va_list args) {
+  vpx_codec_priv_output_cx_pkt_cb_pair_t *cbp =
+      (vpx_codec_priv_output_cx_pkt_cb_pair_t *)va_arg(args, void *);
+  ctx->output_cx_pkt_cb.output_cx_pkt = cbp->output_cx_pkt;
+  ctx->output_cx_pkt_cb.user_priv = cbp->user_priv;
+
+  return VPX_CODEC_OK;
+}
+
 static vpx_codec_err_t ctrl_set_tune_content(vpx_codec_alg_priv_t *ctx,
                                              va_list args) {
   struct vp9_extracfg extra_cfg = ctx->extra_cfg;
@@ -1296,6 +1346,7 @@
   {VP9E_SET_FRAME_PERIODIC_BOOST,     ctrl_set_frame_periodic_boost},
   {VP9E_SET_SVC,                      ctrl_set_svc},
   {VP9E_SET_SVC_PARAMETERS,           ctrl_set_svc_parameters},
+  {VP9E_REGISTER_CX_CALLBACK,         ctrl_register_cx_callback},
   {VP9E_SET_SVC_LAYER_ID,             ctrl_set_svc_layer_id},
   {VP9E_SET_TUNE_CONTENT,             ctrl_set_tune_content},
   {VP9E_SET_NOISE_SENSITIVITY,        ctrl_set_noise_sensitivity},
@@ -1304,6 +1355,7 @@
   {VP8E_GET_LAST_QUANTIZER,           ctrl_get_quantizer},
   {VP8E_GET_LAST_QUANTIZER_64,        ctrl_get_quantizer64},
   {VP9_GET_REFERENCE,                 ctrl_get_reference},
+  {VP9E_GET_SVC_LAYER_ID,             ctrl_get_svc_layer_id},
 
   { -1, NULL},
 };
--- a/vpx/src/svc_encodeframe.c
+++ b/vpx/src/svc_encodeframe.c
@@ -44,8 +44,6 @@
 #define SVC_REFERENCE_FRAMES 8
 #define SUPERFRAME_SLOTS (8)
 #define SUPERFRAME_BUFFER_SIZE (SUPERFRAME_SLOTS * sizeof(uint32_t) + 2)
-#define OPTION_BUFFER_SIZE 1024
-#define COMPONENTS 4  // psnr & sse statistics maintained for total, y, u, v
 
 #define MAX_QUANTIZER 63
 
@@ -81,61 +79,35 @@
   struct FrameData         *next;
 } FrameData;
 
-typedef struct SvcInternal {
-  char options[OPTION_BUFFER_SIZE];        // set by vpx_svc_set_options
-
-  // values extracted from option, quantizers
-  vpx_svc_extra_cfg_t svc_params;
-  int enable_auto_alt_ref[VPX_SS_MAX_LAYERS];
-  int bitrates[VPX_SS_MAX_LAYERS];
-
-  // accumulated statistics
-  double psnr_sum[VPX_SS_MAX_LAYERS][COMPONENTS];   // total/Y/U/V
-  uint64_t sse_sum[VPX_SS_MAX_LAYERS][COMPONENTS];
-  uint32_t bytes_sum[VPX_SS_MAX_LAYERS];
-
-  // codec encoding values
-  int width;    // width of highest layer
-  int height;   // height of highest layer
-  int kf_dist;  // distance between keyframes
-
-  // state variables
-  int psnr_pkt_received;
-  int layer;
-  int use_multiple_frame_contexts;
-
-  char message_buffer[2048];
-  vpx_codec_ctx_t *codec_ctx;
-} SvcInternal;
-
-static SvcInternal *get_svc_internal(SvcContext *svc_ctx) {
+static SvcInternal_t *get_svc_internal(SvcContext_t *svc_ctx) {
   if (svc_ctx == NULL) return NULL;
   if (svc_ctx->internal == NULL) {
-    SvcInternal *const si = (SvcInternal *)malloc(sizeof(*si));
+    SvcInternal_t *const si = (SvcInternal_t *)malloc(sizeof(*si));
     if (si != NULL) {
       memset(si, 0, sizeof(*si));
     }
     svc_ctx->internal = si;
   }
-  return (SvcInternal *)svc_ctx->internal;
+  return (SvcInternal_t *)svc_ctx->internal;
 }
 
-static const SvcInternal *get_const_svc_internal(const SvcContext *svc_ctx) {
+static const SvcInternal_t *get_const_svc_internal(
+    const SvcContext_t *svc_ctx) {
   if (svc_ctx == NULL) return NULL;
-  return (const SvcInternal *)svc_ctx->internal;
+  return (const SvcInternal_t *)svc_ctx->internal;
 }
 
-static void svc_log_reset(SvcContext *svc_ctx) {
-  SvcInternal *const si = (SvcInternal *)svc_ctx->internal;
+static void svc_log_reset(SvcContext_t *svc_ctx) {
+  SvcInternal_t *const si = (SvcInternal_t *)svc_ctx->internal;
   si->message_buffer[0] = '\0';
 }
 
-static int svc_log(SvcContext *svc_ctx, SVC_LOG_LEVEL level,
+static int svc_log(SvcContext_t *svc_ctx, SVC_LOG_LEVEL level,
                    const char *fmt, ...) {
   char buf[512];
   int retval = 0;
   va_list ap;
-  SvcInternal *const si = get_svc_internal(svc_ctx);
+  SvcInternal_t *const si = get_svc_internal(svc_ctx);
 
   if (level > svc_ctx->log_level) {
     return retval;
@@ -183,7 +155,7 @@
   return VPX_CODEC_OK;
 }
 
-static vpx_codec_err_t parse_layer_options_from_string(SvcContext *svc_ctx,
+static vpx_codec_err_t parse_layer_options_from_string(SvcContext_t *svc_ctx,
                                                        LAYER_OPTION_TYPE type,
                                                        const char *input,
                                                        int *option0,
@@ -228,12 +200,12 @@
  *         quantizers=<q1>,<q2>,...
  * svc_mode = [i|ip|alt_ip|gf]
  */
-static vpx_codec_err_t parse_options(SvcContext *svc_ctx, const char *options) {
+static vpx_codec_err_t parse_options(SvcContext_t *svc_ctx, const char *options) {
   char *input_string;
   char *option_name;
   char *option_value;
   char *input_ptr;
-  SvcInternal *const si = get_svc_internal(svc_ctx);
+  SvcInternal_t *const si = get_svc_internal(svc_ctx);
   vpx_codec_err_t res = VPX_CODEC_OK;
   int i, alt_ref_enabled = 0;
 
@@ -315,8 +287,9 @@
   return res;
 }
 
-vpx_codec_err_t vpx_svc_set_options(SvcContext *svc_ctx, const char *options) {
-  SvcInternal *const si = get_svc_internal(svc_ctx);
+vpx_codec_err_t vpx_svc_set_options(SvcContext_t *svc_ctx,
+                                    const char *options) {
+  SvcInternal_t *const si = get_svc_internal(svc_ctx);
   if (svc_ctx == NULL || options == NULL || si == NULL) {
     return VPX_CODEC_INVALID_PARAM;
   }
@@ -325,10 +298,10 @@
   return VPX_CODEC_OK;
 }
 
-void assign_layer_bitrates(const SvcContext *svc_ctx,
+void assign_layer_bitrates(const SvcContext_t *svc_ctx,
                            vpx_codec_enc_cfg_t *const enc_cfg) {
   int i;
-  const SvcInternal *const si = get_const_svc_internal(svc_ctx);
+  const SvcInternal_t *const si = get_const_svc_internal(svc_ctx);
 
   if (si->bitrates[0] != 0) {
     enc_cfg->rc_target_bitrate = 0;
@@ -359,12 +332,12 @@
   }
 }
 
-vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx,
+vpx_codec_err_t vpx_svc_init(SvcContext_t *svc_ctx, vpx_codec_ctx_t *codec_ctx,
                              vpx_codec_iface_t *iface,
                              vpx_codec_enc_cfg_t *enc_cfg) {
   vpx_codec_err_t res;
   int i;
-  SvcInternal *const si = get_svc_internal(svc_ctx);
+  SvcInternal_t *const si = get_svc_internal(svc_ctx);
   if (svc_ctx == NULL || codec_ctx == NULL || iface == NULL ||
       enc_cfg == NULL) {
     return VPX_CODEC_INVALID_PARAM;
@@ -454,13 +427,15 @@
  * Encode a frame into multiple layers
  * Create a superframe containing the individual layers
  */
-vpx_codec_err_t vpx_svc_encode(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx,
-                               struct vpx_image *rawimg, vpx_codec_pts_t pts,
+vpx_codec_err_t vpx_svc_encode(SvcContext_t *svc_ctx,
+                               vpx_codec_ctx_t *codec_ctx,
+                               struct vpx_image *rawimg,
+                               vpx_codec_pts_t pts,
                                int64_t duration, int deadline) {
   vpx_codec_err_t res;
   vpx_codec_iter_t iter;
   const vpx_codec_cx_pkt_t *cx_pkt;
-  SvcInternal *const si = get_svc_internal(svc_ctx);
+  SvcInternal_t *const si = get_svc_internal(svc_ctx);
   if (svc_ctx == NULL || codec_ctx == NULL || si == NULL) {
     return VPX_CODEC_INVALID_PARAM;
   }
@@ -523,8 +498,8 @@
   return VPX_CODEC_OK;
 }
 
-const char *vpx_svc_get_message(const SvcContext *svc_ctx) {
-  const SvcInternal *const si = get_const_svc_internal(svc_ctx);
+const char *vpx_svc_get_message(const SvcContext_t *svc_ctx) {
+  const SvcInternal_t *const si = get_const_svc_internal(svc_ctx);
   if (svc_ctx == NULL || si == NULL) return NULL;
   return si->message_buffer;
 }
@@ -535,7 +510,7 @@
 }
 
 // dump accumulated statistics and reset accumulated values
-const char *vpx_svc_dump_statistics(SvcContext *svc_ctx) {
+const char *vpx_svc_dump_statistics(SvcContext_t *svc_ctx) {
   int number_of_frames;
   int i, j;
   uint32_t bytes_total = 0;
@@ -544,7 +519,7 @@
   double mse[COMPONENTS];
   double y_scale;
 
-  SvcInternal *const si = get_svc_internal(svc_ctx);
+  SvcInternal_t *const si = get_svc_internal(svc_ctx);
   if (svc_ctx == NULL || si == NULL) return NULL;
 
   svc_log_reset(svc_ctx);
@@ -594,12 +569,12 @@
   return vpx_svc_get_message(svc_ctx);
 }
 
-void vpx_svc_release(SvcContext *svc_ctx) {
-  SvcInternal *si;
+void vpx_svc_release(SvcContext_t *svc_ctx) {
+  SvcInternal_t *si;
   if (svc_ctx == NULL) return;
   // do not use get_svc_internal as it will unnecessarily allocate an
-  // SvcInternal if it was not already allocated
-  si = (SvcInternal *)svc_ctx->internal;
+  // SvcInternal_t if it was not already allocated
+  si = (SvcInternal_t *)svc_ctx->internal;
   if (si != NULL) {
     free(si);
     svc_ctx->internal = NULL;
--- a/vpx/svc_context.h
+++ b/vpx/svc_context.h
@@ -29,7 +29,7 @@
   SVC_LOG_DEBUG
 } SVC_LOG_LEVEL;
 
-typedef struct {
+typedef struct SvcContext {
   // public interface to svc_command options
   int spatial_layers;               // number of spatial layers
   int temporal_layers;               // number of temporal layers
@@ -39,8 +39,38 @@
 
   // private storage for vpx_svc_encode
   void *internal;
-} SvcContext;
+} SvcContext_t;
 
+#define OPTION_BUFFER_SIZE 1024
+#define COMPONENTS 4  // psnr & sse statistics maintained for total, y, u, v
+
+typedef struct SvcInternal {
+  char options[OPTION_BUFFER_SIZE];        // set by vpx_svc_set_options
+
+  // values extracted from option, quantizers
+  vpx_svc_extra_cfg_t svc_params;
+  int enable_auto_alt_ref[VPX_SS_MAX_LAYERS];
+  int bitrates[VPX_SS_MAX_LAYERS];
+
+  // accumulated statistics
+  double psnr_sum[VPX_SS_MAX_LAYERS][COMPONENTS];   // total/Y/U/V
+  uint64_t sse_sum[VPX_SS_MAX_LAYERS][COMPONENTS];
+  uint32_t bytes_sum[VPX_SS_MAX_LAYERS];
+
+  // codec encoding values
+  int width;    // width of highest layer
+  int height;   // height of highest layer
+  int kf_dist;  // distance between keyframes
+
+  // state variables
+  int psnr_pkt_received;
+  int layer;
+  int use_multiple_frame_contexts;
+
+  char message_buffer[2048];
+  vpx_codec_ctx_t *codec_ctx;
+} SvcInternal_t;
+
 /**
  * Set SVC options
  * options are supplied as a single string separated by spaces
@@ -49,35 +79,38 @@
  *         scaling-factors=<n1>/<d1>,<n2>/<d2>,...
  *         quantizers=<q1>,<q2>,...
  */
-vpx_codec_err_t vpx_svc_set_options(SvcContext *svc_ctx, const char *options);
+vpx_codec_err_t vpx_svc_set_options(SvcContext_t *svc_ctx, const char *options);
 
 /**
  * initialize SVC encoding
  */
-vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx,
+vpx_codec_err_t vpx_svc_init(SvcContext_t *svc_ctx,
+                             vpx_codec_ctx_t *codec_ctx,
                              vpx_codec_iface_t *iface,
                              vpx_codec_enc_cfg_t *cfg);
 /**
  * encode a frame of video with multiple layers
  */
-vpx_codec_err_t vpx_svc_encode(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx,
-                               struct vpx_image *rawimg, vpx_codec_pts_t pts,
+vpx_codec_err_t vpx_svc_encode(SvcContext_t *svc_ctx,
+                               vpx_codec_ctx_t *codec_ctx,
+                               struct vpx_image *rawimg,
+                               vpx_codec_pts_t pts,
                                int64_t duration, int deadline);
 
 /**
  * finished with svc encoding, release allocated resources
  */
-void vpx_svc_release(SvcContext *svc_ctx);
+void vpx_svc_release(SvcContext_t *svc_ctx);
 
 /**
  * dump accumulated statistics and reset accumulated values
  */
-const char *vpx_svc_dump_statistics(SvcContext *svc_ctx);
+const char *vpx_svc_dump_statistics(SvcContext_t *svc_ctx);
 
 /**
  *  get status message from previous encode
  */
-const char *vpx_svc_get_message(const SvcContext *svc_ctx);
+const char *vpx_svc_get_message(const SvcContext_t *svc_ctx);
 
 #ifdef __cplusplus
 }  // extern "C"
--- a/vpx/vp8cx.h
+++ b/vpx/vp8cx.h
@@ -237,6 +237,7 @@
 
   VP9E_SET_SVC,
   VP9E_SET_SVC_PARAMETERS,
+
   /*!\brief control function to set svc layer for spatial and temporal.
    * \note Valid ranges: 0..#vpx_codec_enc_cfg::ss_number_layers for spatial
    *                     layer and 0..#vpx_codec_enc_cfg::ts_number_layers for
@@ -243,7 +244,9 @@
    *                     temporal layer.
    */
   VP9E_SET_SVC_LAYER_ID,
-  VP9E_SET_TUNE_CONTENT
+  VP9E_SET_TUNE_CONTENT,
+  VP9E_GET_SVC_LAYER_ID,
+  VP9E_REGISTER_CX_CALLBACK,
 };
 
 /*!\brief vpx 1-D scaling mode
@@ -365,6 +368,7 @@
 
 VPX_CTRL_USE_TYPE(VP9E_SET_SVC,                int)
 VPX_CTRL_USE_TYPE(VP9E_SET_SVC_PARAMETERS,     void *)
+VPX_CTRL_USE_TYPE(VP9E_REGISTER_CX_CALLBACK,   void *)
 VPX_CTRL_USE_TYPE(VP9E_SET_SVC_LAYER_ID,       vpx_svc_layer_id_t *)
 
 VPX_CTRL_USE_TYPE(VP8E_SET_CPUUSED,            int)
@@ -385,6 +389,7 @@
 
 VPX_CTRL_USE_TYPE(VP8E_GET_LAST_QUANTIZER,     int *)
 VPX_CTRL_USE_TYPE(VP8E_GET_LAST_QUANTIZER_64,  int *)
+VPX_CTRL_USE_TYPE(VP9E_GET_SVC_LAYER_ID,  vpx_svc_layer_id_t *)
 
 VPX_CTRL_USE_TYPE(VP8E_SET_MAX_INTRA_BITRATE_PCT, unsigned int)
 VPX_CTRL_USE_TYPE(VP8E_SET_MAX_INTER_BITRATE_PCT, unsigned int)
--- a/vpx/vpx_encoder.h
+++ b/vpx/vpx_encoder.h
@@ -220,6 +220,22 @@
   } vpx_codec_cx_pkt_t; /**< alias for struct vpx_codec_cx_pkt */
 
 
+  /*!\brief Encoder return output buffer callback
+   *
+   * This callback function, when registered, is invoked with an output packet
+   * as each spatial layer is encoded.
+   */
+  // putting the definitions here for now. (agrange: find if there
+  // is a better place for this)
+  typedef void (* vpx_codec_enc_output_cx_pkt_cb_fn_t)(vpx_codec_cx_pkt_t *pkt,
+                                                       void *user_data);
+
+  /*!\brief Callback function pointer / user data pair storage */
+  typedef struct vpx_codec_enc_output_cx_cb_pair {
+    vpx_codec_enc_output_cx_pkt_cb_fn_t output_cx_pkt;
+    void                            *user_priv;
+  } vpx_codec_priv_output_cx_pkt_cb_pair_t;
+
   /*!\brief Rational Number
    *
    * This structure holds a fractional value.