shithub: libvpx

ref: d9ca52452bff9481ec1ed4684985a86fb7fc3c3e
dir: /vp8_scalable_patterns.c/

View raw version
/*
 *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */


/*
 * This is an example demonstrating how to implement a multi-layer VP8
 * encoding scheme based on temporal scalability for video applications
 * that benefit from a scalable bitstream.
 */
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#define VPX_CODEC_DISABLE_COMPAT 1
#include "vpx/vpx_encoder.h"
#include "vpx/vp8cx.h"
#define interface (vpx_codec_vp8_cx())
#define fourcc    0x30385056

#define IVF_FILE_HDR_SZ  (32)
#define IVF_FRAME_HDR_SZ (12)

static void mem_put_le16(char *mem, unsigned int val) {
    mem[0] = val;
    mem[1] = val>>8;
}

static void mem_put_le32(char *mem, unsigned int val) {
    mem[0] = val;
    mem[1] = val>>8;
    mem[2] = val>>16;
    mem[3] = val>>24;
}

static void die(const char *fmt, ...) {
    va_list ap;

    va_start(ap, fmt);
    vprintf(fmt, ap);
    if(fmt[strlen(fmt)-1] != '\n')
        printf("\n");
    exit(EXIT_FAILURE);
}

static void die_codec(vpx_codec_ctx_t *ctx, const char *s) {
    const char *detail = vpx_codec_error_detail(ctx);

    printf("%s: %s\n", s, vpx_codec_error(ctx));
    if(detail)
        printf("    %s\n",detail);
    exit(EXIT_FAILURE);
}

static int read_frame(FILE *f, vpx_image_t *img) {
    size_t nbytes, to_read;
    int    res = 1;

    to_read = img->w*img->h*3/2;
    nbytes = fread(img->planes[0], 1, to_read, f);
    if(nbytes != to_read) {
        res = 0;
        if(nbytes > 0)
            printf("Warning: Read partial frame. Check your width & height!\n");
    }
    return res;
}

static void write_ivf_file_header(FILE *outfile,
                                  const vpx_codec_enc_cfg_t *cfg,
                                  int frame_cnt) {
    char header[32];

    if(cfg->g_pass != VPX_RC_ONE_PASS && cfg->g_pass != VPX_RC_LAST_PASS)
        return;
    header[0] = 'D';
    header[1] = 'K';
    header[2] = 'I';
    header[3] = 'F';
    mem_put_le16(header+4,  0);                   /* version */
    mem_put_le16(header+6,  32);                  /* headersize */
    mem_put_le32(header+8,  fourcc);              /* headersize */
    mem_put_le16(header+12, cfg->g_w);            /* width */
    mem_put_le16(header+14, cfg->g_h);            /* height */
    mem_put_le32(header+16, cfg->g_timebase.den); /* rate */
    mem_put_le32(header+20, cfg->g_timebase.num); /* scale */
    mem_put_le32(header+24, frame_cnt);           /* length */
    mem_put_le32(header+28, 0);                   /* unused */

    if(fwrite(header, 1, 32, outfile));
}


static void write_ivf_frame_header(FILE *outfile,
                                   const vpx_codec_cx_pkt_t *pkt)
{
    char             header[12];
    vpx_codec_pts_t  pts;

    if(pkt->kind != VPX_CODEC_CX_FRAME_PKT)
        return;

    pts = pkt->data.frame.pts;
    mem_put_le32(header, pkt->data.frame.sz);
    mem_put_le32(header+4, pts&0xFFFFFFFF);
    mem_put_le32(header+8, pts >> 32);

    if(fwrite(header, 1, 12, outfile));
}

static int mode_to_num_layers[9] = {2, 2, 3, 3, 3, 3, 5, 2, 3};

int main(int argc, char **argv) {
    FILE                *infile, *outfile[MAX_LAYERS];
    vpx_codec_ctx_t      codec;
    vpx_codec_enc_cfg_t  cfg;
    int                  frame_cnt = 0;
    vpx_image_t          raw;
    vpx_codec_err_t      res;
    unsigned int         width;
    unsigned int         height;
    int                  frame_avail;
    int                  got_data;
    int                  flags = 0;
    int                  i;
    int                  pts = 0;              // PTS starts at 0
    int                  frame_duration = 1;   // 1 timebase tick per frame

    int                  layering_mode = 0;
    int                  frames_in_layer[MAX_LAYERS] = {0};
    int                  layer_flags[MAX_PERIODICITY] = {0};
    int                  flag_periodicity;
    int                  max_intra_size_pct;

    // Check usage and arguments
    if (argc < 9)
        die("Usage: %s <infile> <outfile> <width> <height> <rate_num> "
            " <rate_den> <mode> <Rate_0> ... <Rate_nlayers-1>\n", argv[0]);

    width  = strtol (argv[3], NULL, 0);
    height = strtol (argv[4], NULL, 0);
    if (width < 16 || width%2 || height <16 || height%2)
        die ("Invalid resolution: %d x %d", width, height);

    if (!sscanf(argv[7], "%d", &layering_mode))
        die ("Invalid mode %s", argv[7]);
    if (layering_mode<0 || layering_mode>8)
        die ("Invalid mode (0..8) %s", argv[7]);

    if (argc != 8+mode_to_num_layers[layering_mode])
        die ("Invalid number of arguments");

    if (!vpx_img_alloc (&raw, VPX_IMG_FMT_I420, width, height, 1))
        die ("Failed to allocate image", width, height);

    printf("Using %s\n",vpx_codec_iface_name(interface));

    // Populate encoder configuration
    res = vpx_codec_enc_config_default(interface, &cfg, 0);
    if(res) {
        printf("Failed to get config: %s\n", vpx_codec_err_to_string(res));
        return EXIT_FAILURE;
    }

    // Update the default configuration with our settings
    cfg.g_w = width;
    cfg.g_h = height;

    // Timebase format e.g. 30fps: numerator=1, demoninator=30
    if (!sscanf (argv[5], "%d", &cfg.g_timebase.num ))
        die ("Invalid timebase numerator %s", argv[5]);
    if (!sscanf (argv[6], "%d", &cfg.g_timebase.den ))
        die ("Invalid timebase denominator %s", argv[6]);

    for (i=8; i<8+mode_to_num_layers[layering_mode]; i++)
        if (!sscanf(argv[i], "%d", &cfg.ts_target_bitrate[i-8]))
            die ("Invalid data rate %s", argv[i]);

    // Real time parameters
    cfg.rc_dropframe_thresh = 0;  // 30
    cfg.rc_end_usage        = VPX_CBR;
    cfg.rc_resize_allowed   = 0;
    cfg.rc_min_quantizer    = 8;
    cfg.rc_max_quantizer    = 56;
    cfg.rc_undershoot_pct   = 100;
    cfg.rc_overshoot_pct    = 15;
    cfg.rc_buf_initial_sz   = 500;
    cfg.rc_buf_optimal_sz   = 600;
    cfg.rc_buf_sz           = 1000;

    // Enable error resilient mode
    cfg.g_error_resilient = 1;
    cfg.g_lag_in_frames   = 0;
    cfg.kf_mode           = VPX_KF_DISABLED;

    // Disable automatic keyframe placement
    cfg.kf_min_dist = cfg.kf_max_dist = 1000;

    // Temporal scaling parameters:
    // NOTE: The 3 prediction frames cannot be used interchangeably due to
    // differences in the way they are handled throughout the code. The
    // frames should be allocated to layers in the order LAST, GF, ARF.
    // Other combinations work, but may produce slightly inferior results.
    switch (layering_mode)
    {

    case 0:
    {
        // 2-layers, 2-frame period
        int ids[2] = {0,1};
        cfg.ts_number_layers     = 2;
        cfg.ts_periodicity       = 2;
        cfg.ts_rate_decimator[0] = 2;
        cfg.ts_rate_decimator[1] = 1;
        memcpy(cfg.ts_layer_id, ids, sizeof(ids));

        flag_periodicity = cfg.ts_periodicity;
#if 1
        // 0=L, 1=GF, Intra-layer prediction enabled
        layer_flags[0] = VPX_EFLAG_FORCE_KF  |
                         VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
                         VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF;
        layer_flags[1] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST |
                         VP8_EFLAG_NO_REF_ARF;
#else
        // 0=L, 1=GF, Intra-layer prediction disabled
        layer_flags[0] = VPX_EFLAG_FORCE_KF  |
                         VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
                         VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF;
        layer_flags[1] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST |
                         VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_REF_LAST;
#endif
        break;
    }

    case 1:
    {
        // 2-layers, 3-frame period
        int ids[3] = {0,1,1};
        cfg.ts_number_layers     = 2;
        cfg.ts_periodicity       = 3;
        cfg.ts_rate_decimator[0] = 3;
        cfg.ts_rate_decimator[1] = 1;
        memcpy(cfg.ts_layer_id, ids, sizeof(ids));

        flag_periodicity = cfg.ts_periodicity;

        // 0=L, 1=GF, Intra-layer prediction enabled
        layer_flags[0] = VPX_EFLAG_FORCE_KF  |
                         VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
                         VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
        layer_flags[1] =
        layer_flags[2] = VP8_EFLAG_NO_REF_GF  |
                         VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_ARF |
                                                VP8_EFLAG_NO_UPD_LAST;
        break;
    }

    case 2:
    {
        // 3-layers, 6-frame period
        int ids[6] = {0,2,2,1,2,2};
        cfg.ts_number_layers     = 3;
        cfg.ts_periodicity       = 6;
        cfg.ts_rate_decimator[0] = 6;
        cfg.ts_rate_decimator[1] = 3;
        cfg.ts_rate_decimator[2] = 1;
        memcpy(cfg.ts_layer_id, ids, sizeof(ids));

        flag_periodicity = cfg.ts_periodicity;

        // 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled
        layer_flags[0] = VPX_EFLAG_FORCE_KF  |
                         VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
                         VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
        layer_flags[3] = VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_ARF |
                                                VP8_EFLAG_NO_UPD_LAST;
        layer_flags[1] =
        layer_flags[2] =
        layer_flags[4] =
        layer_flags[5] = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_LAST;
        break;
    }

    case 3:
    {
        // 3-layers, 4-frame period
        int ids[4] = {0,2,1,2};
        cfg.ts_number_layers     = 3;
        cfg.ts_periodicity       = 4;
        cfg.ts_rate_decimator[0] = 4;
        cfg.ts_rate_decimator[1] = 2;
        cfg.ts_rate_decimator[2] = 1;
        memcpy(cfg.ts_layer_id, ids, sizeof(ids));

        flag_periodicity = cfg.ts_periodicity;

        // 0=L, 1=GF, 2=ARF, Intra-layer prediction disabled
        layer_flags[0] = VPX_EFLAG_FORCE_KF  |
                         VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
                         VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
        layer_flags[2] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
                         VP8_EFLAG_NO_UPD_ARF |
                         VP8_EFLAG_NO_UPD_LAST;
        layer_flags[1] =
        layer_flags[3] = VP8_EFLAG_NO_REF_ARF |
                         VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF |
                         VP8_EFLAG_NO_UPD_ARF;
        break;
    }

    case 4:
    {
        // 3-layers, 4-frame period
        int ids[4] = {0,2,1,2};
        cfg.ts_number_layers     = 3;
        cfg.ts_periodicity       = 4;
        cfg.ts_rate_decimator[0] = 4;
        cfg.ts_rate_decimator[1] = 2;
        cfg.ts_rate_decimator[2] = 1;
        memcpy(cfg.ts_layer_id, ids, sizeof(ids));

        flag_periodicity = cfg.ts_periodicity;

        // 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled in layer 1,
        // disabled in layer 2
        layer_flags[0] = VPX_EFLAG_FORCE_KF  |
                         VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
                         VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
        layer_flags[2] = VP8_EFLAG_NO_REF_ARF |
                         VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF;
        layer_flags[1] =
        layer_flags[3] = VP8_EFLAG_NO_REF_ARF |
                         VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF |
                         VP8_EFLAG_NO_UPD_ARF;
        break;
    }

    case 5:
    {
        // 3-layers, 4-frame period
        int ids[4] = {0,2,1,2};
        cfg.ts_number_layers     = 3;
        cfg.ts_periodicity       = 4;
        cfg.ts_rate_decimator[0] = 4;
        cfg.ts_rate_decimator[1] = 2;
        cfg.ts_rate_decimator[2] = 1;
        memcpy(cfg.ts_layer_id, ids, sizeof(ids));

        flag_periodicity = cfg.ts_periodicity;

        // 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled
        layer_flags[0] = VPX_EFLAG_FORCE_KF  |
                         VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
                         VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
        layer_flags[2] = VP8_EFLAG_NO_REF_ARF |
                         VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF;
        layer_flags[1] =
        layer_flags[3] = VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF;
        break;
    }

    case 6:
    {
        // NOTE: Probably of academic interest only

        // 5-layers, 16-frame period
        int ids[16] = {0,4,3,4,2,4,3,4,1,4,3,4,2,4,3,4};
        cfg.ts_number_layers     = 5;
        cfg.ts_periodicity       = 16;
        cfg.ts_rate_decimator[0] = 16;
        cfg.ts_rate_decimator[1] = 8;
        cfg.ts_rate_decimator[2] = 4;
        cfg.ts_rate_decimator[3] = 2;
        cfg.ts_rate_decimator[4] = 1;
        memcpy(cfg.ts_layer_id, ids, sizeof(ids));

        flag_periodicity = cfg.ts_periodicity;

        layer_flags[0]  = VPX_EFLAG_FORCE_KF;
        layer_flags[1]  =
        layer_flags[3]  =
        layer_flags[5]  =
        layer_flags[7]  =
        layer_flags[9]  =
        layer_flags[11] =
        layer_flags[13] =
        layer_flags[15] = VP8_EFLAG_NO_UPD_LAST |
                          VP8_EFLAG_NO_UPD_GF   |
                          VP8_EFLAG_NO_UPD_ARF;
        layer_flags[2]  =
        layer_flags[6]  =
        layer_flags[10] =
        layer_flags[14] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_GF;
        layer_flags[4]  =
        layer_flags[12] = VP8_EFLAG_NO_REF_LAST |
                          VP8_EFLAG_NO_UPD_ARF;
        layer_flags[8]  = VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_REF_GF;
        break;
    }

    case 7:
    {
        // 2-layers
        int ids[2] = {0,1};
        cfg.ts_number_layers     = 2;
        cfg.ts_periodicity       = 2;
        cfg.ts_rate_decimator[0] = 2;
        cfg.ts_rate_decimator[1] = 1;
        memcpy(cfg.ts_layer_id, ids, sizeof(ids));

        flag_periodicity = 8;

        // 0=L, 1=GF
        layer_flags[0] = VPX_EFLAG_FORCE_KF  |
                         VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
                         VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
        layer_flags[1] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
                         VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF;
        layer_flags[2] =
        layer_flags[4] =
        layer_flags[6] = VP8_EFLAG_NO_REF_GF  | VP8_EFLAG_NO_REF_ARF |
                         VP8_EFLAG_NO_UPD_GF  | VP8_EFLAG_NO_UPD_ARF;
        layer_flags[3] =
        layer_flags[5] = VP8_EFLAG_NO_REF_ARF |
                         VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST;
        layer_flags[7] = VP8_EFLAG_NO_REF_ARF |
                         VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF |
                         VP8_EFLAG_NO_UPD_ARF |
                         VP8_EFLAG_NO_UPD_ENTROPY;
        break;
    }

    case 8:
    {
        // 3-layers
        int ids[4] = {0,2,1,2};
        cfg.ts_number_layers     = 3;
        cfg.ts_periodicity       = 4;
        cfg.ts_rate_decimator[0] = 4;
        cfg.ts_rate_decimator[1] = 2;
        cfg.ts_rate_decimator[2] = 1;
        memcpy(cfg.ts_layer_id, ids, sizeof(ids));

        flag_periodicity = 8;

        // 0=L, 1=GF, 2=ARF
        layer_flags[0] = VPX_EFLAG_FORCE_KF  |
                         VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
                         VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
        layer_flags[1] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
                         VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF;
        layer_flags[2] = VP8_EFLAG_NO_REF_GF   | VP8_EFLAG_NO_REF_ARF |
                         VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF;
        layer_flags[3] =
        layer_flags[5] = VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF;
        layer_flags[4] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
                         VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
        layer_flags[6] = VP8_EFLAG_NO_REF_ARF |
                         VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF;
        layer_flags[7] = VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF |
                         VP8_EFLAG_NO_UPD_ARF |
                         VP8_EFLAG_NO_UPD_ENTROPY;
        break;
    }
    default:
        break;
    }

    // Open input file
    if(!(infile = fopen(argv[1], "rb")))
        die("Failed to open %s for reading", argv[1]);

    // Open an output file for each stream
    for (i=0; i<cfg.ts_number_layers; i++)
    {
        char file_name[512];
        sprintf (file_name, "%s_%d.ivf", argv[2], i);
        if (!(outfile[i] = fopen(file_name, "wb")))
            die("Failed to open %s for writing", file_name);
        write_ivf_file_header(outfile[i], &cfg, 0);
    }

    // Initialize codec
    if (vpx_codec_enc_init (&codec, interface, &cfg, 0))
        die_codec (&codec, "Failed to initialize encoder");

    // Cap CPU & first I-frame size
    vpx_codec_control (&codec, VP8E_SET_CPUUSED,                -6);
    vpx_codec_control (&codec, VP8E_SET_STATIC_THRESHOLD,      800);
    vpx_codec_control (&codec, VP8E_SET_NOISE_SENSITIVITY,       2);

    max_intra_size_pct = (int) (((double)cfg.rc_buf_optimal_sz * 0.5)
                         * ((double) cfg.g_timebase.den / cfg.g_timebase.num)
                         / 10.0);
    //printf ("max_intra_size_pct=%d\n", max_intra_size_pct);

    vpx_codec_control(&codec, VP8E_SET_MAX_INTRA_BITRATE_PCT,
                      max_intra_size_pct);
    //    vpx_codec_control (&codec, VP8E_SET_TOKEN_PARTITIONS,
    //                      static_cast<vp8e_token_partitions>(_tokenPartitions));

    frame_avail = 1;
    while (frame_avail || got_data) {
        vpx_codec_iter_t iter = NULL;
        const vpx_codec_cx_pkt_t *pkt;

        flags = layer_flags[frame_cnt % flag_periodicity];

        frame_avail = read_frame(infile, &raw);
        if (vpx_codec_encode(&codec, frame_avail? &raw : NULL, pts,
                            1, flags, VPX_DL_REALTIME))
            die_codec(&codec, "Failed to encode frame");

        // Reset KF flag
        if (layering_mode != 6)
            layer_flags[0] &= ~VPX_EFLAG_FORCE_KF;

        got_data = 0;
        while ( (pkt = vpx_codec_get_cx_data(&codec, &iter)) ) {
            got_data = 1;
            switch (pkt->kind) {
            case VPX_CODEC_CX_FRAME_PKT:
                for (i=cfg.ts_layer_id[frame_cnt % cfg.ts_periodicity];
                                              i<cfg.ts_number_layers; i++)
                {
                    write_ivf_frame_header(outfile[i], pkt);
                    if (fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz,
                              outfile[i]));
                    frames_in_layer[i]++;
                }
                break;
            default:
                break;
            }
            printf (pkt->kind == VPX_CODEC_CX_FRAME_PKT
                    && (pkt->data.frame.flags & VPX_FRAME_IS_KEY)? "K":".");
            fflush (stdout);
        }
        frame_cnt++;
        pts += frame_duration;
    }
    printf ("\n");
    fclose (infile);

    printf ("Processed %d frames.\n",frame_cnt-1);
    if (vpx_codec_destroy(&codec))
            die_codec (&codec, "Failed to destroy codec");

    // Try to rewrite the output file headers with the actual frame count
    for (i=0; i<cfg.ts_number_layers; i++)
    {
        if (!fseek(outfile[i], 0, SEEK_SET))
            write_ivf_file_header (outfile[i], &cfg, frames_in_layer[i]);
        fclose (outfile[i]);
    }

    return EXIT_SUCCESS;
}