shithub: libvpx

Download patch

ref: 323b11a99b00a019e497e87b6b76df7a9d45c1d5
parent: 5532775efe808cb0942e7b99bf2f232c6ce99fee
parent: e7aa1e3630bf695a739cf455f3eafe7d135608ef
author: Angie Chiang <angiebird@google.com>
date: Thu Mar 5 14:24:29 EST 2020

Merge changes Ie7c70a1d,I2c5abbe2,If41a1ea6,Id6ba4664,I156308bc

* changes:
  Add unit test for ref_frame_info
  Add key frame group info to SimpleEncode
  Add ref_frame_info to encode_frame_result
  Add init/update_frame_indexes()
  Add GetVectorData()

--- a/test/simple_encode_test.cc
+++ b/test/simple_encode_test.cc
@@ -92,6 +92,8 @@
       EXPECT_GE(encode_frame_result.psnr, 34)
           << "The psnr is supposed to be greater than 34 given the "
              "target_bitrate 1000 kbps";
+      EXPECT_EQ(encode_frame_result.ref_frame_info,
+                encode_frame_list[group_index].ref_frame_info);
       total_data_bit_size += encode_frame_result.coding_data_bit_size;
       ++frame_coding_index;
     }
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -64,6 +64,12 @@
 
 typedef int8_t MV_REFERENCE_FRAME;
 
+static INLINE int mv_ref_frame_to_inter_ref_idx(
+    MV_REFERENCE_FRAME mv_ref_frame) {
+  assert(LAST_FRAME <= mv_ref_frame && mv_ref_frame < MAX_REF_FRAMES);
+  return (int)mv_ref_frame - 1;  // Index into MAX_INTER_REF_FRAMES arrays.
+}
+
 // This structure now relates to 8x8 block regions.
 typedef struct MODE_INFO {
   // Common for both INTER and INTRA blocks
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -69,7 +69,18 @@
   int mi_rows;
   int mi_cols;
   uint8_t released;
-  int frame_index;
+
+  // Note that frame_index/frame_coding_index are only set by set_frame_index()
+  // on the encoder side.
+
+  // TODO(angiebird): Set frame_index/frame_coding_index on the decoder side
+  // properly.
+  int frame_index;  // Display order in the video, it's equivalent to the
+                    // show_idx defined in EncodeFrameInfo.
+#if CONFIG_RATE_CTRL
+  int frame_coding_index;  // The coding order (starting from zero) of this
+                           // frame.
+#endif                     // CONFIG_RATE_CTRL
   vpx_codec_frame_buffer_t raw_frame_buffer;
   YV12_BUFFER_CONFIG buf;
 } RefCntBuffer;
@@ -226,7 +237,16 @@
   unsigned int frame_context_idx; /* Context to use/update */
   FRAME_COUNTS counts;
 
+  // TODO(angiebird): Group current_video_frame/current_frame_coding_index
+  // into a structure.
   unsigned int current_video_frame;
+#if CONFIG_RATE_CTRL
+  // Each show or no show frame is assigned with a coding index based on its
+  // coding order (starting from zero).
+
+  // Current frame's coding index.
+  int current_frame_coding_index;
+#endif
   BITSTREAM_PROFILE profile;
 
   // VPX_BITS_8 in profile 0 or 1, VPX_BITS_10 or VPX_BITS_12 in profile 2 or 3.
@@ -253,6 +273,24 @@
 
   int lf_row;
 } VP9_COMMON;
+
+static INLINE void init_frame_indexes(VP9_COMMON *cm) {
+#if CONFIG_RATE_CTRL
+  cm->current_frame_coding_index = 0;  // Coding indexes start from zero.
+#endif  // CONFIG_RATE_CTRL
+  cm->current_video_frame = 0;
+}
+
+static INLINE void update_frame_indexes(VP9_COMMON *cm, int show_frame) {
+#if CONFIG_RATE_CTRL
+  // Every coded frame, shown or not, advances the coding index.
+  ++cm->current_frame_coding_index;
+#endif  // CONFIG_RATE_CTRL
+  // An altref buffer update is not a real frame, so it is not counted here.
+  if (show_frame) {
+    ++cm->current_video_frame;
+  }
+}
 
 typedef struct {
   int frame_width;
--- a/vp9/decoder/vp9_decoder.c
+++ b/vp9/decoder/vp9_decoder.c
@@ -188,7 +188,7 @@
   memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map));
   memset(&cm->next_ref_frame_map, -1, sizeof(cm->next_ref_frame_map));
 
-  cm->current_video_frame = 0;
+  init_frame_indexes(cm);
   pbi->ready_for_new_data = 1;
   pbi->common.buffer_pool = pool;
 
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -2311,7 +2311,7 @@
 
   vp9_rc_init(&cpi->oxcf, oxcf->pass, &cpi->rc);
 
-  cm->current_video_frame = 0;
+  init_frame_indexes(cm);
   cpi->partition_search_skippable_frame = 0;
   cpi->tile_data = NULL;
 
@@ -4887,6 +4887,9 @@
     const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
     ref_buffer->frame_index =
         cm->current_video_frame + gf_group->arf_src_offset[gf_group->index];
+#if CONFIG_RATE_CTRL
+    ref_buffer->frame_coding_index = cm->current_frame_coding_index;
+#endif  // CONFIG_RATE_CTRL
   }
 }
 
@@ -5080,9 +5083,22 @@
   cpi->norm_wiener_variance = VPXMAX(1, cpi->norm_wiener_variance);
 }
 
-static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size,
-                                      uint8_t *dest,
-                                      unsigned int *frame_flags) {
+#if !CONFIG_REALTIME_ONLY
+static void update_encode_frame_result(
+    int ref_frame_flags, FRAME_UPDATE_TYPE update_type,
+    const YV12_BUFFER_CONFIG *source_frame, const RefCntBuffer *coded_frame_buf,
+    RefCntBuffer *ref_frame_bufs[MAX_INTER_REF_FRAMES], int quantize_index,
+    uint32_t bit_depth, uint32_t input_bit_depth, const FRAME_COUNTS *counts,
+#if CONFIG_RATE_CTRL
+    const PARTITION_INFO *partition_info,
+    const MOTION_VECTOR_INFO *motion_vector_info,
+#endif  // CONFIG_RATE_CTRL
+    ENCODE_FRAME_RESULT *encode_frame_result);
+#endif  // !CONFIG_REALTIME_ONLY
+
+static void encode_frame_to_data_rate(
+    VP9_COMP *cpi, size_t *size, uint8_t *dest, unsigned int *frame_flags,
+    ENCODE_FRAME_RESULT *encode_frame_result) {
   VP9_COMMON *const cm = &cpi->common;
   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
   struct segmentation *const seg = &cm->seg;
@@ -5247,6 +5263,44 @@
   // build the bitstream
   vp9_pack_bitstream(cpi, dest, size);
 
+#if CONFIG_REALTIME_ONLY
+  (void)encode_frame_result;
+  assert(encode_frame_result == NULL);
+#else  // CONFIG_REALTIME_ONLY
+  if (encode_frame_result != NULL) {
+    const int ref_frame_flags = get_ref_frame_flags(cpi);
+    const RefCntBuffer *coded_frame_buf =
+        get_ref_cnt_buffer(cm, cm->new_fb_idx);
+    RefCntBuffer *ref_frame_bufs[MAX_INTER_REF_FRAMES];
+    get_ref_frame_bufs(cpi, ref_frame_bufs);
+    // update_encode_frame_result() requires that twopass.gf_group.index,
+    // cm->new_fb_idx, cpi->Source, cpi->lst_fb_idx, cpi->gld_fb_idx and
+    // cpi->alt_fb_idx hold the values for the current frame and have
+    // not been updated for the next frame yet.
+    // The update locations are as follows.
+    // 1) twopass.gf_group.index is initialized at define_gf_group by vp9_zero()
+    // for the first frame in the gf_group and is updated for the next frame at
+    // vp9_twopass_postencode_update().
+    // 2) cpi->Source is updated at the beginning of vp9_get_compressed_data()
+    // 3) cm->new_fb_idx is updated at the beginning of
+    // vp9_get_compressed_data() by get_free_fb(cm).
+    // 4) cpi->lst_fb_idx/gld_fb_idx/alt_fb_idx will be updated for the next
+    // frame at vp9_update_reference_frames().
+    // This function needs to be called before vp9_update_reference_frames().
+    // TODO(angiebird): Improve the codebase to make the update of frame
+    // dependent variables more robust.
+    update_encode_frame_result(
+        ref_frame_flags,
+        cpi->twopass.gf_group.update_type[cpi->twopass.gf_group.index],
+        cpi->Source, coded_frame_buf, ref_frame_bufs, vp9_get_quantizer(cpi),
+        cm->bit_depth, cpi->oxcf.input_bit_depth, cpi->td.counts,
+#if CONFIG_RATE_CTRL
+        cpi->partition_info, cpi->motion_vector_info,
+#endif  // CONFIG_RATE_CTRL
+        encode_frame_result);
+  }
+#endif  // CONFIG_REALTIME_ONLY
+
   if (cpi->rc.use_post_encode_drop && cm->base_qindex < cpi->rc.worst_quality &&
       cpi->svc.spatial_layer_id == 0 && post_encode_drop_cbr(cpi, size)) {
     restore_coding_context(cpi);
@@ -5340,11 +5394,9 @@
 
   if (cm->show_frame) {
     vp9_swap_mi_and_prev_mi(cm);
-    // Don't increment frame counters if this was an altref buffer
-    // update not a real frame
-    ++cm->current_video_frame;
     if (cpi->use_svc) vp9_inc_frame_in_layer(cpi);
   }
+  update_frame_indexes(cm, cm->show_frame);
 
   if (cpi->use_svc) {
     cpi->svc
@@ -5370,7 +5422,8 @@
 static void SvcEncode(VP9_COMP *cpi, size_t *size, uint8_t *dest,
                       unsigned int *frame_flags) {
   vp9_rc_get_svc_params(cpi);
-  encode_frame_to_data_rate(cpi, size, dest, frame_flags);
+  encode_frame_to_data_rate(cpi, size, dest, frame_flags,
+                            /*encode_frame_result = */ NULL);
 }
 
 static void Pass0Encode(VP9_COMP *cpi, size_t *size, uint8_t *dest,
@@ -5380,17 +5433,19 @@
   } else {
     vp9_rc_get_one_pass_vbr_params(cpi);
   }
-  encode_frame_to_data_rate(cpi, size, dest, frame_flags);
+  encode_frame_to_data_rate(cpi, size, dest, frame_flags,
+                            /*encode_frame_result = */ NULL);
 }
 
 #if !CONFIG_REALTIME_ONLY
 static void Pass2Encode(VP9_COMP *cpi, size_t *size, uint8_t *dest,
-                        unsigned int *frame_flags) {
+                        unsigned int *frame_flags,
+                        ENCODE_FRAME_RESULT *encode_frame_result) {
   cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED;
 #if CONFIG_MISMATCH_DEBUG
   mismatch_move_frame_idx_w();
 #endif
-  encode_frame_to_data_rate(cpi, size, dest, frame_flags);
+  encode_frame_to_data_rate(cpi, size, dest, frame_flags, encode_frame_result);
 }
 #endif  // !CONFIG_REALTIME_ONLY
 
@@ -7249,11 +7304,10 @@
   }
 }
 #endif  // CONFIG_RATE_CTRL
-
 static void update_encode_frame_result(
-    int show_idx, FRAME_UPDATE_TYPE update_type,
-    const YV12_BUFFER_CONFIG *source_frame,
-    const YV12_BUFFER_CONFIG *coded_frame, int quantize_index,
+    int ref_frame_flags, FRAME_UPDATE_TYPE update_type,
+    const YV12_BUFFER_CONFIG *source_frame, const RefCntBuffer *coded_frame_buf,
+    RefCntBuffer *ref_frame_bufs[MAX_INTER_REF_FRAMES], int quantize_index,
     uint32_t bit_depth, uint32_t input_bit_depth, const FRAME_COUNTS *counts,
 #if CONFIG_RATE_CTRL
     const PARTITION_INFO *partition_info,
@@ -7263,13 +7317,35 @@
 #if CONFIG_RATE_CTRL
   PSNR_STATS psnr;
 #if CONFIG_VP9_HIGHBITDEPTH
-  vpx_calc_highbd_psnr(source_frame, coded_frame, &psnr, bit_depth,
+  vpx_calc_highbd_psnr(source_frame, &coded_frame_buf->buf, &psnr, bit_depth,
                        input_bit_depth);
 #else   // CONFIG_VP9_HIGHBITDEPTH
   (void)bit_depth;
   (void)input_bit_depth;
-  vpx_calc_psnr(source_frame, coded_frame, &psnr);
+  vpx_calc_psnr(source_frame, &coded_frame_buf->buf, &psnr);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
+  encode_frame_result->frame_coding_index = coded_frame_buf->frame_coding_index;
+
+  if (update_type != KF_UPDATE) {
+    const VP9_REFFRAME inter_ref_flags[MAX_INTER_REF_FRAMES] = { VP9_LAST_FLAG,
+                                                                 VP9_GOLD_FLAG,
+                                                                 VP9_ALT_FLAG };
+    int i;
+    for (i = 0; i < MAX_INTER_REF_FRAMES; ++i) {
+      assert(ref_frame_bufs[i] != NULL);
+      encode_frame_result->ref_frame_coding_indexes[i] =
+          ref_frame_bufs[i]->frame_coding_index;
+      encode_frame_result->ref_frame_valid_list[i] =
+          (ref_frame_flags & inter_ref_flags[i]) != 0;
+    }
+  } else {
+    // No reference frame is available when this is a key frame.
+    int i;
+    for (i = 0; i < MAX_INTER_REF_FRAMES; ++i) {
+      encode_frame_result->ref_frame_coding_indexes[i] = -1;
+      encode_frame_result->ref_frame_valid_list[i] = 0;
+    }
+  }
   encode_frame_result->psnr = psnr.psnr[0];
   encode_frame_result->sse = psnr.sse[0];
   copy_frame_counts(counts, &encode_frame_result->frame_counts);
@@ -7276,16 +7352,19 @@
   encode_frame_result->partition_info = partition_info;
   encode_frame_result->motion_vector_info = motion_vector_info;
   if (encode_frame_result->coded_frame.allocated) {
-    yv12_buffer_to_image_buffer(coded_frame, &encode_frame_result->coded_frame);
+    yv12_buffer_to_image_buffer(&coded_frame_buf->buf,
+                                &encode_frame_result->coded_frame);
   }
 #else   // CONFIG_RATE_CTRL
+  (void)ref_frame_flags;
   (void)bit_depth;
   (void)input_bit_depth;
   (void)source_frame;
-  (void)coded_frame;
+  (void)coded_frame_buf;
+  (void)ref_frame_bufs;
   (void)counts;
 #endif  // CONFIG_RATE_CTRL
-  encode_frame_result->show_idx = show_idx;
+  encode_frame_result->show_idx = coded_frame_buf->frame_index;
   encode_frame_result->update_type = update_type;
   encode_frame_result->quantize_index = quantize_index;
 }
@@ -7294,6 +7373,7 @@
 void vp9_init_encode_frame_result(ENCODE_FRAME_RESULT *encode_frame_result) {
   encode_frame_result->show_idx = -1;  // Actual encoding doesn't happen.
 #if CONFIG_RATE_CTRL
+  encode_frame_result->frame_coding_index = -1;
   vp9_zero(encode_frame_result->coded_frame);
   encode_frame_result->coded_frame.allocated = 0;
 #endif  // CONFIG_RATE_CTRL
@@ -7575,29 +7655,7 @@
     cpi->td.mb.inv_txfm_add = lossless ? vp9_iwht4x4_add : vp9_idct4x4_add;
     vp9_first_pass(cpi, source);
   } else if (oxcf->pass == 2 && !cpi->use_svc) {
-    Pass2Encode(cpi, size, dest, frame_flags);
-    // update_encode_frame_result() depends on twopass.gf_group.index and
-    // cm->new_fb_idx and cpi->Source are updated for current properly and have
-    // not been updated for the next frame yet.
-    // The update locations are as follows.
-    // 1) twopass.gf_group.index is initialized at define_gf_group by vp9_zero()
-    // for the first frame in the gf_group and is updated for the next frame at
-    // vp9_twopass_postencode_update().
-    // 2) cpi->Source is updated at the beginning of this function, i.e.
-    // vp9_get_compressed_data()
-    // 3) cm->new_fb_idx is updated at the beginning of this function by
-    // get_free_fb(cm)
-    // TODO(angiebird): Improve the codebase to make the update of frame
-    // dependent variables more robust.
-    update_encode_frame_result(
-        source->show_idx,
-        cpi->twopass.gf_group.update_type[cpi->twopass.gf_group.index],
-        cpi->Source, get_frame_new_buffer(cm), vp9_get_quantizer(cpi),
-        cpi->oxcf.input_bit_depth, cm->bit_depth, cpi->td.counts,
-#if CONFIG_RATE_CTRL
-        cpi->partition_info, cpi->motion_vector_info,
-#endif  // CONFIG_RATE_CTRL
-        encode_frame_result);
+    Pass2Encode(cpi, size, dest, frame_flags, encode_frame_result);
     vp9_twopass_postencode_update(cpi);
   } else if (cpi->use_svc) {
     SvcEncode(cpi, size, dest, frame_flags);
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -936,6 +936,9 @@
   int show_idx;
   FRAME_UPDATE_TYPE update_type;
 #if CONFIG_RATE_CTRL
+  int frame_coding_index;
+  int ref_frame_coding_indexes[MAX_INTER_REF_FRAMES];
+  int ref_frame_valid_list[MAX_INTER_REF_FRAMES];
   double psnr;
   uint64_t sse;
   FRAME_COUNTS frame_counts;
@@ -1043,8 +1046,20 @@
   return (map_idx != INVALID_IDX) ? cm->ref_frame_map[map_idx] : INVALID_IDX;
 }
 
-static INLINE RefCntBuffer *get_ref_cnt_buffer(VP9_COMMON *cm, int fb_idx) {
+static INLINE RefCntBuffer *get_ref_cnt_buffer(const VP9_COMMON *cm,
+                                               int fb_idx) {
   return fb_idx != INVALID_IDX ? &cm->buffer_pool->frame_bufs[fb_idx] : NULL;
+}
+
+static INLINE void get_ref_frame_bufs(
+    const VP9_COMP *cpi, RefCntBuffer *ref_frame_bufs[MAX_INTER_REF_FRAMES]) {
+  // Fill one slot per inter reference with its RefCntBuffer (or NULL).
+  MV_REFERENCE_FRAME ref;
+  for (ref = LAST_FRAME; ref < MAX_REF_FRAMES; ++ref) {
+    const int fb_idx = get_ref_frame_buf_idx(cpi, ref);
+    RefCntBuffer *const buf = get_ref_cnt_buffer(&cpi->common, fb_idx);
+    ref_frame_bufs[mv_ref_frame_to_inter_ref_idx(ref)] = buf;
+  }
 }
 
 static INLINE YV12_BUFFER_CONFIG *get_ref_frame_buffer(
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -1478,7 +1478,8 @@
     fclose(recon_file);
   }
 
-  ++cm->current_video_frame;
+  // In the first pass, every frame is considered as a show frame.
+  update_frame_indexes(cm, /*show_frame=*/1);
   if (cpi->use_svc) vp9_inc_frame_in_layer(cpi);
 }
 
--- a/vp9/simple_encode.cc
+++ b/vp9/simple_encode.cc
@@ -498,6 +498,14 @@
   encode_frame_result->coding_data_bit_size =
       encode_frame_result->coding_data_byte_size * 8;
   encode_frame_result->show_idx = encode_frame_info->show_idx;
+  encode_frame_result->coding_idx = encode_frame_info->frame_coding_index;
+  assert(kRefFrameTypeMax == MAX_INTER_REF_FRAMES);
+  for (int i = 0; i < kRefFrameTypeMax; ++i) {
+    encode_frame_result->ref_frame_info.coding_indexes[i] =
+        encode_frame_info->ref_frame_coding_indexes[i];
+    encode_frame_result->ref_frame_info.valid_list[i] =
+        encode_frame_info->ref_frame_valid_list[i];
+  }
   encode_frame_result->frame_type =
       get_frame_type_from_update_type(encode_frame_info->update_type);
   encode_frame_result->psnr = encode_frame_info->psnr;
@@ -524,9 +532,18 @@
          group_of_picture.encode_frame_list.size();
 }
 
+// Two RefFrameInfo are equal when every coding index and valid flag matches.
+bool operator==(const RefFrameInfo &a, const RefFrameInfo &b) {
+  for (int idx = 0; idx < kRefFrameTypeMax; ++idx) {
+    if (a.coding_indexes[idx] != b.coding_indexes[idx]) return false;
+    if (a.valid_list[idx] != b.valid_list[idx]) return false;
+  }
+  return true;
+}
+
 static void InitRefFrameInfo(RefFrameInfo *ref_frame_info) {
   for (int i = 0; i < kRefFrameTypeMax; ++i) {
-    ref_frame_info->coding_indexes[i] = 0;
+    ref_frame_info->coding_indexes[i] = -1;
     ref_frame_info->valid_list[i] = 0;
   }
 }
@@ -579,7 +596,7 @@
   }
 
   if (past_index == last_index) {
-    ref_frame_valid_list[kRefFrameTypeLast] = 0;
+    ref_frame_valid_list[kRefFrameTypePast] = 0;
   }
 
   if (future_index == last_index) {
@@ -693,7 +710,13 @@
   frame_rate_den_ = frame_rate_den;
   target_bitrate_ = target_bitrate;
   num_frames_ = num_frames;
+
   frame_coding_index_ = 0;
+  show_frame_count_ = 0;
+
+  key_frame_group_index_ = 0;
+  key_frame_group_size_ = 0;
+
   // TODO(angirbid): Should we keep a file pointer here or keep the file_path?
   assert(infile_path != nullptr);
   in_file_ = fopen(infile_path, "r");
@@ -789,6 +812,14 @@
   external_arf_indexes_ = external_arf_indexes;
 }
 
+// Returns a pointer to v's elements with const cast away, or nullptr when v
+// is empty.
+template <typename T>
+T *GetVectorData(const std::vector<T> &v) {
+  const T *const elements = v.empty() ? nullptr : v.data();
+  return const_cast<T *>(elements);
+}
+
 void SimpleEncode::StartEncode() {
   assert(impl_ptr_->first_pass_stats.size() > 0);
   vpx_rational_t frame_rate =
@@ -797,7 +828,7 @@
       vp9_get_encoder_config(frame_width_, frame_height_, frame_rate,
                              target_bitrate_, VPX_RC_LAST_PASS);
   vpx_fixed_buf_t stats;
-  stats.buf = impl_ptr_->first_pass_stats.data();
+  stats.buf = GetVectorData(impl_ptr_->first_pass_stats);
   stats.sz = sizeof(impl_ptr_->first_pass_stats[0]) *
              impl_ptr_->first_pass_stats.size();
 
@@ -806,9 +837,15 @@
   impl_ptr_->cpi = init_encoder(&oxcf, impl_ptr_->img_fmt);
   vpx_img_alloc(&impl_ptr_->tmp_img, impl_ptr_->img_fmt, frame_width_,
                 frame_height_, 1);
+
   frame_coding_index_ = 0;
+  show_frame_count_ = 0;
+
   encode_command_set_external_arf_indexes(&impl_ptr_->cpi->encode_command,
-                                          external_arf_indexes_.data());
+                                          GetVectorData(external_arf_indexes_));
+
+  UpdateKeyFrameGroup(show_frame_count_);
+
   UpdateGroupOfPicture(impl_ptr_->cpi, frame_coding_index_, ref_frame_info_,
                        &group_of_picture_);
   rewind(in_file_);
@@ -829,13 +866,26 @@
   rewind(in_file_);
 }
 
-int SimpleEncode::GetKeyFrameGroupSize(int key_frame_index) const {
+void SimpleEncode::UpdateKeyFrameGroup(int key_frame_show_index) {
   const VP9_COMP *cpi = impl_ptr_->cpi;
-  return vp9_get_frames_to_next_key(&cpi->oxcf, &cpi->frame_info,
-                                    &cpi->twopass.first_pass_info,
-                                    key_frame_index, cpi->rc.min_gf_interval);
+  key_frame_group_index_ = 0;  // No show frame of the new group is coded yet.
+  key_frame_group_size_ = vp9_get_frames_to_next_key(
+      &cpi->oxcf, &cpi->frame_info, &cpi->twopass.first_pass_info,
+      key_frame_show_index, cpi->rc.min_gf_interval);
+  assert(key_frame_group_size_ > 0);
+  // Init the reference frame info when a new key frame group appears.
+  InitRefFrameInfo(&ref_frame_info_);
 }
 
+void SimpleEncode::PostUpdateKeyFrameGroupIndex(FrameType frame_type) {
+  // key_frame_group_index_ only counts show frames, and an alternate
+  // reference frame is the only frame type that is not shown.
+  const bool is_show_frame = frame_type != kFrameTypeAltRef;
+  if (is_show_frame) ++key_frame_group_index_;
+}
+
+int SimpleEncode::GetKeyFrameGroupSize() const { return key_frame_group_size_; }
+
 GroupOfPicture SimpleEncode::ObserveGroupOfPicture() const {
   return group_of_picture_;
 }
@@ -852,8 +902,20 @@
   PostUpdateRefFrameInfo(encode_frame_result.frame_type, frame_coding_index_,
                          &ref_frame_info_);
   ++frame_coding_index_;
+  if (encode_frame_result.frame_type != kFrameTypeAltRef) {
+    // Only kFrameTypeAltRef is not a show frame
+    ++show_frame_count_;
+  }
+
+  PostUpdateKeyFrameGroupIndex(encode_frame_result.frame_type);
+  if (key_frame_group_index_ == key_frame_group_size_) {
+    UpdateKeyFrameGroup(show_frame_count_);
+  }
+
   IncreaseGroupOfPictureIndex(&group_of_picture_);
   if (IsGroupOfPictureFinished(group_of_picture_)) {
+    // This function needs to be called after ref_frame_info_ is updated
+    // properly in PostUpdateRefFrameInfo() and UpdateKeyFrameGroup().
     UpdateGroupOfPicture(impl_ptr_->cpi, frame_coding_index_, ref_frame_info_,
                          &group_of_picture_);
   }
@@ -952,7 +1014,8 @@
                              target_bitrate_, VPX_RC_LAST_PASS);
   FRAME_INFO frame_info = vp9_get_frame_info(&oxcf);
   FIRST_PASS_INFO first_pass_info;
-  fps_init_first_pass_info(&first_pass_info, impl_ptr_->first_pass_stats.data(),
+  fps_init_first_pass_info(&first_pass_info,
+                           GetVectorData(impl_ptr_->first_pass_stats),
                            num_frames_);
   return vp9_get_coding_frame_num(external_arf_indexes_.data(), &oxcf,
                                   &frame_info, &first_pass_info,
--- a/vp9/simple_encode.h
+++ b/vp9/simple_encode.h
@@ -86,6 +86,8 @@
   int valid_list[kRefFrameTypeMax];
 };
 
+bool operator==(const RefFrameInfo &a, const RefFrameInfo &b);
+
 struct EncodeFrameInfo {
   int show_idx;
 
@@ -199,6 +201,8 @@
 struct EncodeFrameResult {
   int show_idx;
   FrameType frame_type;
+  int coding_idx;
+  RefFrameInfo ref_frame_info;
   size_t coding_data_bit_size;
   size_t coding_data_byte_size;
   // The EncodeFrame will allocate a buffer, write the coding data into the
@@ -304,12 +308,11 @@
   // This function should be called after StartEncode() or EncodeFrame().
   void EndEncode();
 
-  // Given a key_frame_index, computes this key frame group's size.
   // The key frame group size includes one key frame plus the number of
   // following inter frames. Note that the key frame group size only counts the
   // show frames. The number of no show frames like alternate refereces are not
   // counted.
-  int GetKeyFrameGroupSize(int key_frame_index) const;
+  int GetKeyFrameGroupSize() const;
 
   // Provides the group of pictures that the next coding frame is in.
   // Only call this function between StartEncode() and EndEncode()
@@ -353,10 +356,29 @@
   std::vector<int> external_arf_indexes_;
   GroupOfPicture group_of_picture_;
 
+  // The key frame group size includes one key frame plus the number of
+  // following inter frames. Note that the key frame group size only counts the
+  // show frames. The number of no show frames like alternate references are not
+  // counted.
+  int key_frame_group_size_;
+
+  // The index for the to-be-coded show frame in the key frame group.
+  int key_frame_group_index_;
+
+  // Update key_frame_group_size_, reset key_frame_group_index_ and init
+  // ref_frame_info_.
+  void UpdateKeyFrameGroup(int key_frame_show_index);
+
+  // Update key_frame_group_index_.
+  void PostUpdateKeyFrameGroupIndex(FrameType frame_type);
+
   // Each show or no show frame is assigned with a coding index based on its
   // coding order (starting from zero) in the coding process of the entire
   // video. The coding index of the to-be-coded frame.
   int frame_coding_index_;
+
+  // Number of show frames we have coded so far.
+  int show_frame_count_;
 
   // TODO(angiebird): Do we need to reset ref_frames_info_ when the next key
   // frame appears?