shithub: libvpx

--- a/test/simple_encode_test.cc

+++ b/test/simple_encode_test.cc

@@ -60,6 +60,27 @@

+TEST_F(SimpleEncodeTest, ObserveFirstPassMotionVectors) {

+  SimpleEncode simple_encode(width_, height_, frame_rate_num_, frame_rate_den_,

+                             target_bitrate_, num_frames_,

+                             in_file_path_str_.c_str());

+  simple_encode.ComputeFirstPassStats();

+  std::vector<std::vector<MotionVectorInfo>> fps_motion_vectors =

+      simple_encode.ObserveFirstPassMotionVectors();

+  EXPECT_EQ(fps_motion_vectors.size(), static_cast<size_t>(num_frames_));

+  const size_t num_blocks = ((width_ + 15) >> 4) * ((height_ + 15) >> 4);

+  // A per-frame size mismatch is fatal: indexing below would go out of range.

+  for (size_t i = 0; i < fps_motion_vectors.size(); ++i) {

+    ASSERT_EQ(num_blocks, fps_motion_vectors[i].size());

+    for (size_t j = 0; j < num_blocks; ++j) {

+      const int mv_count = fps_motion_vectors[i][j].mv_count;

+      const int ref_count = (fps_motion_vectors[i][j].ref_frame[0] > 0) +

+                            (fps_motion_vectors[i][j].ref_frame[1] > 0);

+      EXPECT_EQ(mv_count, ref_count);

+    }

+  }

+}

 TEST_F(SimpleEncodeTest, GetCodingFrameNum) {

   SimpleEncode simple_encode(width_, height_, frame_rate_num_, frame_rate_den_,

                              target_bitrate_, num_frames_,

--- a/vp9/encoder/vp9_encoder.c

+++ b/vp9/encoder/vp9_encoder.c

@@ -1024,6 +1024,7 @@

 #if CONFIG_RATE_CTRL

   free_partition_info(cpi);

   free_motion_vector_info(cpi);

+  free_fp_motion_vector_info(cpi);

 #endif

   vp9_free_ref_frame_buffers(cm->buffer_pool);

@@ -2661,6 +2662,7 @@

   encode_command_init(&cpi->encode_command);

   partition_info_init(cpi);

   motion_vector_info_init(cpi);

+  fp_motion_vector_info_init(cpi);

 #endif

   return cpi;

--- a/vp9/encoder/vp9_encoder.h

+++ b/vp9/encoder/vp9_encoder.h

@@ -642,6 +642,9 @@

 // Returns number of units in size of 4, if not multiple not a multiple of 4,

 // round it up. For example, size is 7, return 2.

 static INLINE int get_num_unit_4x4(int size) { return (size + 3) >> 2; }

+// Returns number of units in size of 16. If size is not a multiple of 16,

+// round it up. For example, size is 17, return 2.

+static INLINE int get_num_unit_16x16(int size) { return (size + 15) >> 4; }

 #endif  // CONFIG_RATE_CTRL

 typedef struct VP9_COMP {

@@ -952,6 +955,7 @@

   ENCODE_COMMAND encode_command;

   PARTITION_INFO *partition_info;

   MOTION_VECTOR_INFO *motion_vector_info;

+  MOTION_VECTOR_INFO *fp_motion_vector_info;

   RATE_QSTEP_MODEL rq_model[ENCODE_FRAME_TYPES];

 #endif

@@ -1000,6 +1004,27 @@

   cpi->motion_vector_info = NULL;

+// Allocates memory for the first pass motion vector information.

+// The unit size is each 16x16 block.

+// Only called once in vp9_create_compressor().

+static INLINE void fp_motion_vector_info_init(struct VP9_COMP *cpi) {

+  VP9_COMMON *const cm = &cpi->common;

+  const int unit_width = get_num_unit_16x16(cpi->frame_info.frame_width);

+  const int unit_height = get_num_unit_16x16(cpi->frame_info.frame_height);

+  // vpx_calloc() zero-initializes the buffer, so no separate memset() is

+  // needed after the allocation.

+  CHECK_MEM_ERROR(cm, cpi->fp_motion_vector_info,

+                  (MOTION_VECTOR_INFO *)vpx_calloc(unit_width * unit_height,

+                                                   sizeof(MOTION_VECTOR_INFO)));

+}

+// Frees the first pass motion vector information and clears the pointer.

+// Only called once in dealloc_compressor_data().

+static INLINE void free_fp_motion_vector_info(struct VP9_COMP *cpi) {

+  vpx_free(cpi->fp_motion_vector_info);

+  cpi->fp_motion_vector_info = NULL;

+}

 // This is the c-version counter part of ImageBuffer

 typedef struct IMAGE_BUFFER {

   int allocated;

@@ -1021,6 +1046,7 @@

   FRAME_COUNTS frame_counts;

   const PARTITION_INFO *partition_info;

   const MOTION_VECTOR_INFO *motion_vector_info;

+  const MOTION_VECTOR_INFO *fp_motion_vector_info;

   IMAGE_BUFFER coded_frame;

 #endif  // CONFIG_RATE_CTRL

   int quantize_index;

--- a/vp9/encoder/vp9_firstpass.c

+++ b/vp9/encoder/vp9_firstpass.c

@@ -839,6 +839,26 @@

                    fp_acc_data->image_data_start_row);

+#if CONFIG_RATE_CTRL

+// Records a first pass motion vector for the 16x16 block at (mb_row, mb_col).

+// Slot 0 holds the vector tagged LAST_FRAME; when |is_second_mv| is nonzero

+// the vector goes to slot 1 and is tagged GOLDEN_FRAME instead.

+static void store_fp_motion_vector(VP9_COMP *cpi, const MV *mv,

+                                   const int mb_row, const int mb_col,

+                                   const int is_second_mv) {

+  const int slot = is_second_mv ? 1 : 0;

+  const int block_idx = mb_row * cpi->common.mb_cols + mb_col;

+  MOTION_VECTOR_INFO *const info = &cpi->fp_motion_vector_info[block_idx];

+  if (slot == 0) {

+    info->ref_frame[0] = LAST_FRAME;

+  } else {

+    info->ref_frame[1] = GOLDEN_FRAME;

+  }

+  info->mv[slot].as_mv.row = mv->row;

+  info->mv[slot].as_mv.col = mv->col;

+}

+#endif  // CONFIG_RATE_CTRL

 #define NZ_MOTION_PENALTY 128

 #define INTRA_MODE_PENALTY 1024

 void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td,

@@ -1137,6 +1157,9 @@

                 vp9_get_mvpred_var(x, &tmp_mv, &zero_mv, &v_fn_ptr, 0);

+#if CONFIG_RATE_CTRL

+        store_fp_motion_vector(cpi, &mv, mb_row, mb_col, /*is_second_mv=*/0);

+#endif  // CONFIG_RATE_CTRL

         // Search in an older reference frame.

         if ((cm->current_video_frame > 1) && gld_yv12 != NULL) {

@@ -1158,6 +1181,10 @@

 #endif  // CONFIG_VP9_HIGHBITDEPTH

           first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv, &gf_motion_error);

+#if CONFIG_RATE_CTRL

+          store_fp_motion_vector(cpi, &tmp_mv, mb_row, mb_col,

+                                 /*is_second_mv=*/1);

+#endif  // CONFIG_RATE_CTRL

           if (gf_motion_error < motion_error && gf_motion_error < this_error)

             ++(fp_acc_data->second_ref_count);

--- a/vp9/simple_encode.cc

+++ b/vp9/simple_encode.cc

@@ -471,8 +471,8 @@

   encode_frame_result->coding_data.reset(

       new (std::nothrow) uint8_t[max_coding_data_byte_size]);

-  encode_frame_result->num_rows_4x4 = get_num_unit_4x4(frame_width);

-  encode_frame_result->num_cols_4x4 = get_num_unit_4x4(frame_height);

+  encode_frame_result->num_rows_4x4 = get_num_unit_4x4(frame_height);

+  encode_frame_result->num_cols_4x4 = get_num_unit_4x4(frame_width);

   encode_frame_result->partition_info.resize(encode_frame_result->num_rows_4x4 *

                                              encode_frame_result->num_cols_4x4);

   encode_frame_result->motion_vector_info.resize(

@@ -742,6 +742,8 @@

   struct lookahead_ctx *lookahead = cpi->lookahead;

   int i;

   int use_highbitdepth = 0;

+  const int num_rows_16x16 = get_num_unit_16x16(frame_height_);

+  const int num_cols_16x16 = get_num_unit_16x16(frame_width_);

 #if CONFIG_VP9_HIGHBITDEPTH

   use_highbitdepth = cpi->common.use_highbitdepth;

 #endif

@@ -774,6 +776,12 @@

         // vp9_get_compressed_data only generates first pass stats not

         // compresses data

         assert(size == 0);

+        // Get vp9 first pass motion vector info.

+        std::vector<MotionVectorInfo> mv_info(num_rows_16x16 * num_cols_16x16);

+        update_motion_vector_info(&encode_frame_info.fp_motion_vector_info[0],

+                                  num_rows_16x16, num_cols_16x16,

+                                  mv_info.data());

+        fp_motion_vector_info_.push_back(mv_info);

       impl_ptr_->first_pass_stats.push_back(vp9_get_frame_stats(&cpi->twopass));

@@ -809,6 +817,11 @@

     output_stats.push_back(this_stats);

   return output_stats;

+}

+std::vector<std::vector<MotionVectorInfo>>

+SimpleEncode::ObserveFirstPassMotionVectors() {

+  return fp_motion_vector_info_;

 void SimpleEncode::SetExternalGroupOfPicturesMap(int *gop_map,

--- a/vp9/simple_encode.h

+++ b/vp9/simple_encode.h

@@ -60,7 +60,9 @@

 constexpr int kMotionVectorPrecision = 8;

-// The frame is split to 4x4 blocks.

+// In the first pass, the frame is split into 16x16 blocks.

+// This structure contains the information of each 16x16 block.

+// In the second pass, the frame is split into 4x4 blocks.

 // This structure contains the information of each 4x4 block.

 struct MotionVectorInfo {

   // Number of valid motion vectors, always 0 if this block is in the key frame.

@@ -68,8 +70,8 @@

   int mv_count;

   // The reference frame for motion vectors. If the second motion vector does

   // not exist (mv_count = 1), the reference frame is kNoneRefFrame.

-  // Otherwise, the reference frame is either kLastFrame, or kGoldenFrame,

-  // or kAltRefFrame.

+  // Otherwise, the reference frame is either kRefFrameTypeLast, or

+  // kRefFrameTypePast, or kRefFrameTypeFuture.

   RefFrameType ref_frame[2];

   // The row offset of motion vectors in the unit of pixel.

   // If the second motion vector does not exist, the value is 0.

@@ -245,7 +247,7 @@

   std::vector<PartitionInfo> partition_info;

   // A vector of the motion vector information of the frame.

   // The number of elements is |num_rows_4x4| * |num_cols_4x4|.

-  // The frame is divided 4x4 blocks of |num_rows_4x4| rows and

+  // The frame is divided into 4x4 blocks of |num_rows_4x4| rows and

   // |num_cols_4x4| columns.

   // Each 4x4 block contains 0 motion vector if this is an intra predicted

   // frame (for example, the key frame). If the frame is inter predicted,

@@ -324,6 +326,12 @@

   // values. For details, please check FIRSTPASS_STATS in vp9_firstpass.h

   std::vector<std::vector<double>> ObserveFirstPassStats();

+  // Outputs the first pass motion vectors represented by a 2-D vector.

+  // One can use the frame index at first dimension to retrieve the mvs for

+  // each video frame. The frame is divided into 16x16 blocks. The number of

+  // elements is round_up(|num_rows_4x4| / 4) * round_up(|num_cols_4x4| / 4).

+  std::vector<std::vector<MotionVectorInfo>> ObserveFirstPassMotionVectors();

   // Ouputs a copy of key_frame_map_, a binary vector with size equal to the

   // number of show frames in the video. For each entry in the vector, 1

   // indicates the position is a key frame and 0 indicates it's not a key frame.

@@ -451,6 +459,17 @@

   // frame appears?

   // Reference frames info of the to-be-coded frame.

   RefFrameInfo ref_frame_info_;

+  // A 2-D vector of motion vector information of the frame collected

+  // from the first pass. The first dimension is the frame index.

+  // Each frame is divided into 16x16 blocks. The number of elements is

+  // round_up(|num_rows_4x4| / 4) * round_up(|num_cols_4x4| / 4).

+  // Each 16x16 block contains 0 motion vector if this is an intra predicted

+  // frame (for example, the key frame). If the frame is inter predicted,

+  // each 16x16 block contains either 1 or 2 motion vectors.

+  // The first motion vector is always from the LAST_FRAME.

+  // The second motion vector is always from the GOLDEN_FRAME.

+  std::vector<std::vector<MotionVectorInfo>> fp_motion_vector_info_;

};

 }  // namespace vp9

--

⑨