ref: f9ab864199a7a4d94df8c3ba3492d4feb3cfa90d
parent: 8a8e780b584cef07e3615ff41707145afd429b8a
author: Cheng Chen <chengchen@google.com>
date: Wed Jul 29 08:49:51 EDT 2020
L2E: Add ObserveFirstPassMotionVector Store motion vectors for each 16x16 block found in the first pass motion search. Provide an api "ObserveFirstPassMotionVector()" in SimpleEncode class, similar to "ObserveFirstPassStats()". Change-Id: Ia86386b7e4aa549f7000e7965c287380bf52e62c
--- a/test/simple_encode_test.cc
+++ b/test/simple_encode_test.cc
@@ -60,6 +60,27 @@
}
}
+// Verifies that the first pass exposes one motion vector list per frame,
+// that each list has one entry per 16x16 block, and that every entry's
+// mv_count matches the number of valid reference frames it reports.
+TEST_F(SimpleEncodeTest, ObserveFirstPassMotionVectors) {
+  SimpleEncode simple_encode(width_, height_, frame_rate_num_, frame_rate_den_,
+                             target_bitrate_, num_frames_,
+                             in_file_path_str_.c_str());
+  simple_encode.ComputeFirstPassStats();
+  const std::vector<std::vector<MotionVectorInfo>> fps_motion_vectors =
+      simple_encode.ObserveFirstPassMotionVectors();
+  EXPECT_EQ(fps_motion_vectors.size(), static_cast<size_t>(num_frames_));
+  // Number of 16x16 blocks, rounding partial blocks up in both dimensions.
+  const size_t num_blocks = ((width_ + 15) >> 4) * ((height_ + 15) >> 4);
+  for (size_t i = 0; i < fps_motion_vectors.size(); ++i) {
+    // Fatal assertion: the loop below indexes up to num_blocks, so a size
+    // mismatch must stop the test instead of reading out of bounds.
+    ASSERT_EQ(num_blocks, fps_motion_vectors[i].size());
+    for (size_t j = 0; j < num_blocks; ++j) {
+      const MotionVectorInfo &mv_info = fps_motion_vectors[i][j];
+      const int ref_count =
+          (mv_info.ref_frame[0] > 0) + (mv_info.ref_frame[1] > 0);
+      EXPECT_EQ(mv_info.mv_count, ref_count);
+    }
+  }
+}
+
TEST_F(SimpleEncodeTest, GetCodingFrameNum) {
SimpleEncode simple_encode(width_, height_, frame_rate_num_, frame_rate_den_,
target_bitrate_, num_frames_,
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -1024,6 +1024,7 @@
#if CONFIG_RATE_CTRL
free_partition_info(cpi);
free_motion_vector_info(cpi);
+ free_fp_motion_vector_info(cpi);
#endif
vp9_free_ref_frame_buffers(cm->buffer_pool);
@@ -2661,6 +2662,7 @@
encode_command_init(&cpi->encode_command);
partition_info_init(cpi);
motion_vector_info_init(cpi);
+ fp_motion_vector_info_init(cpi);
#endif
return cpi;
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -642,6 +642,9 @@
// Returns number of units in size of 4, if not multiple not a multiple of 4,
// round it up. For example, size is 7, return 2.
static INLINE int get_num_unit_4x4(int size) { return (size + 3) >> 2; }
+// Returns the number of 16-sized units needed to cover |size|. If |size| is
+// not a multiple of 16, the result is rounded up. For example, a size of 17
+// returns 2.
+static INLINE int get_num_unit_16x16(int size) {
+  const int padded_size = size + 15;
+  return padded_size >> 4;
+}
#endif // CONFIG_RATE_CTRL
typedef struct VP9_COMP {
@@ -952,6 +955,7 @@
ENCODE_COMMAND encode_command;
PARTITION_INFO *partition_info;
MOTION_VECTOR_INFO *motion_vector_info;
+ MOTION_VECTOR_INFO *fp_motion_vector_info;
RATE_QSTEP_MODEL rq_model[ENCODE_FRAME_TYPES];
#endif
@@ -1000,6 +1004,27 @@
cpi->motion_vector_info = NULL;
}
+// Allocates zero-initialized memory for the first pass motion vector
+// information and stores it in cpi->fp_motion_vector_info.
+// One MOTION_VECTOR_INFO entry is allocated per 16x16 block of the frame;
+// partial blocks at the frame edges are rounded up.
+// Only called once in vp9_create_compressor().
+static INLINE void fp_motion_vector_info_init(struct VP9_COMP *cpi) {
+  VP9_COMMON *const cm = &cpi->common;
+  const int unit_width = get_num_unit_16x16(cpi->frame_info.frame_width);
+  const int unit_height = get_num_unit_16x16(cpi->frame_info.frame_height);
+  // vpx_calloc() already returns zeroed memory, so no memset() is needed.
+  CHECK_MEM_ERROR(cm, cpi->fp_motion_vector_info,
+                  (MOTION_VECTOR_INFO *)vpx_calloc(unit_width * unit_height,
+                                                   sizeof(MOTION_VECTOR_INFO)));
+}
+
+// Releases the first pass motion vector buffer owned by |cpi| and clears
+// the pointer so it is not left dangling.
+// Only called once in dealloc_compressor_data().
+static INLINE void free_fp_motion_vector_info(struct VP9_COMP *cpi) {
+  MOTION_VECTOR_INFO *const fp_mv_info = cpi->fp_motion_vector_info;
+  cpi->fp_motion_vector_info = NULL;
+  vpx_free(fp_mv_info);
+}
+
// This is the c-version counter part of ImageBuffer
typedef struct IMAGE_BUFFER {
int allocated;
@@ -1021,6 +1046,7 @@
FRAME_COUNTS frame_counts;
const PARTITION_INFO *partition_info;
const MOTION_VECTOR_INFO *motion_vector_info;
+ const MOTION_VECTOR_INFO *fp_motion_vector_info;
IMAGE_BUFFER coded_frame;
#endif // CONFIG_RATE_CTRL
int quantize_index;
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -839,6 +839,26 @@
fp_acc_data->image_data_start_row);
}
+#if CONFIG_RATE_CTRL
+// Records one first pass motion vector for the 16x16 block at
+// (mb_row, mb_col) in cpi->fp_motion_vector_info.
+// When is_second_mv is 0, the vector goes to slot 0 and is tagged with
+// LAST_FRAME; otherwise it goes to slot 1 and is tagged with GOLDEN_FRAME.
+// The slot that is not selected is left untouched.
+// NOTE(review): the buffer appears to be allocated once and not cleared per
+// frame, so slot 1 may hold data from an earlier frame -- confirm.
+static void store_fp_motion_vector(VP9_COMP *cpi, const MV *mv,
+                                   const int mb_row, const int mb_col,
+                                   const int is_second_mv) {
+  VP9_COMMON *const cm = &cpi->common;
+  // Blocks are addressed in raster order with cm->mb_cols entries per row.
+  MOTION_VECTOR_INFO *const block_info =
+      cpi->fp_motion_vector_info + (mb_row * cm->mb_cols + mb_col);
+  const int slot = is_second_mv ? 1 : 0;
+  block_info->ref_frame[slot] = is_second_mv ? GOLDEN_FRAME : LAST_FRAME;
+  block_info->mv[slot].as_mv.row = mv->row;
+  block_info->mv[slot].as_mv.col = mv->col;
+}
+#endif // CONFIG_RATE_CTRL
+
#define NZ_MOTION_PENALTY 128
#define INTRA_MODE_PENALTY 1024
void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td,
@@ -1137,6 +1157,9 @@
vp9_get_mvpred_var(x, &tmp_mv, &zero_mv, &v_fn_ptr, 0);
}
}
+#if CONFIG_RATE_CTRL
+ store_fp_motion_vector(cpi, &mv, mb_row, mb_col, /*is_second_mv=*/0);
+#endif // CONFIG_RATE_CTRL
// Search in an older reference frame.
if ((cm->current_video_frame > 1) && gld_yv12 != NULL) {
@@ -1158,6 +1181,10 @@
#endif // CONFIG_VP9_HIGHBITDEPTH
first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv, &gf_motion_error);
+#if CONFIG_RATE_CTRL
+ store_fp_motion_vector(cpi, &tmp_mv, mb_row, mb_col,
+ /*is_second_mv=*/1);
+#endif // CONFIG_RATE_CTRL
if (gf_motion_error < motion_error && gf_motion_error < this_error)
++(fp_acc_data->second_ref_count);
--- a/vp9/simple_encode.cc
+++ b/vp9/simple_encode.cc
@@ -471,8 +471,8 @@
encode_frame_result->coding_data.reset(
new (std::nothrow) uint8_t[max_coding_data_byte_size]);
- encode_frame_result->num_rows_4x4 = get_num_unit_4x4(frame_width);
- encode_frame_result->num_cols_4x4 = get_num_unit_4x4(frame_height);
+ encode_frame_result->num_rows_4x4 = get_num_unit_4x4(frame_height);
+ encode_frame_result->num_cols_4x4 = get_num_unit_4x4(frame_width);
encode_frame_result->partition_info.resize(encode_frame_result->num_rows_4x4 *
encode_frame_result->num_cols_4x4);
encode_frame_result->motion_vector_info.resize(
@@ -742,6 +742,8 @@
struct lookahead_ctx *lookahead = cpi->lookahead;
int i;
int use_highbitdepth = 0;
+ const int num_rows_16x16 = get_num_unit_16x16(frame_height_);
+ const int num_cols_16x16 = get_num_unit_16x16(frame_width_);
#if CONFIG_VP9_HIGHBITDEPTH
use_highbitdepth = cpi->common.use_highbitdepth;
#endif
@@ -774,6 +776,12 @@
// vp9_get_compressed_data only generates first pass stats not
// compresses data
assert(size == 0);
+ // Get vp9 first pass motion vector info.
+ std::vector<MotionVectorInfo> mv_info(num_rows_16x16 * num_cols_16x16);
+ update_motion_vector_info(&encode_frame_info.fp_motion_vector_info[0],
+ num_rows_16x16, num_cols_16x16,
+ mv_info.data());
+ fp_motion_vector_info_.push_back(mv_info);
}
impl_ptr_->first_pass_stats.push_back(vp9_get_frame_stats(&cpi->twopass));
}
@@ -809,6 +817,11 @@
output_stats.push_back(this_stats);
}
return output_stats;
+}
+
+// Returns a copy of the per-frame motion vectors gathered in the first pass.
+std::vector<std::vector<MotionVectorInfo>>
+SimpleEncode::ObserveFirstPassMotionVectors() {
+  std::vector<std::vector<MotionVectorInfo>> motion_vectors =
+      fp_motion_vector_info_;
+  return motion_vectors;
}
void SimpleEncode::SetExternalGroupOfPicturesMap(int *gop_map,
--- a/vp9/simple_encode.h
+++ b/vp9/simple_encode.h
@@ -60,7 +60,9 @@
constexpr int kMotionVectorPrecision = 8;
-// The frame is split to 4x4 blocks.
+// In the first pass, the frame is split into 16x16 blocks.
+// This structure contains the information of each 16x16 block.
+// In the second pass, the frame is split into 4x4 blocks.
// This structure contains the information of each 4x4 block.
struct MotionVectorInfo {
// Number of valid motion vectors, always 0 if this block is in the key frame.
@@ -68,8 +70,8 @@
int mv_count;
// The reference frame for motion vectors. If the second motion vector does
// not exist (mv_count = 1), the reference frame is kNoneRefFrame.
- // Otherwise, the reference frame is either kLastFrame, or kGoldenFrame,
- // or kAltRefFrame.
+ // Otherwise, the reference frame is either kRefFrameTypeLast, or
+ // kRefFrameTypePast, or kRefFrameTypeFuture.
RefFrameType ref_frame[2];
// The row offset of motion vectors in the unit of pixel.
// If the second motion vector does not exist, the value is 0.
@@ -245,7 +247,7 @@
std::vector<PartitionInfo> partition_info;
// A vector of the motion vector information of the frame.
// The number of elements is |num_rows_4x4| * |num_cols_4x4|.
- // The frame is divided 4x4 blocks of |num_rows_4x4| rows and
+ // The frame is divided into 4x4 blocks of |num_rows_4x4| rows and
// |num_cols_4x4| columns.
// Each 4x4 block contains 0 motion vector if this is an intra predicted
// frame (for example, the key frame). If the frame is inter predicted,
@@ -324,6 +326,12 @@
// values. For details, please check FIRSTPASS_STATS in vp9_firstpass.h
std::vector<std::vector<double>> ObserveFirstPassStats();
+ // Outputs the first pass motion vectors represented by a 2-D vector.
+ // The first dimension is the frame index: use it to retrieve the motion
+ // vectors of each video frame. Each frame is divided into 16x16 blocks, so
+ // the number of elements per frame is
+ // round_up(|num_rows_4x4| / 4) * round_up(|num_cols_4x4| / 4).
+ // The result is returned by value, i.e. as a copy of the stored data.
+ // NOTE(review): presumably only populated after ComputeFirstPassStats()
+ // has run -- confirm.
+ std::vector<std::vector<MotionVectorInfo>> ObserveFirstPassMotionVectors();
+
// Ouputs a copy of key_frame_map_, a binary vector with size equal to the
// number of show frames in the video. For each entry in the vector, 1
// indicates the position is a key frame and 0 indicates it's not a key frame.
@@ -451,6 +459,17 @@
// frame appears?
// Reference frames info of the to-be-coded frame.
RefFrameInfo ref_frame_info_;
+
+ // A 2-D vector of motion vector information collected from the first pass.
+ // The first dimension is the frame index; one entry is appended for each
+ // frame processed in the first pass.
+ // Each frame is divided into 16x16 blocks. The number of elements per frame
+ // is round_up(|num_rows_4x4| / 4) * round_up(|num_cols_4x4| / 4).
+ // Each 16x16 block contains no motion vectors if this is an intra predicted
+ // frame (for example, the key frame). If the frame is inter predicted,
+ // each 16x16 block contains either 1 or 2 motion vectors.
+ // The first motion vector is always from the LAST_FRAME.
+ // The second motion vector is always from the GOLDEN_FRAME.
+ std::vector<std::vector<MotionVectorInfo>> fp_motion_vector_info_;
};
} // namespace vp9