shithub: libvpx

--- a/vp9/common/vp9_alloccommon.c

+++ b/vp9/common/vp9_alloccommon.c

@@ -95,9 +95,8 @@

 int vp9_alloc_frame_buffers(VP9_COMMON *oci, int width, int height) {

   int i, mi_cols;

-  // Our internal buffers are always multiples of 16

-  const int aligned_width = multiple8(width);

-  const int aligned_height = multiple8(height);

+  const int aligned_width = ALIGN_POWER_OF_TWO(width, LOG2_MI_SIZE);

+  const int aligned_height = ALIGN_POWER_OF_TWO(height, LOG2_MI_SIZE);

   const int ss_x = oci->subsampling_x;

   const int ss_y = oci->subsampling_y;

   int mi_size;

@@ -147,7 +146,7 @@

   // FIXME(jkoleszar): allocate subsampled arrays for U/V once subsampling

   // information is exposed at this level

-  mi_cols = mi_cols_aligned_to_sb(oci);

+  mi_cols = mi_cols_aligned_to_sb(oci->mi_cols);

   // 2 contexts per 'mi unit', so that we have one context per 4x4 txfm

   // block where mi unit size is 8x8.

@@ -198,8 +197,8 @@

 void vp9_update_frame_size(VP9_COMMON *cm) {

-  const int aligned_width = multiple8(cm->width);

-  const int aligned_height = multiple8(cm->height);

+  const int aligned_width = ALIGN_POWER_OF_TWO(cm->width, LOG2_MI_SIZE);

+  const int aligned_height = ALIGN_POWER_OF_TWO(cm->height, LOG2_MI_SIZE);

   set_mb_mi(cm, aligned_width, aligned_height);

   setup_mi(cm);

--- a/vp9/common/vp9_common.h

+++ b/vp9/common/vp9_common.h

@@ -22,12 +22,11 @@

 #define MIN(x, y) (((x) < (y)) ? (x) : (y))

 #define MAX(x, y) (((x) > (y)) ? (x) : (y))

-#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n) - 1))) >> (n))

+#define ROUND_POWER_OF_TWO(value, n) \

+    (((value) + (1 << ((n) - 1))) >> (n))

-/* If we don't want to use ROUND_POWER_OF_TWO macro

-static INLINE int16_t round_power_of_two(int16_t value, int n) {

-  return (value + (1 << (n - 1))) >> n;

-}*/

+#define ALIGN_POWER_OF_TWO(value, n) \

+    (((value) + ((1 << (n)) - 1)) & ~((1 << (n)) - 1))

 // Only need this for fixed-size arrays, for structs just assign.

 #define vp9_copy(dest, src) {            \

@@ -54,10 +53,6 @@

 static INLINE double fclamp(double value, double low, double high) {

   return value < low ? low : (value > high ? high : value);

-}

-static INLINE int multiple8(int value) {

-  return (value + 7) & ~7;

 static int get_unsigned_bits(unsigned int num_values) {

--- a/vp9/common/vp9_enums.h

+++ b/vp9/common/vp9_enums.h

@@ -14,10 +14,12 @@

 #include "./vpx_config.h"

 #define LOG2_MI_SIZE 3

+#define LOG2_MI_BLOCK_SIZE (6 - LOG2_MI_SIZE)  // 64 = 2^6

-#define MI_SIZE (1 << LOG2_MI_SIZE)

-#define MI_MASK ((64 >> LOG2_MI_SIZE) - 1)

-#define MI_BLOCK_SIZE (64 / MI_SIZE)

+#define MI_SIZE (1 << LOG2_MI_SIZE)  // pixels per mi-unit

+#define MI_BLOCK_SIZE (1 << LOG2_MI_BLOCK_SIZE)  // mi-units per max block

+#define MI_MASK (MI_BLOCK_SIZE - 1)

 typedef enum BLOCK_SIZE_TYPE {

   BLOCK_SIZE_AB4X4,

--- a/vp9/common/vp9_onyxc_int.h

+++ b/vp9/common/vp9_onyxc_int.h

@@ -301,8 +301,8 @@

   buf[new_idx]++;

-static int mi_cols_aligned_to_sb(VP9_COMMON *cm) {

-  return 2 * ((cm->mb_cols + 3) & ~3);

+static int mi_cols_aligned_to_sb(int n_mis) {

+  return ALIGN_POWER_OF_TWO(n_mis, LOG2_MI_BLOCK_SIZE);

 static INLINE void set_partition_seg_context(VP9_COMMON *cm, MACROBLOCKD *xd,

--- a/vp9/common/vp9_tile_common.c

+++ b/vp9/common/vp9_tile_common.c

@@ -15,10 +15,14 @@

 #define MIN_TILE_WIDTH_SBS (MIN_TILE_WIDTH >> 6)

 #define MAX_TILE_WIDTH_SBS (MAX_TILE_WIDTH >> 6)

+static int to_sbs(int n_mis) {  // mi units -> max-size blocks, rounded up

+  return mi_cols_aligned_to_sb(n_mis) >> LOG2_MI_BLOCK_SIZE;

+}

 static void vp9_get_tile_offsets(VP9_COMMON *cm, int *min_tile_off,

                                  int *max_tile_off, int tile_idx,

                                  int log2_n_tiles, int n_mis) {

-  const int n_sbs = (n_mis + 7) >> 3;

+  const int n_sbs = to_sbs(n_mis);

   const int sb_off1 =  (tile_idx      * n_sbs) >> log2_n_tiles;

   const int sb_off2 = ((tile_idx + 1) * n_sbs) >> log2_n_tiles;

@@ -43,7 +47,7 @@

 void vp9_get_tile_n_bits(VP9_COMMON *cm, int *min_log2_n_tiles_ptr,

                          int *delta_log2_n_tiles) {

-  const int sb_cols = (cm->mb_cols + 3) >> 2;

+  const int sb_cols = to_sbs(cm->mi_cols);

   int min_log2_n_tiles, max_log2_n_tiles;

   for (max_log2_n_tiles = 0;

--- a/vp9/decoder/vp9_decodframe.c

+++ b/vp9/decoder/vp9_decodframe.c

@@ -684,16 +684,17 @@

   VP9_COMMON *const pc = &pbi->common;

   const uint8_t *data_ptr = data + first_partition_size;

-  const uint8_t* const data_end = pbi->source + pbi->source_sz;

+  const uint8_t *const data_end = pbi->source + pbi->source_sz;

+  const int aligned_mi_cols = mi_cols_aligned_to_sb(pc->mi_cols);

   int tile_row, tile_col;

   // Note: this memset assumes above_context[0], [1] and [2]

   // are allocated as part of the same buffer.

-  vpx_memset(pc->above_context[0], 0, sizeof(ENTROPY_CONTEXT) * 2 *

-                                      MAX_MB_PLANE * mi_cols_aligned_to_sb(pc));

+  vpx_memset(pc->above_context[0], 0,

+             sizeof(ENTROPY_CONTEXT) * 2 * MAX_MB_PLANE * aligned_mi_cols);

-  vpx_memset(pc->above_seg_context, 0, sizeof(PARTITION_CONTEXT) *

-                                       mi_cols_aligned_to_sb(pc));

+  vpx_memset(pc->above_seg_context, 0,

+             sizeof(PARTITION_CONTEXT) * aligned_mi_cols);

   if (pbi->oxcf.inv_tile_order) {

     const int n_cols = pc->tile_columns;

--- a/vp9/encoder/vp9_bitstream.c

+++ b/vp9/encoder/vp9_bitstream.c

@@ -1475,8 +1475,8 @@

     unsigned char *data_ptr = cx_data + header_bc.pos;

     TOKENEXTRA *tok[4][1 << 6], *tok_end;

-    vpx_memset(cpi->common.above_seg_context, 0, sizeof(PARTITION_CONTEXT) *

-               mi_cols_aligned_to_sb(&cpi->common));

+    vpx_memset(pc->above_seg_context, 0, sizeof(PARTITION_CONTEXT) *

+               mi_cols_aligned_to_sb(pc->mi_cols));

     tok[0][0] = cpi->tok;

     for (tile_row = 0; tile_row < pc->tile_rows; tile_row++) {

       if (tile_row) {

--- a/vp9/encoder/vp9_encodeframe.c

+++ b/vp9/encoder/vp9_encodeframe.c

@@ -1697,9 +1697,10 @@

 static void init_encode_frame_mb_context(VP9_COMP *cpi) {

-  MACROBLOCK * const x = &cpi->mb;

-  VP9_COMMON * const cm = &cpi->common;

-  MACROBLOCKD * const xd = &x->e_mbd;

+  MACROBLOCK *const x = &cpi->mb;

+  VP9_COMMON *const cm = &cpi->common;

+  MACROBLOCKD *const xd = &x->e_mbd;

+  const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);

   x->act_zbin_adj = 0;

   cpi->seg0_idx = 0;

@@ -1742,11 +1743,10 @@

   // Note: this memset assumes above_context[0], [1] and [2]

   // are allocated as part of the same buffer.

-  vpx_memset(

-      cm->above_context[0], 0,

-      sizeof(ENTROPY_CONTEXT) * 2 * MAX_MB_PLANE * mi_cols_aligned_to_sb(cm));

+  vpx_memset(cm->above_context[0], 0,

+             sizeof(ENTROPY_CONTEXT) * 2 * MAX_MB_PLANE * aligned_mi_cols);

   vpx_memset(cm->above_seg_context, 0,

-             sizeof(PARTITION_CONTEXT) * mi_cols_aligned_to_sb(cm));

+             sizeof(PARTITION_CONTEXT) * aligned_mi_cols);

 static void switch_lossless_mode(VP9_COMP *cpi, int lossless) {

--- a/vp9/vp9_iface_common.h

+++ b/vp9/vp9_iface_common.h

@@ -29,7 +29,7 @@

     img->fmt = VPX_IMG_FMT_I420;

   img->w = yv12->y_stride;

-  img->h = multiple8(yv12->y_height + 2 * VP9BORDERINPIXELS);

+  img->h = ALIGN_POWER_OF_TWO(yv12->y_height + 2 * VP9BORDERINPIXELS, 3);

   img->d_w = yv12->y_crop_width;

   img->d_h = yv12->y_crop_height;

   img->x_chroma_shift = yv12->uv_width < yv12->y_width;

--

⑨