ref: 516ea8460b325500f44ce79f55ff2e2c3bab6787
parent: e463b95b4ee8f7a1a352a29468f944e4b6d9c4e2
author: Scott LaVarnway <slavarnway@google.com>
date: Tue Dec 28 09:51:46 EST 2010
Use the fast quantizer for inter mode selection Use the fast quantizer for inter mode selection and the regular quantizer for the rest of the encode for good quality, speed 1. Both performance and quality were improved. The quality gains will make up for the quality loss mentioned in I9dc089007ca08129fb6c11fe7692777ebb8647b0. Change-Id: Ia90bc9cf326a7c65d60d31fa32f6465ab6984d21
--- a/vp8/encoder/arm/quantize_arm.c
+++ b/vp8/encoder/arm/quantize_arm.c
@@ -29,7 +29,7 @@
void vp8_fast_quantize_b_neon(BLOCK *b, BLOCKD *d)
{
- d->eob = vp8_fast_quantize_b_neon_func(b->coeff, b->zbin, d->qcoeff, d->dqcoeff, d->dequant, vp8_rvsplus1_default_zig_zag1d, b->round, b->quant);
+ d->eob = vp8_fast_quantize_b_neon_func(b->coeff, b->zbin, d->qcoeff, d->dqcoeff, d->dequant, vp8_rvsplus1_default_zig_zag1d, b->round, b->quant_fast);
}
/*
--- a/vp8/encoder/block.h
+++ b/vp8/encoder/block.h
@@ -33,6 +33,7 @@
// 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries
short *quant;
+ short *quant_fast;
short *quant_shift;
short *zbin;
short *zrun_zbin_boost;
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -179,6 +179,7 @@
{
// dc values
quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q);
+ cpi->Y1quant_fast[Q][0] = (1 << 16) / quant_val;
vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + 0,
cpi->Y1quant_shift[Q] + 0, quant_val);
cpi->Y1zbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
@@ -187,6 +188,7 @@
cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;
quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q);
+ cpi->Y2quant_fast[Q][0] = (1 << 16) / quant_val;
vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + 0,
cpi->Y2quant_shift[Q] + 0, quant_val);
cpi->Y2zbin[Q][0] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
@@ -195,6 +197,7 @@
cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;
quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q);
+ cpi->UVquant_fast[Q][0] = (1 << 16) / quant_val;
vp8cx_invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + 0,
cpi->UVquant_shift[Q] + 0, quant_val);
cpi->UVzbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;;
@@ -208,6 +211,7 @@
int rc = vp8_default_zig_zag1d[i];
quant_val = vp8_ac_yquant(Q);
+ cpi->Y1quant_fast[Q][rc] = (1 << 16) / quant_val;
vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + rc,
cpi->Y1quant_shift[Q] + rc, quant_val);
cpi->Y1zbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
@@ -216,6 +220,7 @@
cpi->zrun_zbin_boost_y1[Q][i] = (quant_val * zbin_boost[i]) >> 7;
quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);
+ cpi->Y2quant_fast[Q][rc] = (1 << 16) / quant_val;
vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + rc,
cpi->Y2quant_shift[Q] + rc, quant_val);
cpi->Y2zbin[Q][rc] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
@@ -224,6 +229,7 @@
cpi->zrun_zbin_boost_y2[Q][i] = (quant_val * zbin_boost[i]) >> 7;
quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q);
+ cpi->UVquant_fast[Q][rc] = (1 << 16) / quant_val;
vp8cx_invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + rc,
cpi->UVquant_shift[Q] + rc, quant_val);
cpi->UVzbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
@@ -325,6 +331,7 @@
for (i = 0; i < 16; i++)
{
x->block[i].quant = cpi->Y1quant[QIndex];
+ x->block[i].quant_fast = cpi->Y1quant_fast[QIndex];
x->block[i].quant_shift = cpi->Y1quant_shift[QIndex];
x->block[i].zbin = cpi->Y1zbin[QIndex];
x->block[i].round = cpi->Y1round[QIndex];
@@ -339,6 +346,7 @@
for (i = 16; i < 24; i++)
{
x->block[i].quant = cpi->UVquant[QIndex];
+ x->block[i].quant_fast = cpi->UVquant_fast[QIndex];
x->block[i].quant_shift = cpi->UVquant_shift[QIndex];
x->block[i].zbin = cpi->UVzbin[QIndex];
x->block[i].round = cpi->UVround[QIndex];
@@ -349,6 +357,7 @@
// Y2
zbin_extra = (cpi->common.Y2dequant[QIndex][1] * ((cpi->zbin_over_quant / 2) + cpi->zbin_mode_boost)) >> 7;
+ x->block[24].quant_fast = cpi->Y2quant_fast[QIndex];
x->block[24].quant = cpi->Y2quant[QIndex];
x->block[24].quant_shift = cpi->Y2quant_shift[QIndex];
x->block[24].zbin = cpi->Y2zbin[QIndex];
@@ -1270,7 +1279,18 @@
if (cpi->sf.RD)
{
+ /* Are we using the fast quantizer for the mode selection? */
+ if(cpi->sf.use_fastquant_for_pick)
+ cpi->mb.quantize_b = QUANTIZE_INVOKE(&cpi->rtcd.quantize, fastquantb);
+
inter_error = vp8_rd_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate, &distortion, &intra_error);
+
+ /* switch back to the regular quantizer for the encode */
+ if (cpi->sf.improved_quant)
+ {
+ cpi->mb.quantize_b = QUANTIZE_INVOKE(&cpi->rtcd.quantize, quantb);
+ }
+
}
else
#endif
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -591,6 +591,7 @@
sf->max_fs_radius = 32;
sf->iterative_sub_pixel = 1;
sf->optimize_coefficients = 1;
+ sf->use_fastquant_for_pick = 0;
sf->first_step = 0;
sf->max_step_search_steps = MAX_MVSEARCH_STEPS;
@@ -758,7 +759,7 @@
cpi->mode_check_freq[THR_SPLITG] = 4;
cpi->mode_check_freq[THR_SPLITA] = 4;
- cpi->mode_check_freq[THR_SPLITMV] = 0;
+ cpi->mode_check_freq[THR_SPLITMV] = 2;
sf->thresh_mult[THR_TM ] = 1500;
sf->thresh_mult[THR_V_PRED ] = 1500;
@@ -789,8 +790,7 @@
sf->thresh_mult[THR_SPLITA ] = 20000;
}
- sf->improved_quant = 0;
- sf->improved_dct = 0;
+ sf->use_fastquant_for_pick = 1;
sf->first_step = 1;
sf->max_step_search_steps = MAX_MVSEARCH_STEPS;
@@ -798,6 +798,8 @@
if (Speed > 1)
{
+ sf->use_fastquant_for_pick = 0;
+
cpi->mode_check_freq[THR_SPLITG] = 15;
cpi->mode_check_freq[THR_SPLITA] = 15;
cpi->mode_check_freq[THR_SPLITMV] = 7;
@@ -830,6 +832,11 @@
sf->thresh_mult[THR_NEWA ] = 2500;
sf->thresh_mult[THR_SPLITA ] = 50000;
}
+
+ sf->first_step = 1;
+
+ sf->improved_quant = 0;
+ sf->improved_dct = 0;
// Only do recode loop on key frames, golden frames and
// alt ref frames
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -182,6 +182,8 @@
int first_step;
int optimize_coefficients;
+ int use_fastquant_for_pick;
+
} SPEED_FEATURES;
typedef struct
@@ -269,6 +271,9 @@
DECLARE_ALIGNED(16, short, zrun_zbin_boost_y1[QINDEX_RANGE][16]);
DECLARE_ALIGNED(16, short, zrun_zbin_boost_y2[QINDEX_RANGE][16]);
DECLARE_ALIGNED(16, short, zrun_zbin_boost_uv[QINDEX_RANGE][16]);
+ DECLARE_ALIGNED(16, short, Y1quant_fast[QINDEX_RANGE][16]);
+ DECLARE_ALIGNED(16, short, Y2quant_fast[QINDEX_RANGE][16]);
+ DECLARE_ALIGNED(16, short, UVquant_fast[QINDEX_RANGE][16]);
MACROBLOCK mb;
--- a/vp8/encoder/quantize.c
+++ b/vp8/encoder/quantize.c
@@ -27,7 +27,7 @@
short *coeff_ptr = b->coeff;
short *zbin_ptr = b->zbin;
short *round_ptr = b->round;
- short *quant_ptr = b->quant;
+ short *quant_ptr = b->quant_fast;
short *quant_shift_ptr = b->quant_shift;
short *qcoeff_ptr = d->qcoeff;
short *dqcoeff_ptr = d->dqcoeff;
@@ -74,7 +74,7 @@
int x, y, z, sz;
short *coeff_ptr = b->coeff;
short *round_ptr = b->round;
- short *quant_ptr = b->quant;
+ short *quant_ptr = b->quant_fast;
short *qcoeff_ptr = d->qcoeff;
short *dqcoeff_ptr = d->dqcoeff;
short *dequant_ptr = d->dequant;
--- a/vp8/encoder/x86/x86_csystemdependent.c
+++ b/vp8/encoder/x86/x86_csystemdependent.c
@@ -32,7 +32,7 @@
short *coeff_ptr = b->coeff;
short *zbin_ptr = b->zbin;
short *round_ptr = b->round;
- short *quant_ptr = b->quant;
+ short *quant_ptr = b->quant_fast;
short *qcoeff_ptr = d->qcoeff;
short *dqcoeff_ptr = d->dqcoeff;
short *dequant_ptr = d->dequant;
@@ -90,7 +90,7 @@
short *scan_mask = vp8_default_zig_zag_mask;//d->scan_order_mask_ptr;
short *coeff_ptr = b->coeff;
short *round_ptr = b->round;
- short *quant_ptr = b->quant;
+ short *quant_ptr = b->quant_fast;
short *qcoeff_ptr = d->qcoeff;
short *dqcoeff_ptr = d->dqcoeff;
short *dequant_ptr = d->dequant;
@@ -183,7 +183,7 @@
d->qcoeff,
d->dequant,
b->round,
- b->quant,
+ b->quant_fast,
d->dqcoeff
);
}