ref: 331d289c5c540cf82d89c2a03da45c30e4fe0779
parent: 5036f0fed80d6f582451b9475aa7e5a3dd2dcd5a
parent: 868484bc66be8563636ead1be3daf1c347d0ec42
author: Johann Koenig <johannkoenig@google.com>
date: Wed Oct 31 17:43:05 EDT 2018
Merge "clang-tidy: fix vp9/encoder parameters"
--- a/vp9/encoder/arm/neon/vp9_dct_neon.c
+++ b/vp9/encoder/arm/neon/vp9_dct_neon.c
@@ -23,13 +23,13 @@
int skip_block, const int16_t *round_ptr,
const int16_t *quant_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
- uint16_t *eob_ptr, const int16_t *scan_ptr,
- const int16_t *iscan_ptr) {
+ uint16_t *eob_ptr, const int16_t *scan,
+ const int16_t *iscan) {
tran_low_t temp_buffer[64];
(void)coeff_ptr;
vpx_fdct8x8_neon(input, temp_buffer, stride);
vp9_quantize_fp_neon(temp_buffer, n_coeffs, skip_block, round_ptr, quant_ptr,
- qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, scan_ptr,
- iscan_ptr);
+ qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, scan,
+ iscan);
}
--- a/vp9/encoder/arm/neon/vp9_quantize_neon.c
+++ b/vp9/encoder/arm/neon/vp9_quantize_neon.c
@@ -122,7 +122,7 @@
const int16_t *quant_ptr,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan_ptr) {
+ const int16_t *scan, const int16_t *iscan) {
const int16x8_t one = vdupq_n_s16(1);
const int16x8_t neg_one = vdupq_n_s16(-1);
@@ -134,8 +134,8 @@
const int16x8_t dequant_thresh = vshrq_n_s16(vld1q_s16(dequant_ptr), 2);
// Process dc and the first seven ac coeffs.
- const uint16x8_t iscan =
- vreinterpretq_u16_s16(vaddq_s16(vld1q_s16(iscan_ptr), one));
+ const uint16x8_t v_iscan =
+ vreinterpretq_u16_s16(vaddq_s16(vld1q_s16(iscan), one));
const int16x8_t coeff = load_tran_low_to_s16q(coeff_ptr);
const int16x8_t coeff_sign = vshrq_n_s16(coeff, 15);
const int16x8_t coeff_abs = vabsq_s16(coeff);
@@ -169,12 +169,12 @@
dqcoeff = vcombine_s16(vshrn_n_s32(dqcoeff_0, 1), vshrn_n_s32(dqcoeff_1, 1));
- eob_max = vandq_u16(vtstq_s16(qcoeff, neg_one), iscan);
+ eob_max = vandq_u16(vtstq_s16(qcoeff, neg_one), v_iscan);
store_s16q_to_tran_low(qcoeff_ptr, qcoeff);
store_s16q_to_tran_low(dqcoeff_ptr, dqcoeff);
- iscan_ptr += 8;
+ iscan += 8;
coeff_ptr += 8;
qcoeff_ptr += 8;
dqcoeff_ptr += 8;
@@ -188,8 +188,8 @@
// Process the rest of the ac coeffs.
for (i = 8; i < 32 * 32; i += 8) {
- const uint16x8_t iscan =
- vreinterpretq_u16_s16(vaddq_s16(vld1q_s16(iscan_ptr), one));
+ const uint16x8_t v_iscan =
+ vreinterpretq_u16_s16(vaddq_s16(vld1q_s16(iscan), one));
const int16x8_t coeff = load_tran_low_to_s16q(coeff_ptr);
const int16x8_t coeff_sign = vshrq_n_s16(coeff, 15);
const int16x8_t coeff_abs = vabsq_s16(coeff);
@@ -215,12 +215,12 @@
vcombine_s16(vshrn_n_s32(dqcoeff_0, 1), vshrn_n_s32(dqcoeff_1, 1));
eob_max =
- vmaxq_u16(eob_max, vandq_u16(vtstq_s16(qcoeff, neg_one), iscan));
+ vmaxq_u16(eob_max, vandq_u16(vtstq_s16(qcoeff, neg_one), v_iscan));
store_s16q_to_tran_low(qcoeff_ptr, qcoeff);
store_s16q_to_tran_low(dqcoeff_ptr, dqcoeff);
- iscan_ptr += 8;
+ iscan += 8;
coeff_ptr += 8;
qcoeff_ptr += 8;
dqcoeff_ptr += 8;
--- a/vp9/encoder/ppc/vp9_quantize_vsx.c
+++ b/vp9/encoder/ppc/vp9_quantize_vsx.c
@@ -42,8 +42,8 @@
int skip_block, const int16_t *round_ptr,
const int16_t *quant_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
- uint16_t *eob_ptr, const int16_t *scan_ptr,
- const int16_t *iscan_ptr) {
+ uint16_t *eob_ptr, const int16_t *scan,
+ const int16_t *iscan) {
int16x8_t qcoeff0, qcoeff1, dqcoeff0, dqcoeff1, eob;
bool16x8_t zero_coeff0, zero_coeff1;
@@ -52,10 +52,10 @@
int16x8_t dequant = vec_vsx_ld(0, dequant_ptr);
int16x8_t coeff0 = vec_vsx_ld(0, coeff_ptr);
int16x8_t coeff1 = vec_vsx_ld(16, coeff_ptr);
- int16x8_t scan0 = vec_vsx_ld(0, iscan_ptr);
- int16x8_t scan1 = vec_vsx_ld(16, iscan_ptr);
+ int16x8_t scan0 = vec_vsx_ld(0, iscan);
+ int16x8_t scan1 = vec_vsx_ld(16, iscan);
- (void)scan_ptr;
+ (void)scan;
(void)skip_block;
assert(!skip_block);
@@ -103,9 +103,9 @@
coeff0 = vec_vsx_ld(off0, coeff_ptr);
coeff1 = vec_vsx_ld(off1, coeff_ptr);
coeff2 = vec_vsx_ld(off2, coeff_ptr);
- scan0 = vec_vsx_ld(off0, iscan_ptr);
- scan1 = vec_vsx_ld(off1, iscan_ptr);
- scan2 = vec_vsx_ld(off2, iscan_ptr);
+ scan0 = vec_vsx_ld(off0, iscan);
+ scan1 = vec_vsx_ld(off1, iscan);
+ scan2 = vec_vsx_ld(off2, iscan);
qcoeff0 = vec_mulhi(vec_vaddshs(vec_abs(coeff0), round), quant);
zero_coeff0 = vec_cmpeq(qcoeff0, vec_zeros_s16);
@@ -169,8 +169,7 @@
const int16_t *quant_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const int16_t *scan_ptr,
- const int16_t *iscan_ptr) {
+ const int16_t *scan, const int16_t *iscan) {
// In stage 1, we quantize 16 coeffs (DC + 15 AC)
// In stage 2, we loop 42 times and quantize 24 coeffs per iteration
// (32 * 32 - 16) / 24 = 42
@@ -188,13 +187,13 @@
int16x8_t dequant = vec_vsx_ld(0, dequant_ptr);
int16x8_t coeff0 = vec_vsx_ld(0, coeff_ptr);
int16x8_t coeff1 = vec_vsx_ld(16, coeff_ptr);
- int16x8_t scan0 = vec_vsx_ld(0, iscan_ptr);
- int16x8_t scan1 = vec_vsx_ld(16, iscan_ptr);
+ int16x8_t scan0 = vec_vsx_ld(0, iscan);
+ int16x8_t scan1 = vec_vsx_ld(16, iscan);
int16x8_t thres = vec_sra(dequant, vec_splats((uint16_t)2));
int16x8_t abs_coeff0 = vec_abs(coeff0);
int16x8_t abs_coeff1 = vec_abs(coeff1);
- (void)scan_ptr;
+ (void)scan;
(void)skip_block;
(void)n_coeffs;
assert(!skip_block);
@@ -238,9 +237,9 @@
coeff0 = vec_vsx_ld(off0, coeff_ptr);
coeff1 = vec_vsx_ld(off1, coeff_ptr);
coeff2 = vec_vsx_ld(off2, coeff_ptr);
- scan0 = vec_vsx_ld(off0, iscan_ptr);
- scan1 = vec_vsx_ld(off1, iscan_ptr);
- scan2 = vec_vsx_ld(off2, iscan_ptr);
+ scan0 = vec_vsx_ld(off0, iscan);
+ scan1 = vec_vsx_ld(off1, iscan);
+ scan2 = vec_vsx_ld(off2, iscan);
abs_coeff0 = vec_abs(coeff0);
abs_coeff1 = vec_abs(coeff1);
--- a/vp9/encoder/vp9_encodemv.h
+++ b/vp9/encoder/vp9_encodemv.h
@@ -27,7 +27,7 @@
unsigned int *const max_mv_magnitude);
void vp9_build_nmv_cost_table(int *mvjoint, int *mvcost[2],
- const nmv_context *mvctx, int usehp);
+ const nmv_context *ctx, int usehp);
void vp9_update_mv_count(ThreadData *td);
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -811,7 +811,7 @@
// frame is made and not just a copy of the pointer..
int vp9_receive_raw_frame(VP9_COMP *cpi, vpx_enc_frame_flags_t frame_flags,
YV12_BUFFER_CONFIG *sd, int64_t time_stamp,
- int64_t end_time_stamp);
+ int64_t end_time);
int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
size_t *size, uint8_t *dest, int64_t *time_stamp,
@@ -832,9 +832,11 @@
int vp9_update_entropy(VP9_COMP *cpi, int update);
-int vp9_set_active_map(VP9_COMP *cpi, unsigned char *map, int rows, int cols);
+int vp9_set_active_map(VP9_COMP *cpi, unsigned char *new_map_16x16, int rows,
+ int cols);
-int vp9_get_active_map(VP9_COMP *cpi, unsigned char *map, int rows, int cols);
+int vp9_get_active_map(VP9_COMP *cpi, unsigned char *new_map_16x16, int rows,
+ int cols);
int vp9_set_internal_size(VP9_COMP *cpi, VPX_SCALING horiz_mode,
VPX_SCALING vert_mode);
--- a/vp9/encoder/vp9_mcomp.h
+++ b/vp9/encoder/vp9_mcomp.h
@@ -59,7 +59,7 @@
int vp9_init_search_range(int size);
int vp9_refining_search_sad(const struct macroblock *x, struct mv *ref_mv,
- int sad_per_bit, int distance,
+ int error_per_bit, int search_range,
const struct vp9_variance_vtable *fn_ptr,
const struct mv *center_mv);
--- a/vp9/encoder/vp9_ratectrl.h
+++ b/vp9/encoder/vp9_ratectrl.h
@@ -194,7 +194,7 @@
void vp9_rc_init(const struct VP9EncoderConfig *oxcf, int pass,
RATE_CONTROL *rc);
-int vp9_estimate_bits_at_q(FRAME_TYPE frame_kind, int q, int mbs,
+int vp9_estimate_bits_at_q(FRAME_TYPE frame_type, int q, int mbs,
double correction_factor, vpx_bit_depth_t bit_depth);
double vp9_convert_qindex_to_q(int qindex, vpx_bit_depth_t bit_depth);
@@ -205,9 +205,9 @@
int vp9_rc_get_default_min_gf_interval(int width, int height, double framerate);
// Note vp9_rc_get_default_max_gf_interval() requires the min_gf_interval to
-// be passed in to ensure that the max_gf_interval returned is at least as bis
+// be passed in to ensure that the max_gf_interval returned is at least as big
// as that.
-int vp9_rc_get_default_max_gf_interval(double framerate, int min_frame_rate);
+int vp9_rc_get_default_max_gf_interval(double framerate, int min_gf_interval);
// Generally at the high level, the following flow is expected
// to be enforced for rate control:
@@ -253,7 +253,7 @@
// Computes frame size bounds.
void vp9_rc_compute_frame_size_bounds(const struct VP9_COMP *cpi,
- int this_frame_target,
+ int frame_target,
int *frame_under_shoot_limit,
int *frame_over_shoot_limit);
--- a/vp9/encoder/vp9_rd.h
+++ b/vp9/encoder/vp9_rd.h
@@ -145,7 +145,7 @@
void vp9_initialize_me_consts(struct VP9_COMP *cpi, MACROBLOCK *x, int qindex);
-void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n,
+void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n_log2,
unsigned int qstep, int *rate, int64_t *dist);
void vp9_model_rd_from_var_lapndz_vec(unsigned int var[MAX_MB_PLANE],
@@ -176,8 +176,8 @@
void vp9_set_rd_speed_thresholds_sub8x8(struct VP9_COMP *cpi);
-void vp9_update_rd_thresh_fact(int (*fact)[MAX_MODES], int rd_thresh, int bsize,
- int best_mode_index);
+void vp9_update_rd_thresh_fact(int (*factor_buf)[MAX_MODES], int rd_thresh,
+ int bsize, int best_mode_index);
static INLINE int rd_less_than_thresh(int64_t best_rd, int thresh,
const int *const thresh_fact) {
--- a/vp9/encoder/x86/vp9_dct_intrin_sse2.c
+++ b/vp9/encoder/x86/vp9_dct_intrin_sse2.c
@@ -185,8 +185,8 @@
int skip_block, const int16_t *round_ptr,
const int16_t *quant_ptr, int16_t *qcoeff_ptr,
int16_t *dqcoeff_ptr, const int16_t *dequant_ptr,
- uint16_t *eob_ptr, const int16_t *scan_ptr,
- const int16_t *iscan_ptr) {
+ uint16_t *eob_ptr, const int16_t *scan,
+ const int16_t *iscan) {
__m128i zero;
int pass;
@@ -215,7 +215,7 @@
__m128i *in[8];
int index = 0;
- (void)scan_ptr;
+ (void)scan;
(void)coeff_ptr;
// Pre-condition input (shift by two)
@@ -449,7 +449,7 @@
in7 = _mm_srai_epi16(in7, 1);
}
- iscan_ptr += n_coeffs;
+ iscan += n_coeffs;
qcoeff_ptr += n_coeffs;
dqcoeff_ptr += n_coeffs;
n_coeffs = -n_coeffs;
@@ -518,8 +518,8 @@
zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
- iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs));
- iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1);
+ iscan0 = _mm_load_si128((const __m128i *)(iscan + n_coeffs));
+ iscan1 = _mm_load_si128((const __m128i *)(iscan + n_coeffs) + 1);
// Add one to convert from indices to counts
iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
@@ -582,8 +582,8 @@
zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
- iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs));
- iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1);
+ iscan0 = _mm_load_si128((const __m128i *)(iscan + n_coeffs));
+ iscan1 = _mm_load_si128((const __m128i *)(iscan + n_coeffs) + 1);
// Add one to convert from indices to counts
iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
--- a/vp9/encoder/x86/vp9_dct_ssse3.c
+++ b/vp9/encoder/x86/vp9_dct_ssse3.c
@@ -18,11 +18,13 @@
#include "vpx_dsp/x86/inv_txfm_sse2.h"
#include "vpx_dsp/x86/txfm_common_sse2.h"
-void vp9_fdct8x8_quant_ssse3(
- const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs,
- int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
- uint16_t *eob_ptr, const int16_t *scan_ptr, const int16_t *iscan_ptr) {
+void vp9_fdct8x8_quant_ssse3(const int16_t *input, int stride,
+ tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ int skip_block, const int16_t *round_ptr,
+ const int16_t *quant_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr,
+ const int16_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan) {
__m128i zero;
int pass;
@@ -52,7 +54,7 @@
__m128i *in[8];
int index = 0;
- (void)scan_ptr;
+ (void)scan;
(void)coeff_ptr;
// Pre-condition input (shift by two)
@@ -280,7 +282,7 @@
in7 = _mm_srai_epi16(in7, 1);
}
- iscan_ptr += n_coeffs;
+ iscan += n_coeffs;
qcoeff_ptr += n_coeffs;
dqcoeff_ptr += n_coeffs;
n_coeffs = -n_coeffs;
@@ -350,8 +352,8 @@
zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
- iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs));
- iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1);
+ iscan0 = _mm_load_si128((const __m128i *)(iscan + n_coeffs));
+ iscan1 = _mm_load_si128((const __m128i *)(iscan + n_coeffs) + 1);
// Add one to convert from indices to counts
iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
@@ -427,8 +429,8 @@
zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
- iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs));
- iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1);
+ iscan0 = _mm_load_si128((const __m128i *)(iscan + n_coeffs));
+ iscan1 = _mm_load_si128((const __m128i *)(iscan + n_coeffs) + 1);
// Add one to convert from indices to counts
iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
--- a/vp9/encoder/x86/vp9_quantize_avx2.c
+++ b/vp9/encoder/x86/vp9_quantize_avx2.c
@@ -50,18 +50,18 @@
int skip_block, const int16_t *round_ptr,
const int16_t *quant_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
- uint16_t *eob_ptr, const int16_t *scan_ptr,
- const int16_t *iscan_ptr) {
+ uint16_t *eob_ptr, const int16_t *scan,
+ const int16_t *iscan) {
__m128i eob;
__m256i round256, quant256, dequant256;
__m256i eob256, thr256;
- (void)scan_ptr;
+ (void)scan;
(void)skip_block;
assert(!skip_block);
coeff_ptr += n_coeffs;
- iscan_ptr += n_coeffs;
+ iscan += n_coeffs;
qcoeff_ptr += n_coeffs;
dqcoeff_ptr += n_coeffs;
n_coeffs = -n_coeffs;
@@ -97,7 +97,7 @@
store_tran_low(coeff256, dqcoeff_ptr + n_coeffs);
}
- eob256 = scan_eob_256((const __m256i *)(iscan_ptr + n_coeffs), &coeff256);
+ eob256 = scan_eob_256((const __m256i *)(iscan + n_coeffs), &coeff256);
n_coeffs += 8 * 2;
}
@@ -124,8 +124,7 @@
coeff256 = _mm256_mullo_epi16(qcoeff256, dequant256);
store_tran_low(coeff256, dqcoeff_ptr + n_coeffs);
eob256 = _mm256_max_epi16(
- eob256,
- scan_eob_256((const __m256i *)(iscan_ptr + n_coeffs), &coeff256));
+ eob256, scan_eob_256((const __m256i *)(iscan + n_coeffs), &coeff256));
} else {
store_zero_tran_low(qcoeff_ptr + n_coeffs);
store_zero_tran_low(dqcoeff_ptr + n_coeffs);
--- a/vp9/encoder/x86/vp9_quantize_sse2.c
+++ b/vp9/encoder/x86/vp9_quantize_sse2.c
@@ -21,8 +21,8 @@
int skip_block, const int16_t *round_ptr,
const int16_t *quant_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
- uint16_t *eob_ptr, const int16_t *scan_ptr,
- const int16_t *iscan_ptr) {
+ uint16_t *eob_ptr, const int16_t *scan,
+ const int16_t *iscan) {
__m128i zero;
__m128i thr;
int16_t nzflag;
@@ -29,12 +29,12 @@
__m128i eob;
__m128i round, quant, dequant;
- (void)scan_ptr;
+ (void)scan;
(void)skip_block;
assert(!skip_block);
coeff_ptr += n_coeffs;
- iscan_ptr += n_coeffs;
+ iscan += n_coeffs;
qcoeff_ptr += n_coeffs;
dqcoeff_ptr += n_coeffs;
n_coeffs = -n_coeffs;
@@ -100,8 +100,8 @@
zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
- iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs));
- iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1);
+ iscan0 = _mm_load_si128((const __m128i *)(iscan + n_coeffs));
+ iscan1 = _mm_load_si128((const __m128i *)(iscan + n_coeffs) + 1);
// Add one to convert from indices to counts
iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
@@ -175,8 +175,8 @@
zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
- iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs));
- iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1);
+ iscan0 = _mm_load_si128((const __m128i *)(iscan + n_coeffs));
+ iscan1 = _mm_load_si128((const __m128i *)(iscan + n_coeffs) + 1);
// Add one to convert from indices to counts
iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);