ref: c21d4370527e1c31c88b5268b7e0a4d99ae0c557
parent: a0359b8c900cf2cf7e15148d14b0ee8d56236c86
author: James Zern <jzern@google.com>
date: Sat Apr 2 07:04:38 EDT 2016
vpx_fdct32x32_1_msa: fix accumulator overflow

Change-Id: I33a5432eda3416382e1cea06b45082c0c65faa75
--- a/vpx_dsp/mips/fwd_dct32x32_msa.c
+++ b/vpx_dsp/mips/fwd_dct32x32_msa.c
@@ -933,23 +933,21 @@
}
void vpx_fdct32x32_1_msa(const int16_t *input, int16_t *out, int32_t stride) {
- out[1] = 0;
-
- out[0] = LD_HADD(input, stride);
- out[0] += LD_HADD(input + 8, stride);
- out[0] += LD_HADD(input + 16, stride);
- out[0] += LD_HADD(input + 24, stride);
- out[0] += LD_HADD(input + 32 * 8, stride);
- out[0] += LD_HADD(input + 32 * 8 + 8, stride);
- out[0] += LD_HADD(input + 32 * 8 + 16, stride);
- out[0] += LD_HADD(input + 32 * 8 + 24, stride);
- out[0] += LD_HADD(input + 32 * 16, stride);
- out[0] += LD_HADD(input + 32 * 16 + 8, stride);
- out[0] += LD_HADD(input + 32 * 16 + 16, stride);
- out[0] += LD_HADD(input + 32 * 16 + 24, stride);
- out[0] += LD_HADD(input + 32 * 24, stride);
- out[0] += LD_HADD(input + 32 * 24 + 8, stride);
- out[0] += LD_HADD(input + 32 * 24 + 16, stride);
- out[0] += LD_HADD(input + 32 * 24 + 24, stride);
- out[0] >>= 3;
+ int sum = LD_HADD(input, stride);
+ sum += LD_HADD(input + 8, stride);
+ sum += LD_HADD(input + 16, stride);
+ sum += LD_HADD(input + 24, stride);
+ sum += LD_HADD(input + 32 * 8, stride);
+ sum += LD_HADD(input + 32 * 8 + 8, stride);
+ sum += LD_HADD(input + 32 * 8 + 16, stride);
+ sum += LD_HADD(input + 32 * 8 + 24, stride);
+ sum += LD_HADD(input + 32 * 16, stride);
+ sum += LD_HADD(input + 32 * 16 + 8, stride);
+ sum += LD_HADD(input + 32 * 16 + 16, stride);
+ sum += LD_HADD(input + 32 * 16 + 24, stride);
+ sum += LD_HADD(input + 32 * 24, stride);
+ sum += LD_HADD(input + 32 * 24 + 8, stride);
+ sum += LD_HADD(input + 32 * 24 + 16, stride);
+ sum += LD_HADD(input + 32 * 24 + 24, stride);
+ out[0] = (int16_t)(sum >> 3);
}