shithub: libvpx

--- a/vp8/common/arm/neon/dc_only_idct_add_neon.c

+++ b/vp8/common/arm/neon/dc_only_idct_add_neon.c

@@ -10,6 +10,8 @@

 #include <arm_neon.h>

+#include "./vp8_rtcd.h"

 void vp8_dc_only_idct_add_neon(int16_t input_dc, unsigned char *pred_ptr,

                                int pred_stride, unsigned char *dst_ptr,

                                int dst_stride) {

--- a/vp8/common/arm/neon/dequant_idct_neon.c

+++ b/vp8/common/arm/neon/dequant_idct_neon.c

@@ -10,6 +10,8 @@

 #include <arm_neon.h>

+#include "./vp8_rtcd.h"

 static const int16_t cospi8sqrt2minus1 = 20091;

 // 35468 exceeds INT16_MAX and gets converted to a negative number. Because of

 // the way it is used in vqdmulh, where the result is doubled, it can be divided

--- a/vp8/common/arm/neon/shortidct4x4llm_neon.c

+++ b/vp8/common/arm/neon/shortidct4x4llm_neon.c

@@ -10,6 +10,8 @@

 #include <arm_neon.h>

+#include "./vp8_rtcd.h"

 static const int16_t cospi8sqrt2minus1 = 20091;

 // 35468 exceeds INT16_MAX and gets converted to a negative number. Because of

 // the way it is used in vqdmulh, where the result is doubled, it can be divided

--- a/vpx_dsp/arm/idct16x16_1_add_neon.c

+++ b/vpx_dsp/arm/idct16x16_1_add_neon.c

@@ -10,10 +10,12 @@

 #include <arm_neon.h>

+#include "./vpx_dsp_rtcd.h"

 #include "vpx_dsp/inv_txfm.h"

 #include "vpx_ports/mem.h"

-void vpx_idct16x16_1_add_neon(int16_t *input, uint8_t *dest, int dest_stride) {

+void vpx_idct16x16_1_add_neon(const tran_low_t *input, uint8_t *dest,

+                              int dest_stride) {

   uint8x8_t d2u8, d3u8, d30u8, d31u8;

   uint64x1_t d2u64, d3u64, d4u64, d5u64;

   uint16x8_t q0u16, q9u16, q10u16, q11u16, q12u16;

--- a/vpx_dsp/arm/idct16x16_neon.c

+++ b/vpx_dsp/arm/idct16x16_neon.c

@@ -8,6 +8,7 @@

  *  be found in the AUTHORS file in the root of the source tree.

  */

+#include "./vpx_dsp_rtcd.h"

 #include "vpx_dsp/vpx_dsp_common.h"

 void vpx_idct16x16_256_add_neon_pass1(const int16_t *input, int16_t *output,

--- a/vpx_dsp/arm/idct32x32_1_add_neon.c

+++ b/vpx_dsp/arm/idct32x32_1_add_neon.c

@@ -11,7 +11,7 @@

 #include <arm_neon.h>

 #include "./vpx_config.h"

+#include "./vpx_dsp_rtcd.h"

 #include "vpx_dsp/inv_txfm.h"

 #include "vpx_ports/mem.h"

@@ -93,7 +93,8 @@

   return;

-void vpx_idct32x32_1_add_neon(int16_t *input, uint8_t *dest, int dest_stride) {

+void vpx_idct32x32_1_add_neon(const tran_low_t *input, uint8_t *dest,

+                              int dest_stride) {

   uint8x16_t q0u8, q8u8, q9u8, q10u8, q11u8, q12u8, q13u8, q14u8, q15u8;

   int i, j, dest_stride8;

   uint8_t *d;

--- a/vpx_dsp/arm/idct32x32_add_neon.c

+++ b/vpx_dsp/arm/idct32x32_add_neon.c

@@ -11,6 +11,7 @@

 #include <arm_neon.h>

 #include "./vpx_config.h"

+#include "./vpx_dsp_rtcd.h"

 #include "vpx_dsp/arm/transpose_neon.h"

 #include "vpx_dsp/txfm_common.h"

@@ -152,8 +153,8 @@

   return;

-static INLINE void idct32_transpose_pair(int16_t *input, int16_t *t_buf) {

-  int16_t *in;

+static INLINE void idct32_transpose_pair(const int16_t *input, int16_t *t_buf) {

+  const int16_t *in;

   int i;

   const int stride = 32;

   int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16, q13s16, q14s16, q15s16;

@@ -382,7 +383,8 @@

   return;

-void vpx_idct32x32_1024_add_neon(int16_t *input, uint8_t *dest, int stride) {

+void vpx_idct32x32_1024_add_neon(const tran_low_t *input, uint8_t *dest,

+                                 int stride) {

   int i, idct32_pass_loop;

   int16_t trans_buf[32 * 8];

   int16_t pass1[32 * 32];

--- a/vpx_dsp/arm/idct4x4_1_add_neon.c

+++ b/vpx_dsp/arm/idct4x4_1_add_neon.c

@@ -10,10 +10,12 @@

 #include <arm_neon.h>

+#include "./vpx_dsp_rtcd.h"

 #include "vpx_dsp/inv_txfm.h"

 #include "vpx_ports/mem.h"

-void vpx_idct4x4_1_add_neon(int16_t *input, uint8_t *dest, int dest_stride) {

+void vpx_idct4x4_1_add_neon(const tran_low_t *input, uint8_t *dest,

+                            int dest_stride) {

   uint8x8_t d6u8;

   uint32x2_t d2u32 = vdup_n_u32(0);

   uint16x8_t q8u16;

--- a/vpx_dsp/arm/idct4x4_add_neon.c

+++ b/vpx_dsp/arm/idct4x4_add_neon.c

@@ -10,7 +10,10 @@

 #include <arm_neon.h>

-void vpx_idct4x4_16_add_neon(int16_t *input, uint8_t *dest, int dest_stride) {

+#include "./vpx_dsp_rtcd.h"

+void vpx_idct4x4_16_add_neon(const tran_low_t *input, uint8_t *dest,

+                             int dest_stride) {

   uint8x8_t d26u8, d27u8;

   uint32x2_t d26u32, d27u32;

   uint16x8_t q8u16, q9u16;

--- a/vpx_dsp/arm/idct8x8_1_add_neon.c

+++ b/vpx_dsp/arm/idct8x8_1_add_neon.c

@@ -10,10 +10,12 @@

 #include <arm_neon.h>

+#include "./vpx_dsp_rtcd.h"

 #include "vpx_dsp/inv_txfm.h"

 #include "vpx_ports/mem.h"

-void vpx_idct8x8_1_add_neon(int16_t *input, uint8_t *dest, int dest_stride) {

+void vpx_idct8x8_1_add_neon(const tran_low_t *input, uint8_t *dest,

+                            int dest_stride) {

   uint8x8_t d2u8, d3u8, d30u8, d31u8;

   uint64x1_t d2u64, d3u64, d4u64, d5u64;

   uint16x8_t q0u16, q9u16, q10u16, q11u16, q12u16;

--- a/vpx_dsp/arm/idct8x8_add_neon.c

+++ b/vpx_dsp/arm/idct8x8_add_neon.c

@@ -11,6 +11,7 @@

 #include <arm_neon.h>

 #include "./vpx_config.h"

+#include "./vpx_dsp_rtcd.h"

 #include "vpx_dsp/arm/transpose_neon.h"

 #include "vpx_dsp/txfm_common.h"

@@ -165,7 +166,8 @@

   return;

-void vpx_idct8x8_64_add_neon(int16_t *input, uint8_t *dest, int dest_stride) {

+void vpx_idct8x8_64_add_neon(const tran_low_t *input, uint8_t *dest,

+                             int dest_stride) {

   uint8_t *d1, *d2;

   uint8x8_t d0u8, d1u8, d2u8, d3u8;

   uint64x1_t d0u64, d1u64, d2u64, d3u64;

@@ -267,7 +269,8 @@

   return;

-void vpx_idct8x8_12_add_neon(int16_t *input, uint8_t *dest, int dest_stride) {

+void vpx_idct8x8_12_add_neon(const tran_low_t *input, uint8_t *dest,

+                             int dest_stride) {

   uint8_t *d1, *d2;

   uint8x8_t d0u8, d1u8, d2u8, d3u8;

   int16x4_t d10s16, d11s16, d12s16, d13s16, d16s16;

--

⑨