shithub: libvpx

--- a/test/dct32x32_test.cc

+++ b/test/dct32x32_test.cc

@@ -247,8 +247,8 @@

 INSTANTIATE_TEST_CASE_P(

     C, Trans32x32Test,

     ::testing::Values(

-        make_tuple(&vp9_short_fdct32x32_c, &vp9_short_idct32x32_add_c, 0),

-        make_tuple(&vp9_short_fdct32x32_rd_c, &vp9_short_idct32x32_add_c, 1)));

+        make_tuple(&vp9_short_fdct32x32_c, &vp9_idct32x32_1024_add_c, 0),

+        make_tuple(&vp9_short_fdct32x32_rd_c, &vp9_idct32x32_1024_add_c, 1)));

 #if HAVE_SSE2

 INSTANTIATE_TEST_CASE_P(

@@ -255,8 +255,8 @@

     SSE2, Trans32x32Test,

     ::testing::Values(

         make_tuple(&vp9_short_fdct32x32_sse2,

-                   &vp9_short_idct32x32_add_sse2, 0),

+                   &vp9_idct32x32_1024_add_sse2, 0),

         make_tuple(&vp9_short_fdct32x32_rd_sse2,

-                   &vp9_short_idct32x32_add_sse2, 1)));

+                   &vp9_idct32x32_1024_add_sse2, 1)));

 #endif

 }  // namespace

--- a/vp9/common/arm/neon/vp9_short_idct32x32_add_neon.asm

+++ b/vp9/common/arm/neon/vp9_short_idct32x32_add_neon.asm

@@ -43,7 +43,7 @@

 cospi_31_64 EQU   804

-    EXPORT  |vp9_short_idct32x32_add_neon|

+    EXPORT  |vp9_idct32x32_1024_add_neon|

     ARM

     REQUIRE8

     PRESERVE8

@@ -288,7 +288,7 @@

     MEND

     ; --------------------------------------------------------------------------

-;void vp9_short_idct32x32_add_neon(int16_t *input, uint8_t *dest, int dest_stride);

+;void vp9_idct32x32_1024_add_neon(int16_t *input, uint8_t *dest, int dest_stride);

 ;   r0  int16_t *input,

 ;   r1  uint8_t *dest,

@@ -303,7 +303,7 @@

 ;   r9  dest + 15 * dest_stride, descending (14, 13, 12, ...)

 ;   r10 dest + 16 * dest_stride, ascending  (17, 18, 19, ...)

-|vp9_short_idct32x32_add_neon| PROC

+|vp9_idct32x32_1024_add_neon| PROC

     ; This function does one pass of idct32x32 transform.

     ; This is done by transposing the input and then doing a 1d transform on

@@ -1295,5 +1295,5 @@

     vpop {d8-d15}

     pop  {r4-r11}

     bx              lr

-    ENDP  ; |vp9_short_idct32x32_add_neon|

+    ENDP  ; |vp9_idct32x32_1024_add_neon|

     END

--- a/vp9/common/vp9_idct.c

+++ b/vp9/common/vp9_idct.c

@@ -1245,7 +1245,7 @@

   output[31] = step1[0] - step1[31];

-void vp9_short_idct32x32_add_c(int16_t *input, uint8_t *dest, int dest_stride) {

+void vp9_idct32x32_1024_add_c(int16_t *input, uint8_t *dest, int dest_stride) {

   int16_t out[32 * 32];

   int16_t *outptr = out;

   int i, j;

@@ -1282,7 +1282,7 @@

-void vp9_short_idct32x32_1_add_c(int16_t *input, uint8_t *dest,

+void vp9_idct32x32_1_add_c(int16_t *input, uint8_t *dest,

                                  int dest_stride) {

   int i, j;

   int a1;

@@ -1347,12 +1347,12 @@

-void vp9_idct_add_32x32(int16_t *input, uint8_t *dest, int stride, int eob) {

+void vp9_idct32x32_add(int16_t *input, uint8_t *dest, int stride, int eob) {

   if (eob) {

     if (eob == 1)

-      vp9_short_idct32x32_1_add(input, dest, stride);

+      vp9_idct32x32_1_add(input, dest, stride);

     else

-      vp9_short_idct32x32_add(input, dest, stride);

+      vp9_idct32x32_1024_add(input, dest, stride);

--- a/vp9/common/vp9_idct.h

+++ b/vp9/common/vp9_idct.h

@@ -92,7 +92,7 @@

 void vp9_iwht4x4_add(int16_t *input, uint8_t *dest, int stride, int eob);

 void vp9_idct8x8_add(int16_t *input, uint8_t *dest, int stride, int eob);

 void vp9_idct16x16_add(int16_t *input, uint8_t *dest, int stride, int eob);

-void vp9_idct_add_32x32(int16_t *input, uint8_t *dest, int stride, int eob);

+void vp9_idct32x32_add(int16_t *input, uint8_t *dest, int stride, int eob);

 void vp9_iht_add(TX_TYPE tx_type, int16_t *input, uint8_t *dest,

                  int stride, int eob);

--- a/vp9/common/vp9_rtcd_defs.sh

+++ b/vp9/common/vp9_rtcd_defs.sh

@@ -291,11 +291,11 @@

 prototype void vp9_idct16x16_10_add "int16_t *input, uint8_t *dest, int dest_stride"

 specialize vp9_idct16x16_10_add sse2 neon

-prototype void vp9_short_idct32x32_add "int16_t *input, uint8_t *dest, int dest_stride"

-specialize vp9_short_idct32x32_add sse2 neon

+prototype void vp9_idct32x32_1024_add "int16_t *input, uint8_t *dest, int dest_stride"

+specialize vp9_idct32x32_1024_add sse2 neon

-prototype void vp9_short_idct32x32_1_add "int16_t *input, uint8_t *dest, int dest_stride"

-specialize vp9_short_idct32x32_1_add sse2

+prototype void vp9_idct32x32_1_add "int16_t *input, uint8_t *dest, int dest_stride"

+specialize vp9_idct32x32_1_add sse2

 prototype void vp9_short_iht4x4_add "int16_t *input, uint8_t *dest, int dest_stride, int tx_type"

 specialize vp9_short_iht4x4_add sse2 neon

--- a/vp9/common/x86/vp9_idct_intrin_sse2.c

+++ b/vp9/common/x86/vp9_idct_intrin_sse2.c

@@ -2819,7 +2819,7 @@

     input += 8; \

   }  \

-void vp9_short_idct32x32_add_sse2(int16_t *input, uint8_t *dest, int stride) {

+void vp9_idct32x32_1024_add_sse2(int16_t *input, uint8_t *dest, int stride) {

   const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);

   const __m128i final_rounding = _mm_set1_epi16(1<<5);

@@ -3550,7 +3550,7 @@

 }  //NOLINT

-void vp9_short_idct32x32_1_add_sse2(int16_t *input, uint8_t *dest, int stride) {

+void vp9_idct32x32_1_add_sse2(int16_t *input, uint8_t *dest, int stride) {

   __m128i dc_value;

   const __m128i zero = _mm_setzero_si128();

   int a, i;

--- a/vp9/decoder/vp9_decodframe.c

+++ b/vp9/decoder/vp9_decodframe.c

@@ -113,7 +113,7 @@

         break;

       case TX_32X32:

         tx_type = DCT_DCT;

-        vp9_idct_add_32x32(qcoeff, dst, stride, eob);

+        vp9_idct32x32_add(qcoeff, dst, stride, eob);

         break;

       default:

         assert(!"Invalid transform size");

--- a/vp9/encoder/vp9_encodemb.c

+++ b/vp9/encoder/vp9_encodemb.c

@@ -442,7 +442,7 @@

   switch (tx_size) {

     case TX_32X32:

-      vp9_short_idct32x32_add(dqcoeff, dst, pd->dst.stride);

+      vp9_idct32x32_1024_add(dqcoeff, dst, pd->dst.stride);

       break;

     case TX_16X16:

       vp9_idct16x16_add(dqcoeff, dst, pd->dst.stride, pd->eobs[block]);

@@ -539,7 +539,7 @@

                            p->quant, p->quant_shift, qcoeff, dqcoeff,

                            pd->dequant, p->zbin_extra, eob, scan, iscan);

       if (!x->skip_encode && *eob)

-        vp9_short_idct32x32_add(dqcoeff, dst, pd->dst.stride);

+        vp9_idct32x32_1024_add(dqcoeff, dst, pd->dst.stride);

       break;

     case TX_16X16:

       tx_type = get_tx_type_16x16(pd->plane_type, xd);

--

⑨