shithub: libvpx

--- a/test/fdct8x8_test.cc

+++ b/test/fdct8x8_test.cc

@@ -21,7 +21,7 @@

 extern "C" {

 #include "vp9/common/vp9_entropy.h"

 #include "./vp9_rtcd.h"

-void vp9_short_idct8x8_add_c(int16_t *input, uint8_t *output, int pitch);

+void vp9_idct8x8_64_add_c(int16_t *input, uint8_t *output, int pitch);

 #include "vpx/vpx_integer.h"

@@ -296,7 +296,7 @@

 INSTANTIATE_TEST_CASE_P(

     C, FwdTrans8x8DCT,

     ::testing::Values(

-        make_tuple(&vp9_short_fdct8x8_c, &vp9_short_idct8x8_add_c, 0)));

+        make_tuple(&vp9_short_fdct8x8_c, &vp9_idct8x8_64_add_c, 0)));

 INSTANTIATE_TEST_CASE_P(

     C, FwdTrans8x8HT,

     ::testing::Values(

@@ -309,7 +309,7 @@

 INSTANTIATE_TEST_CASE_P(

     SSE2, FwdTrans8x8DCT,

     ::testing::Values(

-        make_tuple(&vp9_short_fdct8x8_sse2, &vp9_short_idct8x8_add_sse2, 0)));

+        make_tuple(&vp9_short_fdct8x8_sse2, &vp9_idct8x8_64_add_sse2, 0)));

 INSTANTIATE_TEST_CASE_P(

     SSE2, FwdTrans8x8HT,

     ::testing::Values(

--- a/test/idct8x8_test.cc

+++ b/test/idct8x8_test.cc

@@ -126,7 +126,7 @@

     reference_dct_2d(input, output_r);

     for (int j = 0; j < 64; ++j)

       coeff[j] = round(output_r[j]);

-    vp9_short_idct8x8_add_c(coeff, dst, 8);

+    vp9_idct8x8_64_add_c(coeff, dst, 8);

     for (int j = 0; j < 64; ++j) {

       const int diff = dst[j] - src[j];

       const int error = diff * diff;

--- a/vp9/common/arm/neon/vp9_short_idct8x8_1_add_neon.asm

+++ b/vp9/common/arm/neon/vp9_short_idct8x8_1_add_neon.asm

@@ -8,7 +8,7 @@

-    EXPORT  |vp9_short_idct8x8_1_add_neon|

+    EXPORT  |vp9_idct8x8_1_add_neon|

ARM

     REQUIRE8

     PRESERVE8

@@ -15,7 +15,7 @@

     AREA ||.text||, CODE, READONLY, ALIGN=2

-;void vp9_short_idct8x8_1_add_neon(int16_t *input, uint8_t *dest,

+;void vp9_idct8x8_1_add_neon(int16_t *input, uint8_t *dest,

 ;                                  int dest_stride)

 ; r0  int16_t input

@@ -22,7 +22,7 @@

 ; r1  uint8_t *dest

 ; r2  int dest_stride)

-|vp9_short_idct8x8_1_add_neon| PROC

+|vp9_idct8x8_1_add_neon| PROC

     ldrsh            r0, [r0]

     ; generate cospi_16_64 = 11585

@@ -83,6 +83,6 @@

     vst1.64          {d31}, [r12], r2

     bx               lr

-    ENDP             ; |vp9_short_idct8x8_1_add_neon|

+    ENDP             ; |vp9_idct8x8_1_add_neon|

END

--- a/vp9/common/arm/neon/vp9_short_idct8x8_add_neon.asm

+++ b/vp9/common/arm/neon/vp9_short_idct8x8_add_neon.asm

@@ -8,8 +8,8 @@

 ;  be found in the AUTHORS file in the root of the source tree.

-    EXPORT  |vp9_short_idct8x8_add_neon|

-    EXPORT  |vp9_short_idct8x8_10_add_neon|

+    EXPORT  |vp9_idct8x8_64_add_neon|

+    EXPORT  |vp9_idct8x8_10_add_neon|

ARM

     REQUIRE8

     PRESERVE8

@@ -198,13 +198,13 @@

     MEND

     AREA    Block, CODE, READONLY ; name this block of code

-;void vp9_short_idct8x8_add_neon(int16_t *input, uint8_t *dest, int dest_stride)

+;void vp9_idct8x8_64_add_neon(int16_t *input, uint8_t *dest, int dest_stride)

 ; r0  int16_t input

 ; r1  uint8_t *dest

 ; r2  int dest_stride)

-|vp9_short_idct8x8_add_neon| PROC

+|vp9_idct8x8_64_add_neon| PROC

     push            {r4-r9}

     vpush           {d8-d15}

     vld1.s16        {q8,q9}, [r0]!

@@ -308,15 +308,15 @@

     vpop            {d8-d15}

     pop             {r4-r9}

     bx              lr

-    ENDP  ; |vp9_short_idct8x8_add_neon|

+    ENDP  ; |vp9_idct8x8_64_add_neon|

-;void vp9_short_idct8x8_10_add_neon(int16_t *input, uint8_t *dest, int dest_stride)

+;void vp9_idct8x8_10_add_neon(int16_t *input, uint8_t *dest, int dest_stride)

 ; r0  int16_t input

 ; r1  uint8_t *dest

 ; r2  int dest_stride)

-|vp9_short_idct8x8_10_add_neon| PROC

+|vp9_idct8x8_10_add_neon| PROC

     push            {r4-r9}

     vpush           {d8-d15}

     vld1.s16        {q8,q9}, [r0]!

@@ -514,6 +514,6 @@

     vpop            {d8-d15}

     pop             {r4-r9}

     bx              lr

-    ENDP  ; |vp9_short_idct8x8_10_add_neon|

+    ENDP  ; |vp9_idct8x8_10_add_neon|

END

--- a/vp9/common/vp9_idct.c

+++ b/vp9/common/vp9_idct.c

@@ -201,7 +201,7 @@

   output[7] = step1[0] - step1[7];

-void vp9_short_idct8x8_add_c(int16_t *input, uint8_t *dest, int dest_stride) {

+void vp9_idct8x8_64_add_c(int16_t *input, uint8_t *dest, int dest_stride) {

   int16_t out[8 * 8];

   int16_t *outptr = out;

   int i, j;

@@ -225,7 +225,7 @@

-void vp9_short_idct8x8_1_add_c(int16_t *input, uint8_t *dest, int dest_stride) {

+void vp9_idct8x8_1_add_c(int16_t *input, uint8_t *dest, int dest_stride) {

   int i, j;

   int a1;

   int16_t out = dct_const_round_shift(input[0] * cospi_16_64);

@@ -420,7 +420,7 @@

                                   + dest[j * dest_stride + i]);  }

-void vp9_short_idct8x8_10_add_c(int16_t *input, uint8_t *dest,

+void vp9_idct8x8_10_add_c(int16_t *input, uint8_t *dest,

                                 int dest_stride) {

   int16_t out[8 * 8] = { 0 };

   int16_t *outptr = out;

@@ -1301,7 +1301,7 @@

     vp9_iwht4x4_1_add(input, dest, stride);

-void vp9_idct_add_8x8(int16_t *input, uint8_t *dest, int stride, int eob) {

+void vp9_idct8x8_add(int16_t *input, uint8_t *dest, int stride, int eob) {

   // If dc is 1, then input[0] is the reconstructed value, do not need

   // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1.

@@ -1312,11 +1312,11 @@

   if (eob) {

     if (eob == 1)

       // DC only DCT coefficient

-      vp9_short_idct8x8_1_add(input, dest, stride);

+      vp9_idct8x8_1_add(input, dest, stride);

     else if (eob <= 10)

-      vp9_short_idct8x8_10_add(input, dest, stride);

+      vp9_idct8x8_10_add(input, dest, stride);

     else

-      vp9_short_idct8x8_add(input, dest, stride);

+      vp9_idct8x8_64_add(input, dest, stride);

@@ -1355,7 +1355,7 @@

 void vp9_iht_add_8x8(TX_TYPE tx_type, int16_t *input, uint8_t *dest,

                        int stride, int eob) {

   if (tx_type == DCT_DCT) {

-    vp9_idct_add_8x8(input, dest, stride, eob);

+    vp9_idct8x8_add(input, dest, stride, eob);

   } else {

     if (eob > 0) {

       vp9_short_iht8x8_add(input, dest, stride, tx_type);

--- a/vp9/common/vp9_idct.h

+++ b/vp9/common/vp9_idct.h

@@ -90,7 +90,7 @@

 void vp9_idct4x4_add(int16_t *input, uint8_t *dest, int stride, int eob);

 void vp9_iwht4x4_add(int16_t *input, uint8_t *dest, int stride, int eob);

-void vp9_idct_add_8x8(int16_t *input, uint8_t *dest, int stride, int eob);

+void vp9_idct8x8_add(int16_t *input, uint8_t *dest, int stride, int eob);

 void vp9_idct_add_16x16(int16_t *input, uint8_t *dest, int stride, int eob);

 void vp9_idct_add_32x32(int16_t *input, uint8_t *dest, int stride, int eob);

--- a/vp9/common/vp9_rtcd_defs.sh

+++ b/vp9/common/vp9_rtcd_defs.sh

@@ -273,14 +273,14 @@

 prototype void vp9_idct4x4_16_add "int16_t *input, uint8_t *dest, int dest_stride"

 specialize vp9_idct4x4_16_add sse2 neon

-prototype void vp9_short_idct8x8_1_add "int16_t *input, uint8_t *dest, int dest_stride"

-specialize vp9_short_idct8x8_1_add sse2 neon

+prototype void vp9_idct8x8_1_add "int16_t *input, uint8_t *dest, int dest_stride"

+specialize vp9_idct8x8_1_add sse2 neon

-prototype void vp9_short_idct8x8_add "int16_t *input, uint8_t *dest, int dest_stride"

-specialize vp9_short_idct8x8_add sse2 neon

+prototype void vp9_idct8x8_64_add "int16_t *input, uint8_t *dest, int dest_stride"

+specialize vp9_idct8x8_64_add sse2 neon

-prototype void vp9_short_idct8x8_10_add "int16_t *input, uint8_t *dest, int dest_stride"

-specialize vp9_short_idct8x8_10_add sse2 neon

+prototype void vp9_idct8x8_10_add "int16_t *input, uint8_t *dest, int dest_stride"

+specialize vp9_idct8x8_10_add sse2 neon

 prototype void vp9_short_idct16x16_1_add "int16_t *input, uint8_t *dest, int dest_stride"

 specialize vp9_short_idct16x16_1_add sse2 neon

--- a/vp9/common/x86/vp9_idct_intrin_sse2.c

+++ b/vp9/common/x86/vp9_idct_intrin_sse2.c

@@ -529,7 +529,7 @@

       dest += stride; \

-void vp9_short_idct8x8_add_sse2(int16_t *input, uint8_t *dest, int stride) {

+void vp9_idct8x8_64_add_sse2(int16_t *input, uint8_t *dest, int stride) {

   const __m128i zero = _mm_setzero_si128();

   const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);

   const __m128i final_rounding = _mm_set1_epi16(1<<4);

@@ -597,7 +597,7 @@

   RECON_AND_STORE(dest, in7);

-void vp9_short_idct8x8_1_add_sse2(int16_t *input, uint8_t *dest, int stride) {

+void vp9_idct8x8_1_add_sse2(int16_t *input, uint8_t *dest, int stride) {

   __m128i dc_value;

   const __m128i zero = _mm_setzero_si128();

   int a;

@@ -985,7 +985,7 @@

   RECON_AND_STORE(dest, in[7]);

-void vp9_short_idct8x8_10_add_sse2(int16_t *input, uint8_t *dest, int stride) {

+void vp9_idct8x8_10_add_sse2(int16_t *input, uint8_t *dest, int stride) {

   const __m128i zero = _mm_setzero_si128();

   const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);

   const __m128i final_rounding = _mm_set1_epi16(1<<4);

--- a/vp9/encoder/vp9_encodemb.c

+++ b/vp9/encoder/vp9_encodemb.c

@@ -460,7 +460,7 @@

       vp9_idct_add_16x16(dqcoeff, dst, pd->dst.stride, pd->eobs[block]);

       break;

     case TX_8X8:

-      vp9_idct_add_8x8(dqcoeff, dst, pd->dst.stride, pd->eobs[block]);

+      vp9_idct8x8_add(dqcoeff, dst, pd->dst.stride, pd->eobs[block]);

       break;

     case TX_4X4:

       // this is like vp9_short_idct4x4 but has a special case around eob<=1

--

⑨