shithub: libvpx

Download patch

ref: e47811ef8f045352dfad6c00add79e57e04fc96b
parent: af7695344867d2ac955570f18a9aac0502a842a0
parent: 493a8579f151df871d4b5b2aef2646cd7f7a179a
author: hkuang <hkuang@google.com>
date: Tue May 5 13:11:06 EDT 2015

Merge "Add some sse2 code for intra prediction."

--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -87,13 +87,13 @@
 specialize qw/vp9_dc_predictor_4x4 dspr2/, "$sse_x86inc";
 
 add_proto qw/void vp9_dc_top_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_top_predictor_4x4/;
+specialize qw/vp9_dc_top_predictor_4x4/, "$sse_x86inc";
 
 add_proto qw/void vp9_dc_left_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_left_predictor_4x4/;
+specialize qw/vp9_dc_left_predictor_4x4/, "$sse_x86inc";
 
 add_proto qw/void vp9_dc_128_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_128_predictor_4x4/;
+specialize qw/vp9_dc_128_predictor_4x4/, "$sse_x86inc";
 
 add_proto qw/void vp9_d207_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vp9_d207_predictor_8x8/, "$ssse3_x86inc";
@@ -126,13 +126,13 @@
 specialize qw/vp9_dc_predictor_8x8 dspr2/, "$sse_x86inc";
 
 add_proto qw/void vp9_dc_top_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_top_predictor_8x8/;
+specialize qw/vp9_dc_top_predictor_8x8/, "$sse_x86inc";
 
 add_proto qw/void vp9_dc_left_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_left_predictor_8x8/;
+specialize qw/vp9_dc_left_predictor_8x8/, "$sse_x86inc";
 
 add_proto qw/void vp9_dc_128_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_128_predictor_8x8/;
+specialize qw/vp9_dc_128_predictor_8x8/, "$sse_x86inc";
 
 add_proto qw/void vp9_d207_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vp9_d207_predictor_16x16/, "$ssse3_x86inc";
@@ -165,7 +165,7 @@
 specialize qw/vp9_dc_predictor_16x16 dspr2/, "$sse2_x86inc";
 
 add_proto qw/void vp9_dc_top_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_top_predictor_16x16/;
+specialize qw/vp9_dc_top_predictor_16x16/, "$sse2_x86inc";
 
 add_proto qw/void vp9_dc_left_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vp9_dc_left_predictor_16x16/;
--- a/vp9/common/x86/vp9_intrapred_sse2.asm
+++ b/vp9/common/x86/vp9_intrapred_sse2.asm
@@ -15,6 +15,10 @@
 pw_8:  times 8 dw 8
 pw_16: times 8 dw 16
 pw_32: times 8 dw 32
+dc_128: times 16 db 128
+pw2_4:  times 8 dw 2
+pw2_8:  times 8 dw 4
+pw2_16:  times 8 dw 8
 
 SECTION .text
 
@@ -40,6 +44,46 @@
   RET
 
 INIT_MMX sse
+cglobal dc_left_predictor_4x4, 4, 5, 2, dst, stride, above, left, goffset
+  GET_GOT     goffsetq
+
+  pxor                  m1, m1
+  movd                  m0, [leftq]
+  psadbw                m0, m1
+  paddw                 m0, [GLOBAL(pw2_4)]
+  psraw                 m0, 2
+  pshufw                m0, m0, 0x0
+  packuswb              m0, m0
+  movd      [dstq        ], m0
+  movd      [dstq+strideq], m0
+  lea                 dstq, [dstq+strideq*2]
+  movd      [dstq        ], m0
+  movd      [dstq+strideq], m0
+
+  RESTORE_GOT
+  RET
+
+INIT_MMX sse
+cglobal dc_top_predictor_4x4, 4, 5, 2, dst, stride, above, left, goffset
+  GET_GOT     goffsetq
+
+  pxor                  m1, m1
+  movd                  m0, [aboveq]
+  psadbw                m0, m1
+  paddw                 m0, [GLOBAL(pw2_4)]
+  psraw                 m0, 2
+  pshufw                m0, m0, 0x0
+  packuswb              m0, m0
+  movd      [dstq        ], m0
+  movd      [dstq+strideq], m0
+  lea                 dstq, [dstq+strideq*2]
+  movd      [dstq        ], m0
+  movd      [dstq+strideq], m0
+
+  RESTORE_GOT
+  RET
+
+INIT_MMX sse
 cglobal dc_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset
   GET_GOT     goffsetq
 
@@ -68,6 +112,91 @@
   RESTORE_GOT
   RET
 
+INIT_MMX sse
+cglobal dc_top_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset
+  GET_GOT     goffsetq
+
+  pxor                  m1, m1
+  movq                  m0, [aboveq]
+  DEFINE_ARGS dst, stride, stride3
+  lea             stride3q, [strideq*3]
+  psadbw                m0, m1
+  paddw                 m0, [GLOBAL(pw2_8)]
+  psraw                 m0, 3
+  pshufw                m0, m0, 0x0
+  packuswb              m0, m0
+  movq    [dstq          ], m0
+  movq    [dstq+strideq  ], m0
+  movq    [dstq+strideq*2], m0
+  movq    [dstq+stride3q ], m0
+  lea                 dstq, [dstq+strideq*4]
+  movq    [dstq          ], m0
+  movq    [dstq+strideq  ], m0
+  movq    [dstq+strideq*2], m0
+  movq    [dstq+stride3q ], m0
+
+  RESTORE_GOT
+  RET
+
+INIT_MMX sse
+cglobal dc_left_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset
+  GET_GOT     goffsetq
+
+  pxor                  m1, m1
+  movq                  m0, [leftq]
+  DEFINE_ARGS dst, stride, stride3
+  lea             stride3q, [strideq*3]
+  psadbw                m0, m1
+  paddw                 m0, [GLOBAL(pw2_8)]
+  psraw                 m0, 3
+  pshufw                m0, m0, 0x0
+  packuswb              m0, m0
+  movq    [dstq          ], m0
+  movq    [dstq+strideq  ], m0
+  movq    [dstq+strideq*2], m0
+  movq    [dstq+stride3q ], m0
+  lea                 dstq, [dstq+strideq*4]
+  movq    [dstq          ], m0
+  movq    [dstq+strideq  ], m0
+  movq    [dstq+strideq*2], m0
+  movq    [dstq+stride3q ], m0
+
+  RESTORE_GOT
+  RET
+
+INIT_MMX sse
+cglobal dc_128_predictor_4x4, 4, 5, 3, dst, stride, above, left, goffset
+  GET_GOT     goffsetq
+
+  DEFINE_ARGS dst, stride, stride3
+  lea             stride3q, [strideq*3]
+  movd     m0,        [GLOBAL(dc_128)]
+  movd    [dstq          ], m0
+  movd    [dstq+strideq  ], m0
+  movd    [dstq+strideq*2], m0
+  movd    [dstq+stride3q ], m0
+  RESTORE_GOT
+  RET
+
+INIT_MMX sse
+cglobal dc_128_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset
+  GET_GOT     goffsetq
+
+  DEFINE_ARGS dst, stride, stride3
+  lea             stride3q, [strideq*3]
+  movq    m0,        [GLOBAL(dc_128)]
+  movq    [dstq          ], m0
+  movq    [dstq+strideq  ], m0
+  movq    [dstq+strideq*2], m0
+  movq    [dstq+stride3q ], m0
+  lea                 dstq, [dstq+strideq*4]
+  movq    [dstq          ], m0
+  movq    [dstq+strideq  ], m0
+  movq    [dstq+strideq*2], m0
+  movq    [dstq+stride3q ], m0
+  RESTORE_GOT
+  RET
+
 INIT_XMM sse2
 cglobal dc_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset
   GET_GOT     goffsetq
@@ -99,6 +228,60 @@
 
   RESTORE_GOT
   REP_RET
+
+
+INIT_XMM sse2
+cglobal dc_top_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset
+  GET_GOT     goffsetq
+
+  pxor                  m1, m1
+  pxor                  m2, m2
+  mova                  m0, [aboveq]
+  DEFINE_ARGS dst, stride, stride3, lines4
+  lea             stride3q, [strideq*3]
+  mov              lines4d, 4
+  psadbw                m0, m1
+  psadbw                m2, m1
+  paddw                 m0, m2
+  movhlps               m2, m0
+  paddw                 m0, m2
+  paddw                 m0, [GLOBAL(pw2_16)]
+  psraw                 m0, 4
+  pshuflw               m0, m0, 0x0
+  punpcklqdq            m0, m0
+  packuswb              m0, m0
+.loop:
+  mova    [dstq          ], m0
+  mova    [dstq+strideq  ], m0
+  mova    [dstq+strideq*2], m0
+  mova    [dstq+stride3q ], m0
+  lea                 dstq, [dstq+strideq*4]
+  dec              lines4d
+  jnz .loop
+
+  RESTORE_GOT
+  REP_RET
+
+
+INIT_MMX sse
+cglobal dc_128_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset
+  GET_GOT     goffsetq
+
+  DEFINE_ARGS dst, stride, stride3, lines4
+  lea             stride3q, [strideq*3]
+  mov              lines4d, 4
+  movq    m0,        [GLOBAL(dc_128)]
+.loop:
+  mova    [dstq          ], m0
+  mova    [dstq+strideq  ], m0
+  mova    [dstq+strideq*2], m0
+  mova    [dstq+stride3q ], m0
+  lea                 dstq, [dstq+strideq*4]
+  dec              lines4d
+  jnz .loop
+  RESTORE_GOT
+  RET
+
 
 INIT_XMM sse2
 cglobal dc_predictor_32x32, 4, 5, 5, dst, stride, above, left, goffset