ref: 89ac94f8fb7be1ce9baee198954a890941ecf936
parent: 9a7023d2ad82ba6905ed51adfc9821bea2fba26a
author: Scott LaVarnway <slavarnway@google.com>
date: Fri Dec 21 09:41:49 EST 2012
Removed mmx versions of vp9_bilinear_predict filters These filters will not work with VP9. Change-Id: Ic26c77961084fcea6bfa97f4cd95afdea2282e85
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -319,10 +319,10 @@
specialize vp9_sixtap_predict_avg
prototype void vp9_bilinear_predict16x16 "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
-specialize vp9_bilinear_predict16x16 mmx sse2
+specialize vp9_bilinear_predict16x16 sse2
prototype void vp9_bilinear_predict8x8 "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
-specialize vp9_bilinear_predict8x8 mmx sse2
+specialize vp9_bilinear_predict8x8 sse2
prototype void vp9_bilinear_predict_avg16x16 "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
specialize vp9_bilinear_predict_avg16x16
@@ -331,10 +331,10 @@
specialize vp9_bilinear_predict_avg8x8
prototype void vp9_bilinear_predict8x4 "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
-specialize vp9_bilinear_predict8x4 mmx
+specialize vp9_bilinear_predict8x4
prototype void vp9_bilinear_predict4x4 "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
-specialize vp9_bilinear_predict4x4 mmx
+specialize vp9_bilinear_predict4x4
prototype void vp9_bilinear_predict_avg4x4 "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
specialize vp9_bilinear_predict_avg4x4
--- a/vp9/common/x86/vp9_asm_stubs.c
+++ b/vp9/common/x86/vp9_asm_stubs.c
@@ -15,8 +15,6 @@
extern const short vp9_six_tap_mmx[16][6 * 8];
-extern const short vp9_bilinear_filters_8x_mmx[16][2 * 8];
-
extern void vp9_filter_block1d_h6_mmx(unsigned char *src_ptr,
unsigned short *output_ptr,
unsigned int src_pixels_per_line,
@@ -95,8 +93,6 @@
unsigned int output_height,
const short *vp9_filter);
-extern prototype_subpixel_predict(vp9_bilinear_predict8x8_mmx);
-
///////////////////////////////////////////////////////////////////////////
// the mmx function that does the bilinear filtering and var calculation //
// int one pass //
@@ -231,26 +227,6 @@
16, 8, 4, 8, vfilter);
vp9_filter_block1dc_v6_mmx(fdata2 + 20, dst_ptr + 4, dst_pitch,
16, 8, 4, 8, vfilter);
-}
-
-void vp9_bilinear_predict16x16_mmx(unsigned char *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- unsigned char *dst_ptr,
- int dst_pitch) {
- vp9_bilinear_predict8x8_mmx(src_ptr,
- src_pixels_per_line, xoffset, yoffset,
- dst_ptr, dst_pitch);
- vp9_bilinear_predict8x8_mmx(src_ptr + 8,
- src_pixels_per_line, xoffset, yoffset,
- dst_ptr + 8, dst_pitch);
- vp9_bilinear_predict8x8_mmx(src_ptr + 8 * src_pixels_per_line,
- src_pixels_per_line, xoffset, yoffset,
- dst_ptr + dst_pitch * 8, dst_pitch);
- vp9_bilinear_predict8x8_mmx(src_ptr + 8 * src_pixels_per_line + 8,
- src_pixels_per_line, xoffset, yoffset,
- dst_ptr + dst_pitch * 8 + 8, dst_pitch);
}
#endif
--- a/vp9/common/x86/vp9_subpixel_mmx.asm
+++ b/vp9/common/x86/vp9_subpixel_mmx.asm
@@ -202,438 +202,6 @@
pop rbp
ret
-
-;void bilinear_predict8x8_mmx
-;(
-; unsigned char *src_ptr,
-; int src_pixels_per_line,
-; int xoffset,
-; int yoffset,
-; unsigned char *dst_ptr,
-; int dst_pitch
-;)
-global sym(vp9_bilinear_predict8x8_mmx)
-sym(vp9_bilinear_predict8x8_mmx):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- ;const short *HFilter = bilinear_filters_mmx[xoffset];
- ;const short *VFilter = bilinear_filters_mmx[yoffset];
-
- movsxd rax, dword ptr arg(2) ;xoffset
- mov rdi, arg(4) ;dst_ptr ;
-
- shl rax, 5 ; offset * 32
- lea rcx, [GLOBAL(sym(vp9_bilinear_filters_8x_mmx))]
-
- add rax, rcx ; HFilter
- mov rsi, arg(0) ;src_ptr ;
-
- movsxd rdx, dword ptr arg(5) ;dst_pitch
- movq mm1, [rax] ;
-
- movq mm2, [rax+16] ;
- movsxd rax, dword ptr arg(3) ;yoffset
-
- pxor mm0, mm0 ;
-
- shl rax, 5 ; offset*32
- add rax, rcx ; VFilter
-
- lea rcx, [rdi+rdx*8] ;
- movsxd rdx, dword ptr arg(1) ;src_pixels_per_line ;
-
-
-
- ; get the first horizontal line done ;
- movq mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14
- movq mm4, mm3 ; make a copy of current line
-
- punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06
- punpckhbw mm4, mm0 ;
-
- pmullw mm3, mm1 ;
- pmullw mm4, mm1 ;
-
- movq mm5, [rsi+1] ;
- movq mm6, mm5 ;
-
- punpcklbw mm5, mm0 ;
- punpckhbw mm6, mm0 ;
-
- pmullw mm5, mm2 ;
- pmullw mm6, mm2 ;
-
- paddw mm3, mm5 ;
- paddw mm4, mm6 ;
-
- paddw mm3, [GLOBAL(rd)] ; xmm3 += round value
- psraw mm3, VP9_FILTER_SHIFT ; xmm3 /= 128
-
- paddw mm4, [GLOBAL(rd)] ;
- psraw mm4, VP9_FILTER_SHIFT ;
-
- movq mm7, mm3 ;
- packuswb mm7, mm4 ;
-
- add rsi, rdx ; next line
-.next_row_8x8:
- movq mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14
- movq mm4, mm3 ; make a copy of current line
-
- punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06
- punpckhbw mm4, mm0 ;
-
- pmullw mm3, mm1 ;
- pmullw mm4, mm1 ;
-
- movq mm5, [rsi+1] ;
- movq mm6, mm5 ;
-
- punpcklbw mm5, mm0 ;
- punpckhbw mm6, mm0 ;
-
- pmullw mm5, mm2 ;
- pmullw mm6, mm2 ;
-
- paddw mm3, mm5 ;
- paddw mm4, mm6 ;
-
- movq mm5, mm7 ;
- movq mm6, mm7 ;
-
- punpcklbw mm5, mm0 ;
- punpckhbw mm6, mm0
-
- pmullw mm5, [rax] ;
- pmullw mm6, [rax] ;
-
- paddw mm3, [GLOBAL(rd)] ; xmm3 += round value
- psraw mm3, VP9_FILTER_SHIFT ; xmm3 /= 128
-
- paddw mm4, [GLOBAL(rd)] ;
- psraw mm4, VP9_FILTER_SHIFT ;
-
- movq mm7, mm3 ;
- packuswb mm7, mm4 ;
-
-
- pmullw mm3, [rax+16] ;
- pmullw mm4, [rax+16] ;
-
- paddw mm3, mm5 ;
- paddw mm4, mm6 ;
-
-
- paddw mm3, [GLOBAL(rd)] ; xmm3 += round value
- psraw mm3, VP9_FILTER_SHIFT ; xmm3 /= 128
-
- paddw mm4, [GLOBAL(rd)] ;
- psraw mm4, VP9_FILTER_SHIFT ;
-
- packuswb mm3, mm4
-
- movq [rdi], mm3 ; store the results in the destination
-
-%if ABI_IS_32BIT
- add rsi, rdx ; next line
- add rdi, dword ptr arg(5) ;dst_pitch ;
-%else
- movsxd r8, dword ptr arg(5) ;dst_pitch
- add rsi, rdx ; next line
- add rdi, r8 ;dst_pitch
-%endif
- cmp rdi, rcx ;
- jne .next_row_8x8
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
-;void bilinear_predict8x4_mmx
-;(
-; unsigned char *src_ptr,
-; int src_pixels_per_line,
-; int xoffset,
-; int yoffset,
-; unsigned char *dst_ptr,
-; int dst_pitch
-;)
-global sym(vp9_bilinear_predict8x4_mmx)
-sym(vp9_bilinear_predict8x4_mmx):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- ;const short *HFilter = bilinear_filters_mmx[xoffset];
- ;const short *VFilter = bilinear_filters_mmx[yoffset];
-
- movsxd rax, dword ptr arg(2) ;xoffset
- mov rdi, arg(4) ;dst_ptr ;
-
- lea rcx, [GLOBAL(sym(vp9_bilinear_filters_8x_mmx))]
- shl rax, 5
-
- mov rsi, arg(0) ;src_ptr ;
- add rax, rcx
-
- movsxd rdx, dword ptr arg(5) ;dst_pitch
- movq mm1, [rax] ;
-
- movq mm2, [rax+16] ;
- movsxd rax, dword ptr arg(3) ;yoffset
-
- pxor mm0, mm0 ;
- shl rax, 5
-
- add rax, rcx
- lea rcx, [rdi+rdx*4] ;
-
- movsxd rdx, dword ptr arg(1) ;src_pixels_per_line ;
-
- ; get the first horizontal line done ;
- movq mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14
- movq mm4, mm3 ; make a copy of current line
-
- punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06
- punpckhbw mm4, mm0 ;
-
- pmullw mm3, mm1 ;
- pmullw mm4, mm1 ;
-
- movq mm5, [rsi+1] ;
- movq mm6, mm5 ;
-
- punpcklbw mm5, mm0 ;
- punpckhbw mm6, mm0 ;
-
- pmullw mm5, mm2 ;
- pmullw mm6, mm2 ;
-
- paddw mm3, mm5 ;
- paddw mm4, mm6 ;
-
- paddw mm3, [GLOBAL(rd)] ; xmm3 += round value
- psraw mm3, VP9_FILTER_SHIFT ; xmm3 /= 128
-
- paddw mm4, [GLOBAL(rd)] ;
- psraw mm4, VP9_FILTER_SHIFT ;
-
- movq mm7, mm3 ;
- packuswb mm7, mm4 ;
-
- add rsi, rdx ; next line
-.next_row_8x4:
- movq mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14
- movq mm4, mm3 ; make a copy of current line
-
- punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06
- punpckhbw mm4, mm0 ;
-
- pmullw mm3, mm1 ;
- pmullw mm4, mm1 ;
-
- movq mm5, [rsi+1] ;
- movq mm6, mm5 ;
-
- punpcklbw mm5, mm0 ;
- punpckhbw mm6, mm0 ;
-
- pmullw mm5, mm2 ;
- pmullw mm6, mm2 ;
-
- paddw mm3, mm5 ;
- paddw mm4, mm6 ;
-
- movq mm5, mm7 ;
- movq mm6, mm7 ;
-
- punpcklbw mm5, mm0 ;
- punpckhbw mm6, mm0
-
- pmullw mm5, [rax] ;
- pmullw mm6, [rax] ;
-
- paddw mm3, [GLOBAL(rd)] ; xmm3 += round value
- psraw mm3, VP9_FILTER_SHIFT ; xmm3 /= 128
-
- paddw mm4, [GLOBAL(rd)] ;
- psraw mm4, VP9_FILTER_SHIFT ;
-
- movq mm7, mm3 ;
- packuswb mm7, mm4 ;
-
-
- pmullw mm3, [rax+16] ;
- pmullw mm4, [rax+16] ;
-
- paddw mm3, mm5 ;
- paddw mm4, mm6 ;
-
-
- paddw mm3, [GLOBAL(rd)] ; xmm3 += round value
- psraw mm3, VP9_FILTER_SHIFT ; xmm3 /= 128
-
- paddw mm4, [GLOBAL(rd)] ;
- psraw mm4, VP9_FILTER_SHIFT ;
-
- packuswb mm3, mm4
-
- movq [rdi], mm3 ; store the results in the destination
-
-%if ABI_IS_32BIT
- add rsi, rdx ; next line
- add rdi, dword ptr arg(5) ;dst_pitch ;
-%else
- movsxd r8, dword ptr arg(5) ;dst_pitch
- add rsi, rdx ; next line
- add rdi, r8
-%endif
- cmp rdi, rcx ;
- jne .next_row_8x4
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
-;void bilinear_predict4x4_mmx
-;(
-; unsigned char *src_ptr,
-; int src_pixels_per_line,
-; int xoffset,
-; int yoffset,
-; unsigned char *dst_ptr,
-; int dst_pitch
-;)
-global sym(vp9_bilinear_predict4x4_mmx)
-sym(vp9_bilinear_predict4x4_mmx):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- ;const short *HFilter = bilinear_filters_mmx[xoffset];
- ;const short *VFilter = bilinear_filters_mmx[yoffset];
-
- movsxd rax, dword ptr arg(2) ;xoffset
- mov rdi, arg(4) ;dst_ptr ;
-
- lea rcx, [GLOBAL(sym(vp9_bilinear_filters_8x_mmx))]
- shl rax, 5
-
- add rax, rcx ; HFilter
- mov rsi, arg(0) ;src_ptr ;
-
- movsxd rdx, dword ptr arg(5) ;ldst_pitch
- movq mm1, [rax] ;
-
- movq mm2, [rax+16] ;
- movsxd rax, dword ptr arg(3) ;yoffset
-
- pxor mm0, mm0 ;
- shl rax, 5
-
- add rax, rcx
- lea rcx, [rdi+rdx*4] ;
-
- movsxd rdx, dword ptr arg(1) ;src_pixels_per_line ;
-
- ; get the first horizontal line done ;
- movd mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14
- punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06
-
- pmullw mm3, mm1 ;
- movd mm5, [rsi+1] ;
-
- punpcklbw mm5, mm0 ;
- pmullw mm5, mm2 ;
-
- paddw mm3, mm5 ;
- paddw mm3, [GLOBAL(rd)] ; xmm3 += round value
-
- psraw mm3, VP9_FILTER_SHIFT ; xmm3 /= 128
-
- movq mm7, mm3 ;
- packuswb mm7, mm0 ;
-
- add rsi, rdx ; next line
-.next_row_4x4:
- movd mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14
- punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06
-
- pmullw mm3, mm1 ;
- movd mm5, [rsi+1] ;
-
- punpcklbw mm5, mm0 ;
- pmullw mm5, mm2 ;
-
- paddw mm3, mm5 ;
-
- movq mm5, mm7 ;
- punpcklbw mm5, mm0 ;
-
- pmullw mm5, [rax] ;
- paddw mm3, [GLOBAL(rd)] ; xmm3 += round value
-
- psraw mm3, VP9_FILTER_SHIFT ; xmm3 /= 128
- movq mm7, mm3 ;
-
- packuswb mm7, mm0 ;
-
- pmullw mm3, [rax+16] ;
- paddw mm3, mm5 ;
-
-
- paddw mm3, [GLOBAL(rd)] ; xmm3 += round value
- psraw mm3, VP9_FILTER_SHIFT ; xmm3 /= 128
-
- packuswb mm3, mm0
- movd [rdi], mm3 ; store the results in the destination
-
-%if ABI_IS_32BIT
- add rsi, rdx ; next line
- add rdi, dword ptr arg(5) ;dst_pitch ;
-%else
- movsxd r8, dword ptr arg(5) ;dst_pitch ;
- add rsi, rdx ; next line
- add rdi, r8
-%endif
-
- cmp rdi, rcx ;
- jne .next_row_4x4
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
-
SECTION_RODATA
align 16
rd:
@@ -698,30 +266,3 @@
times 8 dw -6
times 8 dw 0
-
-align 16
-global HIDDEN_DATA(sym(vp9_bilinear_filters_8x_mmx))
-sym(vp9_bilinear_filters_8x_mmx):
- times 8 dw 128
- times 8 dw 0
-
- times 8 dw 112
- times 8 dw 16
-
- times 8 dw 96
- times 8 dw 32
-
- times 8 dw 80
- times 8 dw 48
-
- times 8 dw 64
- times 8 dw 64
-
- times 8 dw 48
- times 8 dw 80
-
- times 8 dw 32
- times 8 dw 96
-
- times 8 dw 16
- times 8 dw 112
--- a/vp9/common/x86/vp9_subpixel_x86.h
+++ b/vp9/common/x86/vp9_subpixel_x86.h
@@ -25,11 +25,7 @@
extern prototype_subpixel_predict(vp9_sixtap_predict8x4_mmx);
extern prototype_subpixel_predict(vp9_sixtap_predict4x4_mmx);
extern prototype_subpixel_predict(vp9_bilinear_predict16x16_mmx);
-extern prototype_subpixel_predict(vp9_bilinear_predict8x8_mmx);
-extern prototype_subpixel_predict(vp9_bilinear_predict8x4_mmx);
-extern prototype_subpixel_predict(vp9_bilinear_predict4x4_mmx);
-
#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp9_subpix_sixtap16x16
#define vp9_subpix_sixtap16x16 vp9_sixtap_predict16x16_mmx
@@ -45,15 +41,6 @@
#undef vp9_subpix_bilinear16x16
#define vp9_subpix_bilinear16x16 vp9_bilinear_predict16x16_mmx
-
-#undef vp9_subpix_bilinear8x8
-#define vp9_subpix_bilinear8x8 vp9_bilinear_predict8x8_mmx
-
-#undef vp9_subpix_bilinear8x4
-#define vp9_subpix_bilinear8x4 vp9_bilinear_predict8x4_mmx
-
-#undef vp9_subpix_bilinear4x4
-#define vp9_subpix_bilinear4x4 vp9_bilinear_predict4x4_mmx
#endif
#endif
--
⑨