ref: 15b261d85433f0c9169e45c27a2cd6308de94106
parent: 3b85a5beb7fedba88540d74fd7fe8f89b2f578cb
parent: cb9f4dc1056b39383595f658cfcd166833bc0097
author: Scott LaVarnway <slavarnway@google.com>
date: Wed Jan 24 17:54:42 EST 2018
Merge "BUG FIX: sse2 subpel variance is not PIC compliant"
--- a/vpx_dsp/x86/highbd_subpel_variance_impl_sse2.asm
+++ b/vpx_dsp/x86/highbd_subpel_variance_impl_sse2.asm
@@ -91,7 +91,7 @@
%define filter_idx_shift 5
-%ifdef PIC ; 64bit PIC
+%if ARCH_X86_64
%if %2 == 1 ; avg
cglobal highbd_sub_pixel_avg_variance%1xh, 9, 10, 13, src, src_stride, \
x_offset, y_offset, \
@@ -99,19 +99,20 @@
sec, sec_stride, height, sse
%define sec_str sec_strideq
%else
- cglobal highbd_sub_pixel_variance%1xh, 7, 8, 13, src, src_stride, x_offset, \
- y_offset, dst, dst_stride, height, sse
+ cglobal highbd_sub_pixel_variance%1xh, 7, 8, 13, src, src_stride, \
+ x_offset, y_offset, \
+ dst, dst_stride, height, sse
%endif
%define block_height heightd
%define bilin_filter sseq
%else
- %if ARCH_X86=1 && CONFIG_PIC=1
+ %if CONFIG_PIC=1
%if %2 == 1 ; avg
cglobal highbd_sub_pixel_avg_variance%1xh, 7, 7, 13, src, src_stride, \
- x_offset, y_offset, \
- dst, dst_stride, \
- sec, sec_stride, \
- height, sse, g_bilin_filter, g_pw_8
+ x_offset, y_offset, \
+ dst, dst_stride, \
+ sec, sec_stride, height, sse, \
+ g_bilin_filter, g_pw_8
%define block_height dword heightm
%define sec_str sec_stridemp
@@ -130,8 +131,9 @@
LOAD_IF_USED 0, 1 ; load eax, ecx back
%else
cglobal highbd_sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, \
- x_offset, y_offset, dst, dst_stride, height, \
- sse, g_bilin_filter, g_pw_8
+ x_offset, y_offset, \
+ dst, dst_stride, height, sse, \
+ g_bilin_filter, g_pw_8
%define block_height heightd
; Store bilin_filter and pw_8 location in stack
@@ -150,22 +152,16 @@
%endif
%else
%if %2 == 1 ; avg
- cglobal highbd_sub_pixel_avg_variance%1xh, 7 + 2 * ARCH_X86_64, \
- 7 + 2 * ARCH_X86_64, 13, src, src_stride, \
- x_offset, y_offset, \
- dst, dst_stride, \
- sec, sec_stride, \
- height, sse
- %if ARCH_X86_64
- %define block_height heightd
- %define sec_str sec_strideq
- %else
+ cglobal highbd_sub_pixel_avg_variance%1xh, 7, 7, 13, src, src_stride, \
+ x_offset, y_offset, \
+ dst, dst_stride, \
+ sec, sec_stride, height, sse
%define block_height dword heightm
%define sec_str sec_stridemp
- %endif
%else
cglobal highbd_sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, \
- x_offset, y_offset, dst, dst_stride, height, sse
+ x_offset, y_offset, \
+ dst, dst_stride, height, sse
%define block_height heightd
%endif
@@ -284,14 +280,14 @@
.x_zero_y_nonhalf:
; x_offset == 0 && y_offset == bilin interpolation
-%ifdef PIC
- lea bilin_filter, [bilin_filter_m]
+%if ARCH_X86_64
+ lea bilin_filter, [GLOBAL(bilin_filter_m)]
%endif
shl y_offsetd, filter_idx_shift
%if ARCH_X86_64 && mmsize == 16
mova m8, [bilin_filter+y_offsetq]
mova m9, [bilin_filter+y_offsetq+16]
- mova m10, [pw_8]
+ mova m10, [GLOBAL(pw_8)]
%define filter_y_a m8
%define filter_y_b m9
%define filter_rnd m10
@@ -308,7 +304,7 @@
add y_offsetq, bilin_filter
%define filter_y_a [y_offsetq]
%define filter_y_b [y_offsetq+16]
-%define filter_rnd [pw_8]
+%define filter_rnd [GLOBAL(pw_8)]
%endif
%endif
@@ -511,14 +507,14 @@
.x_half_y_nonhalf:
; x_offset == 0.5 && y_offset == bilin interpolation
-%ifdef PIC
- lea bilin_filter, [bilin_filter_m]
+%if ARCH_X86_64
+ lea bilin_filter, [GLOBAL(bilin_filter_m)]
%endif
shl y_offsetd, filter_idx_shift
%if ARCH_X86_64 && mmsize == 16
mova m8, [bilin_filter+y_offsetq]
mova m9, [bilin_filter+y_offsetq+16]
- mova m10, [pw_8]
+ mova m10, [GLOBAL(pw_8)]
%define filter_y_a m8
%define filter_y_b m9
%define filter_rnd m10
@@ -535,7 +531,7 @@
add y_offsetq, bilin_filter
%define filter_y_a [y_offsetq]
%define filter_y_b [y_offsetq+16]
-%define filter_rnd [pw_8]
+%define filter_rnd [GLOBAL(pw_8)]
%endif
%endif
@@ -633,14 +629,14 @@
jnz .x_nonhalf_y_nonzero
; x_offset == bilin interpolation && y_offset == 0
-%ifdef PIC
- lea bilin_filter, [bilin_filter_m]
+%if ARCH_X86_64
+ lea bilin_filter, [GLOBAL(bilin_filter_m)]
%endif
shl x_offsetd, filter_idx_shift
%if ARCH_X86_64 && mmsize == 16
mova m8, [bilin_filter+x_offsetq]
mova m9, [bilin_filter+x_offsetq+16]
- mova m10, [pw_8]
+ mova m10, [GLOBAL(pw_8)]
%define filter_x_a m8
%define filter_x_b m9
%define filter_rnd m10
@@ -657,7 +653,7 @@
add x_offsetq, bilin_filter
%define filter_x_a [x_offsetq]
%define filter_x_b [x_offsetq+16]
-%define filter_rnd [pw_8]
+%define filter_rnd [GLOBAL(pw_8)]
%endif
%endif
@@ -732,14 +728,14 @@
jne .x_nonhalf_y_nonhalf
; x_offset == bilin interpolation && y_offset == 0.5
-%ifdef PIC
- lea bilin_filter, [bilin_filter_m]
+%if ARCH_X86_64
+ lea bilin_filter, [GLOBAL(bilin_filter_m)]
%endif
shl x_offsetd, filter_idx_shift
%if ARCH_X86_64 && mmsize == 16
mova m8, [bilin_filter+x_offsetq]
mova m9, [bilin_filter+x_offsetq+16]
- mova m10, [pw_8]
+ mova m10, [GLOBAL(pw_8)]
%define filter_x_a m8
%define filter_x_b m9
%define filter_rnd m10
@@ -756,7 +752,7 @@
add x_offsetq, bilin_filter
%define filter_x_a [x_offsetq]
%define filter_x_b [x_offsetq+16]
-%define filter_rnd [pw_8]
+%define filter_rnd [GLOBAL(pw_8)]
%endif
%endif
@@ -859,8 +855,8 @@
.x_nonhalf_y_nonhalf:
; loading filter - this is same as in 8-bit depth
-%ifdef PIC
- lea bilin_filter, [bilin_filter_m]
+%if ARCH_X86_64
+ lea bilin_filter, [GLOBAL(bilin_filter_m)]
%endif
shl x_offsetd, filter_idx_shift ; filter_idx_shift = 5
shl y_offsetd, filter_idx_shift
@@ -869,7 +865,7 @@
mova m9, [bilin_filter+x_offsetq+16]
mova m10, [bilin_filter+y_offsetq]
mova m11, [bilin_filter+y_offsetq+16]
- mova m12, [pw_8]
+ mova m12, [GLOBAL(pw_8)]
%define filter_x_a m8
%define filter_x_b m9
%define filter_y_a m10
@@ -897,7 +893,7 @@
%define filter_x_b [x_offsetq+16]
%define filter_y_a [y_offsetq]
%define filter_y_b [y_offsetq+16]
-%define filter_rnd [pw_8]
+%define filter_rnd [GLOBAL(pw_8)]
%endif
%endif
; end of load filter
--- a/vpx_dsp/x86/subpel_variance_sse2.asm
+++ b/vpx_dsp/x86/subpel_variance_sse2.asm
@@ -114,27 +114,26 @@
; 11, not 13, if the registers are ordered correctly. May make a minor speed
; difference on Win64
-%ifdef PIC ; 64bit PIC
+%if ARCH_X86_64
%if %2 == 1 ; avg
cglobal sub_pixel_avg_variance%1xh, 9, 10, 13, src, src_stride, \
- x_offset, y_offset, \
- dst, dst_stride, \
- sec, sec_stride, height, sse
+ x_offset, y_offset, dst, dst_stride, \
+ sec, sec_stride, height, sse
%define sec_str sec_strideq
%else
- cglobal sub_pixel_variance%1xh, 7, 8, 13, src, src_stride, x_offset, \
- y_offset, dst, dst_stride, height, sse
+ cglobal sub_pixel_variance%1xh, 7, 8, 13, src, src_stride, \
+ x_offset, y_offset, dst, dst_stride, \
+ height, sse
%endif
%define block_height heightd
%define bilin_filter sseq
%else
- %if ARCH_X86=1 && CONFIG_PIC=1
+ %if CONFIG_PIC=1
%if %2 == 1 ; avg
cglobal sub_pixel_avg_variance%1xh, 7, 7, 13, src, src_stride, \
- x_offset, y_offset, \
- dst, dst_stride, \
- sec, sec_stride, \
- height, sse, g_bilin_filter, g_pw_8
+ x_offset, y_offset, dst, dst_stride, \
+ sec, sec_stride, height, sse, \
+ g_bilin_filter, g_pw_8
%define block_height dword heightm
%define sec_str sec_stridemp
@@ -152,9 +151,9 @@
LOAD_IF_USED 0, 1 ; load eax, ecx back
%else
- cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, x_offset, \
- y_offset, dst, dst_stride, height, sse, \
- g_bilin_filter, g_pw_8
+ cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, \
+ x_offset, y_offset, dst, dst_stride, \
+ height, sse, g_bilin_filter, g_pw_8
%define block_height heightd
;Store bilin_filter and pw_8 location in stack
@@ -173,25 +172,18 @@
%endif
%else
%if %2 == 1 ; avg
- cglobal sub_pixel_avg_variance%1xh, 7 + 2 * ARCH_X86_64, \
- 7 + 2 * ARCH_X86_64, 13, src, src_stride, \
- x_offset, y_offset, \
- dst, dst_stride, \
- sec, sec_stride, \
- height, sse
- %if ARCH_X86_64
- %define block_height heightd
- %define sec_str sec_strideq
- %else
+ cglobal sub_pixel_avg_variance%1xh, 7, 7, 13, src, src_stride, \
+ x_offset, y_offset, \
+ dst, dst_stride, sec, sec_stride, \
+ height, sse
%define block_height dword heightm
%define sec_str sec_stridemp
- %endif
%else
- cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, x_offset, \
- y_offset, dst, dst_stride, height, sse
+ cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, \
+ x_offset, y_offset, dst, dst_stride, \
+ height, sse
%define block_height heightd
%endif
-
%define bilin_filter bilin_filter_m
%endif
%endif
@@ -371,8 +363,8 @@
.x_zero_y_nonhalf:
; x_offset == 0 && y_offset == bilin interpolation
-%ifdef PIC
- lea bilin_filter, [bilin_filter_m]
+%if ARCH_X86_64
+ lea bilin_filter, [GLOBAL(bilin_filter_m)]
%endif
shl y_offsetd, filter_idx_shift
%if ARCH_X86_64 && %1 > 4
@@ -380,7 +372,7 @@
%if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64
mova m9, [bilin_filter+y_offsetq+16]
%endif
- mova m10, [pw_8]
+ mova m10, [GLOBAL(pw_8)]
%define filter_y_a m8
%define filter_y_b m9
%define filter_rnd m10
@@ -397,7 +389,7 @@
add y_offsetq, bilin_filter
%define filter_y_a [y_offsetq]
%define filter_y_b [y_offsetq+16]
-%define filter_rnd [pw_8]
+%define filter_rnd [GLOBAL(pw_8)]
%endif
%endif
@@ -694,8 +686,8 @@
.x_half_y_nonhalf:
; x_offset == 0.5 && y_offset == bilin interpolation
-%ifdef PIC
- lea bilin_filter, [bilin_filter_m]
+%if ARCH_X86_64
+ lea bilin_filter, [GLOBAL(bilin_filter_m)]
%endif
shl y_offsetd, filter_idx_shift
%if ARCH_X86_64 && %1 > 4
@@ -703,7 +695,7 @@
%if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64
mova m9, [bilin_filter+y_offsetq+16]
%endif
- mova m10, [pw_8]
+ mova m10, [GLOBAL(pw_8)]
%define filter_y_a m8
%define filter_y_b m9
%define filter_rnd m10
@@ -720,7 +712,7 @@
add y_offsetq, bilin_filter
%define filter_y_a [y_offsetq]
%define filter_y_b [y_offsetq+16]
-%define filter_rnd [pw_8]
+%define filter_rnd [GLOBAL(pw_8)]
%endif
%endif
@@ -852,8 +844,8 @@
jnz .x_nonhalf_y_nonzero
; x_offset == bilin interpolation && y_offset == 0
-%ifdef PIC
- lea bilin_filter, [bilin_filter_m]
+%if ARCH_X86_64
+ lea bilin_filter, [GLOBAL(bilin_filter_m)]
%endif
shl x_offsetd, filter_idx_shift
%if ARCH_X86_64 && %1 > 4
@@ -861,7 +853,7 @@
%if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64
mova m9, [bilin_filter+x_offsetq+16]
%endif
- mova m10, [pw_8]
+ mova m10, [GLOBAL(pw_8)]
%define filter_x_a m8
%define filter_x_b m9
%define filter_rnd m10
@@ -878,7 +870,7 @@
add x_offsetq, bilin_filter
%define filter_x_a [x_offsetq]
%define filter_x_b [x_offsetq+16]
-%define filter_rnd [pw_8]
+%define filter_rnd [GLOBAL(pw_8)]
%endif
%endif
@@ -994,8 +986,8 @@
jne .x_nonhalf_y_nonhalf
; x_offset == bilin interpolation && y_offset == 0.5
-%ifdef PIC
- lea bilin_filter, [bilin_filter_m]
+%if ARCH_X86_64
+ lea bilin_filter, [GLOBAL(bilin_filter_m)]
%endif
shl x_offsetd, filter_idx_shift
%if ARCH_X86_64 && %1 > 4
@@ -1003,7 +995,7 @@
%if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64
mova m9, [bilin_filter+x_offsetq+16]
%endif
- mova m10, [pw_8]
+ mova m10, [GLOBAL(pw_8)]
%define filter_x_a m8
%define filter_x_b m9
%define filter_rnd m10
@@ -1020,7 +1012,7 @@
add x_offsetq, bilin_filter
%define filter_x_a [x_offsetq]
%define filter_x_b [x_offsetq+16]
-%define filter_rnd [pw_8]
+%define filter_rnd [GLOBAL(pw_8)]
%endif
%endif
@@ -1192,8 +1184,8 @@
STORE_AND_RET %1
.x_nonhalf_y_nonhalf:
-%ifdef PIC
- lea bilin_filter, [bilin_filter_m]
+%if ARCH_X86_64
+ lea bilin_filter, [GLOBAL(bilin_filter_m)]
%endif
shl x_offsetd, filter_idx_shift
shl y_offsetd, filter_idx_shift
@@ -1206,7 +1198,7 @@
%if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64
mova m11, [bilin_filter+y_offsetq+16]
%endif
- mova m12, [pw_8]
+ mova m12, [GLOBAL(pw_8)]
%define filter_x_a m8
%define filter_x_b m9
%define filter_y_a m10
@@ -1234,7 +1226,7 @@
%define filter_x_b [x_offsetq+16]
%define filter_y_a [y_offsetq]
%define filter_y_b [y_offsetq+16]
-%define filter_rnd [pw_8]
+%define filter_rnd [GLOBAL(pw_8)]
%endif
%endif