ref: 4dd943156dba5e182cee8d5fda764ab1c8537eae
parent: e308ae49b39502ee871c0df8b64fb79561ce17c5
author: Ronald S. Bultje <rsbultje@gmail.com>
date: Tue Mar 31 06:26:08 EDT 2020
x86: don't use vptest in SSSE3 version. vptest is the VEX (AVX) encoded variant of the SSE4.1 instruction ptest, so emulate it using pcmpeqb + pmovmskb in the SSSE3 version.
--- a/src/x86/mc_ssse3.asm
+++ b/src/x86/mc_ssse3.asm
@@ -5212,8 +5212,16 @@
; if no emulation is required, we don't need to shuffle or emulate edges
; this also saves 2 quasi-vpgatherdqs
- vptest m3, m3
- jz .filter
+ pxor m6, m6
+ pcmpeqb m6, m3
+%if ARCH_X86_64
+ pmovmskb r8d, m6
+ cmp r8d, 0xffff
+%else
+ pmovmskb r3d, m6
+ cmp r3d, 0xffff
+%endif
+ je .filter
%if ARCH_X86_64
movd r8d, xm3