ref: 69dae683fdce08dabf406787cdd8d1f76a9f0437
parent: a9315f5fde02530f64358375c3d2444a506b3a58
author: Henrik Gramner <gramner@twoorioles.com>
date: Thu Sep 5 18:50:28 EDT 2019
x86: Fix buffer overread in mc put. For w <= 32 we can't process more than two rows per loop iteration. Credit to OSS-Fuzz.
--- a/src/x86/mc.asm
+++ b/src/x86/mc.asm
@@ -170,8 +170,6 @@
.put:
movzx wd, word [t2+wq*2+table_offset(put,)]
add wq, t2
- lea t1, [ssq*3]
- lea t2, [dsq*3]
jmp wq
.put_w2:
movzx t0d, word [srcq+ssq*0]
@@ -194,11 +192,11 @@
jg .put_w4
RET
.put_w8:
- movq m0, [srcq+ssq*0]
- movq m1, [srcq+ssq*1]
+ mov t0, [srcq+ssq*0]
+ mov t1, [srcq+ssq*1]
lea srcq, [srcq+ssq*2]
- movq [dstq+dsq*0], m0
- movq [dstq+dsq*1], m1
+ mov [dstq+dsq*0], t0
+ mov [dstq+dsq*1], t1
lea dstq, [dstq+dsq*2]
sub hd, 2
jg .put_w8
@@ -206,15 +204,11 @@
.put_w16:
movu m0, [srcq+ssq*0]
movu m1, [srcq+ssq*1]
- movu m2, [srcq+ssq*2]
- movu m3, [srcq+t1 ]
- lea srcq, [srcq+ssq*4]
+ lea srcq, [srcq+ssq*2]
mova [dstq+dsq*0], m0
mova [dstq+dsq*1], m1
- mova [dstq+dsq*2], m2
- mova [dstq+t2 ], m3
- lea dstq, [dstq+dsq*4]
- sub hd, 4
+ lea dstq, [dstq+dsq*2]
+ sub hd, 2
jg .put_w16
RET
INIT_YMM avx2
@@ -221,15 +215,11 @@
.put_w32:
movu m0, [srcq+ssq*0]
movu m1, [srcq+ssq*1]
- movu m2, [srcq+ssq*2]
- movu m3, [srcq+t1 ]
- lea srcq, [srcq+ssq*4]
+ lea srcq, [srcq+ssq*2]
mova [dstq+dsq*0], m0
mova [dstq+dsq*1], m1
- mova [dstq+dsq*2], m2
- mova [dstq+t2 ], m3
- lea dstq, [dstq+dsq*4]
- sub hd, 4
+ lea dstq, [dstq+dsq*2]
+ sub hd, 2
jg .put_w32
RET
.put_w64:
--- a/src/x86/mc_ssse3.asm
+++ b/src/x86/mc_ssse3.asm
@@ -177,7 +177,6 @@
.put:
movzx wd, word [t0+wq*2+table_offset(put,)]
add wq, t0
- lea r6, [ssq*3]
RESTORE_DSQ_32 t0
jmp wq
.put_w2:
@@ -211,20 +210,14 @@
jg .put_w8
RET
.put_w16:
- lea r4, [dsq*3]
-.put_w16_in:
movu m0, [srcq+ssq*0]
movu m1, [srcq+ssq*1]
- movu m2, [srcq+ssq*2]
- movu m3, [srcq+r6 ]
- lea srcq, [srcq+ssq*4]
+ lea srcq, [srcq+ssq*2]
mova [dstq+dsq*0], m0
mova [dstq+dsq*1], m1
- mova [dstq+dsq*2], m2
- mova [dstq+r4 ], m3
- lea dstq, [dstq+dsq*4]
- sub hd, 4
- jg .put_w16_in
+ lea dstq, [dstq+dsq*2]
+ sub hd, 2
+ jg .put_w16
RET
.put_w32:
movu m0, [srcq+ssq*0+16*0]