ref: 282087a14c84a65f10b3a8d0e81c255e13a7a746
parent: ff0b9bb0541c78046df5fed1a145d2a7a3113c54
parent: a93705f7f9c15cdc2a1e62f6142e99f794923826
author: Johann Koenig <johannkoenig@google.com>
date: Thu Sep 20 17:56:21 EDT 2018
Merge "sanitizer: fix unaligned load/stores"
--- a/vpx_dsp/x86/loopfilter_sse2.c
+++ b/vpx_dsp/x86/loopfilter_sse2.c
@@ -13,6 +13,7 @@
#include "./vpx_dsp_rtcd.h"
#include "vpx_ports/mem.h"
#include "vpx_ports/emmintrin_compat.h"
+#include "vpx_dsp/x86/mem_sse2.h"
static INLINE __m128i abs_diff(__m128i a, __m128i b) {
return _mm_or_si128(_mm_subs_epu8(a, b), _mm_subs_epu8(b, a));
@@ -212,21 +213,21 @@
// 00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33
ps1ps0 = _mm_unpacklo_epi8(ps1ps0, x0);
- *(int *)(s + 0 * p - 2) = _mm_cvtsi128_si32(ps1ps0);
+ storeu_uint32(s + 0 * p - 2, _mm_cvtsi128_si32(ps1ps0));
ps1ps0 = _mm_srli_si128(ps1ps0, 4);
- *(int *)(s + 1 * p - 2) = _mm_cvtsi128_si32(ps1ps0);
+ storeu_uint32(s + 1 * p - 2, _mm_cvtsi128_si32(ps1ps0));
ps1ps0 = _mm_srli_si128(ps1ps0, 4);
- *(int *)(s + 2 * p - 2) = _mm_cvtsi128_si32(ps1ps0);
+ storeu_uint32(s + 2 * p - 2, _mm_cvtsi128_si32(ps1ps0));
ps1ps0 = _mm_srli_si128(ps1ps0, 4);
- *(int *)(s + 3 * p - 2) = _mm_cvtsi128_si32(ps1ps0);
+ storeu_uint32(s + 3 * p - 2, _mm_cvtsi128_si32(ps1ps0));
- *(int *)(s + 4 * p - 2) = _mm_cvtsi128_si32(qs1qs0);
+ storeu_uint32(s + 4 * p - 2, _mm_cvtsi128_si32(qs1qs0));
qs1qs0 = _mm_srli_si128(qs1qs0, 4);
- *(int *)(s + 5 * p - 2) = _mm_cvtsi128_si32(qs1qs0);
+ storeu_uint32(s + 5 * p - 2, _mm_cvtsi128_si32(qs1qs0));
qs1qs0 = _mm_srli_si128(qs1qs0, 4);
- *(int *)(s + 6 * p - 2) = _mm_cvtsi128_si32(qs1qs0);
+ storeu_uint32(s + 6 * p - 2, _mm_cvtsi128_si32(qs1qs0));
qs1qs0 = _mm_srli_si128(qs1qs0, 4);
- *(int *)(s + 7 * p - 2) = _mm_cvtsi128_si32(qs1qs0);
+ storeu_uint32(s + 7 * p - 2, _mm_cvtsi128_si32(qs1qs0));
}
void vpx_lpf_horizontal_16_sse2(unsigned char *s, int p,
--- a/vpx_dsp/x86/mem_sse2.h
+++ b/vpx_dsp/x86/mem_sse2.h
@@ -12,8 +12,19 @@
#define VPX_VPX_DSP_X86_MEM_SSE2_H_
#include <emmintrin.h> // SSE2
+#include <string.h>
#include "./vpx_config.h"
+
+static INLINE void storeu_uint32(void *dst, uint32_t v) {
+ memcpy(dst, &v, sizeof(v));
+}
+
+static INLINE uint32_t loadu_uint32(const void *src) {
+ uint32_t v;
+ memcpy(&v, src, sizeof(v));
+ return v;
+}
static INLINE __m128i loadh_epi64(const __m128i s, const void *const src) {
return _mm_castps_si128(
--- a/vpx_dsp/x86/variance_sse2.c
+++ b/vpx_dsp/x86/variance_sse2.c
@@ -14,6 +14,7 @@
#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
#include "vpx_ports/mem.h"
+#include "vpx_dsp/x86/mem_sse2.h"
static INLINE unsigned int add32x4_sse2(__m128i val) {
val = _mm_add_epi32(val, _mm_srli_si128(val, 8));
@@ -35,8 +36,8 @@
}
static INLINE __m128i load4x2_sse2(const uint8_t *const p, const int stride) {
- const __m128i p0 = _mm_cvtsi32_si128(*(const uint32_t *)(p + 0 * stride));
- const __m128i p1 = _mm_cvtsi32_si128(*(const uint32_t *)(p + 1 * stride));
+ const __m128i p0 = _mm_cvtsi32_si128(loadu_uint32(p + 0 * stride));
+ const __m128i p1 = _mm_cvtsi32_si128(loadu_uint32(p + 1 * stride));
const __m128i p01 = _mm_unpacklo_epi32(p0, p1);
return _mm_unpacklo_epi8(p01, _mm_setzero_si128());
}