ref: b79f25b54635294508fca42e3766b805fb4fcf4e
parent: a4f5a742887a1389c518de4a07da625ce7b52bb5
parent: c2f82351e45ee51a87f0d4d3badcdc1b99a5fd50
author: James Zern <jzern@google.com>
date: Mon Jul 20 19:44:08 EDT 2020
Merge "vp8,vpx_dsp: [loongson] fix msa optimization bugs"
--- a/vp8/common/mips/msa/vp8_macros_msa.h
+++ b/vp8/common/mips/msa/vp8_macros_msa.h
@@ -122,11 +122,11 @@
const uint8_t *psrc_m = (const uint8_t *)(psrc); \
uint32_t val_m; \
\
- asm volatile("ulw %[val_m], %[psrc_m] \n\t" \
+ asm volatile("lwr %[val_m], 0(%[psrc_m]) \n\t" \
+ "lwl %[val_m], 3(%[psrc_m]) \n\t" \
+ : [val_m] "=&r"(val_m) \
+ : [psrc_m] "r"(psrc_m)); \
\
- : [val_m] "=r"(val_m) \
- : [psrc_m] "m"(*psrc_m)); \
- \
val_m; \
})
@@ -136,10 +136,10 @@
const uint8_t *psrc_m = (const uint8_t *)(psrc); \
uint64_t val_m = 0; \
\
- asm volatile("uld %[val_m], %[psrc_m] \n\t" \
- \
- : [val_m] "=r"(val_m) \
- : [psrc_m] "m"(*psrc_m)); \
+ asm volatile("ldr %[val_m], 0(%[psrc_m]) \n\t" \
+ "ldl %[val_m], 7(%[psrc_m]) \n\t" \
+ : [val_m] "=&r"(val_m) \
+ : [psrc_m] "r"(psrc_m)); \
\
val_m; \
})
--- a/vpx_dsp/mips/macros_msa.h
+++ b/vpx_dsp/mips/macros_msa.h
@@ -88,11 +88,11 @@
const uint8_t *psrc_lw_m = (const uint8_t *)(psrc); \
uint32_t val_lw_m; \
\
- __asm__ __volatile__("ulw %[val_lw_m], %[psrc_lw_m] \n\t" \
+ __asm__ __volatile__("lwr %[val_lw_m], 0(%[psrc_lw_m]) \n\t" \
+ "lwl %[val_lw_m], 3(%[psrc_lw_m]) \n\t" \
+ : [val_lw_m] "=&r"(val_lw_m) \
+ : [psrc_lw_m] "r"(psrc_lw_m)); \
\
- : [val_lw_m] "=r"(val_lw_m) \
- : [psrc_lw_m] "m"(*psrc_lw_m)); \
- \
val_lw_m; \
})
@@ -102,10 +102,10 @@
const uint8_t *psrc_ld_m = (const uint8_t *)(psrc); \
uint64_t val_ld_m = 0; \
\
- __asm__ __volatile__("uld %[val_ld_m], %[psrc_ld_m] \n\t" \
- \
- : [val_ld_m] "=r"(val_ld_m) \
- : [psrc_ld_m] "m"(*psrc_ld_m)); \
+ __asm__ __volatile__("ldr %[val_ld_m], 0(%[psrc_ld_m]) \n\t" \
+ "ldl %[val_ld_m], 7(%[psrc_ld_m]) \n\t" \
+ : [val_ld_m] "=&r"(val_ld_m) \
+ : [psrc_ld_m] "r"(psrc_ld_m)); \
\
val_ld_m; \
})