shithub: libvpx

Download patch

ref: b79f25b54635294508fca42e3766b805fb4fcf4e
parent: a4f5a742887a1389c518de4a07da625ce7b52bb5
parent: c2f82351e45ee51a87f0d4d3badcdc1b99a5fd50
author: James Zern <jzern@google.com>
date: Mon Jul 20 19:44:08 EDT 2020

Merge "vp8,vpx_dsp: [loongson] fix msa optimization bugs"

--- a/vp8/common/mips/msa/vp8_macros_msa.h
+++ b/vp8/common/mips/msa/vp8_macros_msa.h
@@ -122,11 +122,11 @@
     const uint8_t *psrc_m = (const uint8_t *)(psrc); \
     uint32_t val_m;                                  \
                                                      \
-    asm volatile("ulw  %[val_m],  %[psrc_m]  \n\t"   \
+    asm volatile("lwr %[val_m], 0(%[psrc_m]) \n\t"   \
+                 "lwl %[val_m], 3(%[psrc_m]) \n\t"   \
+                 : [val_m] "=&r"(val_m)              \
+                 : [psrc_m] "r"(psrc_m));            \
                                                      \
-                 : [val_m] "=r"(val_m)               \
-                 : [psrc_m] "m"(*psrc_m));           \
-                                                     \
     val_m;                                           \
   })
 
@@ -136,10 +136,10 @@
     const uint8_t *psrc_m = (const uint8_t *)(psrc); \
     uint64_t val_m = 0;                              \
                                                      \
-    asm volatile("uld  %[val_m],  %[psrc_m]  \n\t"   \
-                                                     \
-                 : [val_m] "=r"(val_m)               \
-                 : [psrc_m] "m"(*psrc_m));           \
+    asm volatile("ldr %[val_m], 0(%[psrc_m]) \n\t"   \
+                 "ldl %[val_m], 7(%[psrc_m]) \n\t"   \
+                 : [val_m] "=&r"(val_m)              \
+                 : [psrc_m] "r"(psrc_m));            \
                                                      \
     val_m;                                           \
   })
--- a/vpx_dsp/mips/macros_msa.h
+++ b/vpx_dsp/mips/macros_msa.h
@@ -88,11 +88,11 @@
     const uint8_t *psrc_lw_m = (const uint8_t *)(psrc);          \
     uint32_t val_lw_m;                                           \
                                                                  \
-    __asm__ __volatile__("ulw  %[val_lw_m],  %[psrc_lw_m]  \n\t" \
+    __asm__ __volatile__("lwr %[val_lw_m], 0(%[psrc_lw_m]) \n\t" \
+                         "lwl %[val_lw_m], 3(%[psrc_lw_m]) \n\t" \
+                         : [val_lw_m] "=&r"(val_lw_m)            \
+                         : [psrc_lw_m] "r"(psrc_lw_m));          \
                                                                  \
-                         : [val_lw_m] "=r"(val_lw_m)             \
-                         : [psrc_lw_m] "m"(*psrc_lw_m));         \
-                                                                 \
     val_lw_m;                                                    \
   })
 
@@ -102,10 +102,10 @@
     const uint8_t *psrc_ld_m = (const uint8_t *)(psrc);          \
     uint64_t val_ld_m = 0;                                       \
                                                                  \
-    __asm__ __volatile__("uld  %[val_ld_m],  %[psrc_ld_m]  \n\t" \
-                                                                 \
-                         : [val_ld_m] "=r"(val_ld_m)             \
-                         : [psrc_ld_m] "m"(*psrc_ld_m));         \
+    __asm__ __volatile__("ldr %[val_ld_m], 0(%[psrc_ld_m]) \n\t" \
+                         "ldl %[val_ld_m], 7(%[psrc_ld_m]) \n\t" \
+                         : [val_ld_m] "=&r"(val_ld_m)            \
+                         : [psrc_ld_m] "r"(psrc_ld_m));          \
                                                                  \
     val_ld_m;                                                    \
   })