ref: b1b4ba1bddf7290ff3e054e545e2d47dd70688ad
parent: db60c02c9e0524b87915cc22f01c2f89c2df6f0b
author: Christian Duvivier <cduvivier@google.com>
date: Thu Sep 26 12:01:37 EDT 2013
Properly save NEON registers. Replace the current code, which corrupts the stack, with a duplicate of the vp8 code that saves and restores the NEON registers. Change-Id: Ibb0220b9aa985d10533befa0a455ebce57a2891a
--- a/vp9/common/arm/neon/vp9_idct16x16_neon.c
+++ b/vp9/common/arm/neon/vp9_idct16x16_neon.c
@@ -29,17 +29,19 @@
int16_t skip_adding,
uint8_t *dest,
int dest_stride);
-extern void save_neon_registers();
-extern void restore_neon_registers();
+/* For ARM NEON, d8-d15 are callee-saved registers, and need to be saved. */
+extern void vp9_push_neon(int64_t *store);
+extern void vp9_pop_neon(int64_t *store);
void vp9_short_idct16x16_add_neon(int16_t *input,
uint8_t *dest, int dest_stride) {
+ int64_t store_reg[8];
int16_t pass1_output[16*16] = {0};
int16_t row_idct_output[16*16] = {0};
// save d8-d15 register values.
- save_neon_registers();
+ vp9_push_neon(store_reg);
/* Parallel idct on the upper 8 rows */
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
@@ -102,7 +104,7 @@
dest_stride);
// restore d8-d15 register values.
- restore_neon_registers();
+ vp9_pop_neon(store_reg);
return;
}
@@ -109,11 +111,12 @@
void vp9_short_idct16x16_10_add_neon(int16_t *input,
uint8_t *dest, int dest_stride) {
+ int64_t store_reg[8];
int16_t pass1_output[16*16] = {0};
int16_t row_idct_output[16*16] = {0};
// save d8-d15 register values.
- save_neon_registers();
+ vp9_push_neon(store_reg);
/* Parallel idct on the upper 8 rows */
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
@@ -163,7 +166,7 @@
dest_stride);
// restore d8-d15 register values.
- restore_neon_registers();
+ vp9_pop_neon(store_reg);
return;
}
--- /dev/null
+++ b/vp9/common/arm/neon/vp9_save_reg_neon.asm
@@ -1,0 +1,36 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+ EXPORT |vp9_push_neon|
+ EXPORT |vp9_pop_neon|
+
+ ARM
+ REQUIRE8
+ PRESERVE8
+
+ AREA ||.text||, CODE, READONLY, ALIGN=2
+
+|vp9_push_neon| PROC ; void vp9_push_neon(int64_t *store): copy d8-d15 into the buffer addressed by r0
+ vst1.i64 {d8, d9, d10, d11}, [r0]! ; write d8-d11 (4 x 64 bits) at [r0]; '!' advances r0 past them
+ vst1.i64 {d12, d13, d14, d15}, [r0]! ; write d12-d15 right after, so 8 x 64 bits are stored in total
+ bx lr ; return to caller; sp is not modified anywhere in this routine
+
+ ENDP
+
+|vp9_pop_neon| PROC ; void vp9_pop_neon(int64_t *store): reload d8-d15 from the buffer addressed by r0
+ vld1.i64 {d8, d9, d10, d11}, [r0]! ; read d8-d11 (4 x 64 bits) from [r0]; '!' advances r0 past them
+ vld1.i64 {d12, d13, d14, d15}, [r0]! ; read d12-d15 from the following 4 x 64 bits
+ bx lr ; return to caller; sp is not modified anywhere in this routine
+
+ ENDP
+
+ END
+
--- a/vp9/common/arm/neon/vp9_short_idct16x16_add_neon.asm
+++ b/vp9/common/arm/neon/vp9_short_idct16x16_add_neon.asm
@@ -12,8 +12,6 @@
EXPORT |vp9_short_idct16x16_add_neon_pass2|
EXPORT |vp9_short_idct16x16_10_add_neon_pass1|
EXPORT |vp9_short_idct16x16_10_add_neon_pass2|
- EXPORT |save_neon_registers|
- EXPORT |restore_neon_registers|
ARM
REQUIRE8
PRESERVE8
@@ -1178,14 +1176,4 @@
pop {r3-r9}
bx lr
ENDP ; |vp9_short_idct16x16_10_add_neon_pass2|
-;void |save_neon_registers|()
-|save_neon_registers| PROC
- vpush {d8-d15}
- bx lr
- ENDP ; |save_registers|
-;void |restore_neon_registers|()
-|restore_neon_registers| PROC
- vpop {d8-d15}
- bx lr
- ENDP ; |restore_registers|
END
--- a/vp9/vp9_common.mk
+++ b/vp9/vp9_common.mk
@@ -108,5 +108,6 @@
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_mb_lpf_neon$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_copy_neon$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_avg_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_save_reg_neon$(ASM)
$(eval $(call rtcd_h_template,vp9_rtcd,vp9/common/vp9_rtcd_defs.sh))
--
⑨