ref: c8e1a41c298f89d054757bcfad09195eac8b2a6b
parent: 6cb48fc54709b7e9a72b3218a17958c3787c10bf
	author: Licai Guo <guolicai@gmail.com>
	date: Thu Apr 17 06:06:48 EDT 2014
	
Move copy_mb neon code to common folder
--- a/codec/build/iOS/common/common.xcodeproj/project.pbxproj
+++ b/codec/build/iOS/common/common.xcodeproj/project.pbxproj
@@ -16,6 +16,7 @@
 		4C3406CF18D96EA600DFA14A /* deblocking_common.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4C3406C618D96EA600DFA14A /* deblocking_common.cpp */; };
 		4C3406D018D96EA600DFA14A /* logging.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4C3406C718D96EA600DFA14A /* logging.cpp */; };
 		4C3406D118D96EA600DFA14A /* WelsThreadLib.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4C3406C818D96EA600DFA14A /* WelsThreadLib.cpp */; };
+		4CC61F0918FF6B4B00E56EAB /* copy_mb_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = 4CC61F0818FF6B4B00E56EAB /* copy_mb_neon.S */; };
 		4CE443D918B722CD0017DF25 /* Foundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 4CE443D818B722CD0017DF25 /* Foundation.framework */; };
 		4CE443E718B722CD0017DF25 /* XCTest.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 4CE443E618B722CD0017DF25 /* XCTest.framework */; };
 		4CE443E818B722CD0017DF25 /* Foundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 4CE443D818B722CD0017DF25 /* Foundation.framework */; };
@@ -71,6 +72,7 @@
 		4C3406C618D96EA600DFA14A /* deblocking_common.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = deblocking_common.cpp; sourceTree = "<group>"; };
 		4C3406C718D96EA600DFA14A /* logging.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = logging.cpp; sourceTree = "<group>"; };
 		4C3406C818D96EA600DFA14A /* WelsThreadLib.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = WelsThreadLib.cpp; sourceTree = "<group>"; };
+		4CC61F0818FF6B4B00E56EAB /* copy_mb_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = copy_mb_neon.S; sourceTree = "<group>"; };
 		4CE443D518B722CD0017DF25 /* libcommon.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libcommon.a; sourceTree = BUILT_PRODUCTS_DIR; };
 		4CE443D818B722CD0017DF25 /* Foundation.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Foundation.framework; path = System/Library/Frameworks/Foundation.framework; sourceTree = SDKROOT; };
 		4CE443E518B722CD0017DF25 /* commonTests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = commonTests.xctest; sourceTree = BUILT_PRODUCTS_DIR; };
@@ -111,6 +113,7 @@
 		4C3406B118D96EA600DFA14A /* arm */ = {isa = PBXGroup;
children = (
+ 4CC61F0818FF6B4B00E56EAB /* copy_mb_neon.S */,
4C3406B218D96EA600DFA14A /* arm_arch_common_macro.S */,
4C3406B318D96EA600DFA14A /* deblocking_neon.S */,
4C3406B418D96EA600DFA14A /* expand_picture_neon.S */,
@@ -300,6 +303,7 @@
4C3406D118D96EA600DFA14A /* WelsThreadLib.cpp in Sources */,
4C3406CC18D96EA600DFA14A /* mc_neon.S in Sources */,
4C3406CB18D96EA600DFA14A /* expand_picture_neon.S in Sources */,
+ 4CC61F0918FF6B4B00E56EAB /* copy_mb_neon.S in Sources */,
4C3406CD18D96EA600DFA14A /* cpu.cpp in Sources */,
4C3406CA18D96EA600DFA14A /* deblocking_neon.S in Sources */,
F0B204F918FD23BF005DA23F /* copy_mb.cpp in Sources */,
--- /dev/null
+++ b/codec/common/arm/copy_mb_neon.S
@@ -1,0 +1,201 @@
+/*!
+ * \copy
+ * Copyright (c) 2013, Cisco Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifdef HAVE_NEON
+.text
+#include "arm_arch_common_macro.S"
+
+#ifdef __APPLE__
+.macro LOAD_ALIGNED_DATA_WITH_STRIDE
+//	{	//	input: $0~$3 = four registers to fill (one row each), $4 = src*, $5 = src_stride
+    vld1.64	{$0}, [$4,:128], $5
+    vld1.64	{$1}, [$4,:128], $5
+    vld1.64	{$2}, [$4,:128], $5
+    vld1.64	{$3}, [$4,:128], $5
+// }	// :128 requires 16-byte-aligned row addresses; $4 is post-incremented by $5 after each row
+.endm
+
+.macro STORE_ALIGNED_DATA_WITH_STRIDE
+//	{	//	input: $0~$3 = four registers to write out (one row each), $4 = dst*, $5 = dst_stride
+    vst1.64	{$0}, [$4,:128], $5
+    vst1.64	{$1}, [$4,:128], $5
+    vst1.64	{$2}, [$4,:128], $5
+    vst1.64	{$3}, [$4,:128], $5
+// }	// :128 requires 16-byte-aligned row addresses; $4 is post-incremented by $5 after each row
+.endm
+
+.macro LOAD_UNALIGNED_DATA_WITH_STRIDE
+//	{	//	input: $0~$3 = four registers to fill (one row each), $4 = src*, $5 = src_stride
+    vld1.64	{$0}, [$4], $5
+    vld1.64	{$1}, [$4], $5
+    vld1.64	{$2}, [$4], $5
+    vld1.64	{$3}, [$4], $5
+// }	// no alignment hint on the address; $4 is post-incremented by $5 after each row
+.endm
+
+.macro STORE_UNALIGNED_DATA_WITH_STRIDE
+//	{	//	input: $0~$3 = four registers to write out (one row each), $4 = dst*, $5 = dst_stride
+    vst1.64	{$0}, [$4], $5
+    vst1.64	{$1}, [$4], $5
+    vst1.64	{$2}, [$4], $5
+    vst1.64	{$3}, [$4], $5
+// }	// no alignment hint on the address; $4 is post-incremented by $5 after each row
+.endm
+#else
+.macro LOAD_ALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5
+//	{	//	input: \arg0~\arg3 = four registers to fill (one row each), \arg4 = src*, \arg5 = src_stride
+    vld1.64	{\arg0}, [\arg4,:128], \arg5
+    vld1.64	{\arg1}, [\arg4,:128], \arg5
+    vld1.64	{\arg2}, [\arg4,:128], \arg5
+    vld1.64	{\arg3}, [\arg4,:128], \arg5
+// }	// :128 requires 16-byte-aligned row addresses; \arg4 is post-incremented by \arg5 after each row
+.endm
+
+.macro STORE_ALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5
+//	{	//	input: \arg0~\arg3 = four registers to write out (one row each), \arg4 = dst*, \arg5 = dst_stride
+    vst1.64	{\arg0}, [\arg4,:128], \arg5
+    vst1.64	{\arg1}, [\arg4,:128], \arg5
+    vst1.64	{\arg2}, [\arg4,:128], \arg5
+    vst1.64	{\arg3}, [\arg4,:128], \arg5
+// }	// :128 requires 16-byte-aligned row addresses; \arg4 is post-incremented by \arg5 after each row
+.endm
+
+.macro LOAD_UNALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5
+//	{	//	input: \arg0~\arg3 = four registers to fill (one row each), \arg4 = src*, \arg5 = src_stride
+    vld1.64	{\arg0}, [\arg4], \arg5
+    vld1.64	{\arg1}, [\arg4], \arg5
+    vld1.64	{\arg2}, [\arg4], \arg5
+    vld1.64	{\arg3}, [\arg4], \arg5
+// }	// no alignment hint on the address; \arg4 is post-incremented by \arg5 after each row
+.endm
+
+.macro STORE_UNALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5
+//	{	//	input: \arg0~\arg3 = four registers to write out (one row each), \arg4 = dst*, \arg5 = dst_stride
+    vst1.64	{\arg0}, [\arg4], \arg5
+    vst1.64	{\arg1}, [\arg4], \arg5
+    vst1.64	{\arg2}, [\arg4], \arg5
+    vst1.64	{\arg3}, [\arg4], \arg5
+// }	// no alignment hint on the address; \arg4 is post-incremented by \arg5 after each row
+.endm
+
+#endif
+
+
+WELS_ASM_FUNC_BEGIN WelsCopy8x8_neon
+// Copy an 8x8 byte block: r0 = dst*, r1 = dst_stride, r2 = src*, r3 = src_stride.
+ LOAD_UNALIGNED_DATA_WITH_STRIDE d0, d1, d2, d3, r2, r3
+// Rows 0-3 (one 64-bit d register per row) are stored before rows 4-7 are fetched.
+ STORE_UNALIGNED_DATA_WITH_STRIDE d0, d1, d2, d3, r0, r1
+
+ LOAD_UNALIGNED_DATA_WITH_STRIDE d4, d5, d6, d7, r2, r3
+
+ STORE_UNALIGNED_DATA_WITH_STRIDE d4, d5, d6, d7, r0, r1
+
+WELS_ASM_FUNC_END
+
+
+WELS_ASM_FUNC_BEGIN WelsCopy16x16_neon
+// Copy a 16x16 byte block: r0 = dst*, r1 = dst_stride, r2 = src*, r3 = src_stride.
+ LOAD_ALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r2, r3
+// The ALIGNED macros use the :128 hint, so every row address on both sides must be 16-byte aligned.
+ STORE_ALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r0, r1
+// Four 16-byte rows per load/store pair; only q0-q3 and q8-q11 are written, q4-q7 are left untouched.
+ LOAD_ALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r2, r3
+
+ STORE_ALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r0, r1
+
+ LOAD_ALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r2, r3
+
+ STORE_ALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r0, r1
+
+ LOAD_ALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r2, r3
+
+ STORE_ALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r0, r1
+
+WELS_ASM_FUNC_END
+
+
+WELS_ASM_FUNC_BEGIN WelsCopy16x16NotAligned_neon
+// Copy a 16x16 byte block: r0 = dst*, r1 = dst_stride, r2 = src*, r3 = src_stride.
+ LOAD_UNALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r2, r3
+// Same sequence as WelsCopy16x16_neon but through the UNALIGNED macros, which carry no :128 hint.
+ STORE_UNALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r0, r1
+// Four 16-byte rows per load/store pair; only q0-q3 and q8-q11 are written, q4-q7 are left untouched.
+ LOAD_UNALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r2, r3
+
+ STORE_UNALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r0, r1
+
+ LOAD_UNALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r2, r3
+
+ STORE_UNALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r0, r1
+
+ LOAD_UNALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r2, r3
+
+ STORE_UNALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r0, r1
+
+WELS_ASM_FUNC_END
+
+
+WELS_ASM_FUNC_BEGIN WelsCopy16x8NotAligned_neon
+// Copy 8 rows of 16 bytes: r0 = dst*, r1 = dst_stride, r2 = src*, r3 = src_stride.
+ LOAD_UNALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r2, r3
+// Rows 0-3 (one 128-bit q register per row) are stored before rows 4-7 are fetched; no :128 hint is used.
+ STORE_UNALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r0, r1
+
+ LOAD_UNALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r2, r3
+
+ STORE_UNALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r0, r1
+
+WELS_ASM_FUNC_END
+
+
+WELS_ASM_FUNC_BEGIN WelsCopy8x16_neon
+// Copy 16 rows of 8 bytes: r0 = dst*, r1 = dst_stride, r2 = src*, r3 = src_stride.
+ LOAD_UNALIGNED_DATA_WITH_STRIDE d0, d1, d2, d3, r2, r3
+// Four load/store pairs of four rows each (one 64-bit d register per row), alternating d0-d3 and d4-d7.
+ STORE_UNALIGNED_DATA_WITH_STRIDE d0, d1, d2, d3, r0, r1
+
+ LOAD_UNALIGNED_DATA_WITH_STRIDE d4, d5, d6, d7, r2, r3
+
+ STORE_UNALIGNED_DATA_WITH_STRIDE d4, d5, d6, d7, r0, r1
+
+ LOAD_UNALIGNED_DATA_WITH_STRIDE d0, d1, d2, d3, r2, r3
+
+ STORE_UNALIGNED_DATA_WITH_STRIDE d0, d1, d2, d3, r0, r1
+
+ LOAD_UNALIGNED_DATA_WITH_STRIDE d4, d5, d6, d7, r2, r3
+
+ STORE_UNALIGNED_DATA_WITH_STRIDE d4, d5, d6, d7, r0, r1
+
+WELS_ASM_FUNC_END
+
+#endif
--- a/codec/encoder/core/arm/reconstruct_neon.S
+++ b/codec/encoder/core/arm/reconstruct_neon.S
@@ -35,42 +35,6 @@
#include "arm_arch_common_macro.S"
#ifdef __APPLE__
-.macro LOAD_ALIGNED_DATA_WITH_STRIDE
-//	{	//	input: $0~$3, src*, src_stride
-    vld1.64	{$0}, [$4,:128], $5
-    vld1.64	{$1}, [$4,:128], $5
-    vld1.64	{$2}, [$4,:128], $5
-    vld1.64	{$3}, [$4,:128], $5
-// }
-.endm
-
-.macro STORE_ALIGNED_DATA_WITH_STRIDE
-//	{	//	input: $0~$3, dst*, dst_stride
-    vst1.64	{$0}, [$4,:128], $5
-    vst1.64	{$1}, [$4,:128], $5
-    vst1.64	{$2}, [$4,:128], $5
-    vst1.64	{$3}, [$4,:128], $5
-// }
-.endm
-
-.macro LOAD_UNALIGNED_DATA_WITH_STRIDE
-//	{	//	input: $0~$3, src*, src_stride
-    vld1.64	{$0}, [$4], $5
-    vld1.64	{$1}, [$4], $5
-    vld1.64	{$2}, [$4], $5
-    vld1.64	{$3}, [$4], $5
-// }
-.endm
-
-.macro STORE_UNALIGNED_DATA_WITH_STRIDE
-//	{	//	input: $0~$3, dst*, dst_stride
-    vst1.64	{$0}, [$4], $5
-    vst1.64	{$1}, [$4], $5
-    vst1.64	{$2}, [$4], $5
-    vst1.64	{$3}, [$4], $5
-// }
-.endm
-
.macro LOAD_4x4_DATA_FOR_DCT
 //	{	//	input: $0~$3, src1*, src1_stride, src2*, src2_stride
     vld2.16	{$0[0],$1[0]}, [$4], $5
@@ -315,42 +279,6 @@
// }
.endm
#else
-.macro LOAD_ALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5
-//	{	//	input: \arg0~\arg3, src*, src_stride
-    vld1.64	{\arg0}, [\arg4,:128], \arg5
-    vld1.64	{\arg1}, [\arg4,:128], \arg5
-    vld1.64	{\arg2}, [\arg4,:128], \arg5
-    vld1.64	{\arg3}, [\arg4,:128], \arg5
-// }
-.endm
-
-.macro STORE_ALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5
-//	{	//	input: \arg0~\arg3, dst*, dst_stride
-    vst1.64	{\arg0}, [\arg4,:128], \arg5
-    vst1.64	{\arg1}, [\arg4,:128], \arg5
-    vst1.64	{\arg2}, [\arg4,:128], \arg5
-    vst1.64	{\arg3}, [\arg4,:128], \arg5
-// }
-.endm
-
-.macro LOAD_UNALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5
-//	{	//	input: \arg0~\arg3, src*, src_stride
-    vld1.64	{\arg0}, [\arg4], \arg5
-    vld1.64	{\arg1}, [\arg4], \arg5
-    vld1.64	{\arg2}, [\arg4], \arg5
-    vld1.64	{\arg3}, [\arg4], \arg5
-// }
-.endm
-
-.macro STORE_UNALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5
-//	{	//	input: \arg0~\arg3, dst*, dst_stride
-    vst1.64	{\arg0}, [\arg4], \arg5
-    vst1.64	{\arg1}, [\arg4], \arg5
-    vst1.64	{\arg2}, [\arg4], \arg5
-    vst1.64	{\arg3}, [\arg4], \arg5
-// }
-.endm
-
.macro LOAD_4x4_DATA_FOR_DCT arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7
 //	{	//	input: \arg0~\arg3, src1*, src1_stride, src2*, src2_stride
     vld2.16	{\arg0[0],\arg1[0]}, [\arg4], \arg5
@@ -595,96 +523,6 @@
// }
.endm
#endif
-
-
-WELS_ASM_FUNC_BEGIN WelsCopy8x8_neon
-
- LOAD_UNALIGNED_DATA_WITH_STRIDE d0, d1, d2, d3, r2, r3
-
- STORE_UNALIGNED_DATA_WITH_STRIDE d0, d1, d2, d3, r0, r1
-
- LOAD_UNALIGNED_DATA_WITH_STRIDE d4, d5, d6, d7, r2, r3
-
- STORE_UNALIGNED_DATA_WITH_STRIDE d4, d5, d6, d7, r0, r1
-
-WELS_ASM_FUNC_END
-
-
-WELS_ASM_FUNC_BEGIN WelsCopy16x16_neon
-
- LOAD_ALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r2, r3
-
- STORE_ALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r0, r1
-
- LOAD_ALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r2, r3
-
- STORE_ALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r0, r1
-
- LOAD_ALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r2, r3
-
- STORE_ALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r0, r1
-
- LOAD_ALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r2, r3
-
- STORE_ALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r0, r1
-
-WELS_ASM_FUNC_END
-
-
-WELS_ASM_FUNC_BEGIN WelsCopy16x16NotAligned_neon
-
- LOAD_UNALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r2, r3
-
- STORE_UNALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r0, r1
-
- LOAD_UNALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r2, r3
-
- STORE_UNALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r0, r1
-
- LOAD_UNALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r2, r3
-
- STORE_UNALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r0, r1
-
- LOAD_UNALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r2, r3
-
- STORE_UNALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r0, r1
-
-WELS_ASM_FUNC_END
-
-
-WELS_ASM_FUNC_BEGIN WelsCopy16x8NotAligned_neon
-
- LOAD_UNALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r2, r3
-
- STORE_UNALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r0, r1
-
- LOAD_UNALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r2, r3
-
- STORE_UNALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r0, r1
-
-WELS_ASM_FUNC_END
-
-
-WELS_ASM_FUNC_BEGIN WelsCopy8x16_neon
-
- LOAD_UNALIGNED_DATA_WITH_STRIDE d0, d1, d2, d3, r2, r3
-
- STORE_UNALIGNED_DATA_WITH_STRIDE d0, d1, d2, d3, r0, r1
-
- LOAD_UNALIGNED_DATA_WITH_STRIDE d4, d5, d6, d7, r2, r3
-
- STORE_UNALIGNED_DATA_WITH_STRIDE d4, d5, d6, d7, r0, r1
-
- LOAD_UNALIGNED_DATA_WITH_STRIDE d0, d1, d2, d3, r2, r3
-
- STORE_UNALIGNED_DATA_WITH_STRIDE d0, d1, d2, d3, r0, r1
-
- LOAD_UNALIGNED_DATA_WITH_STRIDE d4, d5, d6, d7, r2, r3
-
- STORE_UNALIGNED_DATA_WITH_STRIDE d4, d5, d6, d7, r0, r1
-
-WELS_ASM_FUNC_END
-
WELS_ASM_FUNC_BEGIN WelsDctT4_neon
--
⑨