ref: 4d9c8de5a5249f23346815d6af4dfdb074531eef
parent: 95ac333f3bf2a915ef2736888e695f1b284b3660
 parent: eace9b7b00bf5f87a0ace0e55ca5b13eb8c9407c
	author: zhilwang <zhilwang@cisco.com>
	date: Tue Jul  8 09:49:07 EDT 2014
	
Merge pull request #1098 from dongzha/AddCPMBARM64 add MemoryZero Arm64 code and UT
--- a/codec/build/iOS/enc/welsenc/welsenc.xcodeproj/project.pbxproj
+++ b/codec/build/iOS/enc/welsenc/welsenc.xcodeproj/project.pbxproj
@@ -48,6 +48,7 @@
 		9AED665019469FC1009A3567 /* welsCodecTrace.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 9AED664C19469FC1009A3567 /* welsCodecTrace.cpp */; }; 		9AED66661946A2B3009A3567 /* utils.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 9AED66651946A2B3009A3567 /* utils.cpp */; }; 		F5617A50196A833A006E2B20 /* reconstruct_aarch64_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = F5617A4F196A833A006E2B20 /* reconstruct_aarch64_neon.S */; };+		F5BE8005196B913200ED02ED /* memory_aarch64_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = F5BE8004196B913200ED02ED /* memory_aarch64_neon.S */; };/* End PBXBuildFile section */
/* Begin PBXCopyFilesBuildPhase section */
@@ -157,6 +158,7 @@
 		9AED66651946A2B3009A3567 /* utils.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = utils.cpp; path = ../../../common/src/utils.cpp; sourceTree = "<group>"; }; 		9AED66671946A2C4009A3567 /* utils.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = utils.h; path = ../../../common/inc/utils.h; sourceTree = "<group>"; }; 		F5617A4F196A833A006E2B20 /* reconstruct_aarch64_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; name = reconstruct_aarch64_neon.S; path = arm64/reconstruct_aarch64_neon.S; sourceTree = "<group>"; };+		F5BE8004196B913200ED02ED /* memory_aarch64_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; name = memory_aarch64_neon.S; path = arm64/memory_aarch64_neon.S; sourceTree = "<group>"; };/* End PBXFileReference section */
/* Begin PBXFrameworksBuildPhase section */
@@ -186,6 +188,7 @@
 		4CB8F2B219235FAC005D6386 /* arm64 */ = {isa = PBXGroup;
children = (
+ F5BE8004196B913200ED02ED /* memory_aarch64_neon.S */,
F5617A4F196A833A006E2B20 /* reconstruct_aarch64_neon.S */,
4C23BC5F195A77E0003B81FC /* intra_pred_sad_3_opt_aarch64_neon.S */,
4CBC1B82194ACBB400214D9E /* intra_pred_aarch64_neon.S */,
@@ -431,6 +434,7 @@
4C23BC60195A77E0003B81FC /* intra_pred_sad_3_opt_aarch64_neon.S in Sources */,
4CE4472B18BC605C0017DF25 /* wels_preprocess.cpp in Sources */,
4CE4470E18BC605C0017DF25 /* au_set.cpp in Sources */,
+ F5BE8005196B913200ED02ED /* memory_aarch64_neon.S in Sources */,
4CBC1B83194ACBB400214D9E /* intra_pred_aarch64_neon.S in Sources */,
4CE4471718BC605C0017DF25 /* mc.cpp in Sources */,
F5617A50196A833A006E2B20 /* reconstruct_aarch64_neon.S in Sources */,
--- /dev/null
+++ b/codec/encoder/core/arm64/memory_aarch64_neon.S
@@ -1,0 +1,63 @@
+/*!
+ * \copy
+ * Copyright (c) 2013, Cisco Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifdef HAVE_NEON_AARCH64
+.text
+#include "arm_arch64_common_macro.S"
+/* void WelsSetMemZero_AArch64_neon (void* pDst, int32_t iSize): x0 = pDst, w1 = iSize. */
+/* Zeroes iSize bytes at pDst; iSize must be 24, 32 or a positive multiple of 64. */
+WELS_ASM_AARCH64_FUNC_BEGIN WelsSetMemZero_AArch64_neon
+    eor v0.16b, v0.16b, v0.16b          // v0 = 0, the 16-byte store pattern
+    cmp w1, #32                         // iSize is a 32-bit int: test w1 only, bits 63:32 of x1 are unspecified
+    b.eq mem_zero_32_neon_start         // exactly 32 bytes
+    b.lt mem_zero_24_neon_start         // anything below 32 takes the fixed 24-byte path
+mem_zero_loop:
+    subs w1, w1, #64                    // one 64-byte chunk per pass; flags survive the stores below
+    st1 {v0.16b}, [x0], #16
+    st1 {v0.16b}, [x0], #16
+    st1 {v0.16b}, [x0], #16
+    st1 {v0.16b}, [x0], #16
+    b.ne mem_zero_loop                  // ends only when the count hits exactly 0, hence the multiple-of-64 rule
+    b mem_zero_end
+
+mem_zero_32_neon_start:
+    st1 {v0.16b}, [x0], #16
+    st1 {v0.16b}, [x0], #16
+    b mem_zero_end
+mem_zero_24_neon_start:
+    st1 {v0.16b}, [x0], #16             // 16 bytes ...
+    st1 {v0.8b}, [x0], #8               // ... plus 8 more = 24 bytes
+mem_zero_end:
+
+WELS_ASM_AARCH64_FUNC_END
+
+#endif
--- a/codec/encoder/core/inc/encoder.h
+++ b/codec/encoder/core/inc/encoder.h
@@ -129,6 +129,8 @@
void WelsPrefetchZero_mmx (int8_t const* kpDst);
#elif defined(HAVE_NEON)
void WelsSetMemZero_neon (void* pDst, int32_t iSize);
+#elif defined(HAVE_NEON_AARCH64)
+void WelsSetMemZero_AArch64_neon (void* pDst, int32_t iSize);
#endif
#if defined(__cplusplus)
--- a/codec/encoder/core/src/encoder.cpp
+++ b/codec/encoder/core/src/encoder.cpp
@@ -179,6 +179,14 @@
}
#endif
+#if defined(HAVE_NEON_AARCH64)
+  if (uiCpuFlag & WELS_CPU_NEON) {+ pFuncList->pfSetMemZeroSize8 = WelsSetMemZero_AArch64_neon;
+ pFuncList->pfSetMemZeroSize64Aligned16 = WelsSetMemZero_AArch64_neon;
+ pFuncList->pfSetMemZeroSize64 = WelsSetMemZero_AArch64_neon;
+ }
+#endif
+
InitExpandPictureFunc (& (pFuncList->sExpandPicFunc), uiCpuFlag);
/* Intra_Prediction_fn*/
--- a/codec/encoder/targets.mk
+++ b/codec/encoder/targets.mk
@@ -61,6 +61,7 @@
ENCODER_ASM_ARM64_SRCS=\
$(ENCODER_SRCDIR)/core/arm64/intra_pred_aarch64_neon.S\
$(ENCODER_SRCDIR)/core/arm64/intra_pred_sad_3_opt_aarch64_neon.S\
+ $(ENCODER_SRCDIR)/core/arm64/memory_aarch64_neon.S\
$(ENCODER_SRCDIR)/core/arm64/pixel_aarch64_neon.S\
$(ENCODER_SRCDIR)/core/arm64/reconstruct_aarch64_neon.S\
--- /dev/null
+++ b/test/encoder/EncUT_MemoryZero.cpp
@@ -1,0 +1,94 @@
+#include<gtest/gtest.h>
+#include<math.h>
+#include<stdlib.h>
+#include<time.h>
+
+#include "cpu_core.h"
+#include "cpu.h"
+#include "macros.h"
+#include "wels_func_ptr_def.h"
+#include "../../codec/encoder/core/src/encoder.cpp"
+
+using namespace WelsSVCEnc;
+#define MEMORYZEROTEST_NUM 1000
+
+TEST (SetMemZeroFunTest, WelsSetMemZero) {+ int32_t iLen =64; // Test: every pfSetMemZero* pointer must leave memory identical to WelsSetMemZero_c; iLen is reassigned before each call
+ int32_t iCpuCores = 0;
+ SWelsFuncPtrList sFuncPtrList;
+ uint32_t uiCpuFlag = WelsCPUFeatureDetect (&iCpuCores); // feature bits tested by the #if blocks below
+ /* Default every pointer to the portable C routine; the CPU-specific blocks below override it when compiled in and detected. */
+ sFuncPtrList.pfSetMemZeroSize8 = WelsSetMemZero_c; // confirmed_safe_unsafe_usage
+ sFuncPtrList.pfSetMemZeroSize64Aligned16 = WelsSetMemZero_c; // confirmed_safe_unsafe_usage
+ sFuncPtrList.pfSetMemZeroSize64 = WelsSetMemZero_c; // confirmed_safe_unsafe_usage
+#if defined(X86_ASM)
+  if (uiCpuFlag & WELS_CPU_MMXEXT) {+ sFuncPtrList.pfSetMemZeroSize8 = WelsSetMemZeroSize8_mmx; // confirmed_safe_unsafe_usage
+ sFuncPtrList.pfSetMemZeroSize64Aligned16 = WelsSetMemZeroSize64_mmx; // confirmed_safe_unsafe_usage
+ sFuncPtrList.pfSetMemZeroSize64 = WelsSetMemZeroSize64_mmx; // confirmed_safe_unsafe_usage
+ }
+  if (uiCpuFlag & WELS_CPU_SSE2) {+ sFuncPtrList.pfSetMemZeroSize64Aligned16 = WelsSetMemZeroAligned64_sse2; // confirmed_safe_unsafe_usage
+ }
+#endif//X86_ASM
+
+#if defined(HAVE_NEON)
+  if (uiCpuFlag & WELS_CPU_NEON) {+ sFuncPtrList.pfSetMemZeroSize8 = WelsSetMemZero_neon; // one NEON routine serves all three pointers
+ sFuncPtrList.pfSetMemZeroSize64Aligned16 = WelsSetMemZero_neon;
+ sFuncPtrList.pfSetMemZeroSize64 = WelsSetMemZero_neon;
+ }
+#endif
+
+#if defined(HAVE_NEON_AARCH64)
+  if (uiCpuFlag & WELS_CPU_NEON) {+ sFuncPtrList.pfSetMemZeroSize8 = WelsSetMemZero_AArch64_neon; // one AArch64 NEON routine serves all three pointers
+ sFuncPtrList.pfSetMemZeroSize64Aligned16 = WelsSetMemZero_AArch64_neon;
+ sFuncPtrList.pfSetMemZeroSize64 = WelsSetMemZero_AArch64_neon;
+ }
+#endif
+
+ ENFORCE_STACK_ALIGN_2D (uint8_t, pInputAlign, 2, 64*101, 16) // NOTE(review): presumably declares two 16-byte-aligned buffers of 64*101 bytes each -- confirm against the macro
+
+  for (int32_t k = 0; k < MEMORYZEROTEST_NUM; k++) {+ memset(pInputAlign[0], 255, 64*101); // pass 1: pfSetMemZeroSize64Aligned16 at the buffer start; both buffers refilled with 0xFF first
+ memset(pInputAlign[1], 255, 64*101);
+ iLen = 64*(1+(rand()%100)); // random multiple of 64 in [64, 6400]
+ WelsSetMemZero_c(pInputAlign[0],iLen); // reference result
+ sFuncPtrList.pfSetMemZeroSize64Aligned16(pInputAlign[1],iLen); // routine under test
+    for (int32_t i = 0 ; i < 64*101; i++) {+ ASSERT_EQ (pInputAlign[0][i], pInputAlign[1][i]); // whole buffer is compared, so stray writes past iLen are caught too
+ }
+ }
+
+  for (int32_t k = 0; k < MEMORYZEROTEST_NUM; k++) {+ memset(pInputAlign[0], 255, 64*101); // pass 2: pfSetMemZeroSize64 at a +1 byte offset from the buffer start
+ memset(pInputAlign[1], 255, 64*101);
+ iLen = 64*(1+(rand()%100)); // random multiple of 64 in [64, 6400]
+ WelsSetMemZero_c(pInputAlign[0]+1,iLen);
+ sFuncPtrList.pfSetMemZeroSize64(pInputAlign[1]+1,iLen);
+    for (int32_t i = 0 ; i < 64*101; i++) {+ ASSERT_EQ (pInputAlign[0][i], pInputAlign[1][i]);
+ }
+ }
+
+ memset(pInputAlign[0], 255, 64*101);
+ memset(pInputAlign[1], 255, 64*101);
+ iLen = 32; // fixed 32-byte case through pfSetMemZeroSize8, again at offset +1
+ WelsSetMemZero_c(pInputAlign[0]+1,iLen);
+ sFuncPtrList.pfSetMemZeroSize8(pInputAlign[1]+1,iLen);
+  for (int32_t i = 0 ; i < 64*101; i++) {+ ASSERT_EQ (pInputAlign[0][i], pInputAlign[1][i]);
+ }
+
+ memset(pInputAlign[0], 255, 64*101);
+ memset(pInputAlign[1], 255, 64*101);
+ iLen = 24; // fixed 24-byte case through pfSetMemZeroSize8, again at offset +1
+ WelsSetMemZero_c(pInputAlign[0]+1,iLen);
+ sFuncPtrList.pfSetMemZeroSize8(pInputAlign[1]+1,iLen);
+  for (int32_t i = 0 ; i < 64*101; i++) {+ ASSERT_EQ (pInputAlign[0][i], pInputAlign[1][i]);
+ }
+}
+
+
--- a/test/encoder/targets.mk
+++ b/test/encoder/targets.mk
@@ -7,6 +7,7 @@
$(ENCODER_UNITTEST_SRCDIR)/EncUT_ExpGolomb.cpp\
$(ENCODER_UNITTEST_SRCDIR)/EncUT_GetIntraPredictor.cpp\
$(ENCODER_UNITTEST_SRCDIR)/EncUT_MemoryAlloc.cpp\
+ $(ENCODER_UNITTEST_SRCDIR)/EncUT_MemoryZero.cpp\
$(ENCODER_UNITTEST_SRCDIR)/EncUT_MotionEstimate.cpp\
$(ENCODER_UNITTEST_SRCDIR)/EncUT_Reconstruct.cpp\
$(ENCODER_UNITTEST_SRCDIR)/EncUT_Sample.cpp\
--
⑨