ref: 37a6e8fac05abd8c7aa2f8c6cd9e26eceb1d8ee9
dir: /test/encoder/EncUT_SVC_me.cpp/
#include <gtest/gtest.h>
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <vector>
#include "cpu_core.h"
#include "cpu.h"
#include "macros.h"
#include "ls_defines.h"
#include "svc_motion_estimate.h"

using namespace WelsEnc;

// Number of random-data iterations executed by every generated test.
#define SVC_ME_TEST_NUM 10

// Number of histogram bins / hash slots: one per value a uint16_t feature can take.
#define SVC_ME_FEATURE_VALUE_NUM 65536

// Stride of the random reference picture: width rounded up to a multiple of 16, plus 16 columns.
#define SVC_ME_REF_STRIDE(iWidth) ((((( iWidth) + 15) >> 4) << 4) + 16)

// Size in bytes of the random reference picture: (height + 16) rows of SVC_ME_REF_STRIDE bytes.
#define SVC_ME_REF_BUF_SIZE(iWidth, iHeight) (((iHeight) + 16) * SVC_ME_REF_STRIDE (iWidth))

// Fills p[0 .. Len-1] with values in [0, 255] taken from rand().
static void FillWithRandomData (uint8_t* p, int32_t Len) {
  for (int32_t i = 0; i < Len; i++) {
    p[i] = rand() % 256;
  }
}

//preprocess related

// Reference sum of the kiBlockSize x kiBlockSize pixels whose top-left corner is pRef;
// consecutive rows are kiRefStride bytes apart.
static int32_t SumOfSquareBlock_ref (const uint8_t* pRef, const int32_t kiRefStride, const int32_t kiBlockSize) {
  int32_t iSum = 0;
  for (int32_t y = 0; y < kiBlockSize; y++) {
    for (int32_t x = 0; x < kiBlockSize; x++) {
      iSum += pRef[x];
    }
    pRef += kiRefStride;
  }
  return iSum;
}

// Shared body of the two frame-level references below.
// For every position (x, y) with 0 <= x < kiWidth and 0 <= y < kiHeight it sums the
// kiBlockSize x kiBlockSize block whose top-left pixel is (x, y), stores the sum at
// pFeatureOfBlock[y * kiWidth + x] and increments pTimesOfFeatureValue[sum].
// The picture therefore has to provide kiWidth + kiBlockSize - 1 columns and
// kiHeight + kiBlockSize - 1 rows. The histogram is only incremented, never cleared,
// so the caller must zero it beforehand.
// Uses only reference code, so the anchor stays independent of the *_c functions under test.
static void SumOfSquareBlockOfFrame_ref (uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight,
    const int32_t kiRefStride, const int32_t kiBlockSize, uint16_t* pFeatureOfBlock,
    uint32_t pTimesOfFeatureValue[]) {
  for (int32_t y = 0; y < kiHeight; y++) {
    const uint8_t* pRef = pRefPicture + kiRefStride * y;
    uint16_t* pBuffer = pFeatureOfBlock + kiWidth * y;
    for (int32_t x = 0; x < kiWidth; x++) {
      const int32_t iSum = SumOfSquareBlock_ref (pRef + x, kiRefStride, kiBlockSize);
      pBuffer[x] = static_cast<uint16_t> (iSum);
      pTimesOfFeatureValue[iSum]++;
    }
  }
}

// Reference: sum of the 8x8 pixel block starting at pRef (row pitch kiRefStride).
int32_t SumOf8x8SingleBlock_ref (uint8_t* pRef, const int32_t kiRefStride) {
  return SumOfSquareBlock_ref (pRef, kiRefStride, 8);
}

// Reference: sum of the 16x16 pixel block starting at pRef (row pitch kiRefStride).
int32_t SumOf16x16SingleBlock_ref (uint8_t* pRef, const int32_t kiRefStride) {
  return SumOfSquareBlock_ref (pRef, kiRefStride, 16);
}

// Reference: 8x8 block sum for every one of the kiWidth x kiHeight block positions,
// written to pFeatureOfBlock and counted in pTimesOfFeatureValue.
void SumOf8x8BlockOfFrame_ref (uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight,
                               const int32_t kiRefStride, uint16_t* pFeatureOfBlock,
                               uint32_t pTimesOfFeatureValue[]) {
  SumOfSquareBlockOfFrame_ref (pRefPicture, kiWidth, kiHeight, kiRefStride, 8, pFeatureOfBlock,
                               pTimesOfFeatureValue);
}

// Reference: 16x16 block sum for every one of the kiWidth x kiHeight block positions,
// written to pFeatureOfBlock and counted in pTimesOfFeatureValue.
void SumOf16x16BlockOfFrame_ref (uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight,
                                 const int32_t kiRefStride, uint16_t* pFeatureOfBlock,
                                 uint32_t pTimesOfFeatureValue[]) {
  SumOfSquareBlockOfFrame_ref (pRefPicture, kiWidth, kiHeight, kiRefStride, 16, pFeatureOfBlock,
                               pTimesOfFeatureValue);
}

// Reference: carves pBuf into consecutive per-feature slots.
// Slot i starts where slot i-1 ends and holds 2 * pTimesOfFeatureValue[i] uint16_t entries.
// Both pLocationOfFeature[i] and pFeatureValuePointerList[i] are set to the start of slot i,
// for i in [0, kiListSize).
void InitializeHashforFeature_ref (uint32_t* pTimesOfFeatureValue, uint16_t* pBuf, const int32_t kiListSize,
                                   uint16_t** pLocationOfFeature, uint16_t** pFeatureValuePointerList) {
  //assign location pointer
  uint16_t* pBufPos = pBuf;
  for (int32_t i = 0 ; i < kiListSize; ++i) {
    pLocationOfFeature[i] = pFeatureValuePointerList[i] = pBufPos;
    pBufPos += (pTimesOfFeatureValue[i] << 1);
  }
}

// Reference: walks the kiWidth x kiHeight feature map in raster order and, for each position,
// appends the pair (x * 4, y * 4) to the slot of that position's feature value.
// pFeatureValuePointerList[feature] is used as the write cursor and is advanced by two
// entries per position, so the list is modified by this call.
void FillQpelLocationByFeatureValue_ref (uint16_t* pFeatureOfBlock, const int32_t kiWidth, const int32_t kiHeight,
    uint16_t** pFeatureValuePointerList) {
  //assign each pixel's position
  uint16_t* pSrcPointer = pFeatureOfBlock;
  int32_t iQpelY = 0;
  for (int32_t y = 0; y < kiHeight; y++) {
    for (int32_t x = 0; x < kiWidth; x++) {
      uint16_t uiFeature = pSrcPointer[x];
      pFeatureValuePointerList[uiFeature][0] = x << 2;
      pFeatureValuePointerList[uiFeature][1] = iQpelY;
      pFeatureValuePointerList[uiFeature] += 2;
    }
    iQpelY += 4;
    pSrcPointer += kiWidth;
  }
}

// Generates TEST (SVC_ME_FunTest, method): compares a single-block sum implementation (method)
// against its reference (anchor) on SVC_ME_TEST_NUM random 16-row buffers with stride 320.
// The test returns early when flag is non-zero and the CPU does not report that feature.
#define GENERATE_SumOfSingleBlock(anchor, method, flag) \
TEST (SVC_ME_FunTest, method) { \
  uint32_t uiCPUFlags = WelsCPUFeatureDetect (NULL); \
  if ((uiCPUFlags & flag) == 0 && flag != 0) \
    return; \
  ENFORCE_STACK_ALIGN_1D (uint8_t, uiRefBuf, 16*320, 16); \
  int32_t iRes[2]; \
  for (int32_t k = 0; k < SVC_ME_TEST_NUM; k++) { \
    FillWithRandomData (uiRefBuf, 16*320); \
    iRes[0] = anchor (uiRefBuf, 320); \
    iRes[1] = method (uiRefBuf, 320); \
    ASSERT_EQ (iRes[0], iRes[1]); \
  } \
}

GENERATE_SumOfSingleBlock (SumOf8x8SingleBlock_ref, SumOf8x8SingleBlock_c, 0)
GENERATE_SumOfSingleBlock (SumOf16x16SingleBlock_ref, SumOf16x16SingleBlock_c, 0)
#ifdef X86_ASM
GENERATE_SumOfSingleBlock (SumOf8x8SingleBlock_ref, SumOf8x8SingleBlock_sse2, WELS_CPU_SSE2)
GENERATE_SumOfSingleBlock (SumOf16x16SingleBlock_ref, SumOf16x16SingleBlock_sse2, WELS_CPU_SSE2)
#endif
#ifdef HAVE_NEON
GENERATE_SumOfSingleBlock (SumOf8x8SingleBlock_ref, SumOf8x8SingleBlock_neon, WELS_CPU_NEON)
GENERATE_SumOfSingleBlock (SumOf16x16SingleBlock_ref, SumOf16x16SingleBlock_neon, WELS_CPU_NEON)
#endif
#ifdef HAVE_NEON_AARCH64
GENERATE_SumOfSingleBlock (SumOf8x8SingleBlock_ref, SumOf8x8SingleBlock_AArch64_neon, WELS_CPU_NEON)
GENERATE_SumOfSingleBlock (SumOf16x16SingleBlock_ref, SumOf16x16SingleBlock_AArch64_neon, WELS_CPU_NEON)
#endif

// Declares an owning std::vector<_tp> named _nbuff holding _sz elements plus alignment slack,
// and a pointer _nm to the first element inside it whose address is a multiple of _al bytes
// (_al must be a power of two because it is used as a bit mask).
// The vector releases the storage on every exit from the enclosing scope, including the early
// return performed by a failing ASSERT_*, so no explicit cleanup is needed.
#define ENFORCE_NEW_ALIGN_1D(_tp, _nm, _nbuff, _sz, _al) \
  std::vector<_tp> _nbuff ((_sz) + (_al) - 1); \
  _tp* _nm = &_nbuff[0] + ((_al) - 1) - (((uintptr_t) (&_nbuff[0] + ((_al) - 1)) & ((_al) - 1)) / sizeof (_tp));

// Generates TEST (SVC_ME_FunTest, method_<kiWidth>x<kiHeight>): runs the frame-level block-sum
// reference (anchor) and the implementation under test (method) on the same random picture and
// requires identical feature maps and identical feature histograms.
// The test returns early when flag is non-zero and the CPU does not report that feature.
#define GENERATE_SumOfFrame(anchor, method, kiWidth, kiHeight, flag) \
TEST (SVC_ME_FunTest, method##_##kiWidth##x##kiHeight) { \
  uint32_t uiCPUFlags = WelsCPUFeatureDetect (NULL); \
  if ((uiCPUFlags & flag) == 0 && flag != 0) \
    return; \
  ENFORCE_NEW_ALIGN_1D (uint8_t, pRefPicture, pRefPictureBuff, SVC_ME_REF_BUF_SIZE (kiWidth, kiHeight), 16) \
  ENFORCE_NEW_ALIGN_1D (uint16_t, pFeatureOfBlock1, pFeatureOfBlockBuff1, (kiWidth * kiHeight), 16) \
  ENFORCE_NEW_ALIGN_1D (uint16_t, pFeatureOfBlock2, pFeatureOfBlockBuff2, (kiWidth * kiHeight), 16) \
  ENFORCE_NEW_ALIGN_1D (uint32_t, pTimesOfFeatureValue1, pTimesOfFeatureValueBuff1, SVC_ME_FEATURE_VALUE_NUM, 16) \
  ENFORCE_NEW_ALIGN_1D (uint32_t, pTimesOfFeatureValue2, pTimesOfFeatureValueBuff2, SVC_ME_FEATURE_VALUE_NUM, 16) \
  for (int32_t k = 0; k < SVC_ME_TEST_NUM; k++) { \
    FillWithRandomData (pRefPicture, SVC_ME_REF_BUF_SIZE (kiWidth, kiHeight)); \
    memset (pTimesOfFeatureValue1, 0, SVC_ME_FEATURE_VALUE_NUM * sizeof (uint32_t)); \
    memset (pTimesOfFeatureValue2, 0, SVC_ME_FEATURE_VALUE_NUM * sizeof (uint32_t)); \
    anchor (pRefPicture, kiWidth, kiHeight, SVC_ME_REF_STRIDE (kiWidth), pFeatureOfBlock1, pTimesOfFeatureValue1); \
    method (pRefPicture, kiWidth, kiHeight, SVC_ME_REF_STRIDE (kiWidth), pFeatureOfBlock2, pTimesOfFeatureValue2); \
    for (int32_t j = 0; j < kiWidth * kiHeight; j++) { \
      ASSERT_EQ (pFeatureOfBlock1[j], pFeatureOfBlock2[j]); \
    } \
    for (int32_t j = 0; j < SVC_ME_FEATURE_VALUE_NUM; j++) { \
      ASSERT_EQ (pTimesOfFeatureValue1[j], pTimesOfFeatureValue2[j]); \
    } \
  } \
}

// Generates TEST (SVC_ME_FunTest, method_<kiWidth>x<kiHeight>): builds a feature histogram from
// a random picture with SumOf8x8BlockOfFrame_c, then lets the reference (anchor) and the
// implementation under test (method) partition the same location buffer and expects both to
// produce identical slot pointers.
// The test returns early when flag is non-zero and the CPU does not report that feature.
#define GENERATE_InitializeHashforFeature(anchor, method, kiWidth, kiHeight, flag) \
TEST (SVC_ME_FunTest, method##_##kiWidth##x##kiHeight) { \
  uint32_t uiCPUFlags = WelsCPUFeatureDetect (NULL); \
  if ((uiCPUFlags & flag) == 0 && flag != 0) \
    return; \
  ENFORCE_NEW_ALIGN_1D (uint8_t, pRefPicture, pRefPictureBuff, SVC_ME_REF_BUF_SIZE (kiWidth, kiHeight), 16) \
  ENFORCE_NEW_ALIGN_1D (uint16_t, pFeatureOfBlock, pFeatureOfBlockBuff, (kiWidth * kiHeight), 16) \
  ENFORCE_NEW_ALIGN_1D (uint16_t, pLocation1, pLocationBuff1, (kiWidth * kiHeight) * 2, 16) \
  ENFORCE_NEW_ALIGN_1D (uint32_t, pTimesOfFeatureValue, pTimesOfFeatureValueBuff, SVC_ME_FEATURE_VALUE_NUM, 16) \
  ENFORCE_NEW_ALIGN_1D (uint16_t*, pLocationFeature0, pLocationFeature0Buff, SVC_ME_FEATURE_VALUE_NUM, 16) \
  ENFORCE_NEW_ALIGN_1D (uint16_t*, pLocationFeature1, pLocationFeature1Buff, SVC_ME_FEATURE_VALUE_NUM, 16) \
  ENFORCE_NEW_ALIGN_1D (uint16_t*, pFeaturePointValueList0, pFeaturePointValueList0Buff, SVC_ME_FEATURE_VALUE_NUM, 16) \
  ENFORCE_NEW_ALIGN_1D (uint16_t*, pFeaturePointValueList1, pFeaturePointValueList1Buff, SVC_ME_FEATURE_VALUE_NUM, 16) \
  for (int32_t k = 0; k < SVC_ME_TEST_NUM; k++) { \
    FillWithRandomData (pRefPicture, SVC_ME_REF_BUF_SIZE (kiWidth, kiHeight)); \
    memset (pTimesOfFeatureValue, 0, SVC_ME_FEATURE_VALUE_NUM * sizeof (uint32_t)); \
    memset (pLocationFeature0, 0, SVC_ME_FEATURE_VALUE_NUM * sizeof (uint16_t*)); \
    memset (pFeaturePointValueList0, 0, SVC_ME_FEATURE_VALUE_NUM * sizeof (uint16_t*)); \
    memset (pLocationFeature1, 0, SVC_ME_FEATURE_VALUE_NUM * sizeof (uint16_t*)); \
    memset (pFeaturePointValueList1, 0, SVC_ME_FEATURE_VALUE_NUM * sizeof (uint16_t*)); \
    SumOf8x8BlockOfFrame_c (pRefPicture, kiWidth, kiHeight, SVC_ME_REF_STRIDE (kiWidth), pFeatureOfBlock, pTimesOfFeatureValue); \
    int32_t iActSize = SVC_ME_FEATURE_VALUE_NUM; \
    anchor (pTimesOfFeatureValue, pLocation1, iActSize, pLocationFeature0, pFeaturePointValueList0); \
    method (pTimesOfFeatureValue, pLocation1, iActSize, pLocationFeature1, pFeaturePointValueList1); \
    for (int32_t j = 0; j < SVC_ME_FEATURE_VALUE_NUM; j++) { \
      EXPECT_EQ (pLocationFeature0[j], pLocationFeature1[j]); \
      EXPECT_EQ (pFeaturePointValueList0[j], pFeaturePointValueList1[j]); \
    } \
  } \
}

// Generates TEST (SVC_ME_FunTest, method_<kiWidth>x<kiHeight>): prepares two identically
// partitioned location buffers (SumOf8x8BlockOfFrame_c + InitializeHashforFeature_c), lets the
// reference (anchor) fill the first and the implementation under test (method) fill the second,
// and expects the two buffers to match entry by entry.
// The test returns early when flag is non-zero and the CPU does not report that feature.
#define GENERATE_FillQpelLocationByFeatureValue(anchor, method, kiWidth, kiHeight, flag) \
TEST (SVC_ME_FunTest, method##_##kiWidth##x##kiHeight) { \
  uint32_t uiCPUFlags = WelsCPUFeatureDetect (NULL); \
  if ((uiCPUFlags & flag) == 0 && flag != 0) \
    return; \
  ENFORCE_NEW_ALIGN_1D (uint8_t, pRefPicture, pRefPictureBuff, SVC_ME_REF_BUF_SIZE (kiWidth, kiHeight), 16) \
  ENFORCE_NEW_ALIGN_1D (uint16_t, pFeatureOfBlock, pFeatureOfBlockBuff, (kiWidth * kiHeight), 16) \
  ENFORCE_NEW_ALIGN_1D (uint16_t, pLocation1, pLocationBuff1, (kiWidth * kiHeight) * 2, 16) \
  ENFORCE_NEW_ALIGN_1D (uint16_t, pLocation2, pLocationBuff2, (kiWidth * kiHeight) * 2, 16) \
  ENFORCE_NEW_ALIGN_1D (uint32_t, pTimesOfFeatureValue, pTimesOfFeatureValueBuff, SVC_ME_FEATURE_VALUE_NUM, 16) \
  ENFORCE_NEW_ALIGN_1D (uint16_t*, pLocationFeature0, pLocationFeature0Buff, SVC_ME_FEATURE_VALUE_NUM, 16) \
  ENFORCE_NEW_ALIGN_1D (uint16_t*, pLocationFeature1, pLocationFeature1Buff, SVC_ME_FEATURE_VALUE_NUM, 16) \
  ENFORCE_NEW_ALIGN_1D (uint16_t*, pFeaturePointValueList0, pFeaturePointValueList0Buff, SVC_ME_FEATURE_VALUE_NUM, 16) \
  ENFORCE_NEW_ALIGN_1D (uint16_t*, pFeaturePointValueList1, pFeaturePointValueList1Buff, SVC_ME_FEATURE_VALUE_NUM, 16) \
  for (int32_t k = 0; k < SVC_ME_TEST_NUM; k++) { \
    FillWithRandomData (pRefPicture, SVC_ME_REF_BUF_SIZE (kiWidth, kiHeight)); \
    memset (pTimesOfFeatureValue, 0, SVC_ME_FEATURE_VALUE_NUM * sizeof (uint32_t)); \
    memset (pLocationFeature0, 0, SVC_ME_FEATURE_VALUE_NUM * sizeof (uint16_t*)); \
    memset (pFeaturePointValueList0, 0, SVC_ME_FEATURE_VALUE_NUM * sizeof (uint16_t*)); \
    memset (pLocationFeature1, 0, SVC_ME_FEATURE_VALUE_NUM * sizeof (uint16_t*)); \
    memset (pFeaturePointValueList1, 0, SVC_ME_FEATURE_VALUE_NUM * sizeof (uint16_t*)); \
    SumOf8x8BlockOfFrame_c (pRefPicture, kiWidth, kiHeight, SVC_ME_REF_STRIDE (kiWidth), pFeatureOfBlock, pTimesOfFeatureValue); \
    int32_t iActSize = SVC_ME_FEATURE_VALUE_NUM; \
    InitializeHashforFeature_c (pTimesOfFeatureValue, pLocation1, iActSize, pLocationFeature0, pFeaturePointValueList0); \
    InitializeHashforFeature_c (pTimesOfFeatureValue, pLocation2, iActSize, pLocationFeature1, pFeaturePointValueList1); \
    anchor (pFeatureOfBlock, kiWidth, kiHeight, pFeaturePointValueList0); \
    method (pFeatureOfBlock, kiWidth, kiHeight, pFeaturePointValueList1); \
    for (int32_t j = 0; j < kiWidth * kiHeight * 2; j++) { \
      EXPECT_EQ (pLocation1[j], pLocation2[j]); \
    } \
  } \
}

GENERATE_InitializeHashforFeature (InitializeHashforFeature_ref, InitializeHashforFeature_c, 10, 10, 0)
GENERATE_FillQpelLocationByFeatureValue (FillQpelLocationByFeatureValue_ref, FillQpelLocationByFeatureValue_c, 16, 16, 0)
GENERATE_InitializeHashforFeature (InitializeHashforFeature_ref, InitializeHashforFeature_c, 640, 320, 0)
GENERATE_FillQpelLocationByFeatureValue (FillQpelLocationByFeatureValue_ref, FillQpelLocationByFeatureValue_c, 640, 320, 0)
#ifdef X86_ASM
GENERATE_InitializeHashforFeature (InitializeHashforFeature_ref, InitializeHashforFeature_sse2, 10, 10, WELS_CPU_SSE2)
GENERATE_FillQpelLocationByFeatureValue (FillQpelLocationByFeatureValue_ref, FillQpelLocationByFeatureValue_sse2, 16, 16, WELS_CPU_SSE2)
GENERATE_InitializeHashforFeature (InitializeHashforFeature_ref, InitializeHashforFeature_sse2, 640, 320, WELS_CPU_SSE2)
GENERATE_FillQpelLocationByFeatureValue (FillQpelLocationByFeatureValue_ref, FillQpelLocationByFeatureValue_sse2, 640, 320, WELS_CPU_SSE2)
#endif

GENERATE_SumOfFrame (SumOf8x8BlockOfFrame_ref, SumOf8x8BlockOfFrame_c, 1, 1, 0)
GENERATE_SumOfFrame (SumOf16x16BlockOfFrame_ref, SumOf16x16BlockOfFrame_c, 1, 1, 0) GENERATE_SumOfFrame (SumOf8x8BlockOfFrame_ref, SumOf8x8BlockOfFrame_c, 1, 320, 0) GENERATE_SumOfFrame (SumOf16x16BlockOfFrame_ref, SumOf16x16BlockOfFrame_c, 1, 320, 0) GENERATE_SumOfFrame (SumOf8x8BlockOfFrame_ref, SumOf8x8BlockOfFrame_c, 640, 320, 0) GENERATE_SumOfFrame (SumOf16x16BlockOfFrame_ref, SumOf16x16BlockOfFrame_c, 640, 320, 0) #ifdef X86_ASM GENERATE_SumOfFrame (SumOf8x8BlockOfFrame_ref, SumOf8x8BlockOfFrame_sse2, 6, 6, WELS_CPU_SSE2) GENERATE_SumOfFrame (SumOf16x16BlockOfFrame_ref, SumOf16x16BlockOfFrame_sse2, 6, 6, WELS_CPU_SSE2) GENERATE_SumOfFrame (SumOf8x8BlockOfFrame_ref, SumOf8x8BlockOfFrame_sse2, 6, 320, WELS_CPU_SSE2) GENERATE_SumOfFrame (SumOf16x16BlockOfFrame_ref, SumOf16x16BlockOfFrame_sse2, 6, 320, WELS_CPU_SSE2) GENERATE_SumOfFrame (SumOf8x8BlockOfFrame_ref, SumOf8x8BlockOfFrame_sse2, 640, 320, WELS_CPU_SSE2) GENERATE_SumOfFrame (SumOf16x16BlockOfFrame_ref, SumOf16x16BlockOfFrame_sse2, 640, 320, WELS_CPU_SSE2) GENERATE_SumOfFrame (SumOf8x8BlockOfFrame_ref, SumOf8x8BlockOfFrame_sse4, 8, 2, WELS_CPU_SSE41) GENERATE_SumOfFrame (SumOf16x16BlockOfFrame_ref, SumOf16x16BlockOfFrame_sse4, 16, 2, WELS_CPU_SSE41) GENERATE_SumOfFrame (SumOf8x8BlockOfFrame_ref, SumOf8x8BlockOfFrame_sse4, 8, 320, WELS_CPU_SSE41) GENERATE_SumOfFrame (SumOf16x16BlockOfFrame_ref, SumOf16x16BlockOfFrame_sse4, 16, 320, WELS_CPU_SSE41) GENERATE_SumOfFrame (SumOf8x8BlockOfFrame_ref, SumOf8x8BlockOfFrame_sse4, 640, 320, WELS_CPU_SSE41) GENERATE_SumOfFrame (SumOf16x16BlockOfFrame_ref, SumOf16x16BlockOfFrame_sse4, 640, 320, WELS_CPU_SSE41) #endif #ifdef HAVE_NEON GENERATE_SumOfFrame (SumOf8x8BlockOfFrame_ref, SumOf8x8BlockOfFrame_neon, 1, 1, WELS_CPU_NEON) GENERATE_SumOfFrame (SumOf16x16BlockOfFrame_ref, SumOf16x16BlockOfFrame_neon, 1, 1, WELS_CPU_NEON) GENERATE_SumOfFrame (SumOf8x8BlockOfFrame_ref, SumOf8x8BlockOfFrame_neon, 1, 320, WELS_CPU_NEON) GENERATE_SumOfFrame (SumOf16x16BlockOfFrame_ref, 
SumOf16x16BlockOfFrame_neon, 1, 320, WELS_CPU_NEON) GENERATE_SumOfFrame (SumOf8x8BlockOfFrame_ref, SumOf8x8BlockOfFrame_neon, 640, 320, WELS_CPU_NEON) GENERATE_SumOfFrame (SumOf16x16BlockOfFrame_ref, SumOf16x16BlockOfFrame_neon, 640, 320, WELS_CPU_NEON) GENERATE_InitializeHashforFeature (InitializeHashforFeature_ref, InitializeHashforFeature_neon, 10, 10, WELS_CPU_NEON) GENERATE_FillQpelLocationByFeatureValue (FillQpelLocationByFeatureValue_ref, FillQpelLocationByFeatureValue_neon, 16, 16, WELS_CPU_NEON) GENERATE_InitializeHashforFeature (InitializeHashforFeature_ref, InitializeHashforFeature_neon, 640, 320, WELS_CPU_NEON) GENERATE_FillQpelLocationByFeatureValue (FillQpelLocationByFeatureValue_ref, FillQpelLocationByFeatureValue_neon, 640, 320, WELS_CPU_NEON) #endif #ifdef HAVE_NEON_AARCH64 GENERATE_SumOfFrame (SumOf8x8BlockOfFrame_ref, SumOf8x8BlockOfFrame_AArch64_neon, 1, 1, WELS_CPU_NEON) GENERATE_SumOfFrame (SumOf16x16BlockOfFrame_ref, SumOf16x16BlockOfFrame_AArch64_neon, 1, 1, WELS_CPU_NEON) GENERATE_SumOfFrame (SumOf8x8BlockOfFrame_ref, SumOf8x8BlockOfFrame_AArch64_neon, 1, 320, WELS_CPU_NEON) GENERATE_SumOfFrame (SumOf16x16BlockOfFrame_ref, SumOf16x16BlockOfFrame_AArch64_neon, 1, 320, WELS_CPU_NEON) GENERATE_SumOfFrame (SumOf8x8BlockOfFrame_ref, SumOf8x8BlockOfFrame_AArch64_neon, 640, 320, WELS_CPU_NEON) GENERATE_SumOfFrame (SumOf16x16BlockOfFrame_ref, SumOf16x16BlockOfFrame_AArch64_neon, 640, 320, WELS_CPU_NEON) GENERATE_InitializeHashforFeature (InitializeHashforFeature_ref, InitializeHashforFeature_AArch64_neon, 10, 10, WELS_CPU_NEON) GENERATE_FillQpelLocationByFeatureValue (FillQpelLocationByFeatureValue_ref, FillQpelLocationByFeatureValue_AArch64_neon, 16, 16, WELS_CPU_NEON) GENERATE_InitializeHashforFeature (InitializeHashforFeature_ref, InitializeHashforFeature_AArch64_neon, 640, 320, WELS_CPU_NEON) GENERATE_FillQpelLocationByFeatureValue (FillQpelLocationByFeatureValue_ref, FillQpelLocationByFeatureValue_AArch64_neon, 640, 320, WELS_CPU_NEON) #endif