ref: 57bd721b2f082801590a8ebe9b03e52a08a56f18
parent: 82a0d3e4a2236e3913b91e7e36e19dc667bc2a80
author: huili2 <huili2@cisco.com>
date: Mon Jun 8 07:06:32 EDT 2015
add sub8x8 mode decision functions
--- a/codec/common/src/mc.cpp
+++ b/codec/common/src/mc.cpp
@@ -1292,8 +1292,10 @@
#if defined (X86_ASM)
if (uiCpuFlag & WELS_CPU_SSE2) {
pMcFuncs->pfLumaHalfpelHor = McHorVer20Width9Or17_sse2;
+#if 1 //could not work well for sub8x8: should disable it for now, or bugfix for it!
pMcFuncs->pfLumaHalfpelVer = McHorVer02Height9Or17_sse2;
pMcFuncs->pfLumaHalfpelCen = McHorVer22Width9Or17Height9Or17_sse2;
+#endif
pMcFuncs->pfSampleAveraging = PixelAvg_sse2;
pMcFuncs->pMcChromaFunc = McChroma_sse2;
pMcFuncs->pMcLumaFunc = McLuma_sse2;
--- a/codec/encoder/core/inc/mv_pred.h
+++ b/codec/encoder/core/inc/mv_pred.h
@@ -84,6 +84,30 @@
SMVUnitXY* pMv);
/*!
+ * \brief store one P_4x4 partition: its MV into pCurMb->sMv, its ref index and MV into pMbCache's caches
+ * \param pMbCache,pCurMb MB cache and current MB that are written
+ * \param kiPartIdx,kiRef,pMv 4x4 block index (looked up in the scan/cache index tables), ref index and MV to store
+ */
+void UpdateP4x4MotionInfo (SMbCache* pMbCache, SMB* pCurMb, const int32_t kiPartIdx, const int8_t kiRef,
+ SMVUnitXY* pMv);
+
+/*!
+ * \brief store one P_8x4 partition: its MV into two pCurMb->sMv slots, its ref index and MV into two pMbCache slots
+ * \param pMbCache,pCurMb MB cache and current MB that are written
+ * \param kiPartIdx,kiRef,pMv index of the partition's first 4x4 block (the slot at +1 is also written), ref index and MV
+ */
+void UpdateP8x4MotionInfo (SMbCache* pMbCache, SMB* pCurMb, const int32_t kiPartIdx, const int8_t kiRef,
+ SMVUnitXY* pMv);
+
+/*!
+ * \brief store one P_4x8 partition: its MV into two pCurMb->sMv slots, its ref index and MV into two pMbCache slots
+ * \param pMbCache,pCurMb MB cache and current MB that are written
+ * \param kiPartIdx,kiRef,pMv index of the partition's first 4x4 block (sMv slot +4 / cache slot +6 also written), ref index and MV
+ */
+void UpdateP4x8MotionInfo (SMbCache* pMbCache, SMB* pCurMb, const int32_t kiPartIdx, const int8_t kiRef,
+ SMVUnitXY* pMv);
+
+/*!
* \brief get the motion predictor for 4*4 or 8*8 or 16*16 block
* \param
* \param output mvp_x and mvp_y
@@ -135,6 +159,7 @@
* \param
*/
void UpdateP8x16Motion2Cache (SMbCache* pMbCache, int32_t iPartIdx, int8_t iRef, SMVUnitXY* pMv);
+
/*!
* \brief only update pMv cache for current MB, only for P_8x8
* \param
@@ -141,5 +166,26 @@
* \param
*/
void UpdateP8x8Motion2Cache (SMbCache* pMbCache, int32_t iPartIdx, int8_t iRef, SMVUnitXY* pMv);
+
+/*!
+ * \brief write ref index and MV of one P_4x4 partition into pMbCache only (no SMB is touched)
+ * \param pMbCache,iPartIdx cache to update and 4x4 block index used to look up the cache slot
+ * \param iRef,pMv ref index and MV to store
+ */
+void UpdateP4x4Motion2Cache (SMbCache* pMbCache, int32_t iPartIdx, int8_t iRef, SMVUnitXY* pMv);
+
+/*!
+ * \brief write ref index and MV of one P_8x4 partition into two slots of pMbCache only (no SMB is touched)
+ * \param pMbCache,iPartIdx cache to update and index of the partition's first 4x4 block (cache slot +1 is also written)
+ * \param iRef,pMv ref index and MV to store
+ */
+void UpdateP8x4Motion2Cache (SMbCache* pMbCache, int32_t iPartIdx, int8_t iRef, SMVUnitXY* pMv);
+
+/*!
+ * \brief write ref index and MV of one P_4x8 partition into two slots of pMbCache only (no SMB is touched)
+ * \param pMbCache,iPartIdx cache to update and index of the partition's first 4x4 block (cache slot +6 is also written)
+ * \param iRef,pMv ref index and MV to store
+ */
+void UpdateP4x8Motion2Cache (SMbCache* pMbCache, int32_t iPartIdx, int8_t iRef, SMVUnitXY* pMv);
}
#endif//WELS_MV_PRED_H__
--- a/codec/encoder/core/inc/svc_base_layer_md.h
+++ b/codec/encoder/core/inc/svc_base_layer_md.h
@@ -63,6 +63,9 @@
int32_t WelsMdP16x8 (SWelsFuncPtrList* pFunc, SDqLayer* pCurDqLayer, SWelsMD* pWelsMd, SSlice* pSlice);
int32_t WelsMdP8x16 (SWelsFuncPtrList* pFunc, SDqLayer* pCurDqLayer, SWelsMD* pWelsMd, SSlice* pSlice);
int32_t WelsMdP8x8 (SWelsFuncPtrList* pFunc, SDqLayer* pCurDqLayer, SWelsMD* pWelsMd, SSlice* pSlice);
+int32_t WelsMdP4x4 (SWelsFuncPtrList* pFunc, SDqLayer* pCurDqLayer, SWelsMD* pWelsMd, SSlice* pSlice, const int32_t ki8x8Idx); // motion search for the four 4x4 blocks of 8x8 block ki8x8Idx; returns their summed uiSatdCost
+int32_t WelsMdP8x4 (SWelsFuncPtrList* pFunc, SDqLayer* pCurDqLayer, SWelsMD* pWelsMd, SSlice* pSlice, const int32_t ki8x8Idx); // same for the two 8x4 halves of 8x8 block ki8x8Idx
+int32_t WelsMdP4x8 (SWelsFuncPtrList* pFunc, SDqLayer* pCurDqLayer, SWelsMD* pWelsMd, SSlice* pSlice, const int32_t ki8x8Idx); // same for the two 4x8 halves of 8x8 block ki8x8Idx
/*static*/ void WelsMdInterInit (sWelsEncCtx* pEncCtx, SSlice* pSlice, SMB* pCurMb, const int32_t kiSliceFirstMbXY);
/*static*/ void WelsMdInterFinePartition (sWelsEncCtx* pEnc, SWelsMD* pMd, SSlice* pSlice, SMB* pCurMb, int32_t bestCost);
/*static*/ void WelsMdInterFinePartitionVaa (sWelsEncCtx* pEnc, SWelsMD* pMd, SSlice* pSlice, SMB* pCurMb, int32_t bestCost);
--- a/codec/encoder/core/inc/svc_enc_macroblock.h
+++ b/codec/encoder/core/inc/svc_enc_macroblock.h
@@ -50,6 +50,7 @@
/*************************mb_layer() syntax and generated********************************/
/*mb_layer():*/
Mb_Type uiMbType; // including MB detailed partition type, number and type of reference list
+Mb_Type uiSubMbType[4]; // sub MB types
int32_t iMbXY; // offset position of MB top left point based
int16_t iMbX; // position of MB in horizontal axis [0..32767]
int16_t iMbY; // position of MB in vertical axis [0..32767]
@@ -71,7 +72,7 @@
uint16_t uiSliceIdc; // 2^16=65536 > MaxFS(36864) of level 5.1; AVC: pFirstMbInSlice?; SVC: (pFirstMbInSlice << 7) | ((uiDependencyId << 4) | uiQualityId);
uint32_t uiChromPredMode;
int32_t iLumaDQp;
-SMVUnitXY sMvd[4];
+SMVUnitXY sMvd[MB_BLOCK4x4_NUM]; //only for CABAC writing; storage structure the same as sMv, in 4x4 scan order.
int32_t iCbpDc;
//uint8_t reserved_filling_bytes[1]; // not deleting this line for further changes of this structure. filling bytes reserved to make structure aligned with 4 bytes, higher cache hit on less structure size by 2 cache lines( 2 * 64 bytes) once hit
} SMB, *PMb;
--- a/codec/encoder/core/src/mv_pred.cpp
+++ b/codec/encoder/core/src/mv_pred.cpp
@@ -301,7 +301,51 @@
pMvComp->sMotionVectorCache[kiCacheIdx6] =
pMvComp->sMotionVectorCache[kiCacheIdx7] = *pMv;
}
+//store one P4x4 partition: MV into pCurMb->sMv, ref index + MV into the Mb_cache (pCurMb's ref index is not written here)
+void UpdateP4x4MotionInfo (SMbCache* pMbCache, SMB* pCurMb, const int32_t kiPartIdx, const int8_t kiRef,
+ SMVUnitXY* pMv) {
+ SMVComponentUnit* pMvComp = &pMbCache->sMvComponents;
+ const int16_t kiScan4Idx = g_kuiMbCountScan4Idx[kiPartIdx]; //slot of this block in pCurMb->sMv
+ const int16_t kiCacheIdx = g_kuiCache30ScanIdx[kiPartIdx]; //slot of this block in the cache arrays
+ //mb: only the MV is stored in the MB structure
+ pCurMb->sMv[kiScan4Idx] = *pMv;
+ //cache: both the ref index and the MV are stored
+ pMvComp->iRefIndexCache[kiCacheIdx] = kiRef;
+ pMvComp->sMotionVectorCache[kiCacheIdx] = *pMv;
+}
+//store one P8x4 partition: MV into two pCurMb->sMv slots, ref index + MV into two Mb_cache slots (pCurMb's ref index is not written here)
+void UpdateP8x4MotionInfo (SMbCache* pMbCache, SMB* pCurMb, const int32_t kiPartIdx, const int8_t kiRef,
+ SMVUnitXY* pMv) {
+ SMVComponentUnit* pMvComp = &pMbCache->sMvComponents;
+ const int16_t kiScan4Idx = g_kuiMbCountScan4Idx[kiPartIdx]; //slot of the partition's first 4x4 block in pCurMb->sMv
+ const int16_t kiCacheIdx = g_kuiCache30ScanIdx[kiPartIdx]; //slot of the partition's first 4x4 block in the cache arrays
+
+ //mb: the slot at +1 is presumably the 4x4 block to the right (table layout not visible here -- confirm)
+ pCurMb->sMv[ kiScan4Idx] = *pMv;
+ pCurMb->sMv[1 + kiScan4Idx] = *pMv;
+ //cache: same two blocks, ref index and MV
+ pMvComp->iRefIndexCache[ kiCacheIdx] = kiRef;
+ pMvComp->iRefIndexCache[1 + kiCacheIdx] = kiRef;
+ pMvComp->sMotionVectorCache[ kiCacheIdx] = *pMv;
+ pMvComp->sMotionVectorCache[1 + kiCacheIdx] = *pMv;
+}
+//store one P4x8 partition: MV into two pCurMb->sMv slots, ref index + MV into two Mb_cache slots (pCurMb's ref index is not written here)
+void UpdateP4x8MotionInfo (SMbCache* pMbCache, SMB* pCurMb, const int32_t kiPartIdx, const int8_t kiRef,
+ SMVUnitXY* pMv) {
+ SMVComponentUnit* pMvComp = &pMbCache->sMvComponents;
+ const int16_t kiScan4Idx = g_kuiMbCountScan4Idx[kiPartIdx]; //slot of the partition's first 4x4 block in pCurMb->sMv
+ const int16_t kiCacheIdx = g_kuiCache30ScanIdx[kiPartIdx]; //slot of the partition's first 4x4 block in the cache arrays
+
+ //mb: the slot at +4 is presumably the 4x4 block directly below (sMv rows of 4 -- confirm)
+ pCurMb->sMv[ kiScan4Idx] = *pMv;
+ pCurMb->sMv[4 + kiScan4Idx] = *pMv;
+ //cache: the slot at +6 is presumably the same lower block (cache rows of 6 -- layout not visible here, confirm)
+ pMvComp->iRefIndexCache[ kiCacheIdx] = kiRef;
+ pMvComp->iRefIndexCache[6 + kiCacheIdx] = kiRef;
+ pMvComp->sMotionVectorCache[ kiCacheIdx] = *pMv;
+ pMvComp->sMotionVectorCache[6 + kiCacheIdx] = *pMv;
+}
//=========================update motion info(MV and ref_idx) into Mb_cache==========================
//update pMv and uiRefIndex cache only for Mb_cache, only for P_16*16 (SKIP inclusive)
@@ -359,4 +403,34 @@
pMvComp->sMotionVectorCache[7 + kuiCacheIdx] = *pMv;
}
+//write ref index (pRef, a value despite the p prefix) and MV of one P4x4 partition into Mb_cache only
+void UpdateP4x4Motion2Cache (SMbCache* pMbCache, int32_t iPartIdx, int8_t pRef, SMVUnitXY* pMv) {
+ SMVComponentUnit* pMvComp = &pMbCache->sMvComponents;
+ const uint8_t kuiCacheIdx = g_kuiCache30ScanIdx[iPartIdx]; //cache slot of this 4x4 block
+
+ pMvComp->iRefIndexCache [kuiCacheIdx] = pRef;
+ pMvComp->sMotionVectorCache[kuiCacheIdx] = *pMv;
+}
+
+//write ref index (pRef, a value despite the p prefix) and MV of one P8x4 partition into two Mb_cache slots only
+void UpdateP8x4Motion2Cache (SMbCache* pMbCache, int32_t iPartIdx, int8_t pRef, SMVUnitXY* pMv) {
+ SMVComponentUnit* pMvComp = &pMbCache->sMvComponents;
+ const uint8_t kuiCacheIdx = g_kuiCache30ScanIdx[iPartIdx]; //cache slot of the partition's first 4x4 block
+
+ pMvComp->iRefIndexCache [ kuiCacheIdx] = //chained assignment: both slots receive pRef
+ pMvComp->iRefIndexCache [1 + kuiCacheIdx] = pRef;
+ pMvComp->sMotionVectorCache [ kuiCacheIdx] = //chained assignment: both slots receive *pMv
+ pMvComp->sMotionVectorCache[1 + kuiCacheIdx] = *pMv;
+}
+
+//write ref index (pRef, a value despite the p prefix) and MV of one P4x8 partition into two Mb_cache slots only
+void UpdateP4x8Motion2Cache (SMbCache* pMbCache, int32_t iPartIdx, int8_t pRef, SMVUnitXY* pMv) {
+ SMVComponentUnit* pMvComp = &pMbCache->sMvComponents;
+ const uint8_t kuiCacheIdx = g_kuiCache30ScanIdx[iPartIdx]; //cache slot of the partition's first 4x4 block
+
+ pMvComp->iRefIndexCache [ kuiCacheIdx] = //chained assignment: slots +0 and +6 receive pRef
+ pMvComp->iRefIndexCache [6 + kuiCacheIdx] = pRef;
+ pMvComp->sMotionVectorCache [ kuiCacheIdx] = //chained assignment: slots +0 and +6 receive *pMv
+ pMvComp->sMotionVectorCache[6 + kuiCacheIdx] = *pMv;
+}
} // namespace WelsEnc
--- a/codec/encoder/core/src/svc_base_layer_md.cpp
+++ b/codec/encoder/core/src/svc_base_layer_md.cpp
@@ -1117,6 +1117,124 @@
return iCostP8x8;
}
+int32_t WelsMdP4x4 (SWelsFuncPtrList* pFunc, SDqLayer* pCurDqLayer, SWelsMD* pWelsMd, SSlice* pSlice,
+ const int32_t ki8x8Idx) { //motion search for the four 4x4 blocks of 8x8 block ki8x8Idx; returns the sum of their uiSatdCost
+ SMbCache* pMbCache = &pSlice->sMbCacheInfo;
+ int32_t iLineSizeEnc = pCurDqLayer->iEncStride[0];
+ int32_t iLineSizeRef = pCurDqLayer->pRefPic->iLineSize[0];
+ SWelsME* sMe4x4;
+ int32_t i4x4Idx, iIdxX, iIdxY, iPixelX, iPixelY, iStrideEnc, iStrideRef;
+ int32_t iCostP4x4 = 0;
+ for (i4x4Idx = 0; i4x4Idx < 4; ++i4x4Idx) {
+ int32_t iPartIdx = (ki8x8Idx << 2) + i4x4Idx; //index of this 4x4 block within the MB (four per 8x8 block)
+ iIdxX = ((ki8x8Idx & 1) << 1) + (i4x4Idx & 1); //block column inside the MB, in 4x4 units
+ iIdxY = ((ki8x8Idx >> 1) << 1) + (i4x4Idx >> 1); //block row inside the MB, in 4x4 units
+ iPixelX = (iIdxX << 2); //same position in pixels
+ iPixelY = (iIdxY << 2);
+ iStrideEnc = iPixelX + (iPixelY * iLineSizeEnc); //offset of the block from pEncMb[0]
+ iStrideRef = iPixelX + (iPixelY * iLineSizeRef); //offset of the block from pRefMb[0]
+
+ sMe4x4 = &pWelsMd->sMe.sMe4x4[ki8x8Idx][i4x4Idx]; //per-block ME state, kept in pWelsMd after this function returns
+ InitMe (*pWelsMd, BLOCK_4x4,
+ pMbCache->SPicData.pEncMb[0] + iStrideEnc,
+ pMbCache->SPicData.pRefMb[0] + iStrideRef,
+ pCurDqLayer->pRefPic->pScreenBlockFeatureStorage,
+ *sMe4x4);
+ //not putting these three lines below into InitMe to avoid judging mode in InitMe
+ sMe4x4->iCurMeBlockPixX = pWelsMd->iMbPixX + iPixelX;
+ sMe4x4->iCurMeBlockPixY = pWelsMd->iMbPixY + iPixelY;
+ sMe4x4->uSadPredISatd.uiSadPred = pWelsMd->iSadPredMb >> 2; //a quarter of the MB-level SAD prediction
+
+ pSlice->sMvc[0] = sMe4x4->sMvBase; //exactly one MV candidate: sMvBase
+ pSlice->uiMvcNum = 1;
+
+ PredMv (&pMbCache->sMvComponents, iPartIdx, 1, pWelsMd->uiRef, & (sMe4x4->sMvp)); //third argument 1: presumably block width in 4x4 units -- confirm
+ pFunc->pfMotionSearch[0] (pFunc, pCurDqLayer, sMe4x4, pSlice);
+ UpdateP4x4Motion2Cache (pMbCache, iPartIdx, pWelsMd->uiRef, & (sMe4x4->sMv)); //store the found MV in the cache that PredMv reads
+ iCostP4x4 += sMe4x4->uiSatdCost;
+ }
+ return iCostP4x4;
+}
+
+int32_t WelsMdP8x4 (SWelsFuncPtrList* pFunc, SDqLayer* pCurDqLayer, SWelsMD* pWelsMd, SSlice* pSlice,
+ const int32_t ki8x8Idx) { //motion search for the two 8x4 halves of 8x8 block ki8x8Idx; returns the sum of their uiSatdCost
+ SMbCache* pMbCache = &pSlice->sMbCacheInfo;
+ int32_t iLineSizeEnc = pCurDqLayer->iEncStride[0];
+ int32_t iLineSizeRef = pCurDqLayer->pRefPic->iLineSize[0];
+ SWelsME* sMe8x4;
+ int32_t i8x4Idx, iIdxX, iIdxY, iPixelX, iPixelY, iStrideEnc, iStrideRef;
+ int32_t iCostP8x4 = 0;
+ for (i8x4Idx = 0; i8x4Idx < 2; ++i8x4Idx) {
+ int32_t iPartIdx = (ki8x8Idx << 2) + (i8x4Idx << 1); //index of the partition's first 4x4 block: +0 for the first half, +2 for the second
+ iIdxX = ((ki8x8Idx & 1) << 1); //block column inside the MB, in 4x4 units (both halves share it)
+ iIdxY = ((ki8x8Idx >> 1) << 1) + i8x4Idx; //block row inside the MB, in 4x4 units
+ iPixelX = (iIdxX << 2); //same position in pixels
+ iPixelY = (iIdxY << 2);
+ iStrideEnc = iPixelX + (iPixelY * iLineSizeEnc); //offset of the block from pEncMb[0]
+ iStrideRef = iPixelX + (iPixelY * iLineSizeRef); //offset of the block from pRefMb[0]
+
+ sMe8x4 = &pWelsMd->sMe.sMe8x4[ki8x8Idx][i8x4Idx]; //per-block ME state, kept in pWelsMd after this function returns
+ InitMe (*pWelsMd, BLOCK_8x4,
+ pMbCache->SPicData.pEncMb[0] + iStrideEnc,
+ pMbCache->SPicData.pRefMb[0] + iStrideRef,
+ pCurDqLayer->pRefPic->pScreenBlockFeatureStorage,
+ *sMe8x4);
+ //not putting these three lines below into InitMe to avoid judging mode in InitMe
+ sMe8x4->iCurMeBlockPixX = pWelsMd->iMbPixX + iPixelX;
+ sMe8x4->iCurMeBlockPixY = pWelsMd->iMbPixY + iPixelY;
+ sMe8x4->uSadPredISatd.uiSadPred = pWelsMd->iSadPredMb >> 2; //a quarter of the MB-level SAD prediction
+
+ pSlice->sMvc[0] = sMe8x4->sMvBase; //exactly one MV candidate: sMvBase
+ pSlice->uiMvcNum = 1;
+
+ PredMv (&pMbCache->sMvComponents, iPartIdx, 2, pWelsMd->uiRef, & (sMe8x4->sMvp)); //third argument 2: presumably block width in 4x4 units -- confirm
+ pFunc->pfMotionSearch[0] (pFunc, pCurDqLayer, sMe8x4, pSlice);
+ UpdateP8x4Motion2Cache (pMbCache, iPartIdx, pWelsMd->uiRef, & (sMe8x4->sMv)); //store the found MV in the cache that PredMv reads
+ iCostP8x4 += sMe8x4->uiSatdCost;
+ }
+ return iCostP8x4;
+}
+
+int32_t WelsMdP4x8 (SWelsFuncPtrList* pFunc, SDqLayer* pCurDqLayer, SWelsMD* pWelsMd, SSlice* pSlice,
+ const int32_t ki8x8Idx) { //motion search for the two 4x8 halves of 8x8 block ki8x8Idx; returns the sum of their uiSatdCost
+ //Wayne, to be modified (original author's open note; what is to be modified is not stated)
+ SMbCache* pMbCache = &pSlice->sMbCacheInfo;
+ int32_t iLineSizeEnc = pCurDqLayer->iEncStride[0];
+ int32_t iLineSizeRef = pCurDqLayer->pRefPic->iLineSize[0];
+ SWelsME* sMe4x8;
+ int32_t i4x8Idx, iIdxX, iIdxY, iPixelX, iPixelY, iStrideEnc, iStrideRef;
+ int32_t iCostP4x8 = 0;
+ for (i4x8Idx = 0; i4x8Idx < 2; ++i4x8Idx) {
+ int32_t iPartIdx = (ki8x8Idx << 2) + i4x8Idx; //index of the partition's first 4x4 block: +0 for the first half, +1 for the second
+ iIdxX = ((ki8x8Idx & 1) << 1) + i4x8Idx; //block column inside the MB, in 4x4 units
+ iIdxY = ((ki8x8Idx >> 1) << 1); //block row inside the MB, in 4x4 units (both halves share it)
+ iPixelX = (iIdxX << 2); //same position in pixels
+ iPixelY = (iIdxY << 2);
+ iStrideEnc = iPixelX + (iPixelY * iLineSizeEnc); //offset of the block from pEncMb[0]
+ iStrideRef = iPixelX + (iPixelY * iLineSizeRef); //offset of the block from pRefMb[0]
+
+ sMe4x8 = &pWelsMd->sMe.sMe4x8[ki8x8Idx][i4x8Idx]; //per-block ME state, kept in pWelsMd after this function returns
+ InitMe (*pWelsMd, BLOCK_4x8,
+ pMbCache->SPicData.pEncMb[0] + iStrideEnc,
+ pMbCache->SPicData.pRefMb[0] + iStrideRef,
+ pCurDqLayer->pRefPic->pScreenBlockFeatureStorage,
+ *sMe4x8);
+ //not putting these three lines below into InitMe to avoid judging mode in InitMe
+ sMe4x8->iCurMeBlockPixX = pWelsMd->iMbPixX + iPixelX;
+ sMe4x8->iCurMeBlockPixY = pWelsMd->iMbPixY + iPixelY;
+ sMe4x8->uSadPredISatd.uiSadPred = pWelsMd->iSadPredMb >> 2; //a quarter of the MB-level SAD prediction
+
+ pSlice->sMvc[0] = sMe4x8->sMvBase; //exactly one MV candidate: sMvBase
+ pSlice->uiMvcNum = 1;
+
+ PredMv (&pMbCache->sMvComponents, iPartIdx, 1, pWelsMd->uiRef, & (sMe4x8->sMvp)); //third argument 1: presumably block width in 4x4 units -- confirm
+ pFunc->pfMotionSearch[0] (pFunc, pCurDqLayer, sMe4x8, pSlice);
+ UpdateP4x8Motion2Cache (pMbCache, iPartIdx, pWelsMd->uiRef, & (sMe4x8->sMv)); //store the found MV in the cache that PredMv reads
+ iCostP4x8 += sMe4x8->uiSatdCost;
+ }
+ return iCostP4x8;
+}
+
void WelsMdInterFinePartition (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SSlice* pSlice, SMB* pCurMb, int32_t iBestCost) {
SDqLayer* pCurDqLayer = pEncCtx->pCurDqLayer;
// SMbCache *pMbCache = &pSlice->sMbCacheInfo;
@@ -1129,6 +1247,7 @@
if (iCost < iBestCost) {
int32_t iCostPart;
pCurMb->uiMbType = MB_TYPE_8x8;
+ pCurMb->uiSubMbType[0] = pCurMb->uiSubMbType[1] = pCurMb->uiSubMbType[2] = pCurMb->uiSubMbType[3] = SUB_MB_TYPE_8x8;
// WelsLog( pEncCtx, WELS_LOG_INFO, "WelsMdP16x8, p_ref[0]= 0x%p", pMbCache->SPicData.pRefMb[0]);
iCostPart = WelsMdP16x8 (pEncCtx->pFuncList, pCurDqLayer, pWelsMd, pSlice);
@@ -1190,6 +1309,7 @@
if (iCostP8x8 < iBestCost) {
iBestCost = iCostP8x8;
pCurMb->uiMbType = MB_TYPE_8x8;
+ pCurMb->uiSubMbType[0] = pCurMb->uiSubMbType[1] = pCurMb->uiSubMbType[2] = pCurMb->uiSubMbType[3] = SUB_MB_TYPE_8x8;
}
break;
@@ -1198,6 +1318,7 @@
if (iCostP8x8 < iBestCost) {
iBestCost = iCostP8x8;
pCurMb->uiMbType = MB_TYPE_8x8;
+ pCurMb->uiSubMbType[0] = pCurMb->uiSubMbType[1] = pCurMb->uiSubMbType[2] = pCurMb->uiSubMbType[3] = SUB_MB_TYPE_8x8;
iCostP16x8 = WelsMdP16x8 (pEncCtx->pFuncList, pCurDqLayer, pWelsMd, pSlice);
if (iCostP16x8 <= iBestCost) {
@@ -1426,6 +1547,32 @@
const int32_t g_kiPixStrideIdx8x8[4] = { 0, ME_REFINE_BUF_WIDTH_BLK8,
ME_REFINE_BUF_STRIDE_BLK8, ME_REFINE_BUF_STRIDE_BLK8 + ME_REFINE_BUF_WIDTH_BLK8
};
+const int32_t g_kiPixStrideIdx4x4[4][4] = { //offsets handed to InitMeRefinePointer, indexed [8x8 block][4x4 block inside it]
+ {
+ 0,
+ 0 + ME_REFINE_BUF_WIDTH_BLK4,
+ 0 + ME_REFINE_BUF_STRIDE_BLK4,
+ 0 + ME_REFINE_BUF_WIDTH_BLK4 + ME_REFINE_BUF_STRIDE_BLK4
+ }, //[0][]: 8x8 block 0, base offset 0
+ {
+ ME_REFINE_BUF_WIDTH_BLK8,
+ ME_REFINE_BUF_WIDTH_BLK8 + ME_REFINE_BUF_WIDTH_BLK4,
+ ME_REFINE_BUF_WIDTH_BLK8 + ME_REFINE_BUF_STRIDE_BLK4,
+ ME_REFINE_BUF_WIDTH_BLK8 + ME_REFINE_BUF_WIDTH_BLK4 + ME_REFINE_BUF_STRIDE_BLK4
+ }, //[1][]: 8x8 block 1, base offset WIDTH_BLK8
+ {
+ ME_REFINE_BUF_STRIDE_BLK8,
+ ME_REFINE_BUF_STRIDE_BLK8 + ME_REFINE_BUF_WIDTH_BLK4,
+ ME_REFINE_BUF_STRIDE_BLK8 + ME_REFINE_BUF_STRIDE_BLK4,
+ ME_REFINE_BUF_STRIDE_BLK8 + ME_REFINE_BUF_WIDTH_BLK4 + ME_REFINE_BUF_STRIDE_BLK4
+ }, //[2][]: 8x8 block 2, base offset STRIDE_BLK8
+ {
+ ME_REFINE_BUF_STRIDE_BLK8 + ME_REFINE_BUF_WIDTH_BLK8,
+ ME_REFINE_BUF_STRIDE_BLK8 + ME_REFINE_BUF_WIDTH_BLK8 + ME_REFINE_BUF_WIDTH_BLK4,
+ ME_REFINE_BUF_STRIDE_BLK8 + ME_REFINE_BUF_WIDTH_BLK8 + ME_REFINE_BUF_STRIDE_BLK4,
+ ME_REFINE_BUF_STRIDE_BLK8 + ME_REFINE_BUF_WIDTH_BLK8 + ME_REFINE_BUF_WIDTH_BLK4 + ME_REFINE_BUF_STRIDE_BLK4
+ } //[3][]: 8x8 block 3, base offset STRIDE_BLK8 + WIDTH_BLK8
+};
void WelsMdInterMbRefinement (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SMB* pCurMb, SMbCache* pMbCache) {
SDqLayer* pCurDqLayer = pEncCtx->pCurDqLayer;
@@ -1436,7 +1583,7 @@
int32_t iBestSadCost = 0, iBestSatdCost = 0;
SMeRefinePointer sMeRefine;
- int32_t i, iIdx, iPixStride;
+ int32_t i, j, iIdx, iPixStride;
uint8_t* pRefCb = pMbCache->SPicData.pRefMb[1];
uint8_t* pRefCr = pMbCache->SPicData.pRefMb[2];
@@ -1536,40 +1683,141 @@
}
break;
case MB_TYPE_8x8:
- sMeRefine.pfCopyBlockByMode = pFunc->pfCopy8x8Aligned;
+ pMbCache->sMvComponents.iRefIndexCache [9] = pMbCache->sMvComponents.iRefIndexCache [21] = REF_NOT_AVAIL;
for (i = 0; i < 4; i++) {
int32_t iBlk8Idx = i << 2; //0, 4, 8, 12
- int32_t iBlk4X, iBlk4Y;
+ int32_t iBlk4X, iBlk4Y, iBlk4x4Idx;
pCurMb->pRefIndex[i] = pWelsMd->uiRef;
+ switch (pCurMb->uiSubMbType[i]) {
+ case SUB_MB_TYPE_8x8:
+ sMeRefine.pfCopyBlockByMode = pFunc->pfCopy8x8Aligned;
+ //luma
+ InitMeRefinePointer (&sMeRefine, pMbCache, g_kiPixStrideIdx8x8[i]);
+ PredMv (&pMbCache->sMvComponents, iBlk8Idx, 2, pWelsMd->uiRef, &pWelsMd->sMe.sMe8x8[i].sMvp);
+ MeRefineFracPixel (pEncCtx, pDstLuma + g_kuiSmb4AddrIn256[iBlk8Idx], &pWelsMd->sMe.sMe8x8[i], &sMeRefine, 8, 8);
+ UpdateP8x8MotionInfo (pMbCache, pCurMb, iBlk8Idx, pWelsMd->uiRef, &pWelsMd->sMe.sMe8x8[i].sMv);
+ pMbCache->sMbMvp[g_kuiMbCountScan4Idx[iBlk8Idx]] = pWelsMd->sMe.sMe8x8[i].sMvp;
+ iBestSadCost += pWelsMd->sMe.sMe8x8[i].uiSadCost;
+ iBestSatdCost += pWelsMd->sMe.sMe8x8[i].uiSatdCost;
- //luma
- InitMeRefinePointer (&sMeRefine, pMbCache, g_kiPixStrideIdx8x8[i]);
- PredMv (&pMbCache->sMvComponents, iBlk8Idx, 2, pWelsMd->uiRef, &pWelsMd->sMe.sMe8x8[i].sMvp);
- MeRefineFracPixel (pEncCtx, pDstLuma + g_kuiSmb4AddrIn256[iBlk8Idx], &pWelsMd->sMe.sMe8x8[i], &sMeRefine, 8, 8);
- UpdateP8x8MotionInfo (pMbCache, pCurMb, iBlk8Idx, pWelsMd->uiRef, &pWelsMd->sMe.sMe8x8[i].sMv);
- pMbCache->sMbMvp[i] = pWelsMd->sMe.sMe8x8[i].sMvp;
- iBestSadCost += pWelsMd->sMe.sMe8x8[i].uiSadCost;
- iBestSatdCost += pWelsMd->sMe.sMe8x8[i].uiSatdCost;
+ //chroma
+ pMv = &pWelsMd->sMe.sMe8x8[i].sMv;
+ iMvStride = (pMv->iMvY >> 3) * iLineSizeRefUV + (pMv->iMvX >> 3);
- //chroma
- pMv = &pWelsMd->sMe.sMe8x8[i].sMv;
- iMvStride = (pMv->iMvY >> 3) * iLineSizeRefUV + (pMv->iMvX >> 3);
+ iBlk4X = (i & 1) << 2;
+ iBlk4Y = (i >> 1) << 2;
+ iRefBlk4Stride = iBlk4Y * iLineSizeRefUV + iBlk4X;
+ iDstBlk4Stride = (iBlk4Y << 3) + iBlk4X;
- iBlk4X = (i & 1) << 2;
- iBlk4Y = (i >> 1) << 2;
- iRefBlk4Stride = iBlk4Y * iLineSizeRefUV + iBlk4X;
- iDstBlk4Stride = (iBlk4Y << 3) + iBlk4X;
+ pTmpRefCb = pRefCb + iRefBlk4Stride;
+ pTmpDstCb = pDstCb + iDstBlk4Stride;
+ pTmpRefCr = pRefCr + iRefBlk4Stride;
+ pTmpDstCr = pDstCr + iDstBlk4Stride;
+ pEncCtx->pFuncList->sMcFuncs.pMcChromaFunc (pTmpRefCb + iMvStride, iLineSizeRefUV, pTmpDstCb, 8, pMv->iMvX, pMv->iMvY,
+ 4, 4); //Cb
+ pEncCtx->pFuncList->sMcFuncs.pMcChromaFunc (pTmpRefCr + iMvStride, iLineSizeRefUV, pTmpDstCr, 8, pMv->iMvX, pMv->iMvY,
+ 4, 4); //Cr
+ break;
+ case SUB_MB_TYPE_4x4:
+ sMeRefine.pfCopyBlockByMode = pFunc->pfCopy4x4;
+ //luma
+ for (j = 0; j < 4; ++j) {
+ iBlk4x4Idx = iBlk8Idx + j;
+ InitMeRefinePointer (&sMeRefine, pMbCache, g_kiPixStrideIdx4x4[i][j]);
+ PredMv (&pMbCache->sMvComponents, iBlk4x4Idx, 1, pWelsMd->uiRef, &pWelsMd->sMe.sMe4x4[i][j].sMvp);
+ MeRefineFracPixel (pEncCtx, pDstLuma + g_kuiSmb4AddrIn256[iBlk4x4Idx], &pWelsMd->sMe.sMe4x4[i][j], &sMeRefine, 4, 4);
+ UpdateP4x4MotionInfo (pMbCache, pCurMb, iBlk4x4Idx, pWelsMd->uiRef, &pWelsMd->sMe.sMe4x4[i][j].sMv);
+ pMbCache->sMbMvp[g_kuiMbCountScan4Idx[iBlk4x4Idx]] = pWelsMd->sMe.sMe4x4[i][j].sMvp;
+ iBestSadCost += pWelsMd->sMe.sMe4x4[i][j].uiSadCost;
+ iBestSatdCost += pWelsMd->sMe.sMe4x4[i][j].uiSatdCost;
- pTmpRefCb = pRefCb + iRefBlk4Stride;
- pTmpDstCb = pDstCb + iDstBlk4Stride;
- pTmpRefCr = pRefCr + iRefBlk4Stride;
- pTmpDstCr = pDstCr + iDstBlk4Stride;
- pEncCtx->pFuncList->sMcFuncs.pMcChromaFunc (pTmpRefCb + iMvStride, iLineSizeRefUV, pTmpDstCb, 8, pMv->iMvX, pMv->iMvY,
- 4, 4); //Cb
- pEncCtx->pFuncList->sMcFuncs.pMcChromaFunc (pTmpRefCr + iMvStride, iLineSizeRefUV, pTmpDstCr, 8, pMv->iMvX, pMv->iMvY,
- 4, 4); //Cr
+ //chroma
+ pMv = &pWelsMd->sMe.sMe4x4[i][j].sMv;
+ iMvStride = (pMv->iMvY >> 3) * iLineSizeRefUV + (pMv->iMvX >> 3);
+ iBlk4X = (((i & 1) << 1) + (j & 1)) << 1;
+ iBlk4Y = (((i >> 1) << 1) + (j >> 1)) << 1;
+ iRefBlk4Stride = iBlk4Y * iLineSizeRefUV + iBlk4X;
+ iDstBlk4Stride = (iBlk4Y << 3) + iBlk4X;
+
+ pTmpRefCb = pRefCb + iRefBlk4Stride;
+ pTmpDstCb = pDstCb + iDstBlk4Stride;
+ pTmpRefCr = pRefCr + iRefBlk4Stride;
+ pTmpDstCr = pDstCr + iDstBlk4Stride;
+ pEncCtx->pFuncList->sMcFuncs.pMcChromaFunc (pTmpRefCb + iMvStride, iLineSizeRefUV, pTmpDstCb, 8, pMv->iMvX, pMv->iMvY,
+ 2, 2); //Cb
+ pEncCtx->pFuncList->sMcFuncs.pMcChromaFunc (pTmpRefCr + iMvStride, iLineSizeRefUV, pTmpDstCr, 8, pMv->iMvX, pMv->iMvY,
+ 2, 2); //Cr
+ }
+ break;
+ case SUB_MB_TYPE_8x4:
+ sMeRefine.pfCopyBlockByMode = pFunc->pfCopy8x4;
+ //luma
+ for (j = 0; j < 2; ++j) {
+ iBlk4x4Idx = iBlk8Idx + (j << 1);
+ InitMeRefinePointer (&sMeRefine, pMbCache, g_kiPixStrideIdx4x4[i][j << 1]);
+ PredMv (&pMbCache->sMvComponents, iBlk4x4Idx, 2, pWelsMd->uiRef, &pWelsMd->sMe.sMe8x4[i][j].sMvp);
+ MeRefineFracPixel (pEncCtx, pDstLuma + g_kuiSmb4AddrIn256[iBlk4x4Idx], &pWelsMd->sMe.sMe8x4[i][j], &sMeRefine, 8, 4);
+ UpdateP8x4MotionInfo (pMbCache, pCurMb, iBlk4x4Idx, pWelsMd->uiRef, &pWelsMd->sMe.sMe8x4[i][j].sMv);
+ pMbCache->sMbMvp[g_kuiMbCountScan4Idx[ iBlk4x4Idx]] = pWelsMd->sMe.sMe8x4[i][j].sMvp;
+ //pMbCache->sMbMvp[g_kuiMbCountScan4Idx[1 + iBlk4x4Idx]] = pWelsMd->sMe.sMe8x4[i][j].sMvp;
+ iBestSadCost += pWelsMd->sMe.sMe8x4[i][j].uiSadCost;
+ iBestSatdCost += pWelsMd->sMe.sMe8x4[i][j].uiSatdCost;
+
+ //chroma
+ pMv = &pWelsMd->sMe.sMe8x4[i][j].sMv;
+ iMvStride = (pMv->iMvY >> 3) * iLineSizeRefUV + (pMv->iMvX >> 3);
+
+ iBlk4X = ((i & 1) << 1) << 1;
+ iBlk4Y = (((i >> 1) << 1) + j) << 1;
+ iRefBlk4Stride = iBlk4Y * iLineSizeRefUV + iBlk4X;
+ iDstBlk4Stride = (iBlk4Y << 3) + iBlk4X;
+
+ pTmpRefCb = pRefCb + iRefBlk4Stride;
+ pTmpDstCb = pDstCb + iDstBlk4Stride;
+ pTmpRefCr = pRefCr + iRefBlk4Stride;
+ pTmpDstCr = pDstCr + iDstBlk4Stride;
+ pEncCtx->pFuncList->sMcFuncs.pMcChromaFunc (pTmpRefCb + iMvStride, iLineSizeRefUV, pTmpDstCb, 8, pMv->iMvX, pMv->iMvY,
+ 4, 2); //Cb
+ pEncCtx->pFuncList->sMcFuncs.pMcChromaFunc (pTmpRefCr + iMvStride, iLineSizeRefUV, pTmpDstCr, 8, pMv->iMvX, pMv->iMvY,
+ 4, 2); //Cr
+ }
+ break;
+ case SUB_MB_TYPE_4x8:
+ sMeRefine.pfCopyBlockByMode = pFunc->pfCopy4x8;
+ //luma
+ for (j = 0; j < 2; ++j) {
+ iBlk4x4Idx = iBlk8Idx + j;
+ InitMeRefinePointer (&sMeRefine, pMbCache, g_kiPixStrideIdx4x4[i][j]);
+ PredMv (&pMbCache->sMvComponents, iBlk4x4Idx, 1, pWelsMd->uiRef, &pWelsMd->sMe.sMe4x8[i][j].sMvp);
+ MeRefineFracPixel (pEncCtx, pDstLuma + g_kuiSmb4AddrIn256[iBlk4x4Idx], &pWelsMd->sMe.sMe4x8[i][j], &sMeRefine, 4, 8);
+ UpdateP4x8MotionInfo (pMbCache, pCurMb, iBlk4x4Idx, pWelsMd->uiRef, &pWelsMd->sMe.sMe4x8[i][j].sMv);
+ pMbCache->sMbMvp[g_kuiMbCountScan4Idx[ iBlk4x4Idx]] = pWelsMd->sMe.sMe4x8[i][j].sMvp;
+ //pMbCache->sMbMvp[g_kuiMbCountScan4Idx[4 + iBlk4x4Idx]] = pWelsMd->sMe.sMe8x4[i][j].sMvp;
+ iBestSadCost += pWelsMd->sMe.sMe4x8[i][j].uiSadCost;
+ iBestSatdCost += pWelsMd->sMe.sMe4x8[i][j].uiSatdCost;
+
+ //chroma
+ pMv = &pWelsMd->sMe.sMe4x8[i][j].sMv;
+ iMvStride = (pMv->iMvY >> 3) * iLineSizeRefUV + (pMv->iMvX >> 3);
+
+ iBlk4X = (((i & 1) << 1) + j) << 1;
+ iBlk4Y = (((i >> 1) << 1)) << 1;
+ iRefBlk4Stride = iBlk4Y * iLineSizeRefUV + iBlk4X;
+ iDstBlk4Stride = (iBlk4Y << 3) + iBlk4X;
+
+ pTmpRefCb = pRefCb + iRefBlk4Stride;
+ pTmpDstCb = pDstCb + iDstBlk4Stride;
+ pTmpRefCr = pRefCr + iRefBlk4Stride;
+ pTmpDstCr = pDstCr + iDstBlk4Stride;
+ pEncCtx->pFuncList->sMcFuncs.pMcChromaFunc (pTmpRefCb + iMvStride, iLineSizeRefUV, pTmpDstCb, 8, pMv->iMvX, pMv->iMvY,
+ 2, 4); //Cb
+ pEncCtx->pFuncList->sMcFuncs.pMcChromaFunc (pTmpRefCr + iMvStride, iLineSizeRefUV, pTmpDstCr, 8, pMv->iMvX, pMv->iMvY,
+ 2, 4); //Cr
+ }
+ break;
+ }
}
break;
default:
--- a/codec/encoder/core/src/svc_mode_decision.cpp
+++ b/codec/encoder/core/src/svc_mode_decision.cpp
@@ -532,7 +532,8 @@
return false;
}
-bool WelsMdInterJudgeSCDPskipFalse (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SSlice* slice, SMB* pCurMb, SMbCache* pMbCache) {
+bool WelsMdInterJudgeSCDPskipFalse (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SSlice* slice, SMB* pCurMb,
+ SMbCache* pMbCache) { //no parameter is read; the result is always false
return false;
}
@@ -606,7 +607,8 @@
}
-void WelsMdInterFinePartitionVaaOnScreen (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SSlice* pSlice, SMB* pCurMb, int32_t iBestCost) {
+void WelsMdInterFinePartitionVaaOnScreen (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SSlice* pSlice, SMB* pCurMb,
+ int32_t iBestCost) {
SMbCache* pMbCache = &pSlice->sMbCacheInfo;
SDqLayer* pCurDqLayer = pEncCtx->pCurDqLayer;
int32_t iCostP8x8;
@@ -620,8 +622,37 @@
if (iCostP8x8 < iBestCost) {
iBestCost = iCostP8x8;
pCurMb->uiMbType = MB_TYPE_8x8;
-
- TryModeMerge (pMbCache, pWelsMd, pCurMb);
+ pCurMb->uiSubMbType[0] = pCurMb->uiSubMbType[1] = pCurMb->uiSubMbType[2] = pCurMb->uiSubMbType[3] = SUB_MB_TYPE_8x8;
+#if 0 //Disable for sub8x8 modes for now
+ iBestCost = 0;
+ //reset neighbor info for sub8x8
+ pMbCache->sMvComponents.iRefIndexCache [9] = pMbCache->sMvComponents.iRefIndexCache [21] = REF_NOT_AVAIL;
+ for (int32_t i8x8Idx = 0; i8x8Idx < 4; ++i8x8Idx) {
+ int32_t iCurCostSub8x8, iBestCostSub8x8 = pWelsMd->sMe.sMe8x8[i8x8Idx].uiSatdCost;
+ //4x4
+ iCurCostSub8x8 = WelsMdP4x4 (pEncCtx->pFuncList, pCurDqLayer, pWelsMd, pSlice, i8x8Idx);
+ if (iCurCostSub8x8 < iBestCostSub8x8) {
+ pCurMb->uiSubMbType[i8x8Idx] = SUB_MB_TYPE_4x4;
+ iBestCostSub8x8 = iCurCostSub8x8;
+ }
+ //8x4
+ iCurCostSub8x8 = WelsMdP8x4 (pEncCtx->pFuncList, pCurDqLayer, pWelsMd, pSlice, i8x8Idx);
+ if (iCurCostSub8x8 < iBestCostSub8x8) {
+ pCurMb->uiSubMbType[i8x8Idx] = SUB_MB_TYPE_8x4;
+ iBestCostSub8x8 = iCurCostSub8x8;
+ }
+ //4x8
+ iCurCostSub8x8 = WelsMdP4x8 (pEncCtx->pFuncList, pCurDqLayer, pWelsMd, pSlice, i8x8Idx);
+ if (iCurCostSub8x8 < iBestCostSub8x8) {
+ pCurMb->uiSubMbType[i8x8Idx] = SUB_MB_TYPE_4x8;
+ iBestCostSub8x8 = iCurCostSub8x8;
+ }
+ iBestCost += iBestCostSub8x8;
+ }
+ if ((pCurMb->uiSubMbType[0] == SUB_MB_TYPE_8x8) && (pCurMb->uiSubMbType[1] == SUB_MB_TYPE_8x8)
+ && (pCurMb->uiSubMbType[2] == SUB_MB_TYPE_8x8) && (pCurMb->uiSubMbType[3] == SUB_MB_TYPE_8x8)) //all 8x8
+#endif
+ TryModeMerge (pMbCache, pWelsMd, pCurMb);
}
pWelsMd->iCostLuma = iBestCost;
}
--- a/codec/encoder/core/src/svc_set_mb_syn_cabac.cpp
+++ b/codec/encoder/core/src/svc_set_mb_syn_cabac.cpp
@@ -271,8 +271,7 @@
WelsCabacEncodeDecision (pCabacCtx, iCtx, bSkipFlag);
if (bSkipFlag) {
- for (int i = 0; i < 4; i++) {
-
+ for (int i = 0; i < 16; i++) {
pCurMb->sMvd[i].iMvX = 0;
pCurMb->sMvd[i].iMvY = 0;
}
@@ -338,7 +337,7 @@
}
}
SMVUnitXY WelsCabacMbMvd (SCabacCtx* pCabacCtx, SMB* pCurMb, uint32_t iMbWidth,
- SMVUnitXY sCurMv, SMVUnitXY sPredMv, int16_t iBlockIdx) {
+ SMVUnitXY sCurMv, SMVUnitXY sPredMv, int16_t i4x4ScanIdx) {
uint32_t iAbsMvd0, iAbsMvd1;
uint8_t uiNeighborAvail = pCurMb->uiNeighborAvail;
SMVUnitXY sMvd;
@@ -347,19 +346,16 @@
sMvdLeft.iMvX = sMvdLeft.iMvY = sMvdTop.iMvX = sMvdTop.iMvY = 0;
sMvd.sDeltaMv (sCurMv, sPredMv);
-
- if (((iBlockIdx == 0) || (iBlockIdx == 1)) && (uiNeighborAvail & TOP_MB_POS)) {
- sMvdTop.sAssginMv ((pCurMb - iMbWidth)->sMvd[iBlockIdx + 2]);
+ if ((i4x4ScanIdx < 4) && (uiNeighborAvail & TOP_MB_POS)) { //top row blocks
+ sMvdTop.sAssginMv ((pCurMb - iMbWidth)->sMvd[i4x4ScanIdx + 12]);
+ } else if (i4x4ScanIdx >= 4) {
+ sMvdTop.sAssginMv (pCurMb->sMvd[i4x4ScanIdx - 4]);
}
- if ((iBlockIdx == 2) || (iBlockIdx == 3)) {
- sMvdTop.sAssginMv (pCurMb->sMvd[iBlockIdx - 2]);
+ if ((! (i4x4ScanIdx & 0x03)) && (uiNeighborAvail & LEFT_MB_POS)) { //left column blocks
+ sMvdLeft.sAssginMv ((pCurMb - 1)->sMvd[i4x4ScanIdx + 3]);
+ } else if (i4x4ScanIdx & 0x03) {
+ sMvdLeft.sAssginMv (pCurMb->sMvd[i4x4ScanIdx - 1]);
}
- if (((iBlockIdx == 0) || (iBlockIdx == 2)) && (uiNeighborAvail & LEFT_MB_POS)) {
- sMvdLeft.sAssginMv ((pCurMb - 1)->sMvd[iBlockIdx + 1]);
- }
- if ((iBlockIdx == 1) || (iBlockIdx == 3)) {
- sMvdLeft.sAssginMv (pCurMb->sMvd[iBlockIdx - 1]);
- }
iAbsMvd0 = WELS_ABS (sMvdLeft.iMvX) + WELS_ABS (sMvdTop.iMvX);
iAbsMvd1 = WELS_ABS (sMvdLeft.iMvY) + WELS_ABS (sMvdTop.iMvY);
@@ -368,7 +364,63 @@
WelsCabacMbMvdLx (pCabacCtx, sMvd.iMvY, 47, iAbsMvd1);
return sMvd;
}
+static void WelsCabacSubMbType (SCabacCtx* pCabacCtx, SMB* pCurMb) { //codes pCurMb->uiSubMbType[0..3] as CABAC bins using contexts 21..23
+ for (int32_t i8x8Idx = 0; i8x8Idx < 4; ++i8x8Idx) {
+ uint32_t uiSubMbType = pCurMb->uiSubMbType[i8x8Idx];
+ if (SUB_MB_TYPE_8x8 == uiSubMbType) {
+ WelsCabacEncodeDecision (pCabacCtx, 21, 1); //8x8: single bin "1"
+ continue;
+ }
+ WelsCabacEncodeDecision (pCabacCtx, 21, 0); //every other type starts with bin "0"
+ if (SUB_MB_TYPE_8x4 == uiSubMbType) {
+ WelsCabacEncodeDecision (pCabacCtx, 22, 0); //8x4: bins "0 0"
+ } else {
+ WelsCabacEncodeDecision (pCabacCtx, 22, 1);
+ WelsCabacEncodeDecision (pCabacCtx, 23, SUB_MB_TYPE_4x8 == uiSubMbType); //4x8: bins "0 1 1"; any other value (4x4 expected): "0 1 0"
+ }
+ } //for each 8x8 block
+}
+static void WelsCabacSubMbMvd (SCabacCtx* pCabacCtx, SMB* pCurMb, SMbCache* pMbCache, const int kiMbWidth) { //codes one MVD per sub-partition of each 8x8 block and records it in pCurMb->sMvd
+ SMVUnitXY sMvd;
+ int32_t i8x8Idx, i4x4ScanIdx;
+ for (i8x8Idx = 0; i8x8Idx < 4; ++i8x8Idx) {
+ uint32_t uiSubMbType = pCurMb->uiSubMbType[i8x8Idx];
+ if (SUB_MB_TYPE_8x8 == uiSubMbType) {
+ i4x4ScanIdx = g_kuiMbCountScan4Idx[i8x8Idx << 2]; //slot of the 8x8 block's first 4x4 block
+ sMvd = WelsCabacMbMvd (pCabacCtx, pCurMb, kiMbWidth, pCurMb->sMv[i4x4ScanIdx], pMbCache->sMbMvp[i4x4ScanIdx],
+ i4x4ScanIdx);
+ pCurMb->sMvd[ i4x4ScanIdx].sAssginMv (sMvd); //one MVD copied to four slots: +0, +1, +4, +5
+ pCurMb->sMvd[1 + i4x4ScanIdx].sAssginMv (sMvd);
+ pCurMb->sMvd[4 + i4x4ScanIdx].sAssginMv (sMvd);
+ pCurMb->sMvd[5 + i4x4ScanIdx].sAssginMv (sMvd);
+ } else if (SUB_MB_TYPE_4x4 == uiSubMbType) {
+ for (int32_t i4x4Idx = 0; i4x4Idx < 4; ++i4x4Idx) { //four MVDs, one slot each
+ i4x4ScanIdx = g_kuiMbCountScan4Idx[ (i8x8Idx << 2) + i4x4Idx];
+ sMvd = WelsCabacMbMvd (pCabacCtx, pCurMb, kiMbWidth, pCurMb->sMv[i4x4ScanIdx], pMbCache->sMbMvp[i4x4ScanIdx],
+ i4x4ScanIdx);
+ pCurMb->sMvd[i4x4ScanIdx].sAssginMv (sMvd);
+ }
+ } else if (SUB_MB_TYPE_8x4 == uiSubMbType) {
+ for (int32_t i8x4Idx = 0; i8x4Idx < 2; ++i8x4Idx) { //two MVDs, each copied to slots +0 and +1
+ i4x4ScanIdx = g_kuiMbCountScan4Idx[ (i8x8Idx << 2) + (i8x4Idx << 1)];
+ sMvd = WelsCabacMbMvd (pCabacCtx, pCurMb, kiMbWidth, pCurMb->sMv[i4x4ScanIdx], pMbCache->sMbMvp[i4x4ScanIdx],
+ i4x4ScanIdx);
+ pCurMb->sMvd[ i4x4ScanIdx].sAssginMv (sMvd);
+ pCurMb->sMvd[1 + i4x4ScanIdx].sAssginMv (sMvd);
+ }
+ } else if (SUB_MB_TYPE_4x8 == uiSubMbType) {
+ for (int32_t i4x8Idx = 0; i4x8Idx < 2; ++i4x8Idx) { //two MVDs, each copied to slots +0 and +4
+ i4x4ScanIdx = g_kuiMbCountScan4Idx[ (i8x8Idx << 2) + i4x8Idx];
+ sMvd = WelsCabacMbMvd (pCabacCtx, pCurMb, kiMbWidth, pCurMb->sMv[i4x4ScanIdx], pMbCache->sMbMvp[i4x4ScanIdx],
+ i4x4ScanIdx);
+ pCurMb->sMvd[ i4x4ScanIdx].sAssginMv (sMvd);
+ pCurMb->sMvd[4 + i4x4ScanIdx].sAssginMv (sMvd);
+ }
+ } //any other sub type: nothing is coded for this 8x8 block
+ }
+}
+
int16_t WelsGetMbCtxCabac (SMbCache* pMbCache, SMB* pCurMb, uint32_t iMbWidth, ECtxBlockCat eCtxBlockCat,
int16_t iIdx) {
int16_t iNzA = -1, iNzB = -1;
@@ -610,10 +662,9 @@
}
WelsCabacMbIntraChromaPredMode (pCabacCtx, pCurMb, pMbCache, iMbWidth);
sMvd.iMvX = sMvd.iMvY = 0;
- pCurMb->sMvd[0].sAssginMv (sMvd);
- pCurMb->sMvd[1].sAssginMv (sMvd);
- pCurMb->sMvd[2].sAssginMv (sMvd);
- pCurMb->sMvd[3].sAssginMv (sMvd);
+ for (i = 0; i < 16; ++i) {
+ pCurMb->sMvd[i].sAssginMv (sMvd);
+ }
} else if (uiMbType == MB_TYPE_16x16) {
@@ -622,10 +673,9 @@
}
sMvd = WelsCabacMbMvd (pCabacCtx, pCurMb, iMbWidth, pCurMb->sMv[0], pMbCache->sMbMvp[0], 0);
- pCurMb->sMvd[0].sAssginMv (sMvd);
- pCurMb->sMvd[1].sAssginMv (sMvd);
- pCurMb->sMvd[2].sAssginMv (sMvd);
- pCurMb->sMvd[3].sAssginMv (sMvd);
+ for (i = 0; i < 16; ++i) {
+ pCurMb->sMvd[i].sAssginMv (sMvd);
+ }
} else if (uiMbType == MB_TYPE_16x8) {
if (uiNumRefIdxL0Active > 0) {
@@ -633,14 +683,13 @@
WelsCabacMbRef (pCabacCtx, pCurMb, pMbCache, 12);
}
sMvd = WelsCabacMbMvd (pCabacCtx, pCurMb, iMbWidth , pCurMb->sMv[0], pMbCache->sMbMvp[0], 0);
- pCurMb->sMvd[0].sAssginMv (sMvd);
- pCurMb->sMvd[1].sAssginMv (sMvd);
-
-
- sMvd = WelsCabacMbMvd (pCabacCtx, pCurMb, iMbWidth, pCurMb->sMv[8], pMbCache->sMbMvp[1], 2);
- pCurMb->sMvd[2].sAssginMv (sMvd);
- pCurMb->sMvd[3].sAssginMv (sMvd);
-
+ for (i = 0; i < 8; ++i) {
+ pCurMb->sMvd[i].sAssginMv (sMvd);
+ }
+ sMvd = WelsCabacMbMvd (pCabacCtx, pCurMb, iMbWidth, pCurMb->sMv[8], pMbCache->sMbMvp[1], 8);
+ for (i = 8; i < 16; ++i) {
+ pCurMb->sMvd[i].sAssginMv (sMvd);
+ }
} else if (uiMbType == MB_TYPE_8x16) {
if (uiNumRefIdxL0Active > 0) {
WelsCabacMbRef (pCabacCtx, pCurMb, pMbCache, 0);
@@ -647,16 +696,18 @@
WelsCabacMbRef (pCabacCtx, pCurMb, pMbCache, 2);
}
sMvd = WelsCabacMbMvd (pCabacCtx, pCurMb, iMbWidth, pCurMb->sMv[0], pMbCache->sMbMvp[0], 0);
- pCurMb->sMvd[0].sAssginMv (sMvd);
- pCurMb->sMvd[2].sAssginMv (sMvd);
-
- sMvd = WelsCabacMbMvd (pCabacCtx, pCurMb, iMbWidth, pCurMb->sMv[2], pMbCache->sMbMvp[1], 1);
- pCurMb->sMvd[1].sAssginMv (sMvd);
- pCurMb->sMvd[3].sAssginMv (sMvd);
-
+ for (i = 0; i < 16; i += 4) {
+ pCurMb->sMvd[i ].sAssginMv (sMvd);
+ pCurMb->sMvd[i + 1].sAssginMv (sMvd);
+ }
+ sMvd = WelsCabacMbMvd (pCabacCtx, pCurMb, iMbWidth, pCurMb->sMv[2], pMbCache->sMbMvp[1], 2);
+ for (i = 0; i < 16; i += 4) {
+ pCurMb->sMvd[i + 2].sAssginMv (sMvd);
+ pCurMb->sMvd[i + 3].sAssginMv (sMvd);
+ }
} else if ((uiMbType == MB_TYPE_8x8) || (uiMbType == MB_TYPE_8x8_REF0)) {
- for (i = 0; i < 4; i++)
- WelsCabacEncodeDecision (pCabacCtx, 21, 1);
+ //write sub_mb_type
+ WelsCabacSubMbType (pCabacCtx, pCurMb);
if (uiNumRefIdxL0Active > 0) {
WelsCabacMbRef (pCabacCtx, pCurMb, pMbCache, 0);
@@ -664,19 +715,8 @@
WelsCabacMbRef (pCabacCtx, pCurMb, pMbCache, 12);
WelsCabacMbRef (pCabacCtx, pCurMb, pMbCache, 14);
}
-
-
- sMvd = WelsCabacMbMvd (pCabacCtx, pCurMb, iMbWidth, pCurMb->sMv[0], pMbCache->sMbMvp[0], 0);
- pCurMb->sMvd[0].sAssginMv (sMvd);
-
- sMvd = WelsCabacMbMvd (pCabacCtx, pCurMb, iMbWidth, pCurMb->sMv[2], pMbCache->sMbMvp[1], 1);
- pCurMb->sMvd[1].sAssginMv (sMvd);
-
- sMvd = WelsCabacMbMvd (pCabacCtx, pCurMb, iMbWidth, pCurMb->sMv[8], pMbCache->sMbMvp[2], 2);
- pCurMb->sMvd[2].sAssginMv (sMvd);
-
- sMvd = WelsCabacMbMvd (pCabacCtx, pCurMb, iMbWidth, pCurMb->sMv[10], pMbCache->sMbMvp[3], 3);
- pCurMb->sMvd[3].sAssginMv (sMvd);
+ //write sub8x8 mvd
+ WelsCabacSubMbMvd (pCabacCtx, pCurMb, pMbCache, iMbWidth);
}
if (uiMbType != MB_TYPE_INTRA16x16) {
WelsCabacMbCbp (pCurMb, iMbWidth, pCabacCtx);
--- a/codec/encoder/core/src/svc_set_mb_syn_cavlc.cpp
+++ b/codec/encoder/core/src/svc_set_mb_syn_cavlc.cpp
@@ -190,7 +190,22 @@
//step 1: sub_mb_type
for (i = 0; i < 4; i++) {
- BsWriteUE (pBs, 0);
+ switch (pCurMb->uiSubMbType[i]) {
+ case SUB_MB_TYPE_8x8:
+ BsWriteUE (pBs, 0);
+ break;
+ case SUB_MB_TYPE_8x4:
+ BsWriteUE (pBs, 1);
+ break;
+ case SUB_MB_TYPE_4x8:
+ BsWriteUE (pBs, 2);
+ break;
+ case SUB_MB_TYPE_4x4:
+ BsWriteUE (pBs, 3);
+ break;
+ default: //should not enter
+ break;
+ }
}
//step 2: get and write uiRefIndex and sMvd
@@ -202,8 +217,30 @@
}
//write sMvd
for (i = 0; i < 4; i++) {
- BsWriteSE (pBs, pCurMb->sMv[*kpScan4].iMvX - pMbCache->sMbMvp[i].iMvX);
- BsWriteSE (pBs, pCurMb->sMv[*kpScan4].iMvY - pMbCache->sMbMvp[i].iMvY);
+ uint32_t uiSubMbType = pCurMb->uiSubMbType[i];
+ if (SUB_MB_TYPE_8x8 == uiSubMbType) {
+ BsWriteSE (pBs, pCurMb->sMv[*kpScan4].iMvX - pMbCache->sMbMvp[*kpScan4].iMvX);
+ BsWriteSE (pBs, pCurMb->sMv[*kpScan4].iMvY - pMbCache->sMbMvp[*kpScan4].iMvY);
+ } else if (SUB_MB_TYPE_4x4 == uiSubMbType) {
+ BsWriteSE (pBs, pCurMb->sMv[*kpScan4].iMvX - pMbCache->sMbMvp[*kpScan4].iMvX);
+ BsWriteSE (pBs, pCurMb->sMv[*kpScan4].iMvY - pMbCache->sMbMvp[*kpScan4].iMvY);
+ BsWriteSE (pBs, pCurMb->sMv[* (kpScan4 + 1)].iMvX - pMbCache->sMbMvp[* (kpScan4 + 1)].iMvX);
+ BsWriteSE (pBs, pCurMb->sMv[* (kpScan4 + 1)].iMvY - pMbCache->sMbMvp[* (kpScan4 + 1)].iMvY);
+ BsWriteSE (pBs, pCurMb->sMv[* (kpScan4 + 2)].iMvX - pMbCache->sMbMvp[* (kpScan4 + 2)].iMvX);
+ BsWriteSE (pBs, pCurMb->sMv[* (kpScan4 + 2)].iMvY - pMbCache->sMbMvp[* (kpScan4 + 2)].iMvY);
+ BsWriteSE (pBs, pCurMb->sMv[* (kpScan4 + 3)].iMvX - pMbCache->sMbMvp[* (kpScan4 + 3)].iMvX);
+ BsWriteSE (pBs, pCurMb->sMv[* (kpScan4 + 3)].iMvY - pMbCache->sMbMvp[* (kpScan4 + 3)].iMvY);
+ } else if (SUB_MB_TYPE_8x4 == uiSubMbType) {
+ BsWriteSE (pBs, pCurMb->sMv[*kpScan4].iMvX - pMbCache->sMbMvp[*kpScan4].iMvX);
+ BsWriteSE (pBs, pCurMb->sMv[*kpScan4].iMvY - pMbCache->sMbMvp[*kpScan4].iMvY);
+ BsWriteSE (pBs, pCurMb->sMv[* (kpScan4 + 2)].iMvX - pMbCache->sMbMvp[* (kpScan4 + 2)].iMvX);
+ BsWriteSE (pBs, pCurMb->sMv[* (kpScan4 + 2)].iMvY - pMbCache->sMbMvp[* (kpScan4 + 2)].iMvY);
+ } else if (SUB_MB_TYPE_4x8 == uiSubMbType) {
+ BsWriteSE (pBs, pCurMb->sMv[*kpScan4].iMvX - pMbCache->sMbMvp[*kpScan4].iMvX);
+ BsWriteSE (pBs, pCurMb->sMv[*kpScan4].iMvY - pMbCache->sMbMvp[*kpScan4].iMvY);
+ BsWriteSE (pBs, pCurMb->sMv[* (kpScan4 + 1)].iMvX - pMbCache->sMbMvp[* (kpScan4 + 1)].iMvX);
+ BsWriteSE (pBs, pCurMb->sMv[* (kpScan4 + 1)].iMvY - pMbCache->sMbMvp[* (kpScan4 + 1)].iMvY);
+ }
kpScan4 += 4;
}
}