ref: d9502aa71de3a1ed2b7fc0e7472ffda1277a1108
dir: /codec/encoder/core/src/svc_encode_mb.cpp/
/*!
* \copy
* Copyright (c) 2009-2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*
* \file svc_encode_mb.cpp
*
* \brief Implementation for pCurMb encoding
*
* \date 05/19/2009 Created
*************************************************************************************
*/
#include "svc_encode_mb.h"
#include "encode_mb_aux.h"
#include "decode_mb_aux.h"
#include "ls_defines.h"
namespace WelsEnc {
/*!
 * \brief   Run the supplied transform routine over the four 8x8 quadrants of a 16x16 block.
 *
 * The quadrants are visited in raster order (top-left, top-right, bottom-left,
 * bottom-right). Each call receives 64 consecutive output coefficients of
 * \p pRes. The source is addressed with \p iEncStride, while the prediction
 * buffer is addressed with a fixed stride of 16.
 *
 * \param   pRes        output coefficient buffer, 4 * 64 int16_t entries are addressed
 * \param   pEncMb      top-left sample of the source block
 * \param   iEncStride  stride of \p pEncMb
 * \param   pBestPred   top-left sample of the prediction block (stride 16)
 * \param   pfDctFourT4 routine invoked once per 8x8 quadrant
 */
void WelsDctMb (int16_t* pRes, uint8_t* pEncMb, int32_t iEncStride, uint8_t* pBestPred, PDctFunc pfDctFourT4) {
  const int32_t kiPredStride = 16;

  for (int32_t iQuad = 0; iQuad < 4; ++iQuad) {
    const int32_t kiRow = (iQuad >> 1) << 3; // 0 for the upper quadrants, 8 for the lower ones
    const int32_t kiCol = (iQuad & 1) << 3;  // 0 for the left quadrants, 8 for the right ones

    pfDctFourT4 (pRes + (iQuad << 6),
                 pEncMb + kiRow * iEncStride + kiCol, iEncStride,
                 pBestPred + kiRow * kiPredStride + kiCol, kiPredStride);
  }
}
/*!
 * \brief   Encode and reconstruct the luma of an intra 16x16 macroblock.
 *
 * Steps, in order:
 *  - WelsDctMb() fills pMbCache->pCoeffLevel from the source MB and pMemPredLuma;
 *  - the DC terms go through pfTransformHadamard4x4Dc / pfQuantizationDc4x4 and
 *    are scanned into pDct->iLumaI16x16Dc;
 *  - the 16 4x4 blocks are quantized and AC-scanned into pDct->iLumaBlock, and
 *    their non-zero counts are stored in pCurMb->pNonZeroCount;
 *  - the reconstruction is written to SPicData.pCsMb[0]: a full inverse
 *    transform when any AC count is non-zero (uiCbp is then set to 15), a
 *    DC-only path when only the DC count is non-zero, and a plain copy of the
 *    prediction otherwise.
 *
 * \param   pEncCtx   encoder context (function table and current layer strides)
 * \param   pCurMb    current macroblock; pNonZeroCount and possibly uiCbp are written
 * \param   pMbCache  per-MB cache holding coefficients, prediction and output pointers
 */
void WelsEncRecI16x16Y (sWelsEncCtx* pEncCtx, SMB* pCurMb, SMbCache* pMbCache) {
  ENFORCE_STACK_ALIGN_1D (int16_t, aDctT4Dc, 16, 16)
  // For the k-th 4x4 block of the coefficient buffer: index of its DC term in aDctT4Dc.
  static const uint8_t kuiDcSrcIdx[16] = { 0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15 };

  SWelsFuncPtrList* pFuncList = pEncCtx->pFuncList;
  SDqLayer* pLayer            = pEncCtx->pCurDqLayer;
  const int32_t kiEncStride   = pLayer->iEncStride[0];
  const int32_t kiRecStride   = pLayer->iCsStride[0];
  int16_t* const pCoeff       = pMbCache->pCoeffLevel;
  int16_t* const pScanned     = pMbCache->pDct->iLumaBlock[0];
  uint8_t* const pRec         = pMbCache->SPicData.pCsMb[0];
  uint8_t* const pBestPred    = pMbCache->pMemPredLuma;
  uint8_t uiLumaQp            = pCurMb->uiLumaQp;
  const int16_t* pMF          = g_kiQuantMF[uiLumaQp];
  const int16_t* pFF          = g_iQuantIntraFF[uiLumaQp];
  uint32_t uiAcCountMb        = 0;
  uint32_t uiDcCount;
  int32_t iQuad, iBlk;

  WelsDctMb (pCoeff, pMbCache->SPicData.pEncMb[0], kiEncStride, pBestPred, pFuncList->pfDctFourT4);

  // DC path: the rounding offset is doubled and the multiplier halved relative to the AC tables.
  pFuncList->pfTransformHadamard4x4Dc (aDctT4Dc, pCoeff);
  pFuncList->pfQuantizationDc4x4 (aDctT4Dc, pFF[0] << 1, pMF[0] >> 1);
  pFuncList->pfScan4x4 (pMbCache->pDct->iLumaI16x16Dc, aDctT4Dc);
  uiDcCount = pFuncList->pfGetNoneZeroCount (pMbCache->pDct->iLumaI16x16Dc);

  // AC path: quantize one 8x8 quadrant (four 4x4 blocks) at a time, then scan each 4x4.
  for (iQuad = 0; iQuad < 4; ++iQuad) {
    int16_t* pCoeff8x8 = pCoeff + (iQuad << 6);
    int16_t* pScan8x8  = pScanned + (iQuad << 6);

    pFuncList->pfQuantizationFour4x4 (pCoeff8x8, pFF, pMF);
    for (iBlk = 0; iBlk < 4; ++iBlk)
      pFuncList->pfScan4x4Ac (pScan8x8 + (iBlk << 4), pCoeff8x8 + (iBlk << 4));
  }

  // Per-block non-zero counts, stored at the positions given by g_kuiMbCountScan4Idx.
  for (iBlk = 0; iBlk < 16; ++iBlk) {
    const uint32_t kuiCount = pFuncList->pfGetNoneZeroCount (pScanned + (iBlk << 4));
    pCurMb->pNonZeroCount[g_kuiMbCountScan4Idx[iBlk]] = kuiCount;
    uiAcCountMb += kuiCount;
  }

  if (uiDcCount > 0) {
    if (uiLumaQp < 12) {
      WelsIHadamard4x4Dc (aDctT4Dc);
      WelsDequantLumaDc4x4 (aDctT4Dc, uiLumaQp);
    } else {
      pFuncList->pfDequantizationIHadamard4x4 (aDctT4Dc, g_kuiDequantCoeff[uiLumaQp][0] >> 2);
    }
  }

  if (uiAcCountMb > 0) {
    pCurMb->uiCbp = 15;

    for (iQuad = 0; iQuad < 4; ++iQuad)
      pFuncList->pfDequantizationFour4x4 (pCoeff + (iQuad << 6), g_kuiDequantCoeff[uiLumaQp]);

    // Put the DC terms back into slot 0 of every 4x4 block before the inverse transform.
    for (iBlk = 0; iBlk < 16; ++iBlk)
      pCoeff[iBlk << 4] = aDctT4Dc[kuiDcSrcIdx[iBlk]];

    for (iQuad = 0; iQuad < 4; ++iQuad) {
      const int32_t kiRow = (iQuad >> 1) << 3;
      const int32_t kiCol = (iQuad & 1) << 3;

      pFuncList->pfIDctFourT4 (pRec + kiRecStride * kiRow + kiCol, kiRecStride,
                               pBestPred + (kiRow << 4) + kiCol, 16,
                               pCoeff + (iQuad << 6));
    }
  } else if (uiDcCount > 0) {
    pFuncList->pfIDctI16x16Dc (pRec, kiRecStride, pBestPred, 16, aDctT4Dc);
  } else {
    pFuncList->pfCopy16x16Aligned (pRec, kiRecStride, pBestPred, 16);
  }
}
/*!
 * \brief   Encode and reconstruct one 4x4 luma block of an intra 4x4 macroblock.
 *
 * The block is transformed against pMbCache->pBestPredI4x4Blk4 (stride 4),
 * quantized with the intra rounding table and scanned into
 * pDct->iLumaBlock[uiI4x4Idx]. Its non-zero count is stored in
 * pCurMb->pNonZeroCount. When the count is positive the matching uiCbp bit
 * (one bit per group of four blocks) is set and the block is reconstructed
 * through dequantization and inverse transform; otherwise the prediction is
 * copied to the reconstruction buffer.
 *
 * \param   pEncCtx    encoder context (function table, strides, block offset tables)
 * \param   pCurMb     current macroblock; pNonZeroCount and uiCbp are updated
 * \param   pMbCache   per-MB cache holding coefficients, prediction and output pointers
 * \param   uiI4x4Idx  index of the 4x4 block inside the macroblock
 */
void WelsEncRecI4x4Y (sWelsEncCtx* pEncCtx, SMB* pCurMb, SMbCache* pMbCache, uint8_t uiI4x4Idx) {
  SWelsFuncPtrList* pFuncList = pEncCtx->pFuncList;
  SDqLayer* pLayer            = pEncCtx->pCurDqLayer;
  const int32_t kiEncStride   = pLayer->iEncStride[0];
  const int32_t kiRecStride   = pLayer->iCsStride[0];
  const uint8_t kuiQp         = pCurMb->uiLumaQp;
  const int16_t* pMF          = g_kiQuantMF[kuiQp];
  const int16_t* pFF          = g_iQuantIntraFF[kuiQp];
  const int32_t* kpEncOffset  = pEncCtx->pStrideTab->pStrideEncBlockOffset[pEncCtx->uiDependencyId];
  // The second index selects entry 1 when the temporal id is 0, entry 0 otherwise.
  const int32_t* kpDecOffset  = pEncCtx->pStrideTab->pStrideDecBlockOffset[pEncCtx->uiDependencyId][0 ==
                                pEncCtx->uiTemporalId];
  const uint32_t kuiNzcPos    = g_kuiMbCountScan4Idx[uiI4x4Idx];
  int16_t* pCoeff             = pMbCache->pCoeffLevel;
  int16_t* pScanned           = pMbCache->pDct->iLumaBlock[uiI4x4Idx];
  uint8_t* pBestPred          = pMbCache->pBestPredI4x4Blk4;
  uint8_t* pSrcBlk            = pMbCache->SPicData.pEncMb[0] + kpEncOffset[uiI4x4Idx];
  uint8_t* pRecBlk;
  int32_t iNonZero;

  pFuncList->pfDctT4 (pCoeff, pSrcBlk, kiEncStride, pBestPred, 4);
  pFuncList->pfQuantization4x4 (pCoeff, pFF, pMF);
  pFuncList->pfScan4x4 (pScanned, pCoeff);

  iNonZero = pFuncList->pfGetNoneZeroCount (pScanned);
  pCurMb->pNonZeroCount[kuiNzcPos] = iNonZero;

  pRecBlk = pMbCache->SPicData.pCsMb[0] + kpDecOffset[uiI4x4Idx];

  if (iNonZero <= 0) {
    // Nothing survived quantization: the reconstruction is the prediction itself.
    WelsCopy4x4 (pRecBlk, kiRecStride, pBestPred, 4);
    return;
  }

  pCurMb->uiCbp |= 1 << (uiI4x4Idx >> 2);
  pFuncList->pfDequantization4x4 (pCoeff, g_kuiDequantCoeff[kuiQp]);
  pFuncList->pfIDctT4 (pRecBlk, kiRecStride, pBestPred, 4, pCoeff);
}
/*!
 * \brief   Quantize, scan and dequantize the luma coefficients of an inter macroblock.
 *
 * pMbCache->pCoeffLevel is expected to already hold the transformed residual.
 * Each 8x8 quadrant is quantized with pfQuantizationFour4x4Max, which also
 * reports a maximum per 4x4 block. Blocks whose maximum is 0 get their scanned
 * output cleared; the others are scanned and contribute to a per-quadrant
 * cost (9 when the maximum exceeds 1, otherwise the value returned by
 * pfCalculateSingleCtr4x4, added only while the quadrant cost is below 6).
 *
 * pCurMb->pNonZeroCount[0..15] is cleared. Then:
 *  - if the summed cost of the macroblock is below 6, the coefficients are
 *    cleared and nothing else is reported;
 *  - otherwise every quadrant with a cost of at least 4 stores its non-zero
 *    counts, is dequantized in place and sets bit i of pCurMb->uiCbp, while
 *    the remaining quadrants have their coefficients cleared.
 *
 * \param   pFuncList  function table providing the quantization/scan primitives
 * \param   pCurMb     current macroblock; pNonZeroCount and uiCbp are updated
 * \param   pMbCache   per-MB cache holding pCoeffLevel and pDct->iLumaBlock
 */
void WelsEncInterY (SWelsFuncPtrList* pFuncList, SMB* pCurMb, SMbCache* pMbCache) {
  PQuantizationMaxFunc pfQuantizationFour4x4Max   = pFuncList->pfQuantizationFour4x4Max;
  PSetMemoryZero pfSetMemZeroSize8                = pFuncList->pfSetMemZeroSize8;
  PSetMemoryZero pfSetMemZeroSize64               = pFuncList->pfSetMemZeroSize64;
  PScanFunc pfScan4x4                             = pFuncList->pfScan4x4;
  PCalculateSingleCtrFunc pfCalculateSingleCtr4x4 = pFuncList->pfCalculateSingleCtr4x4;
  PGetNoneZeroCountFunc pfGetNoneZeroCount        = pFuncList->pfGetNoneZeroCount;
  PDeQuantizationFunc pfDequantizationFour4x4     = pFuncList->pfDequantizationFour4x4;
  int16_t* const pResMb   = pMbCache->pCoeffLevel;
  int16_t* const pBlockMb = pMbCache->pDct->iLumaBlock[0];
  const uint8_t kuiQp     = pCurMb->uiLumaQp;
  const int16_t* pMF      = g_kiQuantMF[kuiQp];
  const int16_t* pFF      = g_kiQuantInterFF[kuiQp];
  int16_t aMax[16];
  int32_t iSingleCtr8x8[4];
  int32_t iSingleCtrMb = 0;
  int32_t i, j;

  for (i = 0; i < 4; i++) {
    int16_t* pRes8x8   = pResMb + (i << 6);
    int16_t* pBlock8x8 = pBlockMb + (i << 6);
    int16_t* pMax8x8   = aMax + (i << 2);
    int32_t iCtr       = 0;

    pfQuantizationFour4x4Max (pRes8x8, pFF, pMF, pMax8x8);

    for (j = 0; j < 4; j++) {
      int16_t* pBlock4x4 = pBlock8x8 + (j << 4);

      if (pMax8x8[j] == 0) {
        pfSetMemZeroSize8 (pBlock4x4, 32);
      } else {
        pfScan4x4 (pBlock4x4, pRes8x8 + (j << 4));
        if (pMax8x8[j] > 1)
          iCtr += 9;
        else if (iCtr < 6)
          iCtr += pfCalculateSingleCtr4x4 (pBlock4x4);
      }
    }

    iSingleCtr8x8[i] = iCtr;
    iSingleCtrMb += iCtr;
  }

  memset (pCurMb->pNonZeroCount, 0, 16);

  if (iSingleCtrMb < 6) { //from JVT-O079
    // NOTE(review): if the size is a byte count, 768 bytes covers 384 int16_t,
    // i.e. more than the 256 luma coefficients; presumably this also clears the
    // chroma coefficients that follow them in pCoeffLevel -- confirm against callers.
    pfSetMemZeroSize64 (pResMb, 768); // confirmed_safe_unsafe_usage
    return;
  }

  for (i = 0; i < 4; i++) {
    int16_t* pRes8x8 = pResMb + (i << 6);

    if (iSingleCtr8x8[i] >= 4) {
      for (j = 0; j < 4; j++) {
        const int32_t kiBlkIdx = (i << 2) + j;
        const int32_t kiCount  = pfGetNoneZeroCount (pBlockMb + (kiBlkIdx << 4));
        pCurMb->pNonZeroCount[g_kuiMbCountScan4Idx[kiBlkIdx]] = kiCount;
      }
      pfDequantizationFour4x4 (pRes8x8, g_kuiDequantCoeff[kuiQp]);
      pCurMb->uiCbp |= 1 << i;
    } else { // set zero for an 8x8 block; its non-zero counts stay at 0
      pfSetMemZeroSize64 (pRes8x8, 128); // confirmed_safe_unsafe_usage
    }
  }
}
/*!
 * \brief   Quantize, scan and dequantize the coefficients of one chroma plane.
 *
 * The DC terms are handled by pfQuantizationHadamard2x2, which fills aDct2x2
 * and pDct->iChromaDc[iUV - 1] and returns the DC non-zero count. The four
 * 4x4 blocks are then quantized with pfQuantizationFour4x4Max and AC-scanned
 * into pDct->iChromaBlock.
 *
 * For inter macroblocks a cost is accumulated per block (9 when the block
 * maximum exceeds 1, otherwise the value of pfCalculateSingleCtr4x4 while the
 * cost is below 7). For intra macroblocks any block with a non-zero maximum
 * forces the cost to INT_MAX. When the cost is below 7 the AC coefficients
 * and the plane's non-zero counts are cleared; otherwise the counts are
 * stored, the coefficients are dequantized and the upper nibble of
 * pCurMb->uiCbp is set to 2.
 *
 * When the DC count is non-zero the DC terms are dequantized, written back to
 * slot 0 of each 4x4 block, and the upper nibble of uiCbp is raised to 1
 * unless it is already 2.
 *
 * \param   pFuncList  function table providing the quantization/scan primitives
 * \param   pCurMb     current macroblock; pNonZeroCount and uiCbp are updated
 * \param   pMbCache   per-MB cache holding pDct
 * \param   pRes       64 coefficients of the plane, modified in place
 * \param   iUV        plane selector, 1 or 2
 */
void WelsEncRecUV (SWelsFuncPtrList* pFuncList, SMB* pCurMb, SMbCache* pMbCache, int16_t* pRes, int32_t iUV) {
  PQuantizationHadamardFunc pfQuantizationHadamard2x2 = pFuncList->pfQuantizationHadamard2x2;
  PQuantizationMaxFunc pfQuantizationFour4x4Max       = pFuncList->pfQuantizationFour4x4Max;
  PSetMemoryZero pfSetMemZeroSize8                    = pFuncList->pfSetMemZeroSize8;
  PSetMemoryZero pfSetMemZeroSize64                   = pFuncList->pfSetMemZeroSize64;
  PScanFunc pfScan4x4Ac                               = pFuncList->pfScan4x4Ac;
  PCalculateSingleCtrFunc pfCalculateSingleCtr4x4     = pFuncList->pfCalculateSingleCtr4x4;
  PGetNoneZeroCountFunc pfGetNoneZeroCount            = pFuncList->pfGetNoneZeroCount;
  PDeQuantizationFunc pfDequantizationFour4x4         = pFuncList->pfDequantizationFour4x4;
  const int32_t kiInterFlag           = !IS_INTRA (pCurMb->uiMbType);
  const uint8_t kiQp                  = pCurMb->uiChromaQp;
  const uint8_t kuiNoneZeroCountOffset = (iUV - 1) << 1;         //UV==1 or 2
  const uint8_t kuiSubMbIdx           = 16 + ((iUV - 1) << 2);   //kuiSubMbIdx == 16 or 20
  int16_t* iChromaDc                  = pMbCache->pDct->iChromaDc[iUV - 1];
  int16_t* const pBlockPlane          = pMbCache->pDct->iChromaBlock[ (iUV - 1) << 2];
  const int16_t* pMF                  = g_kiQuantMF[kiQp];
  // Intra macroblocks read the rounding table 6 rows further than inter ones.
  const int16_t* pFF                  = g_kiQuantInterFF[ (!kiInterFlag) * 6 + kiQp];
  int16_t aDct2x2[4], aMax[4];
  int32_t iSingleCtr8x8 = 0;
  uint8_t uiNoneZeroCountMbDc;
  int32_t iBlk;

  // DC path: doubled rounding offset and halved multiplier relative to the AC tables.
  uiNoneZeroCountMbDc = pfQuantizationHadamard2x2 (pRes, pFF[0] << 1, pMF[0] >> 1, aDct2x2, iChromaDc);

  pfQuantizationFour4x4Max (pRes, pFF, pMF, aMax);

  for (iBlk = 0; iBlk < 4; iBlk++) {
    int16_t* pBlock4x4 = pBlockPlane + (iBlk << 4);

    if (aMax[iBlk] == 0) {
      pfSetMemZeroSize8 (pBlock4x4, 32);
      continue;
    }

    pfScan4x4Ac (pBlock4x4, pRes + (iBlk << 4));
    if (!kiInterFlag) {
      iSingleCtr8x8 = INT_MAX; // intra: never discard surviving AC coefficients
    } else if (aMax[iBlk] > 1) {
      iSingleCtr8x8 += 9;
    } else if (iSingleCtr8x8 < 7) {
      iSingleCtr8x8 += pfCalculateSingleCtr4x4 (pBlock4x4);
    }
  }

  if (iSingleCtr8x8 < 7) { //from JVT-O079
    pfSetMemZeroSize64 (pRes, 128); // confirmed_safe_unsafe_usage
    ST16 (&pCurMb->pNonZeroCount[16 + kuiNoneZeroCountOffset], 0);
    ST16 (&pCurMb->pNonZeroCount[20 + kuiNoneZeroCountOffset], 0);
  } else {
    const uint8_t* kpNoneZeroCountIdx = &g_kuiMbCountScan4Idx[kuiSubMbIdx];

    for (iBlk = 0; iBlk < 4; iBlk++) {
      const uint8_t kuiCount = pfGetNoneZeroCount (pBlockPlane + (iBlk << 4));
      pCurMb->pNonZeroCount[kpNoneZeroCountIdx[iBlk]] = kuiCount;
    }
    pfDequantizationFour4x4 (pRes, g_kuiDequantCoeff[kiQp]);
    // AC present: the upper nibble of uiCbp becomes exactly 2.
    pCurMb->uiCbp &= 0x0F;
    pCurMb->uiCbp |= 0x20;
  }

  if (uiNoneZeroCountMbDc > 0) {
    WelsDequantIHadamard2x2Dc (aDct2x2, g_kuiDequantCoeff[kiQp][0]);
    if (2 != (pCurMb->uiCbp >> 4))
      pCurMb->uiCbp |= (0x01 << 4);
    // Restore the DC term of each 4x4 block.
    for (iBlk = 0; iBlk < 4; iBlk++)
      pRes[iBlk << 4] = aDct2x2[iBlk];
  }
}
/*!
 * \brief   Reconstruct a P-skip macroblock by copying the skip prediction.
 *
 * Copies three regions of pMbCache->pSkipMb into the reconstruction planes
 * SPicData.pCsMb[0..2]: a 16x16 region at offset 0 (source stride 16), then
 * 8x8 regions at offsets 256 and 320 (source stride 8). Destination strides
 * come from pCurLayer->iCsStride[0..2]. Finally the first 24 entries of
 * pCurMb->pNonZeroCount are cleared.
 *
 * \param   pCurLayer  current layer, provides the reconstruction strides
 * \param   pFuncList  function table providing the copy / clear primitives
 * \param   pCurMb     current macroblock; pNonZeroCount is cleared
 * \param   pMbCache   per-MB cache holding pSkipMb and the output pointers
 */
void WelsRecPskip (SDqLayer* pCurLayer, SWelsFuncPtrList* pFuncList, SMB* pCurMb, SMbCache* pMbCache) {
  const int32_t* kpRecStride = pCurLayer->iCsStride;
  uint8_t** ppRecPlane       = &pMbCache->SPicData.pCsMb[0];

  pFuncList->pfCopy16x16Aligned (ppRecPlane[0], kpRecStride[0], pMbCache->pSkipMb, 16);
  pFuncList->pfCopy8x8Aligned (ppRecPlane[1], kpRecStride[1], pMbCache->pSkipMb + 256, 8);
  pFuncList->pfCopy8x8Aligned (ppRecPlane[2], kpRecStride[2], pMbCache->pSkipMb + 320, 8);

  pFuncList->pfSetMemZeroSize8 (pCurMb->pNonZeroCount, 24);
}
/*!
 * \brief   Test whether the luma residual of a macroblock can be dropped.
 *
 * Quantizes pMbCache->pCoeffLevel one 8x8 quadrant at a time with the inter
 * rounding table. The test fails as soon as a 4x4 block reports a maximum
 * above 1, or once the accumulated pfCalculateSingleCtr4x4 cost of the blocks
 * whose maximum is exactly 1 reaches 6. Quantization and scanning are done in
 * place, so the coefficient and scan buffers are modified even on failure.
 *
 * \param   pEncCtx   encoder context, provides the function table
 * \param   pCurMb    current macroblock, provides uiLumaQp
 * \param   pMbCache  per-MB cache holding pCoeffLevel and pDct->iLumaBlock
 * \return  true when every block passed the test, false otherwise
 */
bool WelsTryPYskip (sWelsEncCtx* pEncCtx, SMB* pCurMb, SMbCache* pMbCache) {
  SWelsFuncPtrList* pFuncList = pEncCtx->pFuncList;
  const uint8_t kuiQp         = pCurMb->uiLumaQp;
  const int16_t* pMF          = g_kiQuantMF[kuiQp];
  const int16_t* pFF          = g_kiQuantInterFF[kuiQp];
  int16_t* const pResMb       = pMbCache->pCoeffLevel;
  int16_t* const pBlockMb     = pMbCache->pDct->iLumaBlock[0];
  uint16_t aMax[4];
  int32_t iSingleCtrMb = 0;

  for (int32_t iQuad = 0; iQuad < 4; ++iQuad) {
    int16_t* pRes8x8   = pResMb + (iQuad << 6);
    int16_t* pBlock8x8 = pBlockMb + (iQuad << 6);

    pFuncList->pfQuantizationFour4x4Max (pRes8x8, pFF, pMF, (int16_t*)aMax);

    for (int32_t iBlk = 0; iBlk < 4; ++iBlk) {
      const uint16_t kuiMax = aMax[iBlk];

      if (kuiMax > 1)
        return false; // iSingleCtrMb += 9, can't be P_SKIP
      if (kuiMax == 0)
        continue;     // empty block: the cost is unchanged

      pFuncList->pfScan4x4 (pBlock8x8 + (iBlk << 4), pRes8x8 + (iBlk << 4));
      iSingleCtrMb += pFuncList->pfCalculateSingleCtr4x4 (pBlock8x8 + (iBlk << 4));
      if (iSingleCtrMb >= 6)
        return false; //from JVT-O079
    }
  }
  return true;
}
/*!
 * \brief   Test whether the residual of one chroma plane can be dropped.
 *
 * The plane's coefficients are read from pMbCache->pCoeffLevel at offset 256
 * when iUV is 1 and at offset 320 otherwise. The chroma QP is looked up in
 * g_kuiChromaQpTable from the luma QP plus the PPS chroma QP index offset,
 * clipped by CLIP3_QP_0_51. The test fails when
 * pfQuantizationHadamard2x2Skip returns non-zero, when a 4x4 block reports a
 * maximum above 1, or once the accumulated pfCalculateSingleCtr4x4 cost of the
 * blocks whose maximum is exactly 1 reaches 7. Quantization and scanning are
 * done in place.
 *
 * \param   pEncCtx   encoder context (function table, current layer PPS)
 * \param   pCurMb    current macroblock, provides uiLumaQp
 * \param   pMbCache  per-MB cache holding pCoeffLevel and pDct->iChromaBlock
 * \param   iUV       plane selector, 1 or 2
 * \return  true when the plane passed the test, false otherwise
 */
bool WelsTryPUVskip (sWelsEncCtx* pEncCtx, SMB* pCurMb, SMbCache* pMbCache, int32_t iUV) {
  SWelsFuncPtrList* pFuncList = pEncCtx->pFuncList;
  int16_t* const pResPlane    = pMbCache->pCoeffLevel + ((iUV == 1) ? 256 : (256 + 64));
  const uint8_t kuiQp = g_kuiChromaQpTable[CLIP3_QP_0_51 (pCurMb->uiLumaQp +
                        pEncCtx->pCurDqLayer->sLayerInfo.pPpsP->uiChromaQpIndexOffset)];
  const int16_t* pMF = g_kiQuantMF[kuiQp];
  const int16_t* pFF = g_kiQuantInterFF[kuiQp];

  // DC check first: doubled rounding offset and halved multiplier relative to the AC tables.
  if (pFuncList->pfQuantizationHadamard2x2Skip (pResPlane, pFF[0] << 1, pMF[0] >> 1))
    return false;

  uint16_t aMax[4];
  int32_t iSingleCtrMb       = 0;
  int16_t* const pBlockPlane = pMbCache->pDct->iChromaBlock[ (iUV - 1) << 2];

  pFuncList->pfQuantizationFour4x4Max (pResPlane, pFF, pMF, (int16_t*)aMax);

  for (int32_t iBlk = 0; iBlk < 4; ++iBlk) {
    const uint16_t kuiMax = aMax[iBlk];

    if (kuiMax > 1)
      return false; // iSingleCtrMb += 9, can't be P_SKIP
    if (kuiMax == 0)
      continue;     // empty block: the cost is unchanged

    pFuncList->pfScan4x4Ac (pBlockPlane + (iBlk << 4), pResPlane + (iBlk << 4));
    iSingleCtrMb += pFuncList->pfCalculateSingleCtr4x4 (pBlockPlane + (iBlk << 4));
    if (iSingleCtrMb >= 7)
      return false; //from JVT-O079
  }
  return true;
}
} // namespace WelsEnc