ref: 1928b9a1ba6f7c18ebc058835f308c3d7016a1da
dir: /codec/encoder/core/src/svc_encode_mb.cpp/
/*!
* \copy
* Copyright (c) 2009-2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*
* \file svc_encode_mb.cpp
*
* \brief Implementation for pCurMb encoding
*
* \date 05/19/2009 Created
*************************************************************************************
*/
#include <stdio.h> //test use for file operation
#include <string.h>
#include "svc_encode_mb.h"
#include "encode_mb_aux.h"
#include "decode_mb_aux.h"
#include "ls_defines.h"
#include "cpu_core.h"
#include "as264_common.h"
#include "mb_cache.h"
#include "array_stack_align.h"
namespace WelsSVCEnc {
/*!
 * \brief	Run pfDctFourT4 over the four 8x8 quadrants of a 16x16 block.
 *
 * The quadrants are visited in the order top-left, top-right, bottom-left,
 * bottom-right. Quadrant q writes to pRes + 64 * q. The source is addressed
 * with iEncStride, the prediction with a fixed stride of 16.
 *
 * \param	pRes		output buffer, addressed at offsets 0, 64, 128 and 192
 * \param	pEncMb		top-left source sample of the 16x16 block
 * \param	iEncStride	stride of the source buffer
 * \param	pBestPred	prediction samples, laid out with stride 16
 * \param	pfDctFourT4	routine invoked once per quadrant
 */
void WelsDctMb(int16_t* pRes, uint8_t* pEncMb, int32_t iEncStride, uint8_t* pBestPred, PDctFunc pfDctFourT4)
{
  for (int32_t iQuad = 0; iQuad < 4; ++iQuad)
  {
    const int32_t kiX = (iQuad & 1) << 3;	// horizontal offset of the quadrant: 0 or 8
    const int32_t kiY = (iQuad >> 1) << 3;	// vertical offset of the quadrant: 0 or 8

    pfDctFourT4(pRes + (iQuad << 6),
                pEncMb + kiY * iEncStride + kiX,
                iEncStride,
                pBestPred + (kiY << 4) + kiX,
                16);
  }
}
/*!
 * \brief	Encode and reconstruct the luma plane of an Intra16x16 macroblock.
 *
 * Sequence, as performed below:
 *  - WelsDctMb() on the source macroblock against pMbCache->pMemPredLuma;
 *  - pfTransformHadamard4x4Dc / pfQuantizationDc4x4 / pfScan4x4 on the DC terms,
 *    result stored in pMbCache->pDct->iLumaI16x16Dc;
 *  - pfQuantizationFour4x4 / pfScan4x4Ac on every 4x4 block, result stored in
 *    pMbCache->pDct->iLumaBlock, and per-block counts stored in
 *    pCurMb->pNonZeroCount (indexed through g_kuiMbCountScan4Idx);
 *  - reconstruction into pMbCache->SPicData.pCsMb[0]: full inverse path when any
 *    AC count is non-zero (uiCbp is then set to 15), DC-only path when only the
 *    DC count is non-zero, plain copy of the prediction otherwise.
 *
 * \param	pEncCtx		encoder context (function table and current layer are read)
 * \param	pCurMb		macroblock being coded; pNonZeroCount and uiCbp are written
 * \param	pMbCache	per-macroblock working buffers
 */
void WelsEncRecI16x16Y(sWelsEncCtx *pEncCtx, SMB *pCurMb, SMbCache *pMbCache)
{
  ENFORCE_STACK_ALIGN_1D(int16_t, aDctT4Dc, 16, 16)

  // kuiDcSlot[k] is the entry of aDctT4Dc that is written back as the first
  // coefficient of the k-th 4x4 block (blocks lie 16 coefficients apart).
  static const uint8_t kuiDcSlot[16] = {
    0,  1,  4,  5,
    2,  3,  6,  7,
    8,  9,  12, 13,
    10, 11, 14, 15
  };

  SWelsFuncPtrList* pFunc	= pEncCtx->pFuncList;
  SDqLayer* pLayer		= pEncCtx->pCurDqLayer;
  const int32_t kiSrcStride	= pLayer->iEncStride[0];
  const int32_t kiDstStride	= pLayer->iCsStride[0];
  const uint8_t kuiQp		= pCurMb->uiLumaQp;
  int16_t* const pCoeff		= pMbCache->pCoeffLevel;
  int16_t* const pScanned	= pMbCache->pDct->iLumaBlock[0];
  uint8_t* const pRec		= pMbCache->SPicData.pCsMb[0];
  uint8_t* const pPrediction	= pMbCache->pMemPredLuma;
  int16_t* pMF			= g_kiQuantMF[kuiQp];
  int16_t* pFF			= g_iQuantIntraFF[kuiQp];
  uint32_t uiAcCountMb		= 0;
  int32_t iBlk8, iBlk4;

  WelsDctMb(pCoeff, pMbCache->SPicData.pEncMb[0], kiSrcStride, pPrediction, pFunc->pfDctFourT4);

  // DC path: the DC terms are handled separately from the rest of each block.
  pFunc->pfTransformHadamard4x4Dc(aDctT4Dc, pCoeff);
  pFunc->pfQuantizationDc4x4(aDctT4Dc, pFF[0] << 1, pMF[0] >> 1);
  pFunc->pfScan4x4(pMbCache->pDct->iLumaI16x16Dc, aDctT4Dc);
  const uint32_t kuiDcCount = pFunc->pfGetNoneZeroCount(pMbCache->pDct->iLumaI16x16Dc);

  // AC path: quantize one 8x8 quadrant at a time, then scan its four 4x4 blocks.
  for (iBlk8 = 0; iBlk8 < 4; ++iBlk8)
  {
    int16_t* pCoeff8x8 = pCoeff + (iBlk8 << 6);
    int16_t* pScan8x8  = pScanned + (iBlk8 << 6);

    pFunc->pfQuantizationFour4x4(pCoeff8x8, pFF, pMF);
    for (iBlk4 = 0; iBlk4 < 4; ++iBlk4)
    {
      pFunc->pfScan4x4Ac(pScan8x8 + (iBlk4 << 4), pCoeff8x8 + (iBlk4 << 4));
    }
  }

  // Record the count of every 4x4 block and accumulate the macroblock total.
  for (iBlk4 = 0; iBlk4 < 16; ++iBlk4)
  {
    const uint32_t kuiCount = pFunc->pfGetNoneZeroCount(pScanned + (iBlk4 << 4));
    pCurMb->pNonZeroCount[g_kuiMbCountScan4Idx[iBlk4]] = kuiCount;
    uiAcCountMb += kuiCount;
  }

  // Bring the DC terms back for reconstruction; two code paths depending on QP.
  if (kuiDcCount > 0)
  {
    if (kuiQp < 12)
    {
      WelsIHadamard4x4Dc(aDctT4Dc);
      WelsDequantLumaDc4x4(aDctT4Dc, kuiQp);
    }
    else
    {
      pFunc->pfDequantizationIHadamard4x4(aDctT4Dc, g_kuiDequantCoeff[kuiQp][0] >> 2);
    }
  }

  if (uiAcCountMb > 0)
  {
    pCurMb->uiCbp = 15;

    for (iBlk8 = 0; iBlk8 < 4; ++iBlk8)
    {
      pFunc->pfDequantizationFour4x4(pCoeff + (iBlk8 << 6), g_kuiDequantCoeff[kuiQp]);
    }

    // Put the separately processed DC terms back in front of their blocks.
    for (iBlk4 = 0; iBlk4 < 16; ++iBlk4)
    {
      pCoeff[iBlk4 << 4] = aDctT4Dc[kuiDcSlot[iBlk4]];
    }

    for (iBlk8 = 0; iBlk8 < 4; ++iBlk8)
    {
      const int32_t kiX = (iBlk8 & 1) << 3;	// 0 or 8
      const int32_t kiY = (iBlk8 >> 1) << 3;	// 0 or 8

      pFunc->pfIDctFourT4(pRec + kiDstStride * kiY + kiX, kiDstStride,
                          pPrediction + (kiY << 4) + kiX, 16,
                          pCoeff + (iBlk8 << 6));
    }
  }
  else if (kuiDcCount > 0)
  {
    pFunc->pfIDctI16x16Dc(pRec, kiDstStride, pPrediction, 16, aDctT4Dc);
  }
  else
  {
    // Nothing survived quantization: the reconstruction is the prediction itself.
    pFunc->pfCopy16x16Aligned(pRec, kiDstStride, pPrediction, 16);
  }
}
/*!
 * \brief	Encode and reconstruct one 4x4 luma block of an Intra4x4 macroblock.
 *
 * The block is transformed against pMbCache->pBestPredI4x4Blk4, quantized and
 * scanned into pMbCache->pDct->iLumaBlock[uiI4x4Idx]. Its count is stored in
 * pCurMb->pNonZeroCount. When the count is positive, bit (uiI4x4Idx >> 2) of
 * uiCbp is set and the block is rebuilt through pfDequantization4x4 / pfIDctT4;
 * otherwise the prediction is copied to the reconstruction buffer.
 *
 * \param	pEncCtx		encoder context (function table, layer strides, offset tables)
 * \param	pCurMb		macroblock being coded; pNonZeroCount and uiCbp are written
 * \param	pMbCache	per-macroblock working buffers
 * \param	uiI4x4Idx	index of the 4x4 block inside the macroblock
 */
void WelsEncRecI4x4Y( sWelsEncCtx *pEncCtx, SMB *pCurMb, SMbCache *pMbCache, uint8_t uiI4x4Idx)
{
  SWelsFuncPtrList* pFunc	= pEncCtx->pFuncList;
  SDqLayer* pLayer		= pEncCtx->pCurDqLayer;
  const int32_t kiSrcStride	= pLayer->iEncStride[0];
  const int32_t kiDstStride	= pLayer->iCsStride[0];
  const uint8_t kuiQp		= pCurMb->uiLumaQp;
  const uint32_t kuiNzcSlot	= g_kuiMbCountScan4Idx[uiI4x4Idx];
  int16_t* pCoeff		= pMbCache->pCoeffLevel;
  int16_t* pScanned		= pMbCache->pDct->iLumaBlock[uiI4x4Idx];
  uint8_t* pSrcMb		= pMbCache->SPicData.pEncMb[0];
  uint8_t* pRecMb		= pMbCache->SPicData.pCsMb[0];
  uint8_t* pPrediction		= pMbCache->pBestPredI4x4Blk4;
  int16_t* pMF			= g_kiQuantMF[kuiQp];
  int16_t* pFF			= g_iQuantIntraFF[kuiQp];
  // Per-block sample offsets; the reconstruction table is chosen by whether
  // the temporal id is zero.
  int32_t* pSrcOffsetTab = pEncCtx->pStrideTab->pStrideEncBlockOffset[pEncCtx->uiDependencyId];
  int32_t* pRecOffsetTab = pEncCtx->pStrideTab->pStrideDecBlockOffset[pEncCtx->uiDependencyId][0 == pEncCtx->uiTemporalId];

  pFunc->pfDctT4(pCoeff, pSrcMb + pSrcOffsetTab[uiI4x4Idx], kiSrcStride, pPrediction, 4);
  pFunc->pfQuantization4x4(pCoeff, pFF, pMF);
  pFunc->pfScan4x4(pScanned, pCoeff);

  const int32_t kiCount = pFunc->pfGetNoneZeroCount(pScanned);
  pCurMb->pNonZeroCount[kuiNzcSlot] = kiCount;

  uint8_t* pRecBlk = pRecMb + pRecOffsetTab[uiI4x4Idx];

  if (kiCount <= 0)
  {
    // No residual: the reconstructed block equals the prediction.
    WelsCopy4x4(pRecBlk, kiDstStride, pPrediction, 4);
    return;
  }

  pCurMb->uiCbp |= 1 << (uiI4x4Idx >> 2);
  pFunc->pfDequantization4x4(pCoeff, g_kuiDequantCoeff[kuiQp]);
  pFunc->pfIDctT4(pRecBlk, kiDstStride, pPrediction, 4, pCoeff);
}
/*!
 * \brief	Quantize the luma coefficients of an inter macroblock and discard
 *		8x8 blocks (or the whole macroblock) whose coefficients score low.
 *
 * Works in place on pMbCache->pCoeffLevel (16 blocks of 16 coefficients) and
 * writes the scanned blocks to pMbCache->pDct->iLumaBlock.
 *
 * Pass 1, per 8x8 quadrant: pfQuantizationFour4x4Max quantizes the quadrant and
 * reports one maximum per 4x4 block. A 4x4 block with maximum 0 gets its scanned
 * block cleared; a maximum above 1 adds 9 to the quadrant score; otherwise
 * pfCalculateSingleCtr4x4 supplies the score (only evaluated while the quadrant
 * score is still below 6).
 *
 * Pass 2: the first 16 entries of pCurMb->pNonZeroCount are cleared. If the
 * macroblock score is below 6 the coefficient buffer is zeroed and nothing else
 * is written. Otherwise every quadrant with score >= 4 gets its counts stored,
 * is dequantized and has bit i of uiCbp set; the remaining quadrants are zeroed.
 *
 * \param	pFuncList	function table supplying the quantization/scan primitives
 * \param	pCurMb		macroblock being coded; pNonZeroCount and uiCbp are written
 * \param	pMbCache	per-macroblock working buffers
 */
void WelsEncInterY(SWelsFuncPtrList *pFuncList, SMB * pCurMb, SMbCache *pMbCache)
{
  PQuantizationMaxFunc pfQuantizationFour4x4Max		= pFuncList->pfQuantizationFour4x4Max;
  PSetMemoryZero pfSetMemZeroSize8			= pFuncList->pfSetMemZeroSize8;
  PSetMemoryZero pfSetMemZeroSize64			= pFuncList->pfSetMemZeroSize64;
  PScanFunc pfScan4x4					= pFuncList->pfScan4x4;
  PCalculateSingleCtrFunc pfCalculateSingleCtr4x4	= pFuncList->pfCalculateSingleCtr4x4;
  PGetNoneZeroCountFunc pfGetNoneZeroCount		= pFuncList->pfGetNoneZeroCount;
  PDeQuantizationFunc pfDequantizationFour4x4		= pFuncList->pfDequantizationFour4x4;

  int16_t* pRes		= pMbCache->pCoeffLevel;
  int16_t* pBlock	= pMbCache->pDct->iLumaBlock[0];
  const uint8_t kuiQp	= pCurMb->uiLumaQp;
  int16_t* pMF		= g_kiQuantMF[kuiQp];
  int16_t* pFF		= g_kiQuantInterFF[kuiQp];
  int16_t aMax[16];
  int32_t iSingleCtr8x8[4];
  int32_t iSingleCtrMb	= 0;
  int32_t i, j;

  // Pass 1: quantize, scan and score every 4x4 block.
  for (i = 0; i < 4; i++)
  {
    pfQuantizationFour4x4Max(pRes, pFF, pMF, aMax + (i << 2));
    iSingleCtr8x8[i] = 0;
    for (j = 0; j < 4; j++)
    {
      const int16_t kiMax = aMax[(i << 2) + j];
      if (kiMax == 0)
      {
        pfSetMemZeroSize8(pBlock, 32);
      }
      else
      {
        pfScan4x4(pBlock, pRes);
        if (kiMax > 1)
        {
          iSingleCtr8x8[i] += 9;
        }
        else if (iSingleCtr8x8[i] < 6)
        {
          iSingleCtr8x8[i] += pfCalculateSingleCtr4x4(pBlock);
        }
      }
      pRes += 16;
      pBlock += 16;
    }
    iSingleCtrMb += iSingleCtr8x8[i];
  }
  pBlock -= 256;
  pRes -= 256;

  memset(pCurMb->pNonZeroCount, 0, 16);

  if (iSingleCtrMb < 6)	//from JVT-O079
  {
    // NOTE(review): size argument 768 is kept exactly as in the original; if the
    // unit is bytes it reaches past the 256 coefficients handled in this function.
    pfSetMemZeroSize64(pRes, 768);	// confirmed_safe_unsafe_usage
    return;
  }

  // Pass 2: keep or drop each 8x8 quadrant.
  const uint8_t* kpNoneZeroCountIdx = g_kuiMbCountScan4Idx;
  for (i = 0; i < 4; i++)
  {
    if (iSingleCtr8x8[i] >= 4)
    {
      for (j = 0; j < 4; j++)
      {
        pCurMb->pNonZeroCount[kpNoneZeroCountIdx[j]] = pfGetNoneZeroCount(pBlock + (j << 4));
      }
      pfDequantizationFour4x4(pRes, g_kuiDequantCoeff[kuiQp]);
      pCurMb->uiCbp |= 1 << i;
    }
    else	// set zero for an 8x8 block
    {
      pfSetMemZeroSize64(pRes, 128);	// confirmed_safe_unsafe_usage
    }
    kpNoneZeroCountIdx += 4;
    pBlock += 64;
    pRes += 64;
  }
}
/*!
 * \brief	Quantize and dequantize the coefficients of one chroma plane.
 *
 * Steps, as performed below:
 *  - pfQuantizationHadamard2x2 processes the four DC terms (results in aDct2x2
 *    and pMbCache->pDct->iChromaDc[iUV - 1]) and returns their count;
 *  - pfQuantizationFour4x4Max quantizes the four 4x4 blocks and reports one
 *    maximum per block; blocks with maximum 0 get their scanned block cleared,
 *    the others are scanned with pfScan4x4Ac;
 *  - for inter macroblocks a score is accumulated (9 for a maximum above 1,
 *    otherwise pfCalculateSingleCtr4x4); for intra macroblocks the score is
 *    forced to INT_MAX so that the blocks are always kept;
 *  - score < 7: coefficients and the plane's four pNonZeroCount entries are
 *    zeroed; otherwise counts are stored, coefficients are dequantized and the
 *    upper bits of uiCbp are replaced by 0x20;
 *  - if the DC count is positive the DC terms are rebuilt with
 *    WelsDequantIHadamard2x2Dc and written back to the head of each 4x4 block;
 *    bit 0x10 of uiCbp is set unless (uiCbp >> 4) is already 2.
 *
 * \param	pFuncList	function table supplying the quantization/scan primitives
 * \param	pCurMb		macroblock being coded; pNonZeroCount and uiCbp are written
 * \param	pMbCache	per-macroblock working buffers
 * \param	pRes		64 coefficients of the plane, processed in place
 * \param	iUV		plane selector, 1 or 2
 */
void WelsEncRecUV(SWelsFuncPtrList *pFuncList, SMB * pCurMb, SMbCache *pMbCache, int16_t * pRes, int32_t iUV)
{
  PQuantizationHadamardFunc pfQuantizationHadamard2x2	= pFuncList->pfQuantizationHadamard2x2;
  PQuantizationMaxFunc pfQuantizationFour4x4Max		= pFuncList->pfQuantizationFour4x4Max;
  PSetMemoryZero pfSetMemZeroSize8			= pFuncList->pfSetMemZeroSize8;
  PSetMemoryZero pfSetMemZeroSize64			= pFuncList->pfSetMemZeroSize64;
  PScanFunc pfScan4x4Ac					= pFuncList->pfScan4x4Ac;
  PCalculateSingleCtrFunc pfCalculateSingleCtr4x4	= pFuncList->pfCalculateSingleCtr4x4;
  PGetNoneZeroCountFunc pfGetNoneZeroCount		= pFuncList->pfGetNoneZeroCount;
  PDeQuantizationFunc pfDequantizationFour4x4		= pFuncList->pfDequantizationFour4x4;

  const int32_t kiInterFlag		= !IS_INTRA( pCurMb->uiMbType);
  const uint8_t kiQp			= pCurMb->uiChromaQp;
  const uint8_t kuiNoneZeroCountOffset	= (iUV - 1) << 1;		//UV==1 or 2
  const uint8_t kuiSubMbIdx		= 16 + ((iUV - 1) << 2);	//kuiSubMbIdx == 16 or 20
  int16_t* pChromaDc			= pMbCache->pDct->iChromaDc[iUV - 1];
  int16_t* pBlock			= pMbCache->pDct->iChromaBlock[(iUV - 1) << 2];
  int16_t* pMF				= g_kiQuantMF[kiQp];
  // Intra macroblocks use the entry 6 positions further into the inter table.
  int16_t* pFF				= g_kiQuantInterFF[(!kiInterFlag) * 6 + kiQp];
  int16_t aDct2x2[4], aMax[4];
  int32_t iSingleCtr8x8			= 0;
  uint8_t uiNoneZeroCount, uiNoneZeroCountMbDc;
  int32_t i;

  uiNoneZeroCountMbDc = pfQuantizationHadamard2x2(pRes, pFF[0] << 1, pMF[0] >> 1, aDct2x2, pChromaDc);

  pfQuantizationFour4x4Max(pRes, pFF, pMF, aMax);

  for (i = 0; i < 4; i++)
  {
    int16_t* pBlock4x4 = pBlock + (i << 4);

    if (aMax[i] == 0)
    {
      pfSetMemZeroSize8(pBlock4x4, 32);
      continue;
    }

    pfScan4x4Ac(pBlock4x4, pRes + (i << 4));
    if (!kiInterFlag)
    {
      iSingleCtr8x8 = INT_MAX;	// intra blocks are never dropped
    }
    else if (aMax[i] > 1)
    {
      iSingleCtr8x8 += 9;
    }
    else if (iSingleCtr8x8 < 7)
    {
      iSingleCtr8x8 += pfCalculateSingleCtr4x4(pBlock4x4);
    }
  }

  if (iSingleCtr8x8 < 7)	//from JVT-O079
  {
    pfSetMemZeroSize64(pRes, 128);	// confirmed_safe_unsafe_usage
    ST16( &pCurMb->pNonZeroCount[16 + kuiNoneZeroCountOffset], 0 );
    ST16( &pCurMb->pNonZeroCount[20 + kuiNoneZeroCountOffset], 0 );
  }
  else
  {
    const uint8_t* kpNoneZeroCountIdx = &g_kuiMbCountScan4Idx[kuiSubMbIdx];
    for (i = 0; i < 4; i++)
    {
      uiNoneZeroCount = pfGetNoneZeroCount(pBlock + (i << 4));
      pCurMb->pNonZeroCount[kpNoneZeroCountIdx[i]] = uiNoneZeroCount;
    }
    pfDequantizationFour4x4(pRes, g_kuiDequantCoeff[kiQp]);
    pCurMb->uiCbp &= 0x0F;
    pCurMb->uiCbp |= 0x20;
  }

  if (uiNoneZeroCountMbDc > 0)
  {
    WelsDequantIHadamard2x2Dc(aDct2x2, g_kuiDequantCoeff[kiQp][0] >> 1);
    if ( 2 != (pCurMb->uiCbp >> 4) )
    {
      pCurMb->uiCbp |= (0x01 << 4);
    }
    // Write the rebuilt DC term back as the first coefficient of each 4x4 block.
    for (i = 0; i < 4; i++)
    {
      pRes[i << 4] = aDct2x2[i];
    }
  }
}
/*!
 * \brief	Reconstruct a skipped macroblock from the buffer in pMbCache->pSkipMb.
 *
 * Copies a 16x16 block from offset 0 (source stride 16) to plane 0, and 8x8
 * blocks from offsets 256 and 320 (source stride 8) to planes 1 and 2, using
 * the strides in pCurLayer->iCsStride. Finally pfSetMemZeroSize8 is called on
 * pCurMb->pNonZeroCount with size argument 24.
 *
 * \param	pCurLayer	layer supplying the destination strides
 * \param	pFuncList	function table supplying the copy/zero primitives
 * \param	pCurMb		macroblock whose pNonZeroCount is cleared
 * \param	pMbCache	cache holding the destination pointers and the skip buffer
 */
void WelsRecPskip(SDqLayer *pCurLayer, SWelsFuncPtrList *pFuncList, SMB * pCurMb, SMbCache *pMbCache)
{
  pFuncList->pfCopy16x16Aligned(pMbCache->SPicData.pCsMb[0], pCurLayer->iCsStride[0],
                                pMbCache->pSkipMb, 16);
  pFuncList->pfCopy8x8Aligned(pMbCache->SPicData.pCsMb[1], pCurLayer->iCsStride[1],
                              pMbCache->pSkipMb + 256, 8);
  pFuncList->pfCopy8x8Aligned(pMbCache->SPicData.pCsMb[2], pCurLayer->iCsStride[2],
                              pMbCache->pSkipMb + 320, 8);

  pFuncList->pfSetMemZeroSize8(pCurMb->pNonZeroCount, 24);
}
/*!
 * \brief	Decide whether the luma coefficients allow the macroblock to be skipped.
 *
 * Quantizes pMbCache->pCoeffLevel one 8x8 quadrant at a time with
 * pfQuantizationFour4x4Max. The test fails as soon as a 4x4 block reports a
 * maximum above 1, or once the accumulated pfCalculateSingleCtr4x4 score of
 * the blocks with maximum 1 reaches 6.
 *
 * \param	pEncCtx		encoder context supplying the function table
 * \param	pCurMb		macroblock under test (uiLumaQp is read)
 * \param	pMbCache	per-macroblock working buffers; coefficients are quantized in place
 * \return	TRUE when no early exit was taken, FALSE otherwise
 */
BOOL_T WelsTryPYskip(sWelsEncCtx * pEncCtx, SMB * pCurMb, SMbCache *pMbCache)
{
  SWelsFuncPtrList* pFunc	= pEncCtx->pFuncList;
  const uint8_t kuiQp		= pCurMb->uiLumaQp;
  int16_t* pMF			= g_kiQuantMF[kuiQp];
  int16_t* pFF			= g_kiQuantInterFF[kuiQp];
  int16_t* pCoeff		= pMbCache->pCoeffLevel;
  int16_t* pScanned		= pMbCache->pDct->iLumaBlock[0];
  uint16_t aMax[4];
  int32_t iScore		= 0;

  for (int32_t iBlk8 = 0; iBlk8 < 4; ++iBlk8)
  {
    pFunc->pfQuantizationFour4x4Max(pCoeff + (iBlk8 << 6), pFF, pMF, (int16_t*)aMax);

    for (int32_t iBlk4 = 0; iBlk4 < 4; ++iBlk4)
    {
      const int32_t kiOffset = (iBlk8 << 6) + (iBlk4 << 4);

      if (aMax[iBlk4] > 1)
      {
        return FALSE;	// would add 9 to the score, can't be P_SKIP
      }
      if (aMax[iBlk4] == 1)
      {
        pFunc->pfScan4x4(pScanned + kiOffset, pCoeff + kiOffset);
        iScore += pFunc->pfCalculateSingleCtr4x4(pScanned + kiOffset);
      }
      if (iScore >= 6)
      {
        return FALSE;	//from JVT-O079
      }
    }
  }
  return TRUE;
}
/*!
 * \brief	Decide whether the coefficients of one chroma plane allow a skip.
 *
 * The plane's coefficients start at pCoeffLevel[256] for iUV == 1 and at
 * pCoeffLevel[256 + 64] otherwise. The QP is looked up in g_kuiChromaQpTable
 * from the luma QP plus the PPS chroma QP index offset. The test fails if
 * pfQuantizationHadamard2x2Skip returns non-zero, if any 4x4 block reports a
 * maximum above 1, or once the accumulated pfCalculateSingleCtr4x4 score of
 * the blocks with maximum 1 reaches 7.
 *
 * \param	pEncCtx		encoder context supplying the function table and current layer
 * \param	pCurMb		macroblock under test (uiLumaQp is read)
 * \param	pMbCache	per-macroblock working buffers; coefficients are quantized in place
 * \param	iUV		plane selector, 1 or 2
 * \return	TRUE when no early exit was taken, FALSE otherwise
 */
BOOL_T WelsTryPUVskip(sWelsEncCtx * pEncCtx, SMB * pCurMb, SMbCache *pMbCache, int32_t iUV)
{
  SWelsFuncPtrList* pFunc	= pEncCtx->pFuncList;
  int16_t* pCoeff		= &(pMbCache->pCoeffLevel[(iUV == 1) ? 256 : (256 + 64)]);
  const uint8_t kuiQp		= g_kuiChromaQpTable[CLIP3_QP_0_51(pCurMb->uiLumaQp + pEncCtx->pCurDqLayer->sLayerInfo.pPpsP->uiChromaQpIndexOffset)];
  int16_t* pMF			= g_kiQuantMF[kuiQp];
  int16_t* pFF			= g_kiQuantInterFF[kuiQp];

  // DC terms first: any non-zero result rules the skip out immediately.
  if (pFunc->pfQuantizationHadamard2x2Skip(pCoeff, pFF[0] << 1, pMF[0] >> 1))
  {
    return FALSE;
  }

  uint16_t aMax[4];
  int32_t iScore	= 0;
  int16_t* pScanned	= pMbCache->pDct->iChromaBlock[(iUV - 1) << 2];

  pFunc->pfQuantizationFour4x4Max(pCoeff, pFF, pMF, (int16_t*)aMax);

  for (int32_t iBlk4 = 0; iBlk4 < 4; ++iBlk4)
  {
    const int32_t kiOffset = iBlk4 << 4;

    if (aMax[iBlk4] > 1)
    {
      return FALSE;	// would add 9 to the score, can't be P_SKIP
    }
    if (aMax[iBlk4] == 1)
    {
      pFunc->pfScan4x4Ac(pScanned + kiOffset, pCoeff + kiOffset);
      iScore += pFunc->pfCalculateSingleCtr4x4(pScanned + kiOffset);
    }
    if (iScore >= 7)
    {
      return FALSE;	//from JVT-O079
    }
  }
  return TRUE;
}
} // namespace WelsSVCEnc