ref: 661988ada76b04a7bf0ce902c9d74174e2590c36
dir: /codec/encoder/core/src/decode_mb_aux.cpp/
/*!
* \copy
* Copyright (c) 2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
#include <string.h>
#include "decode_mb_aux.h"
#include "wels_common_basis.h"
#include "cpu_core.h"
namespace WelsSVCEnc {
/****************************************************************************
* Dequantization and inverse Hadamard transform functions
****************************************************************************/
/*
 * In-place 4x4 butterfly transform of a coefficient block (the inverse
 * Hadamard step, going by the function name).
 *
 * pRes: 16 coefficients stored row by row (4x4); overwritten with the result.
 *
 * Every intermediate sum is narrowed to int16_t before it is reused, exactly
 * like the values written back into pRes.
 */
void WelsIHadamard4x4Dc(int16_t *pRes)
{
  // Row pass: each group of four consecutive coefficients is transformed
  // independently of the other rows.
  for (int32_t iRow = 0; iRow < 4; ++iRow)
  {
    int16_t *pRow = pRes + (iRow << 2);
    const int16_t iEvenSum  = pRow[0] + pRow[2];
    const int16_t iEvenDiff = pRow[0] - pRow[2];
    const int16_t iOddDiff  = pRow[1] - pRow[3];
    const int16_t iOddSum   = pRow[1] + pRow[3];

    pRow[0] = iEvenSum  + iOddSum;
    pRow[1] = iEvenDiff + iOddDiff;
    pRow[2] = iEvenDiff - iOddDiff;
    pRow[3] = iEvenSum  - iOddSum;
  }

  // Column pass: same butterfly, elements of a column are 4 entries apart.
  for (int32_t iCol = 0; iCol < 4; ++iCol)
  {
    int16_t *pCol = pRes + iCol;
    const int16_t iEvenSum  = pCol[0] + pCol[8];
    const int16_t iEvenDiff = pCol[0] - pCol[8];
    const int16_t iOddDiff  = pCol[4] - pCol[12];
    const int16_t iOddSum   = pCol[4] + pCol[12];

    pCol[0]  = iEvenSum  + iOddSum;
    pCol[4]  = iEvenDiff + iOddDiff;
    pCol[8]  = iEvenDiff - iOddDiff;
    pCol[12] = iEvenSum  - iOddSum;
  }
}
/*
 * Luma DC dequantization, for qp < 12.
 *
 * Scales each of the 16 coefficients in pRes in place:
 *   pRes[n] = (pRes[n] * scale + round) >> shift
 * with scale = g_kuiDequantCoeff[kiQp % 6][0], shift = 2 - kiQp / 6 and
 * round = 1 << (1 - kiQp / 6), i.e. half of 1 << shift.
 *
 * For kiQp >= 12 the rounding shift count would become negative, hence the
 * qp restriction.
 */
void WelsDequantLumaDc4x4(int16_t *pRes, const int32_t kiQp)
{
  const uint16_t kuiScale = g_kuiDequantCoeff[kiQp % 6][0];
  const int16_t kiQpDiv6  = kiQp / 6;
  const int16_t kiShift   = 2 - kiQpDiv6;
  const int16_t kiRound   = 1 << (1 - kiQpDiv6);

  // Coefficients are independent of each other, so a plain forward walk
  // over the block is sufficient.
  for (int32_t iIdx = 0; iIdx < 16; ++iIdx)
  {
    pRes[iIdx] = ( pRes[iIdx] * kuiScale + kiRound ) >> kiShift;
  }
}
/*
 * Combined 4x4 butterfly transform and scaling, for qp >= 12.
 *
 * pRes:  16 coefficients stored row by row; transformed in place.
 * kuiMF: factor every output coefficient is multiplied by.
 *
 * The row pass is a pure butterfly; the multiplication by kuiMF is folded
 * into the column pass. Intermediate butterfly terms are kept in int16_t.
 */
void WelsDequantIHadamard4x4_c(int16_t *pRes, const uint16_t kuiMF)
{
  // Row pass (no scaling yet).
  for (int16_t *pRow = pRes; pRow != pRes + 16; pRow += 4)
  {
    const int16_t iEvenSum  = pRow[0] + pRow[2];
    const int16_t iEvenDiff = pRow[0] - pRow[2];
    const int16_t iOddDiff  = pRow[1] - pRow[3];
    const int16_t iOddSum   = pRow[1] + pRow[3];

    pRow[0] = iEvenSum  + iOddSum;
    pRow[1] = iEvenDiff + iOddDiff;
    pRow[2] = iEvenDiff - iOddDiff;
    pRow[3] = iEvenSum  - iOddSum;
  }

  // Column pass, scaling each result by kuiMF on the way out.
  for (int32_t iCol = 0; iCol < 4; ++iCol)
  {
    int16_t *pCol = pRes + iCol;
    const int16_t iEvenSum  = pCol[0] + pCol[8];
    const int16_t iEvenDiff = pCol[0] - pCol[8];
    const int16_t iOddDiff  = pCol[4] - pCol[12];
    const int16_t iOddSum   = pCol[4] + pCol[12];

    pCol[0]  = (iEvenSum  + iOddSum ) * kuiMF;
    pCol[4]  = (iEvenDiff + iOddDiff) * kuiMF;
    pCol[8]  = (iEvenDiff - iOddDiff) * kuiMF;
    pCol[12] = (iEvenSum  - iOddSum ) * kuiMF;
  }
}
/*
 * 2x2 butterfly transform with scaling, applied in place to four
 * coefficients.
 *
 * pDct:  four coefficients; overwritten with the scaled transform output.
 * kuiMF: factor every output coefficient is multiplied by.
 *
 * Elements 0/2 and 1/3 are paired first; the pair results are then combined
 * and scaled. The pair sums/differences are held in int16_t.
 */
void WelsDequantIHadamard2x2Dc( int16_t* pDct, const uint16_t kuiMF)
{
  const int16_t kiPair02Sum  = pDct[0] + pDct[2];
  const int16_t kiPair02Diff = pDct[0] - pDct[2];
  const int16_t kiPair13Sum  = pDct[1] + pDct[3];
  const int16_t kiPair13Diff = pDct[1] - pDct[3];

  pDct[0] = (kiPair02Sum  + kiPair13Sum ) * kuiMF;
  pDct[1] = (kiPair02Sum  - kiPair13Sum ) * kuiMF;
  pDct[2] = (kiPair02Diff + kiPair13Diff) * kuiMF;
  pDct[3] = (kiPair02Diff - kiPair13Diff) * kuiMF;
}
/*
 * Dequantize one 4x4 block (16 coefficients) in place.
 *
 * pRes: 16 coefficients; each is multiplied by its scaling factor.
 * kpMF: 8 scaling factors. The table is applied twice: entry n scales both
 *       pRes[n] and pRes[n + 8].
 */
void WelsDequant4x4_c(int16_t *pRes, const uint16_t* kpMF)
{
  for (int32_t iCol = 0; iCol < 8; ++iCol)
  {
    const int32_t kiScale = kpMF[iCol];
    pRes[iCol]     = static_cast<int16_t>(pRes[iCol]     * kiScale);
    pRes[iCol + 8] = static_cast<int16_t>(pRes[iCol + 8] * kiScale);
  }
}
/*
 * Dequantize four consecutive 4x4 blocks (64 coefficients) in place.
 *
 * pRes: 64 coefficients; each is multiplied by its scaling factor.
 * kpMF: 8 scaling factors, reused every 8 coefficients: entry n scales
 *       pRes[n], pRes[n + 8], ..., pRes[n + 56].
 */
void WelsDequantFour4x4_c(int16_t *pRes, const uint16_t* kpMF)
{
  for (int32_t iCol = 0; iCol < 8; ++iCol)
  {
    const int32_t kiScale = kpMF[iCol];
    // Visit every coefficient that shares this table entry.
    for (int32_t iPos = iCol; iPos < 64; iPos += 8)
    {
      pRes[iPos] = static_cast<int16_t>(pRes[iPos] * kiScale);
    }
  }
}
/****************************************************************************
* IDCT functions, final output = prediction(CS) + IDCT(scaled_coeff)
****************************************************************************/
/*
 * Reconstruct one 4x4 block: inverse-transform the coefficients in pDct and
 * add the result to the prediction.
 *
 * pRec:        destination for the 4x4 reconstructed samples.
 * iStride:     distance between consecutive rows of pRec.
 * pPred:       4x4 prediction samples.
 * iPredStride: distance between consecutive rows of pPred.
 * pDct:        16 coefficients stored row by row; not modified.
 *
 * Each output sample is WELS_CLIP1( pred + ((residual + 32) >> 6) ).
 * NOTE(review): WELS_CLIP1 is a macro defined outside this file (presumably
 * a clamp to the 8-bit range - confirm). Its argument is deliberately kept
 * in the form "sample + (parenthesized term)" so the expansion does not
 * depend on how the macro parenthesizes its parameter.
 */
void WelsIDctT4Rec_c( uint8_t* pRec, int32_t iStride, uint8_t* pPred, int32_t iPredStride, int16_t* pDct )
{
  int16_t iRowPass[16];   // output of the horizontal pass, narrowed to 16 bits

  // Horizontal pass: one butterfly per row of four coefficients.
  for (int32_t iRow = 0; iRow < 4; ++iRow)
  {
    const int16_t *pIn  = pDct     + (iRow << 2);
    int16_t       *pOut = iRowPass + (iRow << 2);

    const int32_t kiEvenSum  = pIn[0] + pIn[2];
    const int32_t kiEvenDiff = pIn[0] - pIn[2];
    const int32_t kiOddSum   = pIn[1] + (pIn[3] >> 1);
    const int32_t kiOddDiff  = (pIn[1] >> 1) - pIn[3];

    pOut[0] = kiEvenSum  + kiOddSum;
    pOut[1] = kiEvenDiff + kiOddDiff;
    pOut[2] = kiEvenDiff - kiOddDiff;
    pOut[3] = kiEvenSum  - kiOddSum;
  }

  // Row offsets inside the destination and prediction buffers.
  const int32_t kiRecRow2  = iStride * 2;
  const int32_t kiRecRow3  = iStride * 3;
  const int32_t kiPredRow2 = iPredStride * 2;
  const int32_t kiPredRow3 = iPredStride * 3;

  // Vertical pass: one butterfly per column, then round, add the
  // prediction and clip.
  for (int32_t iCol = 0; iCol < 4; ++iCol)
  {
    const int32_t kiEvenSum  = iRowPass[iCol] + iRowPass[iCol + 8];
    const int32_t kiEvenDiff = iRowPass[iCol] - iRowPass[iCol + 8];
    const int32_t kiOddSum   = iRowPass[iCol + 4] + (iRowPass[iCol + 12] >> 1);
    const int32_t kiOddDiff  = (iRowPass[iCol + 4] >> 1) - iRowPass[iCol + 12];

    uint8_t       *pOutCol  = pRec  + iCol;
    const uint8_t *pPredCol = pPred + iCol;

    pOutCol[0]         = WELS_CLIP1( pPredCol[0]           + ((kiEvenSum  + kiOddSum  + 32) >> 6) );
    pOutCol[iStride]   = WELS_CLIP1( pPredCol[iPredStride] + ((kiEvenDiff + kiOddDiff + 32) >> 6) );
    pOutCol[kiRecRow2] = WELS_CLIP1( pPredCol[kiPredRow2]  + ((kiEvenDiff - kiOddDiff + 32) >> 6) );
    pOutCol[kiRecRow3] = WELS_CLIP1( pPredCol[kiPredRow3]  + ((kiEvenSum  - kiOddSum  + 32) >> 6) );
  }
}
/*
 * Reconstruct an 8x8 area as four 4x4 blocks by calling WelsIDctT4Rec_c on
 * each of them.
 *
 * pRec / iStride:     destination buffer and its row stride.
 * pPred / iPredStride: prediction buffer and its row stride.
 * pDct:               64 coefficients, 16 per 4x4 block.
 *
 * Blocks are processed in the order top-left, top-right, bottom-left,
 * bottom-right, consuming coefficient groups 0, 16, 32 and 48.
 */
void WelsIDctFourT4Rec_c( uint8_t* pRec, int32_t iStride, uint8_t* pPred, int32_t iPredStride, int16_t* pDct )
{
  for (int32_t iBlk = 0; iBlk < 4; ++iBlk)
  {
    const int32_t kiX = (iBlk & 1) * 4;    // column offset: 0 or 4 samples
    const int32_t kiY = (iBlk >> 1) * 4;   // row offset: 0 or 4 rows

    WelsIDctT4Rec_c( pRec  + (kiY * iStride     + kiX), iStride,
                     pPred + (kiY * iPredStride + kiX), iPredStride,
                     pDct + iBlk * 16 );
  }
}
/*
 * Reconstruct a 16x16 area by invoking pfIDctFourT4 once per 8x8 quadrant.
 *
 * pDst / iDstStride:   destination buffer and its row stride.
 * pPred / iPredStride: prediction buffer and its row stride.
 * pDct:                256 coefficients, 64 per quadrant.
 * pfIDctFourT4:        routine that reconstructs one 8x8 quadrant.
 *
 * Quadrants are visited top-left, top-right, bottom-left, bottom-right,
 * consuming coefficient groups 0, 64, 128 and 192.
 */
void WelsIDctT4RecOnMb(uint8_t* pDst, int32_t iDstStride, uint8_t* pPred, int32_t iPredStride, int16_t* pDct, PIDctFunc pfIDctFourT4)
{
  for (int32_t iQuad = 0; iQuad < 4; ++iQuad)
  {
    const int32_t kiX = (iQuad & 1) * 8;    // column offset: 0 or 8 samples
    const int32_t kiY = (iQuad >> 1) * 8;   // row offset: 0 or 8 rows

    pfIDctFourT4( pDst  + (kiY * iDstStride  + kiX), iDstStride,
                  pPred + (kiY * iPredStride + kiX), iPredStride,
                  pDct + iQuad * 64 );
  }
}
/*
 * pfIDctI16x16Dc: luma reconstruction of an I16x16 macroblock when only the
 * DC values are non-zero.
 *
 * pRec / iStride:      destination buffer (16x16 samples) and its row stride.
 * pPred / iPredStride: prediction buffer (16x16 samples) and its row stride.
 * pDctDc:              16 DC values, one per 4x4 block, stored row by row.
 *
 * Every sample of a 4x4 block receives the same rounded DC term:
 *   rec = WELS_CLIP1( pred + ((dc + 32) >> 6) )
 */
void WelsIDctRecI16x16Dc_c(uint8_t *pRec, int32_t iStride, uint8_t *pPred, int32_t iPredStride, int16_t *pDctDc)
{
  for (int32_t iRow = 0; iRow < 16; ++iRow, pRec += iStride, pPred += iPredStride)
  {
    // (iRow & 0x0C) equals 4 * (iRow / 4): the first DC entry of the row of
    // 4x4 blocks this sample row belongs to.
    const int16_t *pRowDc = pDctDc + (iRow & 0x0C);

    for (int32_t iCol = 0; iCol < 16; ++iCol)
    {
      // iCol >> 2 selects the 4x4 block column.
      pRec[iCol] = WELS_CLIP1( pPred[iCol] + ((pRowDc[iCol >> 2] + 32) >> 6) );
    }
  }
}
/*
 * Fill a table of 24 block offsets.
 *
 * pBlock:     output table with at least 24 entries.
 * kiStrideY:  stride used for entries 0..15.
 * kiStrideUV: stride used for entries 16..23.
 *
 * Entries 0..15 are written in groups of four. Group g covers a 2x2
 * arrangement of blocks whose top-left block sits at column 2 * (g & 1) and
 * row (g & 2), in block units; each entry is
 * (column + row * kiStrideY) << 2.
 * Entries 16..19 and 20..23 receive identical values computed with
 * kiStrideUV on a 2x2 block grid (presumably one set per chroma plane -
 * confirm against the callers).
 */
void WelsGetEncBlockStrideOffset(int32_t *pBlock, const int32_t kiStrideY, const int32_t kiStrideUV)
{
  for (int32_t iGroup = 0; iGroup < 4; ++iGroup)
  {
    const int32_t kiLeftCol = (iGroup & 0x01) << 1;   // 0 or 2
    const int32_t kiTopRow  = iGroup & 0x02;          // 0 or 2
    const int32_t kiTop     = kiTopRow * kiStrideY;
    const int32_t kiBottom  = (kiTopRow + 1) * kiStrideY;
    int32_t *pGroup = pBlock + (iGroup << 2);

    pGroup[0] = (kiLeftCol     + kiTop   ) << 2;
    pGroup[1] = (kiLeftCol + 1 + kiTop   ) << 2;
    pGroup[2] = (kiLeftCol     + kiBottom) << 2;
    pGroup[3] = (kiLeftCol + 1 + kiBottom) << 2;

    const int32_t kiSecondary = ((iGroup & 0x01) + kiTopRow * kiStrideUV) << 2;
    pBlock[16 + iGroup] = kiSecondary;
    pBlock[20 + iGroup] = kiSecondary;
  }
}
/*
 * Populate the dequantization / IDCT entries of the function pointer list.
 *
 * pFuncList: list whose reconstruction function pointers are set.
 * uiCpuFlag: CPU capability bits (WELS_CPU_*), consulted only when the
 *            build defines X86_ASM.
 *
 * The portable C routines are installed first. With X86_ASM defined, they
 * are then replaced by the MMX / SSE2 versions for each capability bit that
 * is set in uiCpuFlag.
 */
void WelsInitReconstructionFuncs( SWelsFuncPtrList *pFuncList, uint32_t uiCpuFlag )
{
  SWelsFuncPtrList &sFuncs = *pFuncList;

  // Portable defaults: dequantization ...
  sFuncs.pfDequantization4x4          = WelsDequant4x4_c;
  sFuncs.pfDequantizationFour4x4      = WelsDequantFour4x4_c;
  sFuncs.pfDequantizationIHadamard4x4 = WelsDequantIHadamard4x4_c;

  // ... and inverse transform / reconstruction.
  sFuncs.pfIDctT4       = WelsIDctT4Rec_c;
  sFuncs.pfIDctFourT4   = WelsIDctFourT4Rec_c;
  sFuncs.pfIDctI16x16Dc = WelsIDctRecI16x16Dc_c;

#ifdef X86_ASM
  // The single-block IDCT only has an MMX variant.
  if ( (uiCpuFlag & WELS_CPU_MMXEXT) != 0 )
  {
    sFuncs.pfIDctT4 = WelsIDctT4Rec_mmx;
  }

  // Everything else has an SSE2 variant.
  if ( (uiCpuFlag & WELS_CPU_SSE2) != 0 )
  {
    sFuncs.pfDequantization4x4          = WelsDequant4x4_sse2;
    sFuncs.pfDequantizationFour4x4      = WelsDequantFour4x4_sse2;
    sFuncs.pfDequantizationIHadamard4x4 = WelsDequantIHadamard4x4_sse2;
    sFuncs.pfIDctFourT4                 = WelsIDctFourT4Rec_sse2;
    sFuncs.pfIDctI16x16Dc               = WelsIDctRecI16x16Dc_sse2;
  }
#endif//X86_ASM
}
}