ref: 661988ada76b04a7bf0ce902c9d74174e2590c36
dir: /codec/encoder/core/src/slice_multi_threading.cpp/
/*!
* \copy
* Copyright (c) 2010-2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*
* \file slice_multi_threading.cpp
*
* \brief slice based multiple threading
*
* \date 04/16/2010 Created
*
*************************************************************************************
*/
#if defined(MT_ENABLED)
#include <assert.h>
#ifdef __GNUC__
#include <semaphore.h>
#ifndef SEM_NAME_MAX
// length of semaphore name should be system constrained at least on mac 10.7
#define SEM_NAME_MAX 32
#endif//SEM_NAME_MAX
#endif//__GNUC__
#include "slice_multi_threading.h"
#include "mt_defs.h"
#include "nal_encap.h"
#include "utils.h"
#include "encoder.h"
#include "svc_encode_slice.h"
#include "deblocking.h"
#include "svc_enc_golomb.h"
#include "crt_util_safe_x.h" // for safe crt like calls
#include "rc.h"
#if defined(X86_ASM)
#include "cpu.h"
#endif//X86_ASM
#if defined(DYNAMIC_SLICE_ASSIGN) || defined(MT_DEBUG)
#include "measure_time.h"
#endif//DYNAMIC_SLICE_ASSIGN
namespace WelsSVCEnc {
/*!
 * \brief   Refresh the neighbor-availability flags and the slice idc of every MB of one slice
 *
 * \param   pSliceCtx   slice context providing the overall MB map, the picture width in MBs,
 *                      and the first MB / MB count of each slice
 * \param   pMbList     MB list, indexed by MB address
 * \param   uiSliceIdc  index of the slice whose MBs are updated
 *
 * \return  NONE
 */
void UpdateMbListNeighborParallel( SSliceCtx *pSliceCtx,
                                   SMB *pMbList,
                                   const int32_t uiSliceIdc )
{
  const uint8_t *kpSliceIdcMap  = pSliceCtx->pOverallMbMap;
  const int32_t kiPicWidthInMb  = pSliceCtx->iMbWidth;
  const int32_t kiFirstMb       = pSliceCtx->pFirstMbInSlice[uiSliceIdc];
  const int32_t kiLastMb        = kiFirstMb + pSliceCtx->pCountMbNumInSlice[uiSliceIdc] - 1;
  int32_t iMbIdx                = kiFirstMb;

  // the first MB is always visited, whatever the MB count of the slice is
  do {
    SMB *pCurMb               = &pMbList[iMbIdx];
    const int32_t kiCurXY     = pCurMb->iMbXY;
    const int32_t kiAboveXY   = kiCurXY - kiPicWidthInMb;
    const bool kbNotFirstCol  = (pCurMb->iMbX > 0);
    const bool kbNotLastCol   = (pCurMb->iMbX < (kiPicWidthInMb-1));
    const bool kbNotFirstRow  = (pCurMb->iMbY > 0);
    uint32_t uiAvailMask      = 0;

    // a neighbor is available only if it lies inside the picture and belongs to the same slice;
    // the position tests come first so that the map is never read outside the picture
    if ( kbNotFirstCol && (uiSliceIdc == kpSliceIdcMap[kiCurXY - 1]) )
      uiAvailMask |= LEFT_MB_POS;
    if ( kbNotFirstRow && (uiSliceIdc == kpSliceIdcMap[kiAboveXY]) )
      uiAvailMask |= TOP_MB_POS;
    if ( kbNotFirstCol && kbNotFirstRow && (uiSliceIdc == kpSliceIdcMap[kiAboveXY - 1]) )
      uiAvailMask |= TOPLEFT_MB_POS;
    if ( kbNotLastCol && kbNotFirstRow && (uiSliceIdc == kpSliceIdcMap[kiAboveXY + 1]) )
      uiAvailMask |= TOPRIGHT_MB_POS;

    pCurMb->uiNeighborAvail = (uint8_t)uiAvailMask;
    pCurMb->uiSliceIdc      = uiSliceIdc;

    ++ iMbIdx;
  } while ( iMbIdx <= kiLastMb );
}
/*!
 * \brief   Derive the per-slice ratio used for slice balancing from the MB count and the
 *          consumed time of each slice
 *
 * For every slice the "speed" (MB count / consumed time) is computed; the ratio of a slice is
 * its speed divided by the sum of the speeds of all slices.
 *
 * \param   pRatio          output, float array with one ratio per slice
 * \param   pSliceCtx       slice context (MB count of each slice, number of slices in frame)
 * \param   pSliceConsume   consumed time of each slice
 *
 * \return  NONE
 */
void CalcSliceComplexRatio( void *pRatio, SSliceCtx *pSliceCtx, uint32_t *pSliceConsume )
{
  float *pRatioList           = (float *)pRatio;
  float fAvI[MAX_SLICES_NUM];
  float fSumAv                = .0f;
  uint32_t *pSliceTime        = (uint32_t *)pSliceConsume;
  int32_t *pCountMbInSlice    = (int32_t *)pSliceCtx->pCountMbNumInSlice;
  const int32_t kiSliceCount  = pSliceCtx->iSliceNumInFrame;
  int32_t iSliceIdx           = 0;

  // fAvI[] is a fixed-size stack array indexed by slice
  assert( kiSliceCount <= MAX_SLICES_NUM );

#if defined(X86_ASM)
  WelsEmms();
#endif //X86_ASM

  while ( iSliceIdx < kiSliceCount )
  {
    // a slice measured as 0 would make the division yield inf and turn every ratio into
    // inf/NaN; count such a slice as one time unit instead
    const uint32_t kuiSliceTime = (pSliceTime[iSliceIdx] > 0) ? pSliceTime[iSliceIdx] : 1;

    fAvI[iSliceIdx] = 1.0f * pCountMbInSlice[iSliceIdx] / kuiSliceTime;
#if defined(ENABLE_TRACE_MT)
    WelsLog(NULL, WELS_LOG_DEBUG, "[MT] CalcSliceComplexRatio(), pSliceConsumeTime[%d]= %d us, slice_run= %d\n", iSliceIdx, pSliceTime[iSliceIdx], pCountMbInSlice[iSliceIdx]);
#endif//ENABLE_TRACE_MT
    fSumAv += fAvI[iSliceIdx];

    ++ iSliceIdx;
  }

  if ( !(fSumAv > .0f) )
  {
    // no usable measurement (e.g. all MB counts are 0): avoid 0/0 and split uniformly
    while ( -- iSliceIdx >= 0 )
    {
      pRatioList[iSliceIdx] = 1.0f / kiSliceCount;
    }
    return;
  }

  while ( -- iSliceIdx >= 0 )
  {
    pRatioList[iSliceIdx] = fAvI[iSliceIdx] / fSumAv;
  }
}
#if defined(MT_ENABLED) && defined(DYNAMIC_SLICE_ASSIGN) && defined(NOT_ABSOLUTE_BALANCING)
/*!
 * \brief   Decide from the consumed time of each slice whether slicing should be re-balanced
 *
 * \param   pConsumeTime    uint32_t array holding the consumed time of each slice
 * \param   iSliceNum       number of entries in pConsumeTime
 *
 * \return  non-zero (true) if an adjustment is needed, 0 (false) otherwise; always false when
 *          the total consumed time is 0
 */
int32_t NeedDynamicAdjust( void *pConsumeTime, const int32_t iSliceNum )
{
#if !defined(USE_RMSE_SLICE_COMPLEXITY_RATIO_FOR_BALANCING)
  const float fRatioLower = TOLERANT_BALANCING_RATIO_LOWER( iSliceNum );
  const float fRatioUpper = TOLERANT_BALANCING_RATIO_UPPER( iSliceNum );
#endif//USE_RMSE_SLICE_COMPLEXITY_RATIO_FOR_BALANCING
  uint32_t *pSliceConsume = (uint32_t *)pConsumeTime;
  uint32_t uiTotalConsume = 0;
  int32_t iSliceIdx       = 0;
  int32_t iNeedAdj        = false;

#if defined(X86_ASM)
  WelsEmms();
#endif //X86_ASM

  // sum element by element so that an odd iSliceNum never reads past the end of the array
  while ( iSliceIdx < iSliceNum )
  {
    uiTotalConsume += pSliceConsume[iSliceIdx];
    ++ iSliceIdx;
  }
  if (uiTotalConsume == 0)
  {
#if defined(ENABLE_TRACE_MT)
    WelsLog( NULL, WELS_LOG_DEBUG, "[MT] NeedDynamicAdjust(), herein do no adjust due first picture, iCountSliceNum= %d\n", iSliceNum );
#endif//ENABLE_TRACE_MT
    return false;
  }

  iSliceIdx = 0;
#if defined(USE_RMSE_SLICE_COMPLEXITY_RATIO_FOR_BALANCING)
  float fThr              = EPSN; // threshold for various cores cases
  float fRmse             = .0f;  // root mean square error of slice consume ratios
  const float kfMeanRatio = 1.0f / iSliceNum;

  // NOTE(review): the loop stops before the last slice when iSliceNum >= 2; kept as is,
  // presumably the thresholds were tuned against this behavior -- confirm
  do{
    const float fRatio      = 1.0f * pSliceConsume[iSliceIdx] / uiTotalConsume;
    const float fDiffRatio  = fRatio - kfMeanRatio;
    fRmse += (fDiffRatio * fDiffRatio);
    ++ iSliceIdx;
  } while ( iSliceIdx+1 < iSliceNum );
  fRmse = sqrtf(fRmse/iSliceNum);

  if ( iSliceNum >= 8 )
  {
    fThr += THRESHOLD_RMSE_CORE8;
  }
  else if ( iSliceNum >= 4 )
  {
    fThr += THRESHOLD_RMSE_CORE4;
  }
  else if ( iSliceNum >= 2 )
  {
    fThr += THRESHOLD_RMSE_CORE2;
  }
  else
    fThr = 1.0f;

  if ( fRmse > fThr )
    iNeedAdj = true;
#if defined(ENABLE_TRACE_MT)
  WelsLog(NULL, WELS_LOG_DEBUG, "[MT] NeedDynamicAdjust(), herein adjustment decision is made (iNeedAdj= %d) by: fRmse of pSlice complexity ratios %.6f, the corresponding threshold %.6f, iCountSliceNum %d\n",
    iNeedAdj, fRmse, fThr, iSliceNum);
#endif//ENABLE_TRACE_MT
#else
  // adjust as soon as one slice's share of the total time leaves [fRatioLower, fRatioUpper]
  do{
    const float kfRatio = 1.0f * pSliceConsume[iSliceIdx] / uiTotalConsume;
    if ( kfRatio+EPSN < fRatioLower || kfRatio > fRatioUpper+EPSN )
    {
#if defined(ENABLE_TRACE_MT)
      WelsLog(NULL, WELS_LOG_DEBUG, "[MT] NeedDynamicAdjust(), herein adjustment decision is made by pSlice consume time not balanced at all, uiSliceIdx= %d, comp_ratio= %.6f, pSliceConsumeTime= %d, total_consume_time= %d, iCountSliceNum= %d\n",
        iSliceIdx, kfRatio, pSliceConsume[iSliceIdx], uiTotalConsume, iSliceNum);
#endif//ENABLE_TRACE_MT
      iNeedAdj = true;
      break;
    }
    ++ iSliceIdx;
  } while ( iSliceIdx+1 < iSliceNum );
#endif//USE_RMSE_SLICE_COMPLEXITY_RATIO_FOR_BALANCING

  return iNeedAdj;
}
#endif//..
#if defined(DYNAMIC_SLICE_ASSIGN) && defined(TRY_SLICING_BALANCE)
/*!
 * \brief   Re-distribute the MBs of the current layer among its slices according to the
 *          per-slice ratios, then let the threads update the MB list.
 *
 * \param   pCtx            encoder context
 * \param   pCurDqLayer     current layer; its pSliceEncCtx is the slice context being adjusted
 * \param   pComplexRatio   float array holding one ratio per slice
 * \param   iCurDid         dependency id, used to select the rate control state in pCtx
 *
 * \return  NONE. Returns early without touching the slicing when: rate control reports a
 *          non-positive GOM size, one GOM per slice would already cover the whole frame,
 *          the slice count is below 2 or odd, or an assignment would leave no MB for the
 *          remaining slices.
 */
void DynamicAdjustSlicing( sWelsEncCtx *pCtx,
SDqLayer *pCurDqLayer,
void *pComplexRatio,
int32_t iCurDid )
{
SSliceCtx *pSliceCtx = pCurDqLayer->pSliceEncCtx;
const int32_t kiCountSliceNum = pSliceCtx->iSliceNumInFrame;
const int32_t kiCountNumMb = pSliceCtx->iMbNumInFrame;
int32_t iMinimalMbNum = pSliceCtx->iMbWidth; // in theory we need only 1 SMB, here let it as one SMB row required
int32_t iMaximalMbNum = 0; // dynamically assign later
float *pSliceComplexRatio = (float *)pComplexRatio;
int32_t iMbNumLeft = kiCountNumMb;
// NOTE(review): sized by MAX_THREADS_NUM but indexed by slice index -- presumably the slice
// count never exceeds the thread count on this path; confirm against callers
int32_t iRunLen[MAX_THREADS_NUM] = {0};
int32_t iSliceIdx = 0;
int32_t iNumMbInEachGom;
SWelsSvcRc *pWelsSvcRc = &pCtx->pWelsSvcRc[iCurDid];
// with rate control enabled the minimal slice size becomes one GOM instead of one MB row
if(pCtx->pSvcParam->bEnableRc)
{
iNumMbInEachGom = pWelsSvcRc->iNumberMbGom;
if ( iNumMbInEachGom <= 0 )
{
WelsLog(pCtx, WELS_LOG_ERROR, "[MT] DynamicAdjustSlicing(), invalid iNumMbInEachGom= %d from RC, iDid= %d, iCountNumMb= %d\n", iNumMbInEachGom, iCurDid, kiCountNumMb);
return;
}
// do not adjust in case no extra iNumMbInEachGom based left for slicing adjustment,
// extra MB of non integrated GOM assigned at the last pSlice in default, keep up on early initial result.
if ( iNumMbInEachGom * kiCountSliceNum >= kiCountNumMb )
{
return;
}
iMinimalMbNum = iNumMbInEachGom;
}
if ( kiCountSliceNum < 2 || (kiCountSliceNum & 0x01) ) // we need suppose uiSliceNum is even for multiple threading
return;
// largest size the first slice may take while every other slice still gets iMinimalMbNum
iMaximalMbNum = kiCountNumMb - (kiCountSliceNum - 1) * iMinimalMbNum;
#if defined(X86_ASM)
WelsEmms();
#endif //X86_ASM
#if defined(ENABLE_TRACE_MT)
WelsLog(pCtx, WELS_LOG_DEBUG, "[MT] DynamicAdjustSlicing(), iDid= %d, iCountNumMb= %d\n", iCurDid, kiCountNumMb);
#endif//ENABLE_TRACE_MT
iSliceIdx = 0;
// assign every slice but the last one; the last slice receives whatever is left afterwards
while (iSliceIdx+1 < kiCountSliceNum) {
int32_t iNumMbAssigning = (int32_t)(kiCountNumMb * pSliceComplexRatio[iSliceIdx] + EPSN);
// GOM boundary aligned
if(pCtx->pSvcParam->bEnableRc)
{
// round to the nearest multiple of the GOM size
iNumMbAssigning=(int32_t)(1.0f * iNumMbAssigning / iNumMbInEachGom + 0.5f + EPSN) * iNumMbInEachGom;
}
// make sure one GOM at least in each pSlice for safe
if ( iNumMbAssigning < iMinimalMbNum )
iNumMbAssigning = iMinimalMbNum;
else if ( iNumMbAssigning > iMaximalMbNum )
iNumMbAssigning = iMaximalMbNum;
assert( iNumMbAssigning > 0 );
iMbNumLeft -= iNumMbAssigning;
if ( iMbNumLeft <= 0 ) // error due to we can not support slice_skip now yet, do not adjust this time
{
assert( 0 );
return;
}
iRunLen[iSliceIdx] = iNumMbAssigning;
#if defined(ENABLE_TRACE_MT)
WelsLog(pCtx, WELS_LOG_DEBUG, "[MT] DynamicAdjustSlicing(), uiSliceIdx= %d, pSliceComplexRatio= %.2f, slice_run_org= %d, slice_run_adj= %d\n",
iSliceIdx, pSliceComplexRatio[iSliceIdx], pSliceCtx->pCountMbNumInSlice[iSliceIdx], iNumMbAssigning);
#endif//ENABLE_TRACE_MT
++ iSliceIdx;
iMaximalMbNum = iMbNumLeft - (kiCountSliceNum - iSliceIdx - 1) * iMinimalMbNum; // get maximal num_mb in left parts
}
// the last slice takes all remaining MBs
iRunLen[iSliceIdx] = iMbNumLeft;
#if defined(ENABLE_TRACE_MT)
WelsLog(pCtx, WELS_LOG_DEBUG, "[MT] DynamicAdjustSlicing(), iSliceIdx= %d, pSliceComplexRatio= %.2f, slice_run_org= %d, slice_run_adj= %d\n",
iSliceIdx, pSliceComplexRatio[iSliceIdx], pSliceCtx->pCountMbNumInSlice[iSliceIdx], iMbNumLeft);
#endif//ENABLE_TRACE_MT
// only when the slice context accepted the new run lengths (return value 0):
// signal the update-MB-list event of every thread, then block until all of them have finished
if ( DynamicAdjustSlicePEncCtxAll( pSliceCtx, iRunLen ) == 0 )
{
const int32_t kiThreadNum = pCtx->pSvcParam->iCountThreadsNum;
int32_t iThreadIdx = 0;
do {
#ifdef WIN32
WelsEventSignal( &pCtx->pSliceThreading->pUpdateMbListEvent[iThreadIdx] );
#else
WelsEventSignal( pCtx->pSliceThreading->pUpdateMbListEvent[iThreadIdx] );
#endif//WIN32
++ iThreadIdx;
} while(iThreadIdx < kiThreadNum);
WelsMultipleEventsWaitAllBlocking( kiThreadNum, &pCtx->pSliceThreading->pFinUpdateMbListEvent[0] );
}
}
#endif//#if defined(DYNAMIC_SLICE_ASSIGN) && defined(TRY_SLICING_BALANCE)
#ifdef PACKING_ONE_SLICE_PER_LAYER
/*!
 * \brief   Rewind the bitstream position of every slice bitstream owned by the encoder context
 *
 * \param   pCtx    encoder context; pSliceBs[0 .. iMaxSliceCount-1] get uiBsPos reset to 0
 *
 * \return  NONE
 */
void ResetEnvMt( sWelsEncCtx *pCtx)
{
  const int16_t kiNumSliceBs = pCtx->iMaxSliceCount;

  for ( int32_t iBsIdx = 0; iBsIdx < kiNumSliceBs; ++ iBsIdx )
  {
    pCtx->pSliceBs[iBsIdx].uiBsPos = 0;
  }
}
#endif//PACKING_ONE_SLICE_PER_LAYER
/*!
 * \brief   Allocate and initialise the resources used by slice based multi-threading:
 *          the SSliceThreading object, per-thread private data / handles / events,
 *          optional per-layer timing and ratio buffers, and one bitstream buffer per slice.
 *
 * \param   ppCtx                   address of the encoder context pointer
 * \param   pCodingParam            coding parameters (layer count, thread count, slice options)
 * \param   iCountBsLen             offset into (*ppCtx)->pFrameBs where the slice bitstream
 *                                  areas of slices 1.. start; must be positive
 * \param   iTargetSpatialBsSize    size in bytes of each slice bitstream buffer
 *
 * \return  0 on success; 1 on invalid arguments or when an allocation fails (in the latter
 *          case the FreeMemorySvc(ppCtx) expression handed to WELS_VERIFY_RETURN_PROC_IF is used)
 *
 * \note    SSliceThreading and the pSliceBs array are allocated zero-filled so that any pointer
 *          member not reached before a failure is NULL rather than indeterminate when cleanup
 *          code such as ReleaseMtResource() inspects it.
 */
int32_t RequestMtResource( sWelsEncCtx **ppCtx, SWelsSvcCodingParam *pCodingParam, const int32_t iCountBsLen, const int32_t iTargetSpatialBsSize )
{
  CMemoryAlign *pMa           = NULL;
  SWelsSvcCodingParam *pPara  = NULL;
  SSliceThreading *pSmt       = NULL;
  SWelsSliceBs *pSliceB       = NULL;
  uint8_t *pBsBase            = NULL;
  int32_t iNumSpatialLayers   = 0;
  int32_t iThreadNum          = 0;
  int32_t iIdx                = 0;
  int32_t iSliceBsBufferSize  = 0;
  int16_t iMaxSliceNum        = 1;

  if ( NULL == ppCtx || NULL == pCodingParam || NULL == *ppCtx || iCountBsLen <= 0 )
    return 1;

  pMa               = (*ppCtx)->pMemAlign;
  pPara             = pCodingParam;
  iNumSpatialLayers = pPara->iNumDependencyLayer;
  iThreadNum        = pPara->iCountThreadsNum;
  iMaxSliceNum      = (*ppCtx)->iMaxSliceCount;

  // zero-filled: members that are never reached because of an early failure must read as NULL
  pSmt = (SSliceThreading *)pMa->WelsMallocz(sizeof(SSliceThreading), "SSliceThreading");
  WELS_VERIFY_RETURN_PROC_IF( 1, (NULL == pSmt), FreeMemorySvc(ppCtx) )
  (*ppCtx)->pSliceThreading = pSmt;
  pSmt->pThreadPEncCtx = (SSliceThreadPrivateData *)pMa->WelsMalloc( sizeof(SSliceThreadPrivateData) * iThreadNum, "pThreadPEncCtx" );
  WELS_VERIFY_RETURN_PROC_IF( 1, (NULL == pSmt->pThreadPEncCtx), FreeMemorySvc(ppCtx) )
  pSmt->pThreadHandles = (WELS_THREAD_HANDLE *)pMa->WelsMalloc( sizeof(WELS_THREAD_HANDLE) * iThreadNum, "pThreadHandles" );
  WELS_VERIFY_RETURN_PROC_IF( 1, (NULL == pSmt->pThreadHandles), FreeMemorySvc(ppCtx) )

#ifdef WIN32
  pSmt->pSliceCodedEvent = (WELS_EVENT *)pMa->WelsMalloc( sizeof(WELS_EVENT) * iThreadNum, "pSliceCodedEvent" );
  WELS_VERIFY_RETURN_PROC_IF( 1, (NULL == pSmt->pSliceCodedEvent), FreeMemorySvc(ppCtx) )
  pSmt->pReadySliceCodingEvent = (WELS_EVENT *)pMa->WelsMalloc( sizeof(WELS_EVENT) * iThreadNum, "pReadySliceCodingEvent" );
  WELS_VERIFY_RETURN_PROC_IF( 1, (NULL == pSmt->pReadySliceCodingEvent), FreeMemorySvc(ppCtx) )
  pSmt->pFinSliceCodingEvent = (WELS_EVENT *)pMa->WelsMalloc( sizeof(WELS_EVENT) * iThreadNum, "pFinSliceCodingEvent" );
  WELS_VERIFY_RETURN_PROC_IF( 1, (NULL == pSmt->pFinSliceCodingEvent), FreeMemorySvc(ppCtx) )
#endif//WIN32

#if defined(DYNAMIC_SLICE_ASSIGN) && defined(TRY_SLICING_BALANCE)
#if defined(__GNUC__)
  pSmt->pUpdateMbListThrdHandles = (WELS_THREAD_HANDLE *)pMa->WelsMalloc( sizeof(WELS_THREAD_HANDLE) * iThreadNum, "pUpdateMbListThrdHandles" );
  WELS_VERIFY_RETURN_PROC_IF( 1, (NULL == pSmt->pUpdateMbListThrdHandles), FreeMemorySvc(ppCtx) )
#endif//__GNUC__
#ifdef WIN32
  pSmt->pUpdateMbListEvent = (WELS_EVENT *)pMa->WelsMalloc( sizeof(WELS_EVENT) * iThreadNum, "pUpdateMbListEvent" );
  WELS_VERIFY_RETURN_PROC_IF( 1, (NULL == pSmt->pUpdateMbListEvent), FreeMemorySvc(ppCtx) )
  pSmt->pFinUpdateMbListEvent = (WELS_EVENT *)pMa->WelsMalloc( sizeof(WELS_EVENT) * iThreadNum, "pFinUpdateMbListEvent" );
  WELS_VERIFY_RETURN_PROC_IF( 1, (NULL == pSmt->pFinUpdateMbListEvent), FreeMemorySvc(ppCtx) )
#endif//WIN32
#endif//#if defined(DYNAMIC_SLICE_ASSIGN) && defined(TRY_SLICING_BALANCE)

#ifdef WIN32
  pSmt->pExitEncodeEvent = (WELS_EVENT *)pMa->WelsMalloc( sizeof(WELS_EVENT) * iThreadNum, "pExitEncodeEvent" );
  WELS_VERIFY_RETURN_PROC_IF( 1, (NULL == pSmt->pExitEncodeEvent), FreeMemorySvc(ppCtx) )
#endif//WIN32

#if defined(DYNAMIC_SLICE_ASSIGN) || defined(MT_DEBUG)
  // per-layer timing (and ratio) buffers exist only for fixed-slice-number layers whose
  // slice count does not exceed iMultipleThreadIdc
  iIdx = 0;
  while ( iIdx < iNumSpatialLayers )
  {
    SMulSliceOption *pMso   = &pPara->sDependencyLayers[iIdx].sMso;
    const int32_t kiSliceNum= pMso->sSliceArgument.iSliceNum;
    if (pMso->uiSliceMode == SM_FIXEDSLCNUM_SLICE && pPara->iMultipleThreadIdc > 1 && pPara->iMultipleThreadIdc >= kiSliceNum )
    {
      pSmt->pSliceConsumeTime[iIdx] = (uint32_t *)pMa->WelsMallocz( kiSliceNum * sizeof(uint32_t), "pSliceConsumeTime[]" );
      WELS_VERIFY_RETURN_PROC_IF( 1, (NULL == pSmt->pSliceConsumeTime[iIdx]), FreeMemorySvc(ppCtx) )
#if defined(DYNAMIC_SLICE_ASSIGN) && defined(TRY_SLICING_BALANCE)
      pSmt->pSliceComplexRatio[iIdx] = (float *)pMa->WelsMalloc( kiSliceNum * sizeof(float), "pSliceComplexRatio[]" );
      WELS_VERIFY_RETURN_PROC_IF( 1, (NULL == pSmt->pSliceComplexRatio[iIdx]), FreeMemorySvc(ppCtx) )
#endif//TRY_SLICING_BALANCE
    }
    else
    {
      pSmt->pSliceConsumeTime[iIdx] = NULL;
#if defined(DYNAMIC_SLICE_ASSIGN) && defined(TRY_SLICING_BALANCE)
      pSmt->pSliceComplexRatio[iIdx] = NULL;
#endif//TRY_SLICING_BALANCE
    }
    ++ iIdx;
  }
  // entries from iNumSpatialLayers upwards stay NULL thanks to the zero-filled allocation of pSmt
#endif//#if defined(DYNAMIC_SLICE_ASSIGN) || defined(MT_DEBUG)

#ifdef MT_DEBUG
  // file handle for MT debug
  pSmt->pFSliceDiff = NULL;
#ifdef WIN32
  pSmt->pFSliceDiff = fopen(".\\slice_time.txt", "wt+" );
#else
  pSmt->pFSliceDiff = fopen("/tmp/slice_time.txt", "wt+" );
#endif//WIN32
#endif//MT_DEBUG

#if defined(ENABLE_TRACE_MT)
  WelsLog((*ppCtx), WELS_LOG_INFO, "encpEncCtx= 0x%p\n", (void *)(*ppCtx));
#endif//ENABLE_TRACE_MT

  // per-thread private data and events
  iIdx = 0;
  while ( iIdx < iThreadNum )
  {
#ifdef __GNUC__ // for posix threading
    str_t name[SEM_NAME_MAX] = {0};
    int32_t used_len = 0;
    WELS_THREAD_ERROR_CODE err = 0;
#endif//__GNUC__
    pSmt->pThreadPEncCtx[iIdx].pWelsPEncCtx = (void *)(*ppCtx);
    pSmt->pThreadPEncCtx[iIdx].iSliceIndex  = iIdx;
    pSmt->pThreadPEncCtx[iIdx].iThreadIndex = iIdx;
    pSmt->pThreadHandles[iIdx]              = 0;

#if defined(DYNAMIC_SLICE_ASSIGN) && defined(TRY_SLICING_BALANCE)
#ifdef WIN32
    WelsEventInit( &pSmt->pUpdateMbListEvent[iIdx] );
    WelsEventInit( &pSmt->pFinUpdateMbListEvent[iIdx] );
#else
    // length of semaphore name should be system constrained at least on mac 10.7;
    // names are built from a two-letter tag, the thread index and the context address
    SNPRINTF( name, SEM_NAME_MAX, "ud%d%p", iIdx, (void *)(*ppCtx) );
    err = WelsEventOpen( &pSmt->pUpdateMbListEvent[iIdx], name );
#if defined(ENABLE_TRACE_MT)
    WelsLog((*ppCtx), WELS_LOG_INFO, "[MT] Open pUpdateMbListEvent%d named(%s) ret%d err%d\n", iIdx, name, err, errno);
#endif
    used_len = SNPRINTF( name, SEM_NAME_MAX, "fu%d%p", iIdx, (void *)(*ppCtx) );
    name[used_len] = '\0';
    err = WelsEventOpen( &pSmt->pFinUpdateMbListEvent[iIdx], name );
#if defined(ENABLE_TRACE_MT)
    WelsLog((*ppCtx), WELS_LOG_INFO, "[MT] Open pFinUpdateMbListEvent%d named(%s) ret%d err%d\n", iIdx, name, err, errno);
#endif
#endif//WIN32
#endif//#if defined(DYNAMIC_SLICE_ASSIGN) && defined(TRY_SLICING_BALANCE)

#ifdef WIN32
    WelsEventInit( &pSmt->pSliceCodedEvent[iIdx] );
    WelsEventInit( &pSmt->pReadySliceCodingEvent[iIdx] );
    WelsEventInit( &pSmt->pFinSliceCodingEvent[iIdx] );
    WelsEventInit( &pSmt->pExitEncodeEvent[iIdx] );
#else
    used_len = SNPRINTF( name, SEM_NAME_MAX, "sc%d%p", iIdx, (void *)(*ppCtx) );
    name[used_len] = '\0';
    err = WelsEventOpen( &pSmt->pSliceCodedEvent[iIdx], name );
#if defined(ENABLE_TRACE_MT)
    WelsLog((*ppCtx), WELS_LOG_INFO, "[MT] Open pSliceCodedEvent%d named(%s) ret%d err%d\n", iIdx, name, err, errno);
#endif
    used_len = SNPRINTF( name, SEM_NAME_MAX, "rc%d%p", iIdx, (void *)(*ppCtx) );
    name[used_len] = '\0';
    err = WelsEventOpen( &pSmt->pReadySliceCodingEvent[iIdx], name );
#if defined(ENABLE_TRACE_MT)
    // arguments follow the format: index, event pointer, semaphore name, return code, errno
    WelsLog((*ppCtx), WELS_LOG_INFO, "[MT] Open pReadySliceCodingEvent%d = 0x%p named(%s) ret%d err%d\n", iIdx, (void *)pSmt->pReadySliceCodingEvent[iIdx], name, err, errno);
#endif
#endif//WIN32

    ++ iIdx;
  }

#ifdef PACKING_ONE_SLICE_PER_LAYER
  pSmt->pCountBsSizeInPartition = (uint32_t *)pMa->WelsMalloc( sizeof(uint32_t) * iThreadNum, "pCountBsSizeInPartition" );
  WELS_VERIFY_RETURN_PROC_IF( 1, (NULL == pSmt->pCountBsSizeInPartition), FreeMemorySvc(ppCtx) )
#endif//PACKING_ONE_SLICE_PER_LAYER

  WelsMutexInit( &pSmt->mutexSliceNumUpdate );

  // zero-filled: if a pBsBuffer allocation below fails, the remaining entries hold NULL buffers
  (*ppCtx)->pSliceBs = (SWelsSliceBs *)pMa->WelsMallocz( sizeof(SWelsSliceBs) * iMaxSliceNum, "pSliceBs" );
  WELS_VERIFY_RETURN_PROC_IF( 1, (NULL == (*ppCtx)->pSliceBs), FreeMemorySvc(ppCtx) )

  pBsBase             = (*ppCtx)->pFrameBs + iCountBsLen;
  pSliceB             = (*ppCtx)->pSliceBs;
  iSliceBsBufferSize  = iTargetSpatialBsSize;
  iIdx = 0;
  while ( iIdx < iMaxSliceNum )
  {
    pSliceB->pBsBuffer = (uint8_t *)pMa->WelsMalloc( iSliceBsBufferSize, "pSliceB->pBsBuffer" );
    WELS_VERIFY_RETURN_PROC_IF( 1, (NULL == pSliceB->pBsBuffer), FreeMemorySvc(ppCtx) )
    pSliceB->uiSize = iSliceBsBufferSize;

    if ( iIdx > 0 )
    {
      // slices 1.. get consecutive areas of iSliceBsBufferSize bytes starting at pFrameBs + iCountBsLen
      pSliceB->pBs      = pBsBase;
      pSliceB->uiBsPos  = 0;
      pBsBase += iSliceBsBufferSize;
    }
    else
    {
      // slice 0 gets no separate area here
      pSliceB->pBs      = NULL;
      pSliceB->uiBsPos  = 0;
    }
    ++ pSliceB;
    ++ iIdx;
  }

#if defined(ENABLE_TRACE_MT)
  WelsLog((*ppCtx), WELS_LOG_INFO, "RequestMtResource(), iThreadNum=%d, iCountSliceNum= %d\n", pPara->iCountThreadsNum, iMaxSliceNum);
#endif

  return 0;
}
/*!
 * \brief   Release what RequestMtResource() set up: per-thread events (and, on WIN32, thread
 *          handles), the bookkeeping arrays, the per-slice bitstream buffers and finally the
 *          SSliceThreading object itself.
 *
 * \param   ppCtx   address of the encoder context pointer; nothing is done when ppCtx, *ppCtx
 *                  or (*ppCtx)->pSliceThreading is NULL
 *
 * \return  NONE
 */
void ReleaseMtResource( sWelsEncCtx **ppCtx )
{
  if ( NULL == ppCtx || NULL == *ppCtx )
    return;

  sWelsEncCtx *pEncCtx              = *ppCtx;
  CMemoryAlign *pMa                 = pEncCtx->pMemAlign;
  SWelsSvcCodingParam *pCodingParam = pEncCtx->pSvcParam;
  const int16_t kiSliceBsNum        = pEncCtx->iMaxSliceCount;
  const int32_t kiThreadNum         = pEncCtx->pSvcParam->iCountThreadsNum;
  SSliceThreading *pSmt             = pEncCtx->pSliceThreading;

  if ( NULL == pSmt )
    return;

  // step 1: tear down the per-thread synchronisation objects
  for ( int32_t iThrd = 0; iThrd < kiThreadNum; ++ iThrd )
  {
#ifdef WIN32
    if ( pSmt->pThreadHandles != NULL && pSmt->pThreadHandles[iThrd] != NULL )
      WelsThreadDestroy( &pSmt->pThreadHandles[iThrd] );

    if ( pSmt->pSliceCodedEvent != NULL )
      WelsEventDestroy( &pSmt->pSliceCodedEvent[iThrd] );
    if ( pSmt->pReadySliceCodingEvent != NULL )
      WelsEventDestroy( &pSmt->pReadySliceCodingEvent[iThrd] );
    if ( pSmt->pFinSliceCodingEvent != NULL )
      WelsEventDestroy( &pSmt->pFinSliceCodingEvent[iThrd] );
    if ( pSmt->pExitEncodeEvent != NULL )
      WelsEventDestroy( &pSmt->pExitEncodeEvent[iThrd] );
#if defined(DYNAMIC_SLICE_ASSIGN) && defined(TRY_SLICING_BALANCE)
    if ( pSmt->pUpdateMbListEvent != NULL )
      WelsEventDestroy( &pSmt->pUpdateMbListEvent[iThrd] );
    if ( pSmt->pFinUpdateMbListEvent != NULL )
      WelsEventDestroy( &pSmt->pFinUpdateMbListEvent[iThrd] );
#endif//DYNAMIC_SLICE_ASSIGN && TRY_SLICING_BALANCE
#else
    // events are closed by name; the name is rebuilt from tag, thread index and context
    // address (length of semaphore name should be system constrained at least on mac 10.7)
    str_t szEventName[SEM_NAME_MAX] = {0};
    int32_t iNameLen = 0;

    SNPRINTF( szEventName, SEM_NAME_MAX, "sc%d%p", iThrd, (void *)pEncCtx );
    WelsEventClose( pSmt->pSliceCodedEvent[iThrd], szEventName );

    iNameLen = SNPRINTF( szEventName, SEM_NAME_MAX, "rc%d%p", iThrd, (void *)pEncCtx );
    szEventName[iNameLen] = '\0';
    WelsEventClose( pSmt->pReadySliceCodingEvent[iThrd], szEventName );
#if defined(DYNAMIC_SLICE_ASSIGN) && defined(TRY_SLICING_BALANCE)
    iNameLen = SNPRINTF( szEventName, SEM_NAME_MAX, "ud%d%p", iThrd, (void *)pEncCtx );
    szEventName[iNameLen] = '\0';
    WelsEventClose( pSmt->pUpdateMbListEvent[iThrd], szEventName );

    iNameLen = SNPRINTF( szEventName, SEM_NAME_MAX, "fu%d%p", iThrd, (void *)pEncCtx );
    szEventName[iNameLen] = '\0';
    WelsEventClose( pSmt->pFinUpdateMbListEvent[iThrd], szEventName );
#endif//DYNAMIC_SLICE_ASSIGN && TRY_SLICING_BALANCE
#endif//WIN32
  }

  // step 2: free the event arrays (WIN32 only)
#ifdef WIN32
  if ( pSmt->pExitEncodeEvent != NULL )
  {
    pMa->WelsFree( pSmt->pExitEncodeEvent, "pExitEncodeEvent" );
    pSmt->pExitEncodeEvent = NULL;
  }
  if ( pSmt->pSliceCodedEvent != NULL )
  {
    pMa->WelsFree( pSmt->pSliceCodedEvent, "pSliceCodedEvent" );
    pSmt->pSliceCodedEvent = NULL;
  }
  if ( pSmt->pReadySliceCodingEvent != NULL )
  {
    pMa->WelsFree( pSmt->pReadySliceCodingEvent, "pReadySliceCodingEvent" );
    pSmt->pReadySliceCodingEvent = NULL;
  }
  if ( pSmt->pFinSliceCodingEvent != NULL )
  {
    pMa->WelsFree( pSmt->pFinSliceCodingEvent, "pFinSliceCodingEvent" );
    pSmt->pFinSliceCodingEvent = NULL;
  }
#endif//WIN32

#ifdef PACKING_ONE_SLICE_PER_LAYER
  if ( NULL != pSmt->pCountBsSizeInPartition )
  {
    pMa->WelsFree( pSmt->pCountBsSizeInPartition, "pCountBsSizeInPartition" );
    pSmt->pCountBsSizeInPartition = NULL;
  }
#endif//PACKING_ONE_SLICE_PER_LAYER

  WelsMutexDestroy( &pSmt->mutexSliceNumUpdate );

  // step 3: per-thread private data and handles
  if ( pSmt->pThreadPEncCtx != NULL )
  {
    pMa->WelsFree( pSmt->pThreadPEncCtx, "pThreadPEncCtx" );
    pSmt->pThreadPEncCtx = NULL;
  }
  if ( pSmt->pThreadHandles != NULL )
  {
    pMa->WelsFree( pSmt->pThreadHandles, "pThreadHandles" );
    pSmt->pThreadHandles = NULL;
  }

  // step 4: the buffer of each slice bitstream, then the slice bitstream array itself
  SWelsSliceBs *pSliceBsList = pEncCtx->pSliceBs;
  if ( pSliceBsList != NULL )
  {
    for ( int32_t iBs = 0; iBs < kiSliceBsNum; ++ iBs )
    {
      SWelsSliceBs *pOneBs = &pSliceBsList[iBs];
      if ( pOneBs->pBsBuffer )
      {
        pMa->WelsFree( pOneBs->pBsBuffer, "pSliceB->pBsBuffer" );
        pOneBs->pBsBuffer = NULL;
        pOneBs->uiSize    = 0;
      }
    }
  }
  if ( pEncCtx->pSliceBs != NULL )
  {
    pMa->WelsFree( pEncCtx->pSliceBs, "pSliceBs" );
    pEncCtx->pSliceBs = NULL;
  }

  // step 5: per-layer timing / ratio buffers
#if defined(DYNAMIC_SLICE_ASSIGN) || defined(MT_DEBUG)
  if ( pSmt->pSliceConsumeTime != NULL )
  {
    for ( int32_t iLayer = 0; iLayer < pCodingParam->iNumDependencyLayer; ++ iLayer )
    {
      if ( pSmt->pSliceConsumeTime[iLayer] )
      {
        pMa->WelsFree( pSmt->pSliceConsumeTime[iLayer], "pSliceConsumeTime[]" );
        pSmt->pSliceConsumeTime[iLayer] = NULL;
      }
#if defined(DYNAMIC_SLICE_ASSIGN) && defined(TRY_SLICING_BALANCE)
      if ( pSmt->pSliceComplexRatio[iLayer] != NULL )
      {
        pMa->WelsFree( pSmt->pSliceComplexRatio[iLayer], "pSliceComplexRatio[]" );
        pSmt->pSliceComplexRatio[iLayer] = NULL;
      }
#endif//TRY_SLICING_BALANCE
    }
  }
#endif//#if defined(DYNAMIC_SLICE_ASSIGN) || defined(MT_DEBUG)

  // step 6: resources of the update-MB-list machinery
#if defined(DYNAMIC_SLICE_ASSIGN) && defined(TRY_SLICING_BALANCE)
#ifdef WIN32
  if ( pSmt->pUpdateMbListEvent != NULL )
  {
    pMa->WelsFree( pSmt->pUpdateMbListEvent, "pUpdateMbListEvent" );
    pSmt->pUpdateMbListEvent = NULL;
  }
  if ( pSmt->pFinUpdateMbListEvent != NULL )
  {
    pMa->WelsFree( pSmt->pFinUpdateMbListEvent, "pFinUpdateMbListEvent" );
    pSmt->pFinUpdateMbListEvent = NULL;
  }
#else
  if ( pSmt->pUpdateMbListThrdHandles )
  {
    pMa->WelsFree( pSmt->pUpdateMbListThrdHandles, "pUpdateMbListThrdHandles" );
    pSmt->pUpdateMbListThrdHandles = NULL;
  }
#endif//WIN32
#endif//#if defined(DYNAMIC_SLICE_ASSIGN) && defined(TRY_SLICING_BALANCE)

#ifdef MT_DEBUG
  // file handle for debug
  if ( pSmt->pFSliceDiff )
  {
    fclose( pSmt->pFSliceDiff );
    pSmt->pFSliceDiff = NULL;
  }
#endif//MT_DEBUG

  // step 7: the SSliceThreading object itself
  pMa->WelsFree( pEncCtx->pSliceThreading, "SSliceThreading" );
  pEncCtx->pSliceThreading = NULL;
}
/*!
 * \brief   Append the bitstreams of already coded slices to the frame bitstream buffer and
 *          record their NAL lengths in the layer bitstream info.
 *
 * In non dynamic-slicing mode slices 1 .. iSliceCount-1 are appended (slice 0 is only counted).
 * In SM_DYN_SLICE mode iSliceCount is the number of partitions; the coded slices of
 * partition p are found at pSliceBs[p], pSliceBs[p + iSliceCount], ... ; slices of
 * partition 0 are only counted, all others are appended.
 *
 * \param   pCtx            encoder context (pFrameBs / iPosBsBuffer are advanced)
 * \param   pLbi            layer bitstream info receiving NAL lengths and NAL count
 * \param   iSliceCount     number of slices, resp. partitions in SM_DYN_SLICE mode
 *
 * \return  accumulated size in bytes of the slices of the layer
 */
int32_t AppendSliceToFrameBs( sWelsEncCtx *pCtx, SLayerBSInfo *pLbi, const int32_t iSliceCount )
{
  SWelsSvcCodingParam *pSvcParam  = pCtx->pSvcParam;
  SDLayerParam *pLayerParam       = &pSvcParam->sDependencyLayers[pCtx->uiDependencyId];
  const BOOL_T kbDynSlicing       = (pLayerParam->sMso.uiSliceMode == SM_DYN_SLICE);
  int32_t iTotalBytes             = 0;
  int32_t iNalWritePos            = pLbi->iNalCount;

  if ( kbDynSlicing ) // for SM_DYN_SLICE
  {
    for ( int32_t iPartIdx = 0; iPartIdx < iSliceCount; ++ iPartIdx )
    {
      const int32_t kiCodedInPart = pCtx->pCurDqLayer->pNumSliceCodedOfPartition[iPartIdx];
      int32_t iBsIdx              = iPartIdx;

      for ( int32_t iCoded = 0; iCoded < kiCodedInPart; ++ iCoded, iBsIdx += iSliceCount )
      {
        SWelsSliceBs *pCurBs = &pCtx->pSliceBs[iBsIdx];

        if ( pCurBs == NULL || !(pCurBs->uiBsPos > 0) )
          continue;

        if ( iPartIdx == 0 )
        {
          // partition 0 has been written to the frame buffer already, only its size is counted
          iTotalBytes += pCurBs->uiBsPos;
          continue;
        }

        const int32_t kiNalNum = pCurBs->iNalIndex;

        memmove(pCtx->pFrameBs + pCtx->iPosBsBuffer, pCurBs->pBs, pCurBs->uiBsPos); // confirmed_safe_unsafe_usage
        pCtx->iPosBsBuffer += pCurBs->uiBsPos;
        iTotalBytes        += pCurBs->uiBsPos;

        for ( int32_t iNal = 0; iNal < kiNalNum; ++ iNal )
        {
          pLbi->iNalLengthInByte[iNalWritePos+iNal] = pCurBs->iNalLen[iNal];
        }
        pLbi->iNalCount += kiNalNum;
        iNalWritePos    += kiNalNum;
      }
    }
    return iTotalBytes;
  }

  // slice 0 has been written to pFrameBs by now: start with its size and go on from slice 1
  SWelsSliceBs *pCurBs = &pCtx->pSliceBs[0];
  iTotalBytes = pCurBs->uiBsPos;

  for ( int32_t iSlc = 1; iSlc < iSliceCount; ++ iSlc )
  {
    ++ pCurBs;
    if ( pCurBs == NULL || !(pCurBs->uiBsPos > 0) )
      continue;

    const int32_t kiNalNum = pCurBs->iNalIndex;

#if MT_DEBUG_BS_WR
    assert(pCurBs->bSliceCodedFlag);
#endif//MT_DEBUG_BS_WR

    memmove(pCtx->pFrameBs + pCtx->iPosBsBuffer, pCurBs->pBs, pCurBs->uiBsPos); // confirmed_safe_unsafe_usage
    pCtx->iPosBsBuffer += pCurBs->uiBsPos;
    iTotalBytes        += pCurBs->uiBsPos;

    for ( int32_t iNal = 0; iNal < kiNalNum; ++ iNal )
    {
      pLbi->iNalLengthInByte[iNalWritePos+iNal] = pCurBs->iNalLen[iNal];
    }
    pLbi->iNalCount += kiNalNum;
    iNalWritePos    += kiNalNum;
  }

  return iTotalBytes;
}
/*!
 * \brief   Encode the NAL units of one slice into the frame bitstream buffer and update the
 *          layer bitstream info accordingly.
 *
 * \param   pCtx            encoder context
 * \param   pLbi            layer bitstream info; receives the NAL lengths and the NAL count,
 *                          plus the layer identification fields (for the first slice, or
 *                          always when PACKING_ONE_SLICE_PER_LAYER is defined)
 * \param   pFrameBsBuffer  destination the NAL units are written to
 * \param   iSliceIdx       index into pCtx->pSliceBs of the slice to write
 *
 * \return  size of the written slice as accumulated from WelsEncodeNalExt()
 *
 * \note    The length of each NAL is kept in a per-iteration scalar, so the number of NAL
 *          units of the slice cannot overrun a fixed-size local array.
 */
int32_t WriteSliceToFrameBs( sWelsEncCtx *pCtx, SLayerBSInfo *pLbi, uint8_t *pFrameBsBuffer, const int32_t iSliceIdx )
{
  SWelsSliceBs *pSliceBs        = &pCtx->pSliceBs[iSliceIdx];
  SNalUnitHeaderExt *pNalHdrExt = &pCtx->pCurDqLayer->sLayerInfo.sNalHeaderExt;
  uint8_t *pDst                 = pFrameBsBuffer;
  int32_t iSliceSize            = 0;
  const int32_t kiNalCnt        = pSliceBs->iNalIndex;
  int32_t iNalIdx               = 0;
#if !defined(PACKING_ONE_SLICE_PER_LAYER)
  const int32_t iFirstSlice     = (iSliceIdx == 0);
  // the first slice starts the NAL list of the layer, later slices append to it
  int32_t iNalBase              = iFirstSlice ? 0 : pLbi->iNalCount;
#else
  int32_t iNalBase              = 0;
#endif//!PACKING_ONE_SLICE_PER_LAYER

  while ( iNalIdx < kiNalCnt ) {
    int32_t iNalLen = 0;

    iSliceSize += WelsEncodeNalExt( &pSliceBs->sNalList[iNalIdx], pNalHdrExt, pDst, &iNalLen );
    pDst += iNalLen;
    pLbi->iNalLengthInByte[iNalBase+iNalIdx] = iNalLen;

    ++ iNalIdx;
  }

#if !defined(PACKING_ONE_SLICE_PER_LAYER)
  pSliceBs->uiBsPos = iSliceSize;
  if ( iFirstSlice )
  {
    // pBsBuffer has been updated at coding_slice_0_in_encoder_mother_thread()
    pLbi->uiLayerType   = VIDEO_CODING_LAYER;
    pLbi->uiSpatialId   = pNalHdrExt->uiDependencyId;
    pLbi->uiTemporalId  = pNalHdrExt->uiTemporalId;
    pLbi->uiQualityId   = 0;
    pLbi->uiPriorityId  = 0;
    pLbi->iNalCount     = kiNalCnt;
  }
  else
  {
    pLbi->iNalCount += kiNalCnt;
  }
#else
  pLbi->uiLayerType   = VIDEO_CODING_LAYER;
  pLbi->uiSpatialId   = pNalHdrExt->uiDependencyId;
  pLbi->uiTemporalId  = pNalHdrExt->uiTemporalId;
  pLbi->uiQualityId   = 0;
  pLbi->uiPriorityId  = 0;
  pLbi->iNalCount     = kiNalCnt;
#endif//PACKING_ONE_SLICE_PER_LAYER

  return iSliceSize;
}
/*!
 * \brief   Encode the NAL units of one slice into the given slice bitstream buffer; the length
 *          of each NAL is stored in the slice's own iNalLen[] and the total in uiBsPos.
 *
 * \param   pCtx            encoder context
 * \param   pSliceBsBuf     destination the NAL units are written to
 * \param   iSliceIdx       index into pCtx->pSliceBs of the slice to write
 *
 * \return  total size accumulated from WelsEncodeNalExt(); 0 if the slice holds more than 2 NALs
 */
int32_t WriteSliceBs( sWelsEncCtx *pCtx, uint8_t *pSliceBsBuf, const int32_t iSliceIdx )
{
  SWelsSliceBs *pCurBs        = &pCtx->pSliceBs[iSliceIdx];
  SNalUnitHeaderExt *pHdrExt  = &pCtx->pCurDqLayer->sLayerInfo.sNalHeaderExt;
  const int32_t kiNumNal      = pCurBs->iNalIndex;
  uint8_t *pWrite             = pSliceBsBuf;
  int32_t iTotalSize          = 0;

  // at most 2 NAL units per slice are supported here
  assert( kiNumNal <= 2 );
  if ( kiNumNal > 2 )
    return 0;

  for ( int32_t iNal = 0; iNal < kiNumNal; ++ iNal )
  {
    int32_t *pCurLen = &pCurBs->iNalLen[iNal];

    iTotalSize += WelsEncodeNalExt( &pCurBs->sNalList[iNal], pHdrExt, pWrite, pCurLen );
    pWrite += *pCurLen;
  }
  pCurBs->uiBsPos = iTotalSize;

  return iTotalSize;
}
#if defined(DYNAMIC_SLICE_ASSIGN) && defined(TRY_SLICING_BALANCE)
#if defined(__GNUC__)
/*!
 * \brief   Thread routine: each time its pUpdateMbListEvent is signalled, update the MB list
 *          neighbors of the slice bound to this thread and signal pFinUpdateMbListEvent.
 *          The routine ends when waiting on the event fails.
 *
 * \param   arg     SSliceThreadPrivateData of this thread
 *
 * \return  1 (via WELS_THREAD_ROUTINE_RETURN) if arg is NULL or the wait failed
 */
WELS_THREAD_ROUTINE_TYPE UpdateMbListThreadProc( void *arg )
{
  SSliceThreadPrivateData *pThrdData  = (SSliceThreadPrivateData *)arg;
  sWelsEncCtx *pEncCtx                = NULL;
  SDqLayer *pLayer                    = NULL;
  int32_t iBoundSliceIdx              = -1;
  int32_t iEvtIdx                     = -1;
  WELS_THREAD_ERROR_CODE iWaitResult  = WELS_THREAD_ERROR_GENERIAL;
  uint32_t uiExitCode                 = 0;

  if ( NULL == pThrdData )
    WELS_THREAD_ROUTINE_RETURN(1);

  pEncCtx         = (sWelsEncCtx *)pThrdData->pWelsPEncCtx;
  iBoundSliceIdx  = pThrdData->iSliceIndex;
  iEvtIdx         = pThrdData->iThreadIndex;

  for ( ; ; )
  {
#if defined(ENABLE_TRACE_MT)
    WelsLog(pEncCtx, WELS_LOG_INFO, "[MT] UpdateMbListThreadProc(), try to wait (pUpdateMbListEvent[%d])!\n", iEvtIdx);
#endif
    iWaitResult = WelsEventWait( pEncCtx->pSliceThreading->pUpdateMbListEvent[iEvtIdx] );

    if ( WELS_THREAD_ERROR_WAIT_OBJECT_0 != iWaitResult )
    {
      WelsLog(pEncCtx, WELS_LOG_WARNING, "[MT] UpdateMbListThreadProc(), waiting pUpdateMbListEvent[%d] failed(%d) and thread%d terminated!\n", iEvtIdx, iWaitResult, iEvtIdx);
      uiExitCode = 1;
      break;
    }

    // the current layer is re-read after every wake-up
    pLayer = pEncCtx->pCurDqLayer;
    UpdateMbListNeighborParallel( pLayer->pSliceEncCtx, pLayer->sMbDataP, iBoundSliceIdx );
    WelsEventSignal( pEncCtx->pSliceThreading->pFinUpdateMbListEvent[iEvtIdx] ); // mean finished update pMb list for this pSlice
  }

  WELS_THREAD_ROUTINE_RETURN(uiExitCode);
}
#endif//__GNUC__
#endif//#if defined(DYNAMIC_SLICE_ASSIGN) && defined(TRY_SLICING_BALANCE)
// thread process for coding one pSlice
/*!
 * \brief	Worker thread routine: waits for a "ready to code" signal and encodes the slice(s)
 *			assigned to this thread, then signals pSliceCodedEvent for its thread index.
 *
 *	Work done per ready signal depends on the slice mode of the current dependency layer:
 *	 - any mode except SM_DYN_SLICE: the single slice pPrivateData->iSliceIndex is coded;
 *	 - SM_DYN_SLICE: slices are coded one after another inside the MB partition
 *	   [iStartMbIndex, iEndMbIndex) of this thread, the slice index advancing by
 *	   iActiveThreadsNum after every coded slice.
 *
 *	On WIN32 the routine waits on up to three events at once (ready / exit / update-MB-list),
 *	on other platforms it only waits on the ready event.
 *
 * \param	arg		pointer to the SSliceThreadPrivateData of this thread
 * \return	thread result 0 on the WIN32 exit event; 1 if arg is NULL, if waiting failed, or if
 *			the slice index reached iMaxSliceNumConstraint in SM_DYN_SLICE mode
 */
WELS_THREAD_ROUTINE_TYPE CodingSliceThreadProc( void *arg )
{
  SSliceThreadPrivateData *pPrivateData = (SSliceThreadPrivateData *)arg;
  sWelsEncCtx *pEncPEncCtx = NULL;
  SDqLayer *pCurDq = NULL;
  SSlice *pSlice = NULL;
  SWelsSliceBs *pSliceBs = NULL;
#ifdef WIN32
  WELS_EVENT pEventsList[3];
  int32_t iEventCount = 0;
#endif
  WELS_THREAD_ERROR_CODE iWaitRet = WELS_THREAD_ERROR_GENERIAL;
  uint32_t uiThrdRet = 0;
  int32_t iSliceSize = 0;
  int32_t iSliceIdx = -1;
  int32_t iThreadIdx = -1;
  int32_t iEventIdx = -1;
  bool_t bNeedPrefix = false;
  EWelsNalUnitType eNalType = NAL_UNIT_UNSPEC_0;
  EWelsNalRefIdc eNalRefIdc = NRI_PRI_LOWEST;

  if ( NULL == pPrivateData )
    WELS_THREAD_ROUTINE_RETURN(1);

  WelsSetThreadCancelable();

  pEncPEncCtx = (sWelsEncCtx *)pPrivateData->pWelsPEncCtx;
  iThreadIdx = pPrivateData->iThreadIndex;
  iEventIdx = iThreadIdx; // events are indexed by thread index

#ifdef WIN32
  // The order below fixes the meaning of the wait results handled at the end of the loop:
  // WAIT_OBJECT_0 = ready to code, +1 = exit thread, +2 = update MB list
  pEventsList[iEventCount++] = pEncPEncCtx->pSliceThreading->pReadySliceCodingEvent[iEventIdx];
  pEventsList[iEventCount++] = pEncPEncCtx->pSliceThreading->pExitEncodeEvent[iEventIdx];
#if defined(DYNAMIC_SLICE_ASSIGN) && defined(TRY_SLICING_BALANCE)
  pEventsList[iEventCount++] = pEncPEncCtx->pSliceThreading->pUpdateMbListEvent[iEventIdx];
#endif//#if defined(DYNAMIC_SLICE_ASSIGN) && defined(TRY_SLICING_BALANCE)
#endif//WIN32

  do {
#ifdef WIN32
    iWaitRet = WelsMultipleEventsWaitSingleBlocking( iEventCount,
               &pEventsList[0],
               (uint32_t)-1 ); // blocking until at least one event is signaled
#else
#if defined(ENABLE_TRACE_MT)
    WelsLog(pEncPEncCtx, WELS_LOG_INFO, "[MT] CodingSliceThreadProc(), try to call WelsEventWait(pReadySliceCodingEvent[%d]= 0x%p), pEncPEncCtx= 0x%p!\n", iEventIdx, (void *)(pEncPEncCtx->pSliceThreading->pReadySliceCodingEvent[iEventIdx]), (void *)pEncPEncCtx );
#endif
    iWaitRet = WelsEventWait( pEncPEncCtx->pSliceThreading->pReadySliceCodingEvent[iEventIdx] );
#endif//WIN32

    if ( WELS_THREAD_ERROR_WAIT_OBJECT_0 == iWaitRet ) // start pSlice coding signal waited
    {
      SLayerBSInfo *pLbi = pPrivateData->pLayerBs;
      const int32_t kiCurDid = pEncPEncCtx->uiDependencyId;
      const int32_t kiCurTid = pEncPEncCtx->uiTemporalId;
      SWelsSvcCodingParam *pCodingParam = pEncPEncCtx->pSvcParam;
      SDLayerParam *pParamD = &pCodingParam->sDependencyLayers[kiCurDid];

      // snapshot the per-picture state of the encoder context for this round
      pCurDq = pEncPEncCtx->pCurDqLayer;
      eNalType = pEncPEncCtx->eNalType;
      eNalRefIdc = pEncPEncCtx->eNalPriority;
      bNeedPrefix = pEncPEncCtx->bNeedPrefixNalFlag;

      if ( pParamD->sMso.uiSliceMode != SM_DYN_SLICE )
      {
        int64_t iSliceStart = 0;
        bool_t bDsaFlag = false;

        iSliceIdx = pPrivateData->iSliceIndex;
        pSlice = &pCurDq->sLayerInfo.pSliceInLayer[iSliceIdx];
        pSliceBs = &pEncPEncCtx->pSliceBs[iSliceIdx];

#if defined(DYNAMIC_SLICE_ASSIGN) || defined(MT_DEBUG)
        // slice coding time is only measured for fixed-slice-number mode with at least
        // as many threads as slices
        bDsaFlag = (pParamD->sMso.uiSliceMode == SM_FIXEDSLCNUM_SLICE &&
                    pCodingParam->iMultipleThreadIdc > 1 &&
                    pCodingParam->iMultipleThreadIdc >= pParamD->sMso.sSliceArgument.iSliceNum);
        if ( bDsaFlag )
          iSliceStart = WelsTime();
#endif//DYNAMIC_SLICE_ASSIGN || MT_DEBUG

#if !defined(PACKING_ONE_SLICE_PER_LAYER)
        pSliceBs->uiBsPos = 0;
#endif//!PACKING_ONE_SLICE_PER_LAYER
        pSliceBs->iNalIndex = 0;
        assert( (void*)(&pSliceBs->sBsWrite) == (void*)pSlice->pSliceBsa );
        InitBits( &pSliceBs->sBsWrite, pSliceBs->pBsBuffer, pSliceBs->uiSize );

#if MT_DEBUG_BS_WR
        pSliceBs->bSliceCodedFlag = FALSE;
#endif//MT_DEBUG_BS_WR

        if ( bNeedPrefix )
        {
          if ( eNalRefIdc != NRI_PRI_LOWEST )
          {
            WelsLoadNalForSlice( pSliceBs, NAL_UNIT_PREFIX, eNalRefIdc );
            WelsWriteSVCPrefixNal( &pSliceBs->sBsWrite, eNalRefIdc, (NAL_UNIT_CODED_SLICE_IDR == eNalType) );
            WelsUnloadNalForSlice( pSliceBs );
          }
          else // No Prefix NAL Unit RBSP syntax here, but need add NAL Unit Header extension
          {
            WelsLoadNalForSlice( pSliceBs, NAL_UNIT_PREFIX, eNalRefIdc );
            // No need write any syntax of prefix NAL Unit RBSP here
            WelsUnloadNalForSlice( pSliceBs );
          }
        }

        WelsLoadNalForSlice( pSliceBs, eNalType, eNalRefIdc );
        WelsCodeOneSlice( pEncPEncCtx, iSliceIdx, eNalType );
        WelsUnloadNalForSlice( pSliceBs );

        // slice 0 is written directly into the frame bitstream buffer, the other slices
        // go to their own slice bitstream buffer
#if !defined(PACKING_ONE_SLICE_PER_LAYER)
        if ( 0 == iSliceIdx )
        {
          pLbi->pBsBuf = pEncPEncCtx->pFrameBs + pEncPEncCtx->iPosBsBuffer;
          iSliceSize = WriteSliceToFrameBs( pEncPEncCtx, pLbi, pLbi->pBsBuf, iSliceIdx );
          pEncPEncCtx->iPosBsBuffer += iSliceSize;
        }
        else
          iSliceSize = WriteSliceBs( pEncPEncCtx, pSliceBs->pBs, iSliceIdx );
#else// PACKING_ONE_SLICE_PER_LAYER
        if ( 0 == iSliceIdx )
        {
          pLbi->pBsBuf = pEncPEncCtx->pFrameBs + pEncPEncCtx->iPosBsBuffer;
          iSliceSize = WriteSliceToFrameBs( pEncPEncCtx, pLbi, pLbi->pBsBuf, iSliceIdx );
          pEncPEncCtx->iPosBsBuffer += iSliceSize;
        }
        else
        {
          pLbi->pBsBuf = pSliceBs->pBs + pSliceBs->uiBsPos;
          iSliceSize = WriteSliceToFrameBs( pEncPEncCtx, pLbi, pLbi->pBsBuf, iSliceIdx );
          pSliceBs->uiBsPos += iSliceSize;
        }
#endif//!PACKING_ONE_SLICE_PER_LAYER

        if ( pCurDq->bDeblockingParallelFlag && pSlice->sSliceHeaderExt.sSliceHeader.uiDisableDeblockingFilterIdc != 1
#if !defined(ENABLE_FRAME_DUMP)
             && ( eNalRefIdc != NRI_PRI_LOWEST ) &&
             ( pParamD->iHighestTemporalId == 0 || kiCurTid < pParamD->iHighestTemporalId )
#endif// !ENABLE_FRAME_DUMP
           )
        {
          DeblockingFilterSliceAvcbase( pCurDq, pEncPEncCtx->pFuncList, iSliceIdx );
        }

#if defined(DYNAMIC_SLICE_ASSIGN) || defined(MT_DEBUG)
        if ( bDsaFlag )
        {
          pEncPEncCtx->pSliceThreading->pSliceConsumeTime[pEncPEncCtx->uiDependencyId][iSliceIdx] = (uint32_t)(WelsTime() - iSliceStart);
#if defined(ENABLE_TRACE_MT)
          WelsLog(pEncPEncCtx, WELS_LOG_INFO, "[MT] CodingSliceThreadProc(), coding_idx %d, uiSliceIdx %d, pSliceConsumeTime %d, iSliceSize %d, pFirstMbInSlice %d, count_num_mb_in_slice %d\n",
                  pEncPEncCtx->iCodingIndex, iSliceIdx, pEncPEncCtx->pSliceThreading->pSliceConsumeTime[pEncPEncCtx->uiDependencyId][iSliceIdx], iSliceSize, pCurDq->pSliceEncCtx->pFirstMbInSlice[iSliceIdx], pCurDq->pSliceEncCtx->pCountMbNumInSlice[iSliceIdx]);
#endif//ENABLE_TRACE_MT
        }
#endif//DYNAMIC_SLICE_ASSIGN || MT_DEBUG

#if defined(SLICE_INFO_OUTPUT)
        fprintf( stderr,
                 "@pSlice=%-6d sliceType:%c idc:%d size:%-6d\n",
                 iSliceIdx,
                 (pEncPEncCtx->eSliceType == P_SLICE ? 'P' : 'I'),
                 eNalRefIdc,
                 iSliceSize
               );
#endif//SLICE_INFO_OUTPUT

#if MT_DEBUG_BS_WR
        pSliceBs->bSliceCodedFlag = TRUE;
#endif//MT_DEBUG_BS_WR

#ifdef WIN32
        WelsEventSignal( &pEncPEncCtx->pSliceThreading->pSliceCodedEvent[iEventIdx] ); // mean finished coding current pSlice
#else
        WelsEventSignal( pEncPEncCtx->pSliceThreading->pSliceCodedEvent[iEventIdx] ); // mean finished coding current pSlice
#endif//WIN32
      }
      else // for SM_DYN_SLICE parallelization
      {
#ifdef PACKING_ONE_SLICE_PER_LAYER
        SLayerBSInfo *pLbiPacking = NULL;
#endif//PACKING_ONE_SLICE_PER_LAYER
        SSliceCtx *pSliceCtx = pCurDq->pSliceEncCtx;
        const int32_t kiPartitionId = iThreadIdx;
        const int32_t kiSliceIdxStep = pEncPEncCtx->iActiveThreadsNum;
        const int32_t kiFirstMbInPartition = pPrivateData->iStartMbIndex; // inclusive
        const int32_t kiEndMbInPartition = pPrivateData->iEndMbIndex; // exclusive
        int32_t iAnyMbLeftInPartition = kiEndMbInPartition - kiFirstMbInPartition;

        iSliceIdx = pPrivateData->iSliceIndex;
        pSliceCtx->pFirstMbInSlice[iSliceIdx] = kiFirstMbInPartition;
        pCurDq->pNumSliceCodedOfPartition[kiPartitionId] = 1; // one pSlice per partition initialized, dynamic slicing inside
        pCurDq->pLastMbIdxOfPartition[kiPartitionId] = kiEndMbInPartition-1;
        pCurDq->pLastCodedMbIdxOfPartition[kiPartitionId] = 0;

        while( iAnyMbLeftInPartition > 0 )
        {
          if ( iSliceIdx >= pSliceCtx->iMaxSliceNumConstraint )
          {
            // TODO: need exception handler for not large enough of MAX_SLICES_NUM related memory usage
            // No idea about its solution due MAX_SLICES_NUM is fixed length in relevant pData structure
            uiThrdRet = 1;
            break;
          }

          pSlice = &pCurDq->sLayerInfo.pSliceInLayer[iSliceIdx];
          pSliceBs = &pEncPEncCtx->pSliceBs[iSliceIdx];

#if !defined(PACKING_ONE_SLICE_PER_LAYER)
          pSliceBs->uiBsPos = 0;
#endif//!PACKING_ONE_SLICE_PER_LAYER
          pSliceBs->iNalIndex = 0;
          InitBits( &pSliceBs->sBsWrite, pSliceBs->pBsBuffer, pSliceBs->uiSize );

          if ( bNeedPrefix )
          {
            if ( eNalRefIdc != NRI_PRI_LOWEST )
            {
              WelsLoadNalForSlice( pSliceBs, NAL_UNIT_PREFIX, eNalRefIdc );
              WelsWriteSVCPrefixNal( &pSliceBs->sBsWrite, eNalRefIdc, (NAL_UNIT_CODED_SLICE_IDR == eNalType) );
              WelsUnloadNalForSlice( pSliceBs );
            }
            else // No Prefix NAL Unit RBSP syntax here, but need add NAL Unit Header extension
            {
              WelsLoadNalForSlice( pSliceBs, NAL_UNIT_PREFIX, eNalRefIdc );
              // No need write any syntax of prefix NAL Unit RBSP here
              WelsUnloadNalForSlice( pSliceBs );
            }
          }

          WelsLoadNalForSlice( pSliceBs, eNalType, eNalRefIdc );
          WelsCodeOneSlice( pEncPEncCtx, iSliceIdx, eNalType );
          WelsUnloadNalForSlice( pSliceBs );

          // partition 0 writes directly into the frame bitstream buffer, the other
          // partitions go to their own slice bitstream buffer
#if !defined(PACKING_ONE_SLICE_PER_LAYER)
          if ( 0 == kiPartitionId )
          {
            if ( 0 == iSliceIdx )
              pLbi->pBsBuf = pEncPEncCtx->pFrameBs + pEncPEncCtx->iPosBsBuffer;
            iSliceSize = WriteSliceToFrameBs( pEncPEncCtx, pLbi, pEncPEncCtx->pFrameBs + pEncPEncCtx->iPosBsBuffer, iSliceIdx );
            pEncPEncCtx->iPosBsBuffer += iSliceSize;
          }
          else
            iSliceSize = WriteSliceBs( pEncPEncCtx, pSliceBs->pBs, iSliceIdx );
#else// PACKING_ONE_SLICE_PER_LAYER
          pLbiPacking = pLbi + (iSliceIdx - kiPartitionId);
          if ( 0 == kiPartitionId )
          {
            pLbiPacking->pBsBuf = pEncPEncCtx->pFrameBs + pEncPEncCtx->iPosBsBuffer;
            iSliceSize = WriteSliceToFrameBs( pEncPEncCtx, pLbiPacking, pLbiPacking->pBsBuf, iSliceIdx );
            pEncPEncCtx->iPosBsBuffer += iSliceSize;
          }
          else
          {
            pLbiPacking->pBsBuf = pSliceBs->pBs + pSliceBs->uiBsPos;
            iSliceSize = WriteSliceToFrameBs( pEncPEncCtx, pLbiPacking, pLbiPacking->pBsBuf, iSliceIdx );
            pSliceBs->uiBsPos += iSliceSize;
          }
          pEncPEncCtx->pSliceThreading->pCountBsSizeInPartition[kiPartitionId] += iSliceSize;
#endif//!PACKING_ONE_SLICE_PER_LAYER

          if ( pCurDq->bDeblockingParallelFlag && pSlice->sSliceHeaderExt.sSliceHeader.uiDisableDeblockingFilterIdc != 1
#if !defined(ENABLE_FRAME_DUMP)
               && ( eNalRefIdc != NRI_PRI_LOWEST ) &&
               ( pParamD->iHighestTemporalId == 0 || kiCurTid < pParamD->iHighestTemporalId )
#endif// !ENABLE_FRAME_DUMP
             )
          {
            DeblockingFilterSliceAvcbase( pCurDq, pEncPEncCtx->pFuncList, iSliceIdx );
          }

#if defined(SLICE_INFO_OUTPUT)
          fprintf( stderr,
                   "@pSlice=%-6d sliceType:%c idc:%d size:%-6d\n",
                   iSliceIdx,
                   (pEncPEncCtx->eSliceType == P_SLICE ? 'P' : 'I'),
                   eNalRefIdc,
                   iSliceSize
                 );
#endif//SLICE_INFO_OUTPUT

#if defined(ENABLE_TRACE_MT)
          WelsLog(pEncPEncCtx, WELS_LOG_INFO, "[MT] CodingSliceThreadProc(), coding_idx %d, iPartitionId %d, uiSliceIdx %d, iSliceSize %d, count_mb_slice %d, iEndMbInPartition %d, pCurDq->pLastCodedMbIdxOfPartition[%d] %d\n",
                  pEncPEncCtx->iCodingIndex, kiPartitionId, iSliceIdx, iSliceSize, pCurDq->pSliceEncCtx->pCountMbNumInSlice[iSliceIdx], kiEndMbInPartition, kiPartitionId, pCurDq->pLastCodedMbIdxOfPartition[kiPartitionId]);
#endif//ENABLE_TRACE_MT

          // presumably pLastCodedMbIdxOfPartition[] is advanced while the slice is coded -- confirm in WelsCodeOneSlice()
          iAnyMbLeftInPartition = kiEndMbInPartition - (1 + pCurDq->pLastCodedMbIdxOfPartition[kiPartitionId]);
          iSliceIdx += kiSliceIdxStep;
        }

        // NOTE(review): leaving here skips the pSliceCodedEvent signal below; confirm the
        // thread waiting on that event cannot block forever in this case.
        if ( uiThrdRet ) // any exception??
          break;

#ifdef WIN32
        WelsEventSignal( &pEncPEncCtx->pSliceThreading->pSliceCodedEvent[iEventIdx] ); // mean finished coding current pSlice
#else
        WelsEventSignal( pEncPEncCtx->pSliceThreading->pSliceCodedEvent[iEventIdx] ); // mean finished coding current pSlice
#endif//WIN32
      }
    }
#ifdef WIN32
    else if ( WELS_THREAD_ERROR_WAIT_OBJECT_0+1 == iWaitRet ) // exit thread signal
    {
      uiThrdRet = 0;
      break;
    }
#if defined(DYNAMIC_SLICE_ASSIGN) && defined(TRY_SLICING_BALANCE)
    else if ( WELS_THREAD_ERROR_WAIT_OBJECT_0+2 == iWaitRet ) // update pMb list signal
    {
      iSliceIdx = iEventIdx; // pPrivateData->iSliceIndex; old threads can not be terminated, pPrivateData is not correct for applicable
      pCurDq = pEncPEncCtx->pCurDqLayer;
      UpdateMbListNeighborParallel( pCurDq->pSliceEncCtx, pCurDq->sMbDataP, iSliceIdx );
      WelsEventSignal( &pEncPEncCtx->pSliceThreading->pFinUpdateMbListEvent[iEventIdx] ); // mean finished update pMb list for this pSlice
    }
#endif//#if defined(DYNAMIC_SLICE_ASSIGN) && defined(TRY_SLICING_BALANCE)
#endif//WIN32
    else // WELS_THREAD_ERROR_WAIT_TIMEOUT, or WELS_THREAD_ERROR_WAIT_FAILED
    {
      WelsLog(pEncPEncCtx, WELS_LOG_WARNING, "[MT] CodingSliceThreadProc(), waiting pReadySliceCodingEvent[%d] failed(%d) and thread%d terminated!\n", iEventIdx, iWaitRet, iThreadIdx);
      uiThrdRet = 1;
      break;
    }
  } while( 1 );

#ifdef WIN32
  WelsEventSignal( &pEncPEncCtx->pSliceThreading->pFinSliceCodingEvent[iEventIdx] ); // notify to mother encoding threading
#endif//WIN32

  WELS_THREAD_ROUTINE_RETURN(uiThrdRet);
}
/*!
 * \brief	Create one CodingSliceThreadProc thread per configured thread
 *			(pCtx->pSvcParam->iCountThreadsNum), each receiving its own pThreadPEncCtx entry.
 *
 *	With WIN32 && BIND_CPU_CORES_TO_THREADS, thread iIdx is additionally bound to CPU iIdx when
 *	that CPU is part of the process affinity mask.
 *	With DYNAMIC_SLICE_ASSIGN && TRY_SLICING_BALANCE on __GNUC__ builds, one extra
 *	UpdateMbListThreadProc thread is created per coding thread.
 *
 * \param	pCtx	encoder context
 * \return	always 0 (results of WelsThreadCreate are not inspected)
 */
int32_t CreateSliceThreads( sWelsEncCtx *pCtx )
{
  const int32_t kiThreadCount = pCtx->pSvcParam->iCountThreadsNum;
  int32_t iIdx = 0;

#if defined(WIN32) && defined(BIND_CPU_CORES_TO_THREADS)
  // Affinity masks are pointer-sized (DWORD_PTR) in the Win32 API
  DWORD_PTR dwProcessAffinity = 0;
  DWORD_PTR dwSystemAffinity = 0;
  if ( !GetProcessAffinityMask( GetCurrentProcess(), &dwProcessAffinity, &dwSystemAffinity ) )
    dwProcessAffinity = 0; // query failed: do not bind any thread
#endif//WIN32 && BIND_CPU_CORES_TO_THREADS

  while ( iIdx < kiThreadCount ) {
    WelsThreadCreate( &pCtx->pSliceThreading->pThreadHandles[iIdx], CodingSliceThreadProc, &pCtx->pSliceThreading->pThreadPEncCtx[iIdx], 0);

#if defined(WIN32) && defined(BIND_CPU_CORES_TO_THREADS)
    if ( dwProcessAffinity > 1 && pCtx->pSliceThreading->pThreadHandles[iIdx] != NULL // multiple cores and thread created successfully
         && iIdx < (int32_t)(sizeof(DWORD_PTR) * 8) ) // keep the shift below within the mask width
    {
      const DWORD_PTR dwAffinityMask = (DWORD_PTR)1 << iIdx;

      if ( dwAffinityMask & dwProcessAffinity ) // check if cpu is available
      {
        if ( 0 == SetThreadAffinityMask( pCtx->pSliceThreading->pThreadHandles[iIdx], dwAffinityMask ) )
        {
          WelsLog( pCtx, WELS_LOG_WARNING, "CreateSliceThreads(), SetThreadAffinityMask failed, iIdx:%d\n", iIdx );
        }
      }
    }
#endif//WIN32 && BIND_CPU_CORES_TO_THREADS

    // We need extra threads for update_mb_list_proc on __GNUC__ like OS (mac/linux)
    // due to WelsMultipleEventsWaitSingleBlocking implementation can not work well
    // in case waiting pUpdateMbListEvent and pReadySliceCodingEvent events at the same time
#if defined(DYNAMIC_SLICE_ASSIGN) && defined(TRY_SLICING_BALANCE)
#if defined(__GNUC__)
    WelsThreadCreate( &pCtx->pSliceThreading->pUpdateMbListThrdHandles[iIdx], UpdateMbListThreadProc, &pCtx->pSliceThreading->pThreadPEncCtx[iIdx], 0);
#endif//__GNUC__
#endif//#if defined(DYNAMIC_SLICE_ASSIGN) && defined(TRY_SLICING_BALANCE)

    ++ iIdx;
  }

#if defined(ENABLE_TRACE_MT)
  WelsLog(pCtx, WELS_LOG_INFO, "CreateSliceThreads() exit..\n");
#endif

  return 0;
}
#ifdef PACKING_ONE_SLICE_PER_LAYER
/*!
 * \brief	Zero the first iPartitionCnt entries of a per-partition bitstream size list.
 *
 * \param	pCountBsSizeList	list to clear; nothing is done if NULL
 * \param	iPartitionCnt		number of entries to clear; nothing is done if <= 0
 */
void ResetCountBsSizeInPartitions( uint32_t *pCountBsSizeList, const int32_t iPartitionCnt )
{
  // nothing to clear
  if ( NULL == pCountBsSizeList || iPartitionCnt <= 0 )
    return;

  const size_t kuiBytesToClear = sizeof(pCountBsSizeList[0]) * iPartitionCnt;
  memset( pCountBsSizeList, 0, kuiBytesToClear );
}
#endif//PACKING_ONE_SLICE_PER_LAYER
/*!
 * \brief	Hand out work to the slice coding threads and signal their events.
 *
 *	In dynamic slicing mode the MB range [iStartMbIndex, iEndMbIndex) of every thread is
 *	derived first: partition iIdx starts at pSliceCtx->pFirstMbInSlice[iIdx] and ends where
 *	the next partition starts (the last one ends at pSliceCtx->iMbNumInFrame).
 *	Then each thread gets its layer bitstream info and slice index (== thread index) and its
 *	event is signaled. With PACKING_ONE_SLICE_PER_LAYER every thread gets its own consecutive
 *	pLbi entry, otherwise all threads share pLbi.
 *
 * \param	pPriData				per-thread private data, uiNumThreads entries
 * \param	pEventsList				events to signal, uiNumThreads entries
 * \param	pLbi					layer bitstream info
 * \param	uiNumThreads			number of threads to fire
 * \param	pSliceCtx				slice context; must not be NULL in dynamic slicing mode
 * \param	bIsDynamicSlicingMode	whether MB partitions need to be assigned
 * \return	0 on success, 1 on invalid arguments
 */
#ifdef WIN32
int32_t FiredSliceThreads( SSliceThreadPrivateData *pPriData, WELS_EVENT *pEventsList, SLayerBSInfo *pLbi, const uint32_t uiNumThreads, SSliceCtx *pSliceCtx, const BOOL_T bIsDynamicSlicingMode )
#else
int32_t FiredSliceThreads( SSliceThreadPrivateData *pPriData, WELS_EVENT **pEventsList, SLayerBSInfo *pLbi, const uint32_t uiNumThreads, SSliceCtx *pSliceCtx, const BOOL_T bIsDynamicSlicingMode )
#endif//WIN32
{
  int32_t iEndMbIdx = 0;
  int32_t iIdx = 0;
  const int32_t kiEventCnt = uiNumThreads;

  if ( pPriData == NULL || pLbi == NULL || kiEventCnt <= 0 || pEventsList == NULL )
  {
    WelsLog( NULL, WELS_LOG_ERROR, "FiredSliceThreads(), fail due pPriData == %p || pLbi == %p || iEventCnt(%d) <= 0 || pEventsList == %p!!\n", (void *)pPriData, (void *)pLbi, kiEventCnt, (void *)pEventsList);
    return 1;
  }

  // pSliceCtx is only read in dynamic slicing mode
  if ( bIsDynamicSlicingMode && pSliceCtx == NULL )
  {
    WelsLog( NULL, WELS_LOG_ERROR, "FiredSliceThreads(), fail due pSliceCtx == NULL in dynamic slicing mode!!\n" );
    return 1;
  }

  if ( bIsDynamicSlicingMode )
  {
    // walk backwards so that every partition ends where its successor begins
    iEndMbIdx = pSliceCtx->iMbNumInFrame;
    for ( iIdx = kiEventCnt-1; iIdx >= 0; --iIdx )
    {
      const int32_t kiFirstMbIdx = pSliceCtx->pFirstMbInSlice[iIdx];

      pPriData[iIdx].iStartMbIndex = kiFirstMbIdx;
      pPriData[iIdx].iEndMbIndex = iEndMbIdx;
      iEndMbIdx = kiFirstMbIdx;
    }
  }

  for ( iIdx = 0; iIdx < kiEventCnt; ++iIdx )
  {
    pPriData[iIdx].pLayerBs = pLbi;
    pPriData[iIdx].iSliceIndex = iIdx;
#ifdef WIN32
    if ( pEventsList[iIdx] )
      WelsEventSignal( &pEventsList[iIdx] );
#else
    WelsEventSignal( pEventsList[iIdx] );
#endif//WIN32
#if defined(PACKING_ONE_SLICE_PER_LAYER)
    ++ pLbi; // one layer bitstream info per slice
#endif//PACKING_ONE_SLICE_PER_LAYER
  }

  return 0;
}
int32_t DynamicDetectCpuCores()
{
WelsLogicalProcessInfo info;
WelsQueryLogicalProcessInfo(&info);
return info.ProcessorCount;
}
#if defined(MT_ENABLED) && defined(DYNAMIC_SLICE_ASSIGN)
/*!
 * \brief	Rebalance the slicing of the base spatial layer (ppDqLayerList[0]).
 *
 *	With TRY_SLICING_BALANCE the base layer becomes the current layer of pCtx and
 *	DynamicAdjustSlicing() is run on it using pSliceComplexRatio[0]. With
 *	NOT_ABSOLUTE_BALANCING in addition, the adjustment only runs when NeedDynamicAdjust()
 *	reports it is needed for pSliceConsumeTime[0].
 *	With MT_DEBUG the elapsed time is written to pFSliceDiff (if opened).
 *
 * \param	pCtx	encoder context
 * \return	result of NeedDynamicAdjust() when it is evaluated, otherwise 1
 */
int32_t AdjustBaseLayer( sWelsEncCtx *pCtx )
{
  SDqLayer *pCurDq = pCtx->ppDqLayerList[0];
  int32_t iNeedAdj = 1;
#ifdef MT_DEBUG
  int64_t iT0 = WelsTime();
#endif//MT_DEBUG

#ifdef TRY_SLICING_BALANCE
  pCtx->pCurDqLayer = pCurDq;
#ifdef NOT_ABSOLUTE_BALANCING
  // do not need adjust due to not different at both slices of consumed time
  iNeedAdj = NeedDynamicAdjust( pCtx->pSliceThreading->pSliceConsumeTime[0], pCurDq->pSliceEncCtx->iSliceNumInFrame );
#endif//NOT_ABSOLUTE_BALANCING
  // iNeedAdj keeps its initial value 1 unless NeedDynamicAdjust() was evaluated above
  if ( iNeedAdj )
  {
    DynamicAdjustSlicing( pCtx,
                          pCurDq,
                          pCtx->pSliceThreading->pSliceComplexRatio[0],
                          0 );
  }
#endif//TRY_SLICING_BALANCE

#ifdef MT_DEBUG
  iT0 = WelsTime() - iT0;
  if ( pCtx->pSliceThreading->pFSliceDiff )
  {
    // cast: both format strings expect a 64-bit "long long" sized argument
    fprintf( pCtx->pSliceThreading->pFSliceDiff,
#ifdef WIN32
             "%6I64d us adjust time at base spatial layer, iNeedAdj %d, DynamicAdjustSlicing()\n",
#else
             "%6lld us adjust time at base spatial layer, iNeedAdj %d, DynamicAdjustSlicing()\n",
#endif//WIN32
             (long long)iT0, iNeedAdj );
  }
#endif//MT_DEBUG

  return iNeedAdj;
}
/*!
 * \brief	Rebalance the slicing of the current (enhancement) spatial layer.
 *
 *	With TRY_SLICING_BALANCE, DynamicAdjustSlicing() is run on pCtx->pCurDqLayer. The
 *	complexity/time statistics are taken from layer iCurDid-1 when the current layer has a
 *	reference layer, iCurDid > 0 and layer iCurDid-1 uses SM_FIXEDSLCNUM_SLICE with no more
 *	slices than iMultipleThreadIdc; otherwise the statistics of layer iCurDid itself are used.
 *	With NOT_ABSOLUTE_BALANCING in addition, the adjustment only runs when
 *	NeedDynamicAdjust() reports it is needed.
 *	With MT_DEBUG the elapsed time is written to pFSliceDiff (if opened).
 *
 * \param	pCtx	encoder context
 * \param	iCurDid	dependency id of the current spatial layer
 * \return	result of NeedDynamicAdjust() when it is evaluated, otherwise 1
 */
int32_t AdjustEnhanceLayer( sWelsEncCtx *pCtx, int32_t iCurDid )
{
#ifdef MT_DEBUG
  int64_t iT1 = WelsTime();
#endif//MT_DEBUG
  int32_t iNeedAdj = 1;

#ifdef TRY_SLICING_BALANCE
  // uiSliceMode of referencing spatial should be SM_FIXEDSLCNUM_SLICE
  // if using spatial base layer for complexity estimation
  const BOOL_T kbModelingFromSpatial = (pCtx->pCurDqLayer->pRefLayer != NULL && iCurDid > 0)
                                       && (pCtx->pSvcParam->sDependencyLayers[iCurDid-1].sMso.uiSliceMode == SM_FIXEDSLCNUM_SLICE && pCtx->pSvcParam->iMultipleThreadIdc >= pCtx->pSvcParam->sDependencyLayers[iCurDid-1].sMso.sSliceArgument.iSliceNum);
  // layer whose statistics drive the estimation: spatial base layer, or else the layer itself (temporal)
  const int32_t kiModelDid = kbModelingFromSpatial ? (iCurDid - 1) : iCurDid;

#ifdef NOT_ABSOLUTE_BALANCING
  // do not need adjust due to not different at both slices of consumed time
  iNeedAdj = NeedDynamicAdjust( pCtx->pSliceThreading->pSliceConsumeTime[kiModelDid], pCtx->pCurDqLayer->pSliceEncCtx->iSliceNumInFrame );
#endif//NOT_ABSOLUTE_BALANCING
  // iNeedAdj keeps its initial value 1 unless NeedDynamicAdjust() was evaluated above
  if ( iNeedAdj )
  {
    DynamicAdjustSlicing( pCtx,
                          pCtx->pCurDqLayer,
                          pCtx->pSliceThreading->pSliceComplexRatio[kiModelDid],
                          iCurDid
                        );
  }
#endif//TRY_SLICING_BALANCE

#ifdef MT_DEBUG
  iT1 = WelsTime() - iT1;
  if ( pCtx->pSliceThreading->pFSliceDiff )
  {
    // cast: both format strings expect a 64-bit "long long" sized argument
    fprintf( pCtx->pSliceThreading->pFSliceDiff,
#ifdef WIN32
             "%6I64d us adjust time at spatial layer %d, iNeedAdj %d, DynamicAdjustSlicing()\n",
#else
             "%6lld us adjust time at spatial layer %d, iNeedAdj %d, DynamicAdjustSlicing()\n",
#endif//WIN32
             (long long)iT1, iCurDid, iNeedAdj );
  }
#endif//MT_DEBUG

  return iNeedAdj;
}
#endif//#if defined(MT_ENABLED) && defined(DYNAMIC_SLICE_ASSIGN)
#if defined(MT_ENABLED)
#if defined(DYNAMIC_SLICE_ASSIGN) && defined(TRY_SLICING_BALANCE) && defined(MT_DEBUG)
/*!
 * \brief	Debug helper: write the complexity ratio of every slice of the current layer
 *			(pSliceComplexRatio[iCurDid][slice]) to the pFSliceDiff trace file.
 *
 *	Nothing is written when the slice threading context or the trace file is not available.
 *
 * \param	pCtx	encoder context
 * \param	iCurDid	dependency id whose ratios are reported
 */
void TrackSliceComplexities( sWelsEncCtx *pCtx, const int32_t iCurDid )
{
  const int32_t kiCountSliceNum = pCtx->pCurDqLayer->pSliceEncCtx->iSliceNumInFrame;
  int32_t iSliceIdx = 0;

  // no trace file opened: nothing to report to
  if ( NULL == pCtx->pSliceThreading || NULL == pCtx->pSliceThreading->pFSliceDiff )
    return;

  for ( iSliceIdx = 0; iSliceIdx < kiCountSliceNum; ++iSliceIdx )
  {
    fprintf( pCtx->pSliceThreading->pFSliceDiff, "%6.3f complexity pRatio at iDid %d pSlice %d\n", pCtx->pSliceThreading->pSliceComplexRatio[iCurDid][iSliceIdx], iCurDid, iSliceIdx );
  }
}
#endif//#if defined(DYNAMIC_SLICE_ASSIGN) && defined(TRY_SLICING_BALANCE) && defined(MT_DEBUG)
#if defined(DYNAMIC_SLICE_ASSIGN) && defined(MT_DEBUG)
/*!
 * \brief	Debug helper: for every listed spatial layer, write the consumed coding time of
 *			each slice and the maximum among them to the pFSliceDiff trace file.
 *
 *	A layer is only reported when it uses SM_FIXEDSLCNUM_SLICE, iMultipleThreadIdc > 1 and
 *	iMultipleThreadIdc >= its slice count, and when its pSliceConsumeTime list exists.
 *	Nothing is written when the slice threading context or the trace file is not available.
 *
 * \param	pCtx		encoder context
 * \param	pDidList	dependency ids of the spatial layers to report
 * \param	iSpatialNum	number of entries in pDidList; ignored if > MAX_DEPENDENCY_LAYER
 */
void TrackSliceConsumeTime( sWelsEncCtx *pCtx, int32_t *pDidList, const int32_t iSpatialNum )
{
  SWelsSvcCodingParam *pPara = NULL;
  int32_t iSpatialIdx = 0;

  if ( iSpatialNum > MAX_DEPENDENCY_LAYER )
    return;

  // nothing to read from, or no trace file to report to
  if ( NULL == pDidList || NULL == pCtx->pSliceThreading || NULL == pCtx->pSliceThreading->pFSliceDiff )
    return;

  pPara = pCtx->pSvcParam;

  for ( iSpatialIdx = 0; iSpatialIdx < iSpatialNum; ++iSpatialIdx )
  {
    const int32_t kiDid = pDidList[iSpatialIdx];
    SDLayerParam *pDlp = &pPara->sDependencyLayers[kiDid];
    SMulSliceOption *pMso = &pDlp->sMso;
    SDqLayer *pCurDq = pCtx->ppDqLayerList[kiDid];
    SSliceCtx *pSliceCtx = pCurDq->pSliceEncCtx;
    const uint32_t kuiCountSliceNum = pSliceCtx->iSliceNumInFrame;
    uint32_t i = 0;
    uint32_t uiMaxT = 0;
    int32_t iMaxI = 0;

    if ( pMso->uiSliceMode != SM_FIXEDSLCNUM_SLICE || pPara->iMultipleThreadIdc <= 1 || !(pPara->iMultipleThreadIdc >= kuiCountSliceNum) )
      continue;

    // no timing list for this layer: skip it instead of reading through a NULL list
    if ( NULL == pCtx->pSliceThreading->pSliceConsumeTime[kiDid] )
      continue;

    for ( i = 0; i < kuiCountSliceNum; ++i )
    {
      fprintf( pCtx->pSliceThreading->pFSliceDiff, "%6d us consume_time coding_idx %d iDid %d pSlice %d\n",
               pCtx->pSliceThreading->pSliceConsumeTime[kiDid][i], pCtx->iCodingIndex, kiDid, i /*/ 1000*/);

      // remember the slowest slice of this layer
      if ( pCtx->pSliceThreading->pSliceConsumeTime[kiDid][i] > uiMaxT )
      {
        uiMaxT = pCtx->pSliceThreading->pSliceConsumeTime[kiDid][i];
        iMaxI = i;
      }
    }

    fprintf( pCtx->pSliceThreading->pFSliceDiff, "%6d us consume_time_max coding_idx %d iDid %d pSlice %d\n", uiMaxT, pCtx->iCodingIndex, kiDid, iMaxI /*/ 1000*/);
  }
}
#endif//#if defined(DYNAMIC_SLICE_ASSIGN) && defined(MT_DEBUG)
#endif//MT_ENABLED
}
#endif//MT_ENABLED