ref: d2e66deb66e8c524dbe5a22f9d19c8b07afdc9b6
parent: beacba76e33841be0b26013a735081a8b8f54407
parent: e508c86daca3c0b97b7603b6ba61ddeb554c8ff7
author: huili2 <huili2@cisco.com>
date: Fri Nov 13 02:38:06 EST 2015
Merge pull request #2227 from sijchen/thp92 [Encoder] fix the missing loadbalancing part
--- a/codec/encoder/core/inc/svc_enc_slice_segment.h
+++ b/codec/encoder/core/inc/svc_enc_slice_segment.h
@@ -89,6 +89,8 @@
int32_t* pCountMbNumInSlice; /* count number of MBs in every slice respectively; */
uint32_t uiSliceSizeConstraint; /* in byte */
int32_t iMaxSliceNumConstraint; /* maximal number of slices constraint */
+
+uint32_t* pSliceConsumeTime;
} SSliceCtx;
--- a/codec/encoder/core/inc/wels_task_encoder.h
+++ b/codec/encoder/core/inc/wels_task_encoder.h
@@ -90,8 +90,8 @@
class CWelsLoadBalancingSlicingEncodingTask : public CWelsSliceEncodingTask {
public:
- CWelsLoadBalancingSlicingEncodingTask (sWelsEncCtx* pCtx, const int32_t iSliceIdx);
- ~CWelsLoadBalancingSlicingEncodingTask();
+ CWelsLoadBalancingSlicingEncodingTask(sWelsEncCtx* pCtx, const int32_t iSliceIdx) : CWelsSliceEncodingTask (pCtx, iSliceIdx) {
+ };
virtual WelsErrorType InitTask();
virtual void FinishTask();
--- a/codec/encoder/core/src/encoder_ext.cpp
+++ b/codec/encoder/core/src/encoder_ext.cpp
@@ -4839,6 +4839,17 @@
pMA->WelsFree (pCurLayer->pSliceEncCtx->pCountMbNumInSlice, "pSliceSeg->pCountMbNumInSlice");
pCurLayer->pSliceEncCtx->pCountMbNumInSlice = pCountMbNumInSlice;
+ uint32_t* pSliceConsumeTime = (uint32_t*)pMA->WelsMalloc (iMaxSliceNum * sizeof (uint32_t),
+ "pSliceSeg->pSliceConsumeTime");
+ if (NULL == pSliceConsumeTime) {
+ WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR,
+ "CWelsH264SVCEncoder::DynSliceRealloc: realloc pSliceConsumeTime not successful");
+ return ENC_RETURN_MEMALLOCERR;
+ }
+ memcpy (pSliceConsumeTime, pCurLayer->pSliceEncCtx->pSliceConsumeTime, sizeof (int32_t) * iMaxSliceNumOld);
+ pMA->WelsFree (pCurLayer->pSliceEncCtx->pSliceConsumeTime, "pSliceSeg->pSliceConsumeTime");
+ pCurLayer->pSliceEncCtx->pSliceConsumeTime = pSliceConsumeTime;
+
//deal with rate control variables
const int32_t kiCurDid = pCtx->uiDependencyId;
SRCSlicing* pSlcingOverRc = (SRCSlicing*)pMA->WelsMalloc (iMaxSliceNum * sizeof (SRCSlicing), "SlicingOverRC");
--- a/codec/encoder/core/src/slice_multi_threading.cpp
+++ b/codec/encoder/core/src/slice_multi_threading.cpp
@@ -65,7 +65,7 @@
#include "wels_task_management.h"
#if defined(ENABLE_TRACE_MT)
-#define MT_TRACE_LOG(x, ...) WelsLog(x, __VA_ARGS__)
+#define MT_TRACE_LOG(pLog, x, ...) WelsLog(pLog, x, __VA_ARGS__)
#else
#define MT_TRACE_LOG(x, ...)
#endif
@@ -812,7 +812,7 @@
if (bDsaFlag) {
pEncPEncCtx->pSliceThreading->pSliceConsumeTime[pEncPEncCtx->uiDependencyId][iSliceIdx] = (uint32_t) (
WelsTime() - iSliceStart);
- MT_TRACE_LOG (pEncPEncCtx, WELS_LOG_INFO,
+ MT_TRACE_LOG (&(pEncPEncCtx->sLogCtx), WELS_LOG_INFO,
"[MT] CodingSliceThreadProc(), coding_idx %d, uiSliceIdx %d, pSliceConsumeTime %d, iSliceSize %d, pFirstMbInSlice %d, count_num_mb_in_slice %d",
pEncPEncCtx->iCodingIndex, iSliceIdx,
pEncPEncCtx->pSliceThreading->pSliceConsumeTime[pEncPEncCtx->uiDependencyId][iSliceIdx], iSliceSize,
@@ -1060,7 +1060,7 @@
#endif//MT_DEBUG
pCtx->pCurDqLayer = pCurDq;
-
+ memcpy((pCtx->pSliceThreading->pSliceConsumeTime[0]), (pCurDq->pSliceEncCtx->pSliceConsumeTime), pCurDq->pSliceEncCtx->iSliceNumInFrame*sizeof(uint32_t));
// do not need adjust due to not different at both slices of consumed time
iNeedAdj = NeedDynamicAdjust (pCtx->pSliceThreading->pSliceConsumeTime[0], pCurDq->pSliceEncCtx->iSliceNumInFrame);
if (iNeedAdj)
@@ -1092,6 +1092,7 @@
&& (pCtx->pSvcParam->sSpatialLayers[iCurDid - 1].sSliceArgument.uiSliceMode == SM_FIXEDSLCNUM_SLICE
&& pCtx->pSvcParam->iMultipleThreadIdc >= pCtx->pSvcParam->sSpatialLayers[iCurDid -
1].sSliceArgument.uiSliceNum);
+ memcpy((pCtx->pSliceThreading->pSliceConsumeTime[iCurDid]), (pCtx->pCurDqLayer->pSliceEncCtx->pSliceConsumeTime), pCtx->pCurDqLayer->pSliceEncCtx->iSliceNumInFrame*sizeof(uint32_t));
if (kbModelingFromSpatial) { // using spatial base layer for complexity estimation
// do not need adjust due to not different at both slices of consumed time
--- a/codec/encoder/core/src/svc_enc_slice_segment.cpp
+++ b/codec/encoder/core/src/svc_enc_slice_segment.cpp
@@ -382,6 +382,11 @@
pSliceSeg->pCountMbNumInSlice = NULL;
}
+ if (NULL != pSliceSeg->pSliceConsumeTime) {
+ pMa->WelsFree (pSliceSeg->pSliceConsumeTime, "pSliceSeg->pSliceConsumeTime");
+
+ pSliceSeg->pSliceConsumeTime = NULL;
+ }
// just for safe
pSliceSeg->iSliceNumInFrame = 0;
pSliceSeg->iMbNumInFrame = 0;
@@ -405,6 +410,7 @@
"pSliceSeg->pCountMbNumInSlice");
WELS_VERIFY_RETURN_IF (1, NULL == pSliceSeg->pCountMbNumInSlice)
+ pSliceSeg->pSliceConsumeTime = NULL;
pSliceSeg->uiSliceMode = uiSliceMode;
pSliceSeg->iMbWidth = kiMbWidth;
pSliceSeg->iMbHeight = kiMbHeight;
@@ -419,7 +425,6 @@
return 1;
pSliceSeg->pOverallMbMap = (uint16_t*)pMa->WelsMalloc (kiCountMbNum * sizeof (uint16_t), "pSliceSeg->pOverallMbMap");
-
WELS_VERIFY_RETURN_IF (1, NULL == pSliceSeg->pOverallMbMap)
WelsSetMemMultiplebytes_c(pSliceSeg->pOverallMbMap, 0, kiCountMbNum, sizeof(uint16_t));
@@ -426,20 +431,23 @@
//SM_SIZELIMITED_SLICE: init, set pSliceSeg->iSliceNumInFrame = 1;
pSliceSeg->iSliceNumInFrame = GetInitialSliceNum (kiMbWidth, kiMbHeight, pSliceArgument);
-
if (-1 == pSliceSeg->iSliceNumInFrame)
return 1;
pSliceSeg->pCountMbNumInSlice = (int32_t*)pMa->WelsMalloc (pSliceSeg->iSliceNumInFrame * sizeof (int32_t),
"pSliceSeg->pCountMbNumInSlice");
-
WELS_VERIFY_RETURN_IF (1, NULL == pSliceSeg->pCountMbNumInSlice)
pSliceSeg->pFirstMbInSlice = (int32_t*)pMa->WelsMalloc (pSliceSeg->iSliceNumInFrame * sizeof (int32_t),
"pSliceSeg->pFirstMbInSlice");
-
WELS_VERIFY_RETURN_IF (1, NULL == pSliceSeg->pFirstMbInSlice)
+
+ pSliceSeg->pSliceConsumeTime = (uint32_t*)pMa->WelsMalloc (pSliceSeg->iSliceNumInFrame * sizeof (uint32_t),
+ "pSliceSeg->pSliceConsumeTime");
+ WELS_VERIFY_RETURN_IF (1, NULL == pSliceSeg->pSliceConsumeTime)
+
pSliceSeg->uiSliceMode = pSliceArgument->uiSliceMode;
+
pSliceSeg->iMbWidth = kiMbWidth;
pSliceSeg->iMbHeight = kiMbHeight;
pSliceSeg->iMbNumInFrame = kiCountMbNum;
@@ -487,6 +495,11 @@
pMa->WelsFree (pSliceSeg->pCountMbNumInSlice, "pSliceSeg->pCountMbNumInSlice");
pSliceSeg->pCountMbNumInSlice = NULL;
+ }
+ if (NULL != pSliceSeg->pSliceConsumeTime) {
+ pMa->WelsFree (pSliceSeg->pSliceConsumeTime, "pSliceSeg->pSliceConsumeTime");
+
+ pSliceSeg->pSliceConsumeTime = NULL;
}
pSliceSeg->iMbNumInFrame = 0;
--- a/codec/encoder/core/src/wels_task_encoder.cpp
+++ b/codec/encoder/core/src/wels_task_encoder.cpp
@@ -192,14 +192,6 @@
// CWelsLoadBalancingSlicingEncodingTask
-CWelsLoadBalancingSlicingEncodingTask::CWelsLoadBalancingSlicingEncodingTask (sWelsEncCtx* pCtx,
- const int32_t iSliceIdx) :
- CWelsSliceEncodingTask (pCtx, iSliceIdx) {
-}
-
-CWelsLoadBalancingSlicingEncodingTask::~CWelsLoadBalancingSlicingEncodingTask() {
-}
-
WelsErrorType CWelsLoadBalancingSlicingEncodingTask::InitTask() {
WelsErrorType iReturn = CWelsSliceEncodingTask::InitTask();
if (ENC_RETURN_SUCCESS != iReturn) {
@@ -217,12 +209,12 @@
void CWelsLoadBalancingSlicingEncodingTask::FinishTask() {
CWelsSliceEncodingTask::FinishTask();
- m_pCtx->pSliceThreading->pSliceConsumeTime[m_uiDependencyId][m_iSliceIdx] = (uint32_t) (WelsTime() - m_iSliceStart);
+ m_pCtx->pCurDqLayer->pSliceEncCtx->pSliceConsumeTime[m_iSliceIdx] = (uint32_t) (WelsTime() - m_iSliceStart);
WelsLog (&m_pCtx->sLogCtx, WELS_LOG_DEBUG,
"[MT] CWelsLoadBalancingSlicingEncodingTask()FinishTask, coding_idx %d, um_iSliceIdx %d, pSliceConsumeTime %d, iSliceSize %d, pFirstMbInSlice %d, count_num_mb_in_slice %d",
m_pCtx->iCodingIndex,
m_iSliceIdx,
- m_pCtx->pSliceThreading->pSliceConsumeTime[m_uiDependencyId][m_iSliceIdx],
+ m_pCtx->pCurDqLayer->pSliceEncCtx->pSliceConsumeTime[m_iSliceIdx],
m_iSliceSize,
m_pCtx->pCurDqLayer->pSliceEncCtx->pFirstMbInSlice[m_iSliceIdx],
m_pCtx->pCurDqLayer->pSliceEncCtx->pCountMbNumInSlice[m_iSliceIdx]);
--- a/codec/encoder/core/src/wels_task_management.cpp
+++ b/codec/encoder/core/src/wels_task_management.cpp
@@ -123,7 +123,8 @@
}
for (int idx = 0; idx < kiTaskCount; idx++) {
- pTask = WELS_NEW_OP (CWelsSliceEncodingTask (pEncCtx, idx), CWelsSliceEncodingTask);
+ pTask = WELS_NEW_OP (CWelsLoadBalancingSlicingEncodingTask (pEncCtx, idx), CWelsLoadBalancingSlicingEncodingTask);
+ //TODO: set this after loadbalancing flagpTask = WELS_NEW_OP (CWelsSliceEncodingTask (pEncCtx, idx), CWelsSliceEncodingTask);
WELS_VERIFY_RETURN_IF (ENC_RETURN_MEMALLOCERR, NULL == pTask)
m_cEncodingTaskList->push_back (pTask);
}
--- a/test/api/encode_decode_api_test.cpp
+++ b/test/api/encode_decode_api_test.cpp
@@ -3921,3 +3921,122 @@
}
}
+
+TEST_F (EncodeDecodeTestAPI, TriggerLoadBalancing) {
+ int iSpatialLayerNum = 1;
+ int iWidth = WelsClip3 ((((rand() % MAX_WIDTH) >> 1) + 1) << 1, (64 << 2), MAX_WIDTH);
+ int iHeight = WelsClip3 ((((rand() % MAX_HEIGHT) >> 1) + 1) << 1, (64 << 2),
+ 2240);//TODO: use MAX_HEIGHT after the limit is removed
+ float fFrameRate = rand() + 0.5f;
+ int iEncFrameNum = WelsClip3 ((rand() % ENCODE_FRAME_NUM) + 1, 1, ENCODE_FRAME_NUM);
+
+ // prepare params
+ SEncParamExt sParam;
+ encoder_->GetDefaultParams (&sParam);
+ prepareParamDefault (iSpatialLayerNum, 1, iWidth, iHeight, fFrameRate, &sParam);
+ sParam.iMultipleThreadIdc = (rand() % 8) + 1;
+ sParam.bSimulcastAVC = 1;
+ sParam.sSpatialLayers[0].iVideoWidth = iWidth;
+ sParam.sSpatialLayers[0].iVideoHeight = iHeight;
+ sParam.sSpatialLayers[0].sSliceArgument.uiSliceMode = SM_FIXEDSLCNUM_SLICE;
+ sParam.sSpatialLayers[0].sSliceArgument.uiSliceNum = sParam.iMultipleThreadIdc;
+
+ int rv = encoder_->InitializeExt (&sParam);
+ ASSERT_TRUE (rv == cmResultSuccess) << "Init Failed sParam: rv = " << rv;;
+
+ unsigned char* pBsBuf[MAX_SPATIAL_LAYER_NUM];
+ ISVCDecoder* decoder[MAX_SPATIAL_LAYER_NUM];
+
+ int iIdx = 0;
+ int aLen[MAX_SPATIAL_LAYER_NUM] = {};
+
+ //create decoder
+ for (iIdx = 0; iIdx < iSpatialLayerNum; iIdx++) {
+ pBsBuf[iIdx] = static_cast<unsigned char*> (malloc (iWidth * iHeight * 3 * sizeof (unsigned char) / 2));
+ EXPECT_TRUE (pBsBuf[iIdx] != NULL);
+
+ long rv = WelsCreateDecoder (&decoder[iIdx]);
+ ASSERT_EQ (0, rv);
+ EXPECT_TRUE (decoder[iIdx] != NULL);
+
+ SDecodingParam decParam;
+ memset (&decParam, 0, sizeof (SDecodingParam));
+ decParam.eOutputColorFormat = videoFormatI420;
+ decParam.uiTargetDqLayer = UCHAR_MAX;
+ decParam.eEcActiveIdc = ERROR_CON_SLICE_COPY;
+ decParam.sVideoProperty.eVideoBsType = VIDEO_BITSTREAM_DEFAULT;
+
+ rv = decoder[iIdx]->Initialize (&decParam);
+ ASSERT_EQ (0, rv);
+ }
+
+ rv = encoder_->SetOption (ENCODER_OPTION_SVC_ENCODE_PARAM_EXT, &sParam);
+ ASSERT_TRUE (rv == cmResultSuccess) << "SetOption Failed pParam: rv = " << rv;
+
+ //begin testing
+ for (int iFrame = 0; iFrame < iEncFrameNum; iFrame++) {
+ int iResult;
+ int iLayerLen = 0;
+ unsigned char* pData[3] = { NULL };
+
+ InitialEncDec (sParam.iPicWidth, sParam.iPicHeight);
+ int frameSize = EncPic.iPicWidth * EncPic.iPicHeight * 3 / 2;
+ memset (buf_.data(), rand() % 256, (frameSize >> 2));
+ memset (buf_.data() + (frameSize >> 2), rand() % 256, (frameSize - (frameSize >> 2)));
+
+ int iStartStrip = 0;
+ //during first half the complex strip is at top, then during the second half it is at bottom
+ if (iFrame > iEncFrameNum / 2) {
+ iStartStrip = EncPic.iPicHeight * EncPic.iPicWidth;
+ }
+ for (int k = 0; k < (EncPic.iPicHeight / 2); k++) {
+ memset (buf_.data() + k * EncPic.iPicWidth + iStartStrip, rand() % 256, (EncPic.iPicWidth));
+ }
+
+ int rv = encoder_->EncodeFrame (&EncPic, &info);
+ ASSERT_TRUE (rv == cmResultSuccess);
+
+ // init
+ for (iIdx = 0; iIdx < iSpatialLayerNum; iIdx++) {
+ aLen[iIdx] = 0;
+ }
+ for (int iLayer = 0; iLayer < info.iLayerNum; ++iLayer) {
+ iLayerLen = 0;
+ const SLayerBSInfo& layerInfo = info.sLayerInfo[iLayer];
+ for (int iNal = 0; iNal < layerInfo.iNalCount; ++iNal) {
+ iLayerLen += layerInfo.pNalLengthInByte[iNal];
+ }
+
+ iIdx = layerInfo.uiSpatialId;
+ EXPECT_TRUE (iIdx < iSpatialLayerNum) << "iIdx = " << iIdx << ", iSpatialLayerNum = " << iSpatialLayerNum;
+ memcpy ((pBsBuf[iIdx] + aLen[iIdx]), layerInfo.pBsBuf, iLayerLen * sizeof (unsigned char));
+ aLen[iIdx] += iLayerLen;
+ }
+
+ for (iIdx = 0; iIdx < iSpatialLayerNum; iIdx++) {
+ pData[0] = pData[1] = pData[2] = 0;
+ memset (&dstBufInfo_, 0, sizeof (SBufferInfo));
+
+#ifdef DEBUG_FILE_SAVE4
+ fwrite (pBsBuf[iIdx], aLen[iIdx], 1, fEnc[iIdx]);
+#endif
+ iResult = decoder[iIdx]->DecodeFrame2 (pBsBuf[iIdx], aLen[iIdx], pData, &dstBufInfo_);
+ EXPECT_TRUE (iResult == cmResultSuccess) << "iResult=" << iResult << ", LayerIdx=" << iIdx;
+
+ iResult = decoder[iIdx]->DecodeFrame2 (NULL, 0, pData, &dstBufInfo_);
+ EXPECT_TRUE (iResult == cmResultSuccess) << "iResult=" << iResult << ", LayerIdx=" << iIdx;
+ EXPECT_EQ (dstBufInfo_.iBufferStatus, 1) << "LayerIdx=" << iIdx;
+ }
+ }
+
+ for (iIdx = 0; iIdx < iSpatialLayerNum; iIdx++) {
+ free (pBsBuf[iIdx]);
+
+ if (decoder[iIdx] != NULL) {
+ decoder[iIdx]->Uninitialize();
+ WelsDestroyDecoder (decoder[iIdx]);
+ }
+
+ }
+}
+