shithub: openh264

Download patch

ref: cb19ac71f5c907b32d1ff66e0efa61a5050a79a6
parent: f40452d3cbe241a990c57b59a166736c02a88aee
parent: 0d84eadcf30acffe1d1620d079715441542f3ce5
author: sijchen <sijchen@cisco.com>
date: Tue Mar 28 05:48:47 EDT 2017

Merge pull request #2700 from shihuade/MultiThread_V10.1#1708#1710

Slice buffer reallocate design::PR03::pSlice instead of ppSliceInLayer in slice encoder module

--- a/codec/encoder/core/inc/mt_defs.h
+++ b/codec/encoder/core/inc/mt_defs.h
@@ -63,9 +63,6 @@
 int32_t         iSliceIndex;    // slice index, zero based
 int32_t         iThreadIndex;   // thread index, zero based
 
-// for dynamic slicing mode
-int32_t         iStartMbIndex;  // inclusive
-int32_t         iEndMbIndex;    // exclusive
 } SSliceThreadPrivateData;
 
 typedef struct TagSliceThreading {
--- a/codec/encoder/core/inc/slice_multi_threading.h
+++ b/codec/encoder/core/inc/slice_multi_threading.h
@@ -79,8 +79,7 @@
 int32_t CreateSliceThreads (sWelsEncCtx* pCtx);
 
 int32_t FiredSliceThreads (sWelsEncCtx* pCtx, SSliceThreadPrivateData* pPriData, WELS_EVENT* pEventsList,
-                           WELS_EVENT* pMasterEventsList, SFrameBSInfo* pFrameBsInfo,
-                           const uint32_t kuiNumThreads, SSliceCtx* pSliceCtx, const bool kbIsDynamicSlicingMode);
+                           WELS_EVENT* pMasterEventsList, SFrameBSInfo* pFrameBsInfo, const uint32_t kuiNumThreads);
 
 int32_t DynamicDetectCpuCores();
 
@@ -97,7 +96,7 @@
 void TrackSliceConsumeTime (sWelsEncCtx* pCtx, int32_t* pDidList, const int32_t kiSpatialNum);
 #endif//defined(MT_DEBUG)
 
-void SetOneSliceBsBufferUnderMultithread(sWelsEncCtx* pCtx, const int32_t kiThreadIdx, const int32_t iSliceIdx);
+void SetOneSliceBsBufferUnderMultithread(sWelsEncCtx* pCtx, const int32_t kiThreadIdx, SSlice* pSlice);
 int32_t WriteSliceBs (sWelsEncCtx* pCtx,SWelsSliceBs* pSliceBs,const int32_t iSliceIdx,int32_t& iSliceSize);
 }
 
--- a/codec/encoder/core/inc/svc_enc_frame.h
+++ b/codec/encoder/core/inc/svc_enc_frame.h
@@ -70,6 +70,7 @@
 
 typedef struct TagSliceThreadInfo {
 SSlice*                 pSliceInThread[MAX_THREADS_NUM];// slice buffer for multi thread,
+                                                        // pSliceInThread[0] for single thread
                                                         // will not alloated when multi thread is off
 int32_t                 iMaxSliceNumInThread[MAX_THREADS_NUM];
 int32_t                 iEncodedSliceNumInThread[MAX_THREADS_NUM];
@@ -77,8 +78,6 @@
 
 typedef struct TagLayerInfo {
 SNalUnitHeaderExt       sNalHeaderExt;
-SSlice*                 pSliceInLayer;  // Here SSlice identify to Frame on concept, [iSliceIndex],
-                                        // may need extend list size for sliceMode=SM_SIZELIMITED_SLICE
 SSubsetSps*             pSubsetSpsP;    // current pSubsetSps used, memory alloc in external
 SWelsSPS*               pSpsP;          // current pSps based avc used, memory alloc in external
 SWelsPPS*               pPpsP;          // current pPps used
@@ -117,7 +116,9 @@
 int32_t                 iMaxSliceNum;
 int32_t*                pNumSliceCodedOfPartition;      // for dynamic slicing mode
 int32_t*                pLastCodedMbIdxOfPartition;     // for dynamic slicing mode
-int32_t*                pLastMbIdxOfPartition;          // for dynamic slicing mode
+int32_t*                pFirstMbIdxOfPartition;         // for dynamic slicing mode
+int32_t*                pEndMbIdxOfPartition;           // for dynamic slicing mode
+
 bool                    bNeedAdjustingSlicing;
 
 SFeatureSearchPreparation* pFeatureSearchPreparation;
--- a/codec/encoder/core/src/encoder_ext.cpp
+++ b/codec/encoder/core/src/encoder_ext.cpp
@@ -64,8 +64,8 @@
                                  SLayerBSInfo* pLayerBsInfo,
                                  int32_t* pNalIdxInLayer,
                                  int32_t* pLayerSize,
-                                 int32_t iFirstMbInPartition,   // first mb inclusive in partition
-                                 int32_t iEndMbInPartition,     // end mb exclusive in partition
+                                 int32_t iFirstMbIdxInPartition,
+                                 int32_t iEndMbIdxInPartition,
                                  int32_t iStartSliceIdx
                                 );
 
@@ -924,8 +924,6 @@
 
 void FreeSliceInLayer (SDqLayer* pDq, CMemoryAlign* pMa) {
   int32_t iIdx = 0;
-
-  FreeSliceBuffer (pDq->sLayerInfo.pSliceInLayer, pDq->iMaxSliceNum, pMa, "pSliceInLayer");
   for (; iIdx < MAX_THREADS_NUM; iIdx ++) {
     FreeSliceBuffer (pDq->sSliceThreadInfo.pSliceInThread[iIdx],
                      pDq->sSliceThreadInfo.iMaxSliceNumInThread[iIdx],
@@ -954,11 +952,17 @@
     pMa->WelsFree (pDq->pLastCodedMbIdxOfPartition, "pLastCodedMbIdxOfPartition");
     pDq->pLastCodedMbIdxOfPartition = NULL;
   }
-  if (pDq->pLastMbIdxOfPartition) {
-    pMa->WelsFree (pDq->pLastMbIdxOfPartition, "pLastMbIdxOfPartition");
-    pDq->pLastMbIdxOfPartition = NULL;
+
+  if (pDq->pFirstMbIdxOfPartition) {
+    pMa->WelsFree (pDq->pFirstMbIdxOfPartition, "pFirstMbIdxOfPartition");
+    pDq->pFirstMbIdxOfPartition = NULL;
   }
 
+  if (pDq->pEndMbIdxOfPartition) {
+    pMa->WelsFree (pDq->pEndMbIdxOfPartition, "pEndMbIdxOfPartition");
+    pDq->pEndMbIdxOfPartition = NULL;
+  }
+
   if (pDq->pFeatureSearchPreparation) {
     ReleaseFeatureSearchPreparation (pMa, pDq->pFeatureSearchPreparation->pFeatureOfBlock);
     pMa->WelsFree (pDq->pFeatureSearchPreparation, "pFeatureSearchPreparation");
@@ -1075,11 +1079,13 @@
 
       pDqLayer->pNumSliceCodedOfPartition       = (int32_t*)pMa->WelsMallocz (iSize, "pNumSliceCodedOfPartition");
       pDqLayer->pLastCodedMbIdxOfPartition      = (int32_t*)pMa->WelsMallocz (iSize, "pLastCodedMbIdxOfPartition");
-      pDqLayer->pLastMbIdxOfPartition           = (int32_t*)pMa->WelsMallocz (iSize, "pLastMbIdxOfPartition");
+      pDqLayer->pFirstMbIdxOfPartition          = (int32_t*)pMa->WelsMallocz (iSize, "pFirstMbIdxOfPartition");
+      pDqLayer->pEndMbIdxOfPartition            = (int32_t*)pMa->WelsMallocz (iSize, "pEndMbIdxOfPartition");
       WELS_VERIFY_RETURN_PROC_IF (1,
-                                  (NULL == pDqLayer->pNumSliceCodedOfPartition ||
+                                  (NULL == pDqLayer->pNumSliceCodedOfPartition  ||
                                    NULL == pDqLayer->pLastCodedMbIdxOfPartition ||
-                                   NULL == pDqLayer->pLastMbIdxOfPartition),
+                                   NULL == pDqLayer->pFirstMbIdxOfPartition     ||
+                                   NULL == pDqLayer->pEndMbIdxOfPartition),
                                   FreeDqLayer (pDqLayer, pMa))
     }
     pDqLayer->bNeedAdjustingSlicing = false;
@@ -2425,10 +2431,10 @@
 
 void UpdateSlicepEncCtxWithPartition (SDqLayer* pCurDq, int32_t iPartitionNum) {
   SSliceCtx* pSliceCtx                  = &pCurDq->sSliceEncCtx;
-  SSlice** ppSliceInLayer               = pCurDq->ppSliceInLayer;
   const int32_t kiMbNumInFrame          = pSliceCtx->iMbNumInFrame;
   int32_t iCountMbNumPerPartition       = kiMbNumInFrame;
   int32_t iAssignableMbLeft             = kiMbNumInFrame;
+  int32_t iCountMbNumInPartition        = 0;
   int32_t iFirstMbIdx                   = 0;
   int32_t i/*, j*/;
 
@@ -2441,18 +2447,20 @@
   i = 0;
   while (i < iPartitionNum) {
     if (i + 1 == iPartitionNum) {
-      ppSliceInLayer[i]->iCountMbNumInSlice = iAssignableMbLeft;
+      iCountMbNumInPartition = iAssignableMbLeft;
     } else {
-      ppSliceInLayer[i]->iCountMbNumInSlice = iCountMbNumPerPartition;
+      iCountMbNumInPartition = iCountMbNumPerPartition;
     }
-    ppSliceInLayer[i]->sSliceHeaderExt.sSliceHeader.iFirstMbInSlice = iFirstMbIdx;
 
+    pCurDq->pFirstMbIdxOfPartition[i] = iFirstMbIdx;
+    pCurDq->pEndMbIdxOfPartition[i]   = iFirstMbIdx + iCountMbNumInPartition - 1;
+
     WelsSetMemMultiplebytes_c (pSliceCtx->pOverallMbMap + iFirstMbIdx, i,
-                               ppSliceInLayer[i]->iCountMbNumInSlice, sizeof (uint16_t));
+                               iCountMbNumInPartition, sizeof (uint16_t));
 
     // for next partition(or pSlice)
-    iFirstMbIdx       += ppSliceInLayer[i]->iCountMbNumInSlice;
-    iAssignableMbLeft -= ppSliceInLayer[i]->iCountMbNumInSlice;
+    iFirstMbIdx       += iCountMbNumInPartition;
+    iAssignableMbLeft -= iCountMbNumInPartition;
     ++ i;
   }
 }
@@ -3692,7 +3700,7 @@
     else if ((SM_SIZELIMITED_SLICE == pParam->sSliceArgument.uiSliceMode) && (pSvcParam->iMultipleThreadIdc <= 1)) {
       const int32_t kiLastMbInFrame = pCtx->pCurDqLayer->sSliceEncCtx.iMbNumInFrame;
       pCtx->iEncoderError = WelsCodeOnePicPartition (pCtx, pFbi, pLayerBsInfo, &iNalIdxInLayer, &iLayerSize, 0,
-                            kiLastMbInFrame, 0);
+                            kiLastMbInFrame - 1, 0);
       pLayerBsInfo->eFrameType = eFrameType;
       pLayerBsInfo->iSubSeqId = GetSubSequenceId (pCtx, eFrameType);
       WELS_VERIFY_RETURN_IFNEQ (pCtx->iEncoderError, ENC_RETURN_SUCCESS)
@@ -3738,15 +3746,24 @@
       else if ((SM_SIZELIMITED_SLICE == pParam->sSliceArgument.uiSliceMode) && (pSvcParam->iMultipleThreadIdc > 1)) {
         const int32_t kiPartitionCnt = pCtx->iActiveThreadsNum;
 
-        int32_t iEndMbIdx = pCtx->pCurDqLayer->sSliceEncCtx.iMbNumInFrame;
-        for (int32_t iIdx = kiPartitionCnt - 1; iIdx >= 0; --iIdx) {
-          const int32_t iFirstMbIdx         =
-            pCtx->pCurDqLayer->ppSliceInLayer[iIdx]->sSliceHeaderExt.sSliceHeader.iFirstMbInSlice;
-          pCtx->pSliceThreading->pThreadPEncCtx[iIdx].iStartMbIndex      = iFirstMbIdx;
-          pCtx->pSliceThreading->pThreadPEncCtx[iIdx].iEndMbIndex        = iEndMbIdx;
-          iEndMbIdx                         = iFirstMbIdx;
+#if 0 //TODO: temporarily use this to keep old codes for a while, will remove old codes later
+        int32_t iRet = 0;
+        // to fire slice coding threads
+        iRet = FiredSliceThreads (pCtx, &pCtx->pSliceThreading->pThreadPEncCtx[0],
+                                  &pCtx->pSliceThreading->pReadySliceCodingEvent[0],
+                                  &pCtx->pSliceThreading->pThreadMasterEvent[0],
+                                  pFbi, kiPartitionCnt);
+        if (iRet) {
+          WelsLog (pLogCtx, WELS_LOG_ERROR,
+                   "[MT] WelsEncoderEncodeExt(), FiredSliceThreads return(%d) failed and exit encoding frame, iSliceCount= %d, uiSliceMode= %d, iMultipleThreadIdc= %d!!",
+                   iRet, iSliceCount, pParam->sSliceArgument.uiSliceMode, pSvcParam->iMultipleThreadIdc);
+          return ENC_RETURN_UNEXPECTED;
         }
 
+        WelsMultipleEventsWaitAllBlocking (kiPartitionCnt, &pCtx->pSliceThreading->pSliceCodedEvent[0],
+                                           &pCtx->pSliceThreading->pSliceCodedMasterEvent);
+        WELS_VERIFY_RETURN_IFNEQ (pCtx->iEncoderError, ENC_RETURN_SUCCESS)
+#else
         //TODO: use a function to remove duplicate code here and ln3994
         int32_t iLayerBsIdx       = pCtx->pOut->iLayerBsIndex;
         SLayerBSInfo* pLbi        = &pFbi->sLayerInfo[iLayerBsIdx];
@@ -3762,9 +3779,9 @@
         while (iIdx < kiPartitionCnt) {
           pCtx->pSliceThreading->pThreadPEncCtx[iIdx].pFrameBsInfo = pFbi;
           pCtx->pSliceThreading->pThreadPEncCtx[iIdx].iSliceIndex  = iIdx;
-          SetOneSliceBsBufferUnderMultithread (pCtx, iIdx, iIdx);
           ++ iIdx;
         }
+
         pCtx->pTaskManage->ExecuteTasks();
 
         if (pCtx->iEncoderError) {
@@ -3773,7 +3790,7 @@
                    pParam->sSliceArgument.uiSliceMode, pCtx->iEncoderError);
           return pCtx->iEncoderError;
         }
-
+#endif     
         //TO DO: add update ppSliceInLayer module based on pSliceInThread[ThreadNum]
         // UpdateSliceInLayerInfo(); // reordering
 
@@ -4476,8 +4493,8 @@
                                  SLayerBSInfo* pLayerBsInfo,
                                  int32_t* pNalIdxInLayer,
                                  int32_t* pLayerSize,
-                                 int32_t iFirstMbInPartition,   // first mb inclusive in partition
-                                 int32_t iEndMbInPartition,     // end mb exclusive in partition
+                                 int32_t iFirstMbIdxInPartition,
+                                 int32_t iEndMbIdxInPartition,
                                  int32_t iStartSliceIdx
                                 ) {
 
@@ -4489,7 +4506,7 @@
   const int32_t kiSliceStep             = pCtx->iActiveThreadsNum;
   const int32_t kiPartitionId           = iStartSliceIdx % kiSliceStep;
   int32_t iPartitionBsSize              = 0;
-  int32_t iAnyMbLeftInPartition         = iEndMbInPartition - iFirstMbInPartition;
+  int32_t iAnyMbLeftInPartition         = iEndMbIdxInPartition - iFirstMbIdxInPartition + 1;
   const EWelsNalUnitType keNalType      = pCtx->eNalType;
   const EWelsNalRefIdc keNalRefIdc      = pCtx->eNalPriority;
   const bool kbNeedPrefix               = pCtx->bNeedPrefixNalFlag;
@@ -4498,10 +4515,8 @@
 
   //init
   {
-    pStartSlice->sSliceHeaderExt.sSliceHeader.iFirstMbInSlice = iFirstMbInPartition;
-    pCurLayer->pNumSliceCodedOfPartition[kiPartitionId] =
-      1;    // one slice per partition intialized, dynamic slicing inside
-    pCurLayer->pLastMbIdxOfPartition[kiPartitionId]     = iEndMbInPartition - 1;
+    pStartSlice->sSliceHeaderExt.sSliceHeader.iFirstMbInSlice = iFirstMbIdxInPartition;
+    pCurLayer->pNumSliceCodedOfPartition[kiPartitionId] = 1;
   }
   pCurLayer->pLastCodedMbIdxOfPartition[kiPartitionId] = 0;
 
@@ -4559,7 +4574,7 @@
 
     ++ iNalIdxInLayer;
     iSliceIdx += kiSliceStep; //if uiSliceIdx is not continuous
-    iAnyMbLeftInPartition = iEndMbInPartition - (1 + pCurLayer->pLastCodedMbIdxOfPartition[kiPartitionId]);
+    iAnyMbLeftInPartition = iEndMbIdxInPartition - pCurLayer->pLastCodedMbIdxOfPartition[kiPartitionId];
   }
 
   *pLayerSize           = iPartitionBsSize;
--- a/codec/encoder/core/src/ratectl.cpp
+++ b/codec/encoder/core/src/ratectl.cpp
@@ -1186,10 +1186,8 @@
 }
 
 void WelsRcMbInitGom (sWelsEncCtx* pEncCtx, SMB* pCurMb, SSlice* pSlice) {
-  SSlice** ppSliceInLayer       = pEncCtx->pCurDqLayer->ppSliceInLayer;
   SWelsSvcRc* pWelsSvcRc        = &pEncCtx->pWelsSvcRc[pEncCtx->uiDependencyId];
-  const int32_t kiSliceId       = pSlice->uiSliceIdx;
-  SRCSlicing* pSOverRc          = &ppSliceInLayer[kiSliceId]->sSlicingOverRc;
+  SRCSlicing* pSOverRc          = &pSlice->sSlicingOverRc;
   SDqLayer* pCurLayer           = pEncCtx->pCurDqLayer;
   const uint8_t kuiChromaQpIndexOffset = pCurLayer->sLayerInfo.pPpsP->uiChromaQpIndexOffset;
 
@@ -1213,11 +1211,8 @@
 }
 
 void WelsRcMbInfoUpdateGom (sWelsEncCtx* pEncCtx, SMB* pCurMb, int32_t iCostLuma, SSlice* pSlice) {
-  SSlice** ppSliceInLayer           = pEncCtx->pCurDqLayer->ppSliceInLayer;
   SWelsSvcRc* pWelsSvcRc            = &pEncCtx->pWelsSvcRc[pEncCtx->uiDependencyId];
-  int32_t iSliceId                  = pSlice->uiSliceIdx;
-  SRCSlicing* pSOverRc              = &ppSliceInLayer[iSliceId]->sSlicingOverRc;
-
+  SRCSlicing* pSOverRc              = &pSlice->sSlicingOverRc;
   const int32_t kiComplexityIndex   = pSOverRc->iComplexityIndexSlice;
 
   int32_t iCurMbBits = pEncCtx->pFuncList->pfGetBsPosition (pSlice) - pSOverRc->iBsPosSlice;
--- a/codec/encoder/core/src/ref_list_mgr_svc.cpp
+++ b/codec/encoder/core/src/ref_list_mgr_svc.cpp
@@ -707,9 +707,8 @@
  *  update syntax for reference base related
  */
 void WelsUpdateRefSyntax (sWelsEncCtx* pCtx, const int32_t iPOC, const int32_t uiFrameType) {
-
-  int32_t iAbsDiffPicNumMinus1    = -1;
-  SSpatialLayerInternal* pParamD  = &pCtx->pSvcParam->sDependencyLayers[pCtx->uiDependencyId];
+  int32_t iAbsDiffPicNumMinus1   = -1;
+  SSpatialLayerInternal* pParamD = &pCtx->pSvcParam->sDependencyLayers[pCtx->uiDependencyId];
   /*syntax for ref_pic_list_reordering()*/
   if (pCtx->iNumRef0 > 0) {
     iAbsDiffPicNumMinus1 = pParamD->iFrameNum - (pCtx->pRefList0[0]->iFrameNum) - 1;
--- a/codec/encoder/core/src/slice_multi_threading.cpp
+++ b/codec/encoder/core/src/slice_multi_threading.cpp
@@ -689,18 +689,16 @@
         SSliceCtx* pSliceCtx                    = &pCurDq->sSliceEncCtx;
         const int32_t kiPartitionId             = iThreadIdx;
         const int32_t kiSliceIdxStep            = pEncPEncCtx->iActiveThreadsNum;
-        const int32_t kiFirstMbInPartition      = pPrivateData->iStartMbIndex;  // inclusive
-        const int32_t kiEndMbInPartition        = pPrivateData->iEndMbIndex;            // exclusive
-        int32_t iAnyMbLeftInPartition           = kiEndMbInPartition - kiFirstMbInPartition;
-        SSpatialLayerInternal* pParamInternal = &pCodingParam->sDependencyLayers[kiCurDid];
+        const int32_t kiFirstMbInPartition      = pCurDq->pFirstMbIdxOfPartition[kiPartitionId];
+        const int32_t kiEndMbIdxInPartition     = pCurDq->pEndMbIdxOfPartition[kiPartitionId];
+        int32_t iAnyMbLeftInPartition           = kiEndMbIdxInPartition - kiFirstMbInPartition + 1;
+        SSpatialLayerInternal *pParamInternal   = &pCodingParam->sDependencyLayers[kiCurDid];
         iSliceIdx = pPrivateData->iSliceIndex;
         SSliceHeaderExt* pStartSliceHeaderExt                   = &pCurDq->ppSliceInLayer[iSliceIdx]->sSliceHeaderExt;
         pStartSliceHeaderExt->sSliceHeader.iFirstMbInSlice      = kiFirstMbInPartition;
-        pCurDq->pNumSliceCodedOfPartition[kiPartitionId]        =
-          1;    // one pSlice per partition intialized, dynamic slicing inside
-        pCurDq->pLastMbIdxOfPartition[kiPartitionId]            = kiEndMbInPartition - 1;
-
+        pCurDq->pNumSliceCodedOfPartition[kiPartitionId]        = 1;
         pCurDq->pLastCodedMbIdxOfPartition[kiPartitionId]       = 0;
+
         while (iAnyMbLeftInPartition > 0) {
           if (iSliceIdx >= pSliceCtx->iMaxSliceNumConstraint) {
             // TODO: need exception handler for not large enough of MAX_SLICES_NUM related memory usage
@@ -715,12 +713,12 @@
                                           iEventIdx);
           }
 
-          SetOneSliceBsBufferUnderMultithread (pEncPEncCtx, kiPartitionId, iSliceIdx);
-          pSlice                = pCurDq->ppSliceInLayer[iSliceIdx];
+          pSlice = pCurDq->ppSliceInLayer[iSliceIdx];
           pSliceBs              = &pSlice->sSliceBs;
-
           pSliceBs->uiBsPos     = 0;
           pSliceBs->iNalIndex   = 0;
+
+          SetOneSliceBsBufferUnderMultithread (pEncPEncCtx, kiPartitionId, pSlice);
           InitBits (&pSliceBs->sBsWrite, pSliceBs->pBsBuffer, pSliceBs->uiSize);
 
           if (bNeedPrefix) {
@@ -777,9 +775,9 @@
                         "[MT] CodingSliceThreadProc(), coding_idx %d, iPartitionId %d, uiSliceIdx %d, iSliceSize %d, count_mb_slice %d, iEndMbInPartition %d, pCurDq->pLastCodedMbIdxOfPartition[%d] %d\n",
                         pEncPEncCtx->iCodingIndex, kiPartitionId, iSliceIdx, iSliceSize,
                         pCurDq->ppSliceInLayer[iSliceIdx]->iCountMbNumInSlice,
-                        kiEndMbInPartition, kiPartitionId, pCurDq->pLastCodedMbIdxOfPartition[kiPartitionId]);
+                        kiEndMbIdxInPartition, kiPartitionId, pCurDq->pLastCodedMbIdxOfPartition[kiPartitionId]);
 
-          iAnyMbLeftInPartition = kiEndMbInPartition - (1 + pCurDq->pLastCodedMbIdxOfPartition[kiPartitionId]);
+          iAnyMbLeftInPartition = kiEndMbIdxInPartition - pCurDq->pLastCodedMbIdxOfPartition[kiPartitionId];
           iSliceIdx += kiSliceIdxStep;
         }
 
@@ -838,15 +836,12 @@
 
 int32_t FiredSliceThreads (sWelsEncCtx* pCtx, SSliceThreadPrivateData* pPriData, WELS_EVENT* pEventsList,
                            WELS_EVENT* pMasterEventsList, SFrameBSInfo* pFrameBsInfo,
-                           const uint32_t uiNumThreads, SSliceCtx* pSliceCtx, const bool bIsDynamicSlicingMode) {
-  int32_t iEndMbIdx         = 0;
+                           const uint32_t uiNumThreads) {
   int32_t iIdx              = 0;
   const int32_t kiEventCnt  = uiNumThreads;
   int32_t iLayerBsIdx       = pCtx->pOut->iLayerBsIndex;
   SLayerBSInfo* pLbi        = &pFrameBsInfo->sLayerInfo[iLayerBsIdx];
-  SSlice** ppSliceInLayer   = pCtx->pCurDqLayer->ppSliceInLayer;
 
-
   if (pPriData == NULL || pFrameBsInfo == NULL || pLbi == NULL || kiEventCnt <= 0 || pEventsList == NULL) {
     WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR,
              "FiredSliceThreads(), fail due pPriData == %p ||pFrameBsInfo == %p || pLbi == %p || iEventCnt(%d) <= 0 || pEventsList == %p!!",
@@ -854,17 +849,6 @@
     return 1;
   }
 
-  ////////////////////////////////////////
-  if (bIsDynamicSlicingMode) {
-    iEndMbIdx = pSliceCtx->iMbNumInFrame;
-    for (iIdx = kiEventCnt - 1; iIdx >= 0; --iIdx) {
-      const int32_t iFirstMbIdx         = ppSliceInLayer[iIdx]->sSliceHeaderExt.sSliceHeader.iFirstMbInSlice;
-      pPriData[iIdx].iStartMbIndex      = iFirstMbIdx;
-      pPriData[iIdx].iEndMbIndex        = iEndMbIdx;
-      iEndMbIdx                         = iFirstMbIdx;
-    }
-  }
-
   pLbi->pBsBuf = pCtx->pFrameBs + pCtx->iPosBsBuffer;
   pLbi->uiLayerType   = VIDEO_CODING_LAYER;
   pLbi->uiSpatialId   = pCtx->uiDependencyId;
@@ -876,9 +860,10 @@
   while (iIdx < kiEventCnt) {
     pPriData[iIdx].pFrameBsInfo = pFrameBsInfo;
     pPriData[iIdx].iSliceIndex  = iIdx;
-    SetOneSliceBsBufferUnderMultithread (pCtx, iIdx, iIdx);
+
     WelsEventSignal (&pEventsList[iIdx]);
     WelsEventSignal (&pMasterEventsList[iIdx]);
+
     ++ iIdx;
   }
 
@@ -1023,8 +1008,8 @@
 }
 #endif//#if defined(MT_DEBUG)
 
-void SetOneSliceBsBufferUnderMultithread (sWelsEncCtx* pCtx, const int32_t kiThreadIdx, const int32_t iSliceIdx) {
-  SWelsSliceBs* pSliceBs  = &pCtx->pCurDqLayer->ppSliceInLayer[iSliceIdx]->sSliceBs;
+void SetOneSliceBsBufferUnderMultithread (sWelsEncCtx* pCtx, const int32_t kiThreadIdx, SSlice* pSlice) {
+  SWelsSliceBs* pSliceBs  = &pSlice->sSliceBs;
   pSliceBs->pBsBuffer     = pCtx->pSliceThreading->pThreadBsBuffer[kiThreadIdx];
   pSliceBs->uiBsPos       = 0;
 }
--- a/codec/encoder/core/src/svc_encode_slice.cpp
+++ b/codec/encoder/core/src/svc_encode_slice.cpp
@@ -948,7 +948,7 @@
                               iMaxSliceBufferSize,
                               pMa);
     if (ENC_RETURN_SUCCESS != iRet) {
-        return iRet;
+      return iRet;
     }
 
     iRet = InitSliceMBInfo (pSliceArgument, pSlice,
@@ -1014,14 +1014,14 @@
   int32_t iRet                        = 0;
 
   assert (iThreadNum > 0);
-  iMaxSliceNumInThread = (pCtx->iMaxSliceCount / iThreadNum + 1) * 2;
+  //iMaxSliceNumInThread = (pCtx->iMaxSliceCount / iThreadNum + 1) * 2;
+  iMaxSliceNumInThread = pDqLayer->iMaxSliceNum;
   iMaxSliceNumInThread =  WELS_MIN (pCtx->iMaxSliceCount, (int) iMaxSliceNumInThread);
 
   while (iIdx < iThreadNum) {
     pSliceThreadInfo->iMaxSliceNumInThread[iIdx]     = iMaxSliceNumInThread;
     pSliceThreadInfo->iEncodedSliceNumInThread[iIdx] = 0;
-    pSliceThreadInfo->pSliceInThread[iIdx] = (SSlice*)pMa->WelsMallocz (sizeof (SSlice) *
-        iMaxSliceNumInThread, "pSliceInThread");
+    pSliceThreadInfo->pSliceInThread[iIdx] = (SSlice*)pMa->WelsMallocz (sizeof (SSlice) * iMaxSliceNumInThread, "pSliceInThread");
     if (NULL == pSliceThreadInfo->pSliceInThread[iIdx]) {
       WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR,
                "CWelsH264SVCEncoder::InitSliceThreadInfo: pSliceThreadInfo->pSliceInThread[iIdx] is NULL");
@@ -1069,25 +1069,8 @@
     return iRet;
   }
 
-  pDqLayer->sLayerInfo.pSliceInLayer = (SSlice*)pMa->WelsMallocz (sizeof (SSlice) * iMaxSliceNum, "pSliceInLayer");
-  if (NULL ==  pDqLayer->sLayerInfo.pSliceInLayer) {
-    WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR,
-             "CWelsH264SVCEncoder::InitSliceInLayer: pDqLayer->sLayerInfo.pSliceInLayeris NULL");
-    return ENC_RETURN_MEMALLOCERR;
-  }
-
-  iRet = InitSliceList (pCtx,
-                        pDqLayer,
-                        pDqLayer->sLayerInfo.pSliceInLayer,
-                        iMaxSliceNum,
-                        kiDlayerIndex,
-                        pMa);
-  if (ENC_RETURN_SUCCESS != iRet) {
-    return iRet;
-  }
-
   for (iSliceIdx = 0; iSliceIdx < iMaxSliceNum; iSliceIdx++) {
-    pDqLayer->ppSliceInLayer[iSliceIdx] = &pDqLayer->sLayerInfo.pSliceInLayer[iSliceIdx];
+    pDqLayer->ppSliceInLayer[iSliceIdx] = pDqLayer->sSliceThreadInfo.pSliceInThread[0] + iSliceIdx;
   }
 
   return ENC_RETURN_SUCCESS;
@@ -1295,7 +1278,7 @@
   int32_t iSliceIdx        = 0;
   const int32_t kiCurDid   = pCtx->uiDependencyId;
 
-  SSlice* pLastCodedSlice        = pCurLayer->sLayerInfo.pSliceInLayer + (iMaxSliceNumOld - 1);
+  SSlice* pLastCodedSlice        = pCurLayer->sSliceThreadInfo.pSliceInThread[0] + (iMaxSliceNumOld - 1);
   SSliceArgument* pSliceArgument = & pCtx->pSvcParam->sSpatialLayers[kiCurDid].sSliceArgument;
   iRet = CalculateNewSliceNum (pCurLayer,
                                pLastCodedSlice,
@@ -1308,7 +1291,7 @@
 
   iRet = ReallocateSliceList (pCtx,
                               pSliceArgument,
-                              pCurLayer->sLayerInfo.pSliceInLayer,
+                              pCurLayer->sSliceThreadInfo.pSliceInThread[0],
                               iMaxSliceNumOld,
                               iMaxSliceNumNew);
   if (ENC_RETURN_SUCCESS != iRet) {
@@ -1325,7 +1308,7 @@
   pCurLayer->ppSliceInLayer = ppSlice;
 
   for (iSliceIdx = 0; iSliceIdx < iMaxSliceNumNew; iSliceIdx++) {
-    pCurLayer->ppSliceInLayer[iSliceIdx] = &pCurLayer->sLayerInfo.pSliceInLayer[iSliceIdx];
+    pCurLayer->ppSliceInLayer[iSliceIdx] = pCurLayer->sSliceThreadInfo.pSliceInThread[0] + iSliceIdx;
   }
 
   if (pCtx->iMaxSliceCount < iMaxSliceNumNew) {
@@ -1332,7 +1315,8 @@
     pCtx->iMaxSliceCount = iMaxSliceNumNew;
   }
 
-  pCurLayer->sSliceEncCtx.iMaxSliceNumConstraint = iMaxSliceNumNew;
+  pCurLayer->sSliceEncCtx.iMaxSliceNumConstraint      = iMaxSliceNumNew;
+  pCurLayer->sSliceThreadInfo.iMaxSliceNumInThread[0] = iMaxSliceNumNew;
   pCurLayer->iMaxSliceNum = iMaxSliceNumNew;
   return ENC_RETURN_SUCCESS;
 }
@@ -1543,11 +1527,11 @@
   int32_t iPosBitOffset = 0;
   const int32_t  kiActiveThreadsNum = pEncCtx->iActiveThreadsNum;
   const int32_t  kiPartitaionId = pCurSlice->uiSliceIdx % kiActiveThreadsNum;
-  const int32_t  kiLastMbIdxInPartition = pEncCtx->pCurDqLayer->pLastMbIdxOfPartition[kiPartitaionId];
+  const int32_t  kiEndMbIdxOfPartition = pEncCtx->pCurDqLayer->pEndMbIdxOfPartition[kiPartitaionId];
 
   const bool    kbCurMbNotFirstMbOfCurSlice      = ((iCurMbIdx > 0) && (pSliceCtx->pOverallMbMap[iCurMbIdx] ==
       pSliceCtx->pOverallMbMap[iCurMbIdx - 1]));
-  const bool    kbCurMbNotLastMbOfCurPartition = iCurMbIdx < kiLastMbIdxInPartition;
+  const bool    kbCurMbNotLastMbOfCurPartition = iCurMbIdx < kiEndMbIdxOfPartition;
 
   if (pCurSlice->bDynamicSlicingSliceSizeCtrlFlag)
     return false;
@@ -1580,7 +1564,7 @@
     //DYNAMIC_SLICING_ONE_THREAD: judge jump_avoiding_pack_exceed
     if (kbSliceNumNotExceedConstraint && kbSliceIdxNotExceedConstraint) {//able to add new pSlice
 
-      AddSliceBoundary (pEncCtx, pCurSlice, pSliceCtx, pCurMb, iCurMbIdx, kiLastMbIdxInPartition);
+      AddSliceBoundary (pEncCtx, pCurSlice, pSliceCtx, pCurMb, iCurMbIdx, kiEndMbIdxOfPartition);
 
       ++ pSliceCtx->iSliceNumInFrame;
 
@@ -1597,7 +1581,7 @@
     if ((kbSliceNumReachConstraint || !kbSliceIdxNotExceedConstraint)
         && kbCurMbNotLastMbOfCurPartition
         && JUMPPACKETSIZE_JUDGE (uiLen, iCurMbIdx,
-                                 pSliceCtx->uiSliceSizeConstraint - ((kiLastMbIdxInPartition - iCurMbIdx) <<
+                                 pSliceCtx->uiSliceSizeConstraint - ((kiEndMbIdxOfPartition - iCurMbIdx) <<
                                      (pCurSlice->uiAssumeLog2BytePerMb) //assume each MB consumes these byte under largest QP
                                                                     ))
        ) {
--- a/codec/encoder/core/src/wels_task_encoder.cpp
+++ b/codec/encoder/core/src/wels_task_encoder.cpp
@@ -113,14 +113,14 @@
   }
 
   //  InitOneSliceInThread();
-  SetOneSliceBsBufferUnderMultithread (m_pCtx, m_iThreadIdx, m_iSliceIdx);
-
-  m_pSlice = m_pCtx->pCurDqLayer->ppSliceInLayer[m_iSliceIdx];
+  m_pSlice   = m_pCtx->pCurDqLayer->ppSliceInLayer[m_iSliceIdx];
   m_pSliceBs = &m_pSlice->sSliceBs;
 
   m_pSliceBs->uiBsPos       = 0;
   m_pSliceBs->iNalIndex     = 0;
 
+  SetOneSliceBsBufferUnderMultithread (m_pCtx, m_iThreadIdx, m_pSlice);
+
   assert ((void*) (&m_pSliceBs->sBsWrite) == (void*)m_pSlice->pSliceBsa);
   InitBits (&m_pSliceBs->sBsWrite, m_pSliceBs->pBsBuffer, m_pSliceBs->uiSize);
   //printf ("CWelsSliceEncodingTask_InitTask slice %d\n", m_iSliceIdx);
@@ -222,7 +222,7 @@
            m_iSliceIdx,
            m_pSlice->uiSliceConsumeTime,
            m_iSliceSize,
-           m_pCtx->pCurDqLayer->ppSliceInLayer[m_iSliceIdx]->sSliceHeaderExt.sSliceHeader.iFirstMbInSlice,
+           m_pSlice->sSliceHeaderExt.sSliceHeader.iFirstMbInSlice,
            m_pSlice->iCountMbNumInSlice,
            (m_pSlice->uiSliceConsumeTime + m_iSliceStart));
 }
@@ -240,18 +240,14 @@
 
   //deal with partition: TODO: here SSliceThreadPrivateData is just for parition info and actually has little relationship with threadbuffer, and iThreadIndex is not used in threadpool model, need renaming after removing old logic to avoid confusion
   const int32_t kiPartitionId             = m_iSliceIdx % kiSliceIdxStep;
-  SSliceThreadPrivateData* pPrivateData = & (m_pCtx->pSliceThreading->pThreadPEncCtx[kiPartitionId]);
-  const int32_t kiFirstMbInPartition      = pPrivateData->iStartMbIndex;  // inclusive
-  const int32_t kiEndMbInPartition        = pPrivateData->iEndMbIndex;            // exclusive
+  const int32_t kiFirstMbInPartition      = pCurDq->pFirstMbIdxOfPartition[kiPartitionId];
+  const int32_t kiEndMbIdxInPartition     = pCurDq->pEndMbIdxOfPartition[kiPartitionId];
   pStartSliceHeaderExt->sSliceHeader.iFirstMbInSlice      = kiFirstMbInPartition;
-  pCurDq->pNumSliceCodedOfPartition[kiPartitionId]        =
-    1;    // one pSlice per partition intialized, dynamic slicing inside
-  pCurDq->pLastMbIdxOfPartition[kiPartitionId]            = kiEndMbInPartition - 1;
-
+  pCurDq->pNumSliceCodedOfPartition[kiPartitionId]        = 1;
   pCurDq->pLastCodedMbIdxOfPartition[kiPartitionId]       = 0;
   //end of deal with partition
 
-  int32_t iAnyMbLeftInPartition           = kiEndMbInPartition - kiFirstMbInPartition;
+  int32_t iAnyMbLeftInPartition           = kiEndMbIdxInPartition - kiFirstMbInPartition + 1;
   int32_t iLocalSliceIdx = m_iSliceIdx;
   while (iAnyMbLeftInPartition > 0) {
     if (iLocalSliceIdx >= pSliceCtx->iMaxSliceNumConstraint) {
@@ -262,12 +258,13 @@
       return ENC_RETURN_KNOWN_ISSUE;
     }
 
-    SetOneSliceBsBufferUnderMultithread (m_pCtx, m_iThreadIdx, iLocalSliceIdx);
     m_pSlice = pCurDq->ppSliceInLayer[iLocalSliceIdx];
     m_pSliceBs = &m_pSlice->sSliceBs;
 
     m_pSliceBs->uiBsPos     = 0;
     m_pSliceBs->iNalIndex   = 0;
+
+    SetOneSliceBsBufferUnderMultithread (m_pCtx, m_iThreadIdx, m_pSlice);
     InitBits (&m_pSliceBs->sBsWrite, m_pSliceBs->pBsBuffer, m_pSliceBs->uiSize);
 
     if (m_bNeedPrefix) {
@@ -311,9 +308,9 @@
     WelsLog (&m_pCtx->sLogCtx, WELS_LOG_DEBUG,
              "[MT] CWelsConstrainedSizeSlicingEncodingTask(), coding_idx %d, iPartitionId %d, m_iThreadIdx %d, iLocalSliceIdx %d, m_iSliceSize %d, ParamValidationExt(), invalid uiMaxNalSizeiEndMbInPartition %d, pCurDq->pLastCodedMbIdxOfPartition[%d] %d\n",
              pParamInternal->iCodingIndex, kiPartitionId, m_iThreadIdx, iLocalSliceIdx, m_iSliceSize,
-             kiEndMbInPartition, kiPartitionId, pCurDq->pLastCodedMbIdxOfPartition[kiPartitionId]);
+             kiEndMbIdxInPartition, kiPartitionId, pCurDq->pLastCodedMbIdxOfPartition[kiPartitionId]);
 
-    iAnyMbLeftInPartition = kiEndMbInPartition - (1 + pCurDq->pLastCodedMbIdxOfPartition[kiPartitionId]);
+    iAnyMbLeftInPartition = kiEndMbIdxInPartition - pCurDq->pLastCodedMbIdxOfPartition[kiPartitionId];
     iLocalSliceIdx += kiSliceIdxStep;
   }