shithub: openh264

Download patch

ref: 159ca4461fbb24c985976c331a222f1c73d91a15
parent: bee0d7d23030ab8c493640bb9fef3d73670f9320
parent: 43ca6db9f516ae6fd69df64ede749eb4f1ae248c
author: huili2 <huili2@cisco.com>
date: Fri Jul 3 07:10:05 EDT 2015

Merge pull request #2021 from sijchen/mt32

[Encoder] put pSliceBsBuffer to thread buffer rather than per slice, so as …

--- a/codec/encoder/core/inc/mt_defs.h
+++ b/codec/encoder/core/inc/mt_defs.h
@@ -87,6 +87,7 @@
 FILE*                           pFSliceDiff;    // file handle for debug
 #endif//MT_DEBUG
 
+uint8_t*                        pThreadBsBuffer[MAX_THREADS_NUM]; //actual memory for slice buffer
 } SSliceThreading;
 
 #endif//MULTIPLE_THREADING_DEFINES_H__
--- a/codec/encoder/core/inc/slice_multi_threading.h
+++ b/codec/encoder/core/inc/slice_multi_threading.h
@@ -100,6 +100,7 @@
 void TrackSliceConsumeTime (sWelsEncCtx* pCtx, int32_t* pDidList, const int32_t kiSpatialNum);
 #endif//defined(MT_DEBUG)
 
+void SetOneSliceBsBufferUnderMultithread(sWelsEncCtx* pCtx, const int32_t kiThreadIdx, const int32_t iSliceIdx); // sets pSliceBs[iSliceIdx].pBsBuffer to pThreadBsBuffer[kiThreadIdx] and zeroes that slice's uiBsPos
 }
 
 #endif//SVC_SLICE_MULTIPLE_THREADING_H__
--- a/codec/encoder/core/src/encoder_ext.cpp
+++ b/codec/encoder/core/src/encoder_ext.cpp
@@ -4061,9 +4061,9 @@
                 // pick up succeeding slice for threading
                 // thread_id equal to iEventId per implementation here
                 pCtx->pSliceThreading->pThreadPEncCtx[iEventId].iSliceIndex = iIndexOfSliceToBeCoded;
+                SetOneSliceBsBufferUnderMultithread(pCtx, iEventId, iIndexOfSliceToBeCoded);
                 WelsEventSignal (&pCtx->pSliceThreading->pReadySliceCodingEvent[iEventId]);
                 WelsEventSignal (&pCtx->pSliceThreading->pThreadMasterEvent[iEventId]);
-
                 ++ iIndexOfSliceToBeCoded;
               } else { // no other slices left for coding
                 -- iNumThreadsRunning;
--- a/codec/encoder/core/src/slice_multi_threading.cpp
+++ b/codec/encoder/core/src/slice_multi_threading.cpp
@@ -289,20 +289,50 @@
   }
 }
 
+int32_t SetMultiSliceBuffer (sWelsEncCtx** ppCtx, CMemoryAlign* pMa, SSliceThreading* pSmt, // allocates (*ppCtx)->pSliceBs (iMaxSliceNum entries) and places each slice's pBs region back to back starting at pFrameBs
+                             int32_t iMaxSliceNum, int32_t iSlice1Len, int32_t iSlice0Len, bool bDynamicSlice) { // iSlice0Len: uiSize of slice 0; iSlice1Len: uiSize of every slice k >= 1; returns an ENC_RETURN_* code
+  (*ppCtx)->pSliceBs = (SWelsSliceBs*)pMa->WelsMalloc (sizeof (SWelsSliceBs) * iMaxSliceNum, "pSliceBs");
+  if (NULL == (*ppCtx)->pSliceBs) {
+    return ENC_RETURN_MEMALLOCERR;
+  }
+  if (iSlice0Len <= 0) { // pSliceBs is already allocated at this point and is not released by this function on any error return
+    return ENC_RETURN_UNEXPECTED;
+  }
+  //slice 0: its pBs is the start of pFrameBs and its pBsBuffer is entry 0 of pThreadBsBuffer
+  (*ppCtx)->pSliceBs[0].uiSize = iSlice0Len;
+  (*ppCtx)->pSliceBs[0].pBs    = (*ppCtx)->pFrameBs;
+  (*ppCtx)->pSliceBs[0].uiBsPos = 0;
+  (*ppCtx)->pSliceBs[0].pBsBuffer = pSmt->pThreadBsBuffer[0];
+  if ((iMaxSliceNum == 1) && (!bDynamicSlice)) { // one non-dynamic slice: done, iSlice1Len is never examined
+    return ENC_RETURN_SUCCESS;
+  }
+  //slice >0: every further slice is given iSlice1Len bytes
+  if (iSlice1Len <= 0) {
+    return ENC_RETURN_UNEXPECTED;
+  }
+  for (int32_t k = 1; k < iMaxSliceNum; k++) { // only uiSize and pBs are assigned here; pBsBuffer and uiBsPos of slices k >= 1 are not set in this function
+    (*ppCtx)->pSliceBs[k].uiSize = iSlice1Len;
+    (*ppCtx)->pSliceBs[k].pBs    = (*ppCtx)->pSliceBs[k - 1].pBs + (*ppCtx)->pSliceBs[k - 1].uiSize; // region k begins where region k-1 ends
+  }
+  if ((*ppCtx)->iFrameBsSize < (iSlice0Len + (iMaxSliceNum - 1)*iSlice1Len)) { // checked after the layout is computed: the total of all regions must fit in iFrameBsSize
+    return ENC_RETURN_MEMALLOCERR;
+  }
+  return ENC_RETURN_SUCCESS;
 
+}
+
 int32_t RequestMtResource (sWelsEncCtx** ppCtx, SWelsSvcCodingParam* pCodingParam, const int32_t iCountBsLen,
                            const int32_t iTargetSpatialBsSize) {
   CMemoryAlign* pMa             = NULL;
   SWelsSvcCodingParam* pPara = NULL;
   SSliceThreading* pSmt         = NULL;
-  SWelsSliceBs* pSliceB         = NULL;
-  uint8_t* pBsBase              = NULL;
   int32_t iNumSpatialLayers     = 0;
   int32_t iThreadNum            = 0;
   int32_t iIdx                  = 0;
-  int32_t iSliceBsBufferSize = 0;
   int16_t iMaxSliceNum          = 1;
   int32_t iReturn = ENC_RETURN_SUCCESS;
+  bool    bDynamicSlice = false;
+  uint32_t uiMaxSliceSizeConstraint = 0;
 
   if (NULL == ppCtx || NULL == pCodingParam || NULL == *ppCtx || iCountBsLen <= 0)
     return 1;
@@ -342,6 +372,13 @@
       pSmt->pSliceConsumeTime[iIdx]     = NULL;
       pSmt->pSliceComplexRatio[iIdx]    = NULL;
     }
+
+    if (pMso->uiSliceMode == SM_DYN_SLICE) {
+      bDynamicSlice = true;
+      if (uiMaxSliceSizeConstraint < pMso->sSliceArgument.uiSliceSizeConstraint) {
+        uiMaxSliceSizeConstraint = pMso->sSliceArgument.uiSliceSizeConstraint;
+      }
+    }
     ++ iIdx;
   }
   // NULL for pSliceConsumeTime[iIdx]: iIdx from iNumSpatialLayers to MAX_DEPENDENCY_LAYERS
@@ -391,38 +428,26 @@
     MT_TRACE_LOG (*ppCtx, WELS_LOG_INFO, "[MT] Open pReadySliceCodingEvent%d = 0x%p named(%s) ret%d err%d", iIdx,
                   (void*)pSmt->pReadySliceCodingEvent[iIdx], name, err, errno);
 
+
+    pSmt->pThreadBsBuffer[iIdx] = (uint8_t*)pMa->WelsMalloc (iTargetSpatialBsSize, "pSmt->pThreadBsBuffer");
+    WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pSmt->pThreadBsBuffer[iIdx]), FreeMemorySvc (ppCtx))
+
     ++ iIdx;
   }
+  for (; iIdx < MAX_THREADS_NUM; iIdx++) {
+    pSmt->pThreadBsBuffer[iIdx] = NULL;
+  }
 
   WelsSnprintf (name, SEM_NAME_MAX, "scm%s", pSmt->eventNamespace);
   err = WelsEventOpen (&pSmt->pSliceCodedMasterEvent, name);
   MT_TRACE_LOG (*ppCtx, WELS_LOG_INFO, "[MT] Open pSliceCodedMasterEvent named(%s) ret%d err%d", name, err, errno);
 
-  (*ppCtx)->pSliceBs = (SWelsSliceBs*)pMa->WelsMalloc (sizeof (SWelsSliceBs) * iMaxSliceNum, "pSliceBs");
-  WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pSliceBs), FreeMemorySvc (ppCtx))
+  iReturn = SetMultiSliceBuffer (ppCtx, pMa, pSmt, iMaxSliceNum,
+                                 iTargetSpatialBsSize, //TODO: may use uiMaxSliceSizeConstraint<<1 when bDynamicSlice, but need more twist
+                                 iCountBsLen,
+                                 bDynamicSlice);
+  WELS_VERIFY_RETURN_PROC_IF (iReturn, (ENC_RETURN_SUCCESS != iReturn), FreeMemorySvc (ppCtx))
 
-  pBsBase               = (*ppCtx)->pFrameBs + iCountBsLen;
-  pSliceB               = (*ppCtx)->pSliceBs;
-  iSliceBsBufferSize    = iTargetSpatialBsSize;
-  iIdx = 0;
-  while (iIdx < iMaxSliceNum) {
-    pSliceB->pBsBuffer  = (uint8_t*)pMa->WelsMalloc (iSliceBsBufferSize, "pSliceB->pBsBuffer");
-
-    WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pSliceB->pBsBuffer), FreeMemorySvc (ppCtx))
-    pSliceB->uiSize = iSliceBsBufferSize;
-
-    if (iIdx > 0) {
-      pSliceB->pBs      = pBsBase;
-      pSliceB->uiBsPos  = 0;
-      pBsBase          += iSliceBsBufferSize;
-    } else {
-      pSliceB->pBs      = NULL;
-      pSliceB->uiBsPos  = 0;
-    }
-    ++ pSliceB;
-    ++ iIdx;
-  }
-
   iReturn = WelsMutexInit (&pSmt->mutexSliceNumUpdate);
   WELS_VERIFY_RETURN_PROC_IF (1, (WELS_THREAD_ERROR_OK != iReturn), FreeMemorySvc (ppCtx))
 
@@ -486,14 +511,19 @@
     pSmt->pThreadPEncCtx = NULL;
   }
 
+  for (int i = 0; i < MAX_THREADS_NUM; i++) {
+    if (pSmt->pThreadBsBuffer[i]) {
+      pMa->WelsFree (pSmt->pThreadBsBuffer[i], "pSmt->pThreadBsBuffer");
+      pSmt->pThreadBsBuffer[i] = NULL;
+    }
+  }
+
   pSliceB = (*ppCtx)->pSliceBs;
   iIdx = 0;
   while (pSliceB != NULL && iIdx < uiSliceNum) {
-    if (pSliceB->pBsBuffer) {
-      pMa->WelsFree (pSliceB->pBsBuffer, "pSliceB->pBsBuffer");
-      pSliceB->pBsBuffer = NULL;
-      pSliceB->uiSize = 0;
-    }
+    pSliceB->pBsBuffer = NULL;
+    pSliceB->uiSize = 0;
+    pSliceB->uiBsPos = 0;
     ++ iIdx;
     ++ pSliceB;
   }
@@ -863,6 +893,7 @@
                                          iEventIdx);
           }
 
+          SetOneSliceBsBufferUnderMultithread (pEncPEncCtx, kiPartitionId, iSliceIdx);
           pSlice                = &pCurDq->sLayerInfo.pSliceInLayer[iSliceIdx];
           pSliceBs              = &pEncPEncCtx->pSliceBs[iSliceIdx];
 
@@ -1028,6 +1059,7 @@
   while (iIdx < kiEventCnt) {
     pPriData[iIdx].pLayerBs = pLbi;
     pPriData[iIdx].iSliceIndex = iIdx;
+    SetOneSliceBsBufferUnderMultithread (pCtx, iIdx, iIdx);
     if (pEventsList[iIdx])
       WelsEventSignal (&pEventsList[iIdx]);
     if (pMasterEventsList[iIdx])
@@ -1178,5 +1210,9 @@
 }
 #endif//#if defined(MT_DEBUG)
 
+void SetOneSliceBsBufferUnderMultithread (sWelsEncCtx* pCtx, const int32_t kiThreadIdx, const int32_t iSliceIdx) { // binds slice iSliceIdx to entry kiThreadIdx of pSliceThreading->pThreadBsBuffer
+  pCtx->pSliceBs[iSliceIdx].pBsBuffer = pCtx->pSliceThreading->pThreadBsBuffer[kiThreadIdx]; // no range or NULL checks: both indices and all pointers are used as given
+  pCtx->pSliceBs[iSliceIdx].uiBsPos = 0; // presumably the write offset into pBsBuffer, restarted for the new binding -- TODO confirm
+}
 }