1 /*!
2 * \copy
3 * Copyright (c) 2010-2013, Cisco Systems
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * * Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
22 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
24 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
28 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 *
31 *
32 * \file slice_multi_threading.h
33 *
 * \brief slice based multiple threading
35 *
36 * \date 04/16/2010 Created
37 *
38 *************************************************************************************
39 */
40
41
42 #include <assert.h>
43 #if !defined(_WIN32)
44 #include <semaphore.h>
45 #include <unistd.h>
46 #endif//!_WIN32
47 #ifndef SEM_NAME_MAX
48 // length of semaphore name should be system constrained at least on mac 10.7
49 #define SEM_NAME_MAX 32
50 #endif//SEM_NAME_MAX
51 #include "slice_multi_threading.h"
52 #include "mt_defs.h"
53 #include "nal_encap.h"
54 #include "utils.h"
55 #include "encoder.h"
56 #include "svc_encode_slice.h"
57 #include "deblocking.h"
58 #include "svc_enc_golomb.h"
59 #include "crt_util_safe_x.h" // for safe crt like calls
60 #include "rc.h"
61
62 #include "cpu.h"
63
64 #include "measure_time.h"
65 #include "wels_task_management.h"
66
// Trace helper for the multi-threading code.
// With ENABLE_TRACE_MT defined it forwards to WelsLog(); otherwise it expands
// to nothing, so its arguments are not evaluated.  Both variants declare the
// same parameter list (log context, level, format and arguments) so that a
// call site is valid in either build configuration.
#if defined(ENABLE_TRACE_MT)
#define MT_TRACE_LOG(pLog, x, ...) WelsLog(pLog, x, __VA_ARGS__)
#else
#define MT_TRACE_LOG(pLog, x, ...)
#endif
72
73 namespace WelsEnc {
UpdateMbListNeighborParallel(SDqLayer * pCurDq,SMB * pMbList,const int32_t uiSliceIdc)74 void UpdateMbListNeighborParallel (SDqLayer* pCurDq,
75 SMB* pMbList,
76 const int32_t uiSliceIdc) {
77 SSliceCtx* pSliceCtx = &pCurDq->sSliceEncCtx;
78 const int32_t kiMbWidth = pSliceCtx->iMbWidth;
79 int32_t iIdx = pCurDq->pFirstMbIdxOfSlice[uiSliceIdc];
80 const int32_t kiEndMbInSlice = iIdx + pCurDq->pCountMbNumInSlice[uiSliceIdc] - 1;
81
82 do {
83 UpdateMbNeighbor (pCurDq, &pMbList[iIdx], kiMbWidth, uiSliceIdc);
84 ++ iIdx;
85 } while (iIdx <= kiEndMbInSlice);
86 }
87
CalcSliceComplexRatio(SDqLayer * pCurDq)88 void CalcSliceComplexRatio (SDqLayer* pCurDq) {
89 SSliceCtx* pSliceCtx = &pCurDq->sSliceEncCtx;
90 SSlice** ppSliceInLayer = pCurDq->ppSliceInLayer;
91 int32_t iSumAv = 0;
92 const int32_t kiSliceCount = pSliceCtx->iSliceNumInFrame;
93 int32_t iSliceIdx = 0;
94 int32_t iAvI[MAX_SLICES_NUM];
95
96 assert (kiSliceCount <= MAX_SLICES_NUM);
97 WelsEmms();
98
99 while (iSliceIdx < kiSliceCount) {
100 iAvI[iSliceIdx] = WELS_DIV_ROUND (INT_MULTIPLY * ppSliceInLayer[iSliceIdx]->iCountMbNumInSlice,
101 ppSliceInLayer[iSliceIdx]->uiSliceConsumeTime);
102 MT_TRACE_LOG (NULL, WELS_LOG_DEBUG, "[MT] CalcSliceComplexRatio(), uiSliceConsumeTime[%d]= %d us, slice_run= %d",
103 iSliceIdx,
104 ppSliceInLayer[iSliceIdx]->uiSliceConsumeTime, ppSliceInLayer[iSliceIdx]->iCountMbNumInSlice);
105 iSumAv += iAvI[iSliceIdx];
106
107 ++ iSliceIdx;
108 }
109 while (-- iSliceIdx >= 0) {
110 ppSliceInLayer[iSliceIdx]->iSliceComplexRatio = WELS_DIV_ROUND (INT_MULTIPLY * iAvI[iSliceIdx], iSumAv);
111 }
112 }
113
NeedDynamicAdjust(SSlice ** ppSliceInLayer,const int32_t iSliceNum)114 int32_t NeedDynamicAdjust (SSlice** ppSliceInLayer, const int32_t iSliceNum) {
115 if (NULL == ppSliceInLayer) {
116 return false;
117 }
118
119 uint32_t uiTotalConsume = 0;
120 int32_t iSliceIdx = 0;
121 int32_t iNeedAdj = false;
122
123 WelsEmms();
124
125 while (iSliceIdx < iSliceNum) {
126 if (NULL == ppSliceInLayer[iSliceIdx]) {
127 return false;
128 }
129
130 uiTotalConsume += ppSliceInLayer[iSliceIdx]->uiSliceConsumeTime;
131 iSliceIdx ++;
132 }
133 if (uiTotalConsume == 0) {
134 MT_TRACE_LOG (NULL, WELS_LOG_DEBUG,
135 "[MT] NeedDynamicAdjust(), herein do no adjust due first picture, iCountSliceNum= %d",
136 iSliceNum);
137 return false;
138 }
139
140 iSliceIdx = 0;
141 float fThr = EPSN; // threshold for various cores cases
142 float fRmse = .0f; // root mean square error of pSlice consume ratios
143 const float kfMeanRatio = 1.0f / iSliceNum;
144 do {
145 const float fRatio = 1.0f * ppSliceInLayer[iSliceIdx]->uiSliceConsumeTime / uiTotalConsume;
146 const float fDiffRatio = fRatio - kfMeanRatio;
147 fRmse += (fDiffRatio * fDiffRatio);
148 ++ iSliceIdx;
149 } while (iSliceIdx + 1 < iSliceNum);
150 fRmse = sqrtf (fRmse / iSliceNum);
151 if (iSliceNum >= 8) {
152 fThr += THRESHOLD_RMSE_CORE8;
153 } else if (iSliceNum >= 4) {
154 fThr += THRESHOLD_RMSE_CORE4;
155 } else if (iSliceNum >= 2) {
156 fThr += THRESHOLD_RMSE_CORE2;
157 } else
158 fThr = 1.0f;
159 if (fRmse > fThr)
160 iNeedAdj = true;
161 MT_TRACE_LOG (NULL, WELS_LOG_DEBUG,
162 "[MT] NeedDynamicAdjust(), herein adjustment decision is made (iNeedAdj= %d) by: fRmse of pSlice complexity ratios %.6f, the corresponding threshold %.6f, iCountSliceNum %d",
163 iNeedAdj, fRmse, fThr, iSliceNum);
164
165 return iNeedAdj;
166 }
167
DynamicAdjustSlicing(sWelsEncCtx * pCtx,SDqLayer * pCurDqLayer,int32_t iCurDid)168 void DynamicAdjustSlicing (sWelsEncCtx* pCtx,
169 SDqLayer* pCurDqLayer,
170 int32_t iCurDid) {
171 SSliceCtx* pSliceCtx = &pCurDqLayer->sSliceEncCtx;
172 SSlice** ppSliceInLayer = pCurDqLayer->ppSliceInLayer;
173 const int32_t kiCountSliceNum = pSliceCtx->iSliceNumInFrame;
174 const int32_t kiCountNumMb = pSliceCtx->iMbNumInFrame;
175 int32_t iMinimalMbNum =
176 pSliceCtx->iMbWidth; // in theory we need only 1 SMB, here let it as one SMB row required
177 int32_t iMaximalMbNum = 0; // dynamically assign later
178 int32_t iMbNumLeft = kiCountNumMb;
179 int32_t iRunLen[MAX_THREADS_NUM] = {0};
180 int32_t iSliceIdx = 0;
181
182 int32_t iNumMbInEachGom = 0;
183 SWelsSvcRc* pWelsSvcRc = &pCtx->pWelsSvcRc[iCurDid];
184 if (pCtx->pSvcParam->iRCMode != RC_OFF_MODE) {
185 iNumMbInEachGom = pWelsSvcRc->iNumberMbGom;
186
187 if (iNumMbInEachGom <= 0) {
188 WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR,
189 "[MT] DynamicAdjustSlicing(), invalid iNumMbInEachGom= %d from RC, iDid= %d, iCountNumMb= %d", iNumMbInEachGom,
190 iCurDid, kiCountNumMb);
191 return;
192 }
193
194 // do not adjust in case no extra iNumMbInEachGom based left for slicing adjustment,
195 // extra MB of non integrated GOM assigned at the last pSlice in default, keep up on early initial result.
196 if (iNumMbInEachGom * kiCountSliceNum >= kiCountNumMb) {
197 return;
198 }
199 iMinimalMbNum = iNumMbInEachGom;
200 }
201
202 if (kiCountSliceNum < 2 || (kiCountSliceNum & 0x01)) // we need suppose uiSliceNum is even for multiple threading
203 return;
204
205 iMaximalMbNum = kiCountNumMb - (kiCountSliceNum - 1) * iMinimalMbNum;
206
207 WelsEmms();
208
209 MT_TRACE_LOG (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "[MT] DynamicAdjustSlicing(), iDid= %d, iCountNumMb= %d", iCurDid,
210 kiCountNumMb);
211
212 iSliceIdx = 0;
213 while (iSliceIdx + 1 < kiCountSliceNum) {
214 int32_t iNumMbAssigning = WELS_DIV_ROUND (kiCountNumMb * ppSliceInLayer[iSliceIdx]->iSliceComplexRatio, INT_MULTIPLY);
215
216 // GOM boundary aligned
217 if (pCtx->pSvcParam->iRCMode != RC_OFF_MODE) {
218 iNumMbAssigning = iNumMbAssigning / iNumMbInEachGom * iNumMbInEachGom;
219 }
220
221 // make sure one GOM at least in each pSlice for safe
222 if (iNumMbAssigning < iMinimalMbNum)
223 iNumMbAssigning = iMinimalMbNum;
224 else if (iNumMbAssigning > iMaximalMbNum)
225 iNumMbAssigning = iMaximalMbNum;
226
227 assert (iNumMbAssigning > 0);
228
229 iMbNumLeft -= iNumMbAssigning;
230 if (iMbNumLeft <= 0) { // error due to we can not support slice_skip now yet, do not adjust this time
231 assert (0);
232 return;
233 }
234 iRunLen[iSliceIdx] = iNumMbAssigning;
235 MT_TRACE_LOG (& (pCtx->sLogCtx), WELS_LOG_DEBUG,
236 "[MT] DynamicAdjustSlicing(), iSliceIdx= %d, iSliceComplexRatio= %.2f, slice_run_org= %d, slice_run_adj= %d",
237 iSliceIdx, ppSliceInLayer[iSliceIdx]->iSliceComplexRatio * 1.0f / INT_MULTIPLY,
238 ppSliceInLayer[iSliceIdx]->iCountMbNumInSlice,
239 iNumMbAssigning);
240 ++ iSliceIdx;
241 iMaximalMbNum = iMbNumLeft - (kiCountSliceNum - iSliceIdx - 1) * iMinimalMbNum; // get maximal num_mb in left parts
242 }
243 iRunLen[iSliceIdx] = iMbNumLeft;
244 MT_TRACE_LOG (& (pCtx->sLogCtx), WELS_LOG_DEBUG,
245 "[MT] DynamicAdjustSlicing(), iSliceIdx= %d, pSliceComplexRatio= %.2f, slice_run_org= %d, slice_run_adj= %d",
246 iSliceIdx, ppSliceInLayer[iSliceIdx]->iSliceComplexRatio * 1.0f / INT_MULTIPLY,
247 ppSliceInLayer[iSliceIdx]->iCountMbNumInSlice, iMbNumLeft);
248 pCurDqLayer->bNeedAdjustingSlicing = !DynamicAdjustSlicePEncCtxAll (pCurDqLayer, iRunLen);
249 }
250
RequestMtResource(sWelsEncCtx ** ppCtx,SWelsSvcCodingParam * pCodingParam,const int32_t iCountBsLen,const int32_t iMaxSliceBufferSize,bool bDynamicSlice)251 int32_t RequestMtResource (sWelsEncCtx** ppCtx, SWelsSvcCodingParam* pCodingParam, const int32_t iCountBsLen,
252 const int32_t iMaxSliceBufferSize, bool bDynamicSlice) {
253 CMemoryAlign* pMa = NULL;
254 SWelsSvcCodingParam* pPara = NULL;
255 SSliceThreading* pSmt = NULL;
256 int32_t iNumSpatialLayers = 0;
257 int32_t iThreadNum = 0;
258 int32_t iIdx = 0;
259 int32_t iReturn = ENC_RETURN_SUCCESS;
260
261 if (NULL == ppCtx || NULL == pCodingParam || NULL == *ppCtx || iCountBsLen <= 0)
262 return 1;
263 #if defined(ENABLE_TRACE_MT)
264 SLogContext* pLogCtx = & ((*ppCtx)->sLogCtx);
265 #endif
266 pMa = (*ppCtx)->pMemAlign;
267 pPara = pCodingParam;
268 iNumSpatialLayers = pPara->iSpatialLayerNum;
269 iThreadNum = pPara->iMultipleThreadIdc;
270
271 assert (iThreadNum > 0);
272
273 pSmt = (SSliceThreading*)pMa->WelsMalloc (sizeof (SSliceThreading), "SSliceThreading");
274 WELS_VERIFY_RETURN_IF (1, (NULL == pSmt))
275 memset (pSmt, 0, sizeof (SSliceThreading));
276 (*ppCtx)->pSliceThreading = pSmt;
277 pSmt->pThreadPEncCtx = (SSliceThreadPrivateData*)pMa->WelsMalloc (sizeof (SSliceThreadPrivateData) * iThreadNum,
278 "pThreadPEncCtx");
279 WELS_VERIFY_RETURN_IF (1, (NULL == pSmt->pThreadPEncCtx))
280
281 #ifdef _WIN32
282 // Dummy event namespace, the windows events don't actually use this
283 WelsSnprintf (pSmt->eventNamespace, sizeof (pSmt->eventNamespace), "%p", (void*) *ppCtx);
284 #else
285 WelsSnprintf (pSmt->eventNamespace, sizeof (pSmt->eventNamespace), "%p%x", (void*) *ppCtx, getpid());
286 #endif//!_WIN32
287
288 #ifdef MT_DEBUG
289 // file handle for MT debug
290 pSmt->pFSliceDiff = NULL;
291
292 if (pSmt->pFSliceDiff) {
293 fclose (pSmt->pFSliceDiff);
294 pSmt->pFSliceDiff = NULL;
295 }
296 pSmt->pFSliceDiff = fopen ("slice_time.txt", "wt+");
297 #endif//MT_DEBUG
298
299 MT_TRACE_LOG (pLogCtx, WELS_LOG_INFO, "encpEncCtx= 0x%p", (void*) *ppCtx);
300
301 char name[SEM_NAME_MAX] = {0};
302 WELS_GCC_UNUSED WELS_THREAD_ERROR_CODE err = 0;
303
304 iIdx = 0;
305 while (iIdx < iThreadNum) {
306 pSmt->pThreadPEncCtx[iIdx].pWelsPEncCtx = (void*) *ppCtx;
307 pSmt->pThreadPEncCtx[iIdx].iSliceIndex = iIdx;
308 pSmt->pThreadPEncCtx[iIdx].iThreadIndex = iIdx;
309 pSmt->pThreadHandles[iIdx] = 0;
310
311 // length of semaphore name should be system constrained at least on mac 10.7
312 WelsSnprintf (name, SEM_NAME_MAX, "ud%d%s", iIdx, pSmt->eventNamespace);
313 err = WelsEventOpen (&pSmt->pUpdateMbListEvent[iIdx], name);
314 MT_TRACE_LOG (pLogCtx, WELS_LOG_INFO, "[MT] Open pUpdateMbListEvent%d named(%s) ret%d err%d", iIdx, name, err, errno);
315 WelsSnprintf (name, SEM_NAME_MAX, "fu%d%s", iIdx, pSmt->eventNamespace);
316 err = WelsEventOpen (&pSmt->pFinUpdateMbListEvent[iIdx], name);
317 MT_TRACE_LOG (pLogCtx, WELS_LOG_INFO, "[MT] Open pFinUpdateMbListEvent%d named(%s) ret%d err%d", iIdx, name, err,
318 errno);
319 WelsSnprintf (name, SEM_NAME_MAX, "sc%d%s", iIdx, pSmt->eventNamespace);
320 err = WelsEventOpen (&pSmt->pSliceCodedEvent[iIdx], name);
321 MT_TRACE_LOG (pLogCtx, WELS_LOG_INFO, "[MT] Open pSliceCodedEvent%d named(%s) ret%d err%d", iIdx, name, err, errno);
322 WelsSnprintf (name, SEM_NAME_MAX, "rc%d%s", iIdx, pSmt->eventNamespace);
323 err = WelsEventOpen (&pSmt->pReadySliceCodingEvent[iIdx], name);
324 MT_TRACE_LOG (pLogCtx, WELS_LOG_INFO, "[MT] Open pReadySliceCodingEvent%d = 0x%p named(%s) ret%d err%d", iIdx,
325 (void*)pSmt->pReadySliceCodingEvent[iIdx], name, err, errno);
326 ++ iIdx;
327 }
328
329 WelsSnprintf (name, SEM_NAME_MAX, "scm%s", pSmt->eventNamespace);
330 err = WelsEventOpen (&pSmt->pSliceCodedMasterEvent, name);
331 MT_TRACE_LOG (pLogCtx, WELS_LOG_INFO, "[MT] Open pSliceCodedMasterEvent named(%s) ret%d err%d", name, err, errno);
332
333 iReturn = WelsMutexInit (&pSmt->mutexSliceNumUpdate);
334 WELS_VERIFY_RETURN_IF (1, (WELS_THREAD_ERROR_OK != iReturn))
335
336 (*ppCtx)->pTaskManage = IWelsTaskManage::CreateTaskManage (*ppCtx, iNumSpatialLayers, bDynamicSlice);
337 WELS_VERIFY_RETURN_IF (1, (NULL == (*ppCtx)->pTaskManage))
338
339 int32_t iThreadBufferNum = WELS_MIN ((*ppCtx)->pTaskManage->GetThreadPoolThreadNum(), MAX_THREADS_NUM);
340
341 for (iIdx = 0; iIdx < iThreadBufferNum; iIdx++) {
342 pSmt->pThreadBsBuffer[iIdx] = (uint8_t*)pMa->WelsMallocz (iCountBsLen, "pSmt->pThreadBsBuffer");
343 WELS_VERIFY_RETURN_IF (1, (NULL == pSmt->pThreadBsBuffer[iIdx]))
344 }
345 iReturn = WelsMutexInit (&pSmt->mutexThreadBsBufferUsage);
346 WELS_VERIFY_RETURN_PROC_IF (1, (WELS_THREAD_ERROR_OK != iReturn), FreeMemorySvc (ppCtx))
347
348 iReturn = WelsMutexInit (&pSmt->mutexEvent);
349 WELS_VERIFY_RETURN_PROC_IF (1, (WELS_THREAD_ERROR_OK != iReturn), FreeMemorySvc (ppCtx));
350
351 iReturn = WelsMutexInit (&pSmt->mutexThreadSlcBuffReallocate);
352 WELS_VERIFY_RETURN_PROC_IF (1, (WELS_THREAD_ERROR_OK != iReturn), FreeMemorySvc (ppCtx))
353
354 iReturn = WelsMutexInit (& (*ppCtx)->mutexEncoderError);
355 WELS_VERIFY_RETURN_IF (1, (WELS_THREAD_ERROR_OK != iReturn))
356
357 MT_TRACE_LOG (pLogCtx, WELS_LOG_INFO, "RequestMtResource(), iThreadNum=%d, iMultipleThreadIdc= %d",
358 pPara->iMultipleThreadIdc,
359 (*ppCtx)->iMaxSliceCount);
360 return 0;
361 }
362
ReleaseMtResource(sWelsEncCtx ** ppCtx)363 void ReleaseMtResource (sWelsEncCtx** ppCtx) {
364 SSliceThreading* pSmt = NULL;
365 CMemoryAlign* pMa = NULL;
366 int32_t iIdx = 0;
367 int32_t iThreadNum = 0;
368
369 if (NULL == ppCtx || NULL == *ppCtx)
370 return;
371
372 pMa = (*ppCtx)->pMemAlign;
373 iThreadNum = (*ppCtx)->pSvcParam->iMultipleThreadIdc;
374 pSmt = (*ppCtx)->pSliceThreading;
375
376 if (NULL == pSmt)
377 return;
378
379 char ename[SEM_NAME_MAX] = {0};
380 while (iIdx < iThreadNum) {
381 // length of semaphore name should be system constrained at least on mac 10.7
382 WelsSnprintf (ename, SEM_NAME_MAX, "sc%d%s", iIdx, pSmt->eventNamespace);
383 WelsEventClose (&pSmt->pSliceCodedEvent[iIdx], ename);
384 WelsSnprintf (ename, SEM_NAME_MAX, "rc%d%s", iIdx, pSmt->eventNamespace);
385 WelsEventClose (&pSmt->pReadySliceCodingEvent[iIdx], ename);
386 WelsSnprintf (ename, SEM_NAME_MAX, "ud%d%s", iIdx, pSmt->eventNamespace);
387 WelsEventClose (&pSmt->pUpdateMbListEvent[iIdx], ename);
388 WelsSnprintf (ename, SEM_NAME_MAX, "fu%d%s", iIdx, pSmt->eventNamespace);
389 WelsEventClose (&pSmt->pFinUpdateMbListEvent[iIdx], ename);
390
391 ++ iIdx;
392 }
393 WelsSnprintf (ename, SEM_NAME_MAX, "scm%s", pSmt->eventNamespace);
394 WelsEventClose (&pSmt->pSliceCodedMasterEvent, ename);
395
396 WelsMutexDestroy (&pSmt->mutexSliceNumUpdate);
397 WelsMutexDestroy (&pSmt->mutexThreadBsBufferUsage);
398 WelsMutexDestroy (&pSmt->mutexThreadSlcBuffReallocate);
399 WelsMutexDestroy (& ((*ppCtx)->mutexEncoderError));
400 WelsMutexDestroy (&pSmt->mutexEvent);
401 if (pSmt->pThreadPEncCtx != NULL) {
402 pMa->WelsFree (pSmt->pThreadPEncCtx, "pThreadPEncCtx");
403 pSmt->pThreadPEncCtx = NULL;
404 }
405
406 for (int i = 0; i < MAX_THREADS_NUM; i++) {
407 if (pSmt->pThreadBsBuffer[i]) {
408 pMa->WelsFree (pSmt->pThreadBsBuffer[i], "pSmt->pThreadBsBuffer");
409 pSmt->pThreadBsBuffer[i] = NULL;
410 }
411 }
412 memset (&pSmt->bThreadBsBufferUsage, 0, MAX_THREADS_NUM * sizeof (bool));
413
414 if ((*ppCtx)->pTaskManage != NULL) {
415 WELS_DELETE_OP ((*ppCtx)->pTaskManage);
416 }
417
418 #ifdef MT_DEBUG
419 // file handle for debug
420 if (pSmt->pFSliceDiff) {
421 fclose (pSmt->pFSliceDiff);
422 pSmt->pFSliceDiff = NULL;
423 }
424 #endif//MT_DEBUG
425 pMa->WelsFree ((*ppCtx)->pSliceThreading, "SSliceThreading");
426 (*ppCtx)->pSliceThreading = NULL;
427 }
428
AppendSliceToFrameBs(sWelsEncCtx * pCtx,SLayerBSInfo * pLbi,const int32_t iSliceCount)429 int32_t AppendSliceToFrameBs (sWelsEncCtx* pCtx, SLayerBSInfo* pLbi, const int32_t iSliceCount) {
430 SSlice** ppSliceInlayer = pCtx->pCurDqLayer->ppSliceInLayer;
431 SWelsSliceBs* pSliceBs = NULL;
432 int32_t iLayerSize = 0;
433 int32_t iNalIdxBase = pLbi->iNalCount;
434 int32_t iSliceIdx = 0;
435
436 iNalIdxBase = pLbi->iNalCount = 0;
437 while (iSliceIdx < iSliceCount) {
438 pSliceBs = &ppSliceInlayer[iSliceIdx]->sSliceBs;
439 if (pSliceBs != NULL && pSliceBs->uiBsPos > 0) {
440 int32_t iNalIdx = 0;
441 const int32_t iCountNal = pSliceBs->iNalIndex;
442
443 #if MT_DEBUG_BS_WR
444 assert (pSliceBs->bSliceCodedFlag);
445 #endif//MT_DEBUG_BS_WR
446
447 memmove (pCtx->pFrameBs + pCtx->iPosBsBuffer, pSliceBs->pBs, pSliceBs->uiBsPos); // confirmed_safe_unsafe_usage
448 pCtx->iPosBsBuffer += pSliceBs->uiBsPos;
449
450 iLayerSize += pSliceBs->uiBsPos;
451
452 while (iNalIdx < iCountNal) {
453 pLbi->pNalLengthInByte[iNalIdxBase + iNalIdx] = pSliceBs->iNalLen[iNalIdx];
454 ++ iNalIdx;
455 }
456 pLbi->iNalCount += iCountNal;
457 iNalIdxBase += iCountNal;
458 }
459 ++ iSliceIdx;
460 }
461
462 return iLayerSize;
463 }
464
WriteSliceBs(sWelsEncCtx * pCtx,SWelsSliceBs * pSliceBs,const int32_t iSliceIdx,int32_t & iSliceSize)465 int32_t WriteSliceBs (sWelsEncCtx* pCtx, SWelsSliceBs* pSliceBs, const int32_t iSliceIdx, int32_t& iSliceSize) {
466 const int32_t kiNalCnt = pSliceBs->iNalIndex;
467 int32_t iNalIdx = 0;
468 int32_t iNalSize = 0;
469 int32_t iReturn = ENC_RETURN_SUCCESS;
470 int32_t iTotalLeftLength = pSliceBs->uiSize - pSliceBs->uiBsPos;
471 SNalUnitHeaderExt* pNalHdrExt = &pCtx->pCurDqLayer->sLayerInfo.sNalHeaderExt;
472 uint8_t* pDst = pSliceBs->pBs;
473
474 assert (kiNalCnt <= 2);
475 if (kiNalCnt > 2)
476 return 0;
477
478 iSliceSize = 0;
479 while (iNalIdx < kiNalCnt) {
480 iNalSize = 0;
481 iReturn = WelsEncodeNal (&pSliceBs->sNalList[iNalIdx], pNalHdrExt, iTotalLeftLength - iSliceSize,
482 pDst, &iNalSize);
483 WELS_VERIFY_RETURN_IFNEQ (iReturn, ENC_RETURN_SUCCESS)
484
485 pSliceBs->iNalLen[iNalIdx] = iNalSize;
486 iSliceSize += iNalSize;
487 pDst += iNalSize;
488 ++ iNalIdx;
489 }
490 pSliceBs->uiBsPos = iSliceSize;
491
492 return iReturn;
493 }
494
495 // thread process for coding one pSlice
DynamicDetectCpuCores()496 int32_t DynamicDetectCpuCores() {
497 WelsLogicalProcessInfo info;
498 WelsQueryLogicalProcessInfo (&info);
499 return info.ProcessorCount;
500 }
501
AdjustBaseLayer(sWelsEncCtx * pCtx)502 int32_t AdjustBaseLayer (sWelsEncCtx* pCtx) {
503 SDqLayer* pCurDq = pCtx->ppDqLayerList[0];
504 int32_t iNeedAdj = 1;
505 #ifdef MT_DEBUG
506 int64_t iT0 = WelsTime();
507 #endif//MT_DEBUG
508
509 pCtx->pCurDqLayer = pCurDq;
510
511 // do not need adjust due to not different at both slices of consumed time
512 iNeedAdj = NeedDynamicAdjust (pCtx->ppDqLayerList[0]->ppSliceInLayer
513 , pCurDq->sSliceEncCtx.iSliceNumInFrame);
514 if (iNeedAdj)
515 DynamicAdjustSlicing (pCtx,
516 pCurDq,
517 0);
518 #ifdef MT_DEBUG
519 iT0 = WelsTime() - iT0;
520 if (pCtx->pSliceThreading->pFSliceDiff) {
521 fprintf (pCtx->pSliceThreading->pFSliceDiff,
522 "%6" PRId64" us adjust time at base spatial layer, iNeedAdj %d, DynamicAdjustSlicing()\n",
523 iT0, iNeedAdj);
524 }
525 #endif//MT_DEBUG
526
527 return iNeedAdj;
528 }
529
AdjustEnhanceLayer(sWelsEncCtx * pCtx,int32_t iCurDid)530 int32_t AdjustEnhanceLayer (sWelsEncCtx* pCtx, int32_t iCurDid) {
531 #ifdef MT_DEBUG
532 int64_t iT1 = WelsTime();
533 #endif//MT_DEBUG
534 int32_t iNeedAdj = 1;
535 // uiSliceMode of referencing spatial should be SM_FIXEDSLCNUM_SLICE
536 // if using spatial base layer for complexity estimation
537
538 const bool kbModelingFromSpatial = (pCtx->pCurDqLayer->pRefLayer != NULL && iCurDid > 0)
539 && (pCtx->pSvcParam->sSpatialLayers[iCurDid - 1].sSliceArgument.uiSliceMode == SM_FIXEDSLCNUM_SLICE
540 && pCtx->pSvcParam->iMultipleThreadIdc >= pCtx->pSvcParam->sSpatialLayers[iCurDid -
541 1].sSliceArgument.uiSliceNum);
542
543 if (kbModelingFromSpatial) { // using spatial base layer for complexity estimation
544 // do not need adjust due to not different at both slices of consumed time
545 iNeedAdj = NeedDynamicAdjust (pCtx->ppDqLayerList[iCurDid - 1]->ppSliceInLayer,
546 pCtx->pCurDqLayer->sSliceEncCtx.iSliceNumInFrame);
547 if (iNeedAdj)
548 DynamicAdjustSlicing (pCtx,
549 pCtx->pCurDqLayer,
550 iCurDid
551 );
552 } else { // use temporal layer for complexity estimation
553 // do not need adjust due to not different at both slices of consumed time
554 iNeedAdj = NeedDynamicAdjust (pCtx->ppDqLayerList[iCurDid]->ppSliceInLayer,
555 pCtx->pCurDqLayer->sSliceEncCtx.iSliceNumInFrame);
556 if (iNeedAdj)
557 DynamicAdjustSlicing (pCtx,
558 pCtx->pCurDqLayer,
559 iCurDid
560 );
561 }
562
563 #ifdef MT_DEBUG
564 iT1 = WelsTime() - iT1;
565 if (pCtx->pSliceThreading->pFSliceDiff) {
566 fprintf (pCtx->pSliceThreading->pFSliceDiff,
567 "%6" PRId64" us adjust time at spatial layer %d, iNeedAdj %d, DynamicAdjustSlicing()\n",
568 iT1, iCurDid, iNeedAdj);
569 }
570 #endif//MT_DEBUG
571
572 return iNeedAdj;
573 }
574
575
576
#if defined(MT_DEBUG)
/*!
 * \brief Write the complexity ratio of every slice of the current layer to
 *        the slice debug file (MT_DEBUG builds only).
 *
 * The stored ratio is an integer scaled by INT_MULTIPLY; it is converted to
 * a floating-point fraction before being printed with a floating-point
 * conversion.  Nothing is written when the slice threading data or the debug
 * file is not available.
 *
 * \param pCtx     encoder context
 * \param iCurDid  dependency id printed next to each ratio
 */
void TrackSliceComplexities (sWelsEncCtx* pCtx, const int32_t iCurDid) {
  const int32_t kiCountSliceNum = pCtx->pCurDqLayer->sSliceEncCtx.iSliceNumInFrame;
  SSlice** ppSliceInLayer       = pCtx->pCurDqLayer->ppSliceInLayer;

  // the debug file may be missing, e.g. when it could not be opened
  if (NULL == pCtx->pSliceThreading || NULL == pCtx->pSliceThreading->pFSliceDiff)
    return;

  for (int32_t iSliceIdx = 0; iSliceIdx < kiCountSliceNum; ++iSliceIdx) {
    fprintf (pCtx->pSliceThreading->pFSliceDiff, "%6.3f complexity pRatio at iDid %d pSlice %d\n",
             ppSliceInLayer[iSliceIdx]->iSliceComplexRatio * 1.0f / INT_MULTIPLY, iCurDid, iSliceIdx);
  }
}
#endif
591
#if defined(MT_DEBUG)
/*!
 * \brief Write the consumed time of every slice, and the maximum of them, to
 *        the slice debug file for each listed spatial layer (MT_DEBUG builds
 *        only).
 *
 * A layer is reported only when the debug file is open, its slice mode is
 * SM_FIXEDSLCNUM_SLICE or SM_SIZELIMITED_SLICE, more than one thread is
 * configured and the thread count is not below the layer's slice count.
 *
 * \param pCtx         encoder context
 * \param pDidList     dependency ids of the layers to report
 * \param iSpatialNum  number of entries of \p pDidList; nothing is done when
 *                     it exceeds MAX_DEPENDENCY_LAYER
 */
void TrackSliceConsumeTime (sWelsEncCtx* pCtx, int32_t* pDidList, const int32_t iSpatialNum) {
  if (iSpatialNum > MAX_DEPENDENCY_LAYER)
    return;

  SWelsSvcCodingParam* pPara = pCtx->pSvcParam;

  for (int32_t iSpatialIdx = 0; iSpatialIdx < iSpatialNum; ++iSpatialIdx) {
    const int32_t kiDid              = pDidList[iSpatialIdx];
    SSliceArgument* pSliceArgument   = &pPara->sSpatialLayers[kiDid].sSliceArgument;
    SDqLayer* pCurDq                 = pCtx->ppDqLayerList[kiDid];
    SSlice** ppSliceInLayer          = pCurDq->ppSliceInLayer;
    const uint32_t kuiCountSliceNum  = pCurDq->sSliceEncCtx.iSliceNumInFrame;

    if (!pCtx->pSliceThreading)
      continue;

    const bool kbReport = pCtx->pSliceThreading->pFSliceDiff
                          && ((pSliceArgument->uiSliceMode == SM_FIXEDSLCNUM_SLICE) || (pSliceArgument->uiSliceMode == SM_SIZELIMITED_SLICE))
                          && pPara->iMultipleThreadIdc > 1
                          && pPara->iMultipleThreadIdc >= kuiCountSliceNum;
    if (!kbReport)
      continue;

    uint32_t uiMaxT = 0;
    int32_t iMaxI   = 0;
    for (uint32_t i = 0; i < kuiCountSliceNum; ++i) {
      fprintf (pCtx->pSliceThreading->pFSliceDiff, "%6d us consume_time coding_idx %d iDid %d pSlice %d\n",
               ppSliceInLayer[i]->uiSliceConsumeTime, pCtx->iCodingIndex, kiDid, i /*/ 1000*/);
      // remember the slowest slice (first one wins on ties)
      if (ppSliceInLayer[i]->uiSliceConsumeTime > uiMaxT) {
        uiMaxT = ppSliceInLayer[i]->uiSliceConsumeTime;
        iMaxI  = i;
      }
    }
    fprintf (pCtx->pSliceThreading->pFSliceDiff, "%6d us consume_time_max coding_idx %d iDid %d pSlice %d\n", uiMaxT,
             pCtx->iCodingIndex, kiDid, iMaxI /*/ 1000*/);
  }
}
#endif//#if defined(MT_DEBUG)
633
SetOneSliceBsBufferUnderMultithread(sWelsEncCtx * pCtx,const int32_t kiThreadIdx,SSlice * pSlice)634 void SetOneSliceBsBufferUnderMultithread (sWelsEncCtx* pCtx, const int32_t kiThreadIdx, SSlice* pSlice) {
635 SWelsSliceBs* pSliceBs = &pSlice->sSliceBs;
636 pSliceBs->pBsBuffer = pCtx->pSliceThreading->pThreadBsBuffer[kiThreadIdx];
637 pSliceBs->uiBsPos = 0;
638 }
639 }
640
641