• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*!
2  * \copy
3  *     Copyright (c)  2008-2013, Cisco Systems
4  *     All rights reserved.
5  *
6  *     Redistribution and use in source and binary forms, with or without
7  *     modification, are permitted provided that the following conditions
8  *     are met:
9  *
10  *        * Redistributions of source code must retain the above copyright
11  *          notice, this list of conditions and the following disclaimer.
12  *
13  *        * Redistributions in binary form must reproduce the above copyright
14  *          notice, this list of conditions and the following disclaimer in
15  *          the documentation and/or other materials provided with the
16  *          distribution.
17  *
18  *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21  *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
22  *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23  *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
24  *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25  *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26  *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
28  *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  *     POSSIBILITY OF SUCH DAMAGE.
30  *
31  *
32  *  Abstract
33  *      current slice decoding
34  *
35  *  History
36  *      07/10/2008 Created
37  *      08/09/2013 Modified
38  *
39  *****************************************************************************/
40 
41 
42 #include "deblocking.h"
43 
44 #include "decode_slice.h"
45 
46 #include "parse_mb_syn_cavlc.h"
47 #include "parse_mb_syn_cabac.h"
48 #include "rec_mb.h"
49 #include "mv_pred.h"
50 
51 #include "cpu_core.h"
52 
53 namespace WelsDec {
54 
55 extern void FreePicture (PPicture pPic, CMemoryAlign* pMa);
56 
57 extern PPicture AllocPicture (PWelsDecoderContext pCtx, const int32_t kiPicWidth, const int32_t kiPicHeight);
58 
CheckRefPics(const PWelsDecoderContext & pCtx)59 static bool CheckRefPics (const PWelsDecoderContext& pCtx) {
60   int32_t listCount = 1;
61   if (pCtx->eSliceType == B_SLICE) {
62     ++listCount;
63   }
64   for (int32_t list = LIST_0; list < listCount; ++list) {
65     int32_t shortRefCount = pCtx->sRefPic.uiShortRefCount[list];
66     for (int32_t refIdx = 0; refIdx < shortRefCount; ++refIdx) {
67       if (!pCtx->sRefPic.pShortRefList[list][refIdx]) {
68         return false;
69       }
70     }
71     int32_t longRefCount = pCtx->sRefPic.uiLongRefCount[list];
72     for (int32_t refIdx = 0; refIdx < longRefCount; ++refIdx) {
73       if (!pCtx->sRefPic.pLongRefList[list][refIdx]) {
74         return false;
75       }
76     }
77   }
78   return true;
79 }
80 
WelsTargetSliceConstruction(PWelsDecoderContext pCtx)81 int32_t WelsTargetSliceConstruction (PWelsDecoderContext pCtx) {
82   PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
83   PSlice pCurSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer;
84   PSliceHeader pSliceHeader = &pCurSlice->sSliceHeaderExt.sSliceHeader;
85 
86   int32_t iTotalMbTargetLayer = pSliceHeader->pSps->uiTotalMbCount;
87 
88   int32_t iCurLayerWidth  = pCurDqLayer->iMbWidth << 4;
89   int32_t iCurLayerHeight = pCurDqLayer->iMbHeight << 4;
90 
91   int32_t iNextMbXyIndex = 0;
92   PFmo pFmo = pCtx->pFmo;
93 
94   int32_t iTotalNumMb = pCurSlice->iTotalMbInCurSlice;
95   int32_t iCountNumMb = 0;
96   PDeblockingFilterMbFunc pDeblockMb = WelsDeblockingMb;
97 
98   if (!pCtx->sSpsPpsCtx.bAvcBasedFlag && iCurLayerWidth != pCtx->iCurSeqIntervalMaxPicWidth) {
99     return ERR_INFO_WIDTH_MISMATCH;
100   }
101 
102   iNextMbXyIndex   = pSliceHeader->iFirstMbInSlice;
103   pCurDqLayer->iMbX  = iNextMbXyIndex % pCurDqLayer->iMbWidth;
104   pCurDqLayer->iMbY  = iNextMbXyIndex / pCurDqLayer->iMbWidth;
105   pCurDqLayer->iMbXyIndex = iNextMbXyIndex;
106 
107   if (0 == iNextMbXyIndex) {
108     pCurDqLayer->pDec->iSpsId = pCtx->pSps->iSpsId;
109     pCurDqLayer->pDec->iPpsId = pCtx->pPps->iPpsId;
110 
111     pCurDqLayer->pDec->uiQualityId = pCurDqLayer->sLayerInfo.sNalHeaderExt.uiQualityId;
112   }
113 
114   do {
115     if (iCountNumMb >= iTotalNumMb) {
116       break;
117     }
118 
119     if (!pCtx->pParam->bParseOnly) { //for parse only, actual recon MB unnecessary
120       if (WelsTargetMbConstruction (pCtx)) {
121         WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING,
122                  "WelsTargetSliceConstruction():::MB(%d, %d) construction error. pCurSlice_type:%d",
123                  pCurDqLayer->iMbX, pCurDqLayer->iMbY, pCurSlice->eSliceType);
124 
125         return ERR_INFO_MB_RECON_FAIL;
126       }
127     }
128 
129     ++iCountNumMb;
130     if (!pCurDqLayer->pMbCorrectlyDecodedFlag[iNextMbXyIndex]) { //already con-ed, overwrite
131       pCurDqLayer->pMbCorrectlyDecodedFlag[iNextMbXyIndex] = true;
132       pCtx->pDec->iMbEcedPropNum += (pCurDqLayer->pMbRefConcealedFlag[iNextMbXyIndex] ? 1 : 0);
133       ++pCtx->iTotalNumMbRec;
134     }
135 
136     if (pCtx->iTotalNumMbRec > iTotalMbTargetLayer) {
137       WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING,
138                "WelsTargetSliceConstruction():::pCtx->iTotalNumMbRec:%d, iTotalMbTargetLayer:%d",
139                pCtx->iTotalNumMbRec, iTotalMbTargetLayer);
140 
141       return ERR_INFO_MB_NUM_EXCEED_FAIL;
142     }
143 
144     if (pSliceHeader->pPps->uiNumSliceGroups > 1) {
145       iNextMbXyIndex = FmoNextMb (pFmo, iNextMbXyIndex);
146     } else {
147       ++iNextMbXyIndex;
148     }
149     if (-1 == iNextMbXyIndex || iNextMbXyIndex >= iTotalMbTargetLayer) { // slice group boundary or end of a frame
150       break;
151     }
152     pCurDqLayer->iMbX  = iNextMbXyIndex % pCurDqLayer->iMbWidth;
153     pCurDqLayer->iMbY  = iNextMbXyIndex / pCurDqLayer->iMbWidth;
154     pCurDqLayer->iMbXyIndex = iNextMbXyIndex;
155   } while (1);
156 
157   pCtx->pDec->iWidthInPixel  = iCurLayerWidth;
158   pCtx->pDec->iHeightInPixel = iCurLayerHeight;
159 
160   if ((pCurSlice->eSliceType != I_SLICE) && (pCurSlice->eSliceType != P_SLICE) && (pCurSlice->eSliceType != B_SLICE))
161     return ERR_NONE; //no error but just ignore the type unsupported
162 
163   if (pCtx->pParam->bParseOnly) //for parse only, deblocking should not go on
164     return ERR_NONE;
165 
166   if (1 == pSliceHeader->uiDisableDeblockingFilterIdc
167       || pCtx->pCurDqLayer->sLayerInfo.sSliceInLayer.iTotalMbInCurSlice <= 0) {
168     return ERR_NONE;//NO_SUPPORTED_FILTER_IDX
169   } else {
170     WelsDeblockingFilterSlice (pCtx, pDeblockMb);
171   }
172   // any other filter_idc not supported here, 7/22/2010
173 
174   return ERR_NONE;
175 }
176 
WelsMbInterSampleConstruction(PWelsDecoderContext pCtx,PDqLayer pCurDqLayer,uint8_t * pDstY,uint8_t * pDstU,uint8_t * pDstV,int32_t iStrideL,int32_t iStrideC)177 int32_t WelsMbInterSampleConstruction (PWelsDecoderContext pCtx, PDqLayer pCurDqLayer,
178                                        uint8_t* pDstY, uint8_t* pDstU, uint8_t* pDstV, int32_t iStrideL, int32_t iStrideC) {
179   int32_t iMbXy = pCurDqLayer->iMbXyIndex;
180   int32_t i, iIndex, iOffset;
181 
182   if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
183     for (i = 0; i < 4; i++) {
184       iIndex = g_kuiMbCountScan4Idx[i << 2];
185       if (pCurDqLayer->pNzc[iMbXy][iIndex] || pCurDqLayer->pNzc[iMbXy][iIndex + 1] || pCurDqLayer->pNzc[iMbXy][iIndex + 4]
186           || pCurDqLayer->pNzc[iMbXy][iIndex + 5]) {
187         iOffset = ((iIndex >> 2) << 2) * iStrideL + ((iIndex % 4) << 2);
188         pCtx->pIdctResAddPredFunc8x8 (pDstY + iOffset, iStrideL, pCurDqLayer->pScaledTCoeff[iMbXy] + (i << 6));
189       }
190     }
191   } else {
192     // luma.
193     const int8_t* pNzc = pCurDqLayer->pNzc[iMbXy];
194     int16_t* pScaledTCoeff = pCurDqLayer->pScaledTCoeff[iMbXy];
195     pCtx->pIdctFourResAddPredFunc (pDstY + 0 * iStrideL + 0, iStrideL, pScaledTCoeff + 0 * 64, pNzc +  0);
196     pCtx->pIdctFourResAddPredFunc (pDstY + 0 * iStrideL + 8, iStrideL, pScaledTCoeff + 1 * 64, pNzc +  2);
197     pCtx->pIdctFourResAddPredFunc (pDstY + 8 * iStrideL + 0, iStrideL, pScaledTCoeff + 2 * 64, pNzc +  8);
198     pCtx->pIdctFourResAddPredFunc (pDstY + 8 * iStrideL + 8, iStrideL, pScaledTCoeff + 3 * 64, pNzc + 10);
199   }
200 
201   const int8_t* pNzc = pCurDqLayer->pNzc[iMbXy];
202   int16_t* pScaledTCoeff = pCurDqLayer->pScaledTCoeff[iMbXy];
203   // Cb.
204   pCtx->pIdctFourResAddPredFunc (pDstU, iStrideC, pScaledTCoeff + 4 * 64, pNzc + 16);
205   // Cr.
206   pCtx->pIdctFourResAddPredFunc (pDstV, iStrideC, pScaledTCoeff + 5 * 64, pNzc + 18);
207 
208   return ERR_NONE;
209 }
WelsMbInterConstruction(PWelsDecoderContext pCtx,PDqLayer pCurDqLayer)210 int32_t WelsMbInterConstruction (PWelsDecoderContext pCtx, PDqLayer pCurDqLayer) {
211   int32_t iMbX = pCurDqLayer->iMbX;
212   int32_t iMbY = pCurDqLayer->iMbY;
213   uint8_t*  pDstY, *pDstCb, *pDstCr;
214 
215   int32_t iLumaStride   = pCtx->pDec->iLinesize[0];
216   int32_t iChromaStride = pCtx->pDec->iLinesize[1];
217 
218   pDstY  = pCurDqLayer->pDec->pData[0] + ((iMbY * iLumaStride + iMbX) << 4);
219   pDstCb = pCurDqLayer->pDec->pData[1] + ((iMbY * iChromaStride + iMbX) << 3);
220   pDstCr = pCurDqLayer->pDec->pData[2] + ((iMbY * iChromaStride + iMbX) << 3);
221 
222   if (pCtx->eSliceType == P_SLICE) {
223     WELS_B_MB_REC_VERIFY (GetInterPred (pDstY, pDstCb, pDstCr, pCtx));
224   } else {
225     if (pCtx->pTempDec == NULL)
226       pCtx->pTempDec = AllocPicture (pCtx, pCtx->pSps->iMbWidth << 4, pCtx->pSps->iMbHeight << 4);
227     uint8_t*   pTempDstYCbCr[3];
228     uint8_t*   pDstYCbCr[3];
229     pTempDstYCbCr[0] = pCtx->pTempDec->pData[0] + ((iMbY * iLumaStride + iMbX) << 4);
230     pTempDstYCbCr[1] = pCtx->pTempDec->pData[1] + ((iMbY * iChromaStride + iMbX) << 3);
231     pTempDstYCbCr[2] = pCtx->pTempDec->pData[2] + ((iMbY * iChromaStride + iMbX) << 3);
232     pDstYCbCr[0] = pDstY;
233     pDstYCbCr[1] = pDstCb;
234     pDstYCbCr[2] = pDstCr;
235     WELS_B_MB_REC_VERIFY (GetInterBPred (pDstYCbCr, pTempDstYCbCr, pCtx));
236   }
237   WelsMbInterSampleConstruction (pCtx, pCurDqLayer, pDstY, pDstCb, pDstCr, iLumaStride, iChromaStride);
238 
239   if (GetThreadCount (pCtx) <= 1) {
240     pCtx->sBlockFunc.pWelsSetNonZeroCountFunc (
241       pCurDqLayer->pNzc[pCurDqLayer->iMbXyIndex]); // set all none-zero nzc to 1; dbk can be opti!
242   }
243   return ERR_NONE;
244 }
245 
WelsLumaDcDequantIdct(int16_t * pBlock,int32_t iQp,PWelsDecoderContext pCtx)246 void WelsLumaDcDequantIdct (int16_t* pBlock, int32_t iQp, PWelsDecoderContext pCtx) {
247   const int32_t kiQMul = pCtx->bUseScalingList ? pCtx->pDequant_coeff4x4[0][iQp][0] : (g_kuiDequantCoeff[iQp][0] << 4);
248 #define STRIDE 16
249   int32_t i;
250   int32_t iTemp[16]; //FIXME check if this is a good idea
251   int16_t* pBlk = pBlock;
252   static const int32_t kiXOffset[4] = {0, STRIDE, STRIDE << 2,  5 * STRIDE};
253   static const int32_t kiYOffset[4] = {0, STRIDE << 1, STRIDE << 3, 10 * STRIDE};
254 
255   for (i = 0; i < 4; i++) {
256     const int32_t kiOffset = kiYOffset[i];
257     const int32_t kiX1 = kiOffset + kiXOffset[2];
258     const int32_t kiX2 = STRIDE + kiOffset;
259     const int32_t kiX3 = kiOffset + kiXOffset[3];
260     const int32_t kiI4 = i << 2; // 4*i
261     const int32_t kiZ0 = pBlk[kiOffset] + pBlk[kiX1];
262     const int32_t kiZ1 = pBlk[kiOffset] - pBlk[kiX1];
263     const int32_t kiZ2 = pBlk[kiX2] - pBlk[kiX3];
264     const int32_t kiZ3 = pBlk[kiX2] + pBlk[kiX3];
265 
266     iTemp[kiI4]  = kiZ0 + kiZ3;
267     iTemp[1 + kiI4] = kiZ1 + kiZ2;
268     iTemp[2 + kiI4] = kiZ1 - kiZ2;
269     iTemp[3 + kiI4] = kiZ0 - kiZ3;
270   }
271 
272   for (i = 0; i < 4; i++) {
273     const int32_t kiOffset = kiXOffset[i];
274     const int32_t kiI4 = 4 + i;
275     const int32_t kiZ0 = iTemp[i] + iTemp[4 + kiI4];
276     const int32_t kiZ1 = iTemp[i] - iTemp[4 + kiI4];
277     const int32_t kiZ2 = iTemp[kiI4] - iTemp[8 + kiI4];
278     const int32_t kiZ3 = iTemp[kiI4] + iTemp[8 + kiI4];
279 
280     pBlk[kiOffset] = ((kiZ0 + kiZ3) * kiQMul + (1 << 5)) >> 6; //FIXME think about merging this into decode_resdual
281     pBlk[kiYOffset[1] + kiOffset] = ((kiZ1 + kiZ2) * kiQMul + (1 << 5)) >> 6;
282     pBlk[kiYOffset[2] + kiOffset] = ((kiZ1 - kiZ2) * kiQMul + (1 << 5)) >> 6;
283     pBlk[kiYOffset[3] + kiOffset] = ((kiZ0 - kiZ3) * kiQMul + (1 << 5)) >> 6;
284   }
285 #undef STRIDE
286 }
287 
WelsMbIntraPredictionConstruction(PWelsDecoderContext pCtx,PDqLayer pCurDqLayer,bool bOutput)288 int32_t WelsMbIntraPredictionConstruction (PWelsDecoderContext pCtx, PDqLayer pCurDqLayer, bool bOutput) {
289 //seems IPCM should not enter this path
290   int32_t iMbXy = pCurDqLayer->iMbXyIndex;
291 
292   WelsFillRecNeededMbInfo (pCtx, bOutput, pCurDqLayer);
293 
294   if (IS_INTRA16x16 (pCurDqLayer->pDec->pMbType[iMbXy])) {
295     RecI16x16Mb (iMbXy, pCtx, pCurDqLayer->pScaledTCoeff[iMbXy], pCurDqLayer);
296   } else if (IS_INTRA8x8 (pCurDqLayer->pDec->pMbType[iMbXy])) {
297     RecI8x8Mb (iMbXy, pCtx, pCurDqLayer->pScaledTCoeff[iMbXy], pCurDqLayer);
298   } else if (IS_INTRA4x4 (pCurDqLayer->pDec->pMbType[iMbXy])) {
299     RecI4x4Mb (iMbXy, pCtx, pCurDqLayer->pScaledTCoeff[iMbXy], pCurDqLayer);
300   }
301   return ERR_NONE;
302 }
303 
WelsMbInterPrediction(PWelsDecoderContext pCtx,PDqLayer pCurDqLayer)304 int32_t WelsMbInterPrediction (PWelsDecoderContext pCtx, PDqLayer pCurDqLayer) {
305   int32_t iMbX = pCurDqLayer->iMbX;
306   int32_t iMbY = pCurDqLayer->iMbY;
307   uint8_t*  pDstY, *pDstCb, *pDstCr;
308 
309   int32_t iLumaStride   = pCtx->pDec->iLinesize[0];
310   int32_t iChromaStride = pCtx->pDec->iLinesize[1];
311 
312   pDstY  = pCurDqLayer->pDec->pData[0] + ((iMbY * iLumaStride + iMbX) << 4);
313   pDstCb = pCurDqLayer->pDec->pData[1] + ((iMbY * iChromaStride + iMbX) << 3);
314   pDstCr = pCurDqLayer->pDec->pData[2] + ((iMbY * iChromaStride + iMbX) << 3);
315 
316   if (pCtx->eSliceType == P_SLICE) {
317     WELS_B_MB_REC_VERIFY (GetInterPred (pDstY, pDstCb, pDstCr, pCtx));
318   } else {
319     if (pCtx->pTempDec == NULL)
320       pCtx->pTempDec = AllocPicture (pCtx, pCtx->pSps->iMbWidth << 4, pCtx->pSps->iMbHeight << 4);
321     uint8_t*   pTempDstYCbCr[3];
322     uint8_t*   pDstYCbCr[3];
323     pTempDstYCbCr[0] = pCtx->pTempDec->pData[0] + ((iMbY * iLumaStride + iMbX) << 4);
324     pTempDstYCbCr[1] = pCtx->pTempDec->pData[1] + ((iMbY * iChromaStride + iMbX) << 3);
325     pTempDstYCbCr[2] = pCtx->pTempDec->pData[2] + ((iMbY * iChromaStride + iMbX) << 3);
326     pDstYCbCr[0] = pDstY;
327     pDstYCbCr[1] = pDstCb;
328     pDstYCbCr[2] = pDstCr;
329     WELS_B_MB_REC_VERIFY (GetInterBPred (pDstYCbCr, pTempDstYCbCr, pCtx));
330   }
331   return ERR_NONE;
332 }
333 
WelsTargetMbConstruction(PWelsDecoderContext pCtx)334 int32_t WelsTargetMbConstruction (PWelsDecoderContext pCtx) {
335   PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
336   if (MB_TYPE_INTRA_PCM == pCurDqLayer->pDec->pMbType[pCurDqLayer->iMbXyIndex]) {
337     //already decoded and reconstructed when parsing
338     return ERR_NONE;
339   } else if (IS_INTRA (pCurDqLayer->pDec->pMbType[pCurDqLayer->iMbXyIndex])) {
340     WelsMbIntraPredictionConstruction (pCtx, pCurDqLayer, 1);
341   } else if (IS_INTER (pCurDqLayer->pDec->pMbType[pCurDqLayer->iMbXyIndex])) { //InterMB
342     if (0 == pCurDqLayer->pCbp[pCurDqLayer->iMbXyIndex]) { //uiCbp==0 include SKIP
343       if (!CheckRefPics (pCtx)) {
344         return ERR_INFO_MB_RECON_FAIL;
345       }
346       return WelsMbInterPrediction (pCtx, pCurDqLayer);
347     } else {
348       WelsMbInterConstruction (pCtx, pCurDqLayer);
349     }
350   } else {
351     WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "WelsTargetMbConstruction():::::Unknown MB type: %d",
352              pCurDqLayer->pDec->pMbType[pCurDqLayer->iMbXyIndex]);
353     return ERR_INFO_MB_RECON_FAIL;
354   }
355 
356   return ERR_NONE;
357 }
358 
/*!
 * \brief In-place 2x2 butterfly over the four chroma DC coefficients.
 *
 * The four values live at element offsets 0, 16, 32 and 48 of pBlock. With
 * a = pBlock[0], b = pBlock[16], c = pBlock[32], d = pBlock[48] the outputs are
 *   pBlock[0]  = (a + b) + (c + d)
 *   pBlock[16] = (a - b) + (c - d)
 *   pBlock[32] = (a + b) - (c + d)
 *   pBlock[48] = (a - b) - (c - d)
 *
 * \param pBlock coefficient buffer, modified in place
 */
void WelsChromaDcIdct (int16_t* pBlock) {
  const int32_t kiColStep = 16;
  const int32_t kiRowStep = 32;
  const int32_t kiDiagStep = kiColStep + kiRowStep;

  const int32_t kiTopLeft     = pBlock[0];
  const int32_t kiTopRight    = pBlock[kiColStep];
  const int32_t kiBottomLeft  = pBlock[kiRowStep];
  const int32_t kiBottomRight = pBlock[kiDiagStep];

  const int32_t kiTopSum     = kiTopLeft + kiTopRight;
  const int32_t kiTopDiff    = kiTopLeft - kiTopRight;
  const int32_t kiBottomSum  = kiBottomLeft + kiBottomRight;
  const int32_t kiBottomDiff = kiBottomLeft - kiBottomRight;

  pBlock[0]          = static_cast<int16_t> (kiTopSum + kiBottomSum);
  pBlock[kiColStep]  = static_cast<int16_t> (kiTopDiff + kiBottomDiff);
  pBlock[kiRowStep]  = static_cast<int16_t> (kiTopSum - kiBottomSum);
  pBlock[kiDiagStep] = static_cast<int16_t> (kiTopDiff - kiBottomDiff);
}
381 
WelsMapNxNNeighToSampleNormal(PWelsNeighAvail pNeighAvail,int32_t * pSampleAvail)382 void WelsMapNxNNeighToSampleNormal (PWelsNeighAvail pNeighAvail, int32_t* pSampleAvail) {
383   if (pNeighAvail->iLeftAvail) {  //left
384     pSampleAvail[ 6] =
385       pSampleAvail[12] =
386         pSampleAvail[18] =
387           pSampleAvail[24] = 1;
388   }
389   if (pNeighAvail->iLeftTopAvail) { //top_left
390     pSampleAvail[0] = 1;
391   }
392   if (pNeighAvail->iTopAvail) { //top
393     pSampleAvail[1] =
394       pSampleAvail[2] =
395         pSampleAvail[3] =
396           pSampleAvail[4] = 1;
397   }
398   if (pNeighAvail->iRightTopAvail) { //top_right
399     pSampleAvail[5] = 1;
400   }
401 }
402 
WelsMapNxNNeighToSampleConstrain1(PWelsNeighAvail pNeighAvail,int32_t * pSampleAvail)403 void WelsMapNxNNeighToSampleConstrain1 (PWelsNeighAvail pNeighAvail, int32_t* pSampleAvail) {
404   if (pNeighAvail->iLeftAvail && IS_INTRA (pNeighAvail->iLeftType)) {   //left
405     pSampleAvail[ 6] =
406       pSampleAvail[12] =
407         pSampleAvail[18] =
408           pSampleAvail[24] = 1;
409   }
410   if (pNeighAvail->iLeftTopAvail && IS_INTRA (pNeighAvail->iLeftTopType)) {  //top_left
411     pSampleAvail[0] = 1;
412   }
413   if (pNeighAvail->iTopAvail && IS_INTRA (pNeighAvail->iTopType)) {  //top
414     pSampleAvail[1] =
415       pSampleAvail[2] =
416         pSampleAvail[3] =
417           pSampleAvail[4] = 1;
418   }
419   if (pNeighAvail->iRightTopAvail && IS_INTRA (pNeighAvail->iRightTopType)) {  //top_right
420     pSampleAvail[5] = 1;
421   }
422 }
WelsMap16x16NeighToSampleNormal(PWelsNeighAvail pNeighAvail,uint8_t * pSampleAvail)423 void WelsMap16x16NeighToSampleNormal (PWelsNeighAvail pNeighAvail, uint8_t* pSampleAvail) {
424   if (pNeighAvail->iLeftAvail) {
425     *pSampleAvail = (1 << 2);
426   }
427   if (pNeighAvail->iLeftTopAvail) {
428     *pSampleAvail |= (1 << 1);
429   }
430   if (pNeighAvail->iTopAvail) {
431     *pSampleAvail |= 1;
432   }
433 }
434 
WelsMap16x16NeighToSampleConstrain1(PWelsNeighAvail pNeighAvail,uint8_t * pSampleAvail)435 void WelsMap16x16NeighToSampleConstrain1 (PWelsNeighAvail pNeighAvail, uint8_t* pSampleAvail) {
436   if (pNeighAvail->iLeftAvail && IS_INTRA (pNeighAvail->iLeftType)) {
437     *pSampleAvail = (1 << 2);
438   }
439   if (pNeighAvail->iLeftTopAvail && IS_INTRA (pNeighAvail->iLeftTopType)) {
440     *pSampleAvail |= (1 << 1);
441   }
442   if (pNeighAvail->iTopAvail && IS_INTRA (pNeighAvail->iTopType)) {
443     *pSampleAvail |= 1;
444   }
445 }
446 
ParseIntra4x4Mode(PWelsDecoderContext pCtx,PWelsNeighAvail pNeighAvail,int8_t * pIntraPredMode,PBitStringAux pBs,PDqLayer pCurDqLayer)447 int32_t ParseIntra4x4Mode (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, int8_t* pIntraPredMode,
448                            PBitStringAux pBs,
449                            PDqLayer pCurDqLayer) {
450   int32_t iSampleAvail[5 * 6] = { 0 }; //initialize as 0
451   int32_t iMbXy = pCurDqLayer->iMbXyIndex;
452   int32_t iFinalMode, i;
453 
454   uint8_t uiNeighAvail = 0;
455   uint32_t uiCode;
456   int32_t iCode;
457   pCtx->pMapNxNNeighToSampleFunc (pNeighAvail, iSampleAvail);
458   uiNeighAvail = (iSampleAvail[6] << 2) | (iSampleAvail[0] << 1) | (iSampleAvail[1]);
459   for (i = 0; i < 16; i++) {
460     int32_t iPrevIntra4x4PredMode = 0;
461     if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag) {
462       WELS_READ_VERIFY (ParseIntraPredModeLumaCabac (pCtx, iCode));
463       iPrevIntra4x4PredMode = iCode;
464     } else {
465       WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode));
466       iPrevIntra4x4PredMode = uiCode;
467     }
468     const int32_t kiPredMode = PredIntra4x4Mode (pIntraPredMode, i);
469 
470     int8_t iBestMode;
471     if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag) {
472       if (iPrevIntra4x4PredMode == -1)
473         iBestMode = kiPredMode;
474       else
475         iBestMode = iPrevIntra4x4PredMode + (iPrevIntra4x4PredMode >= kiPredMode);
476     } else {
477       if (iPrevIntra4x4PredMode) {
478         iBestMode = kiPredMode;
479       } else {
480         WELS_READ_VERIFY (BsGetBits (pBs, 3, &uiCode));
481         iBestMode = uiCode + ((int32_t) uiCode >= kiPredMode);
482       }
483     }
484 
485     iFinalMode = CheckIntraNxNPredMode (&iSampleAvail[0], &iBestMode, i, false);
486     if (iFinalMode == GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INVALID_INTRA4X4_MODE)) {
487       return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I4x4_PRED_MODE);
488     }
489 
490     pCurDqLayer->pIntra4x4FinalMode[iMbXy][g_kuiScan4[i]] = iFinalMode;
491 
492     pIntraPredMode[g_kuiScan8[i]] = iBestMode;
493 
494     iSampleAvail[g_kuiCache30ScanIdx[i]] = 1;
495   }
496   ST32 (&pCurDqLayer->pIntraPredMode[iMbXy][0], LD32 (&pIntraPredMode[1 + 8 * 4]));
497   pCurDqLayer->pIntraPredMode[iMbXy][4] = pIntraPredMode[4 + 8 * 1];
498   pCurDqLayer->pIntraPredMode[iMbXy][5] = pIntraPredMode[4 + 8 * 2];
499   pCurDqLayer->pIntraPredMode[iMbXy][6] = pIntraPredMode[4 + 8 * 3];
500 
501   if (pCtx->pSps->uiChromaFormatIdc == 0)//no need parse chroma
502     return ERR_NONE;
503 
504   if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag) {
505     WELS_READ_VERIFY (ParseIntraPredModeChromaCabac (pCtx, uiNeighAvail, iCode));
506     if (iCode > MAX_PRED_MODE_ID_CHROMA) {
507       return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I_CHROMA_PRED_MODE);
508     }
509     pCurDqLayer->pChromaPredMode[iMbXy] = iCode;
510   } else {
511     WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //intra_chroma_pred_mode
512     if (uiCode > MAX_PRED_MODE_ID_CHROMA) {
513       return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I_CHROMA_PRED_MODE);
514     }
515     pCurDqLayer->pChromaPredMode[iMbXy] = uiCode;
516   }
517 
518   if (-1 == pCurDqLayer->pChromaPredMode[iMbXy]
519       || CheckIntraChromaPredMode (uiNeighAvail, &pCurDqLayer->pChromaPredMode[iMbXy])) {
520     return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I_CHROMA_PRED_MODE);
521   }
522   return ERR_NONE;
523 }
524 
ParseIntra8x8Mode(PWelsDecoderContext pCtx,PWelsNeighAvail pNeighAvail,int8_t * pIntraPredMode,PBitStringAux pBs,PDqLayer pCurDqLayer)525 int32_t ParseIntra8x8Mode (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, int8_t* pIntraPredMode,
526                            PBitStringAux pBs,
527                            PDqLayer pCurDqLayer) {
528   // Similar with Intra_4x4, can put them together when needed
529   int32_t iSampleAvail[5 * 6] = { 0 }; //initialize as 0
530   int32_t iMbXy = pCurDqLayer->iMbXyIndex;
531   int32_t iFinalMode, i;
532 
533   uint8_t uiNeighAvail = 0;
534   uint32_t uiCode;
535   int32_t iCode;
536   pCtx->pMapNxNNeighToSampleFunc (pNeighAvail, iSampleAvail);
537   // Top-Right : Left : Top-Left : Top
538   uiNeighAvail = (iSampleAvail[5] << 3) | (iSampleAvail[6] << 2) | (iSampleAvail[0] << 1) | (iSampleAvail[1]);
539 
540   pCurDqLayer->pIntraNxNAvailFlag[iMbXy] = uiNeighAvail;
541 
542   for (i = 0; i < 4; i++) {
543     int32_t iPrevIntra4x4PredMode = 0;
544     if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag) {
545       WELS_READ_VERIFY (ParseIntraPredModeLumaCabac (pCtx, iCode));
546       iPrevIntra4x4PredMode = iCode;
547     } else {
548       WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode));
549       iPrevIntra4x4PredMode = uiCode;
550     }
551     const int32_t kiPredMode = PredIntra4x4Mode (pIntraPredMode, i << 2);
552 
553     int8_t iBestMode;
554     if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag) {
555       if (iPrevIntra4x4PredMode == -1)
556         iBestMode = kiPredMode;
557       else
558         iBestMode = iPrevIntra4x4PredMode + (iPrevIntra4x4PredMode >= kiPredMode);
559     } else {
560       if (iPrevIntra4x4PredMode) {
561         iBestMode = kiPredMode;
562       } else {
563         WELS_READ_VERIFY (BsGetBits (pBs, 3, &uiCode));
564         iBestMode = uiCode + ((int32_t) uiCode >= kiPredMode);
565       }
566     }
567 
568     iFinalMode = CheckIntraNxNPredMode (&iSampleAvail[0], &iBestMode, i << 2, true);
569 
570     if (iFinalMode == GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INVALID_INTRA4X4_MODE)) {
571       return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I4x4_PRED_MODE);
572     }
573 
574     for (int j = 0; j < 4; j++) {
575       pCurDqLayer->pIntra4x4FinalMode[iMbXy][g_kuiScan4[ (i << 2) + j]] = iFinalMode;
576       pIntraPredMode[g_kuiScan8[ (i << 2) + j]] = iBestMode;
577       iSampleAvail[g_kuiCache30ScanIdx[ (i << 2) + j]] = 1;
578     }
579   }
580   ST32 (&pCurDqLayer->pIntraPredMode[iMbXy][0], LD32 (&pIntraPredMode[1 + 8 * 4]));
581   pCurDqLayer->pIntraPredMode[iMbXy][4] = pIntraPredMode[4 + 8 * 1];
582   pCurDqLayer->pIntraPredMode[iMbXy][5] = pIntraPredMode[4 + 8 * 2];
583   pCurDqLayer->pIntraPredMode[iMbXy][6] = pIntraPredMode[4 + 8 * 3];
584 
585   if (pCtx->pSps->uiChromaFormatIdc == 0)
586     return ERR_NONE;
587 
588   if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag) {
589     WELS_READ_VERIFY (ParseIntraPredModeChromaCabac (pCtx, uiNeighAvail, iCode));
590     if (iCode > MAX_PRED_MODE_ID_CHROMA) {
591       return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I_CHROMA_PRED_MODE);
592     }
593     pCurDqLayer->pChromaPredMode[iMbXy] = iCode;
594   } else {
595     WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //intra_chroma_pred_mode
596     if (uiCode > MAX_PRED_MODE_ID_CHROMA) {
597       return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I_CHROMA_PRED_MODE);
598     }
599     pCurDqLayer->pChromaPredMode[iMbXy] = uiCode;
600   }
601 
602   if (-1 == pCurDqLayer->pChromaPredMode[iMbXy]
603       || CheckIntraChromaPredMode (uiNeighAvail, &pCurDqLayer->pChromaPredMode[iMbXy])) {
604     return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I_CHROMA_PRED_MODE);
605   }
606 
607   return ERR_NONE;
608 }
609 
ParseIntra16x16Mode(PWelsDecoderContext pCtx,PWelsNeighAvail pNeighAvail,PBitStringAux pBs,PDqLayer pCurDqLayer)610 int32_t ParseIntra16x16Mode (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, PBitStringAux pBs,
611                              PDqLayer pCurDqLayer) {
612   int32_t iMbXy = pCurDqLayer->iMbXyIndex;
613   uint8_t uiNeighAvail = 0; //0x07 = 0 1 1 1, means left, top-left, top avail or not. (1: avail, 0: unavail)
614   uint32_t uiCode;
615   int32_t iCode;
616   pCtx->pMap16x16NeighToSampleFunc (pNeighAvail, &uiNeighAvail);
617 
618   if (CheckIntra16x16PredMode (uiNeighAvail,
619                                &pCurDqLayer->pIntraPredMode[iMbXy][7])) { //invalid iPredMode, must stop decoding
620     return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I16x16_PRED_MODE);
621   }
622   if (pCtx->pSps->uiChromaFormatIdc == 0)
623     return ERR_NONE;
624 
625   if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag) {
626     WELS_READ_VERIFY (ParseIntraPredModeChromaCabac (pCtx, uiNeighAvail, iCode));
627     if (iCode > MAX_PRED_MODE_ID_CHROMA) {
628       return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I_CHROMA_PRED_MODE);
629     }
630     pCurDqLayer->pChromaPredMode[iMbXy] = iCode;
631   } else {
632     WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //intra_chroma_pred_mode
633     if (uiCode > MAX_PRED_MODE_ID_CHROMA) {
634       return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I_CHROMA_PRED_MODE);
635     }
636     pCurDqLayer->pChromaPredMode[iMbXy] = uiCode;
637   }
638   if (-1 == pCurDqLayer->pChromaPredMode[iMbXy]
639       || CheckIntraChromaPredMode (uiNeighAvail, &pCurDqLayer->pChromaPredMode[iMbXy])) {
640     return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I_CHROMA_PRED_MODE);
641   }
642 
643   return ERR_NONE;
644 }
645 
WelsDecodeMbCabacISliceBaseMode0(PWelsDecoderContext pCtx,uint32_t & uiEosFlag)646 int32_t WelsDecodeMbCabacISliceBaseMode0 (PWelsDecoderContext pCtx, uint32_t& uiEosFlag) {
647   PDqLayer pCurDqLayer             = pCtx->pCurDqLayer;
648   PBitStringAux pBsAux           = pCurDqLayer->pBitStringAux;
649   PSlice pSlice                  = &pCurDqLayer->sLayerInfo.sSliceInLayer;
650   PSliceHeader pSliceHeader      = &pSlice->sSliceHeaderExt.sSliceHeader;
651   SWelsNeighAvail sNeighAvail;
652   int32_t iScanIdxStart = pSlice->sSliceHeaderExt.uiScanIdxStart;
653   int32_t iScanIdxEnd   = pSlice->sSliceHeaderExt.uiScanIdxEnd;
654   int32_t iMbXy = pCurDqLayer->iMbXyIndex;
655   int32_t i;
656   uint32_t uiMbType = 0, uiCbp = 0, uiCbpLuma = 0, uiCbpChroma = 0;
657 
658   ENFORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16);
659 
660   pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
661   pCurDqLayer->pTransformSize8x8Flag[iMbXy] = false;
662 
663   pCurDqLayer->pInterPredictionDoneFlag[iMbXy] = 0;
664   pCurDqLayer->pResidualPredFlag[iMbXy] = pSlice->sSliceHeaderExt.bDefaultResidualPredFlag;
665   GetNeighborAvailMbType (&sNeighAvail, pCurDqLayer);
666   WELS_READ_VERIFY (ParseMBTypeISliceCabac (pCtx, &sNeighAvail, uiMbType));
667   if (uiMbType > 25) {
668     return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE);
669   } else if (!pCtx->pSps->uiChromaFormatIdc && ((uiMbType >= 5 && uiMbType <= 12) || (uiMbType >= 17
670              && uiMbType <= 24))) {
671     return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE);
672   } else if (25 == uiMbType) {   //I_PCM
673     WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "I_PCM mode exists in I slice!");
674     WELS_READ_VERIFY (ParseIPCMInfoCabac (pCtx));
675     pSlice->iLastDeltaQp = 0;
676     WELS_READ_VERIFY (ParseEndOfSliceCabac (pCtx, uiEosFlag));
677     if (uiEosFlag) {
678       RestoreCabacDecEngineToBS (pCtx->pCabacDecEngine, pCtx->pCurDqLayer->pBitStringAux);
679     }
680     return ERR_NONE;
681   } else if (0 == uiMbType) { //I4x4
682     ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16);
683     pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA4x4;
684     if (pCtx->pPps->bTransform8x8ModeFlag) {
685       // Transform 8x8 cabac will be added soon
686       WELS_READ_VERIFY (ParseTransformSize8x8FlagCabac (pCtx, &sNeighAvail, pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]));
687     }
688     if (pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
689       uiMbType = pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA8x8;
690       pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurDqLayer);
691       WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, &sNeighAvail, pIntraPredMode, pBsAux, pCurDqLayer));
692     } else {
693       pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurDqLayer);
694       WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, &sNeighAvail, pIntraPredMode, pBsAux, pCurDqLayer));
695     }
696     //get uiCbp for I4x4
697     WELS_READ_VERIFY (ParseCbpInfoCabac (pCtx, &sNeighAvail, uiCbp));
698     pCurDqLayer->pCbp[iMbXy] = uiCbp;
699     pSlice->iLastDeltaQp = uiCbp == 0 ? 0 : pSlice->iLastDeltaQp;
700     uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? uiCbp >> 4 : 0;
701     uiCbpLuma = uiCbp & 15;
702   } else { //I16x16;
703     pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA16x16;
704     pCurDqLayer->pTransformSize8x8Flag[iMbXy] = false;
705     pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
706     pCurDqLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3;
707     pCurDqLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2];
708     uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? pCurDqLayer->pCbp[iMbXy] >> 4 : 0 ;
709     uiCbpLuma = pCurDqLayer->pCbp[iMbXy] & 15;
710     WelsFillCacheNonZeroCount (&sNeighAvail, pNonZeroCount, pCurDqLayer);
711     WELS_READ_VERIFY (ParseIntra16x16Mode (pCtx, &sNeighAvail, pBsAux, pCurDqLayer));
712   }
713 
714   ST32 (&pCurDqLayer->pNzc[iMbXy][0], 0);
715   ST32 (&pCurDqLayer->pNzc[iMbXy][4], 0);
716   ST32 (&pCurDqLayer->pNzc[iMbXy][8], 0);
717   ST32 (&pCurDqLayer->pNzc[iMbXy][12], 0);
718   ST32 (&pCurDqLayer->pNzc[iMbXy][16], 0);
719   ST32 (&pCurDqLayer->pNzc[iMbXy][20], 0);
720   pCurDqLayer->pCbfDc[iMbXy] = 0;
721 
722   if (pCurDqLayer->pCbp[iMbXy] == 0 && IS_INTRANxN (pCurDqLayer->pDec->pMbType[iMbXy])) {
723     pCurDqLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp;
724     for (i = 0; i < 2; i++) {
725       pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 ((pCurDqLayer->pLumaQp[iMbXy] +
726                                          pSliceHeader->pPps->iChromaQpIndexOffset[i]), 0, 51)];
727     }
728   }
729 
730   if (pCurDqLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurDqLayer->pDec->pMbType[iMbXy]) {
731     memset (pCurDqLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (pCurDqLayer->pScaledTCoeff[iMbXy][0]));
732     int32_t iQpDelta, iId8x8, iId4x4;
733     WELS_READ_VERIFY (ParseDeltaQpCabac (pCtx, iQpDelta));
734     if (iQpDelta > 25 || iQpDelta < -26) {//out of iQpDelta range
735       return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_QP);
736     }
737     pCurDqLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp
738     pSlice->iLastMbQp = pCurDqLayer->pLumaQp[iMbXy];
739     for (i = 0; i < 2; i++) {
740       pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 ((pSlice->iLastMbQp +
741                                          pSliceHeader->pPps->iChromaQpIndexOffset[i]), 0, 51)];
742     }
743     if (MB_TYPE_INTRA16x16 == pCurDqLayer->pDec->pMbType[iMbXy]) {
744       //step1: Luma DC
745       WELS_READ_VERIFY (ParseResidualBlockCabac (&sNeighAvail, pNonZeroCount, pBsAux, 0, 16, g_kuiLumaDcZigzagScan,
746                         I16_LUMA_DC, pCurDqLayer->pScaledTCoeff[iMbXy], pCurDqLayer->pLumaQp[iMbXy], pCtx));
747       //step2: Luma AC
748       if (uiCbpLuma) {
749         for (i = 0; i < 16; i++) {
750           WELS_READ_VERIFY (ParseResidualBlockCabac (&sNeighAvail, pNonZeroCount, pBsAux, i,
751                             iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), I16_LUMA_AC,
752                             pCurDqLayer->pScaledTCoeff[iMbXy] + (i << 4), pCurDqLayer->pLumaQp[iMbXy], pCtx));
753         }
754         ST32 (&pCurDqLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
755         ST32 (&pCurDqLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
756         ST32 (&pCurDqLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
757         ST32 (&pCurDqLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
758       } else { //pNonZeroCount = 0
759         ST32 (&pCurDqLayer->pNzc[iMbXy][0], 0);
760         ST32 (&pCurDqLayer->pNzc[iMbXy][4], 0);
761         ST32 (&pCurDqLayer->pNzc[iMbXy][8], 0);
762         ST32 (&pCurDqLayer->pNzc[iMbXy][12], 0);
763       }
764     } else { //non-MB_TYPE_INTRA16x16
765       if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
766         // Transform 8x8 support for CABAC
767         for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
768           if (uiCbpLuma & (1 << iId8x8)) {
769             WELS_READ_VERIFY (ParseResidualBlockCabac8x8 (&sNeighAvail, pNonZeroCount, pBsAux, (iId8x8 << 2),
770                               iScanIdxEnd - iScanIdxStart + 1, g_kuiZigzagScan8x8 + iScanIdxStart, LUMA_DC_AC_INTRA_8,
771                               pCurDqLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), pCurDqLayer->pLumaQp[iMbXy], pCtx));
772           } else {
773             ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2)]], 0);
774             ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0);
775           }
776         }
777         ST32 (&pCurDqLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
778         ST32 (&pCurDqLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
779         ST32 (&pCurDqLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
780         ST32 (&pCurDqLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
781       } else {
782         for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
783           if (uiCbpLuma & (1 << iId8x8)) {
784             int32_t iIdx = (iId8x8 << 2);
785             for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
786               //Luma (DC and AC decoding together)
787               WELS_READ_VERIFY (ParseResidualBlockCabac (&sNeighAvail, pNonZeroCount, pBsAux, iIdx, iScanIdxEnd - iScanIdxStart + 1,
788                                 g_kuiZigzagScan + iScanIdxStart, LUMA_DC_AC_INTRA, pCurDqLayer->pScaledTCoeff[iMbXy] + (iIdx << 4),
789                                 pCurDqLayer->pLumaQp[iMbXy], pCtx));
790               iIdx++;
791             }
792           } else {
793             ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2)]], 0);
794             ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0);
795           }
796         }
797         ST32 (&pCurDqLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
798         ST32 (&pCurDqLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
799         ST32 (&pCurDqLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
800         ST32 (&pCurDqLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
801       }
802     }
803     int32_t iMbResProperty;
804     //chroma
805     //step1: DC
806     if (1 == uiCbpChroma || 2 == uiCbpChroma) {
807       //Cb Cr
808       for (i = 0; i < 2; i++) {
809         iMbResProperty = i ? CHROMA_DC_V : CHROMA_DC_U;
810         WELS_READ_VERIFY (ParseResidualBlockCabac (&sNeighAvail, pNonZeroCount, pBsAux, 16 + (i << 2), 4, g_kuiChromaDcScan,
811                           iMbResProperty, pCurDqLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6), pCurDqLayer->pChromaQp[iMbXy][i], pCtx));
812       }
813     }
814 
815     //step2: AC
816     if (2 == uiCbpChroma) {
817       for (i = 0; i < 2; i++) { //Cb Cr
818         iMbResProperty = i ? CHROMA_AC_V : CHROMA_AC_U;
819         int32_t iIdx = 16 + (i << 2);
820         for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
821           WELS_READ_VERIFY (ParseResidualBlockCabac (&sNeighAvail, pNonZeroCount, pBsAux, iIdx,
822                             iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), iMbResProperty,
823                             pCurDqLayer->pScaledTCoeff[iMbXy] + (iIdx << 4), pCurDqLayer->pChromaQp[iMbXy][i], pCtx));
824           iIdx++;
825         }
826       }
827       ST16 (&pCurDqLayer->pNzc[iMbXy][16], LD16 (&pNonZeroCount[6 + 8 * 1]));
828       ST16 (&pCurDqLayer->pNzc[iMbXy][20], LD16 (&pNonZeroCount[6 + 8 * 2]));
829       ST16 (&pCurDqLayer->pNzc[iMbXy][18], LD16 (&pNonZeroCount[6 + 8 * 4]));
830       ST16 (&pCurDqLayer->pNzc[iMbXy][22], LD16 (&pNonZeroCount[6 + 8 * 5]));
831     } else {
832       ST16 (&pCurDqLayer->pNzc[iMbXy][16], 0);
833       ST16 (&pCurDqLayer->pNzc[iMbXy][20], 0);
834       ST16 (&pCurDqLayer->pNzc[iMbXy][18], 0);
835       ST16 (&pCurDqLayer->pNzc[iMbXy][22], 0);
836     }
837   } else {
838     ST32 (&pCurDqLayer->pNzc[iMbXy][0], 0);
839     ST32 (&pCurDqLayer->pNzc[iMbXy][4], 0);
840     ST32 (&pCurDqLayer->pNzc[iMbXy][8], 0);
841     ST32 (&pCurDqLayer->pNzc[iMbXy][12], 0);
842     ST32 (&pCurDqLayer->pNzc[iMbXy][16], 0);
843     ST32 (&pCurDqLayer->pNzc[iMbXy][20], 0);
844   }
845 
846   WELS_READ_VERIFY (ParseEndOfSliceCabac (pCtx, uiEosFlag));
847   if (uiEosFlag) {
848     RestoreCabacDecEngineToBS (pCtx->pCabacDecEngine, pCtx->pCurDqLayer->pBitStringAux);
849   }
850   return ERR_NONE;
851 }
852 
WelsDecodeMbCabacISlice(PWelsDecoderContext pCtx,PNalUnit pNalCur,uint32_t & uiEosFlag)853 int32_t WelsDecodeMbCabacISlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag) {
854   WELS_READ_VERIFY (WelsDecodeMbCabacISliceBaseMode0 (pCtx, uiEosFlag));
855   return ERR_NONE;
856 }
857 
WelsDecodeMbCabacPSliceBaseMode0(PWelsDecoderContext pCtx,PWelsNeighAvail pNeighAvail,uint32_t & uiEosFlag)858 int32_t WelsDecodeMbCabacPSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiEosFlag) {
859   PDqLayer pCurDqLayer             = pCtx->pCurDqLayer;
860   PBitStringAux pBsAux           = pCurDqLayer->pBitStringAux;
861   PSlice pSlice                  = &pCurDqLayer->sLayerInfo.sSliceInLayer;
862   PSliceHeader pSliceHeader      = &pSlice->sSliceHeaderExt.sSliceHeader;
863 
864   int32_t iScanIdxStart = pSlice->sSliceHeaderExt.uiScanIdxStart;
865   int32_t iScanIdxEnd   = pSlice->sSliceHeaderExt.uiScanIdxEnd;
866   int32_t iMbXy = pCurDqLayer->iMbXyIndex;
867   int32_t iMbResProperty;
868   int32_t i;
869   uint32_t uiMbType = 0, uiCbp = 0, uiCbpLuma = 0, uiCbpChroma = 0;
870 
871   ENFORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16);
872 
873   pCurDqLayer->pInterPredictionDoneFlag[iMbXy] = 0;
874 
875   WELS_READ_VERIFY (ParseMBTypePSliceCabac (pCtx, pNeighAvail, uiMbType));
876   // uiMbType = 4 is not allowded.
877   if (uiMbType < 4) { //Inter mode
878     int16_t pMotionVector[LIST_A][30][MV_A];
879     int16_t pMvdCache[LIST_A][30][MV_A];
880     int8_t  pRefIndex[LIST_A][30];
881     pCurDqLayer->pDec->pMbType[iMbXy] = g_ksInterPMbTypeInfo[uiMbType].iType;
882     WelsFillCacheInterCabac (pNeighAvail, pNonZeroCount, pMotionVector, pMvdCache, pRefIndex, pCurDqLayer);
883     WELS_READ_VERIFY (ParseInterPMotionInfoCabac (pCtx, pNeighAvail, pNonZeroCount, pMotionVector, pMvdCache, pRefIndex));
884     pCurDqLayer->pInterPredictionDoneFlag[iMbXy] = 0;
885   } else { //Intra mode
886     uiMbType -= 5;
887     if (uiMbType > 25)
888       return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE);
889     if (!pCtx->pSps->uiChromaFormatIdc && ((uiMbType >= 5 && uiMbType <= 12) || (uiMbType >= 17 && uiMbType <= 24)))
890       return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE);
891 
892     if (25 == uiMbType) {   //I_PCM
893       WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "I_PCM mode exists in P slice!");
894       WELS_READ_VERIFY (ParseIPCMInfoCabac (pCtx));
895       pSlice->iLastDeltaQp = 0;
896       WELS_READ_VERIFY (ParseEndOfSliceCabac (pCtx, uiEosFlag));
897       if (uiEosFlag) {
898         RestoreCabacDecEngineToBS (pCtx->pCabacDecEngine, pCtx->pCurDqLayer->pBitStringAux);
899       }
900       return ERR_NONE;
901     } else { //normal Intra mode
902       if (0 == uiMbType) { //Intra4x4
903         ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16);
904         pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA4x4;
905         if (pCtx->pPps->bTransform8x8ModeFlag) {
906           WELS_READ_VERIFY (ParseTransformSize8x8FlagCabac (pCtx, pNeighAvail, pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]));
907         }
908         if (pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
909           uiMbType = pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA8x8;
910           pCtx->pFillInfoCacheIntraNxNFunc (pNeighAvail, pNonZeroCount, pIntraPredMode, pCurDqLayer);
911           WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, pNeighAvail, pIntraPredMode, pBsAux, pCurDqLayer));
912         } else {
913           pCtx->pFillInfoCacheIntraNxNFunc (pNeighAvail, pNonZeroCount, pIntraPredMode, pCurDqLayer);
914           WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, pNeighAvail, pIntraPredMode, pBsAux, pCurDqLayer));
915         }
916       } else { //Intra16x16
917         pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA16x16;
918         pCurDqLayer->pTransformSize8x8Flag[iMbXy] = false;
919         pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
920         pCurDqLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3;
921         pCurDqLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2];
922         uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? pCurDqLayer->pCbp[iMbXy] >> 4 : 0;
923         uiCbpLuma = pCurDqLayer->pCbp[iMbXy] & 15;
924         WelsFillCacheNonZeroCount (pNeighAvail, pNonZeroCount, pCurDqLayer);
925         WELS_READ_VERIFY (ParseIntra16x16Mode (pCtx, pNeighAvail, pBsAux, pCurDqLayer));
926       }
927     }
928   }
929 
930   ST32 (&pCurDqLayer->pNzc[iMbXy][0], 0);
931   ST32 (&pCurDqLayer->pNzc[iMbXy][4], 0);
932   ST32 (&pCurDqLayer->pNzc[iMbXy][8], 0);
933   ST32 (&pCurDqLayer->pNzc[iMbXy][12], 0);
934   ST32 (&pCurDqLayer->pNzc[iMbXy][16], 0);
935   ST32 (&pCurDqLayer->pNzc[iMbXy][20], 0);
936 
937   if (MB_TYPE_INTRA16x16 != pCurDqLayer->pDec->pMbType[iMbXy]) {
938     WELS_READ_VERIFY (ParseCbpInfoCabac (pCtx, pNeighAvail, uiCbp));
939 
940     pCurDqLayer->pCbp[iMbXy] = uiCbp;
941     pSlice->iLastDeltaQp = uiCbp == 0 ? 0 : pSlice->iLastDeltaQp;
942     uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? pCurDqLayer->pCbp[iMbXy] >> 4 : 0 ;
943     uiCbpLuma = pCurDqLayer->pCbp[iMbXy] & 15;
944   }
945 
946   if (pCurDqLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurDqLayer->pDec->pMbType[iMbXy]) {
947 
948     if (MB_TYPE_INTRA16x16 != pCurDqLayer->pDec->pMbType[iMbXy]) {
949       // Need modification when B picutre add in
950       bool bNeedParseTransformSize8x8Flag =
951         (((pCurDqLayer->pDec->pMbType[iMbXy] >= MB_TYPE_16x16 && pCurDqLayer->pDec->pMbType[iMbXy] <= MB_TYPE_8x16)
952           || pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy])
953          && (pCurDqLayer->pDec->pMbType[iMbXy] != MB_TYPE_INTRA8x8)
954          && (pCurDqLayer->pDec->pMbType[iMbXy] != MB_TYPE_INTRA4x4)
955          && ((pCurDqLayer->pCbp[iMbXy] & 0x0F) > 0)
956          && (pCtx->pPps->bTransform8x8ModeFlag));
957 
958       if (bNeedParseTransformSize8x8Flag) {
959         WELS_READ_VERIFY (ParseTransformSize8x8FlagCabac (pCtx, pNeighAvail,
960                           pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy])); //transform_size_8x8_flag
961       }
962     }
963 
964     memset (pCurDqLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (pCurDqLayer->pScaledTCoeff[iMbXy][0]));
965 
966     int32_t iQpDelta, iId8x8, iId4x4;
967 
968     WELS_READ_VERIFY (ParseDeltaQpCabac (pCtx, iQpDelta));
969     if (iQpDelta > 25 || iQpDelta < -26) { //out of iQpDelta range
970       return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_QP);
971     }
972     pCurDqLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp
973     pSlice->iLastMbQp = pCurDqLayer->pLumaQp[iMbXy];
974     for (i = 0; i < 2; i++) {
975       pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pSlice->iLastMbQp +
976                                          pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
977     }
978 
979     if (MB_TYPE_INTRA16x16 == pCurDqLayer->pDec->pMbType[iMbXy]) {
980       //step1: Luma DC
981       WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, 0, 16, g_kuiLumaDcZigzagScan,
982                         I16_LUMA_DC, pCurDqLayer->pScaledTCoeff[iMbXy], pCurDqLayer->pLumaQp[iMbXy], pCtx));
983       //step2: Luma AC
984       if (uiCbpLuma) {
985         for (i = 0; i < 16; i++) {
986           WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, i, iScanIdxEnd - WELS_MAX (iScanIdxStart,
987                             1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), I16_LUMA_AC, pCurDqLayer->pScaledTCoeff[iMbXy] + (i << 4),
988                             pCurDqLayer->pLumaQp[iMbXy], pCtx));
989         }
990         ST32 (&pCurDqLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
991         ST32 (&pCurDqLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
992         ST32 (&pCurDqLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
993         ST32 (&pCurDqLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
994       } else {
995         ST32 (&pCurDqLayer->pNzc[iMbXy][0], 0);
996         ST32 (&pCurDqLayer->pNzc[iMbXy][4], 0);
997         ST32 (&pCurDqLayer->pNzc[iMbXy][8], 0);
998         ST32 (&pCurDqLayer->pNzc[iMbXy][12], 0);
999       }
1000     } else { //non-MB_TYPE_INTRA16x16
1001       if (pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
1002         // Transform 8x8 support for CABAC
1003         for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
1004           if (uiCbpLuma & (1 << iId8x8)) {
1005             WELS_READ_VERIFY (ParseResidualBlockCabac8x8 (pNeighAvail, pNonZeroCount, pBsAux, (iId8x8 << 2),
1006                               iScanIdxEnd - iScanIdxStart + 1, g_kuiZigzagScan8x8 + iScanIdxStart,
1007                               IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy]) ? LUMA_DC_AC_INTRA_8 : LUMA_DC_AC_INTER_8,
1008                               pCurDqLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), pCurDqLayer->pLumaQp[iMbXy], pCtx));
1009           } else {
1010             ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2)]], 0);
1011             ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0);
1012           }
1013         }
1014         ST32 (&pCurDqLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
1015         ST32 (&pCurDqLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
1016         ST32 (&pCurDqLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
1017         ST32 (&pCurDqLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
1018       } else {
1019         iMbResProperty = (IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA : LUMA_DC_AC_INTER;
1020         for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
1021           if (uiCbpLuma & (1 << iId8x8)) {
1022             int32_t iIdx = (iId8x8 << 2);
1023             for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
1024               //Luma (DC and AC decoding together)
1025               WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, iIdx, iScanIdxEnd - iScanIdxStart + 1,
1026                                 g_kuiZigzagScan + iScanIdxStart, iMbResProperty, pCurDqLayer->pScaledTCoeff[iMbXy] + (iIdx << 4),
1027                                 pCurDqLayer->pLumaQp[iMbXy],
1028                                 pCtx));
1029               iIdx++;
1030             }
1031           } else {
1032             ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[iId8x8 << 2]], 0);
1033             ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0);
1034           }
1035         }
1036         ST32 (&pCurDqLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
1037         ST32 (&pCurDqLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
1038         ST32 (&pCurDqLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
1039         ST32 (&pCurDqLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
1040       }
1041     }
1042 
1043     //chroma
1044     //step1: DC
1045     if (1 == uiCbpChroma || 2 == uiCbpChroma) {
1046       for (i = 0; i < 2; i++) {
1047         if (IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy]))
1048           iMbResProperty = i ? CHROMA_DC_V : CHROMA_DC_U;
1049         else
1050           iMbResProperty = i ? CHROMA_DC_V_INTER : CHROMA_DC_U_INTER;
1051 
1052         WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, 16 + (i << 2), 4, g_kuiChromaDcScan,
1053                           iMbResProperty, pCurDqLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6), pCurDqLayer->pChromaQp[iMbXy][i], pCtx));
1054       }
1055     }
1056     //step2: AC
1057     if (2 == uiCbpChroma) {
1058       for (i = 0; i < 2; i++) {
1059         if (IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy]))
1060           iMbResProperty = i ? CHROMA_AC_V : CHROMA_AC_U;
1061         else
1062           iMbResProperty = i ? CHROMA_AC_V_INTER : CHROMA_AC_U_INTER;
1063         int32_t index = 16 + (i << 2);
1064         for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
1065           WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, index,
1066                             iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1),
1067                             iMbResProperty, pCurDqLayer->pScaledTCoeff[iMbXy] + (index << 4), pCurDqLayer->pChromaQp[iMbXy][i], pCtx));
1068           index++;
1069         }
1070       }
1071       ST16 (&pCurDqLayer->pNzc[iMbXy][16], LD16 (&pNonZeroCount[6 + 8 * 1]));
1072       ST16 (&pCurDqLayer->pNzc[iMbXy][20], LD16 (&pNonZeroCount[6 + 8 * 2]));
1073       ST16 (&pCurDqLayer->pNzc[iMbXy][18], LD16 (&pNonZeroCount[6 + 8 * 4]));
1074       ST16 (&pCurDqLayer->pNzc[iMbXy][22], LD16 (&pNonZeroCount[6 + 8 * 5]));
1075     } else {
1076       ST32 (&pCurDqLayer->pNzc[iMbXy][16], 0);
1077       ST32 (&pCurDqLayer->pNzc[iMbXy][20], 0);
1078     }
1079   } else {
1080     pCurDqLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp;
1081     for (i = 0; i < 2; i++) {
1082       pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurDqLayer->pLumaQp[iMbXy] +
1083                                          pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
1084     }
1085   }
1086 
1087   WELS_READ_VERIFY (ParseEndOfSliceCabac (pCtx, uiEosFlag));
1088   if (uiEosFlag) {
1089     RestoreCabacDecEngineToBS (pCtx->pCabacDecEngine, pCtx->pCurDqLayer->pBitStringAux);
1090   }
1091 
1092   return ERR_NONE;
1093 }
1094 
WelsDecodeMbCabacBSliceBaseMode0(PWelsDecoderContext pCtx,PWelsNeighAvail pNeighAvail,uint32_t & uiEosFlag)1095 int32_t WelsDecodeMbCabacBSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiEosFlag) {
1096   PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
1097   PBitStringAux pBsAux = pCurDqLayer->pBitStringAux;
1098   PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer;
1099   PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader;
1100 
1101   int32_t iScanIdxStart = pSlice->sSliceHeaderExt.uiScanIdxStart;
1102   int32_t iScanIdxEnd = pSlice->sSliceHeaderExt.uiScanIdxEnd;
1103   int32_t iMbXy = pCurDqLayer->iMbXyIndex;
1104   int32_t iMbResProperty;
1105   int32_t i;
1106   uint32_t uiMbType = 0, uiCbp = 0, uiCbpLuma = 0, uiCbpChroma = 0;
1107 
1108   ENFORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16);
1109 
1110   pCurDqLayer->pInterPredictionDoneFlag[iMbXy] = 0;
1111 
1112   WELS_READ_VERIFY (ParseMBTypeBSliceCabac (pCtx, pNeighAvail, uiMbType));
1113 
1114   if (uiMbType < 23) { //Inter B mode
1115     int16_t pMotionVector[LIST_A][30][MV_A];
1116     int16_t pMvdCache[LIST_A][30][MV_A];
1117     int8_t  pRefIndex[LIST_A][30];
1118     int8_t  pDirect[30];
1119     pCurDqLayer->pDec->pMbType[iMbXy] = g_ksInterBMbTypeInfo[uiMbType].iType;
1120     WelsFillCacheInterCabac (pNeighAvail, pNonZeroCount, pMotionVector, pMvdCache, pRefIndex, pCurDqLayer);
1121     WelsFillDirectCacheCabac (pNeighAvail, pDirect, pCurDqLayer);
1122     WELS_READ_VERIFY (ParseInterBMotionInfoCabac (pCtx, pNeighAvail, pNonZeroCount, pMotionVector, pMvdCache, pRefIndex,
1123                       pDirect));
1124     pCurDqLayer->pInterPredictionDoneFlag[iMbXy] = 0;
1125   } else { //Intra mode
1126     uiMbType -= 23;
1127     if (uiMbType > 25)
1128       return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE);
1129     if (!pCtx->pSps->uiChromaFormatIdc && ((uiMbType >= 5 && uiMbType <= 12) || (uiMbType >= 17 && uiMbType <= 24)))
1130       return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE);
1131 
1132     if (25 == uiMbType) {   //I_PCM
1133       WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "I_PCM mode exists in B slice!");
1134       WELS_READ_VERIFY (ParseIPCMInfoCabac (pCtx));
1135       pSlice->iLastDeltaQp = 0;
1136       WELS_READ_VERIFY (ParseEndOfSliceCabac (pCtx, uiEosFlag));
1137       if (uiEosFlag) {
1138         RestoreCabacDecEngineToBS (pCtx->pCabacDecEngine, pCtx->pCurDqLayer->pBitStringAux);
1139       }
1140       return ERR_NONE;
1141     } else { //normal Intra mode
1142       if (0 == uiMbType) { //Intra4x4
1143         ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16);
1144         pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA4x4;
1145         if (pCtx->pPps->bTransform8x8ModeFlag) {
1146           WELS_READ_VERIFY (ParseTransformSize8x8FlagCabac (pCtx, pNeighAvail, pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]));
1147         }
1148         if (pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
1149           uiMbType = pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA8x8;
1150           pCtx->pFillInfoCacheIntraNxNFunc (pNeighAvail, pNonZeroCount, pIntraPredMode, pCurDqLayer);
1151           WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, pNeighAvail, pIntraPredMode, pBsAux, pCurDqLayer));
1152         } else {
1153           pCtx->pFillInfoCacheIntraNxNFunc (pNeighAvail, pNonZeroCount, pIntraPredMode, pCurDqLayer);
1154           WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, pNeighAvail, pIntraPredMode, pBsAux, pCurDqLayer));
1155         }
1156       } else { //Intra16x16
1157         pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA16x16;
1158         pCurDqLayer->pTransformSize8x8Flag[iMbXy] = false;
1159         pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
1160         pCurDqLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3;
1161         pCurDqLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2];
1162         uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? pCurDqLayer->pCbp[iMbXy] >> 4 : 0;
1163         uiCbpLuma = pCurDqLayer->pCbp[iMbXy] & 15;
1164         WelsFillCacheNonZeroCount (pNeighAvail, pNonZeroCount, pCurDqLayer);
1165         WELS_READ_VERIFY (ParseIntra16x16Mode (pCtx, pNeighAvail, pBsAux, pCurDqLayer));
1166       }
1167     }
1168   }
1169 
1170   ST32 (&pCurDqLayer->pNzc[iMbXy][0], 0);
1171   ST32 (&pCurDqLayer->pNzc[iMbXy][4], 0);
1172   ST32 (&pCurDqLayer->pNzc[iMbXy][8], 0);
1173   ST32 (&pCurDqLayer->pNzc[iMbXy][12], 0);
1174   ST32 (&pCurDqLayer->pNzc[iMbXy][16], 0);
1175   ST32 (&pCurDqLayer->pNzc[iMbXy][20], 0);
1176 
1177   if (MB_TYPE_INTRA16x16 != pCurDqLayer->pDec->pMbType[iMbXy]) {
1178     WELS_READ_VERIFY (ParseCbpInfoCabac (pCtx, pNeighAvail, uiCbp));
1179 
1180     pCurDqLayer->pCbp[iMbXy] = uiCbp;
1181     pSlice->iLastDeltaQp = uiCbp == 0 ? 0 : pSlice->iLastDeltaQp;
1182     uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? pCurDqLayer->pCbp[iMbXy] >> 4 : 0;
1183     uiCbpLuma = pCurDqLayer->pCbp[iMbXy] & 15;
1184   }
1185 
1186   if (pCurDqLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurDqLayer->pDec->pMbType[iMbXy]) {
1187 
1188     if (MB_TYPE_INTRA16x16 != pCurDqLayer->pDec->pMbType[iMbXy]) {
1189       // Need modification when B picutre add in
1190       bool bNeedParseTransformSize8x8Flag =
1191         (((IS_INTER_16x16 (pCurDqLayer->pDec->pMbType[iMbXy]) || IS_DIRECT (pCurDqLayer->pDec->pMbType[iMbXy])
1192            || IS_INTER_16x8 (pCurDqLayer->pDec->pMbType[iMbXy]) || IS_INTER_8x16 (pCurDqLayer->pDec->pMbType[iMbXy]))
1193           || pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy])
1194          && (pCurDqLayer->pDec->pMbType[iMbXy] != MB_TYPE_INTRA8x8)
1195          && (pCurDqLayer->pDec->pMbType[iMbXy] != MB_TYPE_INTRA4x4)
1196          && ((pCurDqLayer->pCbp[iMbXy] & 0x0F) > 0)
1197          && (pCtx->pPps->bTransform8x8ModeFlag));
1198 
1199       if (bNeedParseTransformSize8x8Flag) {
1200         WELS_READ_VERIFY (ParseTransformSize8x8FlagCabac (pCtx, pNeighAvail,
1201                           pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy])); //transform_size_8x8_flag
1202       }
1203     }
1204 
1205     memset (pCurDqLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (pCurDqLayer->pScaledTCoeff[iMbXy][0]));
1206 
1207     int32_t iQpDelta, iId8x8, iId4x4;
1208 
1209     WELS_READ_VERIFY (ParseDeltaQpCabac (pCtx, iQpDelta));
1210     if (iQpDelta > 25 || iQpDelta < -26) { //out of iQpDelta range
1211       return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_QP);
1212     }
1213     pCurDqLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp
1214     pSlice->iLastMbQp = pCurDqLayer->pLumaQp[iMbXy];
1215     for (i = 0; i < 2; i++) {
1216       pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pSlice->iLastMbQp +
1217                                          pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
1218     }
1219 
1220     if (MB_TYPE_INTRA16x16 == pCurDqLayer->pDec->pMbType[iMbXy]) {
1221       //step1: Luma DC
1222       WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, 0, 16, g_kuiLumaDcZigzagScan,
1223                         I16_LUMA_DC, pCurDqLayer->pScaledTCoeff[iMbXy], pCurDqLayer->pLumaQp[iMbXy], pCtx));
1224       //step2: Luma AC
1225       if (uiCbpLuma) {
1226         for (i = 0; i < 16; i++) {
1227           WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, i, iScanIdxEnd - WELS_MAX (iScanIdxStart,
1228                             1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), I16_LUMA_AC, pCurDqLayer->pScaledTCoeff[iMbXy] + (i << 4),
1229                             pCurDqLayer->pLumaQp[iMbXy], pCtx));
1230         }
1231         ST32 (&pCurDqLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
1232         ST32 (&pCurDqLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
1233         ST32 (&pCurDqLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
1234         ST32 (&pCurDqLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
1235       } else {
1236         ST32 (&pCurDqLayer->pNzc[iMbXy][0], 0);
1237         ST32 (&pCurDqLayer->pNzc[iMbXy][4], 0);
1238         ST32 (&pCurDqLayer->pNzc[iMbXy][8], 0);
1239         ST32 (&pCurDqLayer->pNzc[iMbXy][12], 0);
1240       }
1241     } else { //non-MB_TYPE_INTRA16x16
1242       if (pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
1243         // Transform 8x8 support for CABAC
1244         for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
1245           if (uiCbpLuma & (1 << iId8x8)) {
1246             WELS_READ_VERIFY (ParseResidualBlockCabac8x8 (pNeighAvail, pNonZeroCount, pBsAux, (iId8x8 << 2),
1247                               iScanIdxEnd - iScanIdxStart + 1, g_kuiZigzagScan8x8 + iScanIdxStart,
1248                               IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy]) ? LUMA_DC_AC_INTRA_8 : LUMA_DC_AC_INTER_8,
1249                               pCurDqLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), pCurDqLayer->pLumaQp[iMbXy], pCtx));
1250           } else {
1251             ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2)]], 0);
1252             ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0);
1253           }
1254         }
1255         ST32 (&pCurDqLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
1256         ST32 (&pCurDqLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
1257         ST32 (&pCurDqLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
1258         ST32 (&pCurDqLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
1259       } else {
1260         iMbResProperty = (IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA : LUMA_DC_AC_INTER;
1261         for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
1262           if (uiCbpLuma & (1 << iId8x8)) {
1263             int32_t iIdx = (iId8x8 << 2);
1264             for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
1265               //Luma (DC and AC decoding together)
1266               WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, iIdx, iScanIdxEnd - iScanIdxStart + 1,
1267                                 g_kuiZigzagScan + iScanIdxStart, iMbResProperty, pCurDqLayer->pScaledTCoeff[iMbXy] + (iIdx << 4),
1268                                 pCurDqLayer->pLumaQp[iMbXy],
1269                                 pCtx));
1270               iIdx++;
1271             }
1272           } else {
1273             ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[iId8x8 << 2]], 0);
1274             ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0);
1275           }
1276         }
1277         ST32 (&pCurDqLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
1278         ST32 (&pCurDqLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
1279         ST32 (&pCurDqLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
1280         ST32 (&pCurDqLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
1281       }
1282     }
1283 
1284     //chroma
1285     //step1: DC
1286     if (1 == uiCbpChroma || 2 == uiCbpChroma) {
1287       for (i = 0; i < 2; i++) {
1288         if (IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy]))
1289           iMbResProperty = i ? CHROMA_DC_V : CHROMA_DC_U;
1290         else
1291           iMbResProperty = i ? CHROMA_DC_V_INTER : CHROMA_DC_U_INTER;
1292 
1293         WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, 16 + (i << 2), 4, g_kuiChromaDcScan,
1294                           iMbResProperty, pCurDqLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6), pCurDqLayer->pChromaQp[iMbXy][i], pCtx));
1295       }
1296     }
1297     //step2: AC
1298     if (2 == uiCbpChroma) {
1299       for (i = 0; i < 2; i++) {
1300         if (IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy]))
1301           iMbResProperty = i ? CHROMA_AC_V : CHROMA_AC_U;
1302         else
1303           iMbResProperty = i ? CHROMA_AC_V_INTER : CHROMA_AC_U_INTER;
1304         int32_t index = 16 + (i << 2);
1305         for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
1306           WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, index,
1307                             iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1),
1308                             iMbResProperty, pCurDqLayer->pScaledTCoeff[iMbXy] + (index << 4), pCurDqLayer->pChromaQp[iMbXy][i], pCtx));
1309           index++;
1310         }
1311       }
1312       ST16 (&pCurDqLayer->pNzc[iMbXy][16], LD16 (&pNonZeroCount[6 + 8 * 1]));
1313       ST16 (&pCurDqLayer->pNzc[iMbXy][20], LD16 (&pNonZeroCount[6 + 8 * 2]));
1314       ST16 (&pCurDqLayer->pNzc[iMbXy][18], LD16 (&pNonZeroCount[6 + 8 * 4]));
1315       ST16 (&pCurDqLayer->pNzc[iMbXy][22], LD16 (&pNonZeroCount[6 + 8 * 5]));
1316     } else {
1317       ST32 (&pCurDqLayer->pNzc[iMbXy][16], 0);
1318       ST32 (&pCurDqLayer->pNzc[iMbXy][20], 0);
1319     }
1320   } else {
1321     pCurDqLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp;
1322     for (i = 0; i < 2; i++) {
1323       pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurDqLayer->pLumaQp[iMbXy] +
1324                                          pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
1325     }
1326   }
1327 
1328   WELS_READ_VERIFY (ParseEndOfSliceCabac (pCtx, uiEosFlag));
1329   if (uiEosFlag) {
1330     RestoreCabacDecEngineToBS (pCtx->pCabacDecEngine, pCtx->pCurDqLayer->pBitStringAux);
1331   }
1332 
1333   return ERR_NONE;
1334 }
1335 
1336 
WelsDecodeMbCabacPSlice(PWelsDecoderContext pCtx,PNalUnit pNalCur,uint32_t & uiEosFlag)1337 int32_t WelsDecodeMbCabacPSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag) {
1338   PDqLayer pCurDqLayer             = pCtx->pCurDqLayer;
1339   PSlice pSlice                  = &pCurDqLayer->sLayerInfo.sSliceInLayer;
1340   PSliceHeader pSliceHeader      = &pSlice->sSliceHeaderExt.sSliceHeader;
1341   PPicture* ppRefPic = pCtx->sRefPic.pRefList[LIST_0];
1342   uint32_t uiCode;
1343   int32_t iMbXy = pCurDqLayer->iMbXyIndex;
1344   int32_t i;
1345   SWelsNeighAvail uiNeighAvail;
1346   pCurDqLayer->pCbp[iMbXy] = 0;
1347   pCurDqLayer->pCbfDc[iMbXy] = 0;
1348   pCurDqLayer->pChromaPredMode[iMbXy] = C_PRED_DC;
1349 
1350   pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
1351   pCurDqLayer->pTransformSize8x8Flag[iMbXy] = false;
1352 
1353   GetNeighborAvailMbType (&uiNeighAvail, pCurDqLayer);
1354   WELS_READ_VERIFY (ParseSkipFlagCabac (pCtx, &uiNeighAvail, uiCode));
1355 
1356   if (uiCode) {
1357     int16_t pMv[2] = {0};
1358     pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_SKIP;
1359     ST32 (&pCurDqLayer->pNzc[iMbXy][0], 0);
1360     ST32 (&pCurDqLayer->pNzc[iMbXy][4], 0);
1361     ST32 (&pCurDqLayer->pNzc[iMbXy][8], 0);
1362     ST32 (&pCurDqLayer->pNzc[iMbXy][12], 0);
1363     ST32 (&pCurDqLayer->pNzc[iMbXy][16], 0);
1364     ST32 (&pCurDqLayer->pNzc[iMbXy][20], 0);
1365 
1366     pCurDqLayer->pInterPredictionDoneFlag[iMbXy] = 0;
1367     memset (pCurDqLayer->pDec->pRefIndex[0][iMbXy], 0, sizeof (int8_t) * 16);
1368     bool bIsPending = GetThreadCount (pCtx) > 1;
1369     pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[0] && (ppRefPic[0]->bIsComplete
1370                             || bIsPending));
1371     //predict mv
1372     PredPSkipMvFromNeighbor (pCurDqLayer, pMv);
1373     for (i = 0; i < 16; i++) {
1374       ST32 (pCurDqLayer->pDec->pMv[0][iMbXy][i], * (uint32_t*)pMv);
1375       ST32 (pCurDqLayer->pMvd[0][iMbXy][i], 0);
1376     }
1377 
1378     //if (!pSlice->sSliceHeaderExt.bDefaultResidualPredFlag) {
1379     //  memset (pCurDqLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (int16_t));
1380     //}
1381 
1382     //reset rS
1383     pCurDqLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp; //??????????????? dqaunt of previous mb
1384     for (i = 0; i < 2; i++) {
1385       pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurDqLayer->pLumaQp[iMbXy] +
1386                                          pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
1387     }
1388 
1389     //for neighboring CABAC usage
1390     pSlice->iLastDeltaQp = 0;
1391 
1392     WELS_READ_VERIFY (ParseEndOfSliceCabac (pCtx, uiEosFlag));
1393 
1394     return ERR_NONE;
1395   }
1396 
1397   WELS_READ_VERIFY (WelsDecodeMbCabacPSliceBaseMode0 (pCtx, &uiNeighAvail, uiEosFlag));
1398   return ERR_NONE;
1399 }
1400 
1401 
WelsDecodeMbCabacBSlice(PWelsDecoderContext pCtx,PNalUnit pNalCur,uint32_t & uiEosFlag)1402 int32_t WelsDecodeMbCabacBSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag) {
1403   PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
1404   PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer;
1405   PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader;
1406   PPicture* ppRefPicL0 = pCtx->sRefPic.pRefList[LIST_0];
1407   PPicture* ppRefPicL1 = pCtx->sRefPic.pRefList[LIST_1];
1408   uint32_t uiCode;
1409   int32_t iMbXy = pCurDqLayer->iMbXyIndex;
1410   int32_t i;
1411   SWelsNeighAvail uiNeighAvail;
1412   pCurDqLayer->pCbp[iMbXy] = 0;
1413   pCurDqLayer->pCbfDc[iMbXy] = 0;
1414   pCurDqLayer->pChromaPredMode[iMbXy] = C_PRED_DC;
1415 
1416   pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
1417   pCurDqLayer->pTransformSize8x8Flag[iMbXy] = false;
1418 
1419   GetNeighborAvailMbType (&uiNeighAvail, pCurDqLayer);
1420   WELS_READ_VERIFY (ParseSkipFlagCabac (pCtx, &uiNeighAvail, uiCode));
1421 
1422   memset (pCurDqLayer->pDirect[iMbXy], 0, sizeof (int8_t) * 16);
1423 
1424   bool bIsPending = GetThreadCount (pCtx) > 1;
1425 
1426   if (uiCode) {
1427     int16_t pMv[LIST_A][2] = { {0, 0}, { 0, 0 } };
1428     int8_t  ref[LIST_A] = { 0 };
1429     pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_SKIP | MB_TYPE_DIRECT;
1430     ST32 (&pCurDqLayer->pNzc[iMbXy][0], 0);
1431     ST32 (&pCurDqLayer->pNzc[iMbXy][4], 0);
1432     ST32 (&pCurDqLayer->pNzc[iMbXy][8], 0);
1433     ST32 (&pCurDqLayer->pNzc[iMbXy][12], 0);
1434     ST32 (&pCurDqLayer->pNzc[iMbXy][16], 0);
1435     ST32 (&pCurDqLayer->pNzc[iMbXy][20], 0);
1436 
1437     pCurDqLayer->pInterPredictionDoneFlag[iMbXy] = 0;
1438     memset (pCurDqLayer->pDec->pRefIndex[LIST_0][iMbXy], 0, sizeof (int8_t) * 16);
1439     memset (pCurDqLayer->pDec->pRefIndex[LIST_1][iMbXy], 0, sizeof (int8_t) * 16);
1440     pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPicL0[0] && (ppRefPicL0[0]->bIsComplete
1441                             || bIsPending)) || ! (ppRefPicL1[0] && (ppRefPicL1[0]->bIsComplete || bIsPending));
1442 
1443     if (pCtx->bMbRefConcealed) {
1444       SLogContext* pLogCtx = & (pCtx->sLogCtx);
1445       WelsLog (pLogCtx, WELS_LOG_ERROR, "Ref Picture for B-Slice is lost, B-Slice decoding cannot be continued!");
1446       return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_DATA, ERR_INFO_REFERENCE_PIC_LOST);
1447     }
1448 
1449     SubMbType subMbType;
1450     if (pSliceHeader->iDirectSpatialMvPredFlag) {
1451 
1452       //predict direct spatial mv
1453       int32_t ret = PredMvBDirectSpatial (pCtx, pMv, ref, subMbType);
1454       if (ret != ERR_NONE) {
1455         return ret;
1456       }
1457     } else {
1458       //temporal direct mode
1459       int32_t ret = PredBDirectTemporal (pCtx, pMv, ref, subMbType);
1460       if (ret != ERR_NONE) {
1461         return ret;
1462       }
1463     }
1464 
1465 
1466     //reset rS
1467     pCurDqLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp; //??????????????? dqaunt of previous mb
1468     for (i = 0; i < 2; i++) {
1469       pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurDqLayer->pLumaQp[iMbXy] +
1470                                          pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
1471     }
1472 
1473     //for neighboring CABAC usage
1474     pSlice->iLastDeltaQp = 0;
1475 
1476     WELS_READ_VERIFY (ParseEndOfSliceCabac (pCtx, uiEosFlag));
1477 
1478     return ERR_NONE;
1479   }
1480 
1481   WELS_READ_VERIFY (WelsDecodeMbCabacBSliceBaseMode0 (pCtx, &uiNeighAvail, uiEosFlag));
1482   return ERR_NONE;
1483 }
1484 
1485 // Calculate deqaunt coeff scaling list value
WelsCalcDeqCoeffScalingList(PWelsDecoderContext pCtx)1486 int32_t  WelsCalcDeqCoeffScalingList (PWelsDecoderContext pCtx) {
1487   if (pCtx->pSps->bSeqScalingMatrixPresentFlag || pCtx->pPps->bPicScalingMatrixPresentFlag) {
1488     pCtx->bUseScalingList = true;
1489 
1490     if (!pCtx->bDequantCoeff4x4Init || (pCtx->iDequantCoeffPpsid != pCtx->pPps->iPpsId)) {
1491       int i, q, x, y;
1492       //Init dequant coeff value for different QP
1493       for (i = 0; i < 6; i++) {
1494         pCtx->pDequant_coeff4x4[i] = pCtx->pDequant_coeff_buffer4x4[i];
1495         pCtx->pDequant_coeff8x8[i] = pCtx->pDequant_coeff_buffer8x8[i];
1496         for (q = 0; q < 51; q++) {
1497           for (x = 0; x < 16; x++) {
1498             pCtx->pDequant_coeff4x4[i][q][x] = pCtx->pPps->bPicScalingMatrixPresentFlag ? pCtx->pPps->iScalingList4x4[i][x] *
1499                                                g_kuiDequantCoeff[q][x & 0x07] : pCtx->pSps->iScalingList4x4[i][x] * g_kuiDequantCoeff[q][x & 0x07];
1500           }
1501           for (y = 0; y < 64; y++) {
1502             pCtx->pDequant_coeff8x8[i][q][y] = pCtx->pPps->bPicScalingMatrixPresentFlag ? pCtx->pPps->iScalingList8x8[i][y] *
1503                                                g_kuiMatrixV[q % 6][y / 8][y % 8] : pCtx->pSps->iScalingList8x8[i][y] * g_kuiMatrixV[q % 6][y / 8][y % 8];
1504           }
1505         }
1506       }
1507       pCtx->bDequantCoeff4x4Init = true;
1508       pCtx->iDequantCoeffPpsid = pCtx->pPps->iPpsId;
1509     }
1510   } else
1511     pCtx->bUseScalingList = false;
1512   return ERR_NONE;
1513 }
1514 
WelsDecodeSlice(PWelsDecoderContext pCtx,bool bFirstSliceInLayer,PNalUnit pNalCur)1515 int32_t WelsDecodeSlice (PWelsDecoderContext pCtx, bool bFirstSliceInLayer, PNalUnit pNalCur) {
1516   PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
1517   PFmo pFmo = pCtx->pFmo;
1518   int32_t iRet;
1519   int32_t iNextMbXyIndex, iSliceIdc;
1520 
1521   PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer;
1522   PSliceHeaderExt pSliceHeaderExt = &pSlice->sSliceHeaderExt;
1523   PSliceHeader pSliceHeader = &pSliceHeaderExt->sSliceHeader;
1524   int32_t iMbX, iMbY;
1525   const int32_t kiCountNumMb = pSliceHeader->pSps->uiTotalMbCount; //need to be correct when fmo or multi slice
1526   uint32_t uiEosFlag = 0;
1527   PWelsDecMbFunc pDecMbFunc;
1528 
1529   pSlice->iTotalMbInCurSlice = 0; //initialize at the starting of slice decoding.
1530 
1531   if (pCtx->pPps->bEntropyCodingModeFlag) {
1532     if (pSlice->sSliceHeaderExt.bAdaptiveMotionPredFlag ||
1533         pSlice->sSliceHeaderExt.bAdaptiveBaseModeFlag ||
1534         pSlice->sSliceHeaderExt.bAdaptiveResidualPredFlag) {
1535       WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR,
1536                "WelsDecodeSlice()::::ILP flag exist, not supported with CABAC enabled!");
1537       pCtx->iErrorCode |= dsBitstreamError;
1538       return dsBitstreamError;
1539     }
1540     if (P_SLICE == pSliceHeader->eSliceType)
1541       pDecMbFunc = WelsDecodeMbCabacPSlice;
1542     else if (B_SLICE == pSliceHeader->eSliceType)
1543       pDecMbFunc = WelsDecodeMbCabacBSlice;
1544     else //I_SLICE. B_SLICE is being supported
1545       pDecMbFunc = WelsDecodeMbCabacISlice;
1546   } else {
1547     if (P_SLICE == pSliceHeader->eSliceType) {
1548       pDecMbFunc = WelsDecodeMbCavlcPSlice;
1549     } else if (B_SLICE == pSliceHeader->eSliceType) {
1550       pDecMbFunc = WelsDecodeMbCavlcBSlice;
1551     } else { //I_SLICE
1552       pDecMbFunc = WelsDecodeMbCavlcISlice;
1553     }
1554   }
1555 
1556   if (pSliceHeader->pPps->bConstainedIntraPredFlag) {
1557     pCtx->pFillInfoCacheIntraNxNFunc = WelsFillCacheConstrain1IntraNxN;
1558     pCtx->pMapNxNNeighToSampleFunc    = WelsMapNxNNeighToSampleConstrain1;
1559     pCtx->pMap16x16NeighToSampleFunc  = WelsMap16x16NeighToSampleConstrain1;
1560   } else {
1561     pCtx->pFillInfoCacheIntraNxNFunc = WelsFillCacheConstrain0IntraNxN;
1562     pCtx->pMapNxNNeighToSampleFunc    = WelsMapNxNNeighToSampleNormal;
1563     pCtx->pMap16x16NeighToSampleFunc  = WelsMap16x16NeighToSampleNormal;
1564   }
1565 
1566   pCtx->eSliceType = pSliceHeader->eSliceType;
1567   if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag == 1) {
1568     int32_t iQp = pSlice->sSliceHeaderExt.sSliceHeader.iSliceQp;
1569     int32_t iCabacInitIdc = pSlice->sSliceHeaderExt.sSliceHeader.iCabacInitIdc;
1570     WelsCabacContextInit (pCtx, pSlice->eSliceType, iCabacInitIdc, iQp);
1571     //InitCabacCtx (pCtx->pCabacCtx, pSlice->eSliceType, iCabacInitIdc, iQp);
1572     pSlice->iLastDeltaQp = 0;
1573     WELS_READ_VERIFY (InitCabacDecEngineFromBS (pCtx->pCabacDecEngine, pCtx->pCurDqLayer->pBitStringAux));
1574   }
1575   //try to calculate  the dequant_coeff
1576   WelsCalcDeqCoeffScalingList (pCtx);
1577 
1578   iNextMbXyIndex = pSliceHeader->iFirstMbInSlice;
1579   iMbX = iNextMbXyIndex % pCurDqLayer->iMbWidth;
1580   iMbY = iNextMbXyIndex / pCurDqLayer->iMbWidth; // error is introduced by multiple slices case, 11/23/2009
1581   pSlice->iMbSkipRun = -1;
1582   iSliceIdc = (pSliceHeader->iFirstMbInSlice << 7) + pCurDqLayer->uiLayerDqId;
1583 
1584   pCurDqLayer->iMbX =  iMbX;
1585   pCurDqLayer->iMbY = iMbY;
1586   pCurDqLayer->iMbXyIndex = iNextMbXyIndex;
1587 
1588   do {
1589     if ((-1 == iNextMbXyIndex) || (iNextMbXyIndex >= kiCountNumMb)) { // slice group boundary or end of a frame
1590       break;
1591     }
1592 
1593     pCurDqLayer->pSliceIdc[iNextMbXyIndex] = iSliceIdc;
1594     pCtx->bMbRefConcealed = false;
1595     iRet = pDecMbFunc (pCtx,  pNalCur, uiEosFlag);
1596     pCurDqLayer->pMbRefConcealedFlag[iNextMbXyIndex] = pCtx->bMbRefConcealed;
1597     if (iRet != ERR_NONE) {
1598       return iRet;
1599     }
1600 
1601     ++pSlice->iTotalMbInCurSlice;
1602     if (uiEosFlag) { //end of slice
1603       break;
1604     }
1605     if (pSliceHeader->pPps->uiNumSliceGroups > 1) {
1606       iNextMbXyIndex = FmoNextMb (pFmo, iNextMbXyIndex);
1607     } else {
1608       ++iNextMbXyIndex;
1609     }
1610     iMbX = iNextMbXyIndex % pCurDqLayer->iMbWidth;
1611     iMbY = iNextMbXyIndex / pCurDqLayer->iMbWidth;
1612     pCurDqLayer->iMbX =  iMbX;
1613     pCurDqLayer->iMbY = iMbY;
1614     pCurDqLayer->iMbXyIndex = iNextMbXyIndex;
1615   } while (1);
1616 
1617   return ERR_NONE;
1618 }
1619 
WelsDecodeAndConstructSlice(PWelsDecoderContext pCtx)1620 int32_t WelsDecodeAndConstructSlice (PWelsDecoderContext pCtx) {
1621   PNalUnit pNalCur = pCtx->pNalCur;
1622   PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
1623   PFmo pFmo = pCtx->pFmo;
1624   int32_t iRet;
1625   int32_t iNextMbXyIndex, iSliceIdc;
1626 
1627   PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer;
1628   PSliceHeaderExt pSliceHeaderExt = &pSlice->sSliceHeaderExt;
1629   PSliceHeader pSliceHeader = &pSliceHeaderExt->sSliceHeader;
1630   int32_t iMbX, iMbY;
1631   const int32_t kiCountNumMb = pSliceHeader->pSps->uiTotalMbCount; //need to be correct when fmo or multi slice
1632   int32_t iTotalMbTargetLayer = kiCountNumMb;
1633   uint32_t uiEosFlag = 0;
1634   PWelsDecMbFunc pDecMbFunc;
1635 
1636   pSlice->iTotalMbInCurSlice = 0; //initialize at the starting of slice decoding.
1637 
1638   if (pCtx->pPps->bEntropyCodingModeFlag) {
1639     if (pSlice->sSliceHeaderExt.bAdaptiveMotionPredFlag ||
1640         pSlice->sSliceHeaderExt.bAdaptiveBaseModeFlag ||
1641         pSlice->sSliceHeaderExt.bAdaptiveResidualPredFlag) {
1642       WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR,
1643                "WelsDecodeSlice()::::ILP flag exist, not supported with CABAC enabled!");
1644       pCtx->iErrorCode |= dsBitstreamError;
1645       return dsBitstreamError;
1646     }
1647     if (P_SLICE == pSliceHeader->eSliceType)
1648       pDecMbFunc = WelsDecodeMbCabacPSlice;
1649     else if (B_SLICE == pSliceHeader->eSliceType)
1650       pDecMbFunc = WelsDecodeMbCabacBSlice;
1651     else //I_SLICE. B_SLICE is being supported
1652       pDecMbFunc = WelsDecodeMbCabacISlice;
1653   } else {
1654     if (P_SLICE == pSliceHeader->eSliceType) {
1655       pDecMbFunc = WelsDecodeMbCavlcPSlice;
1656     } else if (B_SLICE == pSliceHeader->eSliceType) {
1657       pDecMbFunc = WelsDecodeMbCavlcBSlice;
1658     } else { //I_SLICE
1659       pDecMbFunc = WelsDecodeMbCavlcISlice;
1660     }
1661   }
1662 
1663   if (pSliceHeader->pPps->bConstainedIntraPredFlag) {
1664     pCtx->pFillInfoCacheIntraNxNFunc = WelsFillCacheConstrain1IntraNxN;
1665     pCtx->pMapNxNNeighToSampleFunc = WelsMapNxNNeighToSampleConstrain1;
1666     pCtx->pMap16x16NeighToSampleFunc = WelsMap16x16NeighToSampleConstrain1;
1667   } else {
1668     pCtx->pFillInfoCacheIntraNxNFunc = WelsFillCacheConstrain0IntraNxN;
1669     pCtx->pMapNxNNeighToSampleFunc = WelsMapNxNNeighToSampleNormal;
1670     pCtx->pMap16x16NeighToSampleFunc = WelsMap16x16NeighToSampleNormal;
1671   }
1672 
1673   pCtx->eSliceType = pSliceHeader->eSliceType;
1674   if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag == 1) {
1675     int32_t iQp = pSlice->sSliceHeaderExt.sSliceHeader.iSliceQp;
1676     int32_t iCabacInitIdc = pSlice->sSliceHeaderExt.sSliceHeader.iCabacInitIdc;
1677     WelsCabacContextInit (pCtx, pSlice->eSliceType, iCabacInitIdc, iQp);
1678     //InitCabacCtx (pCtx->pCabacCtx, pSlice->eSliceType, iCabacInitIdc, iQp);
1679     pSlice->iLastDeltaQp = 0;
1680     WELS_READ_VERIFY (InitCabacDecEngineFromBS (pCtx->pCabacDecEngine, pCtx->pCurDqLayer->pBitStringAux));
1681   }
1682   //try to calculate  the dequant_coeff
1683   WelsCalcDeqCoeffScalingList (pCtx);
1684 
1685   iNextMbXyIndex = pSliceHeader->iFirstMbInSlice;
1686   iMbX = iNextMbXyIndex % pCurDqLayer->iMbWidth;
1687   iMbY = iNextMbXyIndex / pCurDqLayer->iMbWidth; // error is introduced by multiple slices case, 11/23/2009
1688   pSlice->iMbSkipRun = -1;
1689   iSliceIdc = (pSliceHeader->iFirstMbInSlice << 7) + pCurDqLayer->uiLayerDqId;
1690 
1691   pCurDqLayer->iMbX = iMbX;
1692   pCurDqLayer->iMbY = iMbY;
1693   pCurDqLayer->iMbXyIndex = iNextMbXyIndex;
1694 
1695   PDeblockingFilterMbFunc pDeblockMb = WelsDeblockingMb;
1696 
1697   SDeblockingFilter pFilter;
1698   int32_t iFilterIdc = 1;
1699   if (pSliceHeader->uiDisableDeblockingFilterIdc != 1) {
1700     WelsDeblockingInitFilter (pCtx, pFilter, iFilterIdc);
1701   }
1702 
1703   do {
1704     if ((-1 == iNextMbXyIndex) || (iNextMbXyIndex >= kiCountNumMb)) { // slice group boundary or end of a frame
1705       break;
1706     }
1707 
1708     pCurDqLayer->pSliceIdc[iNextMbXyIndex] = iSliceIdc;
1709     pCtx->bMbRefConcealed = false;
1710     iRet = pDecMbFunc (pCtx, pNalCur, uiEosFlag);
1711     pCurDqLayer->pMbRefConcealedFlag[iNextMbXyIndex] = pCtx->bMbRefConcealed;
1712     if (iRet != ERR_NONE) {
1713       return iRet;
1714     }
1715     if (WelsTargetMbConstruction (pCtx)) {
1716       WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING,
1717                "WelsTargetSliceConstruction():::MB(%d, %d) construction error. pCurSlice_type:%d",
1718                pCurDqLayer->iMbX, pCurDqLayer->iMbY, pSlice->eSliceType);
1719 
1720       return ERR_INFO_MB_RECON_FAIL;
1721     }
1722     memcpy (pCtx->pDec->pNzc[pCurDqLayer->iMbXyIndex], pCurDqLayer->pNzc[pCurDqLayer->iMbXyIndex], 24);
1723     if (pCtx->eSliceType != I_SLICE) {
1724       pCtx->sBlockFunc.pWelsSetNonZeroCountFunc (
1725         pCtx->pDec->pNzc[pCurDqLayer->iMbXyIndex]); // set all none-zero nzc to 1; dbk can be opti!
1726     }
1727     WelsDeblockingFilterMB (pCurDqLayer, pFilter, iFilterIdc, pDeblockMb);
1728     if (pCtx->uiNalRefIdc > 0) {
1729       if (pCurDqLayer->iMbX == 0 || pCurDqLayer->iMbX == pCurDqLayer->iMbWidth - 1 || pCurDqLayer->iMbY == 0
1730           || pCurDqLayer->iMbY == pCurDqLayer->iMbHeight - 1) {
1731         PadMBLuma_c (pCurDqLayer->pDec->pData[0], pCurDqLayer->pDec->iLinesize[0], pCurDqLayer->pDec->iWidthInPixel,
1732                      pCurDqLayer->pDec->iHeightInPixel, pCurDqLayer->iMbX, pCurDqLayer->iMbY, pCurDqLayer->iMbWidth, pCurDqLayer->iMbHeight);
1733         PadMBChroma_c (pCurDqLayer->pDec->pData[1], pCurDqLayer->pDec->iLinesize[1], pCurDqLayer->pDec->iWidthInPixel / 2,
1734                        pCurDqLayer->pDec->iHeightInPixel / 2, pCurDqLayer->iMbX, pCurDqLayer->iMbY, pCurDqLayer->iMbWidth,
1735                        pCurDqLayer->iMbHeight);
1736         PadMBChroma_c (pCurDqLayer->pDec->pData[2], pCurDqLayer->pDec->iLinesize[2], pCurDqLayer->pDec->iWidthInPixel / 2,
1737                        pCurDqLayer->pDec->iHeightInPixel / 2, pCurDqLayer->iMbX, pCurDqLayer->iMbY, pCurDqLayer->iMbWidth,
1738                        pCurDqLayer->iMbHeight);
1739       }
1740     }
1741     if (!pCurDqLayer->pMbCorrectlyDecodedFlag[iNextMbXyIndex]) { //already con-ed, overwrite
1742       pCurDqLayer->pMbCorrectlyDecodedFlag[iNextMbXyIndex] = true;
1743       pCtx->pDec->iMbEcedPropNum += (pCurDqLayer->pMbRefConcealedFlag[iNextMbXyIndex] ? 1 : 0);
1744       ++pCtx->iTotalNumMbRec;
1745     }
1746 
1747     if (pCtx->iTotalNumMbRec > iTotalMbTargetLayer) {
1748       WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING,
1749                "WelsTargetSliceConstruction():::pCtx->iTotalNumMbRec:%d, iTotalMbTargetLayer:%d",
1750                pCtx->iTotalNumMbRec, iTotalMbTargetLayer);
1751 
1752       return ERR_INFO_MB_NUM_EXCEED_FAIL;
1753     }
1754 
1755     ++pSlice->iTotalMbInCurSlice;
1756     if (uiEosFlag) { //end of slice
1757       SET_EVENT (&pCtx->pDec->pReadyEvent[pCurDqLayer->iMbY]);
1758       break;
1759     }
1760     if (pSliceHeader->pPps->uiNumSliceGroups > 1) {
1761       iNextMbXyIndex = FmoNextMb (pFmo, iNextMbXyIndex);
1762     } else {
1763       ++iNextMbXyIndex;
1764     }
1765     int32_t iLastMby = iMbY;
1766     int32_t iLastMbx = iMbX;
1767     iMbX = iNextMbXyIndex % pCurDqLayer->iMbWidth;
1768     iMbY = iNextMbXyIndex / pCurDqLayer->iMbWidth;
1769     pCurDqLayer->iMbX = iMbX;
1770     pCurDqLayer->iMbY = iMbY;
1771     pCurDqLayer->iMbXyIndex = iNextMbXyIndex;
1772     if (GetThreadCount (pCtx) > 1) {
1773       if ((iMbY > iLastMby) && (iLastMbx == pCurDqLayer->iMbWidth - 1)) {
1774         SET_EVENT (&pCtx->pDec->pReadyEvent[iLastMby]);
1775       }
1776     }
1777   } while (1);
1778   if (GetThreadCount (pCtx) > 1) {
1779     SET_EVENT (&pCtx->pDec->pReadyEvent[pCurDqLayer->iMbY]);
1780   }
1781   return ERR_NONE;
1782 }
1783 
WelsActualDecodeMbCavlcISlice(PWelsDecoderContext pCtx)1784 int32_t WelsActualDecodeMbCavlcISlice (PWelsDecoderContext pCtx) {
1785   SVlcTable* pVlcTable     = pCtx->pVlcTable;
1786   PDqLayer pCurDqLayer             = pCtx->pCurDqLayer;
1787   PBitStringAux pBs              = pCurDqLayer->pBitStringAux;
1788   PSlice pSlice                  = &pCurDqLayer->sLayerInfo.sSliceInLayer;
1789   PSliceHeader pSliceHeader      = &pSlice->sSliceHeaderExt.sSliceHeader;
1790 
1791   SWelsNeighAvail sNeighAvail;
1792   int32_t iMbResProperty;
1793 
1794   int32_t iScanIdxStart = pSlice->sSliceHeaderExt.uiScanIdxStart;
1795   int32_t iScanIdxEnd   = pSlice->sSliceHeaderExt.uiScanIdxEnd;
1796 
1797   int32_t iMbX = pCurDqLayer->iMbX;
1798   int32_t iMbY = pCurDqLayer->iMbY;
1799   const int32_t iMbXy = pCurDqLayer->iMbXyIndex;
1800   int8_t* pNzc = pCurDqLayer->pNzc[iMbXy];
1801   int32_t i;
1802   int32_t iRet = ERR_NONE;
1803   uint32_t uiMbType = 0, uiCbp = 0, uiCbpL = 0, uiCbpC = 0;
1804   uint32_t uiCode;
1805   int32_t iCode;
1806 
1807   ENFORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16);
1808   GetNeighborAvailMbType (&sNeighAvail, pCurDqLayer);
1809   pCurDqLayer->pInterPredictionDoneFlag[iMbXy] = 0;
1810   pCurDqLayer->pResidualPredFlag[iMbXy] = pSlice->sSliceHeaderExt.bDefaultResidualPredFlag;
1811 
1812   pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
1813   pCurDqLayer->pTransformSize8x8Flag[iMbXy] = false;
1814 
1815   WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //uiMbType
1816   uiMbType = uiCode;
1817   if (uiMbType > 25)
1818     return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE);
1819   if (!pCtx->pSps->uiChromaFormatIdc && ((uiMbType >= 5 && uiMbType <= 12) || (uiMbType >= 17 && uiMbType <= 24)))
1820     return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE);
1821 
1822   if (25 == uiMbType) {
1823     WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "I_PCM mode exists in I slice!");
1824     int32_t iDecStrideL = pCurDqLayer->pDec->iLinesize[0];
1825     int32_t iDecStrideC = pCurDqLayer->pDec->iLinesize[1];
1826 
1827     int32_t iOffsetL = (iMbX + iMbY * iDecStrideL) << 4;
1828     int32_t iOffsetC = (iMbX + iMbY * iDecStrideC) << 3;
1829 
1830     uint8_t* pDecY = pCurDqLayer->pDec->pData[0] + iOffsetL;
1831     uint8_t* pDecU = pCurDqLayer->pDec->pData[1] + iOffsetC;
1832     uint8_t* pDecV = pCurDqLayer->pDec->pData[2] + iOffsetC;
1833 
1834     uint8_t* pTmpBsBuf;
1835 
1836 
1837     int32_t i;
1838     int32_t iCopySizeY  = (sizeof (uint8_t) << 4);
1839     int32_t iCopySizeUV = (sizeof (uint8_t) << 3);
1840 
1841     int32_t iIndex = ((-pBs->iLeftBits) >> 3) + 2;
1842 
1843     pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA_PCM;
1844 
1845     //step 1: locating bit-stream pointer [must align into integer byte]
1846     pBs->pCurBuf -= iIndex;
1847 
1848     //step 2: copy pixel from bit-stream into fdec [reconstruction]
1849     pTmpBsBuf = pBs->pCurBuf;
1850     if (!pCtx->pParam->bParseOnly) {
1851       for (i = 0; i < 16; i++) { //luma
1852         memcpy (pDecY, pTmpBsBuf, iCopySizeY);
1853         pDecY += iDecStrideL;
1854         pTmpBsBuf += 16;
1855       }
1856       for (i = 0; i < 8; i++) { //cb
1857         memcpy (pDecU, pTmpBsBuf, iCopySizeUV);
1858         pDecU += iDecStrideC;
1859         pTmpBsBuf += 8;
1860       }
1861       for (i = 0; i < 8; i++) { //cr
1862         memcpy (pDecV, pTmpBsBuf, iCopySizeUV);
1863         pDecV += iDecStrideC;
1864         pTmpBsBuf += 8;
1865       }
1866     }
1867 
1868     pBs->pCurBuf += 384;
1869 
1870     //step 3: update QP and pNonZeroCount
1871     pCurDqLayer->pLumaQp[iMbXy] = 0;
1872     memset (pCurDqLayer->pChromaQp[iMbXy], 0, sizeof (pCurDqLayer->pChromaQp[iMbXy]));
1873     memset (pNzc, 16, sizeof (pCurDqLayer->pNzc[iMbXy]));   //Rec. 9.2.1 for PCM, nzc=16
1874     WELS_READ_VERIFY (InitReadBits (pBs, 0));
1875     return ERR_NONE;
1876   } else if (0 == uiMbType) { //reference to JM
1877     ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16);
1878     pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA4x4;
1879     if (pCtx->pPps->bTransform8x8ModeFlag) {
1880       WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //transform_size_8x8_flag
1881       pCurDqLayer->pTransformSize8x8Flag[iMbXy] = !!uiCode;
1882       if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
1883         uiMbType = pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA8x8;
1884       }
1885     }
1886     if (!pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
1887       pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurDqLayer);
1888       WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurDqLayer));
1889     } else {
1890       pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurDqLayer);
1891       WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurDqLayer));
1892     }
1893 
1894     //uiCbp
1895     WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //coded_block_pattern
1896     uiCbp = uiCode;
1897     //G.9.1 Alternative parsing process for coded pBlock pattern
1898     if (pCtx->pSps->uiChromaFormatIdc && (uiCbp > 47))
1899       return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_CBP);
1900     if (!pCtx->pSps->uiChromaFormatIdc && (uiCbp > 15))
1901       return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_CBP);
1902 
1903     if (pCtx->pSps->uiChromaFormatIdc)
1904       uiCbp = g_kuiIntra4x4CbpTable[uiCbp];
1905     else
1906       uiCbp = g_kuiIntra4x4CbpTable400[uiCbp];
1907     pCurDqLayer->pCbp[iMbXy] = uiCbp;
1908     uiCbpC = uiCbp >> 4;
1909     uiCbpL = uiCbp & 15;
1910   } else { //I_PCM exclude, we can ignore it
1911     pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA16x16;
1912     pCurDqLayer->pTransformSize8x8Flag[iMbXy] = false;
1913     pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
1914     pCurDqLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3;
1915     pCurDqLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2];
1916     uiCbpC = pCtx->pSps->uiChromaFormatIdc ? pCurDqLayer->pCbp[iMbXy] >> 4 : 0;
1917     uiCbpL = pCurDqLayer->pCbp[iMbXy] & 15;
1918     WelsFillCacheNonZeroCount (&sNeighAvail, pNonZeroCount, pCurDqLayer);
1919     WELS_READ_VERIFY (ParseIntra16x16Mode (pCtx, &sNeighAvail, pBs, pCurDqLayer));
1920   }
1921 
1922   ST32A4 (&pNzc[0], 0);
1923   ST32A4 (&pNzc[4], 0);
1924   ST32A4 (&pNzc[8], 0);
1925   ST32A4 (&pNzc[12], 0);
1926   ST32A4 (&pNzc[16], 0);
1927   ST32A4 (&pNzc[20], 0);
1928 
1929   if (pCurDqLayer->pCbp[iMbXy] == 0 && IS_INTRANxN (pCurDqLayer->pDec->pMbType[iMbXy])) {
1930     pCurDqLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp;
1931     for (i = 0; i < 2; i++) {
1932       pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurDqLayer->pLumaQp[iMbXy] +
1933                                          pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
1934     }
1935 
1936   }
1937 
1938   if (pCurDqLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurDqLayer->pDec->pMbType[iMbXy]) {
1939     memset (pCurDqLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (pCurDqLayer->pScaledTCoeff[iMbXy][0]));
1940     int32_t iQpDelta, iId8x8, iId4x4;
1941 
1942     WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mb_qp_delta
1943     iQpDelta = iCode;
1944 
1945     if (iQpDelta > 25 || iQpDelta < -26) { //out of iQpDelta range
1946       return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_QP);
1947     }
1948 
1949     pCurDqLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp
1950     pSlice->iLastMbQp = pCurDqLayer->pLumaQp[iMbXy];
1951     for (i = 0; i < 2; i++) {
1952       pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pSlice->iLastMbQp +
1953                                          pSliceHeader->pPps->iChromaQpIndexOffset[i], 0,
1954                                          51)];
1955     }
1956 
1957 
1958     BsStartCavlc (pBs);
1959 
1960     if (MB_TYPE_INTRA16x16 == pCurDqLayer->pDec->pMbType[iMbXy]) {
1961       //step1: Luma DC
1962       if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, 0, 16, g_kuiLumaDcZigzagScan, I16_LUMA_DC,
1963                                           pCurDqLayer->pScaledTCoeff[iMbXy], pCurDqLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
1964         return iRet;//abnormal
1965       }
1966       //step2: Luma AC
1967       if (uiCbpL) {
1968         for (i = 0; i < 16; i++) {
1969           if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, i, iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1,
1970                                               g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), I16_LUMA_AC, pCurDqLayer->pScaledTCoeff[iMbXy] + (i << 4),
1971                                               pCurDqLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
1972             return iRet;//abnormal
1973           }
1974         }
1975         ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1]));
1976         ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2]));
1977         ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3]));
1978         ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
1979       }
1980     } else { //non-MB_TYPE_INTRA16x16
1981       if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
1982         for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
1983           iMbResProperty = (IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA_8 : LUMA_DC_AC_INTER_8;
1984           if (uiCbpL & (1 << iId8x8)) {
1985             int32_t iIndex = (iId8x8 << 2);
1986             for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
1987               if ((iRet = WelsResidualBlockCavlc8x8 (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - iScanIdxStart + 1,
1988                                                      g_kuiZigzagScan8x8 + iScanIdxStart, iMbResProperty, pCurDqLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), iId4x4,
1989                                                      pCurDqLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
1990                 return iRet;
1991               }
1992               iIndex++;
1993             }
1994           } else {
1995             ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[iId8x8 << 2]], 0);
1996             ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[ (iId8x8 << 2) + 2]], 0);
1997           }
1998         }
1999         ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1]));
2000         ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2]));
2001         ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3]));
2002         ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
2003       } else {
2004         for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
2005           if (uiCbpL & (1 << iId8x8)) {
2006             int32_t iIndex = (iId8x8 << 2);
2007             for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
2008               //Luma (DC and AC decoding together)
2009               if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - iScanIdxStart + 1,
2010                                                   g_kuiZigzagScan + iScanIdxStart, LUMA_DC_AC_INTRA, pCurDqLayer->pScaledTCoeff[iMbXy] + (iIndex << 4),
2011                                                   pCurDqLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
2012                 return iRet;//abnormal
2013               }
2014               iIndex++;
2015             }
2016           } else {
2017             ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[ (iId8x8 << 2)]], 0);
2018             ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[ (iId8x8 << 2) + 2]], 0);
2019           }
2020         }
2021         ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1]));
2022         ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2]));
2023         ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3]));
2024         ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
2025       }
2026     }
2027 
2028     //chroma
2029     //step1: DC
2030     if (1 == uiCbpC || 2 == uiCbpC) {
2031       for (i = 0; i < 2; i++) { //Cb Cr
2032         iMbResProperty = i ? CHROMA_DC_V : CHROMA_DC_U;
2033         if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, 16 + (i << 2), 4, g_kuiChromaDcScan, iMbResProperty,
2034                                             pCurDqLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6), pCurDqLayer->pChromaQp[iMbXy][i], pCtx)) != ERR_NONE) {
2035           return iRet;//abnormal
2036         }
2037       }
2038     }
2039 
2040     //step2: AC
2041     if (2 == uiCbpC) {
2042       for (i = 0; i < 2; i++) { //Cb Cr
2043         iMbResProperty = i ? CHROMA_AC_V : CHROMA_AC_U;
2044         int32_t iIndex = 16 + (i << 2);
2045         for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
2046           if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - WELS_MAX (iScanIdxStart,
2047                                               1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), iMbResProperty,
2048                                               pCurDqLayer->pScaledTCoeff[iMbXy] + (iIndex << 4),
2049                                               pCurDqLayer->pChromaQp[iMbXy][i], pCtx)) != ERR_NONE) {
2050             return iRet;//abnormal
2051           }
2052           iIndex++;
2053         }
2054       }
2055       ST16A2 (&pNzc[16], LD16A2 (&pNonZeroCount[6 + 8 * 1]));
2056       ST16A2 (&pNzc[20], LD16A2 (&pNonZeroCount[6 + 8 * 2]));
2057       ST16A2 (&pNzc[18], LD16A2 (&pNonZeroCount[6 + 8 * 4]));
2058       ST16A2 (&pNzc[22], LD16A2 (&pNonZeroCount[6 + 8 * 5]));
2059     }
2060     BsEndCavlc (pBs);
2061   }
2062 
2063   return ERR_NONE;
2064 }
2065 
WelsDecodeMbCavlcISlice(PWelsDecoderContext pCtx,PNalUnit pNalCur,uint32_t & uiEosFlag)2066 int32_t WelsDecodeMbCavlcISlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag) {
2067   PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
2068   PBitStringAux pBs = pCurDqLayer->pBitStringAux;
2069   PSliceHeaderExt pSliceHeaderExt = &pCurDqLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt;
2070   int32_t iBaseModeFlag;
2071   int32_t iRet = 0; //should have the return value to indicate decoding error or not, It's NECESSARY--2010.4.15
2072   uint32_t uiCode;
2073   intX_t iUsedBits;
2074   if (pSliceHeaderExt->bAdaptiveBaseModeFlag == 1) {
2075     WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //base_mode_flag
2076     iBaseModeFlag = uiCode;
2077   } else {
2078     iBaseModeFlag = pSliceHeaderExt->bDefaultBaseModeFlag;
2079   }
2080   if (!iBaseModeFlag) {
2081     iRet = WelsActualDecodeMbCavlcISlice (pCtx);
2082   } else {
2083     WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "iBaseModeFlag (%d) != 0, inter-layer prediction not supported.",
2084              iBaseModeFlag);
2085     return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_UNSUPPORTED_ILP);
2086   }
2087   if (iRet) { //occur error when parsing, MUST STOP decoding
2088     return iRet;
2089   }
2090 
2091   // check whether there is left bits to read next time in case multiple slices
2092   iUsedBits = ((pBs->pCurBuf - pBs->pStartBuf) << 3) - (16 - pBs->iLeftBits);
2093   // sub 1, for stop bit
2094   if ((iUsedBits == (pBs->iBits - 1)) && (0 >= pCurDqLayer->sLayerInfo.sSliceInLayer.iMbSkipRun)) { // slice boundary
2095     uiEosFlag = 1;
2096   }
2097   if (iUsedBits > (pBs->iBits -
2098                    1)) { //When BS incomplete, as long as find it, SHOULD stop decoding to avoid mosaic or crash.
2099     WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING,
2100              "WelsDecodeMbCavlcISlice()::::pBs incomplete, iUsedBits:%" PRId64 " > pBs->iBits:%d, MUST stop decoding.",
2101              (int64_t) iUsedBits, pBs->iBits);
2102     return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_BS_INCOMPLETE);
2103   }
2104   return ERR_NONE;
2105 }
2106 
/*!
 * \brief  Parse the syntax of one coded (non-skipped) macroblock of a CAVLC P slice.
 *
 * Reads mb_type first. Values below 5 select an inter type from g_ksInterPMbTypeInfo and
 * the motion information is parsed; larger values are intra types after subtracting 5
 * (0: intra NxN, 1..24: intra 16x16, 25: I_PCM, which copies 384 raw sample bytes and
 * returns early). For all types except intra 16x16 and I_PCM, coded_block_pattern and,
 * when applicable, transform_size_8x8_flag are read next. If the MB carries residual
 * (non-zero CBP or intra 16x16), mb_qp_delta and the luma/chroma residual blocks follow.
 *
 * Results are stored in the per-macroblock arrays of pCtx->pCurDqLayer (pMbType, pCbp,
 * pLumaQp, pChromaQp, pNzc, pScaledTCoeff, ...) at index pCurDqLayer->iMbXyIndex, and
 * pSlice->iLastMbQp is updated when mb_qp_delta is read.
 *
 * \param  pCtx  decoder context supplying the current layer, VLC tables, SPS/PPS and log context
 * \return ERR_NONE on success; otherwise an error code propagated from a parse helper or
 *         created with GENERATE_ERROR_NO (invalid mb_type, CBP or QP delta, unsupported
 *         residual prediction)
 */
WelsActualDecodeMbCavlcPSlice(PWelsDecoderContext pCtx)2107 int32_t WelsActualDecodeMbCavlcPSlice (PWelsDecoderContext pCtx) {
2108   SVlcTable* pVlcTable     = pCtx->pVlcTable;
2109   PDqLayer pCurDqLayer             = pCtx->pCurDqLayer;
2110   PBitStringAux pBs              = pCurDqLayer->pBitStringAux;
2111   PSlice pSlice                  = &pCurDqLayer->sLayerInfo.sSliceInLayer;
2112   PSliceHeader pSliceHeader      = &pSlice->sSliceHeaderExt.sSliceHeader;
2113 
       // Coefficient scan range taken from the slice header extension.
2114   int32_t iScanIdxStart = pSlice->sSliceHeaderExt.uiScanIdxStart;
2115   int32_t iScanIdxEnd   = pSlice->sSliceHeaderExt.uiScanIdxEnd;
2116 
2117   SWelsNeighAvail sNeighAvail;
2118   int32_t iMbX = pCurDqLayer->iMbX;
2119   int32_t iMbY = pCurDqLayer->iMbY;
2120   const int32_t iMbXy = pCurDqLayer->iMbXyIndex;
       // Per-MB non-zero-coefficient counts that this function finally publishes (24 bytes are written).
2121   int8_t* pNzc = pCurDqLayer->pNzc[iMbXy];
2122   int32_t i;
2123   int32_t iRet = ERR_NONE;
2124   uint32_t uiMbType = 0, uiCbp = 0, uiCbpL = 0, uiCbpC = 0;
2125   uint32_t uiCode;
2126   int32_t iCode;
2127   int32_t iMbResProperty;
2128 
2129   GetNeighborAvailMbType (&sNeighAvail, pCurDqLayer);
       // Local 48-entry non-zero-count cache used by the residual parsers
       // (the last macro argument is presumably the stack alignment in bytes -- see the macro definition).
2130   ENFORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16);
2131   pCurDqLayer->pInterPredictionDoneFlag[iMbXy] = 0;//2009.10.23
2132   WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //uiMbType
2133   uiMbType = uiCode;
2134   if (uiMbType < 5) { //inter MB type
2135     int16_t iMotionVector[LIST_A][30][MV_A];
2136     int8_t  iRefIndex[LIST_A][30];
2137     pCurDqLayer->pDec->pMbType[iMbXy] = g_ksInterPMbTypeInfo[uiMbType].iType;
2138     WelsFillCacheInter (&sNeighAvail, pNonZeroCount, iMotionVector, iRefIndex, pCurDqLayer);
2139 
2140     if ((iRet = ParseInterInfo (pCtx, iMotionVector, iRefIndex, pBs)) != ERR_NONE) {
2141       return iRet;//abnormal
2142     }
2143 
         // residual_prediction_flag: explicit in the stream only in adaptive mode, else slice default.
2144     if (pSlice->sSliceHeaderExt.bAdaptiveResidualPredFlag == 1) {
2145       WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //residual_prediction_flag
2146       pCurDqLayer->pResidualPredFlag[iMbXy] =  uiCode;
2147     } else {
2148       pCurDqLayer->pResidualPredFlag[iMbXy] = pSlice->sSliceHeaderExt.bDefaultResidualPredFlag;
2149     }
2150 
         // A set residual prediction flag is rejected as unsupported; the assignment in the
         // first branch repeats the reset already done before mb_type was read.
2151     if (pCurDqLayer->pResidualPredFlag[iMbXy] == 0) {
2152       pCurDqLayer->pInterPredictionDoneFlag[iMbXy] = 0;
2153     } else {
2154       WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "residual_pred_flag = 1 not supported.");
2155       return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_UNSUPPORTED_ILP);
2156     }
2157   } else { //intra MB type
         // Intra types in a P slice are offset by 5; after the subtraction the valid range is 0..25.
2158     uiMbType -= 5;
2159     if (uiMbType > 25)
2160       return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE);
         // With uiChromaFormatIdc == 0 the intra 16x16 types 5..12 and 17..24 are rejected
         // (presumably the ones implying a non-zero chroma CBP -- verify against g_kuiI16CbpTable).
2161     if (!pCtx->pSps->uiChromaFormatIdc && ((uiMbType >= 5 && uiMbType <= 12) || (uiMbType >= 17 && uiMbType <= 24)))
2162       return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE);
2163 
2164     if (25 == uiMbType) {
2165       WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "I_PCM mode exists in P slice!");
2166       int32_t iDecStrideL = pCurDqLayer->pDec->iLinesize[0];
2167       int32_t iDecStrideC = pCurDqLayer->pDec->iLinesize[1];
2168 
           // Offsets of this MB's top-left sample: 16x16 luma block, 8x8 chroma blocks.
2169       int32_t iOffsetL = (iMbX + iMbY * iDecStrideL) << 4;
2170       int32_t iOffsetC = (iMbX + iMbY * iDecStrideC) << 3;
2171 
2172       uint8_t* pDecY = pCurDqLayer->pDec->pData[0] + iOffsetL;
2173       uint8_t* pDecU = pCurDqLayer->pDec->pData[1] + iOffsetC;
2174       uint8_t* pDecV = pCurDqLayer->pDec->pData[2] + iOffsetC;
2175 
2176       uint8_t* pTmpBsBuf;
2177 
           // Note: this declaration shadows the function-level loop counter i.
2178       int32_t i;
2179       int32_t iCopySizeY  = (sizeof (uint8_t) << 4);
2180       int32_t iCopySizeUV = (sizeof (uint8_t) << 3);
2181 
           // Byte count by which pCurBuf is moved back below; presumably the bytes the bit reader
           // has fetched ahead of the current bit position -- TODO confirm against the bit reader.
2182       int32_t iIndex = ((-pBs->iLeftBits) >> 3) + 2;
2183 
2184       pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA_PCM;
2185 
2186       //step 1: locating bit-stream pointer [must align into integer byte]
2187       pBs->pCurBuf -= iIndex;
2188 
2189       //step 2: copy pixel from bit-stream into fdec [reconstruction]
           // NOTE(review): no check is visible here that 384 bytes remain in the buffer before
           // the copies; confirm that the caller or buffer padding guarantees it.
2190       pTmpBsBuf = pBs->pCurBuf;
2191       if (!pCtx->pParam->bParseOnly) {
2192         for (i = 0; i < 16; i++) { //luma
2193           memcpy (pDecY, pTmpBsBuf, iCopySizeY);
2194           pDecY += iDecStrideL;
2195           pTmpBsBuf += 16;
2196         }
2197 
2198         for (i = 0; i < 8; i++) { //cb
2199           memcpy (pDecU, pTmpBsBuf, iCopySizeUV);
2200           pDecU += iDecStrideC;
2201           pTmpBsBuf += 8;
2202         }
2203         for (i = 0; i < 8; i++) { //cr
2204           memcpy (pDecV, pTmpBsBuf, iCopySizeUV);
2205           pDecV += iDecStrideC;
2206           pTmpBsBuf += 8;
2207         }
2208       }
2209 
           // Skip the raw samples (16*16 luma + 2 * 8*8 chroma = 384 bytes), also in parse-only mode.
2210       pBs->pCurBuf += 384;
2211 
2212       //step 3: update QP and pNonZeroCount
2213       pCurDqLayer->pLumaQp[iMbXy] = 0;
2214       pCurDqLayer->pChromaQp[iMbXy][0] = pCurDqLayer->pChromaQp[iMbXy][1] = 0;
2215       //Rec. 9.2.1 for PCM, nzc=16
2216       ST32A4 (&pNzc[0], 0x10101010);
2217       ST32A4 (&pNzc[4], 0x10101010);
2218       ST32A4 (&pNzc[8], 0x10101010);
2219       ST32A4 (&pNzc[12], 0x10101010);
2220       ST32A4 (&pNzc[16], 0x10101010);
2221       ST32A4 (&pNzc[20], 0x10101010);
           // Presumably restarts the bit reader at the new pCurBuf -- see InitReadBits.
2222       WELS_READ_VERIFY (InitReadBits (pBs, 0));
2223       return ERR_NONE;
2224     } else {
2225       if (0 == uiMbType) {
2226         ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16);
2227         pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA4x4;
2228         if (pCtx->pPps->bTransform8x8ModeFlag) {
2229           WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //transform_size_8x8_flag
2230           pCurDqLayer->pTransformSize8x8Flag[iMbXy] = !!uiCode;
2231           if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
                 // uiMbType is not read again in this function; later code uses pMbType[iMbXy].
2232             uiMbType = pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA8x8;
2233           }
2234         }
             // Both branches fill the neighbour cache the same way; only the mode parser differs.
2235         if (!pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
2236           pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurDqLayer);
2237           WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurDqLayer));
2238         } else {
2239           pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurDqLayer);
2240           WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurDqLayer));
2241         }
2242       } else { //I_PCM exclude, we can ignore it
             // Intra 16x16: prediction mode and CBP are both derived from mb_type itself.
2243         pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA16x16;
2244         pCurDqLayer->pTransformSize8x8Flag[iMbXy] = false;
2245         pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
2246         pCurDqLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3;
2247         pCurDqLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2];
2248         uiCbpC = pCtx->pSps->uiChromaFormatIdc ? pCurDqLayer->pCbp[iMbXy] >> 4 : 0;
2249         uiCbpL = pCurDqLayer->pCbp[iMbXy] & 15;
2250         WelsFillCacheNonZeroCount (&sNeighAvail, pNonZeroCount, pCurDqLayer);
2251         if ((iRet = ParseIntra16x16Mode (pCtx, &sNeighAvail, pBs, pCurDqLayer)) != ERR_NONE) {
2252           return iRet;
2253         }
2254       }
2255     }
2256   }
2257 
       // All types except intra 16x16 carry an explicit coded_block_pattern code.
2258   if (MB_TYPE_INTRA16x16 != pCurDqLayer->pDec->pMbType[iMbXy]) {
2259     WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //coded_block_pattern
2260     uiCbp = uiCode;
2261     {
           // Range-check the code before it is used as a table index:
           // at most 47 with chroma, at most 15 when uiChromaFormatIdc is 0.
2262       if (pCtx->pSps->uiChromaFormatIdc && (uiCbp > 47))
2263         return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_CBP);
2264       if (!pCtx->pSps->uiChromaFormatIdc && (uiCbp > 15))
2265         return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_CBP);
2266       if (MB_TYPE_INTRA4x4 == pCurDqLayer->pDec->pMbType[iMbXy] || MB_TYPE_INTRA8x8 == pCurDqLayer->pDec->pMbType[iMbXy]) {
2267 
2268         uiCbp = pCtx->pSps->uiChromaFormatIdc ? g_kuiIntra4x4CbpTable[uiCbp] : g_kuiIntra4x4CbpTable400[uiCbp];
2269       } else //inter
2270         uiCbp = pCtx->pSps->uiChromaFormatIdc ?  g_kuiInterCbpTable[uiCbp] : g_kuiInterCbpTable400[uiCbp];
2271     }
2272 
         // Low 4 bits: one flag per luma 8x8 block; bits above: chroma pattern (0, 1 or 2 is tested below).
2273     pCurDqLayer->pCbp[iMbXy] = uiCbp;
2274     uiCbpC = pCurDqLayer->pCbp[iMbXy] >> 4;
2275     uiCbpL = pCurDqLayer->pCbp[iMbXy] & 15;
2276 
2277     // Needs modification when B pictures are added
         // The flag is read only for non-intra-NxN MBs with coded luma, when the PPS enables the
         // 8x8 transform and the partitioning allows it (type range check or no sub-8x8 partitions).
2278     bool bNeedParseTransformSize8x8Flag =
2279       (((pCurDqLayer->pDec->pMbType[iMbXy] >= MB_TYPE_16x16 && pCurDqLayer->pDec->pMbType[iMbXy] <= MB_TYPE_8x16)
2280         || pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy])
2281        && (pCurDqLayer->pDec->pMbType[iMbXy] != MB_TYPE_INTRA8x8)
2282        && (pCurDqLayer->pDec->pMbType[iMbXy] != MB_TYPE_INTRA4x4)
2283        && (uiCbpL > 0)
2284        && (pCtx->pPps->bTransform8x8ModeFlag));
2285 
2286     if (bNeedParseTransformSize8x8Flag) {
2287       WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //transform_size_8x8_flag
2288       pCurDqLayer->pTransformSize8x8Flag[iMbXy] = !!uiCode;
2289     }
2290   }
2291 
       // Clear all 24 published nzc bytes; they are refilled below only where residual is parsed.
2292   ST32A4 (&pNzc[0], 0);
2293   ST32A4 (&pNzc[4], 0);
2294   ST32A4 (&pNzc[8], 0);
2295   ST32A4 (&pNzc[12], 0);
2296   ST32A4 (&pNzc[16], 0);
2297   ST32A4 (&pNzc[20], 0);
       // No residual and not intra 16x16 / I_BL: no mb_qp_delta is read, so the QP of the
       // previous MB (iLastMbQp) is reused and the chroma QPs are derived from it.
2298   if (pCurDqLayer->pCbp[iMbXy] == 0 && !IS_INTRA16x16 (pCurDqLayer->pDec->pMbType[iMbXy])
2299       && !IS_I_BL (pCurDqLayer->pDec->pMbType[iMbXy])) {
2300     pCurDqLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp;
2301     for (i = 0; i < 2; i++) {
2302       pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurDqLayer->pLumaQp[iMbXy] +
2303                                          pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
2304     }
2305   }
2306 
       // Residual section: present when any CBP bit is set, and always for intra 16x16.
2307   if (pCurDqLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurDqLayer->pDec->pMbType[iMbXy]) {
2308     int32_t iQpDelta, iId8x8, iId4x4;
2309     memset (pCurDqLayer->pScaledTCoeff[iMbXy], 0, MB_COEFF_LIST_SIZE * sizeof (int16_t));
2310     WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mb_qp_delta
2311     iQpDelta = iCode;
2312 
2313     if (iQpDelta > 25 || iQpDelta < -26) { //out of iQpDelta range
2314       return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_QP);
2315     }
2316 
         // The new luma QP wraps modulo 52; adding 52 first keeps the left operand non-negative.
2317     pCurDqLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp
2318     pSlice->iLastMbQp = pCurDqLayer->pLumaQp[iMbXy];
2319     for (i = 0; i < 2; i++) {
2320       pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pSlice->iLastMbQp +
2321                                          pSliceHeader->pPps->iChromaQpIndexOffset[i], 0,
2322                                          51)];
2323     }
2324 
2325     BsStartCavlc (pBs);
2326 
2327     if (MB_TYPE_INTRA16x16 == pCurDqLayer->pDec->pMbType[iMbXy]) {
2328       //step1: Luma DC
2329       if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, 0, 16, g_kuiLumaDcZigzagScan, I16_LUMA_DC,
2330                                           pCurDqLayer->pScaledTCoeff[iMbXy], pCurDqLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
2331         return iRet;//abnormal
2332       }
2333       //step2: Luma AC
           // AC blocks start scanning at position max(iScanIdxStart, 1), i.e. never at index 0;
           // each of the 16 blocks owns 16 coefficients in pScaledTCoeff (offset i << 4).
2334       if (uiCbpL) {
2335         for (i = 0; i < 16; i++) {
2336           if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, i, iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1,
2337                                               g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), I16_LUMA_AC, pCurDqLayer->pScaledTCoeff[iMbXy] + (i << 4),
2338                                               pCurDqLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
2339             return iRet;//abnormal
2340           }
2341         }
             // Publish the luma counts: four bytes from each of cache rows 1..4 (row stride 8, column 1).
2342         ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1]));
2343         ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2]));
2344         ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3]));
2345         ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
2346       }
2347     } else { //non-MB_TYPE_INTRA16x16
2348       if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
             // 8x8 transform: each coded 8x8 block is read as four CAVLC blocks that share one
             // 64-coefficient area (offset iId8x8 << 6); iId4x4 tells the parser which part it is.
2349         for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
2350           iMbResProperty = (IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA_8 : LUMA_DC_AC_INTER_8;
2351           if (uiCbpL & (1 << iId8x8)) {
2352             int32_t iIndex = (iId8x8 << 2);
2353             for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
2354               if ((iRet = WelsResidualBlockCavlc8x8 (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - iScanIdxStart + 1,
2355                                                      g_kuiZigzagScan8x8 + iScanIdxStart, iMbResProperty, pCurDqLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), iId4x4,
2356                                                      pCurDqLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
2357                 return iRet;
2358               }
2359               iIndex++;
2360             }
2361           } else {
                 // 8x8 block not coded: zero its cache entries (presumably two adjacent entries per ST16).
2362             ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[iId8x8 << 2]], 0);
2363             ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[ (iId8x8 << 2) + 2]], 0);
2364           }
2365         }
2366         ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1]));
2367         ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2]));
2368         ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3]));
2369         ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
2370       } else { // Normal T4x4
2371         for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
2372           iMbResProperty = (IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA : LUMA_DC_AC_INTER;
2373           if (uiCbpL & (1 << iId8x8)) {
2374             int32_t iIndex = (iId8x8 << 2);
2375             for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
2376               //Luma (DC and AC decoding together)
2377               if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - iScanIdxStart + 1,
2378                                                   g_kuiZigzagScan + iScanIdxStart, iMbResProperty, pCurDqLayer->pScaledTCoeff[iMbXy] + (iIndex << 4),
2379                                                   pCurDqLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
2380                 return iRet;//abnormal
2381               }
2382               iIndex++;
2383             }
2384           } else {
2385             ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[iId8x8 << 2]], 0);
2386             ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[ (iId8x8 << 2) + 2]], 0);
2387           }
2388         }
2389         ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1]));
2390         ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2]));
2391         ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3]));
2392         ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
2393       }
2394     }
2395 
2396 
2397     //chroma
2398     //step1: DC
         // Chroma DC is parsed for chroma pattern 1 or 2; Cb coefficients start at offset 256,
         // Cr at 256 + 64. Intra and inter MBs use different residual property ids.
2399     if (1 == uiCbpC || 2 == uiCbpC) {
2400       for (i = 0; i < 2; i++) { //Cb Cr
2401         if (IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy]))
2402           iMbResProperty = i ? CHROMA_DC_V : CHROMA_DC_U;
2403         else
2404           iMbResProperty = i ? CHROMA_DC_V_INTER : CHROMA_DC_U_INTER;
2405 
2406         if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, 16 + (i << 2), 4, g_kuiChromaDcScan, iMbResProperty,
2407                                             pCurDqLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6), pCurDqLayer->pChromaQp[iMbXy][i], pCtx)) != ERR_NONE) {
2408           return iRet;//abnormal
2409         }
2410       }
2411     } else {
           // Intentionally empty: nothing to parse when no chroma DC is coded.
2412     }
2413     //step2: AC
         // Chroma AC is parsed only for chroma pattern 2: four blocks per plane, block indices
         // 16..19 (Cb) and 20..23 (Cr), again starting the scan at max(iScanIdxStart, 1).
2414     if (2 == uiCbpC) {
2415       for (i = 0; i < 2; i++) { //Cb Cr
2416         if (IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy]))
2417           iMbResProperty = i ? CHROMA_AC_V : CHROMA_AC_U;
2418         else
2419           iMbResProperty = i ? CHROMA_AC_V_INTER : CHROMA_AC_U_INTER;
2420 
2421         int32_t iIndex = 16 + (i << 2);
2422         for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
2423           if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - WELS_MAX (iScanIdxStart,
2424                                               1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), iMbResProperty,
2425                                               pCurDqLayer->pScaledTCoeff[iMbXy] + (iIndex << 4),
2426                                               pCurDqLayer->pChromaQp[iMbXy][i], pCtx)) != ERR_NONE) {
2427             return iRet;//abnormal
2428           }
2429           iIndex++;
2430         }
2431       }
           // Publish the chroma counts: two bytes each from cache column 6 of rows 1, 2, 4 and 5.
2432       ST16A2 (&pNzc[16], LD16A2 (&pNonZeroCount[6 + 8 * 1]));
2433       ST16A2 (&pNzc[20], LD16A2 (&pNonZeroCount[6 + 8 * 2]));
2434       ST16A2 (&pNzc[18], LD16A2 (&pNonZeroCount[6 + 8 * 4]));
2435       ST16A2 (&pNzc[22], LD16A2 (&pNonZeroCount[6 + 8 * 5]));
2436     }
2437     BsEndCavlc (pBs);
2438   }
2439 
2440   return ERR_NONE;
2441 }
2442 
WelsDecodeMbCavlcPSlice(PWelsDecoderContext pCtx,PNalUnit pNalCur,uint32_t & uiEosFlag)2443 int32_t WelsDecodeMbCavlcPSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag) {
2444   PDqLayer pCurDqLayer             = pCtx->pCurDqLayer;
2445   PBitStringAux pBs              = pCurDqLayer->pBitStringAux;
2446   PSlice pSlice                  = &pCurDqLayer->sLayerInfo.sSliceInLayer;
2447   PSliceHeader pSliceHeader      = &pSlice->sSliceHeaderExt.sSliceHeader;
2448   PPicture* ppRefPic = pCtx->sRefPic.pRefList[LIST_0];
2449   intX_t iUsedBits;
2450   const int32_t iMbXy = pCurDqLayer->iMbXyIndex;
2451   int8_t* pNzc = pCurDqLayer->pNzc[iMbXy];
2452   int32_t iBaseModeFlag, i;
2453   int32_t iRet = 0; //should have the return value to indicate decoding error or not, It's NECESSARY--2010.4.15
2454   uint32_t uiCode;
2455 
2456   pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
2457   pCurDqLayer->pTransformSize8x8Flag[iMbXy] = false;
2458 
2459   if (-1 == pSlice->iMbSkipRun) {
2460     WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //mb_skip_run
2461     pSlice->iMbSkipRun = uiCode;
2462     if (-1 == pSlice->iMbSkipRun) {
2463       return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_SKIP_RUN);
2464     }
2465   }
2466   if (pSlice->iMbSkipRun--) {
2467     int16_t iMv[2];
2468 
2469     pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_SKIP;
2470     ST32A4 (&pNzc[0], 0);
2471     ST32A4 (&pNzc[4], 0);
2472     ST32A4 (&pNzc[8], 0);
2473     ST32A4 (&pNzc[12], 0);
2474     ST32A4 (&pNzc[16], 0);
2475     ST32A4 (&pNzc[20], 0);
2476 
2477     pCurDqLayer->pInterPredictionDoneFlag[iMbXy] = 0;
2478     memset (pCurDqLayer->pDec->pRefIndex[0][iMbXy], 0, sizeof (int8_t) * 16);
2479     bool bIsPending = GetThreadCount (pCtx) > 1;
2480     pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[0] && (ppRefPic[0]->bIsComplete
2481                             || bIsPending));
2482     //predict iMv
2483     PredPSkipMvFromNeighbor (pCurDqLayer, iMv);
2484     for (i = 0; i < 16; i++) {
2485       ST32A2 (pCurDqLayer->pDec->pMv[0][iMbXy][i], * (uint32_t*)iMv);
2486     }
2487 
2488     //if (!pSlice->sSliceHeaderExt.bDefaultResidualPredFlag) {
2489     //  memset (pCurDqLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (int16_t));
2490     //}
2491 
2492     //reset rS
2493     if (!pSlice->sSliceHeaderExt.bDefaultResidualPredFlag ||
2494         (pNalCur->sNalHeaderExt.uiQualityId == 0 && pNalCur->sNalHeaderExt.uiDependencyId == 0)) {
2495       pCurDqLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp;
2496       for (i = 0; i < 2; i++) {
2497         pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurDqLayer->pLumaQp[iMbXy] +
2498                                            pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
2499       }
2500     }
2501 
2502     pCurDqLayer->pCbp[iMbXy] = 0;
2503   } else {
2504     if (pSlice->sSliceHeaderExt.bAdaptiveBaseModeFlag == 1) {
2505       WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //base_mode_flag
2506       iBaseModeFlag = uiCode;
2507     } else {
2508       iBaseModeFlag = pSlice->sSliceHeaderExt.bDefaultBaseModeFlag;
2509     }
2510     if (!iBaseModeFlag) {
2511       iRet = WelsActualDecodeMbCavlcPSlice (pCtx);
2512     } else {
2513       WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "iBaseModeFlag (%d) != 0, inter-layer prediction not supported.",
2514                iBaseModeFlag);
2515       return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_UNSUPPORTED_ILP);
2516     }
2517     if (iRet) { //occur error when parsing, MUST STOP decoding
2518       return iRet;
2519     }
2520   }
2521   // check whether there is left bits to read next time in case multiple slices
2522   iUsedBits = ((pBs->pCurBuf - pBs->pStartBuf) << 3) - (16 - pBs->iLeftBits);
2523   // sub 1, for stop bit
2524   if ((iUsedBits == (pBs->iBits - 1)) && (0 >= pCurDqLayer->sLayerInfo.sSliceInLayer.iMbSkipRun)) { // slice boundary
2525     uiEosFlag = 1;
2526   }
2527   if (iUsedBits > (pBs->iBits -
2528                    1)) { //When BS incomplete, as long as find it, SHOULD stop decoding to avoid mosaic or crash.
2529     WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING,
2530              "WelsDecodeMbCavlcISlice()::::pBs incomplete, iUsedBits:%" PRId64 " > pBs->iBits:%d, MUST stop decoding.",
2531              (int64_t) iUsedBits, pBs->iBits);
2532     return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_BS_INCOMPLETE);
2533   }
2534   return ERR_NONE;
2535 }
2536 
WelsDecodeMbCavlcBSlice(PWelsDecoderContext pCtx,PNalUnit pNalCur,uint32_t & uiEosFlag)2537 int32_t WelsDecodeMbCavlcBSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag) {
2538   PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
2539   PBitStringAux pBs = pCurDqLayer->pBitStringAux;
2540   PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer;
2541   PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader;
2542   PPicture* ppRefPicL0 = pCtx->sRefPic.pRefList[LIST_0];
2543   PPicture* ppRefPicL1 = pCtx->sRefPic.pRefList[LIST_1];
2544   intX_t iUsedBits;
2545   const int32_t iMbXy = pCurDqLayer->iMbXyIndex;
2546   int8_t* pNzc = pCurDqLayer->pNzc[iMbXy];
2547   int32_t iBaseModeFlag, i;
2548   int32_t iRet = 0; //should have the return value to indicate decoding error or not, It's NECESSARY--2010.4.15
2549   uint32_t uiCode;
2550 
2551   pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
2552   pCurDqLayer->pTransformSize8x8Flag[iMbXy] = false;
2553 
2554   if (-1 == pSlice->iMbSkipRun) {
2555     WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //mb_skip_run
2556     pSlice->iMbSkipRun = uiCode;
2557     if (-1 == pSlice->iMbSkipRun) {
2558       return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_SKIP_RUN);
2559     }
2560     if ((uint32_t) (pSlice->iMbSkipRun) > (uint32_t) (pCurDqLayer->iMbWidth * pCurDqLayer->iMbHeight - iMbXy)) {
2561       return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_SKIP_RUN);
2562     }
2563   }
2564   if (pSlice->iMbSkipRun--) {
2565     int16_t iMv[LIST_A][2] = { { 0, 0 }, { 0, 0 } };
2566     int8_t  ref[LIST_A] = { 0 };
2567 
2568     pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_SKIP | MB_TYPE_DIRECT;
2569     ST32A4 (&pNzc[0], 0);
2570     ST32A4 (&pNzc[4], 0);
2571     ST32A4 (&pNzc[8], 0);
2572     ST32A4 (&pNzc[12], 0);
2573     ST32A4 (&pNzc[16], 0);
2574     ST32A4 (&pNzc[20], 0);
2575 
2576     pCurDqLayer->pInterPredictionDoneFlag[iMbXy] = 0;
2577     memset (pCurDqLayer->pDec->pRefIndex[LIST_0][iMbXy], 0, sizeof (int8_t) * 16);
2578     memset (pCurDqLayer->pDec->pRefIndex[LIST_1][iMbXy], 0, sizeof (int8_t) * 16);
2579     bool bIsPending = GetThreadCount (pCtx) > 1;
2580     pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPicL0[0] && (ppRefPicL0[0]->bIsComplete
2581                             || bIsPending)) || ! (ppRefPicL1[0] && (ppRefPicL1[0]->bIsComplete || bIsPending));
2582 
2583     /*if (pCtx->bMbRefConcealed) {
2584       SLogContext* pLogCtx = & (pCtx->sLogCtx);
2585       WelsLog (pLogCtx, WELS_LOG_ERROR, "Ref Picture for B-Slice is lost, B-Slice decoding cannot be continued!");
2586       return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_DATA, ERR_INFO_REFERENCE_PIC_LOST);
2587     }*/
2588     //predict iMv
2589     SubMbType subMbType;
2590     if (pSliceHeader->iDirectSpatialMvPredFlag) {
2591 
2592       //predict direct spatial mv
2593       int32_t ret = PredMvBDirectSpatial (pCtx, iMv, ref, subMbType);
2594       if (ret != ERR_NONE) {
2595         return ret;
2596       }
2597     } else {
2598       //temporal direct mode
2599       int32_t ret = PredBDirectTemporal (pCtx, iMv, ref, subMbType);
2600       if (ret != ERR_NONE) {
2601         return ret;
2602       }
2603     }
2604 
2605     //if (!pSlice->sSliceHeaderExt.bDefaultResidualPredFlag) {
2606     //  memset (pCurDqLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (int16_t));
2607     //}
2608 
2609     //reset rS
2610     if (!pSlice->sSliceHeaderExt.bDefaultResidualPredFlag ||
2611         (pNalCur->sNalHeaderExt.uiQualityId == 0 && pNalCur->sNalHeaderExt.uiDependencyId == 0)) {
2612       pCurDqLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp;
2613       for (i = 0; i < 2; i++) {
2614         pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurDqLayer->pLumaQp[iMbXy] +
2615                                            pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
2616       }
2617     }
2618 
2619     pCurDqLayer->pCbp[iMbXy] = 0;
2620   } else {
2621     if (pSlice->sSliceHeaderExt.bAdaptiveBaseModeFlag == 1) {
2622       WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //base_mode_flag
2623       iBaseModeFlag = uiCode;
2624     } else {
2625       iBaseModeFlag = pSlice->sSliceHeaderExt.bDefaultBaseModeFlag;
2626     }
2627     if (!iBaseModeFlag) {
2628       iRet = WelsActualDecodeMbCavlcBSlice (pCtx);
2629     } else {
2630       WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "iBaseModeFlag (%d) != 0, inter-layer prediction not supported.",
2631                iBaseModeFlag);
2632       return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_UNSUPPORTED_ILP);
2633     }
2634     if (iRet) { //occur error when parsing, MUST STOP decoding
2635       return iRet;
2636     }
2637   }
2638   // check whether there is left bits to read next time in case multiple slices
2639   iUsedBits = ((pBs->pCurBuf - pBs->pStartBuf) << 3) - (16 - pBs->iLeftBits);
2640   // sub 1, for stop bit
2641   if ((iUsedBits == (pBs->iBits - 1)) && (0 >= pCurDqLayer->sLayerInfo.sSliceInLayer.iMbSkipRun)) { // slice boundary
2642     uiEosFlag = 1;
2643   }
2644   if (iUsedBits > (pBs->iBits -
2645                    1)) { //When BS incomplete, as long as find it, SHOULD stop decoding to avoid mosaic or crash.
2646     WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING,
2647              "WelsDecodeMbCavlcBSlice()::::pBs incomplete, iUsedBits:%" PRId64 " > pBs->iBits:%d, MUST stop decoding.",
2648              (int64_t)iUsedBits, pBs->iBits);
2649     return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_BS_INCOMPLETE);
2650   }
2651   return ERR_NONE;
2652 }
2653 
WelsActualDecodeMbCavlcBSlice(PWelsDecoderContext pCtx)2654 int32_t WelsActualDecodeMbCavlcBSlice (PWelsDecoderContext pCtx) {
2655   SVlcTable* pVlcTable = pCtx->pVlcTable;
2656   PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
2657   PBitStringAux pBs = pCurDqLayer->pBitStringAux;
2658   PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer;
2659   PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader;
2660 
2661   int32_t iScanIdxStart = pSlice->sSliceHeaderExt.uiScanIdxStart;
2662   int32_t iScanIdxEnd = pSlice->sSliceHeaderExt.uiScanIdxEnd;
2663 
2664   SWelsNeighAvail sNeighAvail;
2665   int32_t iMbX = pCurDqLayer->iMbX;
2666   int32_t iMbY = pCurDqLayer->iMbY;
2667   const int32_t iMbXy = pCurDqLayer->iMbXyIndex;
2668   int8_t* pNzc = pCurDqLayer->pNzc[iMbXy];
2669   int32_t i;
2670   int32_t iRet = ERR_NONE;
2671   uint32_t uiMbType = 0, uiCbp = 0, uiCbpL = 0, uiCbpC = 0;
2672   uint32_t uiCode;
2673   int32_t iCode;
2674   int32_t iMbResProperty;
2675 
2676   GetNeighborAvailMbType (&sNeighAvail, pCurDqLayer);
2677   ENFORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16);
2678   pCurDqLayer->pInterPredictionDoneFlag[iMbXy] = 0;//2009.10.23
2679   WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //uiMbType
2680   uiMbType = uiCode;
2681   if (uiMbType < 23) { //inter MB type
2682     int16_t iMotionVector[LIST_A][30][MV_A];
2683     int8_t  iRefIndex[LIST_A][30];
2684     pCurDqLayer->pDec->pMbType[iMbXy] = g_ksInterBMbTypeInfo[uiMbType].iType;
2685     WelsFillCacheInter (&sNeighAvail, pNonZeroCount, iMotionVector, iRefIndex, pCurDqLayer);
2686 
2687     if ((iRet = ParseInterBInfo (pCtx, iMotionVector, iRefIndex, pBs)) != ERR_NONE) {
2688       return iRet;//abnormal
2689     }
2690 
2691     if (pSlice->sSliceHeaderExt.bAdaptiveResidualPredFlag == 1) {
2692       WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //residual_prediction_flag
2693       pCurDqLayer->pResidualPredFlag[iMbXy] = uiCode;
2694     } else {
2695       pCurDqLayer->pResidualPredFlag[iMbXy] = pSlice->sSliceHeaderExt.bDefaultResidualPredFlag;
2696     }
2697 
2698     if (pCurDqLayer->pResidualPredFlag[iMbXy] == 0) {
2699       pCurDqLayer->pInterPredictionDoneFlag[iMbXy] = 0;
2700     } else {
2701       WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "residual_pred_flag = 1 not supported.");
2702       return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_UNSUPPORTED_ILP);
2703     }
2704   } else { //intra MB type
2705     uiMbType -= 23;
2706     if (uiMbType > 25)
2707       return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE);
2708     if (!pCtx->pSps->uiChromaFormatIdc && ((uiMbType >= 5 && uiMbType <= 12) || (uiMbType >= 17 && uiMbType <= 24)))
2709       return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE);
2710 
2711     if (25 == uiMbType) {
2712       WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "I_PCM mode exists in B slice!");
2713       int32_t iDecStrideL = pCurDqLayer->pDec->iLinesize[0];
2714       int32_t iDecStrideC = pCurDqLayer->pDec->iLinesize[1];
2715 
2716       int32_t iOffsetL = (iMbX + iMbY * iDecStrideL) << 4;
2717       int32_t iOffsetC = (iMbX + iMbY * iDecStrideC) << 3;
2718 
2719       uint8_t* pDecY = pCurDqLayer->pDec->pData[0] + iOffsetL;
2720       uint8_t* pDecU = pCurDqLayer->pDec->pData[1] + iOffsetC;
2721       uint8_t* pDecV = pCurDqLayer->pDec->pData[2] + iOffsetC;
2722 
2723       uint8_t* pTmpBsBuf;
2724 
2725       int32_t i;
2726       int32_t iCopySizeY = (sizeof (uint8_t) << 4);
2727       int32_t iCopySizeUV = (sizeof (uint8_t) << 3);
2728 
2729       int32_t iIndex = ((-pBs->iLeftBits) >> 3) + 2;
2730 
2731       pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA_PCM;
2732 
2733       //step 1: locating bit-stream pointer [must align into integer byte]
2734       pBs->pCurBuf -= iIndex;
2735 
2736       //step 2: copy pixel from bit-stream into fdec [reconstruction]
2737       pTmpBsBuf = pBs->pCurBuf;
2738       if (!pCtx->pParam->bParseOnly) {
2739         for (i = 0; i < 16; i++) { //luma
2740           memcpy (pDecY, pTmpBsBuf, iCopySizeY);
2741           pDecY += iDecStrideL;
2742           pTmpBsBuf += 16;
2743         }
2744 
2745         for (i = 0; i < 8; i++) { //cb
2746           memcpy (pDecU, pTmpBsBuf, iCopySizeUV);
2747           pDecU += iDecStrideC;
2748           pTmpBsBuf += 8;
2749         }
2750         for (i = 0; i < 8; i++) { //cr
2751           memcpy (pDecV, pTmpBsBuf, iCopySizeUV);
2752           pDecV += iDecStrideC;
2753           pTmpBsBuf += 8;
2754         }
2755       }
2756 
2757       pBs->pCurBuf += 384;
2758 
2759       //step 3: update QP and pNonZeroCount
2760       pCurDqLayer->pLumaQp[iMbXy] = 0;
2761       pCurDqLayer->pChromaQp[iMbXy][0] = pCurDqLayer->pChromaQp[iMbXy][1] = 0;
2762       //Rec. 9.2.1 for PCM, nzc=16
2763       ST32A4 (&pNzc[0], 0x10101010);
2764       ST32A4 (&pNzc[4], 0x10101010);
2765       ST32A4 (&pNzc[8], 0x10101010);
2766       ST32A4 (&pNzc[12], 0x10101010);
2767       ST32A4 (&pNzc[16], 0x10101010);
2768       ST32A4 (&pNzc[20], 0x10101010);
2769       WELS_READ_VERIFY (InitReadBits (pBs, 0));
2770       return ERR_NONE;
2771     } else {
2772       if (0 == uiMbType) {
2773         ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16);
2774         pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA4x4;
2775         if (pCtx->pPps->bTransform8x8ModeFlag) {
2776           WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //transform_size_8x8_flag
2777           pCurDqLayer->pTransformSize8x8Flag[iMbXy] = !!uiCode;
2778           if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
2779             uiMbType = pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA8x8;
2780           }
2781         }
2782         if (!pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
2783           pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurDqLayer);
2784           WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurDqLayer));
2785         } else {
2786           pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurDqLayer);
2787           WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurDqLayer));
2788         }
2789       } else { //I_PCM exclude, we can ignore it
2790         pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA16x16;
2791         pCurDqLayer->pTransformSize8x8Flag[iMbXy] = false;
2792         pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
2793         pCurDqLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3;
2794         pCurDqLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2];
2795         uiCbpC = pCtx->pSps->uiChromaFormatIdc ? pCurDqLayer->pCbp[iMbXy] >> 4 : 0;
2796         uiCbpL = pCurDqLayer->pCbp[iMbXy] & 15;
2797         WelsFillCacheNonZeroCount (&sNeighAvail, pNonZeroCount, pCurDqLayer);
2798         if ((iRet = ParseIntra16x16Mode (pCtx, &sNeighAvail, pBs, pCurDqLayer)) != ERR_NONE) {
2799           return iRet;
2800         }
2801       }
2802     }
2803   }
2804 
2805   if (MB_TYPE_INTRA16x16 != pCurDqLayer->pDec->pMbType[iMbXy]) {
2806     WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //coded_block_pattern
2807     uiCbp = uiCode;
2808     {
2809       if (pCtx->pSps->uiChromaFormatIdc && (uiCbp > 47))
2810         return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_CBP);
2811       if (!pCtx->pSps->uiChromaFormatIdc && (uiCbp > 15))
2812         return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_CBP);
2813       if (MB_TYPE_INTRA4x4 == pCurDqLayer->pDec->pMbType[iMbXy] || MB_TYPE_INTRA8x8 == pCurDqLayer->pDec->pMbType[iMbXy]) {
2814 
2815         uiCbp = pCtx->pSps->uiChromaFormatIdc ? g_kuiIntra4x4CbpTable[uiCbp] : g_kuiIntra4x4CbpTable400[uiCbp];
2816       } else //inter
2817         uiCbp = pCtx->pSps->uiChromaFormatIdc ? g_kuiInterCbpTable[uiCbp] : g_kuiInterCbpTable400[uiCbp];
2818     }
2819 
2820     pCurDqLayer->pCbp[iMbXy] = uiCbp;
2821     uiCbpC = pCurDqLayer->pCbp[iMbXy] >> 4;
2822     uiCbpL = pCurDqLayer->pCbp[iMbXy] & 15;
2823 
2824     // Need modification when B picutre add in
2825     bool bNeedParseTransformSize8x8Flag =
2826       (((pCurDqLayer->pDec->pMbType[iMbXy] >= MB_TYPE_16x16 && pCurDqLayer->pDec->pMbType[iMbXy] <= MB_TYPE_8x16)
2827         || pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy])
2828        && (pCurDqLayer->pDec->pMbType[iMbXy] != MB_TYPE_INTRA8x8)
2829        && (pCurDqLayer->pDec->pMbType[iMbXy] != MB_TYPE_INTRA4x4)
2830        && (uiCbpL > 0)
2831        && (pCtx->pPps->bTransform8x8ModeFlag));
2832 
2833     if (bNeedParseTransformSize8x8Flag) {
2834       WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //transform_size_8x8_flag
2835       pCurDqLayer->pTransformSize8x8Flag[iMbXy] = !!uiCode;
2836     }
2837   }
2838 
2839   ST32A4 (&pNzc[0], 0);
2840   ST32A4 (&pNzc[4], 0);
2841   ST32A4 (&pNzc[8], 0);
2842   ST32A4 (&pNzc[12], 0);
2843   ST32A4 (&pNzc[16], 0);
2844   ST32A4 (&pNzc[20], 0);
2845   if (pCurDqLayer->pCbp[iMbXy] == 0 && !IS_INTRA16x16 (pCurDqLayer->pDec->pMbType[iMbXy])
2846       && !IS_I_BL (pCurDqLayer->pDec->pMbType[iMbXy])) {
2847     pCurDqLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp;
2848     for (i = 0; i < 2; i++) {
2849       pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurDqLayer->pLumaQp[iMbXy] +
2850                                          pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
2851     }
2852   }
2853 
2854   if (pCurDqLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurDqLayer->pDec->pMbType[iMbXy]) {
2855     int32_t iQpDelta, iId8x8, iId4x4;
2856     memset (pCurDqLayer->pScaledTCoeff[iMbXy], 0, MB_COEFF_LIST_SIZE * sizeof (int16_t));
2857     WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mb_qp_delta
2858     iQpDelta = iCode;
2859 
2860     if (iQpDelta > 25 || iQpDelta < -26) { //out of iQpDelta range
2861       return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_QP);
2862     }
2863 
2864     pCurDqLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp
2865     pSlice->iLastMbQp = pCurDqLayer->pLumaQp[iMbXy];
2866     for (i = 0; i < 2; i++) {
2867       pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pSlice->iLastMbQp +
2868                                          pSliceHeader->pPps->iChromaQpIndexOffset[i], 0,
2869                                          51)];
2870     }
2871 
2872     BsStartCavlc (pBs);
2873 
2874     if (MB_TYPE_INTRA16x16 == pCurDqLayer->pDec->pMbType[iMbXy]) {
2875       //step1: Luma DC
2876       if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, 0, 16, g_kuiLumaDcZigzagScan, I16_LUMA_DC,
2877                                           pCurDqLayer->pScaledTCoeff[iMbXy], pCurDqLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
2878         return iRet;//abnormal
2879       }
2880       //step2: Luma AC
2881       if (uiCbpL) {
2882         for (i = 0; i < 16; i++) {
2883           if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, i, iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1,
2884                                               g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), I16_LUMA_AC, pCurDqLayer->pScaledTCoeff[iMbXy] + (i << 4),
2885                                               pCurDqLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
2886             return iRet;//abnormal
2887           }
2888         }
2889         ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1]));
2890         ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2]));
2891         ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3]));
2892         ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
2893       }
2894     } else { //non-MB_TYPE_INTRA16x16
2895       if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
2896         for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
2897           iMbResProperty = (IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA_8 : LUMA_DC_AC_INTER_8;
2898           if (uiCbpL & (1 << iId8x8)) {
2899             int32_t iIndex = (iId8x8 << 2);
2900             for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
2901               if ((iRet = WelsResidualBlockCavlc8x8 (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - iScanIdxStart + 1,
2902                                                      g_kuiZigzagScan8x8 + iScanIdxStart, iMbResProperty, pCurDqLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), iId4x4,
2903                                                      pCurDqLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
2904                 return iRet;
2905               }
2906               iIndex++;
2907             }
2908           } else {
2909             ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[iId8x8 << 2]], 0);
2910             ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[ (iId8x8 << 2) + 2]], 0);
2911           }
2912         }
2913         ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1]));
2914         ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2]));
2915         ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3]));
2916         ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
2917       } else { // Normal T4x4
2918         for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
2919           iMbResProperty = (IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA : LUMA_DC_AC_INTER;
2920           if (uiCbpL & (1 << iId8x8)) {
2921             int32_t iIndex = (iId8x8 << 2);
2922             for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
2923               //Luma (DC and AC decoding together)
2924               if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - iScanIdxStart + 1,
2925                                                   g_kuiZigzagScan + iScanIdxStart, iMbResProperty, pCurDqLayer->pScaledTCoeff[iMbXy] + (iIndex << 4),
2926                                                   pCurDqLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
2927                 return iRet;//abnormal
2928               }
2929               iIndex++;
2930             }
2931           } else {
2932             ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[iId8x8 << 2]], 0);
2933             ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[ (iId8x8 << 2) + 2]], 0);
2934           }
2935         }
2936         ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1]));
2937         ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2]));
2938         ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3]));
2939         ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
2940       }
2941     }
2942 
2943 
2944     //chroma
2945     //step1: DC
2946     if (1 == uiCbpC || 2 == uiCbpC) {
2947       for (i = 0; i < 2; i++) { //Cb Cr
2948         if (IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy]))
2949           iMbResProperty = i ? CHROMA_DC_V : CHROMA_DC_U;
2950         else
2951           iMbResProperty = i ? CHROMA_DC_V_INTER : CHROMA_DC_U_INTER;
2952 
2953         if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, 16 + (i << 2), 4, g_kuiChromaDcScan, iMbResProperty,
2954                                             pCurDqLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6), pCurDqLayer->pChromaQp[iMbXy][i], pCtx)) != ERR_NONE) {
2955           return iRet;//abnormal
2956         }
2957       }
2958     } else {
2959     }
2960     //step2: AC
2961     if (2 == uiCbpC) {
2962       for (i = 0; i < 2; i++) { //Cb Cr
2963         if (IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy]))
2964           iMbResProperty = i ? CHROMA_AC_V : CHROMA_AC_U;
2965         else
2966           iMbResProperty = i ? CHROMA_AC_V_INTER : CHROMA_AC_U_INTER;
2967 
2968         int32_t iIndex = 16 + (i << 2);
2969         for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
2970           if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - WELS_MAX (iScanIdxStart,
2971                                               1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), iMbResProperty,
2972                                               pCurDqLayer->pScaledTCoeff[iMbXy] + (iIndex << 4),
2973                                               pCurDqLayer->pChromaQp[iMbXy][i], pCtx)) != ERR_NONE) {
2974             return iRet;//abnormal
2975           }
2976           iIndex++;
2977         }
2978       }
2979       ST16A2 (&pNzc[16], LD16A2 (&pNonZeroCount[6 + 8 * 1]));
2980       ST16A2 (&pNzc[20], LD16A2 (&pNonZeroCount[6 + 8 * 2]));
2981       ST16A2 (&pNzc[18], LD16A2 (&pNonZeroCount[6 + 8 * 4]));
2982       ST16A2 (&pNzc[22], LD16A2 (&pNonZeroCount[6 + 8 * 5]));
2983     }
2984     BsEndCavlc (pBs);
2985   }
2986 
2987   return ERR_NONE;
2988 }
2989 
WelsBlockFuncInit(SBlockFunc * pFunc,int32_t iCpu)2990 void WelsBlockFuncInit (SBlockFunc*    pFunc,  int32_t iCpu) {
2991   pFunc->pWelsSetNonZeroCountFunc   = WelsNonZeroCount_c;
2992   pFunc->pWelsBlockZero16x16Func    = WelsBlockZero16x16_c;
2993   pFunc->pWelsBlockZero8x8Func      = WelsBlockZero8x8_c;
2994 
2995 #ifdef HAVE_NEON
2996   if (iCpu & WELS_CPU_NEON) {
2997     pFunc->pWelsSetNonZeroCountFunc = WelsNonZeroCount_neon;
2998     pFunc->pWelsBlockZero16x16Func  = WelsBlockZero16x16_neon;
2999     pFunc->pWelsBlockZero8x8Func    = WelsBlockZero8x8_neon;
3000   }
3001 #endif
3002 
3003 #ifdef HAVE_NEON_AARCH64
3004   if (iCpu & WELS_CPU_NEON) {
3005     pFunc->pWelsSetNonZeroCountFunc = WelsNonZeroCount_AArch64_neon;
3006     pFunc->pWelsBlockZero16x16Func  = WelsBlockZero16x16_AArch64_neon;
3007     pFunc->pWelsBlockZero8x8Func    = WelsBlockZero8x8_AArch64_neon;
3008   }
3009 #endif
3010 
3011 #if defined(X86_ASM)
3012   if (iCpu & WELS_CPU_SSE2) {
3013     pFunc->pWelsSetNonZeroCountFunc = WelsNonZeroCount_sse2;
3014     pFunc->pWelsBlockZero16x16Func  = WelsBlockZero16x16_sse2;
3015     pFunc->pWelsBlockZero8x8Func    = WelsBlockZero8x8_sse2;
3016   }
3017 #endif
3018 
3019 }
3020 
/*!
 * \brief   Set every coefficient of an iW x iH block of int16_t to uiVal.
 *
 * A plain memset() writes bytes, so it only yields the requested value when uiVal is 0; any
 * other value would make each int16_t hold uiVal * 0x0101. Non-zero values are therefore
 * stored element by element. A non-positive width or height leaves the block untouched.
 *
 * \param   pBlock   first coefficient of the block
 * \param   iW       block width in coefficients
 * \param   iH       block height in rows
 * \param   iStride  distance between the starts of consecutive rows, in coefficients
 * \param   uiVal    value to store in every coefficient
 */
void WelsBlockInit (int16_t* pBlock, int iW, int iH, int iStride, uint8_t uiVal) {
  if (iW <= 0 || iH <= 0) {
    return; // nothing to fill; also keeps a negative width from becoming a huge memset length
  }

  int16_t* pDst = pBlock;
  for (int32_t i = 0; i < iH; i++) {
    if (0 == uiVal) {
      memset (pDst, 0, iW * sizeof (int16_t)); // all-zero bytes == all-zero coefficients
    } else {
      for (int32_t j = 0; j < iW; j++) {
        pDst[j] = uiVal;
      }
    }
    pDst += iStride;
  }
}
/*!
 * \brief   Zero a 16x16 block of int16_t coefficients (portable C version).
 * \param   pBlock   first coefficient of the block
 * \param   iStride  distance between the starts of consecutive rows, in coefficients
 */
void WelsBlockZero16x16_c (int16_t* pBlock, int32_t iStride) {
  const size_t kuiRowBytes = 16 * sizeof (int16_t);
  int16_t* pRow = pBlock;

  for (int32_t iRow = 0; iRow < 16; ++iRow) {
    memset (pRow, 0, kuiRowBytes);
    pRow += iStride;
  }
}
3033 
/*!
 * \brief   Zero an 8x8 block of int16_t coefficients (portable C version).
 * \param   pBlock   first coefficient of the block
 * \param   iStride  distance between the starts of consecutive rows, in coefficients
 */
void WelsBlockZero8x8_c (int16_t* pBlock, int32_t iStride) {
  const size_t kuiRowBytes = 8 * sizeof (int16_t);
  int16_t* pRow = pBlock;

  for (int32_t iRow = 0; iRow < 8; ++iRow) {
    memset (pRow, 0, kuiRowBytes);
    pRow += iStride;
  }
}
3037 
3038 // Compute the temporal-direct scaling factor that's common
3039 // to all direct MBs in this slice, as per clause 8.4.1.2.3
3040 // of T-REC H.264 201704
ComputeColocatedTemporalScaling(PWelsDecoderContext pCtx)3041 bool ComputeColocatedTemporalScaling (PWelsDecoderContext pCtx) {
3042   PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
3043   PSlice pCurSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer;
3044   PSliceHeader pSliceHeader = &pCurSlice->sSliceHeaderExt.sSliceHeader;
3045   if (!pSliceHeader->iDirectSpatialMvPredFlag) {
3046     uint32_t uiRefCount = pSliceHeader->uiRefCount[LIST_0];
3047     if (pCtx->sRefPic.pRefList[LIST_1][0] != NULL) {
3048       for (uint32_t i = 0; i < uiRefCount; ++i) {
3049         if (pCtx->sRefPic.pRefList[LIST_0][i] != NULL) {
3050           const int32_t poc0 = pCtx->sRefPic.pRefList[LIST_0][i]->iFramePoc;
3051           const int32_t poc1 = pCtx->sRefPic.pRefList[LIST_1][0]->iFramePoc;
3052           const int32_t poc = pSliceHeader->iPicOrderCntLsb;
3053           const int32_t td = WELS_CLIP3 (poc1 - poc0, -128, 127);
3054           if (td == 0) {
3055             pCurSlice->iMvScale[LIST_0][i] = 1 << 8;
3056           } else {
3057             int32_t tb = WELS_CLIP3 (poc - poc0, -128, 127);
3058             int32_t tx = (16384 + (abs (td) >> 1)) / td;
3059             pCurSlice->iMvScale[LIST_0][i] = WELS_CLIP3 ((tb * tx + 32) >> 6, -1024, 1023);
3060           }
3061         }
3062       }
3063     }
3064   }
3065   return true;
3066 }
3067 } // namespace WelsDec
3068