• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*!
2  * \copy
3  *     Copyright (c)  2009-2013, Cisco Systems
4  *     All rights reserved.
5  *
6  *     Redistribution and use in source and binary forms, with or without
7  *     modification, are permitted provided that the following conditions
8  *     are met:
9  *
10  *        * Redistributions of source code must retain the above copyright
11  *          notice, this list of conditions and the following disclaimer.
12  *
13  *        * Redistributions in binary form must reproduce the above copyright
14  *          notice, this list of conditions and the following disclaimer in
15  *          the documentation and/or other materials provided with the
16  *          distribution.
17  *
18  *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21  *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
22  *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23  *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
24  *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25  *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26  *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
28  *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  *     POSSIBILITY OF SUCH DAMAGE.
30  *
31  *
32  * \file    mv_pred.c
33  *
34  * \brief   Get MV predictor and update motion vector of mb cache
35  *
36  * \date    05/22/2009 Created
37  *
38  *************************************************************************************
39  */
40 
41 #include "mv_pred.h"
42 #include "ls_defines.h"
43 #include "mb_cache.h"
44 #include "parse_mb_syn_cabac.h"
45 
46 namespace WelsDec {
47 
/*!
 * \brief   Fill a small rectangular block of memory with one repeated value.
 *
 * The row width in bytes is w * size; rows are \p stride bytes apart. Only the
 * following shapes (row bytes x rows) are handled, every other combination
 * leaves the destination untouched:
 *   1x4, 2x2, 2x4, 4x2, 4x4, 8x1, 8x2, 8x4, 16x2, 16x3, 16x4
 *
 * Stores are done with byte writes / memcpy instead of casting the destination
 * to uint16_t* / uint32_t*, so the destination may be an array of any element
 * type and does not have to be 2- or 4-byte aligned.
 *
 * \param   vp      first byte of the first destination row
 * \param   w       block width in elements
 * \param   h       number of rows
 * \param   stride  distance between consecutive rows, in bytes
 * \param   val     fill value: stored unchanged when size == 4 (truncated to
 *                  16 bits for 2-byte rows), otherwise multiplied by
 *                  0x0101 / 0x01010101, which repeats a one-byte value in every
 *                  byte of the stored word; 1-byte rows store (uint8_t)val
 * \param   size    size of one element in bytes
 */
static inline void SetRectBlock (void* vp, int32_t w, const int32_t h, int32_t stride, const uint32_t val,
                                 const int32_t size) {
  uint8_t* p = (uint8_t*)vp;
  w *= size; // from here on w is the row width in bytes

  // Bail out on every shape outside the supported table.
  switch (w) {
  case 1:
    if (h != 4)
      return;
    break;
  case 2:
  case 4:
    if (h != 2 && h != 4)
      return;
    break;
  case 8:
    if (h != 1 && h != 2 && h != 4)
      return;
    break;
  case 16:
    if (h < 2 || h > 4)
      return;
    break;
  default:
    return;
  }

  if (w == 1) {
    for (int32_t iRow = 0; iRow < h; ++iRow) {
      p[iRow * stride] = (uint8_t)val;
    }
  } else if (w == 2) {
    const uint16_t kuiFill16 = size == 4 ? (uint16_t)val : (uint16_t) (val * 0x0101U);
    for (int32_t iRow = 0; iRow < h; ++iRow) {
      memcpy (p + iRow * stride, &kuiFill16, sizeof (kuiFill16));
    }
  } else {
    // 4, 8 or 16 bytes per row: written as one, two or four 32-bit words.
    const uint32_t kuiFill32 = size == 4 ? val : (uint32_t) (val * 0x01010101UL);
    for (int32_t iRow = 0; iRow < h; ++iRow) {
      uint8_t* pRow = p + iRow * stride;
      for (int32_t iCol = 0; iCol < w; iCol += 4) {
        memcpy (pRow + iCol, &kuiFill32, sizeof (kuiFill32));
      }
    }
  }
}
/*!
 * \brief   Copy four rows of a narrow block from one strided buffer to another.
 *
 * Each row is w * size bytes wide. Only row widths of 1, 2, 4 or 16 bytes are
 * handled; for any other width nothing is copied. All widths go through the
 * same byte-wise copy, so neither buffer needs any particular alignment or
 * element type.
 *
 * \param   vdst        first byte of the first destination row
 * \param   vsrc        first byte of the first source row
 * \param   stride_dst  distance between destination rows, in bytes
 * \param   stride_src  distance between source rows, in bytes
 * \param   w           row width in elements
 * \param   size        size of one element in bytes
 */
void CopyRectBlock4Cols (void* vdst, void* vsrc, const int32_t stride_dst, const int32_t stride_src, int32_t w,
                         const int32_t size) {
  uint8_t* dst = (uint8_t*)vdst;
  const uint8_t* src = (const uint8_t*)vsrc;
  w *= size; // row width in bytes

  if (w != 1 && w != 2 && w != 4 && w != 16) {
    return;
  }

  // memmove keeps the load-then-store semantics of a plain assignment even if
  // a source row and its destination row happen to overlap.
  for (int32_t iRow = 0; iRow < 4; ++iRow) {
    memmove (dst + stride_dst * iRow, src + stride_src * iRow, (size_t)w);
  }
}
PredPSkipMvFromNeighbor(PDqLayer pCurDqLayer,int16_t iMvp[2])158 void PredPSkipMvFromNeighbor (PDqLayer pCurDqLayer, int16_t iMvp[2]) {
159   bool bTopAvail, bLeftTopAvail, bRightTopAvail, bLeftAvail;
160 
161   int32_t iCurSliceIdc, iTopSliceIdc, iLeftTopSliceIdc, iRightTopSliceIdc, iLeftSliceIdc;
162   int32_t iLeftTopType, iRightTopType, iTopType, iLeftType;
163   int32_t iCurX, iCurY, iCurXy, iLeftXy, iTopXy = 0, iLeftTopXy = 0, iRightTopXy = 0;
164 
165   int8_t iLeftRef;
166   int8_t iTopRef;
167   int8_t iRightTopRef;
168   int8_t iLeftTopRef;
169   int8_t iDiagonalRef;
170   int8_t iMatchRef;
171   int16_t iMvA[2], iMvB[2], iMvC[2], iMvD[2];
172 
173   iCurXy = pCurDqLayer->iMbXyIndex;
174   iCurX  = pCurDqLayer->iMbX;
175   iCurY  = pCurDqLayer->iMbY;
176   iCurSliceIdc = pCurDqLayer->pSliceIdc[iCurXy];
177 
178   if (iCurX != 0) {
179     iLeftXy = iCurXy - 1;
180     iLeftSliceIdc = pCurDqLayer->pSliceIdc[iLeftXy];
181     bLeftAvail = (iLeftSliceIdc == iCurSliceIdc);
182   } else {
183     bLeftAvail = 0;
184     bLeftTopAvail = 0;
185   }
186 
187   if (iCurY != 0) {
188     iTopXy = iCurXy - pCurDqLayer->iMbWidth;
189     iTopSliceIdc = pCurDqLayer->pSliceIdc[iTopXy];
190     bTopAvail = (iTopSliceIdc == iCurSliceIdc);
191     if (iCurX != 0) {
192       iLeftTopXy = iTopXy - 1;
193       iLeftTopSliceIdc = pCurDqLayer->pSliceIdc[iLeftTopXy];
194       bLeftTopAvail = (iLeftTopSliceIdc  == iCurSliceIdc);
195     } else {
196       bLeftTopAvail = 0;
197     }
198     if (iCurX != (pCurDqLayer->iMbWidth - 1)) {
199       iRightTopXy = iTopXy + 1;
200       iRightTopSliceIdc = pCurDqLayer->pSliceIdc[iRightTopXy];
201       bRightTopAvail = (iRightTopSliceIdc == iCurSliceIdc);
202     } else {
203       bRightTopAvail = 0;
204     }
205   } else {
206     bTopAvail = 0;
207     bLeftTopAvail = 0;
208     bRightTopAvail = 0;
209   }
210 
211   iLeftType = ((iCurX != 0 && bLeftAvail) ? GetMbType (pCurDqLayer)[iLeftXy] : 0);
212   iTopType = ((iCurY != 0 && bTopAvail) ? GetMbType (pCurDqLayer)[iTopXy] : 0);
213   iLeftTopType = ((iCurX != 0 && iCurY != 0 && bLeftTopAvail)
214                   ? GetMbType (pCurDqLayer)[iLeftTopXy] : 0);
215   iRightTopType = ((iCurX != pCurDqLayer->iMbWidth - 1 && iCurY != 0 && bRightTopAvail)
216                    ? GetMbType (pCurDqLayer)[iRightTopXy] : 0);
217 
218   /*get neb mv&iRefIdxArray*/
219   /*left*/
220   if (bLeftAvail && IS_INTER (iLeftType)) {
221     ST32 (iMvA, LD32 (pCurDqLayer->pDec ? pCurDqLayer->pDec->pMv[0][iLeftXy][3] : pCurDqLayer->pMv[0][iLeftXy][3]));
222     iLeftRef = pCurDqLayer->pDec ? pCurDqLayer->pDec->pRefIndex[0][iLeftXy][3] : pCurDqLayer->pRefIndex[0][iLeftXy][3];
223   } else {
224     ST32 (iMvA, 0);
225     if (0 == bLeftAvail) { //not available
226       iLeftRef = REF_NOT_AVAIL;
227     } else { //available but is intra mb type
228       iLeftRef = REF_NOT_IN_LIST;
229     }
230   }
231   if (REF_NOT_AVAIL == iLeftRef ||
232       (0 == iLeftRef && 0 == * (int32_t*)iMvA)) {
233     ST32 (iMvp, 0);
234     return;
235   }
236 
237   /*top*/
238   if (bTopAvail && IS_INTER (iTopType)) {
239     ST32 (iMvB, LD32 (pCurDqLayer->pDec ? pCurDqLayer->pDec->pMv[0][iTopXy][12] : pCurDqLayer->pMv[0][iTopXy][12]));
240     iTopRef = pCurDqLayer->pDec ? pCurDqLayer->pDec->pRefIndex[0][iTopXy][12] : pCurDqLayer->pRefIndex[0][iTopXy][12];
241   } else {
242     ST32 (iMvB, 0);
243     if (0 == bTopAvail) { //not available
244       iTopRef = REF_NOT_AVAIL;
245     } else { //available but is intra mb type
246       iTopRef = REF_NOT_IN_LIST;
247     }
248   }
249   if (REF_NOT_AVAIL == iTopRef ||
250       (0 == iTopRef  && 0 == * (int32_t*)iMvB)) {
251     ST32 (iMvp, 0);
252     return;
253   }
254 
255   /*right_top*/
256   if (bRightTopAvail && IS_INTER (iRightTopType)) {
257     ST32 (iMvC, LD32 (pCurDqLayer->pDec ? pCurDqLayer->pDec->pMv[0][iRightTopXy][12] :
258                       pCurDqLayer->pMv[0][iRightTopXy][12]));
259     iRightTopRef = pCurDqLayer->pDec ? pCurDqLayer->pDec->pRefIndex[0][iRightTopXy][12] :
260                    pCurDqLayer->pRefIndex[0][iRightTopXy][12];
261   } else {
262     ST32 (iMvC, 0);
263     if (0 == bRightTopAvail) { //not available
264       iRightTopRef = REF_NOT_AVAIL;
265     } else { //available but is intra mb type
266       iRightTopRef = REF_NOT_IN_LIST;
267     }
268   }
269 
270   /*left_top*/
271   if (bLeftTopAvail && IS_INTER (iLeftTopType)) {
272     ST32 (iMvD, LD32 (pCurDqLayer->pDec ? pCurDqLayer->pDec->pMv[0][iLeftTopXy][15] : pCurDqLayer->pMv[0][iLeftTopXy][15]));
273     iLeftTopRef = pCurDqLayer->pDec ? pCurDqLayer->pDec->pRefIndex[0][iLeftTopXy][15] :
274                   pCurDqLayer->pRefIndex[0][iLeftTopXy][15];
275   } else {
276     ST32 (iMvD, 0);
277     if (0 == bLeftTopAvail) { //not available
278       iLeftTopRef = REF_NOT_AVAIL;
279     } else { //available but is intra mb type
280       iLeftTopRef = REF_NOT_IN_LIST;
281     }
282   }
283 
284   iDiagonalRef = iRightTopRef;
285   if (REF_NOT_AVAIL == iDiagonalRef) {
286     iDiagonalRef = iLeftTopRef;
287     * (int32_t*)iMvC = * (int32_t*)iMvD;
288   }
289 
290   if (REF_NOT_AVAIL == iTopRef && REF_NOT_AVAIL == iDiagonalRef && iLeftRef >= REF_NOT_IN_LIST) {
291     ST32 (iMvp, LD32 (iMvA));
292     return;
293   }
294 
295   iMatchRef = (0 == iLeftRef) + (0 == iTopRef) + (0 == iDiagonalRef);
296   if (1 == iMatchRef) {
297     if (0 == iLeftRef) {
298       ST32 (iMvp, LD32 (iMvA));
299     } else if (0 == iTopRef) {
300       ST32 (iMvp, LD32 (iMvB));
301     } else {
302       ST32 (iMvp, LD32 (iMvC));
303     }
304   } else {
305     iMvp[0] = WelsMedian (iMvA[0], iMvB[0], iMvC[0]);
306     iMvp[1] = WelsMedian (iMvA[1], iMvB[1], iMvC[1]);
307   }
308 }
309 
GetColocatedMb(PWelsDecoderContext pCtx,MbType & mbType,SubMbType & subMbType)310 int32_t GetColocatedMb (PWelsDecoderContext pCtx, MbType& mbType, SubMbType& subMbType) {
311   PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
312   int32_t iMbXy = pCurDqLayer->iMbXyIndex;
313 
314   uint32_t is8x8 = IS_Inter_8x8 (GetMbType (pCurDqLayer)[iMbXy]);
315   mbType = GetMbType (pCurDqLayer)[iMbXy];
316 
317   PPicture colocPic = pCtx->sRefPic.pRefList[LIST_1][0];
318   if (GetThreadCount (pCtx) > 1) {
319     if (16 * pCurDqLayer->iMbY > pCtx->lastReadyHeightOffset[1][0]) {
320       if (colocPic->pReadyEvent[pCurDqLayer->iMbY].isSignaled != 1) {
321         WAIT_EVENT (&colocPic->pReadyEvent[pCurDqLayer->iMbY], WELS_DEC_THREAD_WAIT_INFINITE);
322       }
323       pCtx->lastReadyHeightOffset[1][0] = 16 * pCurDqLayer->iMbY;
324     }
325   }
326 
327   if (colocPic == NULL) {
328     SLogContext* pLogCtx = & (pCtx->sLogCtx);
329     WelsLog (pLogCtx, WELS_LOG_ERROR, "Colocated Ref Picture for B-Slice is lost, B-Slice decoding cannot be continued!");
330     return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_DATA, ERR_INFO_REFERENCE_PIC_LOST);
331   }
332 
333   MbType coloc_mbType = colocPic->pMbType[iMbXy];
334   if (coloc_mbType == MB_TYPE_SKIP) {
335     //This indicates the colocated MB is P SKIP MB
336     coloc_mbType |= MB_TYPE_16x16 | MB_TYPE_P0L0 | MB_TYPE_P1L0;
337   }
338   if (IS_Inter_8x8 (coloc_mbType) && !pCtx->pSps->bDirect8x8InferenceFlag) {
339     subMbType = SUB_MB_TYPE_4x4 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_DIRECT;
340     mbType |= MB_TYPE_8x8 | MB_TYPE_L0 | MB_TYPE_L1;
341   } else if (!is8x8 && (IS_INTER_16x16 (coloc_mbType) || IS_INTRA (coloc_mbType)/* || IS_SKIP(coloc_mbType)*/)) {
342     subMbType = SUB_MB_TYPE_8x8 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_DIRECT;
343     mbType |= MB_TYPE_16x16 | MB_TYPE_L0 | MB_TYPE_L1;
344   } else {
345     subMbType = SUB_MB_TYPE_8x8 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_DIRECT;
346     mbType |= MB_TYPE_8x8 | MB_TYPE_L0 | MB_TYPE_L1;
347   }
348 
349   if (IS_INTRA (coloc_mbType)) {
350     SetRectBlock (pCurDqLayer->iColocIntra, 4, 4, 4 * sizeof (int8_t), 1, sizeof (int8_t));
351     return ERR_NONE;
352   }
353   SetRectBlock (pCurDqLayer->iColocIntra, 4, 4, 4 * sizeof (int8_t), 0, sizeof (int8_t));
354 
355   if (IS_INTER_16x16 (mbType)) {
356     int16_t iMVZero[2] = { 0 };
357     int16_t* pMv = IS_TYPE_L1 (coloc_mbType) ? colocPic->pMv[LIST_1][iMbXy][0] : iMVZero;
358     ST32 (pCurDqLayer->iColocMv[LIST_0][0], LD32 (colocPic->pMv[LIST_0][iMbXy][0]));
359     ST32 (pCurDqLayer->iColocMv[LIST_1][0], LD32 (pMv));
360     pCurDqLayer->iColocRefIndex[LIST_0][0] = colocPic->pRefIndex[LIST_0][iMbXy][0];
361     pCurDqLayer->iColocRefIndex[LIST_1][0] = IS_TYPE_L1 (coloc_mbType) ? colocPic->pRefIndex[LIST_1][iMbXy][0] :
362         REF_NOT_IN_LIST;
363   } else {
364     if (!pCtx->pSps->bDirect8x8InferenceFlag) {
365       CopyRectBlock4Cols (pCurDqLayer->iColocMv[LIST_0], colocPic->pMv[LIST_0][iMbXy], 16, 16, 4, 4);
366       CopyRectBlock4Cols (pCurDqLayer->iColocRefIndex[LIST_0], colocPic->pRefIndex[LIST_0][iMbXy], 4, 4, 4, 1);
367       if (IS_TYPE_L1 (coloc_mbType)) {
368         CopyRectBlock4Cols (pCurDqLayer->iColocMv[LIST_1], colocPic->pMv[LIST_1][iMbXy], 16, 16, 4, 4);
369         CopyRectBlock4Cols (pCurDqLayer->iColocRefIndex[LIST_1], colocPic->pRefIndex[LIST_1][iMbXy], 4, 4, 4, 1);
370       } else { // only forward prediction
371         SetRectBlock (pCurDqLayer->iColocRefIndex[LIST_1], 4, 4, 4, (uint8_t)REF_NOT_IN_LIST, 1);
372       }
373     } else {
374       for (int32_t listIdx = 0; listIdx < 1 + !! (coloc_mbType & MB_TYPE_L1); listIdx++) {
375         SetRectBlock (pCurDqLayer->iColocMv[listIdx][0], 2, 2, 16, LD32 (colocPic->pMv[listIdx][iMbXy][0]), 4);
376         SetRectBlock (pCurDqLayer->iColocMv[listIdx][2], 2, 2, 16, LD32 (colocPic->pMv[listIdx][iMbXy][3]), 4);
377         SetRectBlock (pCurDqLayer->iColocMv[listIdx][8], 2, 2, 16, LD32 (colocPic->pMv[listIdx][iMbXy][12]), 4);
378         SetRectBlock (pCurDqLayer->iColocMv[listIdx][10], 2, 2, 16, LD32 (colocPic->pMv[listIdx][iMbXy][15]), 4);
379 
380         SetRectBlock (&pCurDqLayer->iColocRefIndex[listIdx][0], 2, 2, 4, colocPic->pRefIndex[listIdx][iMbXy][0], 1);
381         SetRectBlock (&pCurDqLayer->iColocRefIndex[listIdx][2], 2, 2, 4, colocPic->pRefIndex[listIdx][iMbXy][3], 1);
382         SetRectBlock (&pCurDqLayer->iColocRefIndex[listIdx][8], 2, 2, 4, colocPic->pRefIndex[listIdx][iMbXy][12], 1);
383         SetRectBlock (&pCurDqLayer->iColocRefIndex[listIdx][10], 2, 2, 4, colocPic->pRefIndex[listIdx][iMbXy][15], 1);
384       }
385       if (! (coloc_mbType & MB_TYPE_L1)) // only forward prediction
386         SetRectBlock (&pCurDqLayer->iColocRefIndex[1][0], 4, 4, 4, (uint8_t)REF_NOT_IN_LIST, 1);
387     }
388   }
389   return ERR_NONE;
390 }
391 
PredMvBDirectSpatial(PWelsDecoderContext pCtx,int16_t iMvp[LIST_A][2],int8_t ref[LIST_A],SubMbType & subMbType)392 int32_t PredMvBDirectSpatial (PWelsDecoderContext pCtx, int16_t iMvp[LIST_A][2], int8_t ref[LIST_A],
393                               SubMbType& subMbType) {
394 
395   int32_t ret = ERR_NONE;
396   PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
397   int32_t iMbXy = pCurDqLayer->iMbXyIndex;
398   bool bSkipOrDirect = (IS_SKIP (GetMbType (pCurDqLayer)[iMbXy]) | IS_DIRECT (GetMbType (pCurDqLayer)[iMbXy])) > 0;
399 
400   MbType mbType;
401   ret = GetColocatedMb (pCtx, mbType, subMbType);
402   if (ret != ERR_NONE) {
403     return ret;
404   }
405 
406   bool bTopAvail, bLeftTopAvail, bRightTopAvail, bLeftAvail;
407   int32_t iLeftTopType, iRightTopType, iTopType, iLeftType;
408   int32_t iCurSliceIdc, iTopSliceIdc, iLeftTopSliceIdc, iRightTopSliceIdc, iLeftSliceIdc;
409   int32_t iCurX, iCurY, iCurXy, iLeftXy = 0, iTopXy = 0, iLeftTopXy = 0, iRightTopXy = 0;
410 
411   int8_t iLeftRef[LIST_A];
412   int8_t iTopRef[LIST_A];
413   int8_t iRightTopRef[LIST_A];
414   int8_t iLeftTopRef[LIST_A];
415   int8_t iDiagonalRef[LIST_A];
416   int16_t iMvA[LIST_A][2], iMvB[LIST_A][2], iMvC[LIST_A][2], iMvD[LIST_A][2];
417 
418   iCurXy = pCurDqLayer->iMbXyIndex;
419 
420   iCurX = pCurDqLayer->iMbX;
421   iCurY = pCurDqLayer->iMbY;
422   iCurSliceIdc = pCurDqLayer->pSliceIdc[iCurXy];
423 
424   if (iCurX != 0) {
425     iLeftXy = iCurXy - 1;
426     iLeftSliceIdc = pCurDqLayer->pSliceIdc[iLeftXy];
427     bLeftAvail = (iLeftSliceIdc == iCurSliceIdc);
428   } else {
429     bLeftAvail = 0;
430     bLeftTopAvail = 0;
431   }
432 
433   if (iCurY != 0) {
434     iTopXy = iCurXy - pCurDqLayer->iMbWidth;
435     iTopSliceIdc = pCurDqLayer->pSliceIdc[iTopXy];
436     bTopAvail = (iTopSliceIdc == iCurSliceIdc);
437     if (iCurX != 0) {
438       iLeftTopXy = iTopXy - 1;
439       iLeftTopSliceIdc = pCurDqLayer->pSliceIdc[iLeftTopXy];
440       bLeftTopAvail = (iLeftTopSliceIdc == iCurSliceIdc);
441     } else {
442       bLeftTopAvail = 0;
443     }
444     if (iCurX != (pCurDqLayer->iMbWidth - 1)) {
445       iRightTopXy = iTopXy + 1;
446       iRightTopSliceIdc = pCurDqLayer->pSliceIdc[iRightTopXy];
447       bRightTopAvail = (iRightTopSliceIdc == iCurSliceIdc);
448     } else {
449       bRightTopAvail = 0;
450     }
451   } else {
452     bTopAvail = 0;
453     bLeftTopAvail = 0;
454     bRightTopAvail = 0;
455   }
456 
457   iLeftType = ((iCurX != 0 && bLeftAvail) ? GetMbType (pCurDqLayer)[iLeftXy] : 0);
458   iTopType = ((iCurY != 0 && bTopAvail) ? GetMbType (pCurDqLayer)[iTopXy] : 0);
459   iLeftTopType = ((iCurX != 0 && iCurY != 0 && bLeftTopAvail)
460                   ? GetMbType (pCurDqLayer)[iLeftTopXy] : 0);
461   iRightTopType = ((iCurX != pCurDqLayer->iMbWidth - 1 && iCurY != 0 && bRightTopAvail)
462                    ? GetMbType (pCurDqLayer)[iRightTopXy] : 0);
463 
464   /*get neb mv&iRefIdxArray*/
465   for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
466 
467     /*left*/
468     if (bLeftAvail && IS_INTER (iLeftType)) {
469       ST32 (iMvA[listIdx], LD32 (pCurDqLayer->pDec ? pCurDqLayer->pDec->pMv[listIdx][iLeftXy][3] :
470                                  pCurDqLayer->pMv[listIdx][iLeftXy][3]));
471       iLeftRef[listIdx] = pCurDqLayer->pDec ? pCurDqLayer->pDec->pRefIndex[listIdx][iLeftXy][3] :
472                           pCurDqLayer->pRefIndex[listIdx][iLeftXy][3];
473     } else {
474       ST32 (iMvA[listIdx], 0);
475       if (0 == bLeftAvail) { //not available
476         iLeftRef[listIdx] = REF_NOT_AVAIL;
477       } else { //available but is intra mb type
478         iLeftRef[listIdx] = REF_NOT_IN_LIST;
479       }
480     }
481 
482     /*top*/
483     if (bTopAvail && IS_INTER (iTopType)) {
484       ST32 (iMvB[listIdx], LD32 (pCurDqLayer->pDec ? pCurDqLayer->pDec->pMv[listIdx][iTopXy][12] :
485                                  pCurDqLayer->pMv[listIdx][iTopXy][12]));
486       iTopRef[listIdx] = pCurDqLayer->pDec ? pCurDqLayer->pDec->pRefIndex[listIdx][iTopXy][12] :
487                          pCurDqLayer->pRefIndex[listIdx][iTopXy][12];
488     } else {
489       ST32 (iMvB[listIdx], 0);
490       if (0 == bTopAvail) { //not available
491         iTopRef[listIdx] = REF_NOT_AVAIL;
492       } else { //available but is intra mb type
493         iTopRef[listIdx] = REF_NOT_IN_LIST;
494       }
495     }
496 
497     /*right_top*/
498     if (bRightTopAvail && IS_INTER (iRightTopType)) {
499       ST32 (iMvC[listIdx], LD32 (pCurDqLayer->pDec ? pCurDqLayer->pDec->pMv[listIdx][iRightTopXy][12] :
500                                  pCurDqLayer->pMv[listIdx][iRightTopXy][12]));
501       iRightTopRef[listIdx] = pCurDqLayer->pDec ? pCurDqLayer->pDec->pRefIndex[listIdx][iRightTopXy][12] :
502                               pCurDqLayer->pRefIndex[listIdx][iRightTopXy][12];
503     } else {
504       ST32 (iMvC[listIdx], 0);
505       if (0 == bRightTopAvail) { //not available
506         iRightTopRef[listIdx] = REF_NOT_AVAIL;
507       } else { //available but is intra mb type
508         iRightTopRef[listIdx] = REF_NOT_IN_LIST;
509       }
510     }
511     /*left_top*/
512     if (bLeftTopAvail && IS_INTER (iLeftTopType)) {
513       ST32 (iMvD[listIdx], LD32 (pCurDqLayer->pDec ? pCurDqLayer->pDec->pMv[listIdx][iLeftTopXy][15] :
514                                  pCurDqLayer->pMv[listIdx][iLeftTopXy][15]));
515       iLeftTopRef[listIdx] = pCurDqLayer->pDec ? pCurDqLayer->pDec->pRefIndex[listIdx][iLeftTopXy][15] :
516                              pCurDqLayer->pRefIndex[listIdx][iLeftTopXy][15];
517     } else {
518       ST32 (iMvD[listIdx], 0);
519       if (0 == bLeftTopAvail) { //not available
520         iLeftTopRef[listIdx] = REF_NOT_AVAIL;
521       } else { //available but is intra mb type
522         iLeftTopRef[listIdx] = REF_NOT_IN_LIST;
523       }
524     }
525 
526     iDiagonalRef[listIdx] = iRightTopRef[listIdx];
527     if (REF_NOT_AVAIL == iDiagonalRef[listIdx]) {
528       iDiagonalRef[listIdx] = iLeftTopRef[listIdx];
529       ST32 (iMvC[listIdx], LD32 (iMvD[listIdx]));
530     }
531 
532     int8_t ref_temp = WELS_MIN_POSITIVE (iTopRef[listIdx], iDiagonalRef[listIdx]);
533     ref[listIdx] = WELS_MIN_POSITIVE (iLeftRef[listIdx], ref_temp);
534     if (ref[listIdx] >= 0) {
535 
536       uint32_t match_count = (iLeftRef[listIdx] == ref[listIdx]) + (iTopRef[listIdx] == ref[listIdx]) +
537                              (iDiagonalRef[listIdx] == ref[listIdx]);
538       if (match_count == 1) {
539         if (iLeftRef[listIdx] == ref[listIdx]) {
540           ST32 (iMvp[listIdx], LD32 (iMvA[listIdx]));
541         } else if (iTopRef[listIdx] == ref[listIdx]) {
542           ST32 (iMvp[listIdx], LD32 (iMvB[listIdx]));
543         } else {
544           ST32 (iMvp[listIdx], LD32 (iMvC[listIdx]));
545         }
546       } else {
547         iMvp[listIdx][0] = WelsMedian (iMvA[listIdx][0], iMvB[listIdx][0], iMvC[listIdx][0]);
548         iMvp[listIdx][1] = WelsMedian (iMvA[listIdx][1], iMvB[listIdx][1], iMvC[listIdx][1]);
549       }
550     } else {
551       iMvp[listIdx][0] = 0;
552       iMvp[listIdx][1] = 0;
553       ref[listIdx] = REF_NOT_IN_LIST;
554     }
555   }
556   if (ref[LIST_0] <= REF_NOT_IN_LIST && ref[LIST_1] <= REF_NOT_IN_LIST) {
557     ref[LIST_0] = ref[LIST_1] = 0;
558   } else if (ref[LIST_1] < 0) {
559     mbType &= ~MB_TYPE_L1;
560     subMbType &= ~MB_TYPE_L1;
561   } else if (ref[LIST_0] < 0) {
562     mbType &= ~MB_TYPE_L0;
563     subMbType &= ~MB_TYPE_L0;
564   }
565   GetMbType (pCurDqLayer)[iMbXy] = mbType;
566 
567   int16_t pMvd[4] = { 0 };
568 
569   bool bIsLongRef = pCtx->sRefPic.pRefList[LIST_1][0]->bIsLongRef;
570 
571   if (IS_INTER_16x16 (mbType)) {
572     if ((* (int32_t*)iMvp[LIST_0] | * (int32_t*)iMvp[LIST_1])) {
573       if (0 == pCurDqLayer->iColocIntra[0] && !bIsLongRef
574           && ((pCurDqLayer->iColocRefIndex[LIST_0][0] == 0 && (unsigned) (pCurDqLayer->iColocMv[LIST_0][0][0] + 1) <= 2
575                && (unsigned) (pCurDqLayer->iColocMv[LIST_0][0][1] + 1) <= 2)
576               || (pCurDqLayer->iColocRefIndex[LIST_0][0] < 0 && pCurDqLayer->iColocRefIndex[LIST_1][0] == 0
577                   && (unsigned) (pCurDqLayer->iColocMv[LIST_1][0][0] + 1) <= 2
578                   && (unsigned) (pCurDqLayer->iColocMv[LIST_1][0][1] + 1) <= 2))) {
579         if (0 >= ref[0])  * (uint32_t*)iMvp[LIST_0] = 0;
580         if (0 >= ref[1])  * (uint32_t*)iMvp[LIST_1] = 0;
581       }
582     }
583     UpdateP16x16DirectCabac (pCurDqLayer);
584     for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
585       UpdateP16x16MotionInfo (pCurDqLayer, listIdx, ref[listIdx], iMvp[listIdx]);
586       UpdateP16x16MvdCabac (pCurDqLayer, pMvd, listIdx);
587     }
588   } else {
589     if (bSkipOrDirect) {
590       int8_t pSubPartCount[4], pPartW[4];
591       for (int32_t i = 0; i < 4; i++) { //Direct 8x8 Ref and mv
592         int16_t iIdx8 = i << 2;
593         pCurDqLayer->pSubMbType[iMbXy][i] = subMbType;
594         int8_t pRefIndex[LIST_A][30];
595         UpdateP8x8RefIdxCabac (pCurDqLayer, pRefIndex, iIdx8, ref[LIST_0], LIST_0);
596         UpdateP8x8RefIdxCabac (pCurDqLayer, pRefIndex, iIdx8, ref[LIST_1], LIST_1);
597         UpdateP8x8DirectCabac (pCurDqLayer, iIdx8);
598 
599         pSubPartCount[i] = g_ksInterBSubMbTypeInfo[0].iPartCount;
600         pPartW[i] = g_ksInterBSubMbTypeInfo[0].iPartWidth;
601 
602         if (IS_SUB_4x4 (subMbType)) {
603           pSubPartCount[i] = 4;
604           pPartW[i] = 1;
605         }
606         FillSpatialDirect8x8Mv (pCurDqLayer, iIdx8, pSubPartCount[i], pPartW[i], subMbType, bIsLongRef, iMvp, ref, NULL, NULL);
607       }
608     }
609   }
610   return ret;
611 }
612 
PredBDirectTemporal(PWelsDecoderContext pCtx,int16_t iMvp[LIST_A][2],int8_t ref[LIST_A],SubMbType & subMbType)613 int32_t PredBDirectTemporal (PWelsDecoderContext pCtx, int16_t iMvp[LIST_A][2], int8_t ref[LIST_A],
614                              SubMbType& subMbType) {
615   int32_t ret = ERR_NONE;
616   PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
617   int32_t iMbXy = pCurDqLayer->iMbXyIndex;
618   bool bSkipOrDirect = (IS_SKIP (GetMbType (pCurDqLayer)[iMbXy]) | IS_DIRECT (GetMbType (pCurDqLayer)[iMbXy])) > 0;
619 
620   MbType mbType;
621   ret = GetColocatedMb (pCtx, mbType, subMbType);
622   if (ret != ERR_NONE) {
623     return ret;
624   }
625 
626   GetMbType (pCurDqLayer)[iMbXy] = mbType;
627 
628   PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer;
629   PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader;
630   int16_t pMvd[4] = { 0 };
631   const int32_t ref0Count = WELS_MIN (pSliceHeader->uiRefCount[LIST_0], pCtx->sRefPic.uiRefCount[LIST_0]);
632   if (IS_INTER_16x16 (mbType)) {
633     ref[LIST_0] = 0;
634     ref[LIST_1] = 0;
635     UpdateP16x16DirectCabac (pCurDqLayer);
636     UpdateP16x16RefIdx (pCurDqLayer, LIST_1, ref[LIST_1]);
637     ST64 (iMvp,  0);
638     if (pCurDqLayer->iColocIntra[0]) {
639       UpdateP16x16MotionOnly (pCurDqLayer, LIST_0, iMvp[LIST_0]);
640       UpdateP16x16MotionOnly (pCurDqLayer, LIST_1, iMvp[LIST_1]);
641       UpdateP16x16RefIdx (pCurDqLayer, LIST_0, ref[LIST_0]);
642     } else {
643       ref[LIST_0] = 0;
644       int16_t* mv = pCurDqLayer->iColocMv[LIST_0][0];
645       int8_t colocRefIndexL0 = pCurDqLayer->iColocRefIndex[LIST_0][0];
646       if (colocRefIndexL0 >= 0) {
647         ref[LIST_0] = MapColToList0 (pCtx, colocRefIndexL0, ref0Count);
648       } else {
649         mv = pCurDqLayer->iColocMv[LIST_1][0];
650       }
651       UpdateP16x16RefIdx (pCurDqLayer, LIST_0, ref[LIST_0]);
652 
653       iMvp[LIST_0][0] = (pSlice->iMvScale[LIST_0][ref[LIST_0]] * mv[0] + 128) >> 8;
654       iMvp[LIST_0][1] = (pSlice->iMvScale[LIST_0][ref[LIST_0]] * mv[1] + 128) >> 8;
655       UpdateP16x16MotionOnly (pCurDqLayer, LIST_0, iMvp[LIST_0]);
656       iMvp[LIST_1][0] = iMvp[LIST_0][0] - mv[0];
657       iMvp[LIST_1][1] = iMvp[LIST_0][1] - mv[1];
658       UpdateP16x16MotionOnly (pCurDqLayer, LIST_1, iMvp[LIST_1]);
659     }
660     UpdateP16x16MvdCabac (pCurDqLayer, pMvd, LIST_0);
661     UpdateP16x16MvdCabac (pCurDqLayer, pMvd, LIST_1);
662   } else {
663     if (bSkipOrDirect) {
664       int8_t pSubPartCount[4], pPartW[4];
665       int8_t pRefIndex[LIST_A][30];
666       for (int32_t i = 0; i < 4; i++) {
667         int16_t iIdx8 = i << 2;
668         const uint8_t iScan4Idx = g_kuiScan4[iIdx8];
669         pCurDqLayer->pSubMbType[iMbXy][i] = subMbType;
670 
671         int16_t (*mvColoc)[2] = pCurDqLayer->iColocMv[LIST_0];
672 
673         ref[LIST_1] = 0;
674         UpdateP8x8RefIdxCabac (pCurDqLayer, pRefIndex, iIdx8, ref[LIST_1], LIST_1);
675         if (pCurDqLayer->iColocIntra[iScan4Idx]) {
676           ref[LIST_0] = 0;
677           UpdateP8x8RefIdxCabac (pCurDqLayer, pRefIndex, iIdx8, ref[LIST_0], LIST_0);
678           ST64 (iMvp, 0);
679         } else {
680           ref[LIST_0] = 0;
681           int8_t colocRefIndexL0 = pCurDqLayer->iColocRefIndex[LIST_0][iScan4Idx];
682           if (colocRefIndexL0 >= 0) {
683             ref[LIST_0] = MapColToList0 (pCtx, colocRefIndexL0, ref0Count);
684           } else {
685             mvColoc = pCurDqLayer->iColocMv[LIST_1];
686           }
687           UpdateP8x8RefIdxCabac (pCurDqLayer, pRefIndex, iIdx8, ref[LIST_0], LIST_0);
688         }
689         UpdateP8x8DirectCabac (pCurDqLayer, iIdx8);
690 
691         pSubPartCount[i] = g_ksInterBSubMbTypeInfo[0].iPartCount;
692         pPartW[i] = g_ksInterBSubMbTypeInfo[0].iPartWidth;
693 
694         if (IS_SUB_4x4 (subMbType)) {
695           pSubPartCount[i] = 4;
696           pPartW[i] = 1;
697         }
698         FillTemporalDirect8x8Mv (pCurDqLayer, iIdx8, pSubPartCount[i], pPartW[i], subMbType, ref, mvColoc, NULL, NULL);
699       }
700     }
701   }
702   return ret;
703 }
704 
705 //basic iMVs prediction unit for iMVs partition width (4, 2, 1)
PredMv(int16_t iMotionVector[LIST_A][30][MV_A],int8_t iRefIndex[LIST_A][30],int32_t listIdx,int32_t iPartIdx,int32_t iPartWidth,int8_t iRef,int16_t iMVP[2])706 void PredMv (int16_t iMotionVector[LIST_A][30][MV_A], int8_t iRefIndex[LIST_A][30],
707              int32_t listIdx, int32_t iPartIdx, int32_t iPartWidth, int8_t iRef, int16_t iMVP[2]) {
708   const uint8_t kuiLeftIdx      = g_kuiCache30ScanIdx[iPartIdx] - 1;
709   const uint8_t kuiTopIdx       = g_kuiCache30ScanIdx[iPartIdx] - 6;
710   const uint8_t kuiRightTopIdx  = kuiTopIdx + iPartWidth;
711   const uint8_t kuiLeftTopIdx   = kuiTopIdx - 1;
712 
713   const int8_t kiLeftRef      = iRefIndex[listIdx][kuiLeftIdx];
714   const int8_t kiTopRef       = iRefIndex[listIdx][ kuiTopIdx];
715   const int8_t kiRightTopRef  = iRefIndex[listIdx][kuiRightTopIdx];
716   const int8_t kiLeftTopRef   = iRefIndex[listIdx][ kuiLeftTopIdx];
717   int8_t iDiagonalRef  = kiRightTopRef;
718 
719   int8_t iMatchRef = 0;
720 
721 
722   int16_t iAMV[2], iBMV[2], iCMV[2];
723 
724   ST32 (iAMV, LD32 (iMotionVector[listIdx][     kuiLeftIdx]));
725   ST32 (iBMV, LD32 (iMotionVector[listIdx][      kuiTopIdx]));
726   ST32 (iCMV, LD32 (iMotionVector[listIdx][kuiRightTopIdx]));
727 
728   if (REF_NOT_AVAIL == iDiagonalRef) {
729     iDiagonalRef = kiLeftTopRef;
730     ST32 (iCMV, LD32 (iMotionVector[listIdx][kuiLeftTopIdx]));
731   }
732 
733   iMatchRef = (iRef == kiLeftRef) + (iRef == kiTopRef) + (iRef == iDiagonalRef);
734 
735   if (REF_NOT_AVAIL == kiTopRef && REF_NOT_AVAIL == iDiagonalRef && kiLeftRef >= REF_NOT_IN_LIST) {
736     ST32 (iMVP, LD32 (iAMV));
737     return;
738   }
739 
740   if (1 == iMatchRef) {
741     if (iRef == kiLeftRef) {
742       ST32 (iMVP, LD32 (iAMV));
743     } else if (iRef == kiTopRef) {
744       ST32 (iMVP, LD32 (iBMV));
745     } else {
746       ST32 (iMVP, LD32 (iCMV));
747     }
748   } else {
749     iMVP[0] = WelsMedian (iAMV[0], iBMV[0], iCMV[0]);
750     iMVP[1] = WelsMedian (iAMV[1], iBMV[1], iCMV[1]);
751   }
752 }
PredInter8x16Mv(int16_t iMotionVector[LIST_A][30][MV_A],int8_t iRefIndex[LIST_A][30],int32_t listIdx,int32_t iPartIdx,int8_t iRef,int16_t iMVP[2])753 void PredInter8x16Mv (int16_t iMotionVector[LIST_A][30][MV_A], int8_t iRefIndex[LIST_A][30],
754                       int32_t listIdx, int32_t iPartIdx, int8_t iRef, int16_t iMVP[2]) {
755   if (0 == iPartIdx) {
756     const int8_t kiLeftRef = iRefIndex[listIdx][6];
757     if (iRef == kiLeftRef) {
758       ST32 (iMVP, LD32 (&iMotionVector[listIdx][6][0]));
759       return;
760     }
761   } else { // 1 == iPartIdx
762     int8_t iDiagonalRef = iRefIndex[listIdx][5]; //top-right
763     int8_t index = 5;
764     if (REF_NOT_AVAIL == iDiagonalRef) {
765       iDiagonalRef = iRefIndex[listIdx][2]; //top-left for 8*8 block(index 1)
766       index = 2;
767     }
768     if (iRef == iDiagonalRef) {
769       ST32 (iMVP, LD32 (&iMotionVector[listIdx][index][0]));
770       return;
771     }
772   }
773 
774   PredMv (iMotionVector, iRefIndex, listIdx, iPartIdx, 2, iRef, iMVP);
775 }
PredInter16x8Mv(int16_t iMotionVector[LIST_A][30][MV_A],int8_t iRefIndex[LIST_A][30],int32_t listIdx,int32_t iPartIdx,int8_t iRef,int16_t iMVP[2])776 void PredInter16x8Mv (int16_t iMotionVector[LIST_A][30][MV_A], int8_t iRefIndex[LIST_A][30],
777                       int32_t listIdx, int32_t iPartIdx, int8_t iRef, int16_t iMVP[2]) {
778   if (0 == iPartIdx) {
779     const int8_t kiTopRef = iRefIndex[listIdx][1];
780     if (iRef == kiTopRef) {
781       ST32 (iMVP, LD32 (&iMotionVector[listIdx][1][0]));
782       return;
783     }
784   } else { // 8 == iPartIdx
785     const int8_t kiLeftRef = iRefIndex[listIdx][18];
786     if (iRef == kiLeftRef) {
787       ST32 (iMVP, LD32 (&iMotionVector[listIdx][18][0]));
788       return;
789     }
790   }
791 
792   PredMv (iMotionVector, iRefIndex, listIdx, iPartIdx, 4, iRef, iMVP);
793 }
794 
795 //update iMVs and iRefIndex cache for current MB, only for P_16*16 (SKIP inclusive)
796 /* can be further optimized */
UpdateP16x16MotionInfo(PDqLayer pCurDqLayer,int32_t listIdx,int8_t iRef,int16_t iMVs[2])797 void UpdateP16x16MotionInfo (PDqLayer pCurDqLayer, int32_t listIdx, int8_t iRef, int16_t iMVs[2]) {
798   const int16_t kiRef2 = ((uint8_t)iRef << 8) | (uint8_t)iRef;
799   const int32_t kiMV32 = LD32 (iMVs);
800   int32_t i;
801   int32_t iMbXy = pCurDqLayer->iMbXyIndex;
802 
803   for (i = 0; i < 16; i += 4) {
804     //mb
805     const uint8_t kuiScan4Idx = g_kuiScan4[i];
806     const uint8_t kuiScan4IdxPlus4 = 4 + kuiScan4Idx;
807     if (pCurDqLayer->pDec != NULL) {
808       ST16 (&pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][kuiScan4Idx], kiRef2);
809       ST16 (&pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][kuiScan4IdxPlus4], kiRef2);
810 
811       ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][kuiScan4Idx], kiMV32);
812       ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][1 + kuiScan4Idx], kiMV32);
813       ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][kuiScan4IdxPlus4], kiMV32);
814       ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32);
815     } else {
816       ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4Idx], kiRef2);
817       ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4IdxPlus4], kiRef2);
818 
819       ST32 (pCurDqLayer->pMv[listIdx][iMbXy][kuiScan4Idx], kiMV32);
820       ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4Idx], kiMV32);
821       ST32 (pCurDqLayer->pMv[listIdx][iMbXy][kuiScan4IdxPlus4], kiMV32);
822       ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32);
823     }
824   }
825 }
826 
827 //update iRefIndex cache for current MB, only for P_16*16 (SKIP inclusive)
828 /* can be further optimized */
UpdateP16x16RefIdx(PDqLayer pCurDqLayer,int32_t listIdx,int8_t iRef)829 void UpdateP16x16RefIdx (PDqLayer pCurDqLayer, int32_t listIdx, int8_t iRef) {
830   const int16_t kiRef2 = ((uint8_t)iRef << 8) | (uint8_t)iRef;
831   int32_t i;
832   int32_t iMbXy = pCurDqLayer->iMbXyIndex;
833 
834   for (i = 0; i < 16; i += 4) {
835     //mb
836     const uint8_t kuiScan4Idx = g_kuiScan4[i];
837     const uint8_t kuiScan4IdxPlus4 = 4 + kuiScan4Idx;
838 
839     ST16 (&pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][kuiScan4Idx], kiRef2);
840     ST16 (&pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][kuiScan4IdxPlus4], kiRef2);
841   }
842 }
843 
844 //update iMVs only cache for current MB, only for P_16*16 (SKIP inclusive)
845 /* can be further optimized */
UpdateP16x16MotionOnly(PDqLayer pCurDqLayer,int32_t listIdx,int16_t iMVs[2])846 void UpdateP16x16MotionOnly (PDqLayer pCurDqLayer, int32_t listIdx, int16_t iMVs[2]) {
847   const int32_t kiMV32 = LD32 (iMVs);
848   int32_t i;
849   int32_t iMbXy = pCurDqLayer->iMbXyIndex;
850 
851   for (i = 0; i < 16; i += 4) {
852     //mb
853     const uint8_t kuiScan4Idx = g_kuiScan4[i];
854     const uint8_t kuiScan4IdxPlus4 = 4 + kuiScan4Idx;
855     if (pCurDqLayer->pDec != NULL) {
856       ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][kuiScan4Idx], kiMV32);
857       ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][1 + kuiScan4Idx], kiMV32);
858       ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][kuiScan4IdxPlus4], kiMV32);
859       ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32);
860     } else {
861       ST32 (pCurDqLayer->pMv[listIdx][iMbXy][kuiScan4Idx], kiMV32);
862       ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4Idx], kiMV32);
863       ST32 (pCurDqLayer->pMv[listIdx][iMbXy][kuiScan4IdxPlus4], kiMV32);
864       ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32);
865     }
866   }
867 }
868 
869 //update iRefIndex and iMVs of Mb, only for P16x8
870 /*need further optimization, mb_cache not work */
UpdateP16x8MotionInfo(PDqLayer pCurDqLayer,int16_t iMotionVector[LIST_A][30][MV_A],int8_t iRefIndex[LIST_A][30],int32_t listIdx,int32_t iPartIdx,int8_t iRef,int16_t iMVs[2])871 void UpdateP16x8MotionInfo (PDqLayer pCurDqLayer, int16_t iMotionVector[LIST_A][30][MV_A],
872                             int8_t iRefIndex[LIST_A][30],
873                             int32_t listIdx, int32_t iPartIdx, int8_t iRef, int16_t iMVs[2]) {
874   const int16_t kiRef2 = ((uint8_t)iRef << 8) | (uint8_t)iRef;
875   const int32_t kiMV32 = LD32 (iMVs);
876   int32_t i;
877   int32_t iMbXy = pCurDqLayer->iMbXyIndex;
878   for (i = 0; i < 2; i++, iPartIdx += 4) {
879     const uint8_t kuiScan4Idx      = g_kuiScan4[iPartIdx];
880     const uint8_t kuiScan4IdxPlus4 = 4 + kuiScan4Idx;
881     const uint8_t kuiCacheIdx      = g_kuiCache30ScanIdx[iPartIdx];
882     const uint8_t kuiCacheIdxPlus6 = 6 + kuiCacheIdx;
883 
884     //mb
885     if (pCurDqLayer->pDec != NULL) {
886       ST16 (&pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][kuiScan4Idx], kiRef2);
887       ST16 (&pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][kuiScan4IdxPlus4], kiRef2);
888       ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][kuiScan4Idx], kiMV32);
889       ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][1 + kuiScan4Idx], kiMV32);
890       ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][kuiScan4IdxPlus4], kiMV32);
891       ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32);
892     } else {
893       ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4Idx], kiRef2);
894       ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4IdxPlus4], kiRef2);
895       ST32 (pCurDqLayer->pMv[listIdx][iMbXy][kuiScan4Idx], kiMV32);
896       ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4Idx], kiMV32);
897       ST32 (pCurDqLayer->pMv[listIdx][iMbXy][kuiScan4IdxPlus4], kiMV32);
898       ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32);
899     }
900     //cache
901     ST16 (&iRefIndex[listIdx][kuiCacheIdx ], kiRef2);
902     ST16 (&iRefIndex[listIdx][kuiCacheIdxPlus6], kiRef2);
903     ST32 (iMotionVector[listIdx][  kuiCacheIdx ], kiMV32);
904     ST32 (iMotionVector[listIdx][1 + kuiCacheIdx ], kiMV32);
905     ST32 (iMotionVector[listIdx][  kuiCacheIdxPlus6], kiMV32);
906     ST32 (iMotionVector[listIdx][1 + kuiCacheIdxPlus6], kiMV32);
907   }
908 }
909 //update iRefIndex and iMVs of both Mb and Mb_cache, only for P8x16
UpdateP8x16MotionInfo(PDqLayer pCurDqLayer,int16_t iMotionVector[LIST_A][30][MV_A],int8_t iRefIndex[LIST_A][30],int32_t listIdx,int32_t iPartIdx,int8_t iRef,int16_t iMVs[2])910 void UpdateP8x16MotionInfo (PDqLayer pCurDqLayer, int16_t iMotionVector[LIST_A][30][MV_A],
911                             int8_t iRefIndex[LIST_A][30],
912                             int32_t listIdx, int32_t iPartIdx, int8_t iRef, int16_t iMVs[2]) {
913   const int16_t kiRef2 = ((uint8_t)iRef << 8) | (uint8_t)iRef;
914   const int32_t kiMV32 = LD32 (iMVs);
915   int32_t i;
916   int32_t iMbXy = pCurDqLayer->iMbXyIndex;
917 
918   for (i = 0; i < 2; i++, iPartIdx += 8) {
919     const uint8_t kuiScan4Idx = g_kuiScan4[iPartIdx];
920     const uint8_t kuiCacheIdx = g_kuiCache30ScanIdx[iPartIdx];
921     const uint8_t kuiScan4IdxPlus4 = 4 + kuiScan4Idx;
922     const uint8_t kuiCacheIdxPlus6 = 6 + kuiCacheIdx;
923 
924     //mb
925     if (pCurDqLayer->pDec != NULL) {
926       ST16 (&pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][kuiScan4Idx], kiRef2);
927       ST16 (&pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][kuiScan4IdxPlus4], kiRef2);
928       ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][kuiScan4Idx], kiMV32);
929       ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][1 + kuiScan4Idx], kiMV32);
930       ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][kuiScan4IdxPlus4], kiMV32);
931       ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32);
932     } else {
933       ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4Idx], kiRef2);
934       ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4IdxPlus4], kiRef2);
935       ST32 (pCurDqLayer->pMv[listIdx][iMbXy][kuiScan4Idx], kiMV32);
936       ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4Idx], kiMV32);
937       ST32 (pCurDqLayer->pMv[listIdx][iMbXy][kuiScan4IdxPlus4], kiMV32);
938       ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32);
939     }
940     //cache
941     ST16 (&iRefIndex[listIdx][kuiCacheIdx ], kiRef2);
942     ST16 (&iRefIndex[listIdx][kuiCacheIdxPlus6], kiRef2);
943     ST32 (iMotionVector[listIdx][  kuiCacheIdx ], kiMV32);
944     ST32 (iMotionVector[listIdx][1 + kuiCacheIdx ], kiMV32);
945     ST32 (iMotionVector[listIdx][  kuiCacheIdxPlus6], kiMV32);
946     ST32 (iMotionVector[listIdx][1 + kuiCacheIdxPlus6], kiMV32);
947   }
948 }
949 
FillSpatialDirect8x8Mv(PDqLayer pCurDqLayer,const int16_t & iIdx8,const int8_t & iPartCount,const int8_t & iPartW,const SubMbType & subMbType,const bool & bIsLongRef,int16_t pMvDirect[LIST_A][2],int8_t iRef[LIST_A],int16_t pMotionVector[LIST_A][30][MV_A],int16_t pMvdCache[LIST_A][30][MV_A])950 void FillSpatialDirect8x8Mv (PDqLayer pCurDqLayer, const int16_t& iIdx8, const int8_t& iPartCount, const int8_t& iPartW,
951                              const SubMbType& subMbType, const bool& bIsLongRef, int16_t pMvDirect[LIST_A][2], int8_t iRef[LIST_A],
952                              int16_t pMotionVector[LIST_A][30][MV_A], int16_t pMvdCache[LIST_A][30][MV_A]) {
953   int32_t iMbXy = pCurDqLayer->iMbXyIndex;
954   for (int32_t j = 0; j < iPartCount; j++) {
955     int8_t iPartIdx = iIdx8 + j * iPartW;
956     uint8_t iScan4Idx = g_kuiScan4[iPartIdx];
957     uint8_t iColocIdx = g_kuiScan4[iPartIdx];
958     uint8_t iCacheIdx = g_kuiCache30ScanIdx[iPartIdx];
959 
960     int16_t pMV[4] = { 0 };
961     if (IS_SUB_8x8 (subMbType)) {
962       * (uint32_t*)pMV = * (uint32_t*)pMvDirect[LIST_0];
963       ST32 ((pMV + 2), LD32 (pMV));
964       ST64 (pCurDqLayer->pDec->pMv[LIST_0][iMbXy][iScan4Idx], LD64 (pMV));
965       ST64 (pCurDqLayer->pDec->pMv[LIST_0][iMbXy][iScan4Idx + 4], LD64 (pMV));
966       ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
967       ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx + 4], 0);
968       if (pMotionVector != NULL) {
969         ST64 (pMotionVector[LIST_0][iCacheIdx], LD64 (pMV));
970         ST64 (pMotionVector[LIST_0][iCacheIdx + 6], LD64 (pMV));
971       }
972       if (pMvdCache != NULL) {
973         ST64 (pMvdCache[LIST_0][iCacheIdx], 0);
974         ST64 (pMvdCache[LIST_0][iCacheIdx + 6], 0);
975       }
976       * (uint32_t*)pMV = * (uint32_t*)pMvDirect[LIST_1];
977       ST32 ((pMV + 2), LD32 (pMV));
978       ST64 (pCurDqLayer->pDec->pMv[LIST_1][iMbXy][iScan4Idx], LD64 (pMV));
979       ST64 (pCurDqLayer->pDec->pMv[LIST_1][iMbXy][iScan4Idx + 4], LD64 (pMV));
980       ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
981       ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx + 4], 0);
982       if (pMotionVector != NULL) {
983         ST64 (pMotionVector[LIST_1][iCacheIdx], LD64 (pMV));
984         ST64 (pMotionVector[LIST_1][iCacheIdx + 6], LD64 (pMV));
985       }
986       if (pMvdCache != NULL) {
987         ST64 (pMvdCache[LIST_1][iCacheIdx], 0);
988         ST64 (pMvdCache[LIST_1][iCacheIdx + 6], 0);
989       }
990     } else { //SUB_4x4
991       * (uint32_t*)pMV = * (uint32_t*)pMvDirect[LIST_0];
992       ST32 (pCurDqLayer->pDec->pMv[LIST_0][iMbXy][iScan4Idx], LD32 (pMV));
993       ST32 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
994       if (pMotionVector != NULL) {
995         ST32 (pMotionVector[LIST_0][iCacheIdx], LD32 (pMV));
996       }
997       if (pMvdCache != NULL) {
998         ST32 (pMvdCache[LIST_0][iCacheIdx], 0);
999       }
1000       * (uint32_t*)pMV = * (uint32_t*)pMvDirect[LIST_1];
1001       ST32 (pCurDqLayer->pDec->pMv[LIST_1][iMbXy][iScan4Idx], LD32 (pMV));
1002       ST32 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
1003       if (pMotionVector != NULL) {
1004         ST32 (pMotionVector[LIST_1][iCacheIdx], LD32 (pMV));
1005       }
1006       if (pMvdCache != NULL) {
1007         ST32 (pMvdCache[LIST_1][iCacheIdx], 0);
1008       }
1009     }
1010     if ((* (int32_t*)pMvDirect[LIST_0] | * (int32_t*)pMvDirect[LIST_1])) {
1011       uint32_t uiColZeroFlag = (0 == pCurDqLayer->iColocIntra[iColocIdx]) && !bIsLongRef &&
1012                                (pCurDqLayer->iColocRefIndex[LIST_0][iColocIdx] == 0 || (pCurDqLayer->iColocRefIndex[LIST_0][iColocIdx] < 0
1013                                    && pCurDqLayer->iColocRefIndex[LIST_1][iColocIdx] == 0));
1014       const int16_t (*mvColoc)[2] = 0 == pCurDqLayer->iColocRefIndex[LIST_0][iColocIdx] ? pCurDqLayer->iColocMv[LIST_0] :
1015                                     pCurDqLayer->iColocMv[LIST_1];
1016       const int16_t* mv = mvColoc[iColocIdx];
1017       if (IS_SUB_8x8 (subMbType)) {
1018         if (uiColZeroFlag && ((unsigned) (mv[0] + 1) <= 2 && (unsigned) (mv[1] + 1) <= 2)) {
1019           if (iRef[LIST_0] == 0) {
1020             ST64 (pCurDqLayer->pDec->pMv[LIST_0][iMbXy][iScan4Idx], 0);
1021             ST64 (pCurDqLayer->pDec->pMv[LIST_0][iMbXy][iScan4Idx + 4], 0);
1022             ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
1023             ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx + 4], 0);
1024             if (pMotionVector != NULL) {
1025               ST64 (pMotionVector[LIST_0][iCacheIdx], 0);
1026               ST64 (pMotionVector[LIST_0][iCacheIdx + 6], 0);
1027             }
1028             if (pMvdCache != NULL) {
1029               ST64 (pMvdCache[LIST_0][iCacheIdx], 0);
1030               ST64 (pMvdCache[LIST_0][iCacheIdx + 6], 0);
1031             }
1032           }
1033 
1034           if (iRef[LIST_1] == 0) {
1035             ST64 (pCurDqLayer->pDec->pMv[LIST_1][iMbXy][iScan4Idx], 0);
1036             ST64 (pCurDqLayer->pDec->pMv[LIST_1][iMbXy][iScan4Idx + 4], 0);
1037             ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
1038             ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx + 4], 0);
1039             if (pMotionVector != NULL) {
1040               ST64 (pMotionVector[LIST_1][iCacheIdx], 0);
1041               ST64 (pMotionVector[LIST_1][iCacheIdx + 6], 0);
1042             }
1043             if (pMvdCache != NULL) {
1044               ST64 (pMvdCache[LIST_1][iCacheIdx], 0);
1045               ST64 (pMvdCache[LIST_1][iCacheIdx + 6], 0);
1046             }
1047           }
1048         }
1049       } else {
1050         if (uiColZeroFlag && ((unsigned) (mv[0] + 1) <= 2 && (unsigned) (mv[1] + 1) <= 2)) {
1051           if (iRef[LIST_0] == 0) {
1052             ST32 (pCurDqLayer->pDec->pMv[LIST_0][iMbXy][iScan4Idx], 0);
1053             ST32 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
1054             if (pMotionVector != NULL) {
1055               ST32 (pMotionVector[LIST_0][iCacheIdx], 0);
1056             }
1057             if (pMvdCache != NULL) {
1058               ST32 (pMvdCache[LIST_0][iCacheIdx], 0);
1059             }
1060           }
1061           if (iRef[LIST_1] == 0) {
1062             ST32 (pCurDqLayer->pDec->pMv[LIST_1][iMbXy][iScan4Idx], 0);
1063             ST32 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
1064             if (pMotionVector != NULL) {
1065               ST32 (pMotionVector[LIST_1][iCacheIdx], 0);
1066             }
1067             if (pMvdCache != NULL) {
1068               ST32 (pMvdCache[LIST_1][iCacheIdx], 0);
1069             }
1070           }
1071         }
1072       }
1073     }
1074   }
1075 }
1076 
FillTemporalDirect8x8Mv(PDqLayer pCurDqLayer,const int16_t & iIdx8,const int8_t & iPartCount,const int8_t & iPartW,const SubMbType & subMbType,int8_t iRef[LIST_A],int16_t (* mvColoc)[2],int16_t pMotionVector[LIST_A][30][MV_A],int16_t pMvdCache[LIST_A][30][MV_A])1077 void FillTemporalDirect8x8Mv (PDqLayer pCurDqLayer, const int16_t& iIdx8, const int8_t& iPartCount,
1078                               const int8_t& iPartW,
1079                               const SubMbType& subMbType, int8_t iRef[LIST_A], int16_t (*mvColoc)[2], int16_t pMotionVector[LIST_A][30][MV_A],
1080                               int16_t pMvdCache[LIST_A][30][MV_A]) {
1081   PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer;
1082   int32_t iMbXy = pCurDqLayer->iMbXyIndex;
1083   int16_t pMvDirect[LIST_A][2] = { { 0, 0 }, { 0, 0 } };
1084   for (int32_t j = 0; j < iPartCount; j++) {
1085     int8_t iPartIdx = iIdx8 + j * iPartW;
1086     uint8_t iScan4Idx = g_kuiScan4[iPartIdx];
1087     uint8_t iColocIdx = g_kuiScan4[iPartIdx];
1088     uint8_t iCacheIdx = g_kuiCache30ScanIdx[iPartIdx];
1089 
1090     int16_t* mv = mvColoc[iColocIdx];
1091 
1092     int16_t pMV[4] = { 0 };
1093     if (IS_SUB_8x8 (subMbType)) {
1094       if (!pCurDqLayer->iColocIntra[iColocIdx]) {
1095         pMvDirect[LIST_0][0] = (pSlice->iMvScale[LIST_0][iRef[LIST_0]] * mv[0] + 128) >> 8;
1096         pMvDirect[LIST_0][1] = (pSlice->iMvScale[LIST_0][iRef[LIST_0]] * mv[1] + 128) >> 8;
1097       }
1098       ST32 (pMV, LD32 (pMvDirect[LIST_0]));
1099       ST32 ((pMV + 2), LD32 (pMvDirect[LIST_0]));
1100       ST64 (pCurDqLayer->pDec->pMv[LIST_0][iMbXy][iScan4Idx], LD64 (pMV));
1101       ST64 (pCurDqLayer->pDec->pMv[LIST_0][iMbXy][iScan4Idx + 4], LD64 (pMV));
1102       ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
1103       ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx + 4], 0);
1104       if (pMotionVector != NULL) {
1105         ST64 (pMotionVector[LIST_0][iCacheIdx], LD64 (pMV));
1106         ST64 (pMotionVector[LIST_0][iCacheIdx + 6], LD64 (pMV));
1107       }
1108       if (pMvdCache != NULL) {
1109         ST64 (pMvdCache[LIST_0][iCacheIdx], 0);
1110         ST64 (pMvdCache[LIST_0][iCacheIdx + 6], 0);
1111       }
1112       if (!pCurDqLayer->iColocIntra[g_kuiScan4[iIdx8]]) {
1113         pMvDirect[LIST_1][0] = pMvDirect[LIST_0][0] - mv[0];
1114         pMvDirect[LIST_1][1] = pMvDirect[LIST_0][1] - mv[1];
1115       }
1116       ST32 (pMV, LD32 (pMvDirect[LIST_1]));
1117       ST32 ((pMV + 2), LD32 (pMvDirect[LIST_1]));
1118       ST64 (pCurDqLayer->pDec->pMv[LIST_1][iMbXy][iScan4Idx], LD64 (pMV));
1119       ST64 (pCurDqLayer->pDec->pMv[LIST_1][iMbXy][iScan4Idx + 4], LD64 (pMV));
1120       ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
1121       ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx + 4], 0);
1122       if (pMotionVector != NULL) {
1123         ST64 (pMotionVector[LIST_1][iCacheIdx], LD64 (pMV));
1124         ST64 (pMotionVector[LIST_1][iCacheIdx + 6], LD64 (pMV));
1125       }
1126       if (pMvdCache != NULL) {
1127         ST64 (pMvdCache[LIST_1][iCacheIdx], 0);
1128         ST64 (pMvdCache[LIST_1][iCacheIdx + 6], 0);
1129       }
1130     } else { //SUB_4x4
1131       if (!pCurDqLayer->iColocIntra[iColocIdx]) {
1132         pMvDirect[LIST_0][0] = (pSlice->iMvScale[LIST_0][iRef[LIST_0]] * mv[0] + 128) >> 8;
1133         pMvDirect[LIST_0][1] = (pSlice->iMvScale[LIST_0][iRef[LIST_0]] * mv[1] + 128) >> 8;
1134       }
1135       ST32 (pCurDqLayer->pDec->pMv[LIST_0][iMbXy][iScan4Idx], LD32 (pMvDirect[LIST_0]));
1136       ST32 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
1137       if (pMotionVector != NULL) {
1138         ST32 (pMotionVector[LIST_0][iCacheIdx], LD32 (pMvDirect[LIST_0]));
1139       }
1140       if (pMvdCache != NULL) {
1141         ST32 (pMvdCache[LIST_0][iCacheIdx], 0);
1142       }
1143       if (!pCurDqLayer->iColocIntra[iColocIdx]) {
1144         pMvDirect[LIST_1][0] = pMvDirect[LIST_0][0] - mv[0];
1145         pMvDirect[LIST_1][1] = pMvDirect[LIST_0][1] - mv[1];
1146       }
1147       ST32 (pCurDqLayer->pDec->pMv[LIST_1][iMbXy][iScan4Idx], LD32 (pMvDirect[LIST_1]));
1148       ST32 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
1149       if (pMotionVector != NULL) {
1150         ST32 (pMotionVector[LIST_1][iCacheIdx], LD32 (pMvDirect[LIST_1]));
1151       }
1152       if (pMvdCache != NULL) {
1153         ST32 (pMvdCache[LIST_1][iCacheIdx], 0);
1154       }
1155     }
1156   }
1157 }
MapColToList0(PWelsDecoderContext & pCtx,const int8_t & colocRefIndexL0,const int32_t & ref0Count)1158 int8_t MapColToList0 (PWelsDecoderContext& pCtx, const int8_t& colocRefIndexL0,
1159                       const int32_t& ref0Count) { //ISO/IEC 14496-10:2009(E) (8-193)
1160   //When reference is lost, this function must be skipped.
1161   if ((pCtx->iErrorCode & dsRefLost) == dsRefLost) {
1162     return 0;
1163   }
1164   PPicture pic1 = pCtx->sRefPic.pRefList[LIST_1][0];
1165   if (pic1 && pic1->pRefPic[LIST_0][colocRefIndexL0]) {
1166     const int32_t iFramePoc = pic1->pRefPic[LIST_0][colocRefIndexL0]->iFramePoc;
1167     for (int32_t i = 0; i < ref0Count; i++) {
1168       if (pCtx->sRefPic.pRefList[LIST_0][i]->iFramePoc == iFramePoc) {
1169         return i;
1170       }
1171     }
1172   }
1173   return 0;
1174 }
Update8x8RefIdx(PDqLayer & pCurDqLayer,const int16_t & iPartIdx,const int32_t & listIdx,const int8_t & iRef)1175 void Update8x8RefIdx (PDqLayer& pCurDqLayer, const int16_t& iPartIdx, const int32_t& listIdx, const int8_t& iRef) {
1176   int32_t iMbXy = pCurDqLayer->iMbXyIndex;
1177   const uint8_t iScan4Idx = g_kuiScan4[iPartIdx];
1178   pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][iScan4Idx] = pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][iScan4Idx + 1] =
1179         pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][iScan4Idx + 4] = pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][iScan4Idx +
1180             5] = iRef;
1181 
1182 }
1183 } // namespace WelsDec
1184