1 /*!
2 * \copy
3 * Copyright (c) 2009-2013, Cisco Systems
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * * Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
22 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
24 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
28 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 *
31 *
32 * \file mv_pred.c
33 *
34 * \brief Get MV predictor and update motion vector of mb cache
35 *
36 * \date 05/22/2009 Created
37 *
38 *************************************************************************************
39 */
40
41 #include "mv_pred.h"
42 #include "ls_defines.h"
43 #include "mb_cache.h"
44 #include "parse_mb_syn_cabac.h"
45
46 namespace WelsDec {
47
/*!
 * \brief   Fill a small rectangle of fixed-size elements with a single value.
 *
 * Only the following (row width in bytes, row count) shapes are handled:
 * 1x4, 2x2, 2x4, 4x2, 4x4, 8x1, 8x2, 8x4, 16x2, 16x3 and 16x4.
 * Any other shape leaves the destination untouched.
 *
 * \param   vp      address of the first (top-left) element; rows are stored through
 *                  uint16_t / uint32_t pointers, so it must be suitably aligned
 * \param   w       rectangle width in elements
 * \param   h       rectangle height in rows
 * \param   stride  distance in bytes between the starts of two consecutive rows
 * \param   val     fill value; written unchanged when size is 4, otherwise multiplied by
 *                  0x0101 / 0x01010101 so that a byte value is repeated in every byte of the
 *                  store (one-byte rows simply store the low byte of val)
 * \param   size    size of one element in bytes
 */
static inline void SetRectBlock (void* vp, int32_t w, const int32_t h, int32_t stride, const uint32_t val,
                                 const int32_t size) {
  uint8_t* pDst = (uint8_t*)vp;
  const int32_t kiRowBytes = w * size;
  int32_t iShapeOk;

  // Accept exactly the shapes that have a dedicated code path; everything else is a no-op.
  switch (kiRowBytes) {
  case 1:
    iShapeOk = (h == 4);
    break;
  case 2:
  case 4:
    iShapeOk = (h == 2 || h == 4);
    break;
  case 8:
    iShapeOk = (h == 1 || h == 2 || h == 4);
    break;
  case 16:
    iShapeOk = (h == 2 || h == 3 || h == 4);
    break;
  default:
    iShapeOk = 0;
    break;
  }
  if (!iShapeOk) {
    return;
  }

  if (kiRowBytes == 1) {
    const uint8_t kuiFill8 = (uint8_t)val;
    for (int32_t iRow = 0; iRow < h; ++iRow) {
      * (uint8_t*) (pDst + iRow * stride) = kuiFill8;
    }
  } else if (kiRowBytes == 2) {
    const uint16_t kuiFill16 = (size == 4) ? (uint16_t)val : (uint16_t) (val * 0x0101U);
    for (int32_t iRow = 0; iRow < h; ++iRow) {
      * (uint16_t*) (pDst + iRow * stride) = kuiFill16;
    }
  } else {
    // 4, 8 or 16 bytes per row: emit one 32-bit store per 4 bytes.
    const uint32_t kuiFill32 = (size == 4) ? val : (uint32_t) (val * 0x01010101UL);
    for (int32_t iRow = 0; iRow < h; ++iRow) {
      for (int32_t iCol = 0; iCol < kiRowBytes; iCol += 4) {
        * (uint32_t*) (pDst + iRow * stride + iCol) = kuiFill32;
      }
    }
  }
}
/*!
 * \brief   Copy four rows of w elements from one strided buffer to another.
 *
 * Rows of 1, 2, 4 or 16 bytes (w * size) are supported; any other row width copies nothing.
 *
 * \param   vdst        destination of the first row
 * \param   vsrc        source of the first row
 * \param   stride_dst  distance in bytes between consecutive destination rows
 * \param   stride_src  distance in bytes between consecutive source rows
 * \param   w           row width in elements
 * \param   size        size of one element in bytes
 */
void CopyRectBlock4Cols (void* vdst, void* vsrc, const int32_t stride_dst, const int32_t stride_src, int32_t w,
                         const int32_t size) {
  uint8_t* pDstBase = (uint8_t*)vdst;
  uint8_t* pSrcBase = (uint8_t*)vsrc;
  const int32_t kiRowBytes = w * size;

  if (kiRowBytes != 1 && kiRowBytes != 2 && kiRowBytes != 4 && kiRowBytes != 16) {
    return; // unsupported row width: leave the destination untouched
  }

  for (int32_t iRow = 0; iRow < 4; ++iRow) {
    uint8_t* pDstRow = &pDstBase[stride_dst * iRow];
    uint8_t* pSrcRow = &pSrcBase[stride_src * iRow];
    switch (kiRowBytes) {
    case 1:
      *pDstRow = *pSrcRow;
      break;
    case 2:
      * (uint16_t*)pDstRow = * (uint16_t*)pSrcRow;
      break;
    case 4:
      * (uint32_t*)pDstRow = * (uint32_t*)pSrcRow;
      break;
    default: // 16 bytes per row
      memcpy (pDstRow, pSrcRow, 16);
      break;
    }
  }
}
PredPSkipMvFromNeighbor(PDqLayer pCurDqLayer,int16_t iMvp[2])158 void PredPSkipMvFromNeighbor (PDqLayer pCurDqLayer, int16_t iMvp[2]) {
159 bool bTopAvail, bLeftTopAvail, bRightTopAvail, bLeftAvail;
160
161 int32_t iCurSliceIdc, iTopSliceIdc, iLeftTopSliceIdc, iRightTopSliceIdc, iLeftSliceIdc;
162 int32_t iLeftTopType, iRightTopType, iTopType, iLeftType;
163 int32_t iCurX, iCurY, iCurXy, iLeftXy, iTopXy = 0, iLeftTopXy = 0, iRightTopXy = 0;
164
165 int8_t iLeftRef;
166 int8_t iTopRef;
167 int8_t iRightTopRef;
168 int8_t iLeftTopRef;
169 int8_t iDiagonalRef;
170 int8_t iMatchRef;
171 int16_t iMvA[2], iMvB[2], iMvC[2], iMvD[2];
172
173 iCurXy = pCurDqLayer->iMbXyIndex;
174 iCurX = pCurDqLayer->iMbX;
175 iCurY = pCurDqLayer->iMbY;
176 iCurSliceIdc = pCurDqLayer->pSliceIdc[iCurXy];
177
178 if (iCurX != 0) {
179 iLeftXy = iCurXy - 1;
180 iLeftSliceIdc = pCurDqLayer->pSliceIdc[iLeftXy];
181 bLeftAvail = (iLeftSliceIdc == iCurSliceIdc);
182 } else {
183 bLeftAvail = 0;
184 bLeftTopAvail = 0;
185 }
186
187 if (iCurY != 0) {
188 iTopXy = iCurXy - pCurDqLayer->iMbWidth;
189 iTopSliceIdc = pCurDqLayer->pSliceIdc[iTopXy];
190 bTopAvail = (iTopSliceIdc == iCurSliceIdc);
191 if (iCurX != 0) {
192 iLeftTopXy = iTopXy - 1;
193 iLeftTopSliceIdc = pCurDqLayer->pSliceIdc[iLeftTopXy];
194 bLeftTopAvail = (iLeftTopSliceIdc == iCurSliceIdc);
195 } else {
196 bLeftTopAvail = 0;
197 }
198 if (iCurX != (pCurDqLayer->iMbWidth - 1)) {
199 iRightTopXy = iTopXy + 1;
200 iRightTopSliceIdc = pCurDqLayer->pSliceIdc[iRightTopXy];
201 bRightTopAvail = (iRightTopSliceIdc == iCurSliceIdc);
202 } else {
203 bRightTopAvail = 0;
204 }
205 } else {
206 bTopAvail = 0;
207 bLeftTopAvail = 0;
208 bRightTopAvail = 0;
209 }
210
211 iLeftType = ((iCurX != 0 && bLeftAvail) ? GetMbType (pCurDqLayer)[iLeftXy] : 0);
212 iTopType = ((iCurY != 0 && bTopAvail) ? GetMbType (pCurDqLayer)[iTopXy] : 0);
213 iLeftTopType = ((iCurX != 0 && iCurY != 0 && bLeftTopAvail)
214 ? GetMbType (pCurDqLayer)[iLeftTopXy] : 0);
215 iRightTopType = ((iCurX != pCurDqLayer->iMbWidth - 1 && iCurY != 0 && bRightTopAvail)
216 ? GetMbType (pCurDqLayer)[iRightTopXy] : 0);
217
218 /*get neb mv&iRefIdxArray*/
219 /*left*/
220 if (bLeftAvail && IS_INTER (iLeftType)) {
221 ST32 (iMvA, LD32 (pCurDqLayer->pDec ? pCurDqLayer->pDec->pMv[0][iLeftXy][3] : pCurDqLayer->pMv[0][iLeftXy][3]));
222 iLeftRef = pCurDqLayer->pDec ? pCurDqLayer->pDec->pRefIndex[0][iLeftXy][3] : pCurDqLayer->pRefIndex[0][iLeftXy][3];
223 } else {
224 ST32 (iMvA, 0);
225 if (0 == bLeftAvail) { //not available
226 iLeftRef = REF_NOT_AVAIL;
227 } else { //available but is intra mb type
228 iLeftRef = REF_NOT_IN_LIST;
229 }
230 }
231 if (REF_NOT_AVAIL == iLeftRef ||
232 (0 == iLeftRef && 0 == * (int32_t*)iMvA)) {
233 ST32 (iMvp, 0);
234 return;
235 }
236
237 /*top*/
238 if (bTopAvail && IS_INTER (iTopType)) {
239 ST32 (iMvB, LD32 (pCurDqLayer->pDec ? pCurDqLayer->pDec->pMv[0][iTopXy][12] : pCurDqLayer->pMv[0][iTopXy][12]));
240 iTopRef = pCurDqLayer->pDec ? pCurDqLayer->pDec->pRefIndex[0][iTopXy][12] : pCurDqLayer->pRefIndex[0][iTopXy][12];
241 } else {
242 ST32 (iMvB, 0);
243 if (0 == bTopAvail) { //not available
244 iTopRef = REF_NOT_AVAIL;
245 } else { //available but is intra mb type
246 iTopRef = REF_NOT_IN_LIST;
247 }
248 }
249 if (REF_NOT_AVAIL == iTopRef ||
250 (0 == iTopRef && 0 == * (int32_t*)iMvB)) {
251 ST32 (iMvp, 0);
252 return;
253 }
254
255 /*right_top*/
256 if (bRightTopAvail && IS_INTER (iRightTopType)) {
257 ST32 (iMvC, LD32 (pCurDqLayer->pDec ? pCurDqLayer->pDec->pMv[0][iRightTopXy][12] :
258 pCurDqLayer->pMv[0][iRightTopXy][12]));
259 iRightTopRef = pCurDqLayer->pDec ? pCurDqLayer->pDec->pRefIndex[0][iRightTopXy][12] :
260 pCurDqLayer->pRefIndex[0][iRightTopXy][12];
261 } else {
262 ST32 (iMvC, 0);
263 if (0 == bRightTopAvail) { //not available
264 iRightTopRef = REF_NOT_AVAIL;
265 } else { //available but is intra mb type
266 iRightTopRef = REF_NOT_IN_LIST;
267 }
268 }
269
270 /*left_top*/
271 if (bLeftTopAvail && IS_INTER (iLeftTopType)) {
272 ST32 (iMvD, LD32 (pCurDqLayer->pDec ? pCurDqLayer->pDec->pMv[0][iLeftTopXy][15] : pCurDqLayer->pMv[0][iLeftTopXy][15]));
273 iLeftTopRef = pCurDqLayer->pDec ? pCurDqLayer->pDec->pRefIndex[0][iLeftTopXy][15] :
274 pCurDqLayer->pRefIndex[0][iLeftTopXy][15];
275 } else {
276 ST32 (iMvD, 0);
277 if (0 == bLeftTopAvail) { //not available
278 iLeftTopRef = REF_NOT_AVAIL;
279 } else { //available but is intra mb type
280 iLeftTopRef = REF_NOT_IN_LIST;
281 }
282 }
283
284 iDiagonalRef = iRightTopRef;
285 if (REF_NOT_AVAIL == iDiagonalRef) {
286 iDiagonalRef = iLeftTopRef;
287 * (int32_t*)iMvC = * (int32_t*)iMvD;
288 }
289
290 if (REF_NOT_AVAIL == iTopRef && REF_NOT_AVAIL == iDiagonalRef && iLeftRef >= REF_NOT_IN_LIST) {
291 ST32 (iMvp, LD32 (iMvA));
292 return;
293 }
294
295 iMatchRef = (0 == iLeftRef) + (0 == iTopRef) + (0 == iDiagonalRef);
296 if (1 == iMatchRef) {
297 if (0 == iLeftRef) {
298 ST32 (iMvp, LD32 (iMvA));
299 } else if (0 == iTopRef) {
300 ST32 (iMvp, LD32 (iMvB));
301 } else {
302 ST32 (iMvp, LD32 (iMvC));
303 }
304 } else {
305 iMvp[0] = WelsMedian (iMvA[0], iMvB[0], iMvC[0]);
306 iMvp[1] = WelsMedian (iMvA[1], iMvB[1], iMvC[1]);
307 }
308 }
309
GetColocatedMb(PWelsDecoderContext pCtx,MbType & mbType,SubMbType & subMbType)310 int32_t GetColocatedMb (PWelsDecoderContext pCtx, MbType& mbType, SubMbType& subMbType) {
311 PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
312 int32_t iMbXy = pCurDqLayer->iMbXyIndex;
313
314 uint32_t is8x8 = IS_Inter_8x8 (GetMbType (pCurDqLayer)[iMbXy]);
315 mbType = GetMbType (pCurDqLayer)[iMbXy];
316
317 PPicture colocPic = pCtx->sRefPic.pRefList[LIST_1][0];
318 if (GetThreadCount (pCtx) > 1) {
319 if (16 * pCurDqLayer->iMbY > pCtx->lastReadyHeightOffset[1][0]) {
320 if (colocPic->pReadyEvent[pCurDqLayer->iMbY].isSignaled != 1) {
321 WAIT_EVENT (&colocPic->pReadyEvent[pCurDqLayer->iMbY], WELS_DEC_THREAD_WAIT_INFINITE);
322 }
323 pCtx->lastReadyHeightOffset[1][0] = 16 * pCurDqLayer->iMbY;
324 }
325 }
326
327 if (colocPic == NULL) {
328 SLogContext* pLogCtx = & (pCtx->sLogCtx);
329 WelsLog (pLogCtx, WELS_LOG_ERROR, "Colocated Ref Picture for B-Slice is lost, B-Slice decoding cannot be continued!");
330 return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_DATA, ERR_INFO_REFERENCE_PIC_LOST);
331 }
332
333 MbType coloc_mbType = colocPic->pMbType[iMbXy];
334 if (coloc_mbType == MB_TYPE_SKIP) {
335 //This indicates the colocated MB is P SKIP MB
336 coloc_mbType |= MB_TYPE_16x16 | MB_TYPE_P0L0 | MB_TYPE_P1L0;
337 }
338 if (IS_Inter_8x8 (coloc_mbType) && !pCtx->pSps->bDirect8x8InferenceFlag) {
339 subMbType = SUB_MB_TYPE_4x4 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_DIRECT;
340 mbType |= MB_TYPE_8x8 | MB_TYPE_L0 | MB_TYPE_L1;
341 } else if (!is8x8 && (IS_INTER_16x16 (coloc_mbType) || IS_INTRA (coloc_mbType)/* || IS_SKIP(coloc_mbType)*/)) {
342 subMbType = SUB_MB_TYPE_8x8 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_DIRECT;
343 mbType |= MB_TYPE_16x16 | MB_TYPE_L0 | MB_TYPE_L1;
344 } else {
345 subMbType = SUB_MB_TYPE_8x8 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_DIRECT;
346 mbType |= MB_TYPE_8x8 | MB_TYPE_L0 | MB_TYPE_L1;
347 }
348
349 if (IS_INTRA (coloc_mbType)) {
350 SetRectBlock (pCurDqLayer->iColocIntra, 4, 4, 4 * sizeof (int8_t), 1, sizeof (int8_t));
351 return ERR_NONE;
352 }
353 SetRectBlock (pCurDqLayer->iColocIntra, 4, 4, 4 * sizeof (int8_t), 0, sizeof (int8_t));
354
355 if (IS_INTER_16x16 (mbType)) {
356 int16_t iMVZero[2] = { 0 };
357 int16_t* pMv = IS_TYPE_L1 (coloc_mbType) ? colocPic->pMv[LIST_1][iMbXy][0] : iMVZero;
358 ST32 (pCurDqLayer->iColocMv[LIST_0][0], LD32 (colocPic->pMv[LIST_0][iMbXy][0]));
359 ST32 (pCurDqLayer->iColocMv[LIST_1][0], LD32 (pMv));
360 pCurDqLayer->iColocRefIndex[LIST_0][0] = colocPic->pRefIndex[LIST_0][iMbXy][0];
361 pCurDqLayer->iColocRefIndex[LIST_1][0] = IS_TYPE_L1 (coloc_mbType) ? colocPic->pRefIndex[LIST_1][iMbXy][0] :
362 REF_NOT_IN_LIST;
363 } else {
364 if (!pCtx->pSps->bDirect8x8InferenceFlag) {
365 CopyRectBlock4Cols (pCurDqLayer->iColocMv[LIST_0], colocPic->pMv[LIST_0][iMbXy], 16, 16, 4, 4);
366 CopyRectBlock4Cols (pCurDqLayer->iColocRefIndex[LIST_0], colocPic->pRefIndex[LIST_0][iMbXy], 4, 4, 4, 1);
367 if (IS_TYPE_L1 (coloc_mbType)) {
368 CopyRectBlock4Cols (pCurDqLayer->iColocMv[LIST_1], colocPic->pMv[LIST_1][iMbXy], 16, 16, 4, 4);
369 CopyRectBlock4Cols (pCurDqLayer->iColocRefIndex[LIST_1], colocPic->pRefIndex[LIST_1][iMbXy], 4, 4, 4, 1);
370 } else { // only forward prediction
371 SetRectBlock (pCurDqLayer->iColocRefIndex[LIST_1], 4, 4, 4, (uint8_t)REF_NOT_IN_LIST, 1);
372 }
373 } else {
374 for (int32_t listIdx = 0; listIdx < 1 + !! (coloc_mbType & MB_TYPE_L1); listIdx++) {
375 SetRectBlock (pCurDqLayer->iColocMv[listIdx][0], 2, 2, 16, LD32 (colocPic->pMv[listIdx][iMbXy][0]), 4);
376 SetRectBlock (pCurDqLayer->iColocMv[listIdx][2], 2, 2, 16, LD32 (colocPic->pMv[listIdx][iMbXy][3]), 4);
377 SetRectBlock (pCurDqLayer->iColocMv[listIdx][8], 2, 2, 16, LD32 (colocPic->pMv[listIdx][iMbXy][12]), 4);
378 SetRectBlock (pCurDqLayer->iColocMv[listIdx][10], 2, 2, 16, LD32 (colocPic->pMv[listIdx][iMbXy][15]), 4);
379
380 SetRectBlock (&pCurDqLayer->iColocRefIndex[listIdx][0], 2, 2, 4, colocPic->pRefIndex[listIdx][iMbXy][0], 1);
381 SetRectBlock (&pCurDqLayer->iColocRefIndex[listIdx][2], 2, 2, 4, colocPic->pRefIndex[listIdx][iMbXy][3], 1);
382 SetRectBlock (&pCurDqLayer->iColocRefIndex[listIdx][8], 2, 2, 4, colocPic->pRefIndex[listIdx][iMbXy][12], 1);
383 SetRectBlock (&pCurDqLayer->iColocRefIndex[listIdx][10], 2, 2, 4, colocPic->pRefIndex[listIdx][iMbXy][15], 1);
384 }
385 if (! (coloc_mbType & MB_TYPE_L1)) // only forward prediction
386 SetRectBlock (&pCurDqLayer->iColocRefIndex[1][0], 4, 4, 4, (uint8_t)REF_NOT_IN_LIST, 1);
387 }
388 }
389 return ERR_NONE;
390 }
391
PredMvBDirectSpatial(PWelsDecoderContext pCtx,int16_t iMvp[LIST_A][2],int8_t ref[LIST_A],SubMbType & subMbType)392 int32_t PredMvBDirectSpatial (PWelsDecoderContext pCtx, int16_t iMvp[LIST_A][2], int8_t ref[LIST_A],
393 SubMbType& subMbType) {
394
395 int32_t ret = ERR_NONE;
396 PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
397 int32_t iMbXy = pCurDqLayer->iMbXyIndex;
398 bool bSkipOrDirect = (IS_SKIP (GetMbType (pCurDqLayer)[iMbXy]) | IS_DIRECT (GetMbType (pCurDqLayer)[iMbXy])) > 0;
399
400 MbType mbType;
401 ret = GetColocatedMb (pCtx, mbType, subMbType);
402 if (ret != ERR_NONE) {
403 return ret;
404 }
405
406 bool bTopAvail, bLeftTopAvail, bRightTopAvail, bLeftAvail;
407 int32_t iLeftTopType, iRightTopType, iTopType, iLeftType;
408 int32_t iCurSliceIdc, iTopSliceIdc, iLeftTopSliceIdc, iRightTopSliceIdc, iLeftSliceIdc;
409 int32_t iCurX, iCurY, iCurXy, iLeftXy = 0, iTopXy = 0, iLeftTopXy = 0, iRightTopXy = 0;
410
411 int8_t iLeftRef[LIST_A];
412 int8_t iTopRef[LIST_A];
413 int8_t iRightTopRef[LIST_A];
414 int8_t iLeftTopRef[LIST_A];
415 int8_t iDiagonalRef[LIST_A];
416 int16_t iMvA[LIST_A][2], iMvB[LIST_A][2], iMvC[LIST_A][2], iMvD[LIST_A][2];
417
418 iCurXy = pCurDqLayer->iMbXyIndex;
419
420 iCurX = pCurDqLayer->iMbX;
421 iCurY = pCurDqLayer->iMbY;
422 iCurSliceIdc = pCurDqLayer->pSliceIdc[iCurXy];
423
424 if (iCurX != 0) {
425 iLeftXy = iCurXy - 1;
426 iLeftSliceIdc = pCurDqLayer->pSliceIdc[iLeftXy];
427 bLeftAvail = (iLeftSliceIdc == iCurSliceIdc);
428 } else {
429 bLeftAvail = 0;
430 bLeftTopAvail = 0;
431 }
432
433 if (iCurY != 0) {
434 iTopXy = iCurXy - pCurDqLayer->iMbWidth;
435 iTopSliceIdc = pCurDqLayer->pSliceIdc[iTopXy];
436 bTopAvail = (iTopSliceIdc == iCurSliceIdc);
437 if (iCurX != 0) {
438 iLeftTopXy = iTopXy - 1;
439 iLeftTopSliceIdc = pCurDqLayer->pSliceIdc[iLeftTopXy];
440 bLeftTopAvail = (iLeftTopSliceIdc == iCurSliceIdc);
441 } else {
442 bLeftTopAvail = 0;
443 }
444 if (iCurX != (pCurDqLayer->iMbWidth - 1)) {
445 iRightTopXy = iTopXy + 1;
446 iRightTopSliceIdc = pCurDqLayer->pSliceIdc[iRightTopXy];
447 bRightTopAvail = (iRightTopSliceIdc == iCurSliceIdc);
448 } else {
449 bRightTopAvail = 0;
450 }
451 } else {
452 bTopAvail = 0;
453 bLeftTopAvail = 0;
454 bRightTopAvail = 0;
455 }
456
457 iLeftType = ((iCurX != 0 && bLeftAvail) ? GetMbType (pCurDqLayer)[iLeftXy] : 0);
458 iTopType = ((iCurY != 0 && bTopAvail) ? GetMbType (pCurDqLayer)[iTopXy] : 0);
459 iLeftTopType = ((iCurX != 0 && iCurY != 0 && bLeftTopAvail)
460 ? GetMbType (pCurDqLayer)[iLeftTopXy] : 0);
461 iRightTopType = ((iCurX != pCurDqLayer->iMbWidth - 1 && iCurY != 0 && bRightTopAvail)
462 ? GetMbType (pCurDqLayer)[iRightTopXy] : 0);
463
464 /*get neb mv&iRefIdxArray*/
465 for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
466
467 /*left*/
468 if (bLeftAvail && IS_INTER (iLeftType)) {
469 ST32 (iMvA[listIdx], LD32 (pCurDqLayer->pDec ? pCurDqLayer->pDec->pMv[listIdx][iLeftXy][3] :
470 pCurDqLayer->pMv[listIdx][iLeftXy][3]));
471 iLeftRef[listIdx] = pCurDqLayer->pDec ? pCurDqLayer->pDec->pRefIndex[listIdx][iLeftXy][3] :
472 pCurDqLayer->pRefIndex[listIdx][iLeftXy][3];
473 } else {
474 ST32 (iMvA[listIdx], 0);
475 if (0 == bLeftAvail) { //not available
476 iLeftRef[listIdx] = REF_NOT_AVAIL;
477 } else { //available but is intra mb type
478 iLeftRef[listIdx] = REF_NOT_IN_LIST;
479 }
480 }
481
482 /*top*/
483 if (bTopAvail && IS_INTER (iTopType)) {
484 ST32 (iMvB[listIdx], LD32 (pCurDqLayer->pDec ? pCurDqLayer->pDec->pMv[listIdx][iTopXy][12] :
485 pCurDqLayer->pMv[listIdx][iTopXy][12]));
486 iTopRef[listIdx] = pCurDqLayer->pDec ? pCurDqLayer->pDec->pRefIndex[listIdx][iTopXy][12] :
487 pCurDqLayer->pRefIndex[listIdx][iTopXy][12];
488 } else {
489 ST32 (iMvB[listIdx], 0);
490 if (0 == bTopAvail) { //not available
491 iTopRef[listIdx] = REF_NOT_AVAIL;
492 } else { //available but is intra mb type
493 iTopRef[listIdx] = REF_NOT_IN_LIST;
494 }
495 }
496
497 /*right_top*/
498 if (bRightTopAvail && IS_INTER (iRightTopType)) {
499 ST32 (iMvC[listIdx], LD32 (pCurDqLayer->pDec ? pCurDqLayer->pDec->pMv[listIdx][iRightTopXy][12] :
500 pCurDqLayer->pMv[listIdx][iRightTopXy][12]));
501 iRightTopRef[listIdx] = pCurDqLayer->pDec ? pCurDqLayer->pDec->pRefIndex[listIdx][iRightTopXy][12] :
502 pCurDqLayer->pRefIndex[listIdx][iRightTopXy][12];
503 } else {
504 ST32 (iMvC[listIdx], 0);
505 if (0 == bRightTopAvail) { //not available
506 iRightTopRef[listIdx] = REF_NOT_AVAIL;
507 } else { //available but is intra mb type
508 iRightTopRef[listIdx] = REF_NOT_IN_LIST;
509 }
510 }
511 /*left_top*/
512 if (bLeftTopAvail && IS_INTER (iLeftTopType)) {
513 ST32 (iMvD[listIdx], LD32 (pCurDqLayer->pDec ? pCurDqLayer->pDec->pMv[listIdx][iLeftTopXy][15] :
514 pCurDqLayer->pMv[listIdx][iLeftTopXy][15]));
515 iLeftTopRef[listIdx] = pCurDqLayer->pDec ? pCurDqLayer->pDec->pRefIndex[listIdx][iLeftTopXy][15] :
516 pCurDqLayer->pRefIndex[listIdx][iLeftTopXy][15];
517 } else {
518 ST32 (iMvD[listIdx], 0);
519 if (0 == bLeftTopAvail) { //not available
520 iLeftTopRef[listIdx] = REF_NOT_AVAIL;
521 } else { //available but is intra mb type
522 iLeftTopRef[listIdx] = REF_NOT_IN_LIST;
523 }
524 }
525
526 iDiagonalRef[listIdx] = iRightTopRef[listIdx];
527 if (REF_NOT_AVAIL == iDiagonalRef[listIdx]) {
528 iDiagonalRef[listIdx] = iLeftTopRef[listIdx];
529 ST32 (iMvC[listIdx], LD32 (iMvD[listIdx]));
530 }
531
532 int8_t ref_temp = WELS_MIN_POSITIVE (iTopRef[listIdx], iDiagonalRef[listIdx]);
533 ref[listIdx] = WELS_MIN_POSITIVE (iLeftRef[listIdx], ref_temp);
534 if (ref[listIdx] >= 0) {
535
536 uint32_t match_count = (iLeftRef[listIdx] == ref[listIdx]) + (iTopRef[listIdx] == ref[listIdx]) +
537 (iDiagonalRef[listIdx] == ref[listIdx]);
538 if (match_count == 1) {
539 if (iLeftRef[listIdx] == ref[listIdx]) {
540 ST32 (iMvp[listIdx], LD32 (iMvA[listIdx]));
541 } else if (iTopRef[listIdx] == ref[listIdx]) {
542 ST32 (iMvp[listIdx], LD32 (iMvB[listIdx]));
543 } else {
544 ST32 (iMvp[listIdx], LD32 (iMvC[listIdx]));
545 }
546 } else {
547 iMvp[listIdx][0] = WelsMedian (iMvA[listIdx][0], iMvB[listIdx][0], iMvC[listIdx][0]);
548 iMvp[listIdx][1] = WelsMedian (iMvA[listIdx][1], iMvB[listIdx][1], iMvC[listIdx][1]);
549 }
550 } else {
551 iMvp[listIdx][0] = 0;
552 iMvp[listIdx][1] = 0;
553 ref[listIdx] = REF_NOT_IN_LIST;
554 }
555 }
556 if (ref[LIST_0] <= REF_NOT_IN_LIST && ref[LIST_1] <= REF_NOT_IN_LIST) {
557 ref[LIST_0] = ref[LIST_1] = 0;
558 } else if (ref[LIST_1] < 0) {
559 mbType &= ~MB_TYPE_L1;
560 subMbType &= ~MB_TYPE_L1;
561 } else if (ref[LIST_0] < 0) {
562 mbType &= ~MB_TYPE_L0;
563 subMbType &= ~MB_TYPE_L0;
564 }
565 GetMbType (pCurDqLayer)[iMbXy] = mbType;
566
567 int16_t pMvd[4] = { 0 };
568
569 bool bIsLongRef = pCtx->sRefPic.pRefList[LIST_1][0]->bIsLongRef;
570
571 if (IS_INTER_16x16 (mbType)) {
572 if ((* (int32_t*)iMvp[LIST_0] | * (int32_t*)iMvp[LIST_1])) {
573 if (0 == pCurDqLayer->iColocIntra[0] && !bIsLongRef
574 && ((pCurDqLayer->iColocRefIndex[LIST_0][0] == 0 && (unsigned) (pCurDqLayer->iColocMv[LIST_0][0][0] + 1) <= 2
575 && (unsigned) (pCurDqLayer->iColocMv[LIST_0][0][1] + 1) <= 2)
576 || (pCurDqLayer->iColocRefIndex[LIST_0][0] < 0 && pCurDqLayer->iColocRefIndex[LIST_1][0] == 0
577 && (unsigned) (pCurDqLayer->iColocMv[LIST_1][0][0] + 1) <= 2
578 && (unsigned) (pCurDqLayer->iColocMv[LIST_1][0][1] + 1) <= 2))) {
579 if (0 >= ref[0]) * (uint32_t*)iMvp[LIST_0] = 0;
580 if (0 >= ref[1]) * (uint32_t*)iMvp[LIST_1] = 0;
581 }
582 }
583 UpdateP16x16DirectCabac (pCurDqLayer);
584 for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
585 UpdateP16x16MotionInfo (pCurDqLayer, listIdx, ref[listIdx], iMvp[listIdx]);
586 UpdateP16x16MvdCabac (pCurDqLayer, pMvd, listIdx);
587 }
588 } else {
589 if (bSkipOrDirect) {
590 int8_t pSubPartCount[4], pPartW[4];
591 for (int32_t i = 0; i < 4; i++) { //Direct 8x8 Ref and mv
592 int16_t iIdx8 = i << 2;
593 pCurDqLayer->pSubMbType[iMbXy][i] = subMbType;
594 int8_t pRefIndex[LIST_A][30];
595 UpdateP8x8RefIdxCabac (pCurDqLayer, pRefIndex, iIdx8, ref[LIST_0], LIST_0);
596 UpdateP8x8RefIdxCabac (pCurDqLayer, pRefIndex, iIdx8, ref[LIST_1], LIST_1);
597 UpdateP8x8DirectCabac (pCurDqLayer, iIdx8);
598
599 pSubPartCount[i] = g_ksInterBSubMbTypeInfo[0].iPartCount;
600 pPartW[i] = g_ksInterBSubMbTypeInfo[0].iPartWidth;
601
602 if (IS_SUB_4x4 (subMbType)) {
603 pSubPartCount[i] = 4;
604 pPartW[i] = 1;
605 }
606 FillSpatialDirect8x8Mv (pCurDqLayer, iIdx8, pSubPartCount[i], pPartW[i], subMbType, bIsLongRef, iMvp, ref, NULL, NULL);
607 }
608 }
609 }
610 return ret;
611 }
612
PredBDirectTemporal(PWelsDecoderContext pCtx,int16_t iMvp[LIST_A][2],int8_t ref[LIST_A],SubMbType & subMbType)613 int32_t PredBDirectTemporal (PWelsDecoderContext pCtx, int16_t iMvp[LIST_A][2], int8_t ref[LIST_A],
614 SubMbType& subMbType) {
615 int32_t ret = ERR_NONE;
616 PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
617 int32_t iMbXy = pCurDqLayer->iMbXyIndex;
618 bool bSkipOrDirect = (IS_SKIP (GetMbType (pCurDqLayer)[iMbXy]) | IS_DIRECT (GetMbType (pCurDqLayer)[iMbXy])) > 0;
619
620 MbType mbType;
621 ret = GetColocatedMb (pCtx, mbType, subMbType);
622 if (ret != ERR_NONE) {
623 return ret;
624 }
625
626 GetMbType (pCurDqLayer)[iMbXy] = mbType;
627
628 PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer;
629 PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader;
630 int16_t pMvd[4] = { 0 };
631 const int32_t ref0Count = WELS_MIN (pSliceHeader->uiRefCount[LIST_0], pCtx->sRefPic.uiRefCount[LIST_0]);
632 if (IS_INTER_16x16 (mbType)) {
633 ref[LIST_0] = 0;
634 ref[LIST_1] = 0;
635 UpdateP16x16DirectCabac (pCurDqLayer);
636 UpdateP16x16RefIdx (pCurDqLayer, LIST_1, ref[LIST_1]);
637 ST64 (iMvp, 0);
638 if (pCurDqLayer->iColocIntra[0]) {
639 UpdateP16x16MotionOnly (pCurDqLayer, LIST_0, iMvp[LIST_0]);
640 UpdateP16x16MotionOnly (pCurDqLayer, LIST_1, iMvp[LIST_1]);
641 UpdateP16x16RefIdx (pCurDqLayer, LIST_0, ref[LIST_0]);
642 } else {
643 ref[LIST_0] = 0;
644 int16_t* mv = pCurDqLayer->iColocMv[LIST_0][0];
645 int8_t colocRefIndexL0 = pCurDqLayer->iColocRefIndex[LIST_0][0];
646 if (colocRefIndexL0 >= 0) {
647 ref[LIST_0] = MapColToList0 (pCtx, colocRefIndexL0, ref0Count);
648 } else {
649 mv = pCurDqLayer->iColocMv[LIST_1][0];
650 }
651 UpdateP16x16RefIdx (pCurDqLayer, LIST_0, ref[LIST_0]);
652
653 iMvp[LIST_0][0] = (pSlice->iMvScale[LIST_0][ref[LIST_0]] * mv[0] + 128) >> 8;
654 iMvp[LIST_0][1] = (pSlice->iMvScale[LIST_0][ref[LIST_0]] * mv[1] + 128) >> 8;
655 UpdateP16x16MotionOnly (pCurDqLayer, LIST_0, iMvp[LIST_0]);
656 iMvp[LIST_1][0] = iMvp[LIST_0][0] - mv[0];
657 iMvp[LIST_1][1] = iMvp[LIST_0][1] - mv[1];
658 UpdateP16x16MotionOnly (pCurDqLayer, LIST_1, iMvp[LIST_1]);
659 }
660 UpdateP16x16MvdCabac (pCurDqLayer, pMvd, LIST_0);
661 UpdateP16x16MvdCabac (pCurDqLayer, pMvd, LIST_1);
662 } else {
663 if (bSkipOrDirect) {
664 int8_t pSubPartCount[4], pPartW[4];
665 int8_t pRefIndex[LIST_A][30];
666 for (int32_t i = 0; i < 4; i++) {
667 int16_t iIdx8 = i << 2;
668 const uint8_t iScan4Idx = g_kuiScan4[iIdx8];
669 pCurDqLayer->pSubMbType[iMbXy][i] = subMbType;
670
671 int16_t (*mvColoc)[2] = pCurDqLayer->iColocMv[LIST_0];
672
673 ref[LIST_1] = 0;
674 UpdateP8x8RefIdxCabac (pCurDqLayer, pRefIndex, iIdx8, ref[LIST_1], LIST_1);
675 if (pCurDqLayer->iColocIntra[iScan4Idx]) {
676 ref[LIST_0] = 0;
677 UpdateP8x8RefIdxCabac (pCurDqLayer, pRefIndex, iIdx8, ref[LIST_0], LIST_0);
678 ST64 (iMvp, 0);
679 } else {
680 ref[LIST_0] = 0;
681 int8_t colocRefIndexL0 = pCurDqLayer->iColocRefIndex[LIST_0][iScan4Idx];
682 if (colocRefIndexL0 >= 0) {
683 ref[LIST_0] = MapColToList0 (pCtx, colocRefIndexL0, ref0Count);
684 } else {
685 mvColoc = pCurDqLayer->iColocMv[LIST_1];
686 }
687 UpdateP8x8RefIdxCabac (pCurDqLayer, pRefIndex, iIdx8, ref[LIST_0], LIST_0);
688 }
689 UpdateP8x8DirectCabac (pCurDqLayer, iIdx8);
690
691 pSubPartCount[i] = g_ksInterBSubMbTypeInfo[0].iPartCount;
692 pPartW[i] = g_ksInterBSubMbTypeInfo[0].iPartWidth;
693
694 if (IS_SUB_4x4 (subMbType)) {
695 pSubPartCount[i] = 4;
696 pPartW[i] = 1;
697 }
698 FillTemporalDirect8x8Mv (pCurDqLayer, iIdx8, pSubPartCount[i], pPartW[i], subMbType, ref, mvColoc, NULL, NULL);
699 }
700 }
701 }
702 return ret;
703 }
704
705 //basic iMVs prediction unit for iMVs partition width (4, 2, 1)
PredMv(int16_t iMotionVector[LIST_A][30][MV_A],int8_t iRefIndex[LIST_A][30],int32_t listIdx,int32_t iPartIdx,int32_t iPartWidth,int8_t iRef,int16_t iMVP[2])706 void PredMv (int16_t iMotionVector[LIST_A][30][MV_A], int8_t iRefIndex[LIST_A][30],
707 int32_t listIdx, int32_t iPartIdx, int32_t iPartWidth, int8_t iRef, int16_t iMVP[2]) {
708 const uint8_t kuiLeftIdx = g_kuiCache30ScanIdx[iPartIdx] - 1;
709 const uint8_t kuiTopIdx = g_kuiCache30ScanIdx[iPartIdx] - 6;
710 const uint8_t kuiRightTopIdx = kuiTopIdx + iPartWidth;
711 const uint8_t kuiLeftTopIdx = kuiTopIdx - 1;
712
713 const int8_t kiLeftRef = iRefIndex[listIdx][kuiLeftIdx];
714 const int8_t kiTopRef = iRefIndex[listIdx][ kuiTopIdx];
715 const int8_t kiRightTopRef = iRefIndex[listIdx][kuiRightTopIdx];
716 const int8_t kiLeftTopRef = iRefIndex[listIdx][ kuiLeftTopIdx];
717 int8_t iDiagonalRef = kiRightTopRef;
718
719 int8_t iMatchRef = 0;
720
721
722 int16_t iAMV[2], iBMV[2], iCMV[2];
723
724 ST32 (iAMV, LD32 (iMotionVector[listIdx][ kuiLeftIdx]));
725 ST32 (iBMV, LD32 (iMotionVector[listIdx][ kuiTopIdx]));
726 ST32 (iCMV, LD32 (iMotionVector[listIdx][kuiRightTopIdx]));
727
728 if (REF_NOT_AVAIL == iDiagonalRef) {
729 iDiagonalRef = kiLeftTopRef;
730 ST32 (iCMV, LD32 (iMotionVector[listIdx][kuiLeftTopIdx]));
731 }
732
733 iMatchRef = (iRef == kiLeftRef) + (iRef == kiTopRef) + (iRef == iDiagonalRef);
734
735 if (REF_NOT_AVAIL == kiTopRef && REF_NOT_AVAIL == iDiagonalRef && kiLeftRef >= REF_NOT_IN_LIST) {
736 ST32 (iMVP, LD32 (iAMV));
737 return;
738 }
739
740 if (1 == iMatchRef) {
741 if (iRef == kiLeftRef) {
742 ST32 (iMVP, LD32 (iAMV));
743 } else if (iRef == kiTopRef) {
744 ST32 (iMVP, LD32 (iBMV));
745 } else {
746 ST32 (iMVP, LD32 (iCMV));
747 }
748 } else {
749 iMVP[0] = WelsMedian (iAMV[0], iBMV[0], iCMV[0]);
750 iMVP[1] = WelsMedian (iAMV[1], iBMV[1], iCMV[1]);
751 }
752 }
PredInter8x16Mv(int16_t iMotionVector[LIST_A][30][MV_A],int8_t iRefIndex[LIST_A][30],int32_t listIdx,int32_t iPartIdx,int8_t iRef,int16_t iMVP[2])753 void PredInter8x16Mv (int16_t iMotionVector[LIST_A][30][MV_A], int8_t iRefIndex[LIST_A][30],
754 int32_t listIdx, int32_t iPartIdx, int8_t iRef, int16_t iMVP[2]) {
755 if (0 == iPartIdx) {
756 const int8_t kiLeftRef = iRefIndex[listIdx][6];
757 if (iRef == kiLeftRef) {
758 ST32 (iMVP, LD32 (&iMotionVector[listIdx][6][0]));
759 return;
760 }
761 } else { // 1 == iPartIdx
762 int8_t iDiagonalRef = iRefIndex[listIdx][5]; //top-right
763 int8_t index = 5;
764 if (REF_NOT_AVAIL == iDiagonalRef) {
765 iDiagonalRef = iRefIndex[listIdx][2]; //top-left for 8*8 block(index 1)
766 index = 2;
767 }
768 if (iRef == iDiagonalRef) {
769 ST32 (iMVP, LD32 (&iMotionVector[listIdx][index][0]));
770 return;
771 }
772 }
773
774 PredMv (iMotionVector, iRefIndex, listIdx, iPartIdx, 2, iRef, iMVP);
775 }
PredInter16x8Mv(int16_t iMotionVector[LIST_A][30][MV_A],int8_t iRefIndex[LIST_A][30],int32_t listIdx,int32_t iPartIdx,int8_t iRef,int16_t iMVP[2])776 void PredInter16x8Mv (int16_t iMotionVector[LIST_A][30][MV_A], int8_t iRefIndex[LIST_A][30],
777 int32_t listIdx, int32_t iPartIdx, int8_t iRef, int16_t iMVP[2]) {
778 if (0 == iPartIdx) {
779 const int8_t kiTopRef = iRefIndex[listIdx][1];
780 if (iRef == kiTopRef) {
781 ST32 (iMVP, LD32 (&iMotionVector[listIdx][1][0]));
782 return;
783 }
784 } else { // 8 == iPartIdx
785 const int8_t kiLeftRef = iRefIndex[listIdx][18];
786 if (iRef == kiLeftRef) {
787 ST32 (iMVP, LD32 (&iMotionVector[listIdx][18][0]));
788 return;
789 }
790 }
791
792 PredMv (iMotionVector, iRefIndex, listIdx, iPartIdx, 4, iRef, iMVP);
793 }
794
795 //update iMVs and iRefIndex cache for current MB, only for P_16*16 (SKIP inclusive)
796 /* can be further optimized */
UpdateP16x16MotionInfo(PDqLayer pCurDqLayer,int32_t listIdx,int8_t iRef,int16_t iMVs[2])797 void UpdateP16x16MotionInfo (PDqLayer pCurDqLayer, int32_t listIdx, int8_t iRef, int16_t iMVs[2]) {
798 const int16_t kiRef2 = ((uint8_t)iRef << 8) | (uint8_t)iRef;
799 const int32_t kiMV32 = LD32 (iMVs);
800 int32_t i;
801 int32_t iMbXy = pCurDqLayer->iMbXyIndex;
802
803 for (i = 0; i < 16; i += 4) {
804 //mb
805 const uint8_t kuiScan4Idx = g_kuiScan4[i];
806 const uint8_t kuiScan4IdxPlus4 = 4 + kuiScan4Idx;
807 if (pCurDqLayer->pDec != NULL) {
808 ST16 (&pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][kuiScan4Idx], kiRef2);
809 ST16 (&pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][kuiScan4IdxPlus4], kiRef2);
810
811 ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][kuiScan4Idx], kiMV32);
812 ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][1 + kuiScan4Idx], kiMV32);
813 ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][kuiScan4IdxPlus4], kiMV32);
814 ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32);
815 } else {
816 ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4Idx], kiRef2);
817 ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4IdxPlus4], kiRef2);
818
819 ST32 (pCurDqLayer->pMv[listIdx][iMbXy][kuiScan4Idx], kiMV32);
820 ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4Idx], kiMV32);
821 ST32 (pCurDqLayer->pMv[listIdx][iMbXy][kuiScan4IdxPlus4], kiMV32);
822 ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32);
823 }
824 }
825 }
826
827 //update iRefIndex cache for current MB, only for P_16*16 (SKIP inclusive)
828 /* can be further optimized */
UpdateP16x16RefIdx(PDqLayer pCurDqLayer,int32_t listIdx,int8_t iRef)829 void UpdateP16x16RefIdx (PDqLayer pCurDqLayer, int32_t listIdx, int8_t iRef) {
830 const int16_t kiRef2 = ((uint8_t)iRef << 8) | (uint8_t)iRef;
831 int32_t i;
832 int32_t iMbXy = pCurDqLayer->iMbXyIndex;
833
834 for (i = 0; i < 16; i += 4) {
835 //mb
836 const uint8_t kuiScan4Idx = g_kuiScan4[i];
837 const uint8_t kuiScan4IdxPlus4 = 4 + kuiScan4Idx;
838
839 ST16 (&pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][kuiScan4Idx], kiRef2);
840 ST16 (&pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][kuiScan4IdxPlus4], kiRef2);
841 }
842 }
843
844 //update iMVs only cache for current MB, only for P_16*16 (SKIP inclusive)
845 /* can be further optimized */
UpdateP16x16MotionOnly(PDqLayer pCurDqLayer,int32_t listIdx,int16_t iMVs[2])846 void UpdateP16x16MotionOnly (PDqLayer pCurDqLayer, int32_t listIdx, int16_t iMVs[2]) {
847 const int32_t kiMV32 = LD32 (iMVs);
848 int32_t i;
849 int32_t iMbXy = pCurDqLayer->iMbXyIndex;
850
851 for (i = 0; i < 16; i += 4) {
852 //mb
853 const uint8_t kuiScan4Idx = g_kuiScan4[i];
854 const uint8_t kuiScan4IdxPlus4 = 4 + kuiScan4Idx;
855 if (pCurDqLayer->pDec != NULL) {
856 ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][kuiScan4Idx], kiMV32);
857 ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][1 + kuiScan4Idx], kiMV32);
858 ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][kuiScan4IdxPlus4], kiMV32);
859 ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32);
860 } else {
861 ST32 (pCurDqLayer->pMv[listIdx][iMbXy][kuiScan4Idx], kiMV32);
862 ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4Idx], kiMV32);
863 ST32 (pCurDqLayer->pMv[listIdx][iMbXy][kuiScan4IdxPlus4], kiMV32);
864 ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32);
865 }
866 }
867 }
868
//update iRefIndex and iMVs of both Mb and Mb_cache, only for P16x8
870 /*need further optimization, mb_cache not work */
UpdateP16x8MotionInfo(PDqLayer pCurDqLayer,int16_t iMotionVector[LIST_A][30][MV_A],int8_t iRefIndex[LIST_A][30],int32_t listIdx,int32_t iPartIdx,int8_t iRef,int16_t iMVs[2])871 void UpdateP16x8MotionInfo (PDqLayer pCurDqLayer, int16_t iMotionVector[LIST_A][30][MV_A],
872 int8_t iRefIndex[LIST_A][30],
873 int32_t listIdx, int32_t iPartIdx, int8_t iRef, int16_t iMVs[2]) {
874 const int16_t kiRef2 = ((uint8_t)iRef << 8) | (uint8_t)iRef;
875 const int32_t kiMV32 = LD32 (iMVs);
876 int32_t i;
877 int32_t iMbXy = pCurDqLayer->iMbXyIndex;
878 for (i = 0; i < 2; i++, iPartIdx += 4) {
879 const uint8_t kuiScan4Idx = g_kuiScan4[iPartIdx];
880 const uint8_t kuiScan4IdxPlus4 = 4 + kuiScan4Idx;
881 const uint8_t kuiCacheIdx = g_kuiCache30ScanIdx[iPartIdx];
882 const uint8_t kuiCacheIdxPlus6 = 6 + kuiCacheIdx;
883
884 //mb
885 if (pCurDqLayer->pDec != NULL) {
886 ST16 (&pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][kuiScan4Idx], kiRef2);
887 ST16 (&pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][kuiScan4IdxPlus4], kiRef2);
888 ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][kuiScan4Idx], kiMV32);
889 ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][1 + kuiScan4Idx], kiMV32);
890 ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][kuiScan4IdxPlus4], kiMV32);
891 ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32);
892 } else {
893 ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4Idx], kiRef2);
894 ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4IdxPlus4], kiRef2);
895 ST32 (pCurDqLayer->pMv[listIdx][iMbXy][kuiScan4Idx], kiMV32);
896 ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4Idx], kiMV32);
897 ST32 (pCurDqLayer->pMv[listIdx][iMbXy][kuiScan4IdxPlus4], kiMV32);
898 ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32);
899 }
900 //cache
901 ST16 (&iRefIndex[listIdx][kuiCacheIdx ], kiRef2);
902 ST16 (&iRefIndex[listIdx][kuiCacheIdxPlus6], kiRef2);
903 ST32 (iMotionVector[listIdx][ kuiCacheIdx ], kiMV32);
904 ST32 (iMotionVector[listIdx][1 + kuiCacheIdx ], kiMV32);
905 ST32 (iMotionVector[listIdx][ kuiCacheIdxPlus6], kiMV32);
906 ST32 (iMotionVector[listIdx][1 + kuiCacheIdxPlus6], kiMV32);
907 }
908 }
909 //update iRefIndex and iMVs of both Mb and Mb_cache, only for P8x16
UpdateP8x16MotionInfo(PDqLayer pCurDqLayer,int16_t iMotionVector[LIST_A][30][MV_A],int8_t iRefIndex[LIST_A][30],int32_t listIdx,int32_t iPartIdx,int8_t iRef,int16_t iMVs[2])910 void UpdateP8x16MotionInfo (PDqLayer pCurDqLayer, int16_t iMotionVector[LIST_A][30][MV_A],
911 int8_t iRefIndex[LIST_A][30],
912 int32_t listIdx, int32_t iPartIdx, int8_t iRef, int16_t iMVs[2]) {
913 const int16_t kiRef2 = ((uint8_t)iRef << 8) | (uint8_t)iRef;
914 const int32_t kiMV32 = LD32 (iMVs);
915 int32_t i;
916 int32_t iMbXy = pCurDqLayer->iMbXyIndex;
917
918 for (i = 0; i < 2; i++, iPartIdx += 8) {
919 const uint8_t kuiScan4Idx = g_kuiScan4[iPartIdx];
920 const uint8_t kuiCacheIdx = g_kuiCache30ScanIdx[iPartIdx];
921 const uint8_t kuiScan4IdxPlus4 = 4 + kuiScan4Idx;
922 const uint8_t kuiCacheIdxPlus6 = 6 + kuiCacheIdx;
923
924 //mb
925 if (pCurDqLayer->pDec != NULL) {
926 ST16 (&pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][kuiScan4Idx], kiRef2);
927 ST16 (&pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][kuiScan4IdxPlus4], kiRef2);
928 ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][kuiScan4Idx], kiMV32);
929 ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][1 + kuiScan4Idx], kiMV32);
930 ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][kuiScan4IdxPlus4], kiMV32);
931 ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32);
932 } else {
933 ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4Idx], kiRef2);
934 ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4IdxPlus4], kiRef2);
935 ST32 (pCurDqLayer->pMv[listIdx][iMbXy][kuiScan4Idx], kiMV32);
936 ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4Idx], kiMV32);
937 ST32 (pCurDqLayer->pMv[listIdx][iMbXy][kuiScan4IdxPlus4], kiMV32);
938 ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32);
939 }
940 //cache
941 ST16 (&iRefIndex[listIdx][kuiCacheIdx ], kiRef2);
942 ST16 (&iRefIndex[listIdx][kuiCacheIdxPlus6], kiRef2);
943 ST32 (iMotionVector[listIdx][ kuiCacheIdx ], kiMV32);
944 ST32 (iMotionVector[listIdx][1 + kuiCacheIdx ], kiMV32);
945 ST32 (iMotionVector[listIdx][ kuiCacheIdxPlus6], kiMV32);
946 ST32 (iMotionVector[listIdx][1 + kuiCacheIdxPlus6], kiMV32);
947 }
948 }
949
FillSpatialDirect8x8Mv(PDqLayer pCurDqLayer,const int16_t & iIdx8,const int8_t & iPartCount,const int8_t & iPartW,const SubMbType & subMbType,const bool & bIsLongRef,int16_t pMvDirect[LIST_A][2],int8_t iRef[LIST_A],int16_t pMotionVector[LIST_A][30][MV_A],int16_t pMvdCache[LIST_A][30][MV_A])950 void FillSpatialDirect8x8Mv (PDqLayer pCurDqLayer, const int16_t& iIdx8, const int8_t& iPartCount, const int8_t& iPartW,
951 const SubMbType& subMbType, const bool& bIsLongRef, int16_t pMvDirect[LIST_A][2], int8_t iRef[LIST_A],
952 int16_t pMotionVector[LIST_A][30][MV_A], int16_t pMvdCache[LIST_A][30][MV_A]) {
953 int32_t iMbXy = pCurDqLayer->iMbXyIndex;
954 for (int32_t j = 0; j < iPartCount; j++) {
955 int8_t iPartIdx = iIdx8 + j * iPartW;
956 uint8_t iScan4Idx = g_kuiScan4[iPartIdx];
957 uint8_t iColocIdx = g_kuiScan4[iPartIdx];
958 uint8_t iCacheIdx = g_kuiCache30ScanIdx[iPartIdx];
959
960 int16_t pMV[4] = { 0 };
961 if (IS_SUB_8x8 (subMbType)) {
962 * (uint32_t*)pMV = * (uint32_t*)pMvDirect[LIST_0];
963 ST32 ((pMV + 2), LD32 (pMV));
964 ST64 (pCurDqLayer->pDec->pMv[LIST_0][iMbXy][iScan4Idx], LD64 (pMV));
965 ST64 (pCurDqLayer->pDec->pMv[LIST_0][iMbXy][iScan4Idx + 4], LD64 (pMV));
966 ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
967 ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx + 4], 0);
968 if (pMotionVector != NULL) {
969 ST64 (pMotionVector[LIST_0][iCacheIdx], LD64 (pMV));
970 ST64 (pMotionVector[LIST_0][iCacheIdx + 6], LD64 (pMV));
971 }
972 if (pMvdCache != NULL) {
973 ST64 (pMvdCache[LIST_0][iCacheIdx], 0);
974 ST64 (pMvdCache[LIST_0][iCacheIdx + 6], 0);
975 }
976 * (uint32_t*)pMV = * (uint32_t*)pMvDirect[LIST_1];
977 ST32 ((pMV + 2), LD32 (pMV));
978 ST64 (pCurDqLayer->pDec->pMv[LIST_1][iMbXy][iScan4Idx], LD64 (pMV));
979 ST64 (pCurDqLayer->pDec->pMv[LIST_1][iMbXy][iScan4Idx + 4], LD64 (pMV));
980 ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
981 ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx + 4], 0);
982 if (pMotionVector != NULL) {
983 ST64 (pMotionVector[LIST_1][iCacheIdx], LD64 (pMV));
984 ST64 (pMotionVector[LIST_1][iCacheIdx + 6], LD64 (pMV));
985 }
986 if (pMvdCache != NULL) {
987 ST64 (pMvdCache[LIST_1][iCacheIdx], 0);
988 ST64 (pMvdCache[LIST_1][iCacheIdx + 6], 0);
989 }
990 } else { //SUB_4x4
991 * (uint32_t*)pMV = * (uint32_t*)pMvDirect[LIST_0];
992 ST32 (pCurDqLayer->pDec->pMv[LIST_0][iMbXy][iScan4Idx], LD32 (pMV));
993 ST32 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
994 if (pMotionVector != NULL) {
995 ST32 (pMotionVector[LIST_0][iCacheIdx], LD32 (pMV));
996 }
997 if (pMvdCache != NULL) {
998 ST32 (pMvdCache[LIST_0][iCacheIdx], 0);
999 }
1000 * (uint32_t*)pMV = * (uint32_t*)pMvDirect[LIST_1];
1001 ST32 (pCurDqLayer->pDec->pMv[LIST_1][iMbXy][iScan4Idx], LD32 (pMV));
1002 ST32 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
1003 if (pMotionVector != NULL) {
1004 ST32 (pMotionVector[LIST_1][iCacheIdx], LD32 (pMV));
1005 }
1006 if (pMvdCache != NULL) {
1007 ST32 (pMvdCache[LIST_1][iCacheIdx], 0);
1008 }
1009 }
1010 if ((* (int32_t*)pMvDirect[LIST_0] | * (int32_t*)pMvDirect[LIST_1])) {
1011 uint32_t uiColZeroFlag = (0 == pCurDqLayer->iColocIntra[iColocIdx]) && !bIsLongRef &&
1012 (pCurDqLayer->iColocRefIndex[LIST_0][iColocIdx] == 0 || (pCurDqLayer->iColocRefIndex[LIST_0][iColocIdx] < 0
1013 && pCurDqLayer->iColocRefIndex[LIST_1][iColocIdx] == 0));
1014 const int16_t (*mvColoc)[2] = 0 == pCurDqLayer->iColocRefIndex[LIST_0][iColocIdx] ? pCurDqLayer->iColocMv[LIST_0] :
1015 pCurDqLayer->iColocMv[LIST_1];
1016 const int16_t* mv = mvColoc[iColocIdx];
1017 if (IS_SUB_8x8 (subMbType)) {
1018 if (uiColZeroFlag && ((unsigned) (mv[0] + 1) <= 2 && (unsigned) (mv[1] + 1) <= 2)) {
1019 if (iRef[LIST_0] == 0) {
1020 ST64 (pCurDqLayer->pDec->pMv[LIST_0][iMbXy][iScan4Idx], 0);
1021 ST64 (pCurDqLayer->pDec->pMv[LIST_0][iMbXy][iScan4Idx + 4], 0);
1022 ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
1023 ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx + 4], 0);
1024 if (pMotionVector != NULL) {
1025 ST64 (pMotionVector[LIST_0][iCacheIdx], 0);
1026 ST64 (pMotionVector[LIST_0][iCacheIdx + 6], 0);
1027 }
1028 if (pMvdCache != NULL) {
1029 ST64 (pMvdCache[LIST_0][iCacheIdx], 0);
1030 ST64 (pMvdCache[LIST_0][iCacheIdx + 6], 0);
1031 }
1032 }
1033
1034 if (iRef[LIST_1] == 0) {
1035 ST64 (pCurDqLayer->pDec->pMv[LIST_1][iMbXy][iScan4Idx], 0);
1036 ST64 (pCurDqLayer->pDec->pMv[LIST_1][iMbXy][iScan4Idx + 4], 0);
1037 ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
1038 ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx + 4], 0);
1039 if (pMotionVector != NULL) {
1040 ST64 (pMotionVector[LIST_1][iCacheIdx], 0);
1041 ST64 (pMotionVector[LIST_1][iCacheIdx + 6], 0);
1042 }
1043 if (pMvdCache != NULL) {
1044 ST64 (pMvdCache[LIST_1][iCacheIdx], 0);
1045 ST64 (pMvdCache[LIST_1][iCacheIdx + 6], 0);
1046 }
1047 }
1048 }
1049 } else {
1050 if (uiColZeroFlag && ((unsigned) (mv[0] + 1) <= 2 && (unsigned) (mv[1] + 1) <= 2)) {
1051 if (iRef[LIST_0] == 0) {
1052 ST32 (pCurDqLayer->pDec->pMv[LIST_0][iMbXy][iScan4Idx], 0);
1053 ST32 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
1054 if (pMotionVector != NULL) {
1055 ST32 (pMotionVector[LIST_0][iCacheIdx], 0);
1056 }
1057 if (pMvdCache != NULL) {
1058 ST32 (pMvdCache[LIST_0][iCacheIdx], 0);
1059 }
1060 }
1061 if (iRef[LIST_1] == 0) {
1062 ST32 (pCurDqLayer->pDec->pMv[LIST_1][iMbXy][iScan4Idx], 0);
1063 ST32 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
1064 if (pMotionVector != NULL) {
1065 ST32 (pMotionVector[LIST_1][iCacheIdx], 0);
1066 }
1067 if (pMvdCache != NULL) {
1068 ST32 (pMvdCache[LIST_1][iCacheIdx], 0);
1069 }
1070 }
1071 }
1072 }
1073 }
1074 }
1075 }
1076
FillTemporalDirect8x8Mv(PDqLayer pCurDqLayer,const int16_t & iIdx8,const int8_t & iPartCount,const int8_t & iPartW,const SubMbType & subMbType,int8_t iRef[LIST_A],int16_t (* mvColoc)[2],int16_t pMotionVector[LIST_A][30][MV_A],int16_t pMvdCache[LIST_A][30][MV_A])1077 void FillTemporalDirect8x8Mv (PDqLayer pCurDqLayer, const int16_t& iIdx8, const int8_t& iPartCount,
1078 const int8_t& iPartW,
1079 const SubMbType& subMbType, int8_t iRef[LIST_A], int16_t (*mvColoc)[2], int16_t pMotionVector[LIST_A][30][MV_A],
1080 int16_t pMvdCache[LIST_A][30][MV_A]) {
1081 PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer;
1082 int32_t iMbXy = pCurDqLayer->iMbXyIndex;
1083 int16_t pMvDirect[LIST_A][2] = { { 0, 0 }, { 0, 0 } };
1084 for (int32_t j = 0; j < iPartCount; j++) {
1085 int8_t iPartIdx = iIdx8 + j * iPartW;
1086 uint8_t iScan4Idx = g_kuiScan4[iPartIdx];
1087 uint8_t iColocIdx = g_kuiScan4[iPartIdx];
1088 uint8_t iCacheIdx = g_kuiCache30ScanIdx[iPartIdx];
1089
1090 int16_t* mv = mvColoc[iColocIdx];
1091
1092 int16_t pMV[4] = { 0 };
1093 if (IS_SUB_8x8 (subMbType)) {
1094 if (!pCurDqLayer->iColocIntra[iColocIdx]) {
1095 pMvDirect[LIST_0][0] = (pSlice->iMvScale[LIST_0][iRef[LIST_0]] * mv[0] + 128) >> 8;
1096 pMvDirect[LIST_0][1] = (pSlice->iMvScale[LIST_0][iRef[LIST_0]] * mv[1] + 128) >> 8;
1097 }
1098 ST32 (pMV, LD32 (pMvDirect[LIST_0]));
1099 ST32 ((pMV + 2), LD32 (pMvDirect[LIST_0]));
1100 ST64 (pCurDqLayer->pDec->pMv[LIST_0][iMbXy][iScan4Idx], LD64 (pMV));
1101 ST64 (pCurDqLayer->pDec->pMv[LIST_0][iMbXy][iScan4Idx + 4], LD64 (pMV));
1102 ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
1103 ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx + 4], 0);
1104 if (pMotionVector != NULL) {
1105 ST64 (pMotionVector[LIST_0][iCacheIdx], LD64 (pMV));
1106 ST64 (pMotionVector[LIST_0][iCacheIdx + 6], LD64 (pMV));
1107 }
1108 if (pMvdCache != NULL) {
1109 ST64 (pMvdCache[LIST_0][iCacheIdx], 0);
1110 ST64 (pMvdCache[LIST_0][iCacheIdx + 6], 0);
1111 }
1112 if (!pCurDqLayer->iColocIntra[g_kuiScan4[iIdx8]]) {
1113 pMvDirect[LIST_1][0] = pMvDirect[LIST_0][0] - mv[0];
1114 pMvDirect[LIST_1][1] = pMvDirect[LIST_0][1] - mv[1];
1115 }
1116 ST32 (pMV, LD32 (pMvDirect[LIST_1]));
1117 ST32 ((pMV + 2), LD32 (pMvDirect[LIST_1]));
1118 ST64 (pCurDqLayer->pDec->pMv[LIST_1][iMbXy][iScan4Idx], LD64 (pMV));
1119 ST64 (pCurDqLayer->pDec->pMv[LIST_1][iMbXy][iScan4Idx + 4], LD64 (pMV));
1120 ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
1121 ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx + 4], 0);
1122 if (pMotionVector != NULL) {
1123 ST64 (pMotionVector[LIST_1][iCacheIdx], LD64 (pMV));
1124 ST64 (pMotionVector[LIST_1][iCacheIdx + 6], LD64 (pMV));
1125 }
1126 if (pMvdCache != NULL) {
1127 ST64 (pMvdCache[LIST_1][iCacheIdx], 0);
1128 ST64 (pMvdCache[LIST_1][iCacheIdx + 6], 0);
1129 }
1130 } else { //SUB_4x4
1131 if (!pCurDqLayer->iColocIntra[iColocIdx]) {
1132 pMvDirect[LIST_0][0] = (pSlice->iMvScale[LIST_0][iRef[LIST_0]] * mv[0] + 128) >> 8;
1133 pMvDirect[LIST_0][1] = (pSlice->iMvScale[LIST_0][iRef[LIST_0]] * mv[1] + 128) >> 8;
1134 }
1135 ST32 (pCurDqLayer->pDec->pMv[LIST_0][iMbXy][iScan4Idx], LD32 (pMvDirect[LIST_0]));
1136 ST32 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
1137 if (pMotionVector != NULL) {
1138 ST32 (pMotionVector[LIST_0][iCacheIdx], LD32 (pMvDirect[LIST_0]));
1139 }
1140 if (pMvdCache != NULL) {
1141 ST32 (pMvdCache[LIST_0][iCacheIdx], 0);
1142 }
1143 if (!pCurDqLayer->iColocIntra[iColocIdx]) {
1144 pMvDirect[LIST_1][0] = pMvDirect[LIST_0][0] - mv[0];
1145 pMvDirect[LIST_1][1] = pMvDirect[LIST_0][1] - mv[1];
1146 }
1147 ST32 (pCurDqLayer->pDec->pMv[LIST_1][iMbXy][iScan4Idx], LD32 (pMvDirect[LIST_1]));
1148 ST32 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
1149 if (pMotionVector != NULL) {
1150 ST32 (pMotionVector[LIST_1][iCacheIdx], LD32 (pMvDirect[LIST_1]));
1151 }
1152 if (pMvdCache != NULL) {
1153 ST32 (pMvdCache[LIST_1][iCacheIdx], 0);
1154 }
1155 }
1156 }
1157 }
MapColToList0(PWelsDecoderContext & pCtx,const int8_t & colocRefIndexL0,const int32_t & ref0Count)1158 int8_t MapColToList0 (PWelsDecoderContext& pCtx, const int8_t& colocRefIndexL0,
1159 const int32_t& ref0Count) { //ISO/IEC 14496-10:2009(E) (8-193)
1160 //When reference is lost, this function must be skipped.
1161 if ((pCtx->iErrorCode & dsRefLost) == dsRefLost) {
1162 return 0;
1163 }
1164 PPicture pic1 = pCtx->sRefPic.pRefList[LIST_1][0];
1165 if (pic1 && pic1->pRefPic[LIST_0][colocRefIndexL0]) {
1166 const int32_t iFramePoc = pic1->pRefPic[LIST_0][colocRefIndexL0]->iFramePoc;
1167 for (int32_t i = 0; i < ref0Count; i++) {
1168 if (pCtx->sRefPic.pRefList[LIST_0][i]->iFramePoc == iFramePoc) {
1169 return i;
1170 }
1171 }
1172 }
1173 return 0;
1174 }
Update8x8RefIdx(PDqLayer & pCurDqLayer,const int16_t & iPartIdx,const int32_t & listIdx,const int8_t & iRef)1175 void Update8x8RefIdx (PDqLayer& pCurDqLayer, const int16_t& iPartIdx, const int32_t& listIdx, const int8_t& iRef) {
1176 int32_t iMbXy = pCurDqLayer->iMbXyIndex;
1177 const uint8_t iScan4Idx = g_kuiScan4[iPartIdx];
1178 pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][iScan4Idx] = pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][iScan4Idx + 1] =
1179 pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][iScan4Idx + 4] = pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][iScan4Idx +
1180 5] = iRef;
1181
1182 }
1183 } // namespace WelsDec
1184