/*!
 * \copy
 *     Copyright (c)  2009-2013, Cisco Systems
 *     All rights reserved.
 *
 *     Redistribution and use in source and binary forms, with or without
 *     modification, are permitted provided that the following conditions
 *     are met:
 *
 *        * Redistributions of source code must retain the above copyright
 *          notice, this list of conditions and the following disclaimer.
 *
 *        * Redistributions in binary form must reproduce the above copyright
 *          notice, this list of conditions and the following disclaimer in
 *          the documentation and/or other materials provided with the
 *          distribution.
 *
 *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 *     POSSIBILITY OF SUCH DAMAGE.
 *
 *
 * \file    md.cpp
 *
 * \brief   mode decision
 *
 * \date    2009.05.14 Created
 *
 *************************************************************************************
 */

#include "ls_defines.h"
#include "md.h"
#include "cpu_core.h"
#include "svc_enc_golomb.h"

namespace WelsEnc {
#define INTRA_VARIANCE_SAD_THRESHOLD 150
#define INTER_VARIANCE_SAD_THRESHOLD 20
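// Thresholds for the variance checks below: MdInterAnalysisVaaInfo_c() returns
// the all-blocks mask (15) when the variance of the four 8x8 SADs is below
// INTER_VARIANCE_SAD_THRESHOLD, and MdIntraAnalysisVaaInfo() reports an MB as
// textured when its luma variance reaches INTRA_VARIANCE_SAD_THRESHOLD.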

// Fill the intra-prediction neighbor cache for the current MB: non-zero
// coefficient counts, neighbor availability flags, and Intra4x4 prediction modes.
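// Layout note (inferred from the indices used below): the caches are addressed
// as an 8-entry-wide grid, with row 0 holding the top neighbor's bottom edge
// (indices 1..4 luma, 6/7 and 30/31 chroma) and column 0 holding the left
// neighbor's right edge (indices 8/16/24/32 luma, 13/21/37/45 chroma).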
void FillNeighborCacheIntra (SMbCache* pMbCache, SMB* pCurMb, int32_t iMbWidth) {
  uint32_t uiNeighborAvail = pCurMb->uiNeighborAvail;
  uint32_t uiNeighborIntra = 0;

  if (uiNeighborAvail & LEFT_MB_POS) { //LEFT MB
    int8_t* pLeftMbNonZeroCount = pCurMb->pNonZeroCount - MB_LUMA_CHROMA_BLOCK4x4_NUM;
    pMbCache->iNonZeroCoeffCount[ 8] = pLeftMbNonZeroCount[ 3];
    pMbCache->iNonZeroCoeffCount[16] = pLeftMbNonZeroCount[ 7];
    pMbCache->iNonZeroCoeffCount[24] = pLeftMbNonZeroCount[11];
    pMbCache->iNonZeroCoeffCount[32] = pLeftMbNonZeroCount[15];

    pMbCache->iNonZeroCoeffCount[13] = pLeftMbNonZeroCount[17];
    pMbCache->iNonZeroCoeffCount[21] = pLeftMbNonZeroCount[21];
    pMbCache->iNonZeroCoeffCount[37] = pLeftMbNonZeroCount[19];
    pMbCache->iNonZeroCoeffCount[45] = pLeftMbNonZeroCount[23];

    uiNeighborIntra |= LEFT_MB_POS;

    if (IS_INTRA4x4 ((pCurMb - 1)->uiMbType)) {
      int8_t* pLeftMbIntra4x4PredMode = pCurMb->pIntra4x4PredMode - INTRA_4x4_MODE_NUM;
      pMbCache->iIntraPredMode[ 8] = pLeftMbIntra4x4PredMode[4];
      pMbCache->iIntraPredMode[16] = pLeftMbIntra4x4PredMode[5];
      pMbCache->iIntraPredMode[24] = pLeftMbIntra4x4PredMode[6];
      pMbCache->iIntraPredMode[32] = pLeftMbIntra4x4PredMode[3];
    } else { // if ( 0 == constrained_intra_pred_flag || IS_INTRA16x16((pCurMb-1)->uiMbType ))
      pMbCache->iIntraPredMode[ 8] =
      pMbCache->iIntraPredMode[16] =
      pMbCache->iIntraPredMode[24] =
      pMbCache->iIntraPredMode[32] = 2; //DC
    }
  } else {
    pMbCache->iNonZeroCoeffCount[ 8] =
    pMbCache->iNonZeroCoeffCount[16] =
    pMbCache->iNonZeroCoeffCount[24] =
    pMbCache->iNonZeroCoeffCount[32] = -1; //unavailable
    pMbCache->iNonZeroCoeffCount[13] =
    pMbCache->iNonZeroCoeffCount[21] =
    pMbCache->iNonZeroCoeffCount[37] =
    pMbCache->iNonZeroCoeffCount[45] = -1; //unavailable

    pMbCache->iIntraPredMode[ 8] =
    pMbCache->iIntraPredMode[16] =
    pMbCache->iIntraPredMode[24] =
    pMbCache->iIntraPredMode[32] = -1; //unavailable
  }

  if (uiNeighborAvail & TOP_MB_POS) { //TOP MB
    SMB* pTopMb = pCurMb - iMbWidth;
    ST32 (&pMbCache->iNonZeroCoeffCount[1], LD32 (&pTopMb->pNonZeroCount[12]));

    ST16 (&pMbCache->iNonZeroCoeffCount[6], LD16 (&pTopMb->pNonZeroCount[20]));
    ST16 (&pMbCache->iNonZeroCoeffCount[30], LD16 (&pTopMb->pNonZeroCount[22]));

    uiNeighborIntra |= TOP_MB_POS;

    if (IS_INTRA4x4 (pTopMb->uiMbType)) {
      ST32 (pMbCache->iIntraPredMode + 1, LD32 (&pTopMb->pIntra4x4PredMode[0]));
    } else { // if ( 0 == constrained_intra_pred_flag || IS_INTRA16x16( pTopMb->uiMbType ))
      const uint32_t kuiDc32 = 0x02020202;
      ST32 (pMbCache->iIntraPredMode + 1, kuiDc32);
    }
  } else {
    const uint32_t kuiUnavail32 = 0xffffffff;
    ST32 (pMbCache->iIntraPredMode + 1, kuiUnavail32);
    ST32 (&pMbCache->iNonZeroCoeffCount[1], kuiUnavail32);

    ST16 (&pMbCache->iNonZeroCoeffCount[6], 0xffff);
    ST16 (&pMbCache->iNonZeroCoeffCount[30], 0xffff);
  }

  if (uiNeighborAvail & TOPLEFT_MB_POS) {
    uiNeighborIntra |= 0x04;
  }

  if (uiNeighborAvail & TOPRIGHT_MB_POS) {
    uiNeighborIntra |= 0x08;
  }
  pMbCache->uiNeighborIntra = uiNeighborIntra;
}
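
// Layout note for the inter neighbor caches used by the two functions below
// (inferred from the indices): a 6-entry-wide grid where index 0 is the
// top-left neighbor, 1..4 the top neighbor's bottom row, 5 the top-right
// neighbor, and indices 6/12/18/24 the left neighbor's right column.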
// Fill the inter-prediction neighbor cache for the current MB: motion vectors,
// reference indices, SAD costs and skip flags of the neighboring MBs.
// pVaaBgMbFlag is unused here; it exists so that both variants share the
// pfFillInterNeighborCache function-pointer signature.
void FillNeighborCacheInterWithoutBGD (SMbCache* pMbCache, SMB* pCurMb, int32_t iMbWidth, int8_t* pVaaBgMbFlag) {
  uint32_t uiNeighborAvail = pCurMb->uiNeighborAvail;
  SMB* pLeftMb = pCurMb - 1;
  SMB* pTopMb = pCurMb - iMbWidth;
  SMB* pLeftTopMb = pCurMb - iMbWidth - 1;
  SMB* pRightTopMb = pCurMb - iMbWidth + 1;
  SMVComponentUnit* pMvComp = &pMbCache->sMvComponents;
  if ((uiNeighborAvail & LEFT_MB_POS) && IS_SVC_INTER (pLeftMb->uiMbType)) {
    pMvComp->sMotionVectorCache[ 6] = pLeftMb->sMv[ 3];
    pMvComp->sMotionVectorCache[12] = pLeftMb->sMv[ 7];
    pMvComp->sMotionVectorCache[18] = pLeftMb->sMv[11];
    pMvComp->sMotionVectorCache[24] = pLeftMb->sMv[15];
    pMvComp->iRefIndexCache[ 6] = pLeftMb->pRefIndex[1];
    pMvComp->iRefIndexCache[12] = pLeftMb->pRefIndex[1];
    pMvComp->iRefIndexCache[18] = pLeftMb->pRefIndex[3];
    pMvComp->iRefIndexCache[24] = pLeftMb->pRefIndex[3];
    pMbCache->iSadCost[3] = pLeftMb->pSadCost[0];

    if (pLeftMb->uiMbType == MB_TYPE_SKIP) {
      pMbCache->bMbTypeSkip[3] = 1;
      pMbCache->iSadCostSkip[3] = pMbCache->pEncSad[-1];
    } else {
      pMbCache->bMbTypeSkip[3] = 0;
      pMbCache->iSadCostSkip[3] = 0;
    }
  } else { //unavailable, or a non-inter MB
    ST32 (&pMvComp->sMotionVectorCache[ 6], 0);
    ST32 (&pMvComp->sMotionVectorCache[12], 0);
    ST32 (&pMvComp->sMotionVectorCache[18], 0);
    ST32 (&pMvComp->sMotionVectorCache[24], 0);
    pMvComp->iRefIndexCache[ 6] =
    pMvComp->iRefIndexCache[12] =
    pMvComp->iRefIndexCache[18] =
    pMvComp->iRefIndexCache[24] = (uiNeighborAvail & LEFT_MB_POS) ? REF_NOT_IN_LIST : REF_NOT_AVAIL;
    pMbCache->iSadCost[3] = 0;
    pMbCache->bMbTypeSkip[3] = 0;
    pMbCache->iSadCostSkip[3] = 0;
  }

  if ((uiNeighborAvail & TOP_MB_POS) && IS_SVC_INTER (pTopMb->uiMbType)) { //TOP MB
    ST64 (&pMvComp->sMotionVectorCache[1], LD64 (&pTopMb->sMv[12]));
    ST64 (&pMvComp->sMotionVectorCache[3], LD64 (&pTopMb->sMv[14]));
    pMvComp->iRefIndexCache[1] = pTopMb->pRefIndex[2];
    pMvComp->iRefIndexCache[2] = pTopMb->pRefIndex[2];
    pMvComp->iRefIndexCache[3] = pTopMb->pRefIndex[3];
    pMvComp->iRefIndexCache[4] = pTopMb->pRefIndex[3];
    pMbCache->iSadCost[1] = pTopMb->pSadCost[0];

    if (pTopMb->uiMbType == MB_TYPE_SKIP) {
      pMbCache->bMbTypeSkip[1] = 1;
      pMbCache->iSadCostSkip[1] = pMbCache->pEncSad[-iMbWidth];
    } else {
      pMbCache->bMbTypeSkip[1] = 0;
      pMbCache->iSadCostSkip[1] = 0;
    }
  } else { //unavail
    ST64 (&pMvComp->sMotionVectorCache[1], 0);
    ST64 (&pMvComp->sMotionVectorCache[3], 0);
    pMvComp->iRefIndexCache[1] =
    pMvComp->iRefIndexCache[2] =
    pMvComp->iRefIndexCache[3] =
    pMvComp->iRefIndexCache[4] = (uiNeighborAvail & TOP_MB_POS) ? REF_NOT_IN_LIST : REF_NOT_AVAIL;
    pMbCache->iSadCost[1] = 0;

    pMbCache->bMbTypeSkip[1] = 0;
    pMbCache->iSadCostSkip[1] = 0;
  }

  if ((uiNeighborAvail & TOPLEFT_MB_POS) && IS_SVC_INTER (pLeftTopMb->uiMbType)) { //LEFT_TOP MB
    pMvComp->sMotionVectorCache[0] = pLeftTopMb->sMv[15];
    pMvComp->iRefIndexCache[0] = pLeftTopMb->pRefIndex[3];
    pMbCache->iSadCost[0] = pLeftTopMb->pSadCost[0];

    if (pLeftTopMb->uiMbType == MB_TYPE_SKIP) {
      pMbCache->bMbTypeSkip[0] = 1;
      pMbCache->iSadCostSkip[0] = pMbCache->pEncSad[-iMbWidth - 1];
    } else {
      pMbCache->bMbTypeSkip[0] = 0;
      pMbCache->iSadCostSkip[0] = 0;
    }
  } else { //unavail
    ST32 (&pMvComp->sMotionVectorCache[0], 0);
    pMvComp->iRefIndexCache[0] = (uiNeighborAvail & TOPLEFT_MB_POS) ? REF_NOT_IN_LIST : REF_NOT_AVAIL;
    pMbCache->iSadCost[0] = 0;
    pMbCache->bMbTypeSkip[0] = 0;
    pMbCache->iSadCostSkip[0] = 0;
  }

  if ((uiNeighborAvail & TOPRIGHT_MB_POS) && IS_SVC_INTER (pRightTopMb->uiMbType)) { //RIGHT_TOP MB
    pMvComp->sMotionVectorCache[5] = pRightTopMb->sMv[12];
    pMvComp->iRefIndexCache[5] = pRightTopMb->pRefIndex[2];
    pMbCache->iSadCost[2] = pRightTopMb->pSadCost[0];

    if (pRightTopMb->uiMbType == MB_TYPE_SKIP) {
      pMbCache->bMbTypeSkip[2] = 1;
      pMbCache->iSadCostSkip[2] = pMbCache->pEncSad[-iMbWidth + 1];
    } else {
      pMbCache->bMbTypeSkip[2] = 0;
      pMbCache->iSadCostSkip[2] = 0;
    }
  } else { //unavail
    ST32 (&pMvComp->sMotionVectorCache[5], 0);
    pMvComp->iRefIndexCache[5] = (uiNeighborAvail & TOPRIGHT_MB_POS) ? REF_NOT_IN_LIST : REF_NOT_AVAIL;
    pMbCache->iSadCost[2] = 0;
    pMbCache->bMbTypeSkip[2] = 0;
    pMbCache->iSadCostSkip[2] = 0;
  }

  //right-top 4x4 block unavailable
  ST32 (&pMvComp->sMotionVectorCache[ 9], 0);
  ST32 (&pMvComp->sMotionVectorCache[21], 0);
  ST32 (&pMvComp->sMotionVectorCache[11], 0);
  ST32 (&pMvComp->sMotionVectorCache[17], 0);
  ST32 (&pMvComp->sMotionVectorCache[23], 0);
  pMvComp->iRefIndexCache[ 9] =
  pMvComp->iRefIndexCache[11] =
  pMvComp->iRefIndexCache[17] =
  pMvComp->iRefIndexCache[21] =
  pMvComp->iRefIndexCache[23] = REF_NOT_AVAIL;
}

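// Identical to FillNeighborCacheInterWithoutBGD() except for the additional
// pVaaBgMbFlag checks: a skipped neighbor is only used as a skip-SAD candidate
// when its VAA background-detection flag is clear.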
void FillNeighborCacheInterWithBGD (SMbCache* pMbCache, SMB* pCurMb, int32_t iMbWidth, int8_t* pVaaBgMbFlag) {
  uint32_t uiNeighborAvail = pCurMb->uiNeighborAvail;
  SMB* pLeftMb = pCurMb - 1;
  SMB* pTopMb = pCurMb - iMbWidth;
  SMB* pLeftTopMb = pCurMb - iMbWidth - 1;
  SMB* pRightTopMb = pCurMb - iMbWidth + 1;
  SMVComponentUnit* pMvComp = &pMbCache->sMvComponents;

  if ((uiNeighborAvail & LEFT_MB_POS) && IS_SVC_INTER (pLeftMb->uiMbType)) {
    pMvComp->sMotionVectorCache[ 6] = pLeftMb->sMv[ 3];
    pMvComp->sMotionVectorCache[12] = pLeftMb->sMv[ 7];
    pMvComp->sMotionVectorCache[18] = pLeftMb->sMv[11];
    pMvComp->sMotionVectorCache[24] = pLeftMb->sMv[15];
    pMvComp->iRefIndexCache[ 6] = pLeftMb->pRefIndex[1];
    pMvComp->iRefIndexCache[12] = pLeftMb->pRefIndex[1];
    pMvComp->iRefIndexCache[18] = pLeftMb->pRefIndex[3];
    pMvComp->iRefIndexCache[24] = pLeftMb->pRefIndex[3];
    pMbCache->iSadCost[3] = pLeftMb->pSadCost[0];

    if (pLeftMb->uiMbType == MB_TYPE_SKIP && pVaaBgMbFlag[-1] == 0) {
      pMbCache->bMbTypeSkip[3] = 1;
      pMbCache->iSadCostSkip[3] = pMbCache->pEncSad[-1];
    } else {
      pMbCache->bMbTypeSkip[3] = 0;
      pMbCache->iSadCostSkip[3] = 0;
    }
  } else { //unavailable, or a non-inter MB
    ST32 (&pMvComp->sMotionVectorCache[ 6], 0);
    ST32 (&pMvComp->sMotionVectorCache[12], 0);
    ST32 (&pMvComp->sMotionVectorCache[18], 0);
    ST32 (&pMvComp->sMotionVectorCache[24], 0);
    pMvComp->iRefIndexCache[ 6] =
    pMvComp->iRefIndexCache[12] =
    pMvComp->iRefIndexCache[18] =
    pMvComp->iRefIndexCache[24] = (uiNeighborAvail & LEFT_MB_POS) ? REF_NOT_IN_LIST : REF_NOT_AVAIL;
    pMbCache->iSadCost[3] = 0;
    pMbCache->bMbTypeSkip[3] = 0;
    pMbCache->iSadCostSkip[3] = 0;
  }

  if ((uiNeighborAvail & TOP_MB_POS) && IS_SVC_INTER (pTopMb->uiMbType)) { //TOP MB
    ST64 (&pMvComp->sMotionVectorCache[1], LD64 (&pTopMb->sMv[12]));
    ST64 (&pMvComp->sMotionVectorCache[3], LD64 (&pTopMb->sMv[14]));
    pMvComp->iRefIndexCache[1] = pTopMb->pRefIndex[2];
    pMvComp->iRefIndexCache[2] = pTopMb->pRefIndex[2];
    pMvComp->iRefIndexCache[3] = pTopMb->pRefIndex[3];
    pMvComp->iRefIndexCache[4] = pTopMb->pRefIndex[3];
    pMbCache->iSadCost[1] = pTopMb->pSadCost[0];
    if (pTopMb->uiMbType == MB_TYPE_SKIP && pVaaBgMbFlag[-iMbWidth] == 0) {
      pMbCache->bMbTypeSkip[1] = 1;
      pMbCache->iSadCostSkip[1] = pMbCache->pEncSad[-iMbWidth];
    } else {
      pMbCache->bMbTypeSkip[1] = 0;
      pMbCache->iSadCostSkip[1] = 0;
    }
  } else { //unavail
    ST64 (&pMvComp->sMotionVectorCache[1], 0);
    ST64 (&pMvComp->sMotionVectorCache[3], 0);
    pMvComp->iRefIndexCache[1] =
    pMvComp->iRefIndexCache[2] =
    pMvComp->iRefIndexCache[3] =
    pMvComp->iRefIndexCache[4] = (uiNeighborAvail & TOP_MB_POS) ? REF_NOT_IN_LIST : REF_NOT_AVAIL;
    pMbCache->iSadCost[1] = 0;
    pMbCache->bMbTypeSkip[1] = 0;
    pMbCache->iSadCostSkip[1] = 0;
  }

  if ((uiNeighborAvail & TOPLEFT_MB_POS) && IS_SVC_INTER (pLeftTopMb->uiMbType)) { //LEFT_TOP MB
    pMvComp->sMotionVectorCache[0] = pLeftTopMb->sMv[15];
    pMvComp->iRefIndexCache[0] = pLeftTopMb->pRefIndex[3];
    pMbCache->iSadCost[0] = pLeftTopMb->pSadCost[0];

    if (pLeftTopMb->uiMbType == MB_TYPE_SKIP && pVaaBgMbFlag[-iMbWidth - 1] == 0) {
      pMbCache->bMbTypeSkip[0] = 1;
      pMbCache->iSadCostSkip[0] = pMbCache->pEncSad[-iMbWidth - 1];
    } else {
      pMbCache->bMbTypeSkip[0] = 0;
      pMbCache->iSadCostSkip[0] = 0;
    }
  } else { //unavail
    ST32 (&pMvComp->sMotionVectorCache[0], 0);
    pMvComp->iRefIndexCache[0] = (uiNeighborAvail & TOPLEFT_MB_POS) ? REF_NOT_IN_LIST : REF_NOT_AVAIL;
    pMbCache->iSadCost[0] = 0;
    pMbCache->bMbTypeSkip[0] = 0;
    pMbCache->iSadCostSkip[0] = 0;
  }

  if ((uiNeighborAvail & TOPRIGHT_MB_POS) && IS_SVC_INTER (pRightTopMb->uiMbType)) { //RIGHT_TOP MB
    pMvComp->sMotionVectorCache[5] = pRightTopMb->sMv[12];
    pMvComp->iRefIndexCache[5] = pRightTopMb->pRefIndex[2];
    pMbCache->iSadCost[2] = pRightTopMb->pSadCost[0];

    if (pRightTopMb->uiMbType == MB_TYPE_SKIP && pVaaBgMbFlag[-iMbWidth + 1] == 0) {
      pMbCache->bMbTypeSkip[2] = 1;
      pMbCache->iSadCostSkip[2] = pMbCache->pEncSad[-iMbWidth + 1];
    } else {
      pMbCache->bMbTypeSkip[2] = 0;
      pMbCache->iSadCostSkip[2] = 0;
    }
  } else { //unavail
    ST32 (&pMvComp->sMotionVectorCache[5], 0);
    pMvComp->iRefIndexCache[5] = (uiNeighborAvail & TOPRIGHT_MB_POS) ? REF_NOT_IN_LIST : REF_NOT_AVAIL;
    pMbCache->iSadCost[2] = 0;
    pMbCache->bMbTypeSkip[2] = 0;
    pMbCache->iSadCostSkip[2] = 0;
  }

  //right-top 4x4 block unavailable
  ST32 (&pMvComp->sMotionVectorCache[ 9], 0);
  ST32 (&pMvComp->sMotionVectorCache[21], 0);
  ST32 (&pMvComp->sMotionVectorCache[11], 0);
  ST32 (&pMvComp->sMotionVectorCache[17], 0);
  ST32 (&pMvComp->sMotionVectorCache[23], 0);
  pMvComp->iRefIndexCache[ 9] =
  pMvComp->iRefIndexCache[11] =
  pMvComp->iRefIndexCache[17] =
  pMvComp->iRefIndexCache[21] =
  pMvComp->iRefIndexCache[23] = REF_NOT_AVAIL;
}

void InitFillNeighborCacheInterFunc (SWelsFuncPtrList* pFuncList, const int32_t kiFlag) {
  pFuncList->pfFillInterNeighborCache = kiFlag ? FillNeighborCacheInterWithBGD : FillNeighborCacheInterWithoutBGD;
}

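// Broadcast one MV to all 16 4x4-block entries of an MB (C reference version;
// a SIMD variant may replace it via InitIntraAnalysisVaaInfo()).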
void UpdateMbMv_c (SMVUnitXY* pMvBuffer, const SMVUnitXY ksMv) {
  int32_t k = 0;
  for (; k < MB_BLOCK4x4_NUM; k += 4) {
    pMvBuffer[k    ] =
    pMvBuffer[k + 1] =
    pMvBuffer[k + 2] =
    pMvBuffer[k + 3] = ksMv;
  }
}

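// Classify the four 8x8 sub-blocks of an MB by their SAD: returns 15 (use all
// blocks) when the SADs are nearly uniform, otherwise a 4-bit mask of the
// sub-blocks whose SAD exceeds the MB average.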
uint8_t MdInterAnalysisVaaInfo_c (int32_t* pSad8x8) {
  int32_t iSadBlock[4], iAverageSadBlock[4];
  int32_t iAverageSad, iVarianceSad;

  iSadBlock[0] = pSad8x8[0];
  iAverageSad = iSadBlock[0];

  iSadBlock[1] = pSad8x8[1];
  iAverageSad += iSadBlock[1];

  iSadBlock[2] = pSad8x8[2];
  iAverageSad += iSadBlock[2];

  iSadBlock[3] = pSad8x8[3];
  iAverageSad += iSadBlock[3];

  iAverageSad = iAverageSad >> 2;

  iAverageSadBlock[0] = (iSadBlock[0] >> 6) - (iAverageSad >> 6);
  iVarianceSad = iAverageSadBlock[0] * iAverageSadBlock[0];

  iAverageSadBlock[1] = (iSadBlock[1] >> 6) - (iAverageSad >> 6);
  iVarianceSad += iAverageSadBlock[1] * iAverageSadBlock[1];

  iAverageSadBlock[2] = (iSadBlock[2] >> 6) - (iAverageSad >> 6);
  iVarianceSad += iAverageSadBlock[2] * iAverageSadBlock[2];

  iAverageSadBlock[3] = (iSadBlock[3] >> 6) - (iAverageSad >> 6);
  iVarianceSad += iAverageSadBlock[3] * iAverageSadBlock[3];

  if (iVarianceSad < INTER_VARIANCE_SAD_THRESHOLD) {
    return 15;
  }

  uint8_t uiMbSign = 0;
  if (iSadBlock[0] > iAverageSad)
    uiMbSign |= 0x08;
  if (iSadBlock[1] > iAverageSad)
    uiMbSign |= 0x04;
  if (iSadBlock[2] > iAverageSad)
    uiMbSign |= 0x02;
  if (iSadBlock[3] > iAverageSad)
    uiMbSign |= 0x01;
  return (uiMbSign);
}

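// Measure luma texture for the intra-mode decision: average each of the
// sixteen 4x4 sub-blocks of the 16x16 luma block, then return
// sum(avg^2) - sum(avg)^2 / 16, i.e. 16x the variance of the sub-block means.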
int32_t AnalysisVaaInfoIntra_c (uint8_t* pDataY, const int32_t kiLineSize) {
  ENFORCE_STACK_ALIGN_1D (uint16_t, uiAvgBlock, 16, 16)
  uint16_t* pBlock = &uiAvgBlock[0];
  uint8_t* pEncData = pDataY;
  const int32_t kiLineSize2 = kiLineSize << 1;
  const int32_t kiLineSize3 = kiLineSize + kiLineSize2;
  const int32_t kiLineSize4 = kiLineSize << 2;
  int32_t i = 0, j = 0, num = 0;
  int32_t iSumAvg = 0, iSumSqr = 0;

  // analysis_vaa_info_intra_core_c( pDataY, iLineSize, pBlock );
  for (; j < 16; j += 4) {
    num = 0;
    for (i = 0; i < 16; i += 4, num++) {
      pBlock[num]  = pEncData[i] + pEncData[i + 1] + pEncData[i + 2] + pEncData[i + 3];
      pBlock[num] += pEncData[i + kiLineSize] + pEncData[i + kiLineSize + 1] + pEncData[i + kiLineSize + 2] + pEncData[i + kiLineSize + 3];
      pBlock[num] += pEncData[i + kiLineSize2] + pEncData[i + kiLineSize2 + 1] + pEncData[i + kiLineSize2 + 2] + pEncData[i + kiLineSize2 + 3];
      pBlock[num] += pEncData[i + kiLineSize3] + pEncData[i + kiLineSize3 + 1] + pEncData[i + kiLineSize3 + 2] + pEncData[i + kiLineSize3 + 3];
      pBlock[num] >>= 4;
    }
    pBlock += 4;
    pEncData += kiLineSize4;
  }

  pBlock = &uiAvgBlock[0];
  i = 4;
  for (; i > 0; --i) {
    iSumAvg += pBlock[0] + pBlock[1] + pBlock[2] + pBlock[3];
    iSumSqr += pBlock[0] * pBlock[0] + pBlock[1] * pBlock[1] + pBlock[2] * pBlock[2] + pBlock[3] * pBlock[3];

    pBlock += 4;
  }

  return /*variance =*/ (iSumSqr - ((iSumAvg * iSumAvg) >> 4));
}


// Set up the function pointers for the VAA-based analysis above, choosing
// SIMD versions by CPU capability where available (added 6/7/2010).
void InitIntraAnalysisVaaInfo (SWelsFuncPtrList* pFuncList, const uint32_t kuiCpuFlag) {
  pFuncList->pfGetVarianceFromIntraVaa = AnalysisVaaInfoIntra_c;
  pFuncList->pfGetMbSignFromInterVaa = MdInterAnalysisVaaInfo_c;
  pFuncList->pfUpdateMbMv = UpdateMbMv_c;

#if defined(X86_ASM)
  if ((kuiCpuFlag & WELS_CPU_SSE2) == WELS_CPU_SSE2) {
    pFuncList->pfGetVarianceFromIntraVaa = AnalysisVaaInfoIntra_sse2;
    pFuncList->pfGetMbSignFromInterVaa = MdInterAnalysisVaaInfo_sse2;
    pFuncList->pfUpdateMbMv = UpdateMbMv_sse2;
  }
  if ((kuiCpuFlag & WELS_CPU_SSSE3) == WELS_CPU_SSSE3) {
    pFuncList->pfGetVarianceFromIntraVaa = AnalysisVaaInfoIntra_ssse3;
  }
  if ((kuiCpuFlag & WELS_CPU_SSE41) == WELS_CPU_SSE41) {
    pFuncList->pfGetMbSignFromInterVaa = MdInterAnalysisVaaInfo_sse41;
  }
#endif//X86_ASM
}

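// Report whether the current MB is textured enough to be worth full intra
// analysis (see INTRA_VARIANCE_SAD_THRESHOLD above).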
bool MdIntraAnalysisVaaInfo (sWelsEncCtx* pEncCtx, uint8_t* pEncMb) {
  SDqLayer* pCurDqLayer = pEncCtx->pCurDqLayer;
  const int32_t kiLineSize = pCurDqLayer->iEncStride[0];
  const int32_t kiVariance = pEncCtx->pFuncList->pfGetVarianceFromIntraVaa (pEncMb, kiLineSize);
  return (kiVariance >= INTRA_VARIANCE_SAD_THRESHOLD);
}

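// Partition pBufferInterPredMe into four planes of 640 bytes each (offsets
// 0/640/1280/1920): half-pel H, half-pel V, best quarter-pel, and a temporary
// quarter-pel buffer. The extra iStride offset presumably leaves padding above
// each plane for filters that read one row/column beyond the block.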
void InitMeRefinePointer (SMeRefinePointer* pMeRefine, SMbCache* pMbCache, int32_t iStride) {
  pMeRefine->pHalfPixH = &pMbCache->pBufferInterPredMe[0] + iStride;
  pMeRefine->pHalfPixV = &pMbCache->pBufferInterPredMe[640] + iStride;

  pMeRefine->pQuarPixBest = &pMbCache->pBufferInterPredMe[1280] + iStride;
  pMeRefine->pQuarPixTmp = &pMbCache->pBufferInterPredMe[1920] + iStride;
}
typedef struct TagQuarParams {
  int32_t iBestCost;
  int32_t iBestHalfPix;
  int32_t iStrideA;
  int32_t iStrideB;
  uint8_t* pRef;
  uint8_t* pSrcB[4];
  uint8_t* pSrcA[4];
  int32_t iLms[4];
  int32_t iBestQuarPix;
} SQuarRefineParams;

#define SWITCH_BEST_TMP_BUF(prev_best, curr_best) {\
    pParams->iBestCost = iCurCost;\
    pTmp = prev_best;\
    prev_best = curr_best;\
    curr_best = pTmp;\
  }
#define CALC_COST(me_buf, lm) ( pFunc->sSampleDealingFuncs.pfMeCost[kuiPixel](pEncMb, iStrideEnc, me_buf, ME_REFINE_BUF_STRIDE) + lm )

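// MeRefineQuarPixel(): try the four quarter-pel positions around the current
// best. Each candidate averages a half-pel plane (pSrcA) with a second plane
// (pSrcB) via pfSampleAveraging; CALC_COST adds the MV-rate term, and
// SWITCH_BEST_TMP_BUF swaps the best/temp buffers when a candidate wins.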
inline void MeRefineQuarPixel (SWelsFuncPtrList* pFunc, SWelsME* pMe, SMeRefinePointer* pMeRefine,
                               const int32_t kiWidth, const int32_t kiHeight, SQuarRefineParams* pParams, int32_t iStrideEnc) {
  PWelsSampleAveragingFunc pSampleAvg = pFunc->sMcFuncs.pfSampleAveraging;
  int32_t iCurCost;
  uint8_t* pEncMb = pMe->pEncMb;
  uint8_t* pTmp = NULL;
  const uint8_t kuiPixel = pMe->uiBlockSize;

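  //=========================(0, -1)=======================//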
  pSampleAvg (pMeRefine->pQuarPixTmp, ME_REFINE_BUF_STRIDE, pParams->pSrcA[0], ME_REFINE_BUF_STRIDE,
              pParams->pSrcB[0], pParams->iStrideA, kiWidth, kiHeight);

  iCurCost = CALC_COST (pMeRefine->pQuarPixTmp, pParams->iLms[0]);
  if (iCurCost < pParams->iBestCost) {
    pParams->iBestQuarPix = ME_QUAR_PIXEL_TOP;
    SWITCH_BEST_TMP_BUF (pMeRefine->pQuarPixBest, pMeRefine->pQuarPixTmp);
  }
  //=========================(0, 1)=======================//
  pSampleAvg (pMeRefine->pQuarPixTmp, ME_REFINE_BUF_STRIDE, pParams->pSrcA[1],
              ME_REFINE_BUF_STRIDE, pParams->pSrcB[1], pParams->iStrideA, kiWidth, kiHeight);
  iCurCost = CALC_COST (pMeRefine->pQuarPixTmp, pParams->iLms[1]);
  if (iCurCost < pParams->iBestCost) {
    pParams->iBestQuarPix = ME_QUAR_PIXEL_BOTTOM;
    SWITCH_BEST_TMP_BUF (pMeRefine->pQuarPixBest, pMeRefine->pQuarPixTmp);
  }
  //==========================(-1, 0)=========================//
  pSampleAvg (pMeRefine->pQuarPixTmp, ME_REFINE_BUF_STRIDE, pParams->pSrcA[2],
              ME_REFINE_BUF_STRIDE, pParams->pSrcB[2], pParams->iStrideB, kiWidth, kiHeight);
  iCurCost = CALC_COST (pMeRefine->pQuarPixTmp, pParams->iLms[2]);
  if (iCurCost < pParams->iBestCost) {
    pParams->iBestQuarPix = ME_QUAR_PIXEL_LEFT;
    SWITCH_BEST_TMP_BUF (pMeRefine->pQuarPixBest, pMeRefine->pQuarPixTmp);
  }
  //==========================(1, 0)=========================//
  pSampleAvg (pMeRefine->pQuarPixTmp, ME_REFINE_BUF_STRIDE, pParams->pSrcA[3],
              ME_REFINE_BUF_STRIDE, pParams->pSrcB[3], pParams->iStrideB, kiWidth, kiHeight);

  iCurCost = CALC_COST (pMeRefine->pQuarPixTmp, pParams->iLms[3]);
  if (iCurCost < pParams->iBestCost) {
    pParams->iBestQuarPix = ME_QUAR_PIXEL_RIGHT;
    SWITCH_BEST_TMP_BUF (pMeRefine->pQuarPixBest, pMeRefine->pQuarPixTmp);
  }
}

void MeRefineFracPixel (sWelsEncCtx* pEncCtx, uint8_t* pMemPredInterMb, SWelsME* pMe,
                        SMeRefinePointer* pMeRefine, int32_t iWidth, int32_t iHeight) {
  SWelsFuncPtrList* pFunc = pEncCtx->pFuncList;
  int16_t iMvx = pMe->sMv.iMvX;
  int16_t iMvy = pMe->sMv.iMvY;

  int16_t iHalfMvx = iMvx;
  int16_t iHalfMvy = iMvy;
  const int32_t kiStrideEnc = pEncCtx->pCurDqLayer->iEncStride[0];
  const int32_t kiStrideRef = pEncCtx->pCurDqLayer->pRefPic->iLineSize[0];

  uint8_t* pEncData = pMe->pEncMb;
  uint8_t* pRef = pMe->pRefMb;

  int32_t iBestQuarPix = ME_NO_BEST_QUAR_PIXEL;

  SQuarRefineParams sParams;
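  // One shared offset table: entry i is the quarter-pel X offset for direction
  // i and, via the +3 alias below, entry i+3 is its Y offset, so that
  // ME_NO_BEST_QUAR_PIXEL maps to (0, 0).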
  static const int32_t iMvQuarAddX[10] = {0, 0, -1, 1, 0, 0, 0, -1, 1, 0};
  const int32_t* pMvQuarAddY = iMvQuarAddX + 3;
  uint8_t* pBestPredInter = pRef;
  int32_t iInterBlk4Stride = ME_REFINE_BUF_STRIDE;

  int32_t iBestCost;
  int32_t iCurCost;
  int32_t iBestHalfPix;

  if (pEncCtx->pCurDqLayer->bSatdInMdFlag) {
    iBestCost = pMe->uSadPredISatd.uiSatd + COST_MVD (pMe->pMvdCost, iMvx - pMe->sMvp.iMvX, iMvy - pMe->sMvp.iMvY);
  } else {
    iBestCost = pFunc->sSampleDealingFuncs.pfMeCost[pMe->uiBlockSize] (pEncData, kiStrideEnc, pRef, kiStrideRef) +
                COST_MVD (pMe->pMvdCost, iMvx - pMe->sMvp.iMvX, iMvy - pMe->sMvp.iMvY);
  }

  iBestHalfPix = REFINE_ME_NO_BEST_HALF_PIXEL;

  //step 1: get the [iWidth][iHeight+1] half-pel block from the vertical filter
  pFunc->sMcFuncs.pfLumaHalfpelVer (pRef - kiStrideRef, kiStrideRef, pMeRefine->pHalfPixV, ME_REFINE_BUF_STRIDE, iWidth,
                                    iHeight + 1);

  //===========================(0, -2)==============================//
  iCurCost = pFunc->sSampleDealingFuncs.pfMeCost[pMe->uiBlockSize] (pEncData, kiStrideEnc, pMeRefine->pHalfPixV,
             ME_REFINE_BUF_STRIDE) +
             COST_MVD (pMe->pMvdCost, iMvx - pMe->sMvp.iMvX, iMvy - 2 - pMe->sMvp.iMvY);
  if (iCurCost < iBestCost) {
    iBestCost = iCurCost;
    iBestHalfPix = REFINE_ME_HALF_PIXEL_TOP;
    pBestPredInter = pMeRefine->pHalfPixV;
  }
  //===========================(0, 2)==============================//
  iCurCost = pFunc->sSampleDealingFuncs.pfMeCost[pMe->uiBlockSize] (pEncData, kiStrideEnc,
             pMeRefine->pHalfPixV + ME_REFINE_BUF_STRIDE, ME_REFINE_BUF_STRIDE) +
             COST_MVD (pMe->pMvdCost, iMvx - pMe->sMvp.iMvX, iMvy + 2 - pMe->sMvp.iMvY);
  if (iCurCost < iBestCost) {
    iBestCost = iCurCost;
    iBestHalfPix = REFINE_ME_HALF_PIXEL_BOTTOM;
    pBestPredInter = pMeRefine->pHalfPixV + ME_REFINE_BUF_STRIDE;
  }
  //step 2: get the [iWidth+1][iHeight] half-pel block from the horizontal filter
  pFunc->sMcFuncs.pfLumaHalfpelHor (pRef - 1, kiStrideRef, pMeRefine->pHalfPixH, ME_REFINE_BUF_STRIDE, iWidth + 1,
                                    iHeight);

  //===========================(-2, 0)==============================//
  iCurCost = pFunc->sSampleDealingFuncs.pfMeCost[pMe->uiBlockSize] (pEncData, kiStrideEnc, pMeRefine->pHalfPixH,
             ME_REFINE_BUF_STRIDE) +
             COST_MVD (pMe->pMvdCost, iMvx - 2 - pMe->sMvp.iMvX, iMvy - pMe->sMvp.iMvY);
  if (iCurCost < iBestCost) {
    iBestCost = iCurCost;
    iBestHalfPix = REFINE_ME_HALF_PIXEL_LEFT;
    pBestPredInter = pMeRefine->pHalfPixH;
  }
  //===========================(2, 0)===============================//
  iCurCost = pFunc->sSampleDealingFuncs.pfMeCost[pMe->uiBlockSize] (pEncData, kiStrideEnc, pMeRefine->pHalfPixH + 1,
             ME_REFINE_BUF_STRIDE) +
             COST_MVD (pMe->pMvdCost, iMvx + 2 - pMe->sMvp.iMvX, iMvy - pMe->sMvp.iMvY);
  if (iCurCost < iBestCost) {
    iBestCost = iCurCost;
    iBestHalfPix = REFINE_ME_HALF_PIXEL_RIGHT;
    pBestPredInter = pMeRefine->pHalfPixH + 1;
  }

  sParams.iBestCost = iBestCost;
  sParams.iBestHalfPix = iBestHalfPix;
  sParams.pRef = pRef;
  sParams.iBestQuarPix = ME_NO_BEST_QUAR_PIXEL;

  //step 3: if no half-pel position improved on the start, try the four
  //quarter-pel positions around the integer-pel position; otherwise refine
  //around the best half-pel position
  if (REFINE_ME_NO_BEST_HALF_PIXEL == iBestHalfPix) {
    sParams.iStrideA = kiStrideRef;
    sParams.iStrideB = kiStrideRef;
    sParams.pSrcA[0] = pMeRefine->pHalfPixV;
    sParams.pSrcA[1] = pMeRefine->pHalfPixV + ME_REFINE_BUF_STRIDE;
    sParams.pSrcA[2] = pMeRefine->pHalfPixH;
    sParams.pSrcA[3] = pMeRefine->pHalfPixH + 1;

    sParams.pSrcB[0] = sParams.pSrcB[1] = sParams.pSrcB[2] = sParams.pSrcB[3] = pRef;

    sParams.iLms[0] = COST_MVD (pMe->pMvdCost, iHalfMvx - pMe->sMvp.iMvX, iHalfMvy - 1 - pMe->sMvp.iMvY);
    sParams.iLms[1] = COST_MVD (pMe->pMvdCost, iHalfMvx - pMe->sMvp.iMvX, iHalfMvy + 1 - pMe->sMvp.iMvY);
    sParams.iLms[2] = COST_MVD (pMe->pMvdCost, iHalfMvx - 1 - pMe->sMvp.iMvX, iHalfMvy - pMe->sMvp.iMvY);
    sParams.iLms[3] = COST_MVD (pMe->pMvdCost, iHalfMvx + 1 - pMe->sMvp.iMvX, iHalfMvy - pMe->sMvp.iMvY);
  } else { //must first get the [iWidth+1][iHeight+1] half-pel block from the (2, 2) horizontal-and-vertical filter
    switch (iBestHalfPix) {
    case REFINE_ME_HALF_PIXEL_LEFT: {
      pMeRefine->pHalfPixHV = pMeRefine->pHalfPixV; //reuse pBuffer, here only h&hv
      pFunc->sMcFuncs.pfLumaHalfpelCen (pRef - 1 - kiStrideRef, kiStrideRef, pMeRefine->pHalfPixHV, ME_REFINE_BUF_STRIDE,
                                        iWidth + 1, iHeight + 1);

      iHalfMvx -= 2;
      sParams.iStrideA = ME_REFINE_BUF_STRIDE;
      sParams.iStrideB = kiStrideRef;
      sParams.pSrcA[0] = pMeRefine->pHalfPixH;
      sParams.pSrcA[3] = sParams.pSrcA[2] = sParams.pSrcA[1] = sParams.pSrcA[0];
      sParams.pSrcB[0] = pMeRefine->pHalfPixHV;
      sParams.pSrcB[1] = pMeRefine->pHalfPixHV + ME_REFINE_BUF_STRIDE;
      sParams.pSrcB[2] = pRef - 1;
      sParams.pSrcB[3] = pRef;
    }
    break;
    case REFINE_ME_HALF_PIXEL_RIGHT: {
      pMeRefine->pHalfPixHV = pMeRefine->pHalfPixV; //reuse pBuffer, here only h&hv
      pFunc->sMcFuncs.pfLumaHalfpelCen (pRef - 1 - kiStrideRef, kiStrideRef, pMeRefine->pHalfPixHV, ME_REFINE_BUF_STRIDE,
                                        iWidth + 1, iHeight + 1);
      iHalfMvx += 2;
      sParams.iStrideA = ME_REFINE_BUF_STRIDE;
      sParams.iStrideB = kiStrideRef;
      sParams.pSrcA[0] = pMeRefine->pHalfPixH + 1;
      sParams.pSrcA[3] = sParams.pSrcA[2] = sParams.pSrcA[1] = sParams.pSrcA[0];
      sParams.pSrcB[0] = pMeRefine->pHalfPixHV + 1;
      sParams.pSrcB[1] = pMeRefine->pHalfPixHV + 1 + ME_REFINE_BUF_STRIDE;
      sParams.pSrcB[2] = pRef;
      sParams.pSrcB[3] = pRef + 1;
    }
    break;
    case REFINE_ME_HALF_PIXEL_TOP: {
      pMeRefine->pHalfPixHV = pMeRefine->pHalfPixH; //reuse pBuffer, here only v&hv
      pFunc->sMcFuncs.pfLumaHalfpelCen (pRef - 1 - kiStrideRef, kiStrideRef, pMeRefine->pHalfPixHV, ME_REFINE_BUF_STRIDE,
                                        iWidth + 1, iHeight + 1);

      iHalfMvy -= 2;
      sParams.iStrideA = kiStrideRef;
      sParams.iStrideB = ME_REFINE_BUF_STRIDE;
      sParams.pSrcA[0] = pMeRefine->pHalfPixV;
      sParams.pSrcA[3] = sParams.pSrcA[2] = sParams.pSrcA[1] = sParams.pSrcA[0];
      sParams.pSrcB[0] = pRef - kiStrideRef;
      sParams.pSrcB[1] = pRef;
      sParams.pSrcB[2] = pMeRefine->pHalfPixHV;
      sParams.pSrcB[3] = pMeRefine->pHalfPixHV + 1;
    }
    break;
    case REFINE_ME_HALF_PIXEL_BOTTOM: {
      pMeRefine->pHalfPixHV = pMeRefine->pHalfPixH; //reuse pBuffer, here only v&hv
      pFunc->sMcFuncs.pfLumaHalfpelCen (pRef - 1 - kiStrideRef, kiStrideRef, pMeRefine->pHalfPixHV, ME_REFINE_BUF_STRIDE,
                                        iWidth + 1, iHeight + 1);
      iHalfMvy += 2;
      sParams.iStrideA = kiStrideRef;
      sParams.iStrideB = ME_REFINE_BUF_STRIDE;
      sParams.pSrcA[0] = pMeRefine->pHalfPixV + ME_REFINE_BUF_STRIDE;
      sParams.pSrcA[3] = sParams.pSrcA[2] = sParams.pSrcA[1] = sParams.pSrcA[0];
      sParams.pSrcB[0] = pRef;
      sParams.pSrcB[1] = pRef + kiStrideRef;
      sParams.pSrcB[2] = pMeRefine->pHalfPixHV + ME_REFINE_BUF_STRIDE;
      sParams.pSrcB[3] = pMeRefine->pHalfPixHV + ME_REFINE_BUF_STRIDE + 1;
    }
    break;
    default:
      break;
    }
    sParams.iLms[0] = COST_MVD (pMe->pMvdCost, iHalfMvx - pMe->sMvp.iMvX, iHalfMvy - 1 - pMe->sMvp.iMvY);
    sParams.iLms[1] = COST_MVD (pMe->pMvdCost, iHalfMvx - pMe->sMvp.iMvX, iHalfMvy + 1 - pMe->sMvp.iMvY);
    sParams.iLms[2] = COST_MVD (pMe->pMvdCost, iHalfMvx - 1 - pMe->sMvp.iMvX, iHalfMvy - pMe->sMvp.iMvY);
    sParams.iLms[3] = COST_MVD (pMe->pMvdCost, iHalfMvx + 1 - pMe->sMvp.iMvX, iHalfMvy - pMe->sMvp.iMvY);
  }
  MeRefineQuarPixel (pFunc, pMe, pMeRefine, iWidth, iHeight, &sParams, kiStrideEnc);

  if (iBestCost > sParams.iBestCost) {
    pBestPredInter = pMeRefine->pQuarPixBest;
    iBestCost = sParams.iBestCost;
  }
  iBestQuarPix = sParams.iBestQuarPix;

  //update final best MV
  pMe->sMv.iMvX = iHalfMvx + iMvQuarAddX[iBestQuarPix];
  pMe->sMv.iMvY = iHalfMvy + pMvQuarAddY[iBestQuarPix];
  pMe->uiSatdCost = iBestCost;

  //no half- or quarter-pel position was best, so do MC with the integer-pel MV
  if (iBestHalfPix + iBestQuarPix == NO_BEST_FRAC_PIX) {
    pBestPredInter = pRef;
    iInterBlk4Stride = kiStrideRef;
  }
  pMeRefine->pfCopyBlockByMode (pMemPredInterMb, MB_WIDTH_LUMA, pBestPredInter,
                                iInterBlk4Stride);
}

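// Precompute, for each of the 16 4x4 blocks of an MB, the pixel offset of its
// top-left corner in a reference plane with stride kiStrideRef. Judging by the
// tables, the block order interleaves the four 4x4 blocks of each 8x8 quadrant.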
void InitBlkStrideWithRef (int32_t* pBlkStride, const int32_t kiStrideRef) {
  static const uint8_t kuiStrideX[16] = {
    0, 4,  0, 4,
    8, 12, 8, 12,
    0, 4,  0, 4,
    8, 12, 8, 12
  };
  static const uint8_t kuiStrideY[16] = {
    0, 0, 4,  4,
    0, 0, 4,  4,
    8, 8, 12, 12,
    8, 8, 12, 12
  };
  int32_t i;

  for (i = 0; i < 16; i += 4) {
    pBlkStride[i    ] = kuiStrideX[i    ] + kuiStrideY[i    ] * kiStrideRef;
    pBlkStride[i + 1] = kuiStrideX[i + 1] + kuiStrideY[i + 1] * kiStrideRef;
    pBlkStride[i + 2] = kuiStrideX[i + 2] + kuiStrideY[i + 2] * kiStrideRef;
    pBlkStride[i + 3] = kuiStrideX[i + 3] + kuiStrideY[i + 3] * kiStrideRef;
  }
}

/*
 * iMvdSz = (648*2+1) or (972*2+1);
 */
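// Build the per-QP MVD rate table: for each of the 52 QPs, the entry for MVD d
// in [-kiSz, kiSz] holds lambda(QP) times the bit length of the signed
// Exp-Golomb code of d (BsSizeSE). Each QP occupies one row of kiMvdSz entries,
// negative side first; the zero-MVD entry costs exactly one bit (lambda).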
void MvdCostInit (uint16_t* pMvdCostInter, const int32_t kiMvdSz) {
  const int32_t kiSz = kiMvdSz >> 1;
  uint16_t* pNegMvd = pMvdCostInter;
  uint16_t* pPosMvd = pMvdCostInter + kiSz + 1;
  const int32_t* kpQpLambda = &g_kiQpCostTable[0];
  int32_t i, j;

  for (i = 0; i < 52; ++i) {
    const uint16_t kiLambda = kpQpLambda[i];
    int32_t iNegSe = -kiSz;
    int32_t iPosSe = 1;

    for (j = 0; j < kiSz; j += 4) {
      *pNegMvd++ = kiLambda * BsSizeSE (iNegSe++);
      *pNegMvd++ = kiLambda * BsSizeSE (iNegSe++);
      *pNegMvd++ = kiLambda * BsSizeSE (iNegSe++);
      *pNegMvd++ = kiLambda * BsSizeSE (iNegSe++);

      *pPosMvd++ = kiLambda * BsSizeSE (iPosSe++);
      *pPosMvd++ = kiLambda * BsSizeSE (iPosSe++);
      *pPosMvd++ = kiLambda * BsSizeSE (iPosSe++);
      *pPosMvd++ = kiLambda * BsSizeSE (iPosSe++);
    }
    *pNegMvd = kiLambda;
    pNegMvd += kiSz + 1;
    pPosMvd += kiSz + 1;
  }
}
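// Predict the SAD of the current block from its A (left), B (top) and C
// (top-right, falling back to the top-left D) neighbors, following the H.264
// MV-prediction neighbor rules, then scale the prediction by ~0.90625.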
void PredictSad (int8_t* pRefIndexCache, int32_t* pSadCostCache, int32_t uiRef, int32_t* pSadPred) {
  const int32_t kiRefB = pRefIndexCache[1]; //top       g_uiCache12_8x8RefIdx[0] - 4
  int32_t iRefC = pRefIndexCache[5];        //top-right g_uiCache12_8x8RefIdx[0] - 2
  const int32_t kiRefA = pRefIndexCache[6]; //left      g_uiCache12_8x8RefIdx[0] - 1
  const int32_t kiSadB = pSadCostCache[1];
  int32_t iSadC = pSadCostCache[2];
  const int32_t kiSadA = pSadCostCache[3];

  int32_t iCount;

  if (iRefC == REF_NOT_AVAIL) {
    iRefC = pRefIndexCache[0]; //top-left g_uiCache12_8x8RefIdx[0] - 4 - 1
    iSadC = pSadCostCache[0];
  }

  if (kiRefB == REF_NOT_AVAIL && iRefC == REF_NOT_AVAIL && kiRefA != REF_NOT_AVAIL) {
    *pSadPred = kiSadA;
  } else {
    iCount = (uiRef == kiRefA) << MB_LEFT_BIT;
    iCount |= (uiRef == kiRefB) << MB_TOP_BIT;
    iCount |= (uiRef == iRefC) << MB_TOPRIGHT_BIT;
    switch (iCount) {
    case LEFT_MB_POS: // A
      *pSadPred = kiSadA;
      break;
    case TOP_MB_POS: // B
      *pSadPred = kiSadB;
      break;
    case TOPRIGHT_MB_POS: // C or D
      *pSadPred = iSadC;
      break;
    default:
      *pSadPred = WelsMedian (kiSadA, kiSadB, iSadC);
      break;
    }
  }

#define REPLACE_SAD_MULTIPLY(x) ((x) - ((x) >> 3) + ((x) >> 5)) // 1 - 1/8 + 1/32 = 0.90625, a close approximation of 0.9
  iCount = (*pSadPred) << 6; // *64 cannot overflow: SAD <= 255 * 256 < 2^16, so int32_t is enough
  *pSadPred = (REPLACE_SAD_MULTIPLY (iCount) + 32) >> 6;
#undef REPLACE_SAD_MULTIPLY
}
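// Variant of PredictSad() for the skip-mode decision: a neighbor's SAD counts
// only when that neighbor MB was itself coded as skip (pMbSkipCache), and no
// 0.90625 scaling is applied at the end.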
void PredictSadSkip (int8_t* pRefIndexCache, bool* pMbSkipCache, int32_t* pSadCostCache, int32_t uiRef,
                     int32_t* iSadPredSkip) {
  const int32_t kiRefB = pRefIndexCache[1]; //top       g_uiCache12_8x8RefIdx[0] - 4
  int32_t iRefC = pRefIndexCache[5];        //top-right g_uiCache12_8x8RefIdx[0] - 2
  const int32_t kiRefA = pRefIndexCache[6]; //left      g_uiCache12_8x8RefIdx[0] - 1
  const int32_t kiSadB = (pMbSkipCache[1] == 1 ? pSadCostCache[1] : 0);
  int32_t iSadC = (pMbSkipCache[2] == 1 ? pSadCostCache[2] : 0);
  const int32_t kiSadA = (pMbSkipCache[3] == 1 ? pSadCostCache[3] : 0);
  int32_t iRefSkip = pMbSkipCache[2];

  int32_t iCount = 0;

  if (iRefC == REF_NOT_AVAIL) {
    iRefC = pRefIndexCache[0]; //top-left g_uiCache12_8x8RefIdx[0] - 4 - 1
    iSadC = (pMbSkipCache[0] == 1 ? pSadCostCache[0] : 0);
    iRefSkip = pMbSkipCache[0];
  }

  if (kiRefB == REF_NOT_AVAIL && iRefC == REF_NOT_AVAIL && kiRefA != REF_NOT_AVAIL) {
    *iSadPredSkip = kiSadA;
  } else {
    iCount = ((uiRef == kiRefA) && (pMbSkipCache[3] == 1)) << MB_LEFT_BIT;
    iCount |= ((uiRef == kiRefB) && (pMbSkipCache[1] == 1)) << MB_TOP_BIT;
    iCount |= ((uiRef == iRefC) && (iRefSkip == 1)) << MB_TOPRIGHT_BIT;
    switch (iCount) {
    case LEFT_MB_POS: // A
      *iSadPredSkip = kiSadA;
      break;
    case TOP_MB_POS: // B
      *iSadPredSkip = kiSadB;
      break;
    case TOPRIGHT_MB_POS: // C or D
      *iSadPredSkip = iSadC;
      break;
    default:
      *iSadPredSkip = WelsMedian (kiSadA, kiSadB, iSadC);
      break;
    }
  }
}
} // namespace WelsEnc
911