• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*!
2  * \copy
3  *     Copyright (c)  2009-2013, Cisco Systems
4  *     All rights reserved.
5  *
6  *     Redistribution and use in source and binary forms, with or without
7  *     modification, are permitted provided that the following conditions
8  *     are met:
9  *
10  *        * Redistributions of source code must retain the above copyright
11  *          notice, this list of conditions and the following disclaimer.
12  *
13  *        * Redistributions in binary form must reproduce the above copyright
14  *          notice, this list of conditions and the following disclaimer in
15  *          the documentation and/or other materials provided with the
16  *          distribution.
17  *
18  *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21  *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
22  *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23  *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
24  *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25  *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26  *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
28  *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  *     POSSIBILITY OF SUCH DAMAGE.
30  *
31  *
32  * \file    md.c
33  *
34  * \brief   mode decision
35  *
36  * \date    2009.05.14 Created
37  *
38  *************************************************************************************
39  */
40 
41 #include "ls_defines.h"
42 #include "md.h"
43 #include "cpu_core.h"
44 #include "svc_enc_golomb.h"
45 
46 namespace WelsEnc {
47 #define INTRA_VARIANCE_SAD_THRESHOLD 150
48 #define INTER_VARIANCE_SAD_THRESHOLD 20
49 
//fill cache of neighbor MB, containing pNonZeroCount, sample_avail, pIntra4x4PredMode
// Copies the per-4x4-block non-zero-coefficient counts and (when the neighbor
// is INTRA4x4-coded) the intra-4x4 prediction modes of the LEFT and TOP
// neighbor MBs into the current MB's cache. Unavailable entries are marked -1;
// an available but non-INTRA4x4 neighbor contributes DC (2) as its mode.
// Also builds pMbCache->uiNeighborIntra, the bitmask of intra-usable neighbors.
// NOTE(review): the cache index constants (1..4, 6, 8/16/24/32, 13/21/37/45,
// 30) encode the project's extended block-cache layout; taken on faith here.
void FillNeighborCacheIntra (SMbCache* pMbCache, SMB* pCurMb, int32_t iMbWidth) {
  uint32_t uiNeighborAvail = pCurMb->uiNeighborAvail;
  uint32_t uiNeighborIntra = 0;

  if (uiNeighborAvail & LEFT_MB_POS) { //LEFT MB
    // The left MB's NZC block immediately precedes the current MB's in memory.
    int8_t* pLeftMbNonZeroCount = pCurMb->pNonZeroCount - MB_LUMA_CHROMA_BLOCK4x4_NUM;
    // Rightmost luma column of the left MB (raster 4x4 indices 3/7/11/15).
    pMbCache->iNonZeroCoeffCount[8] = pLeftMbNonZeroCount[ 3];
    pMbCache->iNonZeroCoeffCount[16] = pLeftMbNonZeroCount[ 7];
    pMbCache->iNonZeroCoeffCount[24] = pLeftMbNonZeroCount[11];
    pMbCache->iNonZeroCoeffCount[32] = pLeftMbNonZeroCount[15];

    // Rightmost chroma columns (indices 17/21 and 19/23 of the neighbor's NZC).
    pMbCache->iNonZeroCoeffCount[ 13] = pLeftMbNonZeroCount[17];
    pMbCache->iNonZeroCoeffCount[21] = pLeftMbNonZeroCount[21];
    pMbCache->iNonZeroCoeffCount[37] = pLeftMbNonZeroCount[19];
    pMbCache->iNonZeroCoeffCount[45] = pLeftMbNonZeroCount[23];

    uiNeighborIntra |= LEFT_MB_POS;

    if (IS_INTRA4x4 ((pCurMb - 1)->uiMbType)) {
      // Rightmost column of the left MB's stored intra-4x4 prediction modes.
      int8_t* pLeftMbIntra4x4PredMode = pCurMb->pIntra4x4PredMode - INTRA_4x4_MODE_NUM;
      pMbCache->iIntraPredMode[8] = pLeftMbIntra4x4PredMode[4];
      pMbCache->iIntraPredMode[16] = pLeftMbIntra4x4PredMode[5];
      pMbCache->iIntraPredMode[24] = pLeftMbIntra4x4PredMode[6];
      pMbCache->iIntraPredMode[32] = pLeftMbIntra4x4PredMode[3];
    } else { // if ( 0 == constrained_intra_pred_flag || IS_INTRA16x16((pCurMb-1)->uiMbType ))
      pMbCache->iIntraPredMode[8] =
        pMbCache->iIntraPredMode[16] =
          pMbCache->iIntraPredMode[24] =
            pMbCache->iIntraPredMode[32] = 2; //DC
    }
  } else {
    // No left neighbor: mark the whole left column unavailable.
    pMbCache->iNonZeroCoeffCount[ 8] =
      pMbCache->iNonZeroCoeffCount[16] =
        pMbCache->iNonZeroCoeffCount[24] =
          pMbCache->iNonZeroCoeffCount[32] = -1;//unavailable
    pMbCache->iNonZeroCoeffCount[13] =
      pMbCache->iNonZeroCoeffCount[21] =
        pMbCache->iNonZeroCoeffCount[37] =
          pMbCache->iNonZeroCoeffCount[45] = -1;//unavailable

    pMbCache->iIntraPredMode[8] =
      pMbCache->iIntraPredMode[16] =
        pMbCache->iIntraPredMode[24] =
          pMbCache->iIntraPredMode[32] = -1;//unavailable
  }

  if (uiNeighborAvail & TOP_MB_POS) { //TOP MB
    SMB* pTopMb = pCurMb - iMbWidth;
    // Bottom luma row of the top MB (NZC indices 12..15), copied as one 32-bit load.
    ST32 (&pMbCache->iNonZeroCoeffCount[1], LD32 (&pTopMb->pNonZeroCount[12]));

    // Bottom chroma rows (NZC indices 20..21 and 22..23).
    ST16 (&pMbCache->iNonZeroCoeffCount[6], LD16 (&pTopMb->pNonZeroCount[20]));
    ST16 (&pMbCache->iNonZeroCoeffCount[30], LD16 (&pTopMb->pNonZeroCount[22]));

    uiNeighborIntra |= TOP_MB_POS;

    if (IS_INTRA4x4 (pTopMb->uiMbType)) {
      // Bottom row of the top MB's intra-4x4 modes (4 bytes at once).
      ST32 (pMbCache->iIntraPredMode + 1, LD32 (&pTopMb->pIntra4x4PredMode[0]));
    } else { // if ( 0 == constrained_intra_pred_flag || IS_INTRA16x16( pTopMb->uiMbType ))
      const uint32_t kuiDc32 = 0x02020202; // four DC (2) modes packed
      ST32 (pMbCache->iIntraPredMode + 1 , kuiDc32);
    }
  } else {
    // No top neighbor: mark the whole top row unavailable (-1 per byte).
    const uint32_t kuiUnavail32 = 0xffffffff;
    ST32 (pMbCache->iIntraPredMode + 1 , kuiUnavail32);
    ST32 (&pMbCache->iNonZeroCoeffCount[1], kuiUnavail32);

    ST16 (&pMbCache->iNonZeroCoeffCount[6], 0xffff);
    ST16 (&pMbCache->iNonZeroCoeffCount[30], 0xffff);
  }

  if (uiNeighborAvail & TOPLEFT_MB_POS) {
    uiNeighborIntra |= 0x04; // presumably == TOPLEFT_MB_POS; confirm bit layout
  }


  if (uiNeighborAvail & TOPRIGHT_MB_POS) {
    uiNeighborIntra |= 0x08; // presumably == TOPRIGHT_MB_POS; confirm bit layout
  }
  pMbCache->uiNeighborIntra = uiNeighborIntra;
}
//fill cache of neighbor MB, containing motion_vector and uiRefIndex
// Loads MV / reference-index / SAD-cost context of the four causal neighbor
// MBs (left, top, top-left, top-right) into the MB cache for inter mode
// decision. A neighbor that is absent or not inter-coded contributes zero MVs
// and REF_NOT_IN_LIST (present but unusable) / REF_NOT_AVAIL (absent) indices.
// pVaaBgMbFlag is unused here; presumably kept so the signature matches
// pfFillInterNeighborCache shared with FillNeighborCacheInterWithBGD — confirm.
void FillNeighborCacheInterWithoutBGD (SMbCache* pMbCache, SMB* pCurMb, int32_t iMbWidth, int8_t* pVaaBgMbFlag) {
  uint32_t uiNeighborAvail = pCurMb->uiNeighborAvail;
  SMB* pLeftMb = pCurMb - 1 ;
  SMB* pTopMb = pCurMb - iMbWidth;
  SMB* pLeftTopMb = pCurMb - iMbWidth - 1 ;
  SMB* iRightTopMb = pCurMb - iMbWidth + 1 ; // NOTE(review): mis-named — 'i' prefix on a pointer
  SMVComponentUnit* pMvComp = &pMbCache->sMvComponents;
  if ((uiNeighborAvail & LEFT_MB_POS) && IS_SVC_INTER (pLeftMb->uiMbType)) {
    // Rightmost MV column of the left MB (4x4 indices 3/7/11/15).
    pMvComp->sMotionVectorCache[ 6] = pLeftMb->sMv[ 3];
    pMvComp->sMotionVectorCache[12] = pLeftMb->sMv[ 7];
    pMvComp->sMotionVectorCache[18] = pLeftMb->sMv[11];
    pMvComp->sMotionVectorCache[24] = pLeftMb->sMv[15];
    // Right 8x8 halves of the left MB (ref indices stored per 8x8: 1 upper, 3 lower).
    pMvComp->iRefIndexCache[ 6] = pLeftMb->pRefIndex[1];
    pMvComp->iRefIndexCache[12] = pLeftMb->pRefIndex[1];
    pMvComp->iRefIndexCache[18] = pLeftMb->pRefIndex[3];
    pMvComp->iRefIndexCache[24] = pLeftMb->pRefIndex[3];
    pMbCache->iSadCost[3] = pLeftMb->pSadCost[0];

    if (pLeftMb->uiMbType == MB_TYPE_SKIP) {
      pMbCache->bMbTypeSkip[3] = 1;
      // Neighbor's encoded SAD, addressed relative to the current MB position.
      pMbCache->iSadCostSkip[3] = pMbCache->pEncSad[-1];
    } else {
      pMbCache->bMbTypeSkip[3] = 0;
      pMbCache->iSadCostSkip[3] = 0;
    }
  } else { //unavailable or non-inter
    ST32 (&pMvComp->sMotionVectorCache[ 6], 0);
    ST32 (&pMvComp->sMotionVectorCache[12], 0);
    ST32 (&pMvComp->sMotionVectorCache[18], 0);
    ST32 (&pMvComp->sMotionVectorCache[24], 0);
    pMvComp->iRefIndexCache[ 6] =
      pMvComp->iRefIndexCache[12] =
        pMvComp->iRefIndexCache[18] =
          pMvComp->iRefIndexCache[24] = (uiNeighborAvail & LEFT_MB_POS) ? REF_NOT_IN_LIST : REF_NOT_AVAIL;
    pMbCache->iSadCost[3] = 0;
    pMbCache->bMbTypeSkip[3] = 0;
    pMbCache->iSadCostSkip[3] = 0;
  }

  if ((uiNeighborAvail & TOP_MB_POS) && IS_SVC_INTER (pTopMb->uiMbType)) { //TOP MB
    // Bottom MV row of the top MB (4x4 indices 12..15), two 64-bit copies.
    ST64 (&pMvComp->sMotionVectorCache[1], LD64 (&pTopMb->sMv[12]));
    ST64 (&pMvComp->sMotionVectorCache[3], LD64 (&pTopMb->sMv[14]));
    pMvComp->iRefIndexCache[1] = pTopMb->pRefIndex[2];
    pMvComp->iRefIndexCache[2] = pTopMb->pRefIndex[2];
    pMvComp->iRefIndexCache[3] = pTopMb->pRefIndex[3];
    pMvComp->iRefIndexCache[4] = pTopMb->pRefIndex[3];
    pMbCache->iSadCost[1] = pTopMb->pSadCost[0];

    if (pTopMb->uiMbType == MB_TYPE_SKIP) {
      pMbCache->bMbTypeSkip[1] = 1;
      pMbCache->iSadCostSkip[1] = pMbCache->pEncSad[-iMbWidth];
    } else {
      pMbCache->bMbTypeSkip[1] = 0;
      pMbCache->iSadCostSkip[1] = 0;
    }
  } else { //unavail
    ST64 (&pMvComp->sMotionVectorCache[1], 0);
    ST64 (&pMvComp->sMotionVectorCache[3], 0);
    pMvComp->iRefIndexCache[1] =
      pMvComp->iRefIndexCache[2] =
        pMvComp->iRefIndexCache[3] =
          pMvComp->iRefIndexCache[4] = (uiNeighborAvail & TOP_MB_POS) ? REF_NOT_IN_LIST : REF_NOT_AVAIL;
    pMbCache->iSadCost[1] = 0;

    pMbCache->bMbTypeSkip[1] = 0;
    pMbCache->iSadCostSkip[1] = 0;
  }

  if ((uiNeighborAvail & TOPLEFT_MB_POS) && IS_SVC_INTER (pLeftTopMb->uiMbType)) { //LEFT_TOP MB
    // Bottom-right 4x4 block of the top-left MB.
    pMvComp->sMotionVectorCache[0] = pLeftTopMb->sMv[15];
    pMvComp->iRefIndexCache[0] = pLeftTopMb->pRefIndex[3];
    pMbCache->iSadCost[0] = pLeftTopMb->pSadCost[0];

    if (pLeftTopMb->uiMbType == MB_TYPE_SKIP) {
      pMbCache->bMbTypeSkip[0] = 1;
      pMbCache->iSadCostSkip[0] = pMbCache->pEncSad[-iMbWidth - 1];
    } else {
      pMbCache->bMbTypeSkip[0] = 0;
      pMbCache->iSadCostSkip[0] = 0;
    }
  } else { //unavail
    ST32 (&pMvComp->sMotionVectorCache[0], 0);
    pMvComp->iRefIndexCache[0] = (uiNeighborAvail & TOPLEFT_MB_POS) ? REF_NOT_IN_LIST : REF_NOT_AVAIL;
    pMbCache->iSadCost[0] = 0;
    pMbCache->bMbTypeSkip[0] = 0;
    pMbCache->iSadCostSkip[0] = 0;
  }

  if ((uiNeighborAvail & TOPRIGHT_MB_POS) && IS_SVC_INTER (iRightTopMb->uiMbType)) { //RIGHT_TOP MB
    // Bottom-left 4x4 block of the top-right MB.
    pMvComp->sMotionVectorCache[5] = iRightTopMb->sMv[12];
    pMvComp->iRefIndexCache[5] = iRightTopMb->pRefIndex[2];
    pMbCache->iSadCost[2] = iRightTopMb->pSadCost[0];

    if (iRightTopMb->uiMbType == MB_TYPE_SKIP) {
      pMbCache->bMbTypeSkip[2] = 1;
      pMbCache->iSadCostSkip[2] = pMbCache->pEncSad[-iMbWidth + 1];
    } else {
      pMbCache->bMbTypeSkip[2] = 0;
      pMbCache->iSadCostSkip[2] = 0;
    }
  } else { //unavail
    ST32 (&pMvComp->sMotionVectorCache[5], 0);
    pMvComp->iRefIndexCache[5] = (uiNeighborAvail & TOPRIGHT_MB_POS) ? REF_NOT_IN_LIST : REF_NOT_AVAIL;
    pMbCache->iSadCost[2] = 0;
    pMbCache->bMbTypeSkip[2] = 0;
    pMbCache->iSadCostSkip[2] = 0;
  }

  //right-top 4*4 pBlock unavailable
  ST32 (&pMvComp->sMotionVectorCache[ 9], 0);
  ST32 (&pMvComp->sMotionVectorCache[21], 0);
  ST32 (&pMvComp->sMotionVectorCache[11], 0);
  ST32 (&pMvComp->sMotionVectorCache[17], 0);
  ST32 (&pMvComp->sMotionVectorCache[23], 0);
  pMvComp->iRefIndexCache[ 9] =
    pMvComp->iRefIndexCache[11] =
      pMvComp->iRefIndexCache[17] =
        pMvComp->iRefIndexCache[21] =
          pMvComp->iRefIndexCache[23] = REF_NOT_AVAIL;
}
252 
// Same as FillNeighborCacheInterWithoutBGD, but a SKIP neighbor's SAD is only
// trusted (bMbTypeSkip set) when the neighbor's background-detection flag
// pVaaBgMbFlag[offset] is 0 — presumably "not a background MB"; confirm flag
// semantics against the VAA module.
void FillNeighborCacheInterWithBGD (SMbCache* pMbCache, SMB* pCurMb, int32_t iMbWidth, int8_t* pVaaBgMbFlag) {
  uint32_t uiNeighborAvail = pCurMb->uiNeighborAvail;
  SMB* pLeftMb = pCurMb - 1 ;
  SMB* pTopMb = pCurMb - iMbWidth;
  SMB* pLeftTopMb = pCurMb - iMbWidth - 1 ;
  SMB* iRightTopMb = pCurMb - iMbWidth + 1 ; // NOTE(review): mis-named — 'i' prefix on a pointer
  SMVComponentUnit* pMvComp = &pMbCache->sMvComponents;

  if ((uiNeighborAvail & LEFT_MB_POS) && IS_SVC_INTER (pLeftMb->uiMbType)) {
    // Rightmost MV column of the left MB (4x4 indices 3/7/11/15).
    pMvComp->sMotionVectorCache[ 6] = pLeftMb->sMv[ 3];
    pMvComp->sMotionVectorCache[12] = pLeftMb->sMv[ 7];
    pMvComp->sMotionVectorCache[18] = pLeftMb->sMv[11];
    pMvComp->sMotionVectorCache[24] = pLeftMb->sMv[15];
    pMvComp->iRefIndexCache[ 6] = pLeftMb->pRefIndex[1];
    pMvComp->iRefIndexCache[12] = pLeftMb->pRefIndex[1];
    pMvComp->iRefIndexCache[18] = pLeftMb->pRefIndex[3];
    pMvComp->iRefIndexCache[24] = pLeftMb->pRefIndex[3];
    pMbCache->iSadCost[3] = pLeftMb->pSadCost[0];

    if (pLeftMb->uiMbType == MB_TYPE_SKIP && pVaaBgMbFlag[-1] == 0) {
      pMbCache->bMbTypeSkip[3] = 1;
      pMbCache->iSadCostSkip[3] = pMbCache->pEncSad[-1];
    } else {
      pMbCache->bMbTypeSkip[3] = 0;
      pMbCache->iSadCostSkip[3] = 0;
    }
  } else { //unavailable or non-inter
    ST32 (&pMvComp->sMotionVectorCache[ 6], 0);
    ST32 (&pMvComp->sMotionVectorCache[12], 0);
    ST32 (&pMvComp->sMotionVectorCache[18], 0);
    ST32 (&pMvComp->sMotionVectorCache[24], 0);
    pMvComp->iRefIndexCache[ 6] =
      pMvComp->iRefIndexCache[12] =
        pMvComp->iRefIndexCache[18] =
          pMvComp->iRefIndexCache[24] = (uiNeighborAvail & LEFT_MB_POS) ? REF_NOT_IN_LIST : REF_NOT_AVAIL;
    pMbCache->iSadCost[3] = 0;
    pMbCache->bMbTypeSkip[3] = 0;
    pMbCache->iSadCostSkip[3] = 0;
  }

  if ((uiNeighborAvail & TOP_MB_POS) && IS_SVC_INTER (pTopMb->uiMbType)) { //TOP MB
    // Bottom MV row of the top MB (4x4 indices 12..15).
    ST64 (&pMvComp->sMotionVectorCache[1], LD64 (&pTopMb->sMv[12]));
    ST64 (&pMvComp->sMotionVectorCache[3], LD64 (&pTopMb->sMv[14]));
    pMvComp->iRefIndexCache[1] = pTopMb->pRefIndex[2];
    pMvComp->iRefIndexCache[2] = pTopMb->pRefIndex[2];
    pMvComp->iRefIndexCache[3] = pTopMb->pRefIndex[3];
    pMvComp->iRefIndexCache[4] = pTopMb->pRefIndex[3];
    pMbCache->iSadCost[1] = pTopMb->pSadCost[0];
    if (pTopMb->uiMbType == MB_TYPE_SKIP  && pVaaBgMbFlag[-iMbWidth] == 0) {
      pMbCache->bMbTypeSkip[1] = 1;
      pMbCache->iSadCostSkip[1] = pMbCache->pEncSad[-iMbWidth];
    } else {
      pMbCache->bMbTypeSkip[1] = 0;
      pMbCache->iSadCostSkip[1] = 0;
    }
  } else { //unavail
    ST64 (&pMvComp->sMotionVectorCache[1], 0);
    ST64 (&pMvComp->sMotionVectorCache[3], 0);
    pMvComp->iRefIndexCache[1] =
      pMvComp->iRefIndexCache[2] =
        pMvComp->iRefIndexCache[3] =
          pMvComp->iRefIndexCache[4] = (uiNeighborAvail & TOP_MB_POS) ? REF_NOT_IN_LIST : REF_NOT_AVAIL;
    pMbCache->iSadCost[1] = 0;
    pMbCache->bMbTypeSkip[1] = 0;
    pMbCache->iSadCostSkip[1] = 0;
  }


  if ((uiNeighborAvail & TOPLEFT_MB_POS) && IS_SVC_INTER (pLeftTopMb->uiMbType)) { //LEFT_TOP MB
    // Bottom-right 4x4 block of the top-left MB.
    pMvComp->sMotionVectorCache[0] = pLeftTopMb->sMv[15];
    pMvComp->iRefIndexCache[0] = pLeftTopMb->pRefIndex[3];
    pMbCache->iSadCost[0] = pLeftTopMb->pSadCost[0];

    if (pLeftTopMb->uiMbType == MB_TYPE_SKIP  && pVaaBgMbFlag[-iMbWidth - 1] == 0) {
      pMbCache->bMbTypeSkip[0] = 1;
      pMbCache->iSadCostSkip[0] = pMbCache->pEncSad[-iMbWidth - 1];
    } else {
      pMbCache->bMbTypeSkip[0] = 0;
      pMbCache->iSadCostSkip[0] = 0;
    }
  } else { //unavail
    ST32 (&pMvComp->sMotionVectorCache[0], 0);
    pMvComp->iRefIndexCache[0] = (uiNeighborAvail & TOPLEFT_MB_POS) ? REF_NOT_IN_LIST : REF_NOT_AVAIL;
    pMbCache->iSadCost[0] = 0;
    pMbCache->bMbTypeSkip[0] = 0;
    pMbCache->iSadCostSkip[0] = 0;
  }

  if ((uiNeighborAvail & TOPRIGHT_MB_POS) && IS_SVC_INTER (iRightTopMb->uiMbType)) { //RIGHT_TOP MB
    // Bottom-left 4x4 block of the top-right MB.
    pMvComp->sMotionVectorCache[5] = iRightTopMb->sMv[12];
    pMvComp->iRefIndexCache[5] = iRightTopMb->pRefIndex[2];
    pMbCache->iSadCost[2] = iRightTopMb->pSadCost[0];

    if (iRightTopMb->uiMbType == MB_TYPE_SKIP  && pVaaBgMbFlag[-iMbWidth + 1] == 0) {
      pMbCache->bMbTypeSkip[2] = 1;
      pMbCache->iSadCostSkip[2] = pMbCache->pEncSad[-iMbWidth + 1];
    } else {
      pMbCache->bMbTypeSkip[2] = 0;
      pMbCache->iSadCostSkip[2] = 0;
    }
  } else { //unavail
    ST32 (&pMvComp->sMotionVectorCache[5], 0);
    pMvComp->iRefIndexCache[5] = (uiNeighborAvail & TOPRIGHT_MB_POS) ? REF_NOT_IN_LIST : REF_NOT_AVAIL;
    pMbCache->iSadCost[2] = 0;
    pMbCache->bMbTypeSkip[2] = 0;
    pMbCache->iSadCostSkip[2] = 0;
  }

  //right-top 4*4 pBlock unavailable
  ST32 (&pMvComp->sMotionVectorCache[ 9], 0);
  ST32 (&pMvComp->sMotionVectorCache[21], 0);
  ST32 (&pMvComp->sMotionVectorCache[11], 0);
  ST32 (&pMvComp->sMotionVectorCache[17], 0);
  ST32 (&pMvComp->sMotionVectorCache[23], 0);
  pMvComp->iRefIndexCache[ 9] =
    pMvComp->iRefIndexCache[11] =
      pMvComp->iRefIndexCache[17] =
        pMvComp->iRefIndexCache[21] =
          pMvComp->iRefIndexCache[23] = REF_NOT_AVAIL;
}
373 
InitFillNeighborCacheInterFunc(SWelsFuncPtrList * pFuncList,const int32_t kiFlag)374 void InitFillNeighborCacheInterFunc (SWelsFuncPtrList* pFuncList, const int32_t kiFlag) {
375   pFuncList->pfFillInterNeighborCache = kiFlag ? FillNeighborCacheInterWithBGD : FillNeighborCacheInterWithoutBGD;
376 }
377 
UpdateMbMv_c(SMVUnitXY * pMvBuffer,const SMVUnitXY ksMv)378 void UpdateMbMv_c (SMVUnitXY* pMvBuffer, const SMVUnitXY ksMv) {
379   int32_t k = 0;
380   for (; k < MB_BLOCK4x4_NUM; k += 4) {
381     pMvBuffer[k  ] =
382       pMvBuffer[k + 1] =
383         pMvBuffer[k + 2] =
384           pMvBuffer[k + 3] = ksMv;
385   }
386 }
387 
388 
MdInterAnalysisVaaInfo_c(int32_t * pSad8x8)389 uint8_t MdInterAnalysisVaaInfo_c (int32_t* pSad8x8) {
390   int32_t iSadBlock[4], iAverageSadBlock[4];
391   int32_t iAverageSad, iVarianceSad;
392 
393   iSadBlock[0] = pSad8x8[0];
394   iAverageSad = iSadBlock[0];
395 
396   iSadBlock[1] = pSad8x8[1];
397   iAverageSad += iSadBlock[1];
398 
399   iSadBlock[2] = pSad8x8[2];
400   iAverageSad += iSadBlock[2];
401 
402   iSadBlock[3] = pSad8x8[3];
403   iAverageSad += iSadBlock[3];
404 
405   iAverageSad = iAverageSad >> 2;
406 
407   iAverageSadBlock[0] = (iSadBlock[0] >> 6) - (iAverageSad >> 6);
408   iVarianceSad = iAverageSadBlock[0] * iAverageSadBlock[0];
409 
410   iAverageSadBlock[1] = (iSadBlock[1] >> 6) - (iAverageSad >> 6);
411   iVarianceSad += iAverageSadBlock[1] * iAverageSadBlock[1];
412 
413   iAverageSadBlock[2] = (iSadBlock[2] >> 6) - (iAverageSad >> 6);
414   iVarianceSad += iAverageSadBlock[2] * iAverageSadBlock[2];
415 
416   iAverageSadBlock[3] = (iSadBlock[3] >> 6) - (iAverageSad >> 6);
417   iVarianceSad += iAverageSadBlock[3] * iAverageSadBlock[3];
418 
419   if (iVarianceSad < INTER_VARIANCE_SAD_THRESHOLD) {
420     return 15;
421   }
422 
423   uint8_t uiMbSign = 0;
424   if (iSadBlock[0] > iAverageSad)
425     uiMbSign |= 0x08;
426   if (iSadBlock[1] > iAverageSad)
427     uiMbSign |= 0x04;
428   if (iSadBlock[2] > iAverageSad)
429     uiMbSign |= 0x02;
430   if (iSadBlock[3] > iAverageSad)
431     uiMbSign |= 0x01;
432   return (uiMbSign);
433 }
434 
AnalysisVaaInfoIntra_c(uint8_t * pDataY,const int32_t kiLineSize)435 int32_t AnalysisVaaInfoIntra_c (uint8_t* pDataY, const int32_t kiLineSize) {
436   ENFORCE_STACK_ALIGN_1D (uint16_t, uiAvgBlock, 16, 16)
437   uint16_t* pBlock = &uiAvgBlock[0];
438   uint8_t* pEncData         = pDataY;
439   const int32_t kiLineSize2 = kiLineSize << 1;
440   const int32_t kiLineSize3 = kiLineSize + kiLineSize2;
441   const int32_t kiLineSize4 = kiLineSize << 2;
442   int32_t i = 0, j = 0, num = 0;
443   int32_t iSumAvg = 0, iSumSqr = 0;
444 
445 //  analysis_vaa_info_intra_core_c( pDataY, iLineSize, pBlock );
446   for (; j < 16; j += 4) {
447     num = 0;
448     for (i = 0; i < 16; i += 4, num ++) {
449       pBlock[num] =  pEncData[i              ] + pEncData[i + 1              ] + pEncData[i + 2              ] + pEncData[i +
450                      3              ];
451       pBlock[num] += pEncData[i + kiLineSize ] + pEncData[i + kiLineSize  + 1] + pEncData[i + kiLineSize  + 2] + pEncData[i +
452                      kiLineSize  + 3];
453       pBlock[num] += pEncData[i + kiLineSize2] + pEncData[i + kiLineSize2 + 1] + pEncData[i + kiLineSize2 + 2] + pEncData[i +
454                      kiLineSize2 + 3];
455       pBlock[num] += pEncData[i + kiLineSize3] + pEncData[i + kiLineSize3 + 1] + pEncData[i + kiLineSize3 + 2] + pEncData[i +
456                      kiLineSize3 + 3];
457       pBlock[num] >>=  4;
458     }
459     pBlock += 4;
460     pEncData += kiLineSize4;
461   }
462 
463   pBlock = &uiAvgBlock[0];
464   i = 4;
465   for (; i > 0; --i) {
466     iSumAvg += pBlock[0] + pBlock[1] + pBlock[2] + pBlock[3];
467     iSumSqr += pBlock[0] * pBlock[0] + pBlock[1] * pBlock[1] + pBlock[2] * pBlock[2] + pBlock[3] * pBlock[3];
468 
469     pBlock += 4;
470   }
471 
472 
473   return /*variance =*/ (iSumSqr - ((iSumAvg * iSumAvg) >> 4));
474 }
475 
// for pfGetVarianceFromIntraVaa function ptr adaptive by CPU features, 6/7/2010
// Installs the C reference implementations of the VAA mode-decision helpers,
// then (x86 builds only) overrides them with the best SIMD variants the
// detected CPU supports: SSE2 for all three, SSSE3 for the intra variance,
// SSE4.1 for the inter sign helper.
void InitIntraAnalysisVaaInfo (SWelsFuncPtrList* pFuncList, const uint32_t kuiCpuFlag) {
  pFuncList->pfGetVarianceFromIntraVaa      = AnalysisVaaInfoIntra_c;
  pFuncList->pfGetMbSignFromInterVaa        = MdInterAnalysisVaaInfo_c;
  pFuncList->pfUpdateMbMv                   = UpdateMbMv_c;

#if defined(X86_ASM)
  if ((kuiCpuFlag & WELS_CPU_SSE2) == WELS_CPU_SSE2) {
    pFuncList->pfGetVarianceFromIntraVaa    = AnalysisVaaInfoIntra_sse2;
    pFuncList->pfGetMbSignFromInterVaa      = MdInterAnalysisVaaInfo_sse2;
    pFuncList->pfUpdateMbMv                 = UpdateMbMv_sse2;
  }
  if ((kuiCpuFlag & WELS_CPU_SSSE3) == WELS_CPU_SSSE3) {
    pFuncList->pfGetVarianceFromIntraVaa    = AnalysisVaaInfoIntra_ssse3;
  }
  if ((kuiCpuFlag & WELS_CPU_SSE41) == WELS_CPU_SSE41) {
    pFuncList->pfGetMbSignFromInterVaa      = MdInterAnalysisVaaInfo_sse41;
  }
#endif//X86_ASM
}
496 
MdIntraAnalysisVaaInfo(sWelsEncCtx * pEncCtx,uint8_t * pEncMb)497 bool MdIntraAnalysisVaaInfo (sWelsEncCtx* pEncCtx, uint8_t* pEncMb) {
498 
499   SDqLayer* pCurDqLayer     = pEncCtx->pCurDqLayer;
500   const int32_t kiLineSize  = pCurDqLayer->iEncStride[0];
501   const int32_t kiVariance  = pEncCtx->pFuncList->pfGetVarianceFromIntraVaa (pEncMb, kiLineSize);
502   return (kiVariance >= INTRA_VARIANCE_SAD_THRESHOLD);
503 }
504 
InitMeRefinePointer(SMeRefinePointer * pMeRefine,SMbCache * pMbCache,int32_t iStride)505 void InitMeRefinePointer (SMeRefinePointer* pMeRefine, SMbCache* pMbCache, int32_t iStride) {
506   pMeRefine->pHalfPixH    = &pMbCache->pBufferInterPredMe[0] + iStride;
507   pMeRefine->pHalfPixV    = &pMbCache->pBufferInterPredMe[640] + iStride;
508 
509   pMeRefine->pQuarPixBest = &pMbCache->pBufferInterPredMe[1280] + iStride;
510   pMeRefine->pQuarPixTmp  = &pMbCache->pBufferInterPredMe[1920] + iStride;
511 }
// Working state shared between MeRefineFracPixel and MeRefineQuarPixel while
// searching the four quarter-pel candidates around the best half-pel position.
typedef struct TagQuarParams {
  int32_t iBestCost;      // best cost found so far (updated via SWITCH_BEST_TMP_BUF)
  int32_t iBestHalfPix;   // winning half-pel position (REFINE_ME_HALF_PIXEL_* or none)
  int32_t iStrideA;       // stride of pSrcB for candidates 0 and 1 (vertical pair)
  int32_t iStrideB;       // stride of pSrcB for candidates 2 and 3 (horizontal pair)
  uint8_t* pRef;          // integer-pel reference block
  uint8_t* pSrcB[4];      // second averaging source, one per quarter-pel candidate
  uint8_t* pSrcA[4];      // first averaging source (fixed ME_REFINE_BUF_STRIDE stride)
  int32_t iLms[4];        // per-candidate lambda/MVD cost added to the distortion
  int32_t iBestQuarPix;   // winning quarter-pel position (ME_QUAR_PIXEL_*)
} SQuarRefineParams;
523 
// Accept the current candidate as the new best: record its cost and swap the
// "best" and "tmp" prediction buffers so the winner is preserved.
// NOTE: captures pParams, iCurCost and pTmp from the enclosing scope.
#define SWITCH_BEST_TMP_BUF(prev_best, curr_best){\
  pParams->iBestCost = iCurCost;\
  pTmp = prev_best;\
  prev_best = curr_best;\
  curr_best = pTmp;\
}
// Distortion of me_buf vs. the MB being encoded plus the lambda/MVD term 'lm'.
// NOTE: captures pFunc, kuiPixel, pEncMb and iStrideEnc from the enclosing scope.
#define CALC_COST(me_buf, lm) ( pFunc->sSampleDealingFuncs.pfMeCost[kuiPixel](pEncMb, iStrideEnc, me_buf, ME_REFINE_BUF_STRIDE) + lm )
531 
MeRefineQuarPixel(SWelsFuncPtrList * pFunc,SWelsME * pMe,SMeRefinePointer * pMeRefine,const int32_t kiWidth,const int32_t kiHeight,SQuarRefineParams * pParams,int32_t iStrideEnc)532 inline void MeRefineQuarPixel (SWelsFuncPtrList* pFunc, SWelsME* pMe, SMeRefinePointer* pMeRefine,
533                                const int32_t kiWidth, const int32_t kiHeight, SQuarRefineParams* pParams, int32_t iStrideEnc) {
534   PWelsSampleAveragingFunc pSampleAvg   = pFunc->sMcFuncs.pfSampleAveraging;
535   int32_t iCurCost;
536   uint8_t* pEncMb                       = pMe->pEncMb;
537   uint8_t* pTmp                         = NULL;
538   const uint8_t kuiPixel                = pMe->uiBlockSize;
539 
540   pSampleAvg (pMeRefine->pQuarPixTmp, ME_REFINE_BUF_STRIDE, pParams->pSrcA[0], ME_REFINE_BUF_STRIDE,
541               pParams->pSrcB[0], pParams->iStrideA, kiWidth, kiHeight);
542 
543   iCurCost = CALC_COST (pMeRefine->pQuarPixTmp, pParams->iLms[0]);
544   if (iCurCost < pParams->iBestCost) {
545     pParams->iBestQuarPix = ME_QUAR_PIXEL_TOP;
546     SWITCH_BEST_TMP_BUF (pMeRefine->pQuarPixBest, pMeRefine->pQuarPixTmp);
547   }
548   //=========================(0, 1)=======================//
549   pSampleAvg (pMeRefine->pQuarPixTmp, ME_REFINE_BUF_STRIDE, pParams->pSrcA[1],
550               ME_REFINE_BUF_STRIDE, pParams->pSrcB[1], pParams->iStrideA, kiWidth, kiHeight);
551   iCurCost = CALC_COST (pMeRefine->pQuarPixTmp, pParams->iLms[1]);
552   if (iCurCost < pParams->iBestCost) {
553     pParams->iBestQuarPix = ME_QUAR_PIXEL_BOTTOM;
554     SWITCH_BEST_TMP_BUF (pMeRefine->pQuarPixBest, pMeRefine->pQuarPixTmp);
555   }
556   //==========================(-1, 0)=========================//
557   pSampleAvg (pMeRefine->pQuarPixTmp, ME_REFINE_BUF_STRIDE, pParams->pSrcA[2],
558               ME_REFINE_BUF_STRIDE, pParams->pSrcB[2], pParams->iStrideB, kiWidth, kiHeight);
559   iCurCost = CALC_COST (pMeRefine->pQuarPixTmp, pParams->iLms[2]);
560   if (iCurCost < pParams->iBestCost) {
561     pParams->iBestQuarPix = ME_QUAR_PIXEL_LEFT;
562     SWITCH_BEST_TMP_BUF (pMeRefine->pQuarPixBest, pMeRefine->pQuarPixTmp);
563   }
564   //==========================(1, 0)=========================//
565   pSampleAvg (pMeRefine->pQuarPixTmp, ME_REFINE_BUF_STRIDE, pParams->pSrcA[3],
566               ME_REFINE_BUF_STRIDE, pParams->pSrcB[3], pParams->iStrideB,  kiWidth, kiHeight);
567 
568   iCurCost = CALC_COST (pMeRefine->pQuarPixTmp, pParams->iLms[3]);
569   if (iCurCost < pParams->iBestCost) {
570     pParams->iBestQuarPix = ME_QUAR_PIXEL_RIGHT;
571     SWITCH_BEST_TMP_BUF (pMeRefine->pQuarPixBest, pMeRefine->pQuarPixTmp);
572   }
573 }
574 
MeRefineFracPixel(sWelsEncCtx * pEncCtx,uint8_t * pMemPredInterMb,SWelsME * pMe,SMeRefinePointer * pMeRefine,int32_t iWidth,int32_t iHeight)575 void MeRefineFracPixel (sWelsEncCtx* pEncCtx, uint8_t* pMemPredInterMb, SWelsME* pMe,
576                         SMeRefinePointer* pMeRefine, int32_t iWidth, int32_t iHeight) {
577   SWelsFuncPtrList* pFunc = pEncCtx->pFuncList;
578   int16_t iMvx = pMe->sMv.iMvX;
579   int16_t iMvy = pMe->sMv.iMvY;
580 
581   int16_t iHalfMvx = iMvx;
582   int16_t iHalfMvy = iMvy;
583   const int32_t kiStrideEnc = pEncCtx->pCurDqLayer->iEncStride[0];
584   const int32_t kiStrideRef = pEncCtx->pCurDqLayer->pRefPic->iLineSize[0];
585 
586   uint8_t* pEncData = pMe->pEncMb;
587   uint8_t* pRef = pMe->pRefMb;//091010
588 
589   int32_t iBestQuarPix = ME_NO_BEST_QUAR_PIXEL;
590 
591   SQuarRefineParams sParams;
592   static const int32_t iMvQuarAddX[10] = {0, 0, -1, 1, 0, 0, 0, -1, 1, 0};
593   const int32_t* pMvQuarAddY = iMvQuarAddX + 3;
594   uint8_t* pBestPredInter = pRef;
595   int32_t iInterBlk4Stride = ME_REFINE_BUF_STRIDE;
596 
597   int32_t iBestCost;
598   int32_t iCurCost;
599   int32_t iBestHalfPix;
600 
601   if (pEncCtx->pCurDqLayer->bSatdInMdFlag) {
602     iBestCost = pMe->uSadPredISatd.uiSatd + COST_MVD (pMe->pMvdCost, iMvx - pMe->sMvp.iMvX, iMvy - pMe->sMvp.iMvY);
603   } else {
604     iBestCost = pFunc->sSampleDealingFuncs.pfMeCost[pMe->uiBlockSize] (pEncData, kiStrideEnc, pRef, kiStrideRef) +
605                 COST_MVD (pMe->pMvdCost, iMvx - pMe->sMvp.iMvX, iMvy - pMe->sMvp.iMvY);
606   }
607 
608   iBestHalfPix = REFINE_ME_NO_BEST_HALF_PIXEL;
609 
610   pFunc->sMcFuncs.pfLumaHalfpelVer (pRef - kiStrideRef, kiStrideRef, pMeRefine->pHalfPixV, ME_REFINE_BUF_STRIDE, iWidth,
611                                     iHeight + 1);
612 
613   //step 1: get [iWidth][iHeight+1] half pixel from vertical filter
614   //===========================(0, -2)==============================//
615   iCurCost = pFunc->sSampleDealingFuncs.pfMeCost[pMe->uiBlockSize] (pEncData, kiStrideEnc, pMeRefine->pHalfPixV,
616              ME_REFINE_BUF_STRIDE) +
617              COST_MVD (pMe->pMvdCost, iMvx - pMe->sMvp.iMvX, iMvy - 2 - pMe->sMvp.iMvY);
618   if (iCurCost < iBestCost) {
619     iBestCost = iCurCost;
620     iBestHalfPix = REFINE_ME_HALF_PIXEL_TOP;
621     pBestPredInter = pMeRefine->pHalfPixV;
622   }
623   //===========================(0, 2)==============================//
624   iCurCost = pFunc->sSampleDealingFuncs.pfMeCost[pMe->uiBlockSize] (pEncData, kiStrideEnc,
625              pMeRefine->pHalfPixV + ME_REFINE_BUF_STRIDE, ME_REFINE_BUF_STRIDE) +
626              COST_MVD (pMe->pMvdCost, iMvx - pMe->sMvp.iMvX, iMvy + 2 - pMe->sMvp.iMvY);
627   if (iCurCost < iBestCost) {
628     iBestCost = iCurCost;
629     iBestHalfPix = REFINE_ME_HALF_PIXEL_BOTTOM;
630     pBestPredInter = pMeRefine->pHalfPixV + ME_REFINE_BUF_STRIDE;
631   }
632   pFunc->sMcFuncs.pfLumaHalfpelHor (pRef - 1, kiStrideRef, pMeRefine->pHalfPixH, ME_REFINE_BUF_STRIDE, iWidth + 1,
633                                     iHeight);
634   //step 2: get [iWidth][iHeight+1] half pixel from horizon filter
635 
636   //===========================(-2, 0)==============================//
637   iCurCost = pFunc->sSampleDealingFuncs.pfMeCost[pMe->uiBlockSize] (pEncData, kiStrideEnc, pMeRefine->pHalfPixH,
638              ME_REFINE_BUF_STRIDE) +
639              COST_MVD (pMe->pMvdCost, iMvx - 2 - pMe->sMvp.iMvX, iMvy - pMe->sMvp.iMvY);
640   if (iCurCost < iBestCost) {
641     iBestCost = iCurCost;
642     iBestHalfPix = REFINE_ME_HALF_PIXEL_LEFT;
643     pBestPredInter = pMeRefine->pHalfPixH;
644   }
645   //===========================(2, 0)===============================//
646   iCurCost = pFunc->sSampleDealingFuncs.pfMeCost[pMe->uiBlockSize] (pEncData, kiStrideEnc, pMeRefine->pHalfPixH + 1,
647              ME_REFINE_BUF_STRIDE) +
648              COST_MVD (pMe->pMvdCost, iMvx + 2 - pMe->sMvp.iMvX, iMvy - pMe->sMvp.iMvY);
649   if (iCurCost < iBestCost) {
650     iBestCost = iCurCost;
651     iBestHalfPix = REFINE_ME_HALF_PIXEL_RIGHT;
652     pBestPredInter = pMeRefine->pHalfPixH + 1;
653   }
654 
655   sParams.iBestCost = iBestCost;
656   sParams.iBestHalfPix = iBestHalfPix;
657   sParams.pRef = pRef;
658   sParams.iBestQuarPix = ME_NO_BEST_QUAR_PIXEL;
659 
660   //step 5: if no best half-pixel prediction, try quarter pixel prediction
661   //        if yes, must get [X+1][X+1] half-pixel from (2, 2) horizontal and vertical filter
662   if (REFINE_ME_NO_BEST_HALF_PIXEL == iBestHalfPix) {
663     sParams.iStrideA = kiStrideRef;
664     sParams.iStrideB = kiStrideRef;
665     sParams.pSrcA[0] = pMeRefine->pHalfPixV;
666     sParams.pSrcA[1] = pMeRefine->pHalfPixV + ME_REFINE_BUF_STRIDE;
667     sParams.pSrcA[2] = pMeRefine->pHalfPixH;
668     sParams.pSrcA[3] = pMeRefine->pHalfPixH + 1;
669 
670     sParams.pSrcB[0] = sParams.pSrcB[1] = sParams.pSrcB[2] = sParams.pSrcB[3] = pRef;
671 
672     sParams.iLms[0] = COST_MVD (pMe->pMvdCost, iHalfMvx - pMe->sMvp.iMvX, iHalfMvy - 1 - pMe->sMvp.iMvY);
673     sParams.iLms[1] = COST_MVD (pMe->pMvdCost, iHalfMvx - pMe->sMvp.iMvX, iHalfMvy + 1 - pMe->sMvp.iMvY);
674     sParams.iLms[2] = COST_MVD (pMe->pMvdCost, iHalfMvx - 1 - pMe->sMvp.iMvX, iHalfMvy - pMe->sMvp.iMvY);
675     sParams.iLms[3] = COST_MVD (pMe->pMvdCost, iHalfMvx + 1 - pMe->sMvp.iMvX, iHalfMvy - pMe->sMvp.iMvY);
676   } else { //must get [X+1][X+1] half-pixel from (2, 2) horizontal and vertical filter
677     switch (iBestHalfPix) {
678     case REFINE_ME_HALF_PIXEL_LEFT: {
679       pMeRefine->pHalfPixHV = pMeRefine->pHalfPixV;//reuse pBuffer, here only h&hv
680       pFunc->sMcFuncs.pfLumaHalfpelCen (pRef - 1 - kiStrideRef, kiStrideRef, pMeRefine->pHalfPixHV, ME_REFINE_BUF_STRIDE,
681                                         iWidth + 1, iHeight + 1);
682 
683       iHalfMvx -= 2;
684       sParams.iStrideA = ME_REFINE_BUF_STRIDE;
685       sParams.iStrideB = kiStrideRef;
686       sParams.pSrcA[0] = pMeRefine->pHalfPixH;
687       sParams.pSrcA[3] = sParams.pSrcA[2] = sParams.pSrcA[1] = sParams.pSrcA[0];
688       sParams.pSrcB[0] = pMeRefine->pHalfPixHV;
689       sParams.pSrcB[1] = pMeRefine->pHalfPixHV + ME_REFINE_BUF_STRIDE;
690       sParams.pSrcB[2] = pRef - 1;
691       sParams.pSrcB[3] = pRef;
692 
693     }
694     break;
695     case REFINE_ME_HALF_PIXEL_RIGHT: {
696       pMeRefine->pHalfPixHV = pMeRefine->pHalfPixV;//reuse pBuffer, here only h&hv
697       pFunc->sMcFuncs.pfLumaHalfpelCen (pRef - 1 - kiStrideRef, kiStrideRef, pMeRefine->pHalfPixHV, ME_REFINE_BUF_STRIDE,
698                                         iWidth + 1, iHeight + 1);
699       iHalfMvx += 2;
700       sParams.iStrideA = ME_REFINE_BUF_STRIDE;
701       sParams.iStrideB = kiStrideRef;
702       sParams.pSrcA[0] = pMeRefine->pHalfPixH + 1;
703       sParams.pSrcA[3] = sParams.pSrcA[2] = sParams.pSrcA[1] = sParams.pSrcA[0];
704       sParams.pSrcB[0] = pMeRefine->pHalfPixHV + 1;
705       sParams.pSrcB[1] = pMeRefine->pHalfPixHV + 1 + ME_REFINE_BUF_STRIDE;
706       sParams.pSrcB[2] = pRef;
707       sParams.pSrcB[3] = pRef + 1;
708     }
709     break;
710     case REFINE_ME_HALF_PIXEL_TOP: {
711       pMeRefine->pHalfPixHV = pMeRefine->pHalfPixH;//reuse pBuffer, here only v&hv
712       pFunc->sMcFuncs.pfLumaHalfpelCen (pRef - 1 - kiStrideRef, kiStrideRef, pMeRefine->pHalfPixHV, ME_REFINE_BUF_STRIDE,
713                                         iWidth + 1, iHeight + 1);
714 
715       iHalfMvy -= 2;
716       sParams.iStrideA = kiStrideRef;
717       sParams.iStrideB = ME_REFINE_BUF_STRIDE;
718       sParams.pSrcA[0] = pMeRefine->pHalfPixV;
719       sParams.pSrcA[3] = sParams.pSrcA[2] = sParams.pSrcA[1] = sParams.pSrcA[0];
720       sParams.pSrcB[0] = pRef - kiStrideRef;
721       sParams.pSrcB[1] = pRef;
722       sParams.pSrcB[2] = pMeRefine->pHalfPixHV;
723       sParams.pSrcB[3] = pMeRefine->pHalfPixHV + 1;
724     }
725     break;
726     case REFINE_ME_HALF_PIXEL_BOTTOM: {
727       pMeRefine->pHalfPixHV = pMeRefine->pHalfPixH;//reuse pBuffer, here only v&hv
728       pFunc->sMcFuncs.pfLumaHalfpelCen (pRef - 1 - kiStrideRef, kiStrideRef, pMeRefine->pHalfPixHV, ME_REFINE_BUF_STRIDE,
729                                         iWidth + 1, iHeight + 1);
730       iHalfMvy += 2;
731       sParams.iStrideA = kiStrideRef;
732       sParams.iStrideB = ME_REFINE_BUF_STRIDE;
733       sParams.pSrcA[0] = pMeRefine->pHalfPixV + ME_REFINE_BUF_STRIDE;
734       sParams.pSrcA[3] = sParams.pSrcA[2] = sParams.pSrcA[1] = sParams.pSrcA[0];
735       sParams.pSrcB[0] = pRef;
736       sParams.pSrcB[1] = pRef + kiStrideRef;
737       sParams.pSrcB[2] = pMeRefine->pHalfPixHV + ME_REFINE_BUF_STRIDE;
738       sParams.pSrcB[3] = pMeRefine->pHalfPixHV + ME_REFINE_BUF_STRIDE + 1;
739     }
740     break;
741     default:
742       break;
743     }
744     sParams.iLms[0] = COST_MVD (pMe->pMvdCost, iHalfMvx - pMe->sMvp.iMvX, iHalfMvy - 1 - pMe->sMvp.iMvY);
745     sParams.iLms[1] = COST_MVD (pMe->pMvdCost, iHalfMvx - pMe->sMvp.iMvX, iHalfMvy + 1 - pMe->sMvp.iMvY);
746     sParams.iLms[2] = COST_MVD (pMe->pMvdCost, iHalfMvx - 1 - pMe->sMvp.iMvX, iHalfMvy - pMe->sMvp.iMvY);
747     sParams.iLms[3] = COST_MVD (pMe->pMvdCost, iHalfMvx + 1 - pMe->sMvp.iMvX, iHalfMvy - pMe->sMvp.iMvY);
748   }
749   MeRefineQuarPixel (pFunc, pMe, pMeRefine, iWidth, iHeight, &sParams, kiStrideEnc);
750 
751   if (iBestCost > sParams.iBestCost) {
752     pBestPredInter = pMeRefine->pQuarPixBest;
753     iBestCost = sParams.iBestCost;
754   }
755   iBestQuarPix = sParams.iBestQuarPix;
756 
757   //update final best MV
758   pMe->sMv.iMvX = iHalfMvx + iMvQuarAddX[iBestQuarPix];
759   pMe->sMv.iMvY = iHalfMvy + pMvQuarAddY[iBestQuarPix];
760   pMe->uiSatdCost = iBestCost;
761 
762   //No half or quarter pixel best, so do MC with integer pixel MV
763   if (iBestHalfPix + iBestQuarPix == NO_BEST_FRAC_PIX) {
764     pBestPredInter = pRef;
765     iInterBlk4Stride = kiStrideRef;
766   }
767   pMeRefine->pfCopyBlockByMode (pMemPredInterMb, MB_WIDTH_LUMA, pBestPredInter,
768                                 iInterBlk4Stride);
769 }
770 
//! \brief  Precompute the pixel offset of each of the 16 4x4 blocks of a
//!         macroblock inside a reference plane with row stride kiStrideRef.
//!         Blocks are indexed in the H.264 4x4 scan order (the four 4x4
//!         blocks of each 8x8 quadrant are grouped together).
//! \param  pBlkStride   [out] 16 offsets, one per 4x4 block
//! \param  kiStrideRef  luma row stride of the reference plane
void InitBlkStrideWithRef (int32_t* pBlkStride, const int32_t kiStrideRef) {
  // x / y pixel position of each 4x4 block in 4x4 scan order
  static const uint8_t kuiPosX[16] = {
    0, 4 , 0, 4 ,
    8, 12, 8, 12,
    0, 4 , 0, 4 ,
    8, 12, 8, 12
  };
  static const uint8_t kuiPosY[16] = {
    0, 0, 4 , 4 ,
    0, 0, 4 , 4 ,
    8, 8, 12, 12,
    8, 8, 12, 12
  };

  for (int32_t iIdx = 0; iIdx < 16; ++ iIdx) {
    pBlkStride[iIdx] = kuiPosX[iIdx] + kuiPosY[iIdx] * kiStrideRef;
  }
}
793 
794 /*
795  * iMvdSz = (648*2+1) or (972*2+1);
796  */
MvdCostInit(uint16_t * pMvdCostInter,const int32_t kiMvdSz)797 void MvdCostInit (uint16_t* pMvdCostInter, const int32_t kiMvdSz) {
798   const int32_t kiSz        = kiMvdSz >> 1;
799   uint16_t* pNegMvd         = pMvdCostInter;
800   uint16_t* pPosMvd         = pMvdCostInter + kiSz + 1;
801   const int32_t* kpQpLambda = &g_kiQpCostTable[0];
802   int32_t i, j;
803 
804   for (i = 0; i < 52; ++ i) {
805     const uint16_t kiLambda = kpQpLambda[i];
806     int32_t iNegSe = -kiSz;
807     int32_t iPosSe = 1;
808 
809     for (j = 0; j < kiSz; j += 4) {
810       *pNegMvd++ = kiLambda * BsSizeSE (iNegSe++);
811       *pNegMvd++ = kiLambda * BsSizeSE (iNegSe++);
812       *pNegMvd++ = kiLambda * BsSizeSE (iNegSe++);
813       *pNegMvd++ = kiLambda * BsSizeSE (iNegSe++);
814 
815       *pPosMvd++ = kiLambda * BsSizeSE (iPosSe++);
816       *pPosMvd++ = kiLambda * BsSizeSE (iPosSe++);
817       *pPosMvd++ = kiLambda * BsSizeSE (iPosSe++);
818       *pPosMvd++ = kiLambda * BsSizeSE (iPosSe++);
819     }
820     *pNegMvd = kiLambda;
821     pNegMvd += kiSz + 1;
822     pPosMvd += kiSz + 1;
823   }
824 }
825 
PredictSad(int8_t * pRefIndexCache,int32_t * pSadCostCache,int32_t uiRef,int32_t * pSadPred)826 void PredictSad (int8_t* pRefIndexCache, int32_t* pSadCostCache, int32_t uiRef, int32_t* pSadPred) {
827   const int32_t kiRefB  = pRefIndexCache[1];//top g_uiCache12_8x8RefIdx[0] - 4
828   int32_t iRefC         = pRefIndexCache[5];//top-right g_uiCache12_8x8RefIdx[0] - 2
829   const int32_t kiRefA  = pRefIndexCache[6];//left g_uiCache12_8x8RefIdx[0] - 1
830   const int32_t kiSadB  = pSadCostCache[1];
831   int32_t iSadC         = pSadCostCache[2];
832   const int32_t kiSadA  = pSadCostCache[3];
833 
834   int32_t iCount;
835 
836   if (iRefC == REF_NOT_AVAIL) {
837     iRefC = pRefIndexCache[0];//top-left g_uiCache12_8x8RefIdx[0] - 4 - 1
838     iSadC  = pSadCostCache[0];
839   }
840 
841   if (kiRefB == REF_NOT_AVAIL && iRefC == REF_NOT_AVAIL && kiRefA != REF_NOT_AVAIL) {
842     * pSadPred = kiSadA;
843   } else {
844     iCount  = (uiRef == kiRefA) << MB_LEFT_BIT;
845     iCount |= (uiRef == kiRefB) << MB_TOP_BIT;
846     iCount |= (uiRef == iRefC) << MB_TOPRIGHT_BIT;
847     switch (iCount) {
848     case LEFT_MB_POS:// A
849       *pSadPred = kiSadA;
850       break;
851     case TOP_MB_POS:// B
852       *pSadPred = kiSadB;
853       break;
854     case TOPRIGHT_MB_POS:// C or D
855       *pSadPred = iSadC;
856       break;
857     default:
858       *pSadPred = WelsMedian (kiSadA, kiSadB, iSadC);
859       break;
860     }
861   }
862 
863 #define REPLACE_SAD_MULTIPLY(x)   ((x) - (x>>3) + (x >>5))    // it's 0.90625, very close with 0.9
864   iCount = (*pSadPred) << 6;  // here *64 will not overflow. SAD range 0~ 255*256(max 2^16), int32_t is enough
865   *pSadPred = (REPLACE_SAD_MULTIPLY (iCount) + 32) >> 6;
866 #undef REPLACE_SAD_MULTIPLY
867 }
868 
869 
PredictSadSkip(int8_t * pRefIndexCache,bool * pMbSkipCache,int32_t * pSadCostCache,int32_t uiRef,int32_t * iSadPredSkip)870 void PredictSadSkip (int8_t* pRefIndexCache, bool* pMbSkipCache, int32_t* pSadCostCache, int32_t uiRef,
871                      int32_t* iSadPredSkip) {
872   const int32_t kiRefB  = pRefIndexCache[1];//top g_uiCache12_8x8RefIdx[0] - 4
873   int32_t iRefC         = pRefIndexCache[5];//top-right g_uiCache12_8x8RefIdx[0] - 2
874   const int32_t kiRefA  = pRefIndexCache[6];//left g_uiCache12_8x8RefIdx[0] - 1
875   const int32_t kiSadB  = (pMbSkipCache[1] == 1 ? pSadCostCache[1] : 0);
876   int32_t iSadC         = (pMbSkipCache[2] == 1 ? pSadCostCache[2] : 0);
877   const int32_t kiSadA  = (pMbSkipCache[3] == 1 ? pSadCostCache[3] : 0);
878   int32_t iRefSkip      = pMbSkipCache[2];
879 
880   int32_t iCount = 0;
881 
882   if (iRefC == REF_NOT_AVAIL) {
883     iRefC = pRefIndexCache[0];//top-left g_uiCache12_8x8RefIdx[0] - 4 - 1
884     iSadC  = (pMbSkipCache[0] == 1 ? pSadCostCache[0] : 0);
885     iRefSkip = pMbSkipCache[0];
886   }
887 
888   if (kiRefB == REF_NOT_AVAIL && iRefC == REF_NOT_AVAIL && kiRefA != REF_NOT_AVAIL) {
889     * iSadPredSkip = kiSadA;
890   } else {
891     iCount  = ((uiRef == kiRefA) && (pMbSkipCache[3] == 1)) << MB_LEFT_BIT;
892     iCount |= ((uiRef == kiRefB) && (pMbSkipCache[1] == 1)) << MB_TOP_BIT;
893     iCount |= ((uiRef == iRefC) && (iRefSkip == 1)) << MB_TOPRIGHT_BIT;
894     switch (iCount) {
895     case LEFT_MB_POS:// A
896       *iSadPredSkip = kiSadA;
897       break;
898     case TOP_MB_POS:// B
899       *iSadPredSkip = kiSadB;
900       break;
901     case TOPRIGHT_MB_POS:// C or D
902       *iSadPredSkip = iSadC;
903       break;
904     default:
905       *iSadPredSkip = WelsMedian (kiSadA, kiSadB, iSadC);
906       break;
907     }
908   }
909 }
910 }
911